1 //===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file describes the X86 AVX512 instruction set, defining the
10 // instructions, and the properties of those instructions that are needed for code
11 // generation, machine code emission, and analysis.
13 //===----------------------------------------------------------------------===//
15 // Group template arguments that can be derived from the vector type (EltNum x
16 // EltVT).  These are things like the register class for the writemask, etc.
17 // The idea is to pass one of these as the template argument rather than the
18 // individual arguments.
19 // The template is also used for scalar types, in this case numelts is 1.
20 class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
21                       string suffix = ""> {
22   RegisterClass RC = rc;
23   ValueType EltVT = eltvt;
24   int NumElts = numelts;
26   // Corresponding mask register class.
27   RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
29   // Corresponding mask register pair class.
30   RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
31                               !cast<RegisterOperand>("VK" # NumElts # "Pair"));
33   // Corresponding write-mask register class.
34   RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
36   // The mask VT.
37   ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
39   // Suffix used in the instruction mnemonic.
40   string Suffix = suffix;
42   // VTName is a string name for vector VT. For vector types it will be
43 // v # NumElts # EltVT, so for a vector of 8 i32 elements it will be v8i32.
44 // It is a little more involved for scalar types, where NumElts = 1.
45 // In this case we build v8f16, v4f32 or v2f64, depending on the element size.
46   string VTName = "v" # !if (!eq (NumElts, 1),
47                         !if (!eq (EltVT.Size, 16), 8,
48                         !if (!eq (EltVT.Size, 32), 4,
49                         !if (!eq (EltVT.Size, 64), 2, NumElts))), NumElts) # EltVT;
51   // The vector VT.
52   ValueType VT = !cast<ValueType>(VTName);
54   string EltTypeName = !cast<string>(EltVT);
55   // Size of the element type in bits, e.g. 32 for v16i32.
56   string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
57   int EltSize = EltVT.Size;
59   // "i" for integer types and "f" for floating-point types
60   string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
62   // Size of RC in bits, e.g. 512 for VR512.
63   int Size = VT.Size;
65   // The corresponding memory operand, e.g. i512mem for VR512.
66   X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
67   X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
68   // FP scalar memory operand for intrinsics - ssmem/sdmem.
69   Operand IntScalarMemOp = !if (!eq (EltTypeName, "f16"), !cast<Operand>("shmem"),
70                            !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
71                            !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?)));
73   // Load patterns
74   PatFrag LdFrag = !cast<PatFrag>("load" # VTName);
76   PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);
78   PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
79   PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);
81   PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f16"), !cast<PatFrags>("sse_load_f16"),
82                                !if (!eq (EltTypeName, "f32"), !cast<PatFrags>("sse_load_f32"),
83                                !if (!eq (EltTypeName, "f64"), !cast<PatFrags>("sse_load_f64"), ?)));
85   // The string to specify embedded broadcast in assembly.
86   string BroadcastStr = "{1to" # NumElts # "}";
88   // 8-bit compressed displacement tuple/subvector format.  This is only
89   // defined for NumElts <= 8.
90   CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
91                                !cast<CD8VForm>("CD8VT" # NumElts), ?);
93   SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
94                           !if (!eq (Size, 256), sub_ymm, ?));
96   Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
97                      !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
98                      !if (!eq (EltTypeName, "f16"), SSEPackedSingle, // FIXME?
99                      SSEPackedInt)));
101   RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X,
102                       !if (!eq (EltTypeName, "f16"), FR16X,
103                       FR64X));
105   dag ImmAllZerosV = (VT immAllZerosV);
107   string ZSuffix = !if (!eq (Size, 128), "Z128",
108                    !if (!eq (Size, 256), "Z256", "Z"));
109 }
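// Illustrative note (editor addition, not in the upstream source): for a
// concrete instantiation such as v16i32_info below, these fields resolve to
// VTName = "v16i32", VT = v16i32, KRC = VK16, KRCWM = VK16WM, KVT = v16i1,
// EltSizeName = "32", TypeVariantName = "i", Size = 512, MemOp = i512mem,
// LdFrag = loadv16i32, BroadcastStr = "{1to16}", ExeDomain = SSEPackedInt
// and ZSuffix = "Z".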
111 def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
112 def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
113 def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
114 def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
115 def v32f16_info : X86VectorVTInfo<32, f16, VR512, "ph">;
116 def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
117 def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;
119 // "x" in v32i8x_info means RC = VR256X
120 def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
121 def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
122 def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
123 def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
124 def v16f16x_info : X86VectorVTInfo<16, f16, VR256X, "ph">;
125 def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
126 def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;
128 def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
129 def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
130 def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
131 def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
132 def v8f16x_info  : X86VectorVTInfo<8,  f16, VR128X, "ph">;
133 def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
134 def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;
136 // We map scalar types to the smallest (128-bit) vector type
137 // with the appropriate element type. This allows us to use the same masking logic.
138 def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
139 def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
140 def f16x_info    : X86VectorVTInfo<1,  f16, VR128X, "sh">;
141 def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
142 def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;
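// Illustrative note (editor addition): f32x_info above has NumElts = 1 and
// EltVT.Size = 32, so VTName resolves to "v4f32", VT = v4f32, KRC = VK1 and
// KRCWM = VK1WM; the scalar SS/SD/SH instructions can therefore reuse the
// same vselect/X86selects-based masking machinery as the packed forms.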
144 class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
145                            X86VectorVTInfo i128> {
146   X86VectorVTInfo info512 = i512;
147   X86VectorVTInfo info256 = i256;
148   X86VectorVTInfo info128 = i128;
149 }
151 def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
152                                              v16i8x_info>;
153 def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
154                                              v8i16x_info>;
155 def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
156                                              v4i32x_info>;
157 def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
158                                              v2i64x_info>;
159 def avx512vl_f16_info : AVX512VLVectorVTInfo<v32f16_info, v16f16x_info,
160                                              v8f16x_info>;
161 def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
162                                              v4f32x_info>;
163 def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
164                                              v2f64x_info>;
166 class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
167                        ValueType _vt> {
168   RegisterClass KRC = _krc;
169   RegisterClass KRCWM = _krcwm;
170   ValueType KVT = _vt;
171 }
173 def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
174 def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
175 def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
176 def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
177 def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
178 def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
179 def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
181 // Used for matching masked operations. Ensures the operation part only has a
182 // single use.
183 def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
184                            (vselect node:$mask, node:$src1, node:$src2), [{
185   return isProfitableToFormMaskedOp(N);
186 }]>;
188 def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
189                               (X86selects node:$mask, node:$src1, node:$src2), [{
190   return isProfitableToFormMaskedOp(N);
191 }]>;
193 // This multiclass generates the masking variants from the non-masking
194 // variant.  It only provides the assembly pieces for the masking variants.
195 // It assumes custom ISel patterns for masking which can be provided as
196 // template arguments.
197 multiclass AVX512_maskable_custom<bits<8> O, Format F,
198                                   dag Outs,
199                                   dag Ins, dag MaskingIns, dag ZeroMaskingIns,
200                                   string OpcodeStr,
201                                   string AttSrcAsm, string IntelSrcAsm,
202                                   list<dag> Pattern,
203                                   list<dag> MaskingPattern,
204                                   list<dag> ZeroMaskingPattern,
205                                   string MaskingConstraint = "",
206                                   bit IsCommutable = 0,
207                                   bit IsKCommutable = 0,
208                                   bit IsKZCommutable = IsCommutable> {
209   let isCommutable = IsCommutable in
210     def NAME: AVX512<O, F, Outs, Ins,
211                        OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
212                                      "$dst, "#IntelSrcAsm#"}",
213                        Pattern>;
215   // Prefer over VMOV*rrk Pat<>
216   let isCommutable = IsKCommutable in
217     def NAME#k: AVX512<O, F, Outs, MaskingIns,
218                        OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
219                                      "$dst {${mask}}, "#IntelSrcAsm#"}",
220                        MaskingPattern>,
221               EVEX_K {
222       // In case of the 3src subclass this is overridden with a let.
223       string Constraints = MaskingConstraint;
224     }
226   // Zero-masking does not add any restrictions to the operand-commuting
227   // transformation, so it is OK to use IsCommutable instead of IsKCommutable.
228   let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
229     def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
230                        OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
231                                      "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
232                        ZeroMaskingPattern>,
233               EVEX_KZ;
234 }
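// Illustrative note (editor addition): for a record prefix NAME and an
// OpcodeStr such as "vaddps" (hypothetical here), this multiclass emits three
// records: NAME (unmasked, "vaddps ..., $dst"), NAME#k (merge-masking, EVEX_K,
// "vaddps ..., $dst {${mask}}") and NAME#kz (zero-masking, EVEX_KZ,
// "vaddps ..., $dst {${mask}} {z}"), with the supplied Pattern /
// MaskingPattern / ZeroMaskingPattern attached respectively.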
237 // Common base class of AVX512_maskable and AVX512_maskable_3src.
238 multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
239                                   dag Outs,
240                                   dag Ins, dag MaskingIns, dag ZeroMaskingIns,
241                                   string OpcodeStr,
242                                   string AttSrcAsm, string IntelSrcAsm,
243                                   dag RHS, dag MaskingRHS,
244                                   SDPatternOperator Select = vselect_mask,
245                                   string MaskingConstraint = "",
246                                   bit IsCommutable = 0,
247                                   bit IsKCommutable = 0,
248                                   bit IsKZCommutable = IsCommutable> :
249   AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
250                          AttSrcAsm, IntelSrcAsm,
251                          [(set _.RC:$dst, RHS)],
252                          [(set _.RC:$dst, MaskingRHS)],
253                          [(set _.RC:$dst,
254                                (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
255                          MaskingConstraint, IsCommutable,
256                          IsKCommutable, IsKZCommutable>;
258 // This multiclass generates the unconditional/non-masking, the masking and
259 // the zero-masking variant of the vector instruction.  In the masking case, the
260 // preserved vector elements come from a new dummy input operand tied to $dst.
261 // This version uses a separate dag for non-masking and masking.
262 multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
263                            dag Outs, dag Ins, string OpcodeStr,
264                            string AttSrcAsm, string IntelSrcAsm,
265                            dag RHS, dag MaskRHS,
266                            bit IsCommutable = 0, bit IsKCommutable = 0,
267                            bit IsKZCommutable = IsCommutable> :
268    AVX512_maskable_custom<O, F, Outs, Ins,
269                           !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
270                           !con((ins _.KRCWM:$mask), Ins),
271                           OpcodeStr, AttSrcAsm, IntelSrcAsm,
272                           [(set _.RC:$dst, RHS)],
273                           [(set _.RC:$dst,
274                               (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
275                           [(set _.RC:$dst,
276                               (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
277                           "$src0 = $dst", IsCommutable, IsKCommutable,
278                           IsKZCommutable>;
280 // This multiclass generates the unconditional/non-masking, the masking and
281 // the zero-masking variant of the vector instruction.  In the masking case, the
282 // preserved vector elements come from a new dummy input operand tied to $dst.
283 multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
284                            dag Outs, dag Ins, string OpcodeStr,
285                            string AttSrcAsm, string IntelSrcAsm,
286                            dag RHS,
287                            bit IsCommutable = 0, bit IsKCommutable = 0,
288                            bit IsKZCommutable = IsCommutable,
289                            SDPatternOperator Select = vselect_mask> :
290    AVX512_maskable_common<O, F, _, Outs, Ins,
291                           !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
292                           !con((ins _.KRCWM:$mask), Ins),
293                           OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
294                           (Select _.KRCWM:$mask, RHS, _.RC:$src0),
295                           Select, "$src0 = $dst", IsCommutable, IsKCommutable,
296                           IsKZCommutable>;
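// Illustrative sketch (editor addition): a typical two-operand use of
// AVX512_maskable elsewhere in this file looks roughly like
//   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
//                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
//                             "$src2, $src1", "$src1, $src2",
//                             (_.VT (OpNode _.RC:$src1, _.RC:$src2))>;
// where `_` is an X86VectorVTInfo; the multiclass then adds the $src0
// pass-through operand and the k/kz variants automatically.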
298 // This multiclass generates the unconditional/non-masking, the masking and
299 // the zero-masking variant of the scalar instruction.
300 multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
301                            dag Outs, dag Ins, string OpcodeStr,
302                            string AttSrcAsm, string IntelSrcAsm,
303                            dag RHS> :
304    AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
305                    RHS, 0, 0, 0, X86selects_mask>;
307 // Similar to AVX512_maskable but in this case one of the source operands
308 // ($src1) is already tied to $dst so we just use that for the preserved
309 // vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
310 // $src1.
311 multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
312                                 dag Outs, dag NonTiedIns, string OpcodeStr,
313                                 string AttSrcAsm, string IntelSrcAsm,
314                                 dag RHS,
315                                 bit IsCommutable = 0,
316                                 bit IsKCommutable = 0,
317                                 SDPatternOperator Select = vselect_mask,
318                                 bit MaskOnly = 0> :
319    AVX512_maskable_common<O, F, _, Outs,
320                           !con((ins _.RC:$src1), NonTiedIns),
321                           !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
322                           !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
323                           OpcodeStr, AttSrcAsm, IntelSrcAsm,
324                           !if(MaskOnly, (null_frag), RHS),
325                           (Select _.KRCWM:$mask, RHS, _.RC:$src1),
326                           Select, "", IsCommutable, IsKCommutable>;
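// Illustrative note (editor addition): because $src1 is tied to $dst, the
// merge-masked pattern produced here is
//   (Select _.KRCWM:$mask, RHS, _.RC:$src1)
// i.e. lanes cleared in the mask keep the value of the tied first source.
// Three-source ops (e.g. the VPERMI2/VPTERNLOG families) are built on this
// form.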
328 // Similar to AVX512_maskable_3src but in this case the input VT for the tied
329 // operand differs from the output VT. This requires a bitconvert on
330 // the preserved vector going into the vselect.
331 // NOTE: The unmasked pattern is disabled.
332 multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
333                                      X86VectorVTInfo InVT,
334                                      dag Outs, dag NonTiedIns, string OpcodeStr,
335                                      string AttSrcAsm, string IntelSrcAsm,
336                                      dag RHS, bit IsCommutable = 0> :
337    AVX512_maskable_common<O, F, OutVT, Outs,
338                           !con((ins InVT.RC:$src1), NonTiedIns),
339                           !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
340                           !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
341                           OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
342                           (vselect_mask InVT.KRCWM:$mask, RHS,
343                            (bitconvert InVT.RC:$src1)),
344                            vselect_mask, "", IsCommutable>;
346 multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
347                                      dag Outs, dag NonTiedIns, string OpcodeStr,
348                                      string AttSrcAsm, string IntelSrcAsm,
349                                      dag RHS,
350                                      bit IsCommutable = 0,
351                                      bit IsKCommutable = 0,
352                                      bit MaskOnly = 0> :
353    AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
354                         IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
355                         X86selects_mask, MaskOnly>;
357 multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
358                                   dag Outs, dag Ins,
359                                   string OpcodeStr,
360                                   string AttSrcAsm, string IntelSrcAsm,
361                                   list<dag> Pattern> :
362    AVX512_maskable_custom<O, F, Outs, Ins,
363                           !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
364                           !con((ins _.KRCWM:$mask), Ins),
365                           OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
366                           "$src0 = $dst">;
368 multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
369                                        dag Outs, dag NonTiedIns,
370                                        string OpcodeStr,
371                                        string AttSrcAsm, string IntelSrcAsm,
372                                        list<dag> Pattern> :
373    AVX512_maskable_custom<O, F, Outs,
374                           !con((ins _.RC:$src1), NonTiedIns),
375                           !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
376                           !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
377                           OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
378                           "">;
380 // Instructions with a mask that put the result in a mask register,
381 // like "compare" and "vptest".
382 multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
383                                   dag Outs,
384                                   dag Ins, dag MaskingIns,
385                                   string OpcodeStr,
386                                   string AttSrcAsm, string IntelSrcAsm,
387                                   list<dag> Pattern,
388                                   list<dag> MaskingPattern,
389                                   bit IsCommutable = 0> {
390     let isCommutable = IsCommutable in {
391     def NAME: AVX512<O, F, Outs, Ins,
392                        OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
393                                      "$dst, "#IntelSrcAsm#"}",
394                        Pattern>;
396     def NAME#k: AVX512<O, F, Outs, MaskingIns,
397                        OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
398                                      "$dst {${mask}}, "#IntelSrcAsm#"}",
399                        MaskingPattern>, EVEX_K;
400     }
401 }
403 multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
404                                   dag Outs,
405                                   dag Ins, dag MaskingIns,
406                                   string OpcodeStr,
407                                   string AttSrcAsm, string IntelSrcAsm,
408                                   dag RHS, dag MaskingRHS,
409                                   bit IsCommutable = 0> :
410   AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
411                          AttSrcAsm, IntelSrcAsm,
412                          [(set _.KRC:$dst, RHS)],
413                          [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
415 multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
416                            dag Outs, dag Ins, string OpcodeStr,
417                            string AttSrcAsm, string IntelSrcAsm,
418                            dag RHS, dag RHS_su, bit IsCommutable = 0> :
419    AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
420                           !con((ins _.KRCWM:$mask), Ins),
421                           OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
422                           (and _.KRCWM:$mask, RHS_su), IsCommutable>;
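// Illustrative note (editor addition): in the masked compare variant the
// writemask is ANDed with the single-use form of the compare, i.e. the
// generated pattern is (and _.KRCWM:$mask, RHS_su) -> KRC:$dst, so masked-off
// lanes produce 0 bits in the result mask rather than merged values.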
424 // Used by conversion instructions.
425 multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
426                                   dag Outs,
427                                   dag Ins, dag MaskingIns, dag ZeroMaskingIns,
428                                   string OpcodeStr,
429                                   string AttSrcAsm, string IntelSrcAsm,
430                                   dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
431   AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
432                          AttSrcAsm, IntelSrcAsm,
433                          [(set _.RC:$dst, RHS)],
434                          [(set _.RC:$dst, MaskingRHS)],
435                          [(set _.RC:$dst, ZeroMaskingRHS)],
436                          "$src0 = $dst">;
438 multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
439                                dag Outs, dag NonTiedIns, string OpcodeStr,
440                                string AttSrcAsm, string IntelSrcAsm,
441                                dag RHS, dag MaskingRHS, bit IsCommutable,
442                                bit IsKCommutable> :
443    AVX512_maskable_custom<O, F, Outs,
444                           !con((ins _.RC:$src1), NonTiedIns),
445                           !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
446                           !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
447                           OpcodeStr, AttSrcAsm, IntelSrcAsm,
448                           [(set _.RC:$dst, RHS)],
449                           [(set _.RC:$dst,
450                             (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
451                           [(set _.RC:$dst,
452                             (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
453                           "", IsCommutable, IsKCommutable>;
455 // Alias instruction that maps a zero vector to pxor / xorp* for AVX-512.
456 // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
457 // swizzled by ExecutionDomainFix to pxor.
458 // We set canFoldAsLoad because this can be converted to a constant-pool
459 // load of an all-zeros value if folding it would be beneficial.
460 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
461     isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
462 def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
463                [(set VR512:$dst, (v16i32 immAllZerosV))]>;
464 def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
465                [(set VR512:$dst, (v16i32 immAllOnesV))]>;
466 }
468 let Predicates = [HasAVX512] in {
469 def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
470 def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
471 def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
472 def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
473 def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
474 }
476 // Alias instructions that allow VPTERNLOG to be used with a mask to create
477 // a mix of all ones and all zeros elements. This is done this way to force
478 // the same register to be used as input for all three sources.
479 let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
480 def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
481                                 (ins VK16WM:$mask), "",
482                            [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
483                                                       (v16i32 immAllOnesV),
484                                                       (v16i32 immAllZerosV)))]>;
485 def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
486                                 (ins VK8WM:$mask), "",
487                 [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
488                                            (v8i64 immAllOnesV),
489                                            (v8i64 immAllZerosV)))]>;
490 }
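// Illustrative note (editor addition): the vselect above is effectively a
// sign-extension of the k-mask into vector lanes -- e.g. a v16i1 mask of
// 0b...0101 yields v16i32 elements {..., 0, -1, 0, -1} -- which is why these
// pseudos are named SEXT_MASK.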
492 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
493     isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
494 def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
495                [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
496 def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
497                [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
498 }
500 let Predicates = [HasAVX512] in {
501 def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
502 def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
503 def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
504 def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
505 def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
506 def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
507 def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
508 def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
509 def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
510 def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
511 }
513 let Predicates = [HasFP16] in {
514 def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
515 def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
516 def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
517 }
519 // Alias instructions that map fld0 to xorps for sse or vxorps for avx.
520 // This is expanded by ExpandPostRAPseudos.
521 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
522     isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
523   def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
524                           [(set FR32X:$dst, fp32imm0)]>;
525   def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
526                           [(set FR64X:$dst, fp64imm0)]>;
527   def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
528                             [(set VR128X:$dst, fp128imm0)]>;
529 }
531 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
532     isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasFP16] in {
533   def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
534                           [(set FR16X:$dst, fp16imm0)]>;
535 }
537 //===----------------------------------------------------------------------===//
538 // AVX-512 - VECTOR INSERT
541 // Supports two different pattern operators for masked and unmasked ops. Allows
542 // null_frag to be passed for one.
543 multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
544                                   X86VectorVTInfo To,
545                                   SDPatternOperator vinsert_insert,
546                                   SDPatternOperator vinsert_for_mask,
547                                   X86FoldableSchedWrite sched> {
548   let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
549     defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
550                    (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
551                    "vinsert" # From.EltTypeName # "x" # From.NumElts,
552                    "$src3, $src2, $src1", "$src1, $src2, $src3",
553                    (vinsert_insert:$src3 (To.VT To.RC:$src1),
554                                          (From.VT From.RC:$src2),
555                                          (iPTR imm)),
556                    (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
557                                            (From.VT From.RC:$src2),
558                                            (iPTR imm))>,
559                    AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
560     let mayLoad = 1 in
561     defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
562                    (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
563                    "vinsert" # From.EltTypeName # "x" # From.NumElts,
564                    "$src3, $src2, $src1", "$src1, $src2, $src3",
565                    (vinsert_insert:$src3 (To.VT To.RC:$src1),
566                                (From.VT (From.LdFrag addr:$src2)),
567                                (iPTR imm)),
568                    (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
569                                (From.VT (From.LdFrag addr:$src2)),
570                                (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
571                    EVEX_CD8<From.EltSize, From.CD8TupleForm>,
572                    Sched<[sched.Folded, sched.ReadAfterFold]>;
573   }
574 }
576 // Passes the same pattern operator for masked and unmasked ops.
577 multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
578                             X86VectorVTInfo To,
579                             SDPatternOperator vinsert_insert,
580                             X86FoldableSchedWrite sched> :
581   vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;
583 multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
584                        X86VectorVTInfo To, PatFrag vinsert_insert,
585                        SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
586   let Predicates = p in {
587     def : Pat<(vinsert_insert:$ins
588                      (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
589               (To.VT (!cast<Instruction>(InstrStr#"rr")
590                      To.RC:$src1, From.RC:$src2,
591                      (INSERT_get_vinsert_imm To.RC:$ins)))>;
593     def : Pat<(vinsert_insert:$ins
594                   (To.VT To.RC:$src1),
595                   (From.VT (From.LdFrag addr:$src2)),
596                   (iPTR imm)),
597               (To.VT (!cast<Instruction>(InstrStr#"rm")
598                   To.RC:$src1, addr:$src2,
599                   (INSERT_get_vinsert_imm To.RC:$ins)))>;
600   }
601 }
603 multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
604                             ValueType EltVT64, int Opcode256,
605                             X86FoldableSchedWrite sched> {
607   let Predicates = [HasVLX] in
608     defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
609                                  X86VectorVTInfo< 4, EltVT32, VR128X>,
610                                  X86VectorVTInfo< 8, EltVT32, VR256X>,
611                                  vinsert128_insert, sched>, EVEX_V256;
613   defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
614                                  X86VectorVTInfo< 4, EltVT32, VR128X>,
615                                  X86VectorVTInfo<16, EltVT32, VR512>,
616                                  vinsert128_insert, sched>, EVEX_V512;
618   defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
619                                  X86VectorVTInfo< 4, EltVT64, VR256X>,
620                                  X86VectorVTInfo< 8, EltVT64, VR512>,
621                                  vinsert256_insert, sched>, VEX_W, EVEX_V512;
623   // Even with DQI we'd like to only use these instructions for masking.
624   let Predicates = [HasVLX, HasDQI] in
625     defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
626                                    X86VectorVTInfo< 2, EltVT64, VR128X>,
627                                    X86VectorVTInfo< 4, EltVT64, VR256X>,
628                                    null_frag, vinsert128_insert, sched>,
629                                    VEX_W1X, EVEX_V256;
631   // Even with DQI we'd like to only use these instructions for masking.
632   let Predicates = [HasDQI] in {
633     defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
634                                  X86VectorVTInfo< 2, EltVT64, VR128X>,
635                                  X86VectorVTInfo< 8, EltVT64, VR512>,
636                                  null_frag, vinsert128_insert, sched>,
637                                  VEX_W, EVEX_V512;
639     defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
640                                    X86VectorVTInfo< 8, EltVT32, VR256X>,
641                                    X86VectorVTInfo<16, EltVT32, VR512>,
642                                    null_frag, vinsert256_insert, sched>,
643                                    EVEX_V512;
644   }
645 }
647 // FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
648 defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
649 defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
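// Illustrative note (editor addition): the mnemonic is assembled from
// From.EltTypeName and From.NumElts, so for example the "32x4Z256" variants
// above become the "vinsertf32x4"/"vinserti32x4" instructions, and the
// underlying AVX512_maskable_split expands each of them into rr/rm plus the
// masked rrk/rmk and zero-masked rrkz/rmkz record variants.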
651 // Codegen patterns with the alternative types.
652 // Even with AVX512DQ we'll still use these for unmasked operations.
653 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
654               vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
655 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
656               vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
658 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
659               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
660 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
661               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
663 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
664               vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
665 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
666               vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
668 // Codegen patterns with the alternative types: insert VEC128 into VEC256.
669 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
670               vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
671 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
672               vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
673 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
674               vinsert128_insert, INSERT_get_vinsert128_imm, [HasFP16, HasVLX]>;
675 // Codegen patterns with the alternative types: insert VEC128 into VEC512.
676 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
677               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
678 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
679                vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
680 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
681               vinsert128_insert, INSERT_get_vinsert128_imm, [HasFP16]>;
682 // Codegen patterns with the alternative types: insert VEC256 into VEC512.
683 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
684               vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
685 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
686               vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
687 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
688               vinsert256_insert, INSERT_get_vinsert256_imm, [HasFP16]>;
691 multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
692                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
693                                  PatFrag vinsert_insert,
694                                  SDNodeXForm INSERT_get_vinsert_imm,
695                                  list<Predicate> p> {
696 let Predicates = p in {
697   def : Pat<(Cast.VT
698              (vselect_mask Cast.KRCWM:$mask,
699                            (bitconvert
700                             (vinsert_insert:$ins (To.VT To.RC:$src1),
701                                                  (From.VT From.RC:$src2),
702                                                  (iPTR imm))),
703                            Cast.RC:$src0)),
704             (!cast<Instruction>(InstrStr#"rrk")
705              Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
706              (INSERT_get_vinsert_imm To.RC:$ins))>;
707   def : Pat<(Cast.VT
708              (vselect_mask Cast.KRCWM:$mask,
709                            (bitconvert
710                             (vinsert_insert:$ins (To.VT To.RC:$src1),
711                                                  (From.VT
712                                                   (bitconvert
713                                                    (From.LdFrag addr:$src2))),
714                                                  (iPTR imm))),
715                            Cast.RC:$src0)),
716             (!cast<Instruction>(InstrStr#"rmk")
717              Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
718              (INSERT_get_vinsert_imm To.RC:$ins))>;
720   def : Pat<(Cast.VT
721              (vselect_mask Cast.KRCWM:$mask,
722                            (bitconvert
723                             (vinsert_insert:$ins (To.VT To.RC:$src1),
724                                                  (From.VT From.RC:$src2),
725                                                  (iPTR imm))),
726                            Cast.ImmAllZerosV)),
727             (!cast<Instruction>(InstrStr#"rrkz")
728              Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
729              (INSERT_get_vinsert_imm To.RC:$ins))>;
730   def : Pat<(Cast.VT
731              (vselect_mask Cast.KRCWM:$mask,
732                            (bitconvert
733                             (vinsert_insert:$ins (To.VT To.RC:$src1),
734                                                  (From.VT (From.LdFrag addr:$src2)),
735                                                  (iPTR imm))),
736                            Cast.ImmAllZerosV)),
737             (!cast<Instruction>(InstrStr#"rmkz")
738              Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
739              (INSERT_get_vinsert_imm To.RC:$ins))>;
740 }
741 }
743 defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
744                              v8f32x_info, vinsert128_insert,
745                              INSERT_get_vinsert128_imm, [HasVLX]>;
746 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
747                              v4f64x_info, vinsert128_insert,
748                              INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
750 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
751                              v8i32x_info, vinsert128_insert,
752                              INSERT_get_vinsert128_imm, [HasVLX]>;
753 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
754                              v8i32x_info, vinsert128_insert,
755                              INSERT_get_vinsert128_imm, [HasVLX]>;
756 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
757                              v8i32x_info, vinsert128_insert,
758                              INSERT_get_vinsert128_imm, [HasVLX]>;
759 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
760                              v4i64x_info, vinsert128_insert,
761                              INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
762 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
763                              v4i64x_info, vinsert128_insert,
764                              INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
765 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
766                              v4i64x_info, vinsert128_insert,
767                              INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
769 defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
770                              v16f32_info, vinsert128_insert,
771                              INSERT_get_vinsert128_imm, [HasAVX512]>;
772 defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
773                              v8f64_info, vinsert128_insert,
774                              INSERT_get_vinsert128_imm, [HasDQI]>;
776 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
777                              v16i32_info, vinsert128_insert,
778                              INSERT_get_vinsert128_imm, [HasAVX512]>;
779 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
780                              v16i32_info, vinsert128_insert,
781                              INSERT_get_vinsert128_imm, [HasAVX512]>;
782 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
783                              v16i32_info, vinsert128_insert,
784                              INSERT_get_vinsert128_imm, [HasAVX512]>;
785 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
786                              v8i64_info, vinsert128_insert,
787                              INSERT_get_vinsert128_imm, [HasDQI]>;
788 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
789                              v8i64_info, vinsert128_insert,
790                              INSERT_get_vinsert128_imm, [HasDQI]>;
791 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
792                              v8i64_info, vinsert128_insert,
793                              INSERT_get_vinsert128_imm, [HasDQI]>;
795 defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
796                              v16f32_info, vinsert256_insert,
797                              INSERT_get_vinsert256_imm, [HasDQI]>;
798 defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
799                              v8f64_info, vinsert256_insert,
800                              INSERT_get_vinsert256_imm, [HasAVX512]>;
802 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
803                              v16i32_info, vinsert256_insert,
804                              INSERT_get_vinsert256_imm, [HasDQI]>;
805 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
806                              v16i32_info, vinsert256_insert,
807                              INSERT_get_vinsert256_imm, [HasDQI]>;
808 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
809                              v16i32_info, vinsert256_insert,
810                              INSERT_get_vinsert256_imm, [HasDQI]>;
811 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
812                              v8i64_info, vinsert256_insert,
813                              INSERT_get_vinsert256_imm, [HasAVX512]>;
814 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
815                              v8i64_info, vinsert256_insert,
816                              INSERT_get_vinsert256_imm, [HasAVX512]>;
817 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
818                              v8i64_info, vinsert256_insert,
819                              INSERT_get_vinsert256_imm, [HasAVX512]>;
821 // vinsertps - insert f32 to XMM
822 let ExeDomain = SSEPackedSingle in {
823 let isCommutable = 1 in
824 def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
825       (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
826       "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
827       [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
828       EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
829 def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
830       (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
831       "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
832       [(set VR128X:$dst, (X86insertps VR128X:$src1,
833                           (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
834                           timm:$src3))]>,
835       EVEX_4V, EVEX_CD8<32, CD8VT1>,
836       Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
837 }
839 //===----------------------------------------------------------------------===//
840 // AVX-512 VECTOR EXTRACT
841 //---
843 // Supports two different pattern operators for masked and unmasked ops. Allows
844 // null_frag to be passed for one.
845 multiclass vextract_for_size_split<int Opcode,
846                                    X86VectorVTInfo From, X86VectorVTInfo To,
847                                    SDPatternOperator vextract_extract,
848                                    SDPatternOperator vextract_for_mask,
849                                    SchedWrite SchedRR, SchedWrite SchedMR> {
851   let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
852     defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
853                 (ins From.RC:$src1, u8imm:$idx),
854                 "vextract" # To.EltTypeName # "x" # To.NumElts,
855                 "$idx, $src1", "$src1, $idx",
856                 (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
857                 (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
858                 AVX512AIi8Base, EVEX, Sched<[SchedRR]>;
860     def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
861                     (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
862                     "vextract" # To.EltTypeName # "x" # To.NumElts #
863                         "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
864                     [(store (To.VT (vextract_extract:$idx
865                                     (From.VT From.RC:$src1), (iPTR imm))),
866                              addr:$dst)]>, EVEX,
867                     Sched<[SchedMR]>;
869     let mayStore = 1, hasSideEffects = 0 in
870     def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
871                     (ins To.MemOp:$dst, To.KRCWM:$mask,
872                                         From.RC:$src1, u8imm:$idx),
873                      "vextract" # To.EltTypeName # "x" # To.NumElts #
874                           "\t{$idx, $src1, $dst {${mask}}|"
875                           "$dst {${mask}}, $src1, $idx}", []>,
876                     EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
877   }
878 }
880 // Passes the same pattern operator for masked and unmasked ops.
881 multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
882                              X86VectorVTInfo To,
883                              SDPatternOperator vextract_extract,
884                              SchedWrite SchedRR, SchedWrite SchedMR> :
885   vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;
887 // Codegen patterns for the alternative types.
888 multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
889                 X86VectorVTInfo To, PatFrag vextract_extract,
890                 SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
891   let Predicates = p in {
892      def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
893                (To.VT (!cast<Instruction>(InstrStr#"rr")
894                           From.RC:$src1,
895                           (EXTRACT_get_vextract_imm To.RC:$ext)))>;
896      def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
897                               (iPTR imm))), addr:$dst),
898                (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
899                 (EXTRACT_get_vextract_imm To.RC:$ext))>;
900   }
901 }
903 multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
904                              ValueType EltVT64, int Opcode256,
905                              SchedWrite SchedRR, SchedWrite SchedMR> {
906   let Predicates = [HasAVX512] in {
907     defm NAME # "32x4Z" : vextract_for_size<Opcode128,
908                                    X86VectorVTInfo<16, EltVT32, VR512>,
909                                    X86VectorVTInfo< 4, EltVT32, VR128X>,
910                                    vextract128_extract, SchedRR, SchedMR>,
911                                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
912     defm NAME # "64x4Z" : vextract_for_size<Opcode256,
913                                    X86VectorVTInfo< 8, EltVT64, VR512>,
914                                    X86VectorVTInfo< 4, EltVT64, VR256X>,
915                                    vextract256_extract, SchedRR, SchedMR>,
916                                        VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
917   }
918   let Predicates = [HasVLX] in
919     defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
920                                  X86VectorVTInfo< 8, EltVT32, VR256X>,
921                                  X86VectorVTInfo< 4, EltVT32, VR128X>,
922                                  vextract128_extract, SchedRR, SchedMR>,
923                                      EVEX_V256, EVEX_CD8<32, CD8VT4>;
925   // Even with DQI we'd like to only use these instructions for masking.
926   let Predicates = [HasVLX, HasDQI] in
927     defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
928                                  X86VectorVTInfo< 4, EltVT64, VR256X>,
929                                  X86VectorVTInfo< 2, EltVT64, VR128X>,
930                                  null_frag, vextract128_extract, SchedRR, SchedMR>,
931                                      VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;
933   // Even with DQI we'd like to only use these instructions for masking.
934   let Predicates = [HasDQI] in {
935     defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
936                                  X86VectorVTInfo< 8, EltVT64, VR512>,
937                                  X86VectorVTInfo< 2, EltVT64, VR128X>,
938                                  null_frag, vextract128_extract, SchedRR, SchedMR>,
939                                      VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
940     defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
941                                  X86VectorVTInfo<16, EltVT32, VR512>,
942                                  X86VectorVTInfo< 8, EltVT32, VR256X>,
943                                  null_frag, vextract256_extract, SchedRR, SchedMR>,
944                                      EVEX_V512, EVEX_CD8<32, CD8VT8>;
945   }
946 }
948 // TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
949 defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
950 defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
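// Illustrative note (editor addition): analogously to the insert case, the
// "32x4Z" extract variants above become "vextractf32x4"/"vextracti32x4", with
// register (rr/rrk/rrkz) forms from AVX512_maskable_split plus the plain (mr)
// and merge-masked (mrk) store forms defined in vextract_for_size_split.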
952 // extract_subvector codegen patterns with the alternative types.
953 // Even with AVX512DQ we'll still use these for unmasked operations.
954 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
955           vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
956 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
957           vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
959 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
960           vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
961 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
962           vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
964 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
965           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
966 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
967           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
969 // Codegen patterns with the alternative types: extract VEC128 from VEC256.
970 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
971           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
972 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
973           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
974 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info,
975           vextract128_extract, EXTRACT_get_vextract128_imm, [HasFP16, HasVLX]>;
977 // Codegen patterns with the alternative types: extract VEC128 from VEC512.
978 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
979                  vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
980 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
981                  vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
982 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info,
983                  vextract128_extract, EXTRACT_get_vextract128_imm, [HasFP16]>;
984 // Codegen patterns with the alternative types: extract VEC256 from VEC512.
985 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
986                  vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
987 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
988                  vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
989 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,
990                  vextract256_extract, EXTRACT_get_vextract256_imm, [HasFP16]>;
993 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
994 // smaller extract to enable EVEX->VEX.
995 let Predicates = [NoVLX] in {
996 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
997           (v2i64 (VEXTRACTI128rr
998                   (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
999                   (iPTR 1)))>;
1000 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
1001           (v2f64 (VEXTRACTF128rr
1002                   (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
1003                   (iPTR 1)))>;
1004 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
1005           (v4i32 (VEXTRACTI128rr
1006                   (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
1007                   (iPTR 1)))>;
1008 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
1009           (v4f32 (VEXTRACTF128rr
1010                   (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
1011                   (iPTR 1)))>;
1012 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
1013           (v8i16 (VEXTRACTI128rr
1014                   (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
1015                   (iPTR 1)))>;
1016 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
1017           (v16i8 (VEXTRACTI128rr
1018                   (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
1019                   (iPTR 1)))>;
1020 }
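// Illustrative encodings for the rewrite above (shown only as an example):
//   vextracti32x4 $1, %zmm0, %xmm1    (EVEX-only encoding)
//   vextracti128  $1, %ymm0, %xmm1    (same result, VEX-encodable and shorter)
// Working on the ymm half of the source keeps the extract at 256 bits, so the
// unmasked form can use, or later be compressed to, the VEX encoding.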
1022 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
1023 // smaller extract to enable EVEX->VEX.
1024 let Predicates = [HasVLX] in {
1025 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
1026           (v2i64 (VEXTRACTI32x4Z256rr
1027                   (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
1028                   (iPTR 1)))>;
1029 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
1030           (v2f64 (VEXTRACTF32x4Z256rr
1031                   (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
1032                   (iPTR 1)))>;
1033 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
1034           (v4i32 (VEXTRACTI32x4Z256rr
1035                   (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
1036                   (iPTR 1)))>;
1037 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
1038           (v4f32 (VEXTRACTF32x4Z256rr
1039                   (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
1040                   (iPTR 1)))>;
1041 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
1042           (v8i16 (VEXTRACTI32x4Z256rr
1043                   (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
1044                   (iPTR 1)))>;
1045 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
1046           (v16i8 (VEXTRACTI32x4Z256rr
1047                   (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
1048                   (iPTR 1)))>;
1049 }
1051 let Predicates = [HasFP16, HasVLX] in
1052 def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
1053           (v8f16 (VEXTRACTF32x4Z256rr
1054                   (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
1055                   (iPTR 1)))>;
1058 // Additional patterns for handling a bitcast between the vselect and the
1059 // extract_subvector.
1060 multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
1061                                   X86VectorVTInfo To, X86VectorVTInfo Cast,
1062                                   PatFrag vextract_extract,
1063                                   SDNodeXForm EXTRACT_get_vextract_imm,
1064                                   list<Predicate> p> {
1065 let Predicates = p in {
1066   def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
1067                                    (bitconvert
1068                                     (To.VT (vextract_extract:$ext
1069                                             (From.VT From.RC:$src), (iPTR imm)))),
1070                                    To.RC:$src0)),
1071             (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
1072                       Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
1073                       (EXTRACT_get_vextract_imm To.RC:$ext)))>;
1075   def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
1076                                    (bitconvert
1077                                     (To.VT (vextract_extract:$ext
1078                                             (From.VT From.RC:$src), (iPTR imm)))),
1079                                    Cast.ImmAllZerosV)),
1080             (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
1081                       Cast.KRCWM:$mask, From.RC:$src,
1082                       (EXTRACT_get_vextract_imm To.RC:$ext)))>;
1083 }
1084 }
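// Sketch of one expansion of the multiclass above (illustrative, based on the
// first VEXTRACTF32x4Z256 instantiation below): a masked v4f32 select over a
// bitcast of a 128-bit extract from v4f64 still selects the masked extract:
//   (v4f32 (vselect_mask VK4WM:$mask,
//            (bitconvert (v2f64 (extract_subvector (v4f64 VR256X:$src),
//                                                  (iPTR 2)))),
//            VR128X:$src0))
//     --> (VEXTRACTF32x4Z256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src, 1)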
1086 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
1087                               v4f32x_info, vextract128_extract,
1088                               EXTRACT_get_vextract128_imm, [HasVLX]>;
1089 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
1090                               v2f64x_info, vextract128_extract,
1091                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1093 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
1094                               v4i32x_info, vextract128_extract,
1095                               EXTRACT_get_vextract128_imm, [HasVLX]>;
1096 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
1097                               v4i32x_info, vextract128_extract,
1098                               EXTRACT_get_vextract128_imm, [HasVLX]>;
1099 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
1100                               v4i32x_info, vextract128_extract,
1101                               EXTRACT_get_vextract128_imm, [HasVLX]>;
1102 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
1103                               v2i64x_info, vextract128_extract,
1104                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1105 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
1106                               v2i64x_info, vextract128_extract,
1107                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1108 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
1109                               v2i64x_info, vextract128_extract,
1110                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1112 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
1113                               v4f32x_info, vextract128_extract,
1114                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
1115 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
1116                               v2f64x_info, vextract128_extract,
1117                               EXTRACT_get_vextract128_imm, [HasDQI]>;
1119 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
1120                               v4i32x_info, vextract128_extract,
1121                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
1122 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
1123                               v4i32x_info, vextract128_extract,
1124                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
1125 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
1126                               v4i32x_info, vextract128_extract,
1127                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
1128 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
1129                               v2i64x_info, vextract128_extract,
1130                               EXTRACT_get_vextract128_imm, [HasDQI]>;
1131 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
1132                               v2i64x_info, vextract128_extract,
1133                               EXTRACT_get_vextract128_imm, [HasDQI]>;
1134 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
1135                               v2i64x_info, vextract128_extract,
1136                               EXTRACT_get_vextract128_imm, [HasDQI]>;
1138 defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
1139                               v8f32x_info, vextract256_extract,
1140                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1141 defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
1142                               v4f64x_info, vextract256_extract,
1143                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1145 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
1146                               v8i32x_info, vextract256_extract,
1147                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1148 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
1149                               v8i32x_info, vextract256_extract,
1150                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1151 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
1152                               v8i32x_info, vextract256_extract,
1153                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1154 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
1155                               v4i64x_info, vextract256_extract,
1156                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1157 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
1158                               v4i64x_info, vextract256_extract,
1159                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1160 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
1161                               v4i64x_info, vextract256_extract,
1162                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1164 // vextractps - extract 32 bits from XMM
1165 def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
1166       (ins VR128X:$src1, u8imm:$src2),
1167       "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1168       [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
1169       EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
1171 def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1172       (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1173       "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1174       [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1175                           addr:$dst)]>,
1176       EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
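// Usage sketch (illustrative assembly only):
//   vextractps $2, %xmm0, %eax      extracts element 2 of the v4f32 into a GPR
//   vextractps $2, %xmm0, 8(%rsp)   stores the selected 32 bits directly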
1178 //===---------------------------------------------------------------------===//
1179 // AVX-512 BROADCAST
1180 //---
1181 // broadcast with a scalar argument.
1182 multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
1183                             string Name,
1184                             X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
1185   def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1186             (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
1187              (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1188   def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1189                                        (X86VBroadcast SrcInfo.FRC:$src),
1190                                        DestInfo.RC:$src0)),
1191             (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
1192              DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1193              (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1194   def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1195                                        (X86VBroadcast SrcInfo.FRC:$src),
1196                                        DestInfo.ImmAllZerosV)),
1197             (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
1198              DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1199 }
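// Illustrative expansion (using the VBROADCASTSS instantiation further down):
// the FP scalar is first retyped as a 128-bit vector register so the vector
// broadcast instruction can read it, e.g. roughly
//   (v16f32 (X86VBroadcast FR32X:$src))
//     --> (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))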
1201 // Split version to allow mask and broadcast node to be different types. This
1202 // helps support the 32x2 broadcasts.
1203 multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1204                                      string Name,
1205                                      SchedWrite SchedRR, SchedWrite SchedRM,
1206                                      X86VectorVTInfo MaskInfo,
1207                                      X86VectorVTInfo DestInfo,
1208                                      X86VectorVTInfo SrcInfo,
1209                                      bit IsConvertibleToThreeAddress,
1210                                      SDPatternOperator UnmaskedOp = X86VBroadcast,
1211                                      SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
1212   let hasSideEffects = 0 in
1213   def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
1214                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1215                     [(set MaskInfo.RC:$dst,
1216                       (MaskInfo.VT
1217                        (bitconvert
1218                         (DestInfo.VT
1219                          (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
1220                     DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
1221   def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1222                       (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
1223                       !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1224                        "${dst} {${mask}} {z}, $src}"),
1225                        [(set MaskInfo.RC:$dst,
1226                          (vselect_mask MaskInfo.KRCWM:$mask,
1227                           (MaskInfo.VT
1228                            (bitconvert
1229                             (DestInfo.VT
1230                              (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1231                           MaskInfo.ImmAllZerosV))],
1232                        DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
1233   let Constraints = "$src0 = $dst" in
1234   def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1235                      (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1236                           SrcInfo.RC:$src),
1237                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1238                      "${dst} {${mask}}, $src}"),
1239                      [(set MaskInfo.RC:$dst,
1240                        (vselect_mask MaskInfo.KRCWM:$mask,
1241                         (MaskInfo.VT
1242                          (bitconvert
1243                           (DestInfo.VT
1244                            (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1245                         MaskInfo.RC:$src0))],
1246                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;
1248   let hasSideEffects = 0, mayLoad = 1 in
1249   def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1250                     (ins SrcInfo.ScalarMemOp:$src),
1251                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1252                     [(set MaskInfo.RC:$dst,
1253                       (MaskInfo.VT
1254                        (bitconvert
1255                         (DestInfo.VT
1256                          (UnmaskedBcastOp addr:$src)))))],
1257                     DestInfo.ExeDomain>, T8PD, EVEX,
1258                     EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1260   def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1261                       (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
1262                       !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1263                        "${dst} {${mask}} {z}, $src}"),
1264                        [(set MaskInfo.RC:$dst,
1265                          (vselect_mask MaskInfo.KRCWM:$mask,
1266                           (MaskInfo.VT
1267                            (bitconvert
1268                             (DestInfo.VT
1269                              (SrcInfo.BroadcastLdFrag addr:$src)))),
1270                           MaskInfo.ImmAllZerosV))],
1271                        DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
1272                        EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1274   let Constraints = "$src0 = $dst",
1275       isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
1276   def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1277                      (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1278                           SrcInfo.ScalarMemOp:$src),
1279                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1280                      "${dst} {${mask}}, $src}"),
1281                      [(set MaskInfo.RC:$dst,
1282                        (vselect_mask MaskInfo.KRCWM:$mask,
1283                         (MaskInfo.VT
1284                          (bitconvert
1285                           (DestInfo.VT
1286                            (SrcInfo.BroadcastLdFrag addr:$src)))),
1287                         MaskInfo.RC:$src0))],
1288                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
1289                       EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1290 }
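// Illustrative use of the split form: for the VBROADCASTF32X2 instantiation
// later in this file the broadcast itself is modelled on 64-bit elements,
//   (v8f64 (X86VBroadcast (v2f64 VR128X:$src)))
// while the write-mask is applied per 32-bit element (VK16WM) through the
// bitconvert to the v16f32 MaskInfo type.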
1292 // Helper multiclass to force the mask and broadcast result to the same type.
1293 multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
1294                                SchedWrite SchedRR, SchedWrite SchedRM,
1295                                X86VectorVTInfo DestInfo,
1296                                X86VectorVTInfo SrcInfo,
1297                                bit IsConvertibleToThreeAddress> :
1298   avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
1299                             DestInfo, DestInfo, SrcInfo,
1300                             IsConvertibleToThreeAddress>;
1302 multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1303                                                        AVX512VLVectorVTInfo _> {
1304   let Predicates = [HasAVX512] in {
1305     defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1306                                   WriteFShuffle256Ld, _.info512, _.info128, 1>,
1307               avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1308                                       _.info128>,
1309               EVEX_V512;
1310   }
1312   let Predicates = [HasVLX] in {
1313     defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1314                                      WriteFShuffle256Ld, _.info256, _.info128, 1>,
1315                  avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
1316                                          _.info128>,
1317                  EVEX_V256;
1318   }
1319 }
1321 multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1322                                                        AVX512VLVectorVTInfo _> {
1323   let Predicates = [HasAVX512] in {
1324     defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1325                                   WriteFShuffle256Ld, _.info512, _.info128, 1>,
1326               avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1327                                       _.info128>,
1328               EVEX_V512;
1329   }
1331   let Predicates = [HasVLX] in {
1332     defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1333                                      WriteFShuffle256Ld, _.info256, _.info128, 1>,
1334                  avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
1335                                          _.info128>,
1336                  EVEX_V256;
1337     defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1338                                      WriteFShuffle256Ld, _.info128, _.info128, 1>,
1339                  avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
1340                                          _.info128>,
1341                  EVEX_V128;
1342   }
1343 }
1344 defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1345                                        avx512vl_f32_info>;
1346 defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1347                                        avx512vl_f64_info>, VEX_W1X;
1349 multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1350                                     X86VectorVTInfo _, SDPatternOperator OpNode,
1351                                     RegisterClass SrcRC> {
1352   // Fold with a mask even if the broadcast has multiple uses, since it is cheap.
1353   let ExeDomain = _.ExeDomain in
1354   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1355                           (ins SrcRC:$src),
1356                           "vpbroadcast"#_.Suffix, "$src", "$src",
1357                           (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
1358                           /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
1359                           T8PD, EVEX, Sched<[SchedRR]>;
1360 }
1362 multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1363                                     X86VectorVTInfo _, SDPatternOperator OpNode,
1364                                     RegisterClass SrcRC, SubRegIndex Subreg> {
1365   let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1366   defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
1367                          (outs _.RC:$dst), (ins GR32:$src),
1368                          !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1369                          !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1370                          "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
1371                          "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
1373   def : Pat <(_.VT (OpNode SrcRC:$src)),
1374              (!cast<Instruction>(Name#rr)
1375               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1377   // Fold with a mask even if the broadcast has multiple uses, since it is cheap.
1378   def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1379              (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
1380               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1382   def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1383              (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
1384               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1385 }
1387 multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1388                       AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1389                       RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1390   let Predicates = [prd] in
1391     defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1392               OpNode, SrcRC, Subreg>, EVEX_V512;
1393   let Predicates = [prd, HasVLX] in {
1394     defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1395               _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1396     defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1397               _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
1398   }
1399 }
1401 multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1402                                        SDPatternOperator OpNode,
1403                                        RegisterClass SrcRC, Predicate prd> {
1404   let Predicates = [prd] in
1405     defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1406                                       SrcRC>, EVEX_V512;
1407   let Predicates = [prd, HasVLX] in {
1408     defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1409                                          SrcRC>, EVEX_V256;
1410     defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
1411                                          SrcRC>, EVEX_V128;
1412   }
1413 }
1415 defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1416                        avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1417 defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1418                        avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1419                        HasBWI>;
1420 defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1421                                                  X86VBroadcast, GR32, HasAVX512>;
1422 defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1423                                                  X86VBroadcast, GR64, HasAVX512>, VEX_W;
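// Illustrative assembly: "vpbroadcastd %eax, %zmm0 {%k1}" replicates EAX into
// every dword lane selected by k1.  The byte/word forms also encode a 32-bit
// GPR source (only the low 8/16 bits are used), which is why the BW patterns
// above first INSERT_SUBREG the GR8/GR16 value into a GR32.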
1425 multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1426                                         AVX512VLVectorVTInfo _, Predicate prd,
1427                                         bit IsConvertibleToThreeAddress> {
1428   let Predicates = [prd] in {
1429     defm Z :   avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1430                                    WriteShuffle256Ld, _.info512, _.info128,
1431                                    IsConvertibleToThreeAddress>,
1432                                   EVEX_V512;
1433   }
1434   let Predicates = [prd, HasVLX] in {
1435     defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1436                                     WriteShuffle256Ld, _.info256, _.info128,
1437                                     IsConvertibleToThreeAddress>,
1438                                  EVEX_V256;
1439     defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
1440                                     WriteShuffleXLd, _.info128, _.info128,
1441                                     IsConvertibleToThreeAddress>,
1442                                  EVEX_V128;
1443   }
1444 }
1446 defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1447                                            avx512vl_i8_info, HasBWI, 0>;
1448 defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1449                                            avx512vl_i16_info, HasBWI, 0>;
1450 defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1451                                            avx512vl_i32_info, HasAVX512, 1>;
1452 defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1453                                            avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;
1455 multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1456                                       SDPatternOperator OpNode,
1457                                       X86VectorVTInfo _Dst,
1458                                       X86VectorVTInfo _Src> {
1459   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1460                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1461                            (_Dst.VT (OpNode addr:$src))>,
1462                            Sched<[SchedWriteShuffle.YMM.Folded]>,
1463                            AVX5128IBase, EVEX;
1464 }
1466 // This should be used for the AVX512DQ broadcast instructions. It disables
1467 // the unmasked patterns so that we only use the DQ instructions when masking
1468 //  is requested.
1469 multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1470                                          SDPatternOperator OpNode,
1471                                          X86VectorVTInfo _Dst,
1472                                          X86VectorVTInfo _Src> {
1473   let hasSideEffects = 0, mayLoad = 1 in
1474   defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1475                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1476                            (null_frag),
1477                            (_Dst.VT (OpNode addr:$src))>,
1478                            Sched<[SchedWriteShuffle.YMM.Folded]>,
1479                            AVX5128IBase, EVEX;
1480 }
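// Illustrative consequence: an unmasked (v8i64 (X86SubVBroadcastld128 ...)) is
// matched by the AVX512F patterns below onto VBROADCASTI32X4rm, so the DQ-only
// VBROADCASTI64X2 form is selected only when a 64-bit-granularity mask must be
// honoured.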
1481 let Predicates = [HasFP16] in {
1482   def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
1483             (VPBROADCASTWZrm addr:$src)>;
1485   def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
1486             (VPBROADCASTWZrr VR128X:$src)>;
1487   def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
1488             (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1489 }
1490 let Predicates = [HasVLX, HasFP16] in {
1491   def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
1492             (VPBROADCASTWZ128rm addr:$src)>;
1493   def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
1494             (VPBROADCASTWZ256rm addr:$src)>;
1496   def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
1497             (VPBROADCASTWZ128rr VR128X:$src)>;
1498   def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
1499             (VPBROADCASTWZ256rr VR128X:$src)>;
1501   def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
1502             (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1503   def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
1504             (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1505 }
1507 //===----------------------------------------------------------------------===//
1508 // AVX-512 BROADCAST SUBVECTORS
1511 defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1512                        X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
1513                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
1514 defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1515                        X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
1516                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
1517 defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1518                        X86SubVBroadcastld256, v8i64_info, v4i64x_info>, VEX_W,
1519                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
1520 defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1521                        X86SubVBroadcastld256, v8f64_info, v4f64x_info>, VEX_W,
1522                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
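// Semantics sketch (illustrative): "vbroadcasti32x4 (%rdi), %zmm0" loads 16
// bytes and repeats them in each of the four 128-bit lanes of zmm0, while
// "vbroadcasti64x4 (%rdi), %zmm0" loads 32 bytes and repeats them in both
// 256-bit halves.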
1524 let Predicates = [HasAVX512] in {
1525 def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
1526           (VBROADCASTF64X4rm addr:$src)>;
1527 def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
1528           (VBROADCASTF64X4rm addr:$src)>;
1529 def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
1530           (VBROADCASTF64X4rm addr:$src)>;
1531 def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
1532           (VBROADCASTI64X4rm addr:$src)>;
1533 def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
1534           (VBROADCASTI64X4rm addr:$src)>;
1535 def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
1536           (VBROADCASTI64X4rm addr:$src)>;
1537 def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
1538           (VBROADCASTI64X4rm addr:$src)>;
1540 def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
1541           (VBROADCASTF32X4rm addr:$src)>;
1542 def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
1543           (VBROADCASTF32X4rm addr:$src)>;
1544 def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
1545           (VBROADCASTF32X4rm addr:$src)>;
1546 def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
1547           (VBROADCASTI32X4rm addr:$src)>;
1548 def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
1549           (VBROADCASTI32X4rm addr:$src)>;
1550 def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
1551           (VBROADCASTI32X4rm addr:$src)>;
1552 def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
1553           (VBROADCASTI32X4rm addr:$src)>;
1555 // Patterns for selects of bitcasted operations.
1556 def : Pat<(vselect_mask VK16WM:$mask,
1557                         (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1558                         (v16f32 immAllZerosV)),
1559           (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1560 def : Pat<(vselect_mask VK16WM:$mask,
1561                         (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1562                         VR512:$src0),
1563           (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1564 def : Pat<(vselect_mask VK16WM:$mask,
1565                         (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1566                         (v16i32 immAllZerosV)),
1567           (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1568 def : Pat<(vselect_mask VK16WM:$mask,
1569                         (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1570                         VR512:$src0),
1571           (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1573 def : Pat<(vselect_mask VK8WM:$mask,
1574                         (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1575                         (v8f64 immAllZerosV)),
1576           (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1577 def : Pat<(vselect_mask VK8WM:$mask,
1578                         (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1579                         VR512:$src0),
1580           (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1581 def : Pat<(vselect_mask VK8WM:$mask,
1582                         (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1583                         (v8i64 immAllZerosV)),
1584           (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1585 def : Pat<(vselect_mask VK8WM:$mask,
1586                         (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1587                         VR512:$src0),
1588           (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1589 }
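// The bitcast-select patterns above are needed because the DAG may build the
// subvector broadcast with 64-bit elements (e.g. v8f64) while the mask select
// is performed on 32-bit elements; the bc_v16f32/bc_v16i32 wrappers let the
// masked VBROADCAST*X4 forms still be chosen in that case.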
1591 let Predicates = [HasVLX] in {
1592 defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1593                            X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
1594                            EVEX_V256, EVEX_CD8<32, CD8VT4>;
1595 defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1596                            X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
1597                            EVEX_V256, EVEX_CD8<32, CD8VT4>;
1599 def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
1600           (VBROADCASTF32X4Z256rm addr:$src)>;
1601 def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
1602           (VBROADCASTF32X4Z256rm addr:$src)>;
1603 def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
1604           (VBROADCASTF32X4Z256rm addr:$src)>;
1605 def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
1606           (VBROADCASTI32X4Z256rm addr:$src)>;
1607 def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
1608           (VBROADCASTI32X4Z256rm addr:$src)>;
1609 def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
1610           (VBROADCASTI32X4Z256rm addr:$src)>;
1611 def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
1612           (VBROADCASTI32X4Z256rm addr:$src)>;
1614 // Patterns for selects of bitcasted operations.
1615 def : Pat<(vselect_mask VK8WM:$mask,
1616                         (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1617                         (v8f32 immAllZerosV)),
1618           (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1619 def : Pat<(vselect_mask VK8WM:$mask,
1620                         (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1621                         VR256X:$src0),
1622           (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1623 def : Pat<(vselect_mask VK8WM:$mask,
1624                         (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1625                         (v8i32 immAllZerosV)),
1626           (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1627 def : Pat<(vselect_mask VK8WM:$mask,
1628                         (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1629                         VR256X:$src0),
1630           (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1631 }
1633 let Predicates = [HasVLX, HasDQI] in {
1634 defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1635                            X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X,
1636                            EVEX_V256, EVEX_CD8<64, CD8VT2>;
1637 defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1638                            X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, VEX_W1X,
1639                            EVEX_V256, EVEX_CD8<64, CD8VT2>;
1641 // Patterns for selects of bitcasted operations.
1642 def : Pat<(vselect_mask VK4WM:$mask,
1643                         (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1644                         (v4f64 immAllZerosV)),
1645           (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1646 def : Pat<(vselect_mask VK4WM:$mask,
1647                         (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1648                         VR256X:$src0),
1649           (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1650 def : Pat<(vselect_mask VK4WM:$mask,
1651                         (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1652                         (v4i64 immAllZerosV)),
1653           (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1654 def : Pat<(vselect_mask VK4WM:$mask,
1655                         (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1656                         VR256X:$src0),
1657           (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1658 }
1660 let Predicates = [HasDQI] in {
1661 defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1662                        X86SubVBroadcastld128, v8i64_info, v2i64x_info>, VEX_W,
1663                        EVEX_V512, EVEX_CD8<64, CD8VT2>;
1664 defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1665                        X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
1666                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
1667 defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1668                        X86SubVBroadcastld128, v8f64_info, v2f64x_info>, VEX_W,
1669                        EVEX_V512, EVEX_CD8<64, CD8VT2>;
1670 defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1671                        X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
1672                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
1674 // Patterns for selects of bitcasted operations.
1675 def : Pat<(vselect_mask VK16WM:$mask,
1676                         (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1677                         (v16f32 immAllZerosV)),
1678           (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1679 def : Pat<(vselect_mask VK16WM:$mask,
1680                         (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1681                         VR512:$src0),
1682           (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1683 def : Pat<(vselect_mask VK16WM:$mask,
1684                         (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1685                         (v16i32 immAllZerosV)),
1686           (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1687 def : Pat<(vselect_mask VK16WM:$mask,
1688                         (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1689                         VR512:$src0),
1690           (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1692 def : Pat<(vselect_mask VK8WM:$mask,
1693                         (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1694                         (v8f64 immAllZerosV)),
1695           (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1696 def : Pat<(vselect_mask VK8WM:$mask,
1697                         (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1698                         VR512:$src0),
1699           (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1700 def : Pat<(vselect_mask VK8WM:$mask,
1701                         (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1702                         (v8i64 immAllZerosV)),
1703           (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1704 def : Pat<(vselect_mask VK8WM:$mask,
1705                         (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1706                         VR512:$src0),
1707           (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1708 }
1710 multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1711                          AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
1712   let Predicates = [HasDQI] in
1713     defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1714                                           WriteShuffle256Ld, _Dst.info512,
1715                                           _Src.info512, _Src.info128, 0, null_frag, null_frag>,
1716                                           EVEX_V512;
1717   let Predicates = [HasDQI, HasVLX] in
1718     defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1719                                           WriteShuffle256Ld, _Dst.info256,
1720                                           _Src.info256, _Src.info128, 0, null_frag, null_frag>,
1721                                           EVEX_V256;
1722 }
1724 multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1725                          AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
1726   avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1728   let Predicates = [HasDQI, HasVLX] in
1729     defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
1730                                           WriteShuffleXLd, _Dst.info128,
1731                                           _Src.info128, _Src.info128, 0, null_frag, null_frag>,
1732                                           EVEX_V128;
1733 }
1735 defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1736                                           avx512vl_i32_info, avx512vl_i64_info>;
1737 defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1738                                           avx512vl_f32_info, avx512vl_f64_info>;
1740 //===----------------------------------------------------------------------===//
1741 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
1742 //---
1743 multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1744                                   X86VectorVTInfo _, RegisterClass KRC> {
1745   def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1746                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1747                   [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1748                   EVEX, Sched<[WriteShuffle]>;
1749 }
1751 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1752                                  AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1753   let Predicates = [HasCDI] in
1754     defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1755   let Predicates = [HasCDI, HasVLX] in {
1756     defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1757     defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1758   }
1759 }
1761 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1762                                                avx512vl_i32_info, VK16>;
1763 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1764                                                avx512vl_i64_info, VK8>, VEX_W;
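// Semantics sketch (illustrative): vpbroadcastmw2d zero-extends the low 16
// bits of the mask register to 32 bits and copies that value into every dword
// element of the destination; vpbroadcastmb2q does the same with the low 8
// bits into every qword element.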
1766 //===----------------------------------------------------------------------===//
1767 // -- VPERMI2 - 3-source-operand form --
1768 multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1769                          X86FoldableSchedWrite sched,
1770                          X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1771 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1772     hasSideEffects = 0 in {
1773   defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1774           (ins _.RC:$src2, _.RC:$src3),
1775           OpcodeStr, "$src3, $src2", "$src2, $src3",
1776           (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1777           EVEX_4V, AVX5128IBase, Sched<[sched]>;
1779   let mayLoad = 1 in
1780   defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1781             (ins _.RC:$src2, _.MemOp:$src3),
1782             OpcodeStr, "$src3, $src2", "$src2, $src3",
1783             (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1784                    (_.VT (_.LdFrag addr:$src3)))), 1>,
1785             EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1786   }
1787 }
1789 multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1790                             X86FoldableSchedWrite sched,
1791                             X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1792   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1793       hasSideEffects = 0, mayLoad = 1 in
1794   defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1795               (ins _.RC:$src2, _.ScalarMemOp:$src3),
1796               OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1797               !strconcat("$src2, ${src3}", _.BroadcastStr ),
1798               (_.VT (X86VPermt2 _.RC:$src2,
1799                IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1800               AVX5128IBase, EVEX_4V, EVEX_B,
1801               Sched<[sched.Folded, sched.ReadAfterFold]>;
1802 }
1804 multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1805                                X86FoldableSchedWrite sched,
1806                                AVX512VLVectorVTInfo VTInfo,
1807                                AVX512VLVectorVTInfo ShuffleMask> {
1808   defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1809                            ShuffleMask.info512>,
1810             avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1811                              ShuffleMask.info512>, EVEX_V512;
1812   let Predicates = [HasVLX] in {
1813   defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1814                                ShuffleMask.info128>,
1815                  avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1816                                   ShuffleMask.info128>, EVEX_V128;
1817   defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1818                                ShuffleMask.info256>,
1819                  avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1820                                   ShuffleMask.info256>, EVEX_V256;
1821   }
1822 }
1824 multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1825                                   X86FoldableSchedWrite sched,
1826                                   AVX512VLVectorVTInfo VTInfo,
1827                                   AVX512VLVectorVTInfo Idx,
1828                                   Predicate Prd> {
1829   let Predicates = [Prd] in
1830   defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1831                            Idx.info512>, EVEX_V512;
1832   let Predicates = [Prd, HasVLX] in {
1833   defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1834                                Idx.info128>, EVEX_V128;
1835   defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1836                                Idx.info256>,  EVEX_V256;
1837   }
1838 }
1840 defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1841                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1842 defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1843                   avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1844 defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1845                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1846                   VEX_W, EVEX_CD8<16, CD8VF>;
1847 defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1848                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1849                   EVEX_CD8<8, CD8VF>;
1850 defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1851                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1852 defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1853                   avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
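// Semantics sketch for the 512-bit dword form (illustrative): the tied operand
// ($src1, also the destination) supplies the indices; each result element is
// read from the 32-entry table formed by $src2 and $src3, with index bits
// [3:0] choosing the element and bit 4 choosing between the two sources.
// The VPERMT2* variants below perform the same table lookup but tie a table
// operand, rather than the index vector, to the destination.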
1855 // Extra patterns to deal with the extra bitcasts that appear when the passthru
1856 // and index operands have different types in the FP versions.
1857 multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1858                                   X86VectorVTInfo IdxVT,
1859                                   X86VectorVTInfo CastVT> {
1860   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1861                                 (X86VPermt2 (_.VT _.RC:$src2),
1862                                             (IdxVT.VT (bitconvert
1863                                                        (CastVT.VT _.RC:$src1))),
1864                                             _.RC:$src3),
1865                                 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1866             (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1867                                                 _.RC:$src2, _.RC:$src3)>;
1868   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1869                                 (X86VPermt2 _.RC:$src2,
1870                                             (IdxVT.VT (bitconvert
1871                                                        (CastVT.VT _.RC:$src1))),
1872                                             (_.LdFrag addr:$src3)),
1873                                 (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1874             (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1875                                                 _.RC:$src2, addr:$src3)>;
1876   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1877                                  (X86VPermt2 _.RC:$src2,
1878                                              (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1879                                              (_.BroadcastLdFrag addr:$src3)),
1880                                  (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1881             (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1882                                                  _.RC:$src2, addr:$src3)>;
1883 }
1885 // TODO: Should we add more casts? The vXi64 case is common due to ABI.
1886 defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
1887 defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
1888 defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
1890 // VPERMT2
1891 multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1892                          X86FoldableSchedWrite sched,
1893                          X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1894 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1895   defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1896           (ins IdxVT.RC:$src2, _.RC:$src3),
1897           OpcodeStr, "$src3, $src2", "$src2, $src3",
1898           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1899           EVEX_4V, AVX5128IBase, Sched<[sched]>;
1901   defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1902             (ins IdxVT.RC:$src2, _.MemOp:$src3),
1903             OpcodeStr, "$src3, $src2", "$src2, $src3",
1904             (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1905                    (_.LdFrag addr:$src3))), 1>,
1906             EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1907   }
1908 }
1909 multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1910                             X86FoldableSchedWrite sched,
1911                             X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1912   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1913   defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1914               (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1915               OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1916               !strconcat("$src2, ${src3}", _.BroadcastStr ),
1917               (_.VT (X86VPermt2 _.RC:$src1,
1918                IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1919               AVX5128IBase, EVEX_4V, EVEX_B,
1920               Sched<[sched.Folded, sched.ReadAfterFold]>;
1921 }
1923 multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1924                                X86FoldableSchedWrite sched,
1925                                AVX512VLVectorVTInfo VTInfo,
1926                                AVX512VLVectorVTInfo ShuffleMask> {
1927   defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1928                               ShuffleMask.info512>,
1929             avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1930                               ShuffleMask.info512>, EVEX_V512;
1931   let Predicates = [HasVLX] in {
1932   defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1933                               ShuffleMask.info128>,
1934                  avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1935                               ShuffleMask.info128>, EVEX_V128;
1936   defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1937                               ShuffleMask.info256>,
1938                  avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1939                               ShuffleMask.info256>, EVEX_V256;
1940   }
1941 }
1943 multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1944                                   X86FoldableSchedWrite sched,
1945                                   AVX512VLVectorVTInfo VTInfo,
1946                                   AVX512VLVectorVTInfo Idx, Predicate Prd> {
1947   let Predicates = [Prd] in
1948   defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1949                            Idx.info512>, EVEX_V512;
1950   let Predicates = [Prd, HasVLX] in {
1951   defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1952                                Idx.info128>, EVEX_V128;
1953   defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1954                                Idx.info256>, EVEX_V256;
1955   }
1956 }
1958 defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1959                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1960 defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1961                   avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1962 defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1963                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1964                   VEX_W, EVEX_CD8<16, CD8VF>;
1965 defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1966                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1967                   EVEX_CD8<8, CD8VF>;
1968 defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1969                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1970 defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1971                   avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1973 //===----------------------------------------------------------------------===//
1974 // AVX-512 - BLEND using mask
1977 multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1978                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1979   let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1980   def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1981              (ins _.RC:$src1, _.RC:$src2),
1982              !strconcat(OpcodeStr,
1983              "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1984              EVEX_4V, Sched<[sched]>;
1985   def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1986              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1987              !strconcat(OpcodeStr,
1988              "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1989              []>, EVEX_4V, EVEX_K, Sched<[sched]>;
1990   def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1991              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1992              !strconcat(OpcodeStr,
1993              "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1994              []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
1995   let mayLoad = 1 in {
1996   def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1997              (ins _.RC:$src1, _.MemOp:$src2),
1998              !strconcat(OpcodeStr,
1999              "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
2000              []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
2001              Sched<[sched.Folded, sched.ReadAfterFold]>;
2002   def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2003              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2004              !strconcat(OpcodeStr,
2005              "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
2006              []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
2007              Sched<[sched.Folded, sched.ReadAfterFold]>;
2008   def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2009              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2010              !strconcat(OpcodeStr,
2011              "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
2012              []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
2013              Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
2014   }
2015   }
2016 }
2017 multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
2018                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2019   let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
2020   def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2021       (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
2022        !strconcat(OpcodeStr,
2023             "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2024             "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2025       EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2026       Sched<[sched.Folded, sched.ReadAfterFold]>;
2028   def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2029       (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
2030        !strconcat(OpcodeStr,
2031             "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
2032             "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2033       EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2034       Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
2036   def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2037       (ins _.RC:$src1, _.ScalarMemOp:$src2),
2038        !strconcat(OpcodeStr,
2039             "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
2040             "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2041       EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2042       Sched<[sched.Folded, sched.ReadAfterFold]>;
2043   }
2044 }
2046 multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2047                         AVX512VLVectorVTInfo VTInfo> {
2048   defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2049            WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2050                                  EVEX_V512;
2052   let Predicates = [HasVLX] in {
2053     defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2054                 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2055                                       EVEX_V256;
2056     defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2057                 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2058                                       EVEX_V128;
2059   }
2060 }
2062 multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2063                         AVX512VLVectorVTInfo VTInfo> {
2064   let Predicates = [HasBWI] in
2065     defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2066                                EVEX_V512;
2068   let Predicates = [HasBWI, HasVLX] in {
2069     defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2070                                   EVEX_V256;
2071     defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2072                                   EVEX_V128;
2073   }
2074 }
2076 defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
2077                               avx512vl_f32_info>;
2078 defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
2079                               avx512vl_f64_info>, VEX_W;
2080 defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
2081                               avx512vl_i32_info>;
2082 defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
2083                               avx512vl_i64_info>, VEX_W;
2084 defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
2085                               avx512vl_i8_info>;
2086 defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
2087                               avx512vl_i16_info>, VEX_W;
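// Usage sketch for the masked blends above (illustration only): the writemask
// acts as the per-element select between the two sources rather than merely
// suppressing the update, e.g. in AT&T syntax
//   vblendmps %zmm2, %zmm1, %zmm0 {%k1}  ; zmm0[i] = k1[i] ? zmm2[i] : zmm1[i]
// and with {z} the elements where the mask is clear are zeroed instead.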
2089 //===----------------------------------------------------------------------===//
2090 // Compare Instructions
2091 //===----------------------------------------------------------------------===//
2093 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
2095 multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
2096                              PatFrag OpNode_su, PatFrag OpNodeSAE_su,
2097                              X86FoldableSchedWrite sched> {
2098   defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2099                       (outs _.KRC:$dst),
2100                       (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2101                       "vcmp"#_.Suffix,
2102                       "$cc, $src2, $src1", "$src1, $src2, $cc",
2103                       (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2104                       (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2105                                  timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2106   let mayLoad = 1 in
2107   defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2108                     (outs _.KRC:$dst),
2109                     (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
2110                     "vcmp"#_.Suffix,
2111                     "$cc, $src2, $src1", "$src1, $src2, $cc",
2112                     (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
2113                         timm:$cc),
2114                     (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
2115                         timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2116                     Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2118   let Uses = [MXCSR] in
2119   defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2120                      (outs _.KRC:$dst),
2121                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2122                      "vcmp"#_.Suffix,
2123                      "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
2124                      (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2125                                 timm:$cc),
2126                      (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2127                                    timm:$cc)>,
2128                      EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
2130   let isCodeGenOnly = 1 in {
2131     let isCommutable = 1 in
2132     def rr : AVX512Ii8<0xC2, MRMSrcReg,
2133                 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
2134                 !strconcat("vcmp", _.Suffix,
2135                            "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2136                 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2137                                           _.FRC:$src2,
2138                                           timm:$cc))]>,
2139                 EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2140     def rm : AVX512Ii8<0xC2, MRMSrcMem,
2141               (outs _.KRC:$dst),
2142               (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2143               !strconcat("vcmp", _.Suffix,
2144                          "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2145               [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2146                                         (_.ScalarLdFrag addr:$src2),
2147                                         timm:$cc))]>,
2148               EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2149               Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2150   }
2151 }
2153 def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2154                           (X86cmpms node:$src1, node:$src2, node:$cc), [{
2155   return N->hasOneUse();
2156 }]>;
2157 def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2158                           (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
2159   return N->hasOneUse();
2160 }]>;
2162 let Predicates = [HasAVX512] in {
2163   let ExeDomain = SSEPackedSingle in
2164   defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
2165                                    X86cmpms_su, X86cmpmsSAE_su,
2166                                    SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2167   let ExeDomain = SSEPackedDouble in
2168   defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
2169                                    X86cmpms_su, X86cmpmsSAE_su,
2170                                    SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
2171 }
2172 let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
2173   defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
2174                                    X86cmpms_su, X86cmpmsSAE_su,
2175                                    SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA;
2177 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
2178                               X86FoldableSchedWrite sched,
2179                               X86VectorVTInfo _, bit IsCommutable> {
2180   let isCommutable = IsCommutable, hasSideEffects = 0 in
2181   def rr : AVX512BI<opc, MRMSrcReg,
2182              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2183              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2184              []>, EVEX_4V, Sched<[sched]>;
2185   let mayLoad = 1, hasSideEffects = 0 in
2186   def rm : AVX512BI<opc, MRMSrcMem,
2187              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2188              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2189              []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2190   let isCommutable = IsCommutable, hasSideEffects = 0 in
2191   def rrk : AVX512BI<opc, MRMSrcReg,
2192               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2193               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2194                           "$dst {${mask}}, $src1, $src2}"),
2195               []>, EVEX_4V, EVEX_K, Sched<[sched]>;
2196   let mayLoad = 1, hasSideEffects = 0 in
2197   def rmk : AVX512BI<opc, MRMSrcMem,
2198               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2199               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2200                           "$dst {${mask}}, $src1, $src2}"),
2201               []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2202 }
2204 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
2205                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
2206                                   bit IsCommutable> :
2207            avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
2208   let mayLoad = 1, hasSideEffects = 0 in {
2209   def rmb : AVX512BI<opc, MRMSrcMem,
2210               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2211               !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2212                                     "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2213               []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2214   def rmbk : AVX512BI<opc, MRMSrcMem,
2215                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2216                                        _.ScalarMemOp:$src2),
2217                !strconcat(OpcodeStr,
2218                           "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2219                           "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2220                []>, EVEX_4V, EVEX_K, EVEX_B,
2221                Sched<[sched.Folded, sched.ReadAfterFold]>;
2222   }
2223 }
2225 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
2226                                  X86SchedWriteWidths sched,
2227                                  AVX512VLVectorVTInfo VTInfo, Predicate prd,
2228                                  bit IsCommutable = 0> {
2229   let Predicates = [prd] in
2230   defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
2231                               VTInfo.info512, IsCommutable>, EVEX_V512;
2233   let Predicates = [prd, HasVLX] in {
2234     defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
2235                                    VTInfo.info256, IsCommutable>, EVEX_V256;
2236     defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
2237                                    VTInfo.info128, IsCommutable>, EVEX_V128;
2238   }
2239 }
2241 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2242                                      X86SchedWriteWidths sched,
2243                                      AVX512VLVectorVTInfo VTInfo,
2244                                      Predicate prd, bit IsCommutable = 0> {
2245   let Predicates = [prd] in
2246   defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
2247                                   VTInfo.info512, IsCommutable>, EVEX_V512;
2249   let Predicates = [prd, HasVLX] in {
2250     defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
2251                                        VTInfo.info256, IsCommutable>, EVEX_V256;
2252     defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
2253                                        VTInfo.info128, IsCommutable>, EVEX_V128;
2254   }
2255 }
2257 // This node treats setcc as commutable to help match loads in both
2258 // operands for PCMPEQ.
2259 def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
2260 def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
2261                          (setcc node:$src1, node:$src2, SETGT)>;
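// Illustration of the commutation above (not a new pattern): a dag such as
// (seteq (load addr), (v16i32 VR512:$x)) may be matched with its operands
// swapped, so the load ends up in the foldable second-source position of the
// memory form of VPCMPEQD.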
2263 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2264 // increase the pattern complexity the way an immediate would.
2265 let AddedComplexity = 2 in {
2266 // FIXME: Is there a better scheduler class for VPCMP?
2267 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
2268                       SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2269                 EVEX_CD8<8, CD8VF>, VEX_WIG;
2271 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
2272                       SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2273                 EVEX_CD8<16, CD8VF>, VEX_WIG;
2275 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
2276                       SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2277                 EVEX_CD8<32, CD8VF>;
2279 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
2280                       SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2281                 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2283 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
2284                       SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2285                 EVEX_CD8<8, CD8VF>, VEX_WIG;
2287 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
2288                       SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2289                 EVEX_CD8<16, CD8VF>, VEX_WIG;
2291 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
2292                       SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2293                 EVEX_CD8<32, CD8VF>;
2295 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
2296                       SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2297                 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2300 def X86pcmpm_imm : SDNodeXForm<setcc, [{
2301   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2302   uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2303   return getI8Imm(SSECC, SDLoc(N));
2304 }]>;
2306 // Swapped operand version of the above.
2307 def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
2308   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2309   uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2310   SSECC = X86::getSwappedVPCMPImm(SSECC);
2311   return getI8Imm(SSECC, SDLoc(N));
2312 }]>;
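// Background for the two transforms above (a summary of the VPCMP predicate
// immediate they produce, not a new definition): 0=EQ, 1=LT, 2=LE, 3=FALSE,
// 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE. Commuting the operands is therefore
// expected to exchange LT<->NLE and LE<->NLT and to leave EQ, NE, FALSE and
// TRUE alone; e.g. matching (setlt (load addr), reg) as "reg > mem" uses
// immediate 6 instead of 1.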
2314 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2315                           PatFrag Frag_su,
2316                           X86FoldableSchedWrite sched,
2317                           X86VectorVTInfo _, string Name> {
2318   let isCommutable = 1 in
2319   def rri : AVX512AIi8<opc, MRMSrcReg,
2320              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2321              !strconcat("vpcmp", Suffix,
2322                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2323              [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2324                                                 (_.VT _.RC:$src2),
2325                                                 cond)))]>,
2326              EVEX_4V, Sched<[sched]>;
2327   def rmi : AVX512AIi8<opc, MRMSrcMem,
2328              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2329              !strconcat("vpcmp", Suffix,
2330                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2331              [(set _.KRC:$dst, (_.KVT
2332                                 (Frag:$cc
2333                                  (_.VT _.RC:$src1),
2334                                  (_.VT (_.LdFrag addr:$src2)),
2335                                  cond)))]>,
2336              EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2337   let isCommutable = 1 in
2338   def rrik : AVX512AIi8<opc, MRMSrcReg,
2339               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2340                                       u8imm:$cc),
2341               !strconcat("vpcmp", Suffix,
2342                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
2343                          "$dst {${mask}}, $src1, $src2, $cc}"),
2344               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2345                                      (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2346                                                          (_.VT _.RC:$src2),
2347                                                          cond))))]>,
2348               EVEX_4V, EVEX_K, Sched<[sched]>;
2349   def rmik : AVX512AIi8<opc, MRMSrcMem,
2350               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2351                                     u8imm:$cc),
2352               !strconcat("vpcmp", Suffix,
2353                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
2354                          "$dst {${mask}}, $src1, $src2, $cc}"),
2355               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2356                                      (_.KVT
2357                                       (Frag_su:$cc
2358                                        (_.VT _.RC:$src1),
2359                                        (_.VT (_.LdFrag addr:$src2)),
2360                                        cond))))]>,
2361               EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2363   def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2),
2364                              (_.VT _.RC:$src1), cond)),
2365             (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2366              _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2368   def : Pat<(and _.KRCWM:$mask,
2369                  (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2),
2370                                      (_.VT _.RC:$src1), cond))),
2371             (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2372              _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2373              (X86pcmpm_imm_commute $cc))>;
2374 }
2376 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2377                               PatFrag Frag_su, X86FoldableSchedWrite sched,
2378                               X86VectorVTInfo _, string Name> :
2379            avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> {
2380   def rmib : AVX512AIi8<opc, MRMSrcMem,
2381              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2382                                      u8imm:$cc),
2383              !strconcat("vpcmp", Suffix,
2384                         "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2385                         "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2386              [(set _.KRC:$dst, (_.KVT (Frag:$cc
2387                                        (_.VT _.RC:$src1),
2388                                        (_.BroadcastLdFrag addr:$src2),
2389                                        cond)))]>,
2390              EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2391   def rmibk : AVX512AIi8<opc, MRMSrcMem,
2392               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2393                                        _.ScalarMemOp:$src2, u8imm:$cc),
2394               !strconcat("vpcmp", Suffix,
2395                   "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2396                   "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2397               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2398                                      (_.KVT (Frag_su:$cc
2399                                              (_.VT _.RC:$src1),
2400                                              (_.BroadcastLdFrag addr:$src2),
2401                                              cond))))]>,
2402               EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2404   def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
2405                     (_.VT _.RC:$src1), cond)),
2406             (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2407              _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2409   def : Pat<(and _.KRCWM:$mask,
2410                  (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2),
2411                                      (_.VT _.RC:$src1), cond))),
2412             (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2413              _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2414              (X86pcmpm_imm_commute $cc))>;
2415 }
2417 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2418                              PatFrag Frag_su, X86SchedWriteWidths sched,
2419                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2420   let Predicates = [prd] in
2421   defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2422                           sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2424   let Predicates = [prd, HasVLX] in {
2425     defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2426                                sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2427     defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2428                                sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2429   }
2430 }
2432 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2433                                  PatFrag Frag_su, X86SchedWriteWidths sched,
2434                                  AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2435   let Predicates = [prd] in
2436   defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2437                               sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2439   let Predicates = [prd, HasVLX] in {
2440     defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2441                                    sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2442     defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2443                                    sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2444   }
2445 }
2447 def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2448                        (setcc node:$src1, node:$src2, node:$cc), [{
2449   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2450   return !ISD::isUnsignedIntSetCC(CC);
2451 }], X86pcmpm_imm>;
2453 def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2454                           (setcc node:$src1, node:$src2, node:$cc), [{
2455   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2456   return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2457 }], X86pcmpm_imm>;
2459 def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2460                         (setcc node:$src1, node:$src2, node:$cc), [{
2461   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2462   return ISD::isUnsignedIntSetCC(CC);
2463 }], X86pcmpm_imm>;
2465 def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2466                            (setcc node:$src1, node:$src2, node:$cc), [{
2467   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2468   return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2469 }], X86pcmpm_imm>;
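// Taken together, the fragments above route signed conditions (X86pcmpm,
// X86pcmpm_su) to the VPCMP{B,W,D,Q} definitions below and unsigned ones
// (X86pcmpum, X86pcmpum_su) to VPCMPU{B,W,D,Q}; the *_su variants also
// require a single use so a compare with other users is not absorbed into a
// masked instruction. For example, an unsigned (setult x, y) on v16i32 would
// be expected to select VPCMPUD with predicate immediate 1 (LT).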
2471 // FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
2472 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2473                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2474                                 EVEX_CD8<8, CD8VF>;
2475 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2476                                  SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2477                                  EVEX_CD8<8, CD8VF>;
2479 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2480                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2481                                 VEX_W, EVEX_CD8<16, CD8VF>;
2482 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2483                                  SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2484                                  VEX_W, EVEX_CD8<16, CD8VF>;
2486 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2487                                     SchedWriteVecALU, avx512vl_i32_info,
2488                                     HasAVX512>, EVEX_CD8<32, CD8VF>;
2489 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2490                                      SchedWriteVecALU, avx512vl_i32_info,
2491                                      HasAVX512>, EVEX_CD8<32, CD8VF>;
2493 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2494                                     SchedWriteVecALU, avx512vl_i64_info,
2495                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2496 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2497                                      SchedWriteVecALU, avx512vl_i64_info,
2498                                      HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2500 def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2501                          (X86cmpm node:$src1, node:$src2, node:$cc), [{
2502   return N->hasOneUse();
2503 }]>;
2505 def X86cmpm_imm_commute : SDNodeXForm<timm, [{
2506   uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
2507   return getI8Imm(Imm, SDLoc(N));
2508 }]>;
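// Background for the transform above (summary only): the AVX-512 VCMP
// predicate is a 5-bit field with 32 encodings, hence the & 0x1f. Swapping
// the operands is expected to map an ordered less-than to the corresponding
// ordered greater-than (e.g. LT_OS <-> GT_OS) while symmetric predicates such
// as EQ_OQ and UNORD_Q map to themselves.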
2510 multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2511                               string Name> {
2512 let Uses = [MXCSR], mayRaiseFPException = 1 in {
2513   defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2514                    (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2515                    "vcmp"#_.Suffix,
2516                    "$cc, $src2, $src1", "$src1, $src2, $cc",
2517                    (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2518                    (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2519                    1>, Sched<[sched]>;
2521   defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2522                 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2523                 "vcmp"#_.Suffix,
2524                 "$cc, $src2, $src1", "$src1, $src2, $cc",
2525                 (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2526                              timm:$cc),
2527                 (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2528                             timm:$cc)>,
2529                 Sched<[sched.Folded, sched.ReadAfterFold]>;
2531   defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2532                 (outs _.KRC:$dst),
2533                 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2534                 "vcmp"#_.Suffix,
2535                 "$cc, ${src2}"#_.BroadcastStr#", $src1",
2536                 "$src1, ${src2}"#_.BroadcastStr#", $cc",
2537                 (X86any_cmpm (_.VT _.RC:$src1),
2538                              (_.VT (_.BroadcastLdFrag addr:$src2)),
2539                              timm:$cc),
2540                 (X86cmpm_su (_.VT _.RC:$src1),
2541                             (_.VT (_.BroadcastLdFrag addr:$src2)),
2542                             timm:$cc)>,
2543                 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2544   }
2546   // Patterns for selecting with loads in other operand.
2547   def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2548                          timm:$cc),
2549             (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2550                                                       (X86cmpm_imm_commute timm:$cc))>;
2552   def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2553                                             (_.VT _.RC:$src1),
2554                                             timm:$cc)),
2555             (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2556                                                        _.RC:$src1, addr:$src2,
2557                                                        (X86cmpm_imm_commute timm:$cc))>;
2559   def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
2560                          (_.VT _.RC:$src1), timm:$cc),
2561             (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2562                                                        (X86cmpm_imm_commute timm:$cc))>;
2564   def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
2565                                             (_.VT _.RC:$src1),
2566                                             timm:$cc)),
2567             (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2568                                                         _.RC:$src1, addr:$src2,
2569                                                         (X86cmpm_imm_commute timm:$cc))>;
2571   // Patterns for mask intrinsics.
2572   def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
2573                       (_.KVT immAllOnesV)),
2574             (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;
2576   def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
2577             (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
2578                                                        _.RC:$src2, timm:$cc)>;
2580   def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2581                       (_.KVT immAllOnesV)),
2582             (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;
2584   def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2585                       _.KRCWM:$mask),
2586             (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
2587                                                        addr:$src2, timm:$cc)>;
2589   def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2590                       (_.KVT immAllOnesV)),
2591             (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;
2593   def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2594                       _.KRCWM:$mask),
2595             (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
2596                                                         addr:$src2, timm:$cc)>;
2598   // Patterns for mask intrinsics with loads in other operand.
2599   def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2600                       (_.KVT immAllOnesV)),
2601             (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2602                                                       (X86cmpm_imm_commute timm:$cc))>;
2604   def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2605                       _.KRCWM:$mask),
2606             (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2607                                                        _.RC:$src1, addr:$src2,
2608                                                        (X86cmpm_imm_commute timm:$cc))>;
2610   def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2611                       (_.KVT immAllOnesV)),
2612             (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2613                                                        (X86cmpm_imm_commute timm:$cc))>;
2615   def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2616                       _.KRCWM:$mask),
2617             (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2618                                                         _.RC:$src1, addr:$src2,
2619                                                         (X86cmpm_imm_commute  timm:$cc))>;
2620 }
2622 multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2623   // Comparison code form (VCMP[EQ/LT/LE/...]).
2624   let Uses = [MXCSR] in
2625   defm  rrib  : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
2626                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2627                      (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
2628                      "vcmp"#_.Suffix,
2629                      "$cc, {sae}, $src2, $src1",
2630                      "$src1, $src2, {sae}, $cc",
2631                      [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2632                                         (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
2633                      [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2634                                         (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
2635                      EVEX_B, Sched<[sched]>;
2636 }
2638 multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
2639                        Predicate Pred = HasAVX512> {
2640   let Predicates = [Pred] in {
2641     defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2642                 avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2644   }
2645   let Predicates = [Pred,HasVLX] in {
2646    defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2647    defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
2648   }
2649 }
2651 defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2652                           AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2653 defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2654                           AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2655 defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
2656                           AVX512PSIi8Base, EVEX_4V, EVEX_CD8<16, CD8VF>, TA;
2658 // Patterns to select fp compares with load as first operand.
2659 let Predicates = [HasAVX512] in {
2660   def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
2661             (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2663   def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
2664             (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2665 }
2667 let Predicates = [HasFP16] in {
2668   def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
2669             (VCMPSHZrm FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2670 }
2672 // ----------------------------------------------------------------
2673 // FPClass
2675 def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
2676                               (X86Vfpclasss node:$src1, node:$src2), [{
2677   return N->hasOneUse();
2678 }]>;
2680 def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
2681                              (X86Vfpclass node:$src1, node:$src2), [{
2682   return N->hasOneUse();
2683 }]>;
2685 // Handle the fpclass instruction:  mask = op(reg_scalar, imm)
2686 //                                          op(mem_scalar, imm)
2687 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2688                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2689                                  Predicate prd> {
2690   let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2691       def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2692                       (ins _.RC:$src1, i32u8imm:$src2),
2693                       OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2694                       [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2695                               (i32 timm:$src2)))]>,
2696                       Sched<[sched]>;
2697       def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2698                       (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2699                       OpcodeStr#_.Suffix#
2700                       "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2701                       [(set _.KRC:$dst,(and _.KRCWM:$mask,
2702                                       (X86Vfpclasss_su (_.VT _.RC:$src1),
2703                                       (i32 timm:$src2))))]>,
2704                       EVEX_K, Sched<[sched]>;
2705     def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2706                     (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2707                     OpcodeStr#_.Suffix#
2708                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2709                     [(set _.KRC:$dst,
2710                           (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
2711                                         (i32 timm:$src2)))]>,
2712                     Sched<[sched.Folded, sched.ReadAfterFold]>;
2713     def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2714                     (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2715                     OpcodeStr#_.Suffix#
2716                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2717                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
2718                         (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
2719                             (i32 timm:$src2))))]>,
2720                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2721   }
2722 }
2724 // Handle the fpclass instruction: mask = fpclass(reg_vec, imm)
2725 //                                         fpclass(mem_vec, imm)
2726 //                                         fpclass(broadcast(eltVt), imm)
2727 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2728                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2729                                  string mem>{
2730   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2731   def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2732                       (ins _.RC:$src1, i32u8imm:$src2),
2733                       OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2734                       [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2735                                        (i32 timm:$src2)))]>,
2736                       Sched<[sched]>;
2737   def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2738                       (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2739                       OpcodeStr#_.Suffix#
2740                       "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2741                       [(set _.KRC:$dst,(and _.KRCWM:$mask,
2742                                        (X86Vfpclass_su (_.VT _.RC:$src1),
2743                                        (i32 timm:$src2))))]>,
2744                       EVEX_K, Sched<[sched]>;
2745   def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2746                     (ins _.MemOp:$src1, i32u8imm:$src2),
2747                     OpcodeStr#_.Suffix#"{"#mem#"}"#
2748                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2749                     [(set _.KRC:$dst,(X86Vfpclass
2750                                      (_.VT (_.LdFrag addr:$src1)),
2751                                      (i32 timm:$src2)))]>,
2752                     Sched<[sched.Folded, sched.ReadAfterFold]>;
2753   def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2754                     (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2755                     OpcodeStr#_.Suffix#"{"#mem#"}"#
2756                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2757                     [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2758                                   (_.VT (_.LdFrag addr:$src1)),
2759                                   (i32 timm:$src2))))]>,
2760                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2761   def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2762                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2763                     OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2764                                       _.BroadcastStr#", $dst|$dst, ${src1}"
2765                                                   #_.BroadcastStr#", $src2}",
2766                     [(set _.KRC:$dst,(X86Vfpclass
2767                                      (_.VT (_.BroadcastLdFrag addr:$src1)),
2768                                      (i32 timm:$src2)))]>,
2769                     EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2770   def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2771                     (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2772                     OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2773                           _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
2774                                                    _.BroadcastStr#", $src2}",
2775                     [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2776                                      (_.VT (_.BroadcastLdFrag addr:$src1)),
2777                                      (i32 timm:$src2))))]>,
2778                     EVEX_B, EVEX_K,  Sched<[sched.Folded, sched.ReadAfterFold]>;
2779   }
2781   // Allow the register and broadcast forms to be written with the x, y, z
2782   // suffix that we use to disambiguate the memory form.
2783   def : InstAlias<OpcodeStr#_.Suffix#mem#
2784                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2785                   (!cast<Instruction>(NAME#"rr")
2786                    _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2787   def : InstAlias<OpcodeStr#_.Suffix#mem#
2788                   "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2789                   (!cast<Instruction>(NAME#"rrk")
2790                    _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2791   def : InstAlias<OpcodeStr#_.Suffix#mem#
2792                   "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2793                   _.BroadcastStr#", $src2}",
2794                   (!cast<Instruction>(NAME#"rmb")
2795                    _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2796   def : InstAlias<OpcodeStr#_.Suffix#mem#
2797                   "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2798                   "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2799                   (!cast<Instruction>(NAME#"rmbk")
2800                    _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2801 }
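// Assembly illustration of the aliases above (AT&T syntax, using the f32
// instantiations below as an example): besides the canonical
// "vfpclassps $1, %xmm2, %k1", the suffixed spellings such as
// "vfpclasspsx $1, %xmm2, %k1" and "vfpclasspsx $1, (%rdi){1to4}, %k1" are
// also accepted for the register and broadcast forms.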
2803 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2804                                      bits<8> opc, X86SchedWriteWidths sched,
2805                                      Predicate prd>{
2806   let Predicates = [prd] in {
2807     defm Z    : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2808                                       _.info512, "z">, EVEX_V512;
2809   }
2810   let Predicates = [prd, HasVLX] in {
2811     defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2812                                       _.info128, "x">, EVEX_V128;
2813     defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2814                                       _.info256, "y">, EVEX_V256;
2815   }
2816 }
2818 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2819                                  bits<8> opcScalar, X86SchedWriteWidths sched> {
2820   defm PH : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f16_info, opcVec,
2821                                       sched, HasFP16>,
2822                                       EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
2823   defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2824                                    sched.Scl, f16x_info, HasFP16>,
2825                                    EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;
2826   defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
2827                                       sched, HasDQI>,
2828                                       EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
2829   defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
2830                                       sched, HasDQI>,
2831                                       EVEX_CD8<64, CD8VF>, AVX512AIi8Base, VEX_W;
2832   defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2833                                    sched.Scl, f32x_info, HasDQI>, VEX_LIG,
2834                                    EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
2835   defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2836                                    sched.Scl, f64x_info, HasDQI>, VEX_LIG,
2837                                    EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, VEX_W;
2838 }
2840 defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;
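// For reference only, a summary of the vfpclass immediate tested by the
// definitions above: bit 0 = QNaN, 1 = +0, 2 = -0, 3 = +Inf, 4 = -Inf,
// 5 = denormal, 6 = finite negative, 7 = SNaN; e.g. an immediate of 0x81
// (QNaN | SNaN) asks whether an element is any kind of NaN.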
2842 //-----------------------------------------------------------------
2843 // Mask register copy, including
2844 // - copy between mask registers
2845 // - load/store mask registers
2846 // - copy from GPR to mask register and vice versa
2848 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2849                          string OpcodeStr, RegisterClass KRC,
2850                          ValueType vvt, X86MemOperand x86memop> {
2851   let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
2852   def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2853              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2854              Sched<[WriteMove]>;
2855   def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2856              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2857              [(set KRC:$dst, (vvt (load addr:$src)))]>,
2858              Sched<[WriteLoad]>;
2859   def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2860              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2861              [(store KRC:$src, addr:$dst)]>,
2862              Sched<[WriteStore]>;
2863 }
2865 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2866                              string OpcodeStr,
2867                              RegisterClass KRC, RegisterClass GRC> {
2868   let hasSideEffects = 0 in {
2869     def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2870                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2871                Sched<[WriteMove]>;
2872     def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2873                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2874                Sched<[WriteMove]>;
2875   }
2876 }
2878 let Predicates = [HasDQI] in
2879   defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2880                avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2881                VEX, PD;
2883 let Predicates = [HasAVX512] in
2884   defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2885                avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2886                VEX, PS;
2888 let Predicates = [HasBWI] in {
2889   defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2890                VEX, PD, VEX_W;
2891   defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2892                VEX, XD;
2893   defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2894                VEX, PS, VEX_W;
2895   defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2896                VEX, XD, VEX_W;
2897 }
2899 // GR from/to mask register
2900 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2901           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2902 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2903           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2904 def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
2905           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>;
2907 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2908           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2909 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2910           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
2912 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2913           (KMOVWrk VK16:$src)>;
2914 def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2915           (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
2916 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2917           (COPY_TO_REGCLASS VK16:$src, GR32)>;
2918 def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2919           (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
2921 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2922           (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2923 def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2924           (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
2925 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2926           (COPY_TO_REGCLASS VK8:$src, GR32)>;
2927 def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2928           (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
2930 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2931           (COPY_TO_REGCLASS GR32:$src, VK32)>;
2932 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2933           (COPY_TO_REGCLASS VK32:$src, GR32)>;
2934 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2935           (COPY_TO_REGCLASS GR64:$src, VK64)>;
2936 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2937           (COPY_TO_REGCLASS VK64:$src, GR64)>;
2939 // Load/store kreg
2940 let Predicates = [HasDQI] in {
2941   def : Pat<(v1i1 (load addr:$src)),
2942             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2943   def : Pat<(v2i1 (load addr:$src)),
2944             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2945   def : Pat<(v4i1 (load addr:$src)),
2946             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2947 }
2949 let Predicates = [HasAVX512] in {
2950   def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2951             (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2952   def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2953             (KMOVWkm addr:$src)>;
2954 }
2956 def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2957                          SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2958                                               SDTCVecEltisVT<1, i1>,
2959                                               SDTCisPtrTy<2>]>>;
2961 let Predicates = [HasAVX512] in {
2962   multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2963     def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2964               (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2966     def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2967               (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2969     def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2970               (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2972     def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2973               (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2974   }
2976   defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
2977   defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
2978   defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
2979   defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
2980   defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
2981   defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
2982   defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;
2984   def : Pat<(insert_subvector (v16i1 immAllZerosV),
2985                               (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2986             (KMOVWkr (AND32ri8
2987                       (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2988                       (i32 1)))>;
2989 }
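// Worked example of the pattern above (illustration only): inserting a v1i1
// built from bit 0 of a GR8 into an all-zero v16i1 lowers to roughly
//   andl $1, %eax
//   kmovw %eax, %k0
// the AND is what guarantees the resulting mask is zero outside bit 0.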
2991 // Mask unary operation
2992 // - KNOT
2993 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2994                             RegisterClass KRC, SDPatternOperator OpNode,
2995                             X86FoldableSchedWrite sched, Predicate prd> {
2996   let Predicates = [prd] in
2997     def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2998                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2999                [(set KRC:$dst, (OpNode KRC:$src))]>,
3000                Sched<[sched]>;
3001 }
3003 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
3004                                 SDPatternOperator OpNode,
3005                                 X86FoldableSchedWrite sched> {
3006   defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3007                             sched, HasDQI>, VEX, PD;
3008   defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3009                             sched, HasAVX512>, VEX, PS;
3010   defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3011                             sched, HasBWI>, VEX, PD, VEX_W;
3012   defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3013                             sched, HasBWI>, VEX, PS, VEX_W;
3014 }
3016 // TODO - do we need an X86SchedWriteWidths::KMASK type?
3017 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
3019 // KNL does not support KMOVB, so an 8-bit mask is promoted to 16-bit.
3020 let Predicates = [HasAVX512, NoDQI] in
3021 def : Pat<(vnot VK8:$src),
3022           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
3024 def : Pat<(vnot VK4:$src),
3025           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
3026 def : Pat<(vnot VK2:$src),
3027           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
3028 def : Pat<(vnot VK1:$src),
3029           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;
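// The copies above implement a widen/invert/narrow idiom: the narrow mask is
// moved into VK16, inverted with KNOTW, and the low bits copied back. The
// extra bits KNOTW sets above the original width are harmless because only
// the low lanes of the narrow class are read; e.g. inverting the v4i1 value
// 0b0101 computes 0b1111111111111010 and the result keeps just 0b1010.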
3031 // Mask binary operation
3032 // - KAND, KANDN, KOR, KXNOR, KXOR
3033 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
3034                            RegisterClass KRC, SDPatternOperator OpNode,
3035                            X86FoldableSchedWrite sched, Predicate prd,
3036                            bit IsCommutable> {
3037   let Predicates = [prd], isCommutable = IsCommutable in
3038     def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
3039                !strconcat(OpcodeStr,
3040                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3041                [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
3042                Sched<[sched]>;
3043 }
3045 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
3046                                  SDPatternOperator OpNode,
3047                                  X86FoldableSchedWrite sched, bit IsCommutable,
3048                                  Predicate prdW = HasAVX512> {
3049   defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3050                              sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
3051   defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3052                              sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
3053   defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3054                              sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
3055   defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3056                              sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
3057 }
3059 // These nodes use 'vnot' instead of 'not' to support vectors.
3060 def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
3061 def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
3063 // TODO - do we need a X86SchedWriteWidths::KMASK type?
3064 defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
3065 defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
3066 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
3067 defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
3068 defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
3069 defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
3071 multiclass avx512_binop_pat<SDPatternOperator VOpNode,
3072                             Instruction Inst> {
3073   // With AVX512F only, an 8-bit mask is promoted to a 16-bit mask; with DQI
3074   // the v8i1 type is legal and the KxxxB instructions are used directly.
3075   let Predicates = [NoDQI] in
3076   def : Pat<(VOpNode VK8:$src1, VK8:$src2),
3077             (COPY_TO_REGCLASS
3078               (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
3079                     (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
3081   // All types smaller than 8 bits require conversion anyway
3082   def : Pat<(VOpNode VK1:$src1, VK1:$src2),
3083         (COPY_TO_REGCLASS (Inst
3084                            (COPY_TO_REGCLASS VK1:$src1, VK16),
3085                            (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
3086   def : Pat<(VOpNode VK2:$src1, VK2:$src2),
3087         (COPY_TO_REGCLASS (Inst
3088                            (COPY_TO_REGCLASS VK2:$src1, VK16),
3089                            (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
3090   def : Pat<(VOpNode VK4:$src1, VK4:$src2),
3091         (COPY_TO_REGCLASS (Inst
3092                            (COPY_TO_REGCLASS VK4:$src1, VK16),
3093                            (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
3094 }
3096 defm : avx512_binop_pat<and,   KANDWrr>;
3097 defm : avx512_binop_pat<vandn, KANDNWrr>;
3098 defm : avx512_binop_pat<or,    KORWrr>;
3099 defm : avx512_binop_pat<vxnor, KXNORWrr>;
3100 defm : avx512_binop_pat<xor,   KXORWrr>;
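// For example, (and VK8:$a, VK8:$b) on an AVX512F-only target becomes KANDWrr
// on VK16 copies of the operands, followed by a copy back to VK8.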
3102 // Mask unpacking
3103 multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
3104                              X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
3105                              Predicate prd> {
3106   let Predicates = [prd] in {
3107     let hasSideEffects = 0 in
3108     def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
3109                (ins Src.KRC:$src1, Src.KRC:$src2),
3110                "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
3111                VEX_4V, VEX_L, Sched<[sched]>;
3113     def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
3114               (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
3115   }
3116 }
3118 defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, PD;
3119 defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
3120 defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
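// Note: the pattern above passes $src2 as the first instruction operand
// because KUNPCK places its first source in the upper half of the result,
// while concat_vectors puts its first operand in the low elements.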
3122 // Mask bit testing
3123 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3124                               SDNode OpNode, X86FoldableSchedWrite sched,
3125                               Predicate prd> {
3126   let Predicates = [prd], Defs = [EFLAGS] in
3127     def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
3128                !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
3129                [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
3130                Sched<[sched]>;
3131 }
3133 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
3134                                 X86FoldableSchedWrite sched,
3135                                 Predicate prdW = HasAVX512> {
3136   defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
3137                                                                 VEX, PD;
3138   defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
3139                                                                 VEX, PS;
3140   defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
3141                                                                 VEX, PS, VEX_W;
3142   defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
3143                                                                 VEX, PD, VEX_W;
3144 }
3146 // TODO - do we need a X86SchedWriteWidths::KMASK type?
3147 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
3148 defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
3150 // Mask shift
3151 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3152                                SDNode OpNode, X86FoldableSchedWrite sched> {
3153   let Predicates = [HasAVX512] in
3154     def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
3155                  !strconcat(OpcodeStr,
3156                             "\t{$imm, $src, $dst|$dst, $src, $imm}"),
3157                             [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
3158                  Sched<[sched]>;
3159 }
3161 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
3162                                  SDNode OpNode, X86FoldableSchedWrite sched> {
3163   defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3164                                sched>, VEX, TAPD, VEX_W;
3165   let Predicates = [HasDQI] in
3166   defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3167                                sched>, VEX, TAPD;
3168   let Predicates = [HasBWI] in {
3169   defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3170                                sched>, VEX, TAPD, VEX_W;
3171   defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3172                                sched>, VEX, TAPD;
3173   }
3174 }
3176 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
3177 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
3179 // Patterns for comparing 128/256-bit integer vectors using 512-bit instructions.
3180 multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3181                                                  string InstStr,
3182                                                  X86VectorVTInfo Narrow,
3183                                                  X86VectorVTInfo Wide> {
3184 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3185                                 (Narrow.VT Narrow.RC:$src2), cond)),
3186           (COPY_TO_REGCLASS
3187            (!cast<Instruction>(InstStr#"Zrri")
3188             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3189             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3190             (X86pcmpm_imm $cc)), Narrow.KRC)>;
3192 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3193                            (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3194                                                     (Narrow.VT Narrow.RC:$src2),
3195                                                     cond)))),
3196           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3197            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3198            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3199            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3200            (X86pcmpm_imm $cc)), Narrow.KRC)>;
3201 }
3203 multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3204                                                      string InstStr,
3205                                                      X86VectorVTInfo Narrow,
3206                                                      X86VectorVTInfo Wide> {
3207 // Broadcast load.
3208 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3209                                 (Narrow.BroadcastLdFrag addr:$src2), cond)),
3210           (COPY_TO_REGCLASS
3211            (!cast<Instruction>(InstStr#"Zrmib")
3212             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3213             addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
3215 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3216                            (Narrow.KVT
3217                             (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3218                                          (Narrow.BroadcastLdFrag addr:$src2),
3219                                          cond)))),
3220           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3221            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3222            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3223            addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
3225 // Commuted with broadcast load.
3226 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
3227                                 (Narrow.VT Narrow.RC:$src1),
3228                                 cond)),
3229           (COPY_TO_REGCLASS
3230            (!cast<Instruction>(InstStr#"Zrmib")
3231             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3232             addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3234 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3235                            (Narrow.KVT
3236                             (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
3237                                          (Narrow.VT Narrow.RC:$src1),
3238                                          cond)))),
3239           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3240            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3241            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3242            addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3243 }
3245 // Same as above, but for fp types which don't use PatFrags.
3246 multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
3247                                                 X86VectorVTInfo Narrow,
3248                                                 X86VectorVTInfo Wide> {
3249 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3250                                (Narrow.VT Narrow.RC:$src2), timm:$cc)),
3251           (COPY_TO_REGCLASS
3252            (!cast<Instruction>(InstStr#"Zrri")
3253             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3254             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3255             timm:$cc), Narrow.KRC)>;
3257 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3258                            (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3259                                        (Narrow.VT Narrow.RC:$src2), timm:$cc))),
3260           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3261            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3262            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3263            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3264            timm:$cc), Narrow.KRC)>;
3266 // Broadcast load.
3267 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3268                                (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
3269           (COPY_TO_REGCLASS
3270            (!cast<Instruction>(InstStr#"Zrmbi")
3271             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3272             addr:$src2, timm:$cc), Narrow.KRC)>;
3274 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3275                            (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3276                                        (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
3277           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3278            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3279            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3280            addr:$src2, timm:$cc), Narrow.KRC)>;
3282 // Commuted with broadcast load.
3283 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3284                                (Narrow.VT Narrow.RC:$src1), timm:$cc)),
3285           (COPY_TO_REGCLASS
3286            (!cast<Instruction>(InstStr#"Zrmbi")
3287             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3288             addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3290 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3291                            (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3292                                        (Narrow.VT Narrow.RC:$src1), timm:$cc))),
3293           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3294            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3295            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3296            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3297 }
3299 let Predicates = [HasAVX512, NoVLX] in {
3300   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3301   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3303   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3304   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3306   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3307   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3309   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3310   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3312   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3313   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3315   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3316   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3318   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3319   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3321   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3322   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3324   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
3325   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
3326   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
3327   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
3328 }
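// Illustrative lowering (no VLX): a v8i32 signed compare is widened by
// inserting both ymm operands into zmm registers (upper lanes undefined via
// IMPLICIT_DEF), executing the 512-bit VPCMPD, and copying only the low 8
// bits of the resulting mask back to the narrow mask class.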
3330 let Predicates = [HasBWI, NoVLX] in {
3331   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3332   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3334   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3335   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3337   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3338   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3340   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3341   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3342 }
3344 // Mask setting all 0s or 1s
3345 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
3346   let Predicates = [HasAVX512] in
3347     let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3348         SchedRW = [WriteZero] in
3349       def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3350                      [(set KRC:$dst, (VT Val))]>;
3351 }
3353 multiclass avx512_mask_setop_w<SDPatternOperator Val> {
3354   defm W : avx512_mask_setop<VK16, v16i1, Val>;
3355   defm D : avx512_mask_setop<VK32,  v32i1, Val>;
3356   defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3357 }
3359 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3360 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3362 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
3363 let Predicates = [HasAVX512] in {
3364   def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3365   def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3366   def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3367   def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3368   def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
3369   def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
3370   def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
3371   def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
3372 }
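// e.g. an all-ones v4i1 constant is materialized with KSET1W and then
// re-classed to VK4; the narrow mask classes alias the low bits of the same
// k-registers, so no additional instruction is needed.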
3374 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
3375 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3376                                              RegisterClass RC, ValueType VT> {
3377   def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3378             (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3380   def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3381             (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
3382 }
3383 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
3384 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
3385 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
3386 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
3387 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
3388 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;
3390 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
3391 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
3392 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
3393 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
3394 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;
3396 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
3397 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
3398 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
3399 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;
3401 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
3402 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
3403 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;
3405 defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3406 defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3408 defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
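// These index-0 extract/insert operations are free at the ISA level: a
// narrower kmask is simply the low bits of the wider k-register, so every
// pattern above reduces to a register-class copy (usually coalesced away).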
3410 //===----------------------------------------------------------------------===//
3411 // AVX-512 - Aligned and unaligned load and store
3412 //===----------------------------------------------------------------------===//
3414 multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3415                        X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3416                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3417                        bit NoRMPattern = 0,
3418                        SDPatternOperator SelectOprr = vselect> {
3419   let hasSideEffects = 0 in {
3420   let isMoveReg = 1 in
3421   def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3422                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3423                     _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
3424                     EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
3425   def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3426                       (ins _.KRCWM:$mask,  _.RC:$src),
3427                       !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3428                        "${dst} {${mask}} {z}, $src}"),
3429                        [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3430                                            (_.VT _.RC:$src),
3431                                            _.ImmAllZerosV)))], _.ExeDomain>,
3432                        EVEX, EVEX_KZ, Sched<[Sched.RR]>;
3434   let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3435   def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3436                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3437                     !if(NoRMPattern, [],
3438                         [(set _.RC:$dst,
3439                           (_.VT (ld_frag addr:$src)))]),
3440                     _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
3441                     EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
3443   let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3444     def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3445                       (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3446                       !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3447                       "${dst} {${mask}}, $src1}"),
3448                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3449                                           (_.VT _.RC:$src1),
3450                                           (_.VT _.RC:$src0))))], _.ExeDomain>,
3451                        EVEX, EVEX_K, Sched<[Sched.RR]>;
3452     def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3453                      (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3454                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3455                       "${dst} {${mask}}, $src1}"),
3456                      [(set _.RC:$dst, (_.VT
3457                          (vselect_mask _.KRCWM:$mask,
3458                           (_.VT (ld_frag addr:$src1)),
3459                            (_.VT _.RC:$src0))))], _.ExeDomain>,
3460                      EVEX, EVEX_K, Sched<[Sched.RM]>;
3461   }
3462   def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3463                   (ins _.KRCWM:$mask, _.MemOp:$src),
3464                   OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3465                                 "${dst} {${mask}} {z}, $src}",
3466                   [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
3467                     (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3468                   _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
3469   }
3470   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3471             (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3473   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3474             (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3476   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3477             (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
3478              _.KRCWM:$mask, addr:$ptr)>;
3479 }
3481 multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3482                                  AVX512VLVectorVTInfo _, Predicate prd,
3483                                  X86SchedWriteMoveLSWidths Sched,
3484                                  string EVEX2VEXOvrd, bit NoRMPattern = 0> {
3485   let Predicates = [prd] in
3486   defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3487                        _.info512.AlignedLdFrag, masked_load_aligned,
3488                        Sched.ZMM, "", NoRMPattern>, EVEX_V512;
3490   let Predicates = [prd, HasVLX] in {
3491   defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3492                           _.info256.AlignedLdFrag, masked_load_aligned,
3493                           Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
3494   defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3495                           _.info128.AlignedLdFrag, masked_load_aligned,
3496                           Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
3497   }
3498 }
3500 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3501                           AVX512VLVectorVTInfo _, Predicate prd,
3502                           X86SchedWriteMoveLSWidths Sched,
3503                           string EVEX2VEXOvrd, bit NoRMPattern = 0,
3504                           SDPatternOperator SelectOprr = vselect> {
3505   let Predicates = [prd] in
3506   defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3507                        masked_load, Sched.ZMM, "",
3508                        NoRMPattern, SelectOprr>, EVEX_V512;
3510   let Predicates = [prd, HasVLX] in {
3511   defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3512                          masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
3513                          NoRMPattern, SelectOprr>, EVEX_V256;
3514   defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3515                          masked_load, Sched.XMM, EVEX2VEXOvrd,
3516                          NoRMPattern, SelectOprr>, EVEX_V128;
3517   }
3518 }
3520 multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3521                         X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3522                         X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3523                         bit NoMRPattern = 0> {
3524   let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3525   let isMoveReg = 1 in
3526   def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3527                          OpcodeStr # "\t{$src, $dst|$dst, $src}",
3528                          [], _.ExeDomain>, EVEX,
3529                          FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
3530                          EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
3531   def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3532                          (ins _.KRCWM:$mask, _.RC:$src),
3533                          OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3534                          "${dst} {${mask}}, $src}",
3535                          [], _.ExeDomain>,  EVEX, EVEX_K,
3536                          FoldGenData<BaseName#_.ZSuffix#rrk>,
3537                          Sched<[Sched.RR]>;
3538   def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3539                           (ins _.KRCWM:$mask, _.RC:$src),
3540                           OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3541                           "${dst} {${mask}} {z}, $src}",
3542                           [], _.ExeDomain>, EVEX, EVEX_KZ,
3543                           FoldGenData<BaseName#_.ZSuffix#rrkz>,
3544                           Sched<[Sched.RR]>;
3545   }
3547   let hasSideEffects = 0, mayStore = 1 in
3548   def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3549                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3550                     !if(NoMRPattern, [],
3551                         [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3552                     _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
3553                     EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
3554   def mrk : AVX512PI<opc, MRMDestMem, (outs),
3555                      (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3556               OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3557                [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
3558                NotMemoryFoldable;
3560   def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3561            (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3562                                                         _.KRCWM:$mask, _.RC:$src)>;
3564   def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3565                   (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3566                    _.RC:$dst, _.RC:$src), 0>;
3567   def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3568                   (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3569                    _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3570   def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3571                   (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3572                    _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3573 }
3575 multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3576                             AVX512VLVectorVTInfo _, Predicate prd,
3577                             X86SchedWriteMoveLSWidths Sched,
3578                             string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3579   let Predicates = [prd] in
3580   defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3581                         masked_store, Sched.ZMM, "",
3582                         NoMRPattern>, EVEX_V512;
3583   let Predicates = [prd, HasVLX] in {
3584     defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3585                              masked_store, Sched.YMM,
3586                              EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3587     defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3588                              masked_store, Sched.XMM, EVEX2VEXOvrd,
3589                              NoMRPattern>, EVEX_V128;
3590   }
3591 }
3593 multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3594                                   AVX512VLVectorVTInfo _, Predicate prd,
3595                                   X86SchedWriteMoveLSWidths Sched,
3596                                   string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3597   let Predicates = [prd] in
3598   defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3599                         masked_store_aligned, Sched.ZMM, "",
3600                         NoMRPattern>, EVEX_V512;
3602   let Predicates = [prd, HasVLX] in {
3603     defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3604                              masked_store_aligned, Sched.YMM,
3605                              EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3606     defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3607                              masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
3608                              NoMRPattern>, EVEX_V128;
3609   }
3610 }
3612 defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3613                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3614                avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3615                                       HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3616                PS, EVEX_CD8<32, CD8VF>;
3618 defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3619                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3620                avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3621                                       HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3622                PD, VEX_W, EVEX_CD8<64, CD8VF>;
3624 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3625                               SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
3626                avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3627                                SchedWriteFMoveLS, "VMOVUPS">,
3628                                PS, EVEX_CD8<32, CD8VF>;
3630 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3631                               SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
3632                avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3633                                SchedWriteFMoveLS, "VMOVUPD">,
3634                PD, VEX_W, EVEX_CD8<64, CD8VF>;
3636 defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3637                                        HasAVX512, SchedWriteVecMoveLS,
3638                                        "VMOVDQA", 1>,
3639                  avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3640                                         HasAVX512, SchedWriteVecMoveLS,
3641                                         "VMOVDQA", 1>,
3642                  PD, EVEX_CD8<32, CD8VF>;
3644 defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3645                                        HasAVX512, SchedWriteVecMoveLS,
3646                                        "VMOVDQA">,
3647                  avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3648                                         HasAVX512, SchedWriteVecMoveLS,
3649                                         "VMOVDQA">,
3650                  PD, VEX_W, EVEX_CD8<64, CD8VF>;
3652 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3653                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
3654                 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3655                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3656                 XD, EVEX_CD8<8, CD8VF>;
3658 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3659                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3660                  avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3661                                  SchedWriteVecMoveLS, "VMOVDQU", 1>,
3662                  XD, VEX_W, EVEX_CD8<16, CD8VF>;
3664 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3665                                 SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
3666                  avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3667                                  SchedWriteVecMoveLS, "VMOVDQU", 1>,
3668                  XS, EVEX_CD8<32, CD8VF>;
3670 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3671                                 SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
3672                  avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3673                                  SchedWriteVecMoveLS, "VMOVDQU">,
3674                  XS, VEX_W, EVEX_CD8<64, CD8VF>;
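// Note: plain (unmasked) integer vector loads and stores are selected by the
// explicit patterns further down, which use the 64-bit-element forms for every
// element type; the byte/word forms above additionally require BWI and are
// mainly needed for their masked variants.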
3676 // Special instructions to help with spilling when we don't have VLX. We need
3677 // to load or store from a ZMM register instead. These are converted in
3678 // expandPostRAPseudos.
3679 let isReMaterializable = 1, canFoldAsLoad = 1,
3680     isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3681 def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3682                             "", []>, Sched<[WriteFLoadX]>;
3683 def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3684                             "", []>, Sched<[WriteFLoadY]>;
3685 def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3686                             "", []>, Sched<[WriteFLoadX]>;
3687 def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3688                             "", []>, Sched<[WriteFLoadY]>;
3689 }
3691 let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3692 def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3693                             "", []>, Sched<[WriteFStoreX]>;
3694 def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3695                             "", []>, Sched<[WriteFStoreY]>;
3696 def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3697                             "", []>, Sched<[WriteFStoreX]>;
3698 def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3699                             "", []>, Sched<[WriteFStoreY]>;
3700 }
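// Sketch: spilling e.g. %xmm16 without VLX cannot use the VEX-encoded
// VMOVAPS (VEX only reaches xmm0-15), so the pseudo is kept until after
// register allocation and then rewritten in expandPostRAPseudos to a move of
// the containing zmm register, as described in the comment above.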
3702 def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3703                           (v8i64 VR512:$src))),
3704    (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3705                                               VK8), VR512:$src)>;
3707 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3708                            (v16i32 VR512:$src))),
3709                   (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3711 // These patterns exist to prevent the above patterns from introducing a second
3712 // mask inversion when one already exists.
3713 def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
3714                           (v8i64 immAllZerosV),
3715                           (v8i64 VR512:$src))),
3716                  (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3717 def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
3718                            (v16i32 immAllZerosV),
3719                            (v16i32 VR512:$src))),
3720                   (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
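// i.e. "select (not M), 0, X" is the same as "select M, X, 0", so the zeroing
// move can consume the existing mask directly instead of inverting it twice.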
3722 multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3723                               X86VectorVTInfo Wide> {
3724  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3725                                Narrow.RC:$src1, Narrow.RC:$src0)),
3726            (EXTRACT_SUBREG
3727             (Wide.VT
3728              (!cast<Instruction>(InstrStr#"rrk")
3729               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3730               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3731               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3732             Narrow.SubRegIdx)>;
3734  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3735                                Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3736            (EXTRACT_SUBREG
3737             (Wide.VT
3738              (!cast<Instruction>(InstrStr#"rrkz")
3739               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3740               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3741             Narrow.SubRegIdx)>;
3742 }
3744 // Patterns for handling narrow-mask selects of 128/256-bit vectors when VLX
3745 // isn't available. Use a 512-bit operation and extract.
3746 let Predicates = [HasAVX512, NoVLX] in {
3747   defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3748   defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3749   defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3750   defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3752   defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3753   defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3754   defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3755   defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
3756 }
3758 let Predicates = [HasBWI, NoVLX] in {
3759   defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3760   defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3762   defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3763   defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
3764 }
3766 let Predicates = [HasAVX512] in {
3767   // 512-bit load.
3768   def : Pat<(alignedloadv16i32 addr:$src),
3769             (VMOVDQA64Zrm addr:$src)>;
3770   def : Pat<(alignedloadv32i16 addr:$src),
3771             (VMOVDQA64Zrm addr:$src)>;
3772   def : Pat<(alignedloadv64i8 addr:$src),
3773             (VMOVDQA64Zrm addr:$src)>;
3774   def : Pat<(loadv16i32 addr:$src),
3775             (VMOVDQU64Zrm addr:$src)>;
3776   def : Pat<(loadv32i16 addr:$src),
3777             (VMOVDQU64Zrm addr:$src)>;
3778   def : Pat<(loadv64i8 addr:$src),
3779             (VMOVDQU64Zrm addr:$src)>;
3781   // 512-bit store.
3782   def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3783             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3784   def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3785             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3786   def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3787             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3788   def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3789             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3790   def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3791             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3792   def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3793             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3794 }
3796 let Predicates = [HasVLX] in {
3797   // 128-bit load.
3798   def : Pat<(alignedloadv4i32 addr:$src),
3799             (VMOVDQA64Z128rm addr:$src)>;
3800   def : Pat<(alignedloadv8i16 addr:$src),
3801             (VMOVDQA64Z128rm addr:$src)>;
3802   def : Pat<(alignedloadv16i8 addr:$src),
3803             (VMOVDQA64Z128rm addr:$src)>;
3804   def : Pat<(loadv4i32 addr:$src),
3805             (VMOVDQU64Z128rm addr:$src)>;
3806   def : Pat<(loadv8i16 addr:$src),
3807             (VMOVDQU64Z128rm addr:$src)>;
3808   def : Pat<(loadv16i8 addr:$src),
3809             (VMOVDQU64Z128rm addr:$src)>;
3811   // 128-bit store.
3812   def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3813             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3814   def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3815             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3816   def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3817             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3818   def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3819             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3820   def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3821             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3822   def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3823             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3825   // 256-bit load.
3826   def : Pat<(alignedloadv8i32 addr:$src),
3827             (VMOVDQA64Z256rm addr:$src)>;
3828   def : Pat<(alignedloadv16i16 addr:$src),
3829             (VMOVDQA64Z256rm addr:$src)>;
3830   def : Pat<(alignedloadv32i8 addr:$src),
3831             (VMOVDQA64Z256rm addr:$src)>;
3832   def : Pat<(loadv8i32 addr:$src),
3833             (VMOVDQU64Z256rm addr:$src)>;
3834   def : Pat<(loadv16i16 addr:$src),
3835             (VMOVDQU64Z256rm addr:$src)>;
3836   def : Pat<(loadv32i8 addr:$src),
3837             (VMOVDQU64Z256rm addr:$src)>;
3839   // 256-bit store.
3840   def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3841             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3842   def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3843             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3844   def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3845             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3846   def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3847             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3848   def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3849             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3850   def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3851             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3852 }
3853 let Predicates = [HasFP16] in {
3854   def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), (v32f16 VR512:$src0))),
3855             (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
3856   def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), v32f16_info.ImmAllZerosV)),
3857             (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
3858   def : Pat<(v32f16 (alignedloadv32f16 addr:$src)),
3859             (VMOVAPSZrm addr:$src)>;
3860   def : Pat<(v32f16 (vselect VK32WM:$mask,
3861                      (v32f16 (alignedloadv32f16 addr:$src)), (v32f16 VR512:$src0))),
3862             (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3863   def : Pat<(v32f16 (vselect VK32WM:$mask,
3864                      (v32f16 (alignedloadv32f16 addr:$src)), v32f16_info.ImmAllZerosV)),
3865             (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3866   def : Pat<(v32f16 (loadv32f16 addr:$src)),
3867             (VMOVUPSZrm addr:$src)>;
3868   def : Pat<(v32f16 (vselect VK32WM:$mask,
3869                      (v32f16 (loadv32f16 addr:$src)), (v32f16 VR512:$src0))),
3870             (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3871   def : Pat<(v32f16 (vselect VK32WM:$mask,
3872                      (v32f16 (loadv32f16 addr:$src)), v32f16_info.ImmAllZerosV)),
3873             (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3874   def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, (v32f16 VR512:$src0))),
3875             (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3876   def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, undef)),
3877             (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3878   def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, v32f16_info.ImmAllZerosV)),
3879             (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3881   def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
3882             (VMOVAPSZmr addr:$dst, VR512:$src)>;
3883   def : Pat<(store (v32f16 VR512:$src), addr:$dst),
3884             (VMOVUPSZmr addr:$dst, VR512:$src)>;
3885   def : Pat<(masked_store (v32f16 VR512:$src), addr:$dst, VK32WM:$mask),
3886             (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
3887 }
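// There is no dedicated f16 move opcode, so v32f16 (and the VLX forms below)
// reuse VMOVAPS/VMOVUPS for whole-register loads and stores, and VMOVDQU16
// for the masked forms, which provide the 16-bit element masking f16 needs.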
3888 let Predicates = [HasFP16, HasVLX] in {
3889   def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), (v16f16 VR256X:$src0))),
3890             (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
3891   def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), v16f16x_info.ImmAllZerosV)),
3892             (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
3893   def : Pat<(v16f16 (alignedloadv16f16 addr:$src)),
3894             (VMOVAPSZ256rm addr:$src)>;
3895   def : Pat<(v16f16 (vselect VK16WM:$mask,
3896                      (v16f16 (alignedloadv16f16 addr:$src)), (v16f16 VR256X:$src0))),
3897             (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3898   def : Pat<(v16f16 (vselect VK16WM:$mask,
3899                      (v16f16 (alignedloadv16f16 addr:$src)), v16f16x_info.ImmAllZerosV)),
3900             (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3901   def : Pat<(v16f16 (loadv16f16 addr:$src)),
3902             (VMOVUPSZ256rm addr:$src)>;
3903   def : Pat<(v16f16 (vselect VK16WM:$mask,
3904                      (v16f16 (loadv16f16 addr:$src)), (v16f16 VR256X:$src0))),
3905             (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3906   def : Pat<(v16f16 (vselect VK16WM:$mask,
3907                      (v16f16 (loadv16f16 addr:$src)), v16f16x_info.ImmAllZerosV)),
3908             (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3909   def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, (v16f16 VR256X:$src0))),
3910             (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3911   def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, undef)),
3912             (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3913   def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, v16f16x_info.ImmAllZerosV)),
3914             (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3916   def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
3917             (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3918   def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
3919             (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3920   def : Pat<(masked_store (v16f16 VR256X:$src), addr:$dst, VK16WM:$mask),
3921             (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;
3923   def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 VR128X:$src1), (v8f16 VR128X:$src0))),
3924             (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
3925   def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 VR128X:$src1), v8f16x_info.ImmAllZerosV)),
3926             (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
3927   def : Pat<(v8f16 (alignedloadv8f16 addr:$src)),
3928             (VMOVAPSZ128rm addr:$src)>;
3929   def : Pat<(v8f16 (vselect VK8WM:$mask,
3930                      (v8f16 (alignedloadv8f16 addr:$src)), (v8f16 VR128X:$src0))),
3931             (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3932   def : Pat<(v8f16 (vselect VK8WM:$mask,
3933                      (v8f16 (alignedloadv8f16 addr:$src)), v8f16x_info.ImmAllZerosV)),
3934             (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3935   def : Pat<(v8f16 (loadv8f16 addr:$src)),
3936             (VMOVUPSZ128rm addr:$src)>;
3937   def : Pat<(v8f16 (vselect VK8WM:$mask,
3938                      (v8f16 (loadv8f16 addr:$src)), (v8f16 VR128X:$src0))),
3939             (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3940   def : Pat<(v8f16 (vselect VK8WM:$mask,
3941                      (v8f16 (loadv8f16 addr:$src)), v8f16x_info.ImmAllZerosV)),
3942             (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3943   def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, (v8f16 VR128X:$src0))),
3944             (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3945   def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, undef)),
3946             (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3947   def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, v8f16x_info.ImmAllZerosV)),
3948             (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3950   def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
3951             (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3952   def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
3953             (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3954   def : Pat<(masked_store (v8f16 VR128X:$src), addr:$dst, VK8WM:$mask),
3955             (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
3956 }
3958 // Move Int Doubleword to Packed Double Int
3960 let ExeDomain = SSEPackedInt in {
3961 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3962                       "vmovd\t{$src, $dst|$dst, $src}",
3963                       [(set VR128X:$dst,
3964                         (v4i32 (scalar_to_vector GR32:$src)))]>,
3965                         EVEX, Sched<[WriteVecMoveFromGpr]>;
3966 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3967                       "vmovd\t{$src, $dst|$dst, $src}",
3968                       [(set VR128X:$dst,
3969                         (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3970                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
3971 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3972                       "vmovq\t{$src, $dst|$dst, $src}",
3973                         [(set VR128X:$dst,
3974                           (v2i64 (scalar_to_vector GR64:$src)))]>,
3975                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3976 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3977 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3978                       (ins i64mem:$src),
3979                       "vmovq\t{$src, $dst|$dst, $src}", []>,
3980                       EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
3981 let isCodeGenOnly = 1 in {
3982 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3983                        "vmovq\t{$src, $dst|$dst, $src}",
3984                        [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3985                        EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3986 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3987                          "vmovq\t{$src, $dst|$dst, $src}",
3988                          [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3989                          EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3990 }
3991 } // ExeDomain = SSEPackedInt
3993 // Move Int Doubleword to Single Scalar
3995 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3996 def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3997                       "vmovd\t{$src, $dst|$dst, $src}",
3998                       [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3999                       EVEX, Sched<[WriteVecMoveFromGpr]>;
4000 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
4002 // Move doubleword from xmm register to r/m32
4004 let ExeDomain = SSEPackedInt in {
4005 def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
4006                        "vmovd\t{$src, $dst|$dst, $src}",
4007                        [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
4008                                         (iPTR 0)))]>,
4009                        EVEX, Sched<[WriteVecMoveToGpr]>;
4010 def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
4011                        (ins i32mem:$dst, VR128X:$src),
4012                        "vmovd\t{$src, $dst|$dst, $src}",
4013                        [(store (i32 (extractelt (v4i32 VR128X:$src),
4014                                      (iPTR 0))), addr:$dst)]>,
4015                        EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
4016 } // ExeDomain = SSEPackedInt
4018 // Move quadword from xmm1 register to r/m64
4020 let ExeDomain = SSEPackedInt in {
4021 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
4022                       "vmovq\t{$src, $dst|$dst, $src}",
4023                       [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
4024                                                    (iPTR 0)))]>,
4025                       PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
4026                       Requires<[HasAVX512]>;
4028 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
4029 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
4030                       "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
4031                       EVEX, VEX_W, Sched<[WriteVecStore]>,
4032                       Requires<[HasAVX512, In64BitMode]>;
4034 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
4035                       (ins i64mem:$dst, VR128X:$src),
4036                       "vmovq\t{$src, $dst|$dst, $src}",
4037                       [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
4038                               addr:$dst)]>,
4039                       EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
4040                       Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
4042 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
4043 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
4044                              (ins VR128X:$src),
4045                              "vmovq\t{$src, $dst|$dst, $src}", []>,
4046                              EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
4047 } // ExeDomain = SSEPackedInt
4049 def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
4050                 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
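// The ".s" mnemonic suffix forces the store-direction encoding (0xD6,
// MRMDestReg) of the register-to-register vmovq instead of the default 0x7E
// form; it exists mainly so both encodings can be exercised from assembly.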
4052 let Predicates = [HasAVX512] in {
4053   def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
4054             (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
}
4057 // Move Scalar Single to Double Int
4059 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
4060 def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
4061                       (ins FR32X:$src),
4062                       "vmovd\t{$src, $dst|$dst, $src}",
4063                       [(set GR32:$dst, (bitconvert FR32X:$src))]>,
4064                       EVEX, Sched<[WriteVecMoveToGpr]>;
4065 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
4067 // Move Quadword Int to Packed Quadword Int
4069 let ExeDomain = SSEPackedInt in {
4070 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
4071                       (ins i64mem:$src),
4072                       "vmovq\t{$src, $dst|$dst, $src}",
4073                       [(set VR128X:$dst,
4074                         (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
4075                       EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
4076 } // ExeDomain = SSEPackedInt
4078 // Allow "vmovd" but print "vmovq".
4079 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
4080                 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
4081 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
4082                 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
4084 // Conversions between masks and scalar fp.
4085 def : Pat<(v32i1 (bitconvert FR32X:$src)),
4086           (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
4087 def : Pat<(f32 (bitconvert VK32:$src)),
4088           (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
4090 def : Pat<(v64i1 (bitconvert FR64X:$src)),
4091           (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
4092 def : Pat<(f64 (bitconvert VK64:$src)),
4093           (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
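// There is no direct mask <-> xmm move, so these casts bounce through a GPR:
// f32 -> v32i1 is VMOVSS2DIZrr (xmm -> GR32) followed by KMOVDkr (GR32 -> k),
// and the reverse composes KMOVDrk with VMOVDI2SSZrr. The f64/v64i1 case uses
// the corresponding 64-bit moves.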
4095 //===----------------------------------------------------------------------===//
4096 // AVX-512  MOVSH, MOVSS, MOVSD
4097 //===----------------------------------------------------------------------===//
4099 multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
4100                               X86VectorVTInfo _,
4101                               list<Predicate> prd = [HasAVX512, OptForSize]> {
4102   let Predicates = prd in
4103   def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4104              (ins _.RC:$src1, _.RC:$src2),
4105              !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4106              [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
4107              _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
4108   def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4109               (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
4110               !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
4111               "$dst {${mask}} {z}, $src1, $src2}"),
4112               [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
4113                                       (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4114                                       _.ImmAllZerosV)))],
4115               _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
4116   let Constraints = "$src0 = $dst"  in
4117   def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4118              (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
4119              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
4120              "$dst {${mask}}, $src1, $src2}"),
4121              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
4122                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4123                                      (_.VT _.RC:$src0))))],
4124              _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
4125   let canFoldAsLoad = 1, isReMaterializable = 1 in {
4126   def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
4127              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4128              [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
4129              _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
4130   // _alt version uses the scalar register class (FR16X/FR32X/FR64X).
4131   let isCodeGenOnly = 1 in
4132   def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
4133                  !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4134                  [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
4135                  _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
4136   }
4137   let mayLoad = 1, hasSideEffects = 0 in {
4138     let Constraints = "$src0 = $dst" in
4139     def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
4140                (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
4141                !strconcat(asm, "\t{$src, $dst {${mask}}|",
4142                "$dst {${mask}}, $src}"),
4143                [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
4144     def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
4145                (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
4146                !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
4147                "$dst {${mask}} {z}, $src}"),
4148                [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
4149   }
4150   def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
4151              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4152              [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
4153              EVEX, Sched<[WriteFStore]>;
4154   let mayStore = 1, hasSideEffects = 0 in
4155   def mrk: AVX512PI<0x11, MRMDestMem, (outs),
4156               (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
4157               !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
4158               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
4159               NotMemoryFoldable;
}
4162 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
4163                                   VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
4165 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
4166                                   VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
4168 defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
4169                                   [HasFP16]>,
4170                                   VEX_LIG, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
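// Each defm above expands to register (rr/rrk/rrkz), load (rm/rmk/rmkz) and
// store (mr/mrk) forms, e.g. VMOVSSZrr, VMOVSDZrmkz and VMOVSHZmrk. The
// lowering multiclasses below refer to these concatenated names via !cast.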
4172 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
4173                                        PatLeaf ZeroFP, X86VectorVTInfo _> {
4175 def : Pat<(_.VT (OpNode _.RC:$src0,
4176                         (_.VT (scalar_to_vector
4177                                   (_.EltVT (X86selects VK1WM:$mask,
4178                                                        (_.EltVT _.FRC:$src1),
4179                                                        (_.EltVT _.FRC:$src2))))))),
4180           (!cast<Instruction>(InstrStr#rrk)
4181                         (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
4182                         VK1WM:$mask,
4183                         (_.VT _.RC:$src0),
4184                         (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
4186 def : Pat<(_.VT (OpNode _.RC:$src0,
4187                         (_.VT (scalar_to_vector
4188                                   (_.EltVT (X86selects VK1WM:$mask,
4189                                                        (_.EltVT _.FRC:$src1),
4190                                                        (_.EltVT ZeroFP))))))),
4191           (!cast<Instruction>(InstrStr#rrkz)
4192                         VK1WM:$mask,
4193                         (_.VT _.RC:$src0),
4194                         (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
}
4197 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4198                                         dag Mask, RegisterClass MaskRC> {
4200 def : Pat<(masked_store
4201              (_.info512.VT (insert_subvector undef,
4202                                (_.info128.VT _.info128.RC:$src),
4203                                (iPTR 0))), addr:$dst, Mask),
4204           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4205                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4206                       _.info128.RC:$src)>;
}
4210 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
4211                                                AVX512VLVectorVTInfo _,
4212                                                dag Mask, RegisterClass MaskRC,
4213                                                SubRegIndex subreg> {
4215 def : Pat<(masked_store
4216              (_.info512.VT (insert_subvector undef,
4217                                (_.info128.VT _.info128.RC:$src),
4218                                (iPTR 0))), addr:$dst, Mask),
4219           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4220                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4221                       _.info128.RC:$src)>;
}
4225 // This matches the more recent codegen from clang that avoids emitting a
4226 // 512-bit masked store directly. Codegen will widen a 128-bit masked store
4227 // to 512 bits on AVX512F-only targets.
4228 multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4229                                                AVX512VLVectorVTInfo _,
4230                                                dag Mask512, dag Mask128,
4231                                                RegisterClass MaskRC,
4232                                                SubRegIndex subreg> {
4234 // AVX512F pattern.
4235 def : Pat<(masked_store
4236              (_.info512.VT (insert_subvector undef,
4237                                (_.info128.VT _.info128.RC:$src),
4238                                (iPTR 0))), addr:$dst, Mask512),
4239           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4240                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4241                       _.info128.RC:$src)>;
4243 // AVX512VL pattern.
4244 def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4245           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4246                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4247                       _.info128.RC:$src)>;
}
4250 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4251                                        dag Mask, RegisterClass MaskRC> {
4253 def : Pat<(_.info128.VT (extract_subvector
4254                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
4255                                         _.info512.ImmAllZerosV)),
4256                            (iPTR 0))),
4257           (!cast<Instruction>(InstrStr#rmkz)
4258                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4259                       addr:$srcAddr)>;
4261 def : Pat<(_.info128.VT (extract_subvector
4262                 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4263                       (_.info512.VT (insert_subvector undef,
4264                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4265                             (iPTR 0))))),
4266                 (iPTR 0))),
4267           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4268                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4269                       addr:$srcAddr)>;
}
4273 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4274                                               AVX512VLVectorVTInfo _,
4275                                               dag Mask, RegisterClass MaskRC,
4276                                               SubRegIndex subreg> {
4278 def : Pat<(_.info128.VT (extract_subvector
4279                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
4280                                         _.info512.ImmAllZerosV)),
4281                            (iPTR 0))),
4282           (!cast<Instruction>(InstrStr#rmkz)
4283                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4284                       addr:$srcAddr)>;
4286 def : Pat<(_.info128.VT (extract_subvector
4287                 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4288                       (_.info512.VT (insert_subvector undef,
4289                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4290                             (iPTR 0))))),
4291                 (iPTR 0))),
4292           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4293                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4294                       addr:$srcAddr)>;
}
4298 // This matches the more recent codegen from clang that avoids emitting a
4299 // 512-bit masked load directly. Codegen will widen a 128-bit masked load
4300 // to 512 bits on AVX512F-only targets.
4301 multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4302                                               AVX512VLVectorVTInfo _,
4303                                               dag Mask512, dag Mask128,
4304                                               RegisterClass MaskRC,
4305                                               SubRegIndex subreg> {
4306 // AVX512F patterns.
4307 def : Pat<(_.info128.VT (extract_subvector
4308                          (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4309                                         _.info512.ImmAllZerosV)),
4310                            (iPTR 0))),
4311           (!cast<Instruction>(InstrStr#rmkz)
4312                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4313                       addr:$srcAddr)>;
4315 def : Pat<(_.info128.VT (extract_subvector
4316                 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4317                       (_.info512.VT (insert_subvector undef,
4318                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4319                             (iPTR 0))))),
4320                 (iPTR 0))),
4321           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4322                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4323                       addr:$srcAddr)>;
4325 // AVX512VL patterns.
4326 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4327                          _.info128.ImmAllZerosV)),
4328           (!cast<Instruction>(InstrStr#rmkz)
4329                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4330                       addr:$srcAddr)>;
4332 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4333                          (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4334           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4335                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4336                       addr:$srcAddr)>;
}
4339 defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
4340 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4341 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4343 defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4344                    (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4345 defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4346                    (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4347 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4348                    (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4349 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4350                    (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4351 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4352                    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4354 defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4355                    (v32i1 (insert_subvector
4356                            (v32i1 immAllZerosV),
4357                            (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4358                            (iPTR 0))),
4359                    (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4360                    GR8, sub_8bit>;
4361 defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4362                    (v16i1 (insert_subvector
4363                            (v16i1 immAllZerosV),
4364                            (v4i1 (extract_subvector
4365                                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4366                                   (iPTR 0))),
4367                            (iPTR 0))),
4368                    (v4i1 (extract_subvector
4369                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4370                           (iPTR 0))), GR8, sub_8bit>;
4371 defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4372                    (v8i1
4373                     (extract_subvector
4374                      (v16i1
4375                       (insert_subvector
4376                        (v16i1 immAllZerosV),
4377                        (v2i1 (extract_subvector
4378                               (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4379                               (iPTR 0))),
4380                        (iPTR 0))),
4381                      (iPTR 0))),
4382                    (v2i1 (extract_subvector
4383                           (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4384                           (iPTR 0))), GR8, sub_8bit>;
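// Illustrative example: clang's _mm_mask_store_ss/_mm_mask_store_sd intrinsics
// emit a one-element masked store (widened to 512 bits on AVX512F-only
// targets), which the patterns above select to VMOVSSZmrk / VMOVSDZmrk, i.e.
// "vmovss %xmm0, (%rdi) {%k1}" in AT&T syntax.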
4386 defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4387                    (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4388 defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4389                    (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4390 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4391                    (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4392 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4393                    (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4394 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4395                    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4397 defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4398                    (v32i1 (insert_subvector
4399                            (v32i1 immAllZerosV),
4400                            (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4401                            (iPTR 0))),
4402                    (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4403                    GR8, sub_8bit>;
4404 defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4405                    (v16i1 (insert_subvector
4406                            (v16i1 immAllZerosV),
4407                            (v4i1 (extract_subvector
4408                                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4409                                   (iPTR 0))),
4410                            (iPTR 0))),
4411                    (v4i1 (extract_subvector
4412                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4413                           (iPTR 0))), GR8, sub_8bit>;
4414 defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4415                    (v8i1
4416                     (extract_subvector
4417                      (v16i1
4418                       (insert_subvector
4419                        (v16i1 immAllZerosV),
4420                        (v2i1 (extract_subvector
4421                               (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4422                               (iPTR 0))),
4423                        (iPTR 0))),
4424                      (iPTR 0))),
4425                    (v2i1 (extract_subvector
4426                           (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4427                           (iPTR 0))), GR8, sub_8bit>;
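// Likewise for loads: a zero-masked one-element load such as clang's
// _mm_maskz_load_ss selects VMOVSSZrmkz, i.e. "vmovss (%rdi), %xmm0 {%k1} {z}"
// (illustrative; the exact IR depends on the intrinsic header).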
4429 def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
4430           (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
4431            (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
4432            VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4433            (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4435 def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
4436           (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4437            (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4439 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4440           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4441            (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4442            VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4443            (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4445 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4446           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4447            (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4449 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
4450           (COPY_TO_REGCLASS
4451            (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
4452                                                        VK1WM:$mask, addr:$src)),
4453            FR32X)>;
4454 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
4455           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
4457 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4458           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4459            (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4460            VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4461            (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4463 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
4464           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4465            (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4467 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
4468           (COPY_TO_REGCLASS
4469            (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
4470                                                        VK1WM:$mask, addr:$src)),
4471            FR64X)>;
4472 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
4473           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
4476 def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
4477           (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4478 def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
4479           (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4481 def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
4482           (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4483 def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
4484           (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4486 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
4487   let Predicates = [HasFP16] in {
4488     def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4489         (ins VR128X:$src1, VR128X:$src2),
4490         "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4491         []>, T_MAP5XS, EVEX_4V, VEX_LIG,
4492         FoldGenData<"VMOVSHZrr">,
4493         Sched<[SchedWriteFShuffle.XMM]>;
4495     let Constraints = "$src0 = $dst" in
4496     def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4497         (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
4498          VR128X:$src1, VR128X:$src2),
4499         "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
4500           "$dst {${mask}}, $src1, $src2}",
4501         []>, T_MAP5XS, EVEX_K, EVEX_4V, VEX_LIG,
4502         FoldGenData<"VMOVSHZrrk">,
4503         Sched<[SchedWriteFShuffle.XMM]>;
4505     def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4506         (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4507         "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
4508           "$dst {${mask}} {z}, $src1, $src2}",
4509         []>, EVEX_KZ, T_MAP5XS, EVEX_4V, VEX_LIG,
4510         FoldGenData<"VMOVSHZrrkz">,
4511         Sched<[SchedWriteFShuffle.XMM]>;
4512   }
4513   def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4514                            (ins VR128X:$src1, VR128X:$src2),
4515                            "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4516                            []>, XS, EVEX_4V, VEX_LIG,
4517                            FoldGenData<"VMOVSSZrr">,
4518                            Sched<[SchedWriteFShuffle.XMM]>;
4520   let Constraints = "$src0 = $dst" in
4521   def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4522                              (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4523                                                    VR128X:$src1, VR128X:$src2),
4524                              "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4525                                         "$dst {${mask}}, $src1, $src2}",
4526                              []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
4527                              FoldGenData<"VMOVSSZrrk">,
4528                              Sched<[SchedWriteFShuffle.XMM]>;
4530   def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4531                          (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4532                          "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4533                                     "$dst {${mask}} {z}, $src1, $src2}",
4534                          []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
4535                          FoldGenData<"VMOVSSZrrkz">,
4536                          Sched<[SchedWriteFShuffle.XMM]>;
4538   def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4539                            (ins VR128X:$src1, VR128X:$src2),
4540                            "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4541                            []>, XD, EVEX_4V, VEX_LIG, VEX_W,
4542                            FoldGenData<"VMOVSDZrr">,
4543                            Sched<[SchedWriteFShuffle.XMM]>;
4545   let Constraints = "$src0 = $dst" in
4546   def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4547                              (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4548                                                    VR128X:$src1, VR128X:$src2),
4549                              "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4550                                         "$dst {${mask}}, $src1, $src2}",
4551                              []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
4552                              VEX_W, FoldGenData<"VMOVSDZrrk">,
4553                              Sched<[SchedWriteFShuffle.XMM]>;
4555   def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4556                               (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4557                                                           VR128X:$src2),
4558                               "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4559                                          "$dst {${mask}} {z}, $src1, $src2}",
4560                               []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
4561                               VEX_W, FoldGenData<"VMOVSDZrrkz">,
4562                               Sched<[SchedWriteFShuffle.XMM]>;
}
4565 def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4566                 (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4567 def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
4568                              "$dst {${mask}}, $src1, $src2}",
4569                 (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
4570                                 VR128X:$src1, VR128X:$src2), 0>;
4571 def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4572                              "$dst {${mask}} {z}, $src1, $src2}",
4573                 (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
4574                                  VR128X:$src1, VR128X:$src2), 0>;
4575 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4576                 (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4577 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4578                              "$dst {${mask}}, $src1, $src2}",
4579                 (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4580                                 VR128X:$src1, VR128X:$src2), 0>;
4581 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4582                              "$dst {${mask}} {z}, $src1, $src2}",
4583                 (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4584                                  VR128X:$src1, VR128X:$src2), 0>;
4585 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4586                 (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4587 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4588                              "$dst {${mask}}, $src1, $src2}",
4589                 (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4590                                 VR128X:$src1, VR128X:$src2), 0>;
4591 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4592                              "$dst {${mask}} {z}, $src1, $src2}",
4593                 (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4594                                  VR128X:$src1, VR128X:$src2), 0>;
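// Like vmovq.s above, these ".s" aliases force the alternate store-direction
// encodings, i.e. the _REV forms defined earlier. The _REV defs carry no
// patterns; FoldGenData ties each one to its primary def so the generated
// tables treat both encodings alike.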
4596 let Predicates = [HasAVX512, OptForSize] in {
4597   def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4598             (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4599   def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4600             (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4602   // Move low f32 and clear high bits.
4603   def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4604             (SUBREG_TO_REG (i32 0),
4605              (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4606               (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4607   def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4608             (SUBREG_TO_REG (i32 0),
4609              (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4610               (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4612   def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4613             (SUBREG_TO_REG (i32 0),
4614              (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4615               (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4616   def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4617             (SUBREG_TO_REG (i32 0),
4618              (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4619               (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
}
4622 // Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4623 // VMOVSS/SD. Unfortunately, this loses the ability to use XMM16-31.
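// The blend immediates below keep only the low element of the vector operand:
// imm 1 for vblendps selects one dword and imm 3 for vpblendw selects the two
// low words (the same 32 bits); the zeroed V_SET0 register supplies the rest.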
4624 let Predicates = [HasAVX512, OptForSpeed] in {
4625   def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4626             (SUBREG_TO_REG (i32 0),
4627              (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4628                           (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4629                           (i8 1))), sub_xmm)>;
4630   def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4631             (SUBREG_TO_REG (i32 0),
4632              (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4633                           (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4634                           (i8 3))), sub_xmm)>;
}
4637 let Predicates = [HasAVX512] in {
4638   def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4639             (VMOVSSZrm addr:$src)>;
4640   def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4641             (VMOVSDZrm addr:$src)>;
4643   // Represent the same patterns above but in the form they appear for
4644   // 256-bit types
4645   def : Pat<(v8f32 (X86vzload32 addr:$src)),
4646             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4647   def : Pat<(v4f64 (X86vzload64 addr:$src)),
4648             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4650   // Represent the same patterns above but in the form they appear for
4651   // 512-bit types
4652   def : Pat<(v16f32 (X86vzload32 addr:$src)),
4653             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4654   def : Pat<(v8f64 (X86vzload64 addr:$src)),
4655             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}
4657 let Predicates = [HasFP16] in {
4658   def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
4659             (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
4661   // FIXME we need better canonicalization in dag combine
4662   def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
4663             (SUBREG_TO_REG (i32 0),
4664              (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4665               (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4666   def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
4667             (SUBREG_TO_REG (i32 0),
4668              (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4669               (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
4671   def : Pat<(v8f16 (X86vzload16 addr:$src)),
4672             (VMOVSHZrm addr:$src)>;
4674   def : Pat<(v16f16 (X86vzload16 addr:$src)),
4675             (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4677   def : Pat<(v32f16 (X86vzload16 addr:$src)),
4678             (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
}
4681 let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4682 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4683                                 (ins VR128X:$src),
4684                                 "vmovq\t{$src, $dst|$dst, $src}",
4685                                 [(set VR128X:$dst, (v2i64 (X86vzmovl
4686                                                    (v2i64 VR128X:$src))))]>,
4687                                 EVEX, VEX_W;
}
4690 let Predicates = [HasAVX512] in {
4691   def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4692             (VMOVDI2PDIZrr GR32:$src)>;
4694   def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4695             (VMOV64toPQIZrr GR64:$src)>;
4697   // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
4698   def : Pat<(v4i32 (X86vzload32 addr:$src)),
4699             (VMOVDI2PDIZrm addr:$src)>;
4700   def : Pat<(v8i32 (X86vzload32 addr:$src)),
4701             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4702   def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4703             (VMOVZPQILo2PQIZrr VR128X:$src)>;
4704   def : Pat<(v2i64 (X86vzload64 addr:$src)),
4705             (VMOVQI2PQIZrm addr:$src)>;
4706   def : Pat<(v4i64 (X86vzload64 addr:$src)),
4707             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4709   // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4710   def : Pat<(v16i32 (X86vzload32 addr:$src)),
4711             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4712   def : Pat<(v8i64 (X86vzload64 addr:$src)),
4713             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
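// SUBREG_TO_REG with a 0 operand asserts that the bits above the inserted xmm
// subregister are already zero, so widening these vzload/vzmovl results to
// 256/512 bits needs no extra zeroing instruction.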
4715   def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4716             (SUBREG_TO_REG (i32 0),
4717              (v2f64 (VMOVZPQILo2PQIZrr
4718                      (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4719              sub_xmm)>;
4720   def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4721             (SUBREG_TO_REG (i32 0),
4722              (v2i64 (VMOVZPQILo2PQIZrr
4723                      (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4724              sub_xmm)>;
4726   def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4727             (SUBREG_TO_REG (i32 0),
4728              (v2f64 (VMOVZPQILo2PQIZrr
4729                      (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4730              sub_xmm)>;
4731   def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4732             (SUBREG_TO_REG (i32 0),
4733              (v2i64 (VMOVZPQILo2PQIZrr
4734                      (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4735              sub_xmm)>;
}
4738 //===----------------------------------------------------------------------===//
4739 // AVX-512 - Non-temporals
4740 //===----------------------------------------------------------------------===//
4742 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4743                       (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4744                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4745                       EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4747 let Predicates = [HasVLX] in {
4748   def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4749                        (ins i256mem:$src),
4750                        "vmovntdqa\t{$src, $dst|$dst, $src}",
4751                        [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4752                        EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4754   def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4755                       (ins i128mem:$src),
4756                       "vmovntdqa\t{$src, $dst|$dst, $src}",
4757                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4758                       EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}
4761 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4762                         X86SchedWriteMoveLS Sched,
4763                         PatFrag st_frag = alignednontemporalstore> {
4764   let SchedRW = [Sched.MR], AddedComplexity = 400 in
4765   def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4766                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4767                     [(st_frag (_.VT _.RC:$src), addr:$dst)],
4768                     _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}
4771 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4772                            AVX512VLVectorVTInfo VTInfo,
4773                            X86SchedWriteMoveLSWidths Sched> {
4774   let Predicates = [HasAVX512] in
4775     defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4777   let Predicates = [HasAVX512, HasVLX] in {
4778     defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4779     defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
4780   }
}
4783 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4784                                 SchedWriteVecMoveLSNT>, PD;
4785 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4786                                 SchedWriteFMoveLSNT>, PD, VEX_W;
4787 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4788                                 SchedWriteFMoveLSNT>, PS;
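// Each defm above expands to store-only (mr) forms at 128/256/512 bits, e.g.
// VMOVNTDQZ128mr or VMOVNTPSZmr; non-temporal stores have no masked variants,
// and the only non-temporal load is VMOVNTDQA above.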
4790 let Predicates = [HasAVX512], AddedComplexity = 400 in {
4791   def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4792             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4793   def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4794             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4795   def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4796             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4798   def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4799             (VMOVNTDQAZrm addr:$src)>;
4800   def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4801             (VMOVNTDQAZrm addr:$src)>;
4802   def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4803             (VMOVNTDQAZrm addr:$src)>;
4804   def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4805             (VMOVNTDQAZrm addr:$src)>;
4806   def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4807             (VMOVNTDQAZrm addr:$src)>;
4808   def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4809             (VMOVNTDQAZrm addr:$src)>;
}
4812 let Predicates = [HasVLX], AddedComplexity = 400 in {
4813   def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4814             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4815   def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4816             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4817   def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4818             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4820   def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4821             (VMOVNTDQAZ256rm addr:$src)>;
4822   def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4823             (VMOVNTDQAZ256rm addr:$src)>;
4824   def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4825             (VMOVNTDQAZ256rm addr:$src)>;
4826   def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4827             (VMOVNTDQAZ256rm addr:$src)>;
4828   def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4829             (VMOVNTDQAZ256rm addr:$src)>;
4830   def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4831             (VMOVNTDQAZ256rm addr:$src)>;
4833   def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4834             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4835   def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4836             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4837   def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4838             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4840   def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4841             (VMOVNTDQAZ128rm addr:$src)>;
4842   def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4843             (VMOVNTDQAZ128rm addr:$src)>;
4844   def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4845             (VMOVNTDQAZ128rm addr:$src)>;
4846   def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4847             (VMOVNTDQAZ128rm addr:$src)>;
4848   def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4849             (VMOVNTDQAZ128rm addr:$src)>;
4850   def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4851             (VMOVNTDQAZ128rm addr:$src)>;
}
4854 //===----------------------------------------------------------------------===//
4855 // AVX-512 - Integer arithmetic
4857 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4858                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
4859                            bit IsCommutable = 0> {
4860   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4861                     (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4862                     "$src2, $src1", "$src1, $src2",
4863                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4864                     IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
4865                     Sched<[sched]>;
4867   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4868                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4869                   "$src2, $src1", "$src1, $src2",
4870                   (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4871                   AVX512BIBase, EVEX_4V,
4872                   Sched<[sched.Folded, sched.ReadAfterFold]>;
}
4875 multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4876                             X86VectorVTInfo _, X86FoldableSchedWrite sched,
4877                             bit IsCommutable = 0> :
4878            avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4879   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4880                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4881                   "${src2}"#_.BroadcastStr#", $src1",
4882                   "$src1, ${src2}"#_.BroadcastStr,
4883                   (_.VT (OpNode _.RC:$src1,
4884                                 (_.BroadcastLdFrag addr:$src2)))>,
4885                   AVX512BIBase, EVEX_4V, EVEX_B,
4886                   Sched<[sched.Folded, sched.ReadAfterFold]>;
}
4889 multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4890                               AVX512VLVectorVTInfo VTInfo,
4891                               X86SchedWriteWidths sched, Predicate prd,
4892                               bit IsCommutable = 0> {
4893   let Predicates = [prd] in
4894     defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4895                              IsCommutable>, EVEX_V512;
4897   let Predicates = [prd, HasVLX] in {
4898     defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4899                                 sched.YMM, IsCommutable>, EVEX_V256;
4900     defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4901                                 sched.XMM, IsCommutable>, EVEX_V128;
4902   }
}
4905 multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4906                                AVX512VLVectorVTInfo VTInfo,
4907                                X86SchedWriteWidths sched, Predicate prd,
4908                                bit IsCommutable = 0> {
4909   let Predicates = [prd] in
4910     defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4911                              IsCommutable>, EVEX_V512;
4913   let Predicates = [prd, HasVLX] in {
4914     defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4915                                  sched.YMM, IsCommutable>, EVEX_V256;
4916     defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4917                                  sched.XMM, IsCommutable>, EVEX_V128;
4918   }
}
4921 multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4922                                 X86SchedWriteWidths sched, Predicate prd,
4923                                 bit IsCommutable = 0> {
4924   defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4925                                   sched, prd, IsCommutable>,
4926                                   VEX_W, EVEX_CD8<64, CD8VF>;
}
4929 multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4930                                 X86SchedWriteWidths sched, Predicate prd,
4931                                 bit IsCommutable = 0> {
4932   defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4933                                   sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}
4936 multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4937                                 X86SchedWriteWidths sched, Predicate prd,
4938                                 bit IsCommutable = 0> {
4939   defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4940                                  sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4941                                  VEX_WIG;
}
4944 multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4945                                 X86SchedWriteWidths sched, Predicate prd,
4946                                 bit IsCommutable = 0> {
4947   defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4948                                  sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
4949                                  VEX_WIG;
}
4952 multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4953                                  SDNode OpNode, X86SchedWriteWidths sched,
4954                                  Predicate prd, bit IsCommutable = 0> {
4955   defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4956                                    IsCommutable>;
4958   defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
4959                                    IsCommutable>;
}
4962 multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4963                                  SDNode OpNode, X86SchedWriteWidths sched,
4964                                  Predicate prd, bit IsCommutable = 0> {
4965   defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4966                                    IsCommutable>;
4968   defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
4969                                    IsCommutable>;
}
4972 multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4973                                   bits<8> opc_d, bits<8> opc_q,
4974                                   string OpcodeStr, SDNode OpNode,
4975                                   X86SchedWriteWidths sched,
4976                                   bit IsCommutable = 0> {
4977   defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4978                                     sched, HasAVX512, IsCommutable>,
4979               avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4980                                     sched, HasBWI, IsCommutable>;
}
4983 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4984                             X86FoldableSchedWrite sched,
4985                             SDNode OpNode,X86VectorVTInfo _Src,
4986                             X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4987                             bit IsCommutable = 0> {
4988   defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4989                             (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4990                             "$src2, $src1","$src1, $src2",
4991                             (_Dst.VT (OpNode
4992                                          (_Src.VT _Src.RC:$src1),
4993                                          (_Src.VT _Src.RC:$src2))),
4994                             IsCommutable>,
4995                             AVX512BIBase, EVEX_4V, Sched<[sched]>;
4996   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4997                         (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4998                         "$src2, $src1", "$src1, $src2",
4999                         (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
5000                                       (_Src.LdFrag addr:$src2)))>,
5001                         AVX512BIBase, EVEX_4V,
5002                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5004   defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5005                     (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
5006                     OpcodeStr,
5007                     "${src2}"#_Brdct.BroadcastStr#", $src1",
5008                      "$src1, ${src2}"#_Brdct.BroadcastStr,
5009                     (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
5010                                  (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
5011                     AVX512BIBase, EVEX_4V, EVEX_B,
5012                     Sched<[sched.Folded, sched.ReadAfterFold]>;
}
5015 defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
5016                                     SchedWriteVecALU, 1>;
5017 defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
5018                                     SchedWriteVecALU, 0>;
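// Name composition is left to right: avx512_binop_rm_vl_all instantiates the
// byte/word forms (rr/rm only) under HasBWI and the dword/qword forms (plus an
// rmb broadcast form) under HasAVX512, so VPADD alone yields e.g. VPADDBZ128rr,
// VPADDWZ256rmk and VPADDDZrmb.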
5019 defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
5020                                     SchedWriteVecALU, HasBWI, 1>;
5021 defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
5022                                     SchedWriteVecALU, HasBWI, 0>;
5023 defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
5024                                      SchedWriteVecALU, HasBWI, 1>;
5025 defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
5026                                      SchedWriteVecALU, HasBWI, 0>;
5027 defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
5028                                     SchedWritePMULLD, HasAVX512, 1>, T8PD;
5029 defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
5030                                     SchedWriteVecIMul, HasBWI, 1>;
5031 defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
5032                                     SchedWriteVecIMul, HasDQI, 1>, T8PD,
5033                                     NotEVEX2VEXConvertible;
5034 defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
5035                                     HasBWI, 1>;
5036 defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
5037                                      HasBWI, 1>;
5038 defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
5039                                       SchedWriteVecIMul, HasBWI, 1>, T8PD;
5040 defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
5041                                    SchedWriteVecALU, HasBWI, 1>;
5042 defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
5043                                     SchedWriteVecIMul, HasAVX512, 1>, T8PD;
5044 defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
5045                                      SchedWriteVecIMul, HasAVX512, 1>;
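// For the dword/qword ops the rmb variants fold an EVEX embedded broadcast,
// e.g. (AT&T) "vpaddd (%rdi){1to16}, %zmm1, %zmm2 {%k1} {z}" is matched as a
// single VPADDDZrmbkz instruction.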
5047 multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
5048                             X86SchedWriteWidths sched,
5049                             AVX512VLVectorVTInfo _SrcVTInfo,
5050                             AVX512VLVectorVTInfo _DstVTInfo,
5051                             SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
5052   let Predicates = [prd] in
5053     defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
5054                                  _SrcVTInfo.info512, _DstVTInfo.info512,
5055                                  v8i64_info, IsCommutable>,
5056                                   EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
5057   let Predicates = [HasVLX, prd] in {
5058     defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
5059                                       _SrcVTInfo.info256, _DstVTInfo.info256,
5060                                       v4i64x_info, IsCommutable>,
5061                                       EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
5062     defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
5063                                       _SrcVTInfo.info128, _DstVTInfo.info128,
5064                                       v2i64x_info, IsCommutable>,
5065                                      EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
5066   }
5067 }
5069 defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
5070                                 avx512vl_i8_info, avx512vl_i8_info,
5071                                 X86multishift, HasVBMI, 0>, T8PD;
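// Pack instructions. avx512_packs_rm provides the register/register and
// register/memory forms; avx512_packs_rmb adds the embedded broadcast form,
// which only the dword->word packs below mix in.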
5073 multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5074                             X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
5075                             X86FoldableSchedWrite sched> {
5076   defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5077                     (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
5078                     OpcodeStr,
5079                     "${src2}"#_Src.BroadcastStr#", $src1",
5080                      "$src1, ${src2}"#_Src.BroadcastStr,
5081                     (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
5082                                  (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
5083                     EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
5084                     Sched<[sched.Folded, sched.ReadAfterFold]>;
5085 }
5087 multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
5088                             SDNode OpNode,X86VectorVTInfo _Src,
5089                             X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
5090                             bit IsCommutable = 0> {
5091   defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
5092                             (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
5093                             "$src2, $src1","$src1, $src2",
5094                             (_Dst.VT (OpNode
5095                                          (_Src.VT _Src.RC:$src1),
5096                                          (_Src.VT _Src.RC:$src2))),
5097                             IsCommutable, IsCommutable>,
5098                             EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
5099   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5100                         (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
5101                         "$src2, $src1", "$src1, $src2",
5102                         (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
5103                                       (_Src.LdFrag addr:$src2)))>,
5104                          EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
5105                          Sched<[sched.Folded, sched.ReadAfterFold]>;
5106 }
5108 multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
5109                                     SDNode OpNode> {
5110   let Predicates = [HasBWI] in
5111   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
5112                                  v32i16_info, SchedWriteShuffle.ZMM>,
5113                 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
5114                                  v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
5115   let Predicates = [HasBWI, HasVLX] in {
5116     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
5117                                      v16i16x_info, SchedWriteShuffle.YMM>,
5118                      avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
5119                                       v16i16x_info, SchedWriteShuffle.YMM>,
5120                                       EVEX_V256;
5121     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
5122                                      v8i16x_info, SchedWriteShuffle.XMM>,
5123                      avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
5124                                       v8i16x_info, SchedWriteShuffle.XMM>,
5125                                       EVEX_V128;
5126   }
5127 }
5128 multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
5129                             SDNode OpNode> {
5130   let Predicates = [HasBWI] in
5131   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
5132                                 SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
5133   let Predicates = [HasBWI, HasVLX] in {
5134     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
5135                                      v32i8x_info, SchedWriteShuffle.YMM>,
5136                                      EVEX_V256, VEX_WIG;
5137     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
5138                                      v16i8x_info, SchedWriteShuffle.XMM>,
5139                                      EVEX_V128, VEX_WIG;
5140   }
5141 }
5143 multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
5144                             SDNode OpNode, AVX512VLVectorVTInfo _Src,
5145                             AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
5146   let Predicates = [HasBWI] in
5147   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
5148                                 _Dst.info512, SchedWriteVecIMul.ZMM,
5149                                 IsCommutable>, EVEX_V512;
5150   let Predicates = [HasBWI, HasVLX] in {
5151     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
5152                                      _Dst.info256, SchedWriteVecIMul.YMM,
5153                                      IsCommutable>, EVEX_V256;
5154     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
5155                                      _Dst.info128, SchedWriteVecIMul.XMM,
5156                                      IsCommutable>, EVEX_V128;
5157   }
5158 }
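// For reference: the 512-bit forms pack v16i32->v32i16 (VPACKSSDW/VPACKUSDW,
// signed/unsigned saturation) and v32i16->v64i8 (VPACKSSWB/VPACKUSWB); all of
// them require AVX512BW.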
5160 defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
5161 defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
5162 defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
5163 defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
5165 defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
5166                      avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
5167 defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
5168                      avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
5170 defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
5171                                     SchedWriteVecALU, HasBWI, 1>, T8PD;
5172 defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
5173                                     SchedWriteVecALU, HasBWI, 1>;
5174 defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
5175                                     SchedWriteVecALU, HasAVX512, 1>, T8PD;
5176 defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
5177                                     SchedWriteVecALU, HasAVX512, 1>, T8PD,
5178                                     NotEVEX2VEXConvertible;
5180 defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
5181                                     SchedWriteVecALU, HasBWI, 1>;
5182 defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
5183                                     SchedWriteVecALU, HasBWI, 1>, T8PD;
5184 defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
5185                                     SchedWriteVecALU, HasAVX512, 1>, T8PD;
5186 defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
5187                                     SchedWriteVecALU, HasAVX512, 1>, T8PD,
5188                                     NotEVEX2VEXConvertible;
5190 defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
5191                                     SchedWriteVecALU, HasBWI, 1>, T8PD;
5192 defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
5193                                     SchedWriteVecALU, HasBWI, 1>;
5194 defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
5195                                     SchedWriteVecALU, HasAVX512, 1>, T8PD;
5196 defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
5197                                     SchedWriteVecALU, HasAVX512, 1>, T8PD,
5198                                     NotEVEX2VEXConvertible;
5200 defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
5201                                     SchedWriteVecALU, HasBWI, 1>;
5202 defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
5203                                     SchedWriteVecALU, HasBWI, 1>, T8PD;
5204 defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
5205                                     SchedWriteVecALU, HasAVX512, 1>, T8PD;
5206 defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
5207                                     SchedWriteVecALU, HasAVX512, 1>, T8PD,
5208                                     NotEVEX2VEXConvertible;
5210 // PMULLQ: Use the 512-bit version to implement the 128/256-bit forms when VLX is not available.
5211 let Predicates = [HasDQI, NoVLX] in {
5212   def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5213             (EXTRACT_SUBREG
5214                 (VPMULLQZrr
5215                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5216                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5217              sub_ymm)>;
5218   def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5219             (EXTRACT_SUBREG
5220                 (VPMULLQZrmb
5221                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5222                     addr:$src2),
5223              sub_ymm)>;
5225   def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5226             (EXTRACT_SUBREG
5227                 (VPMULLQZrr
5228                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5229                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5230              sub_xmm)>;
5231   def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5232             (EXTRACT_SUBREG
5233                 (VPMULLQZrmb
5234                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5235                     addr:$src2),
5236              sub_xmm)>;
5237 }
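// Likewise, lower 128/256-bit i64 min/max by widening to the 512-bit
// instruction and extracting the low subvector when VLX is not available.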
5239 multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
5240   def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
5241             (EXTRACT_SUBREG
5242                 (!cast<Instruction>(Instr#"rr")
5243                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5244                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5245              sub_ymm)>;
5246   def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5247             (EXTRACT_SUBREG
5248                 (!cast<Instruction>(Instr#"rmb")
5249                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5250                     addr:$src2),
5251              sub_ymm)>;
5253   def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
5254             (EXTRACT_SUBREG
5255                 (!cast<Instruction>(Instr#"rr")
5256                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5257                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5258              sub_xmm)>;
5259   def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5260             (EXTRACT_SUBREG
5261                 (!cast<Instruction>(Instr#"rmb")
5262                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5263                     addr:$src2),
5264              sub_xmm)>;
5265 }
5267 let Predicates = [HasAVX512, NoVLX] in {
5268   defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
5269   defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
5270   defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
5271   defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
5272 }
5274 //===----------------------------------------------------------------------===//
5275 // AVX-512  Logical Instructions
5276 //===----------------------------------------------------------------------===//
5278 defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
5279                                    SchedWriteVecLogic, HasAVX512, 1>;
5280 defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
5281                                   SchedWriteVecLogic, HasAVX512, 1>;
5282 defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
5283                                    SchedWriteVecLogic, HasAVX512, 1>;
5284 defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5285                                     SchedWriteVecLogic, HasAVX512>;
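// VPAND/VPOR/VPXOR/VPANDN are only defined for dword/qword elements, so the
// patterns below select the qword forms for byte and word logic operations
// as well.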
5287 let Predicates = [HasVLX] in {
5288   def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
5289             (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5290   def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
5291             (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5293   def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
5294             (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5295   def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
5296             (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5298   def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
5299             (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5300   def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
5301             (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5303   def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
5304             (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5305   def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
5306             (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5308   def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
5309             (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5310   def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
5311             (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5313   def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
5314             (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5315   def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
5316             (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5318   def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
5319             (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5320   def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
5321             (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5323   def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
5324             (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5325   def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
5326             (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5328   def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5329             (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5330   def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5331             (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5333   def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5334             (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5335   def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5336             (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5338   def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5339             (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5340   def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5341             (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5343   def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5344             (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5345   def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5346             (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5348   def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5349             (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5350   def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5351             (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5353   def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5354             (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5355   def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5356             (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5358   def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5359             (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5360   def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5361             (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5363   def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5364             (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5365   def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5366             (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5367 }
5369 let Predicates = [HasAVX512] in {
5370   def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5371             (VPANDQZrr VR512:$src1, VR512:$src2)>;
5372   def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5373             (VPANDQZrr VR512:$src1, VR512:$src2)>;
5375   def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5376             (VPORQZrr VR512:$src1, VR512:$src2)>;
5377   def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5378             (VPORQZrr VR512:$src1, VR512:$src2)>;
5380   def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5381             (VPXORQZrr VR512:$src1, VR512:$src2)>;
5382   def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5383             (VPXORQZrr VR512:$src1, VR512:$src2)>;
5385   def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5386             (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5387   def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5388             (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5390   def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5391             (VPANDQZrm VR512:$src1, addr:$src2)>;
5392   def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5393             (VPANDQZrm VR512:$src1, addr:$src2)>;
5395   def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5396             (VPORQZrm VR512:$src1, addr:$src2)>;
5397   def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5398             (VPORQZrm VR512:$src1, addr:$src2)>;
5400   def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5401             (VPXORQZrm VR512:$src1, addr:$src2)>;
5402   def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5403             (VPXORQZrm VR512:$src1, addr:$src2)>;
5405   def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5406             (VPANDNQZrm VR512:$src1, addr:$src2)>;
5407   def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5408             (VPANDNQZrm VR512:$src1, addr:$src2)>;
5409 }
5411 // Patterns to catch a vselect whose element type differs from that of the logic op.
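// e.g. an AND performed as v16i32 but selected with a v8i1 mask at i64
// granularity is matched onto the masked VPANDQ form rather than being left
// as a separate vselect.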
5412 multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5413                                     X86VectorVTInfo _,
5414                                     X86VectorVTInfo IntInfo> {
5415   // Masked register-register logical operations.
5416   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5417                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5418                    _.RC:$src0)),
5419             (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5420              _.RC:$src1, _.RC:$src2)>;
5422   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5423                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5424                    _.ImmAllZerosV)),
5425             (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5426              _.RC:$src2)>;
5428   // Masked register-memory logical operations.
5429   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5430                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5431                                             (load addr:$src2)))),
5432                    _.RC:$src0)),
5433             (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5434              _.RC:$src1, addr:$src2)>;
5435   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5436                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5437                                             (load addr:$src2)))),
5438                    _.ImmAllZerosV)),
5439             (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5440              addr:$src2)>;
5441 }
5443 multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5444                                          X86VectorVTInfo _,
5445                                          X86VectorVTInfo IntInfo> {
5446   // Register-broadcast logical operations.
5447   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5448                    (bitconvert
5449                     (IntInfo.VT (OpNode _.RC:$src1,
5450                                  (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5451                    _.RC:$src0)),
5452             (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5453              _.RC:$src1, addr:$src2)>;
5454   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5455                    (bitconvert
5456                     (IntInfo.VT (OpNode _.RC:$src1,
5457                                  (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5458                    _.ImmAllZerosV)),
5459             (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
5460              _.RC:$src1, addr:$src2)>;
5461 }
5463 multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5464                                          AVX512VLVectorVTInfo SelectInfo,
5465                                          AVX512VLVectorVTInfo IntInfo> {
5466 let Predicates = [HasVLX] in {
5467   defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5468                                  IntInfo.info128>;
5469   defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5470                                  IntInfo.info256>;
5471 }
5472 let Predicates = [HasAVX512] in {
5473   defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
5474                                  IntInfo.info512>;
5475 }
5476 }
5478 multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5479                                                AVX512VLVectorVTInfo SelectInfo,
5480                                                AVX512VLVectorVTInfo IntInfo> {
5481 let Predicates = [HasVLX] in {
5482   defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5483                                        SelectInfo.info128, IntInfo.info128>;
5484   defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5485                                        SelectInfo.info256, IntInfo.info256>;
5486 }
5487 let Predicates = [HasAVX512] in {
5488   defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5489                                        SelectInfo.info512, IntInfo.info512>;
5490 }
5491 }
5493 multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5494   // i64 vselect with i32/i16/i8 logic op
5495   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5496                                        avx512vl_i32_info>;
5497   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5498                                        avx512vl_i16_info>;
5499   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5500                                        avx512vl_i8_info>;
5502   // i32 vselect with i64/i16/i8 logic op
5503   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5504                                        avx512vl_i64_info>;
5505   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5506                                        avx512vl_i16_info>;
5507   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5508                                        avx512vl_i8_info>;
5510   // f32 vselect with i64/i32/i16/i8 logic op
5511   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5512                                        avx512vl_i64_info>;
5513   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5514                                        avx512vl_i32_info>;
5515   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5516                                        avx512vl_i16_info>;
5517   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5518                                        avx512vl_i8_info>;
5520   // f64 vselect with i64/i32/i16/i8 logic op
5521   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5522                                        avx512vl_i64_info>;
5523   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5524                                        avx512vl_i32_info>;
5525   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5526                                        avx512vl_i16_info>;
5527   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5528                                        avx512vl_i8_info>;
5530   defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5531                                              avx512vl_f32_info,
5532                                              avx512vl_i32_info>;
5533   defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
5534                                              avx512vl_f64_info,
5535                                              avx512vl_i64_info>;
5536 }
5538 defm : avx512_logical_lowering_types<"VPAND", and>;
5539 defm : avx512_logical_lowering_types<"VPOR",  or>;
5540 defm : avx512_logical_lowering_types<"VPXOR", xor>;
5541 defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5543 //===----------------------------------------------------------------------===//
5544 // AVX-512  FP arithmetic
5545 //===----------------------------------------------------------------------===//
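// Scalar FP arithmetic. The *_Int forms below operate on the whole vector
// register and feed the intrinsic and masked patterns, while the
// isCodeGenOnly rr/rm forms operate on the scalar FR16X/FR32X/FR64X classes
// and match the plain scalar nodes (any_fadd etc.).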
5547 multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5548                             SDPatternOperator OpNode, SDNode VecNode,
5549                             X86FoldableSchedWrite sched, bit IsCommutable> {
5550   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5551   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5552                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5553                            "$src2, $src1", "$src1, $src2",
5554                            (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5555                            Sched<[sched]>;
5557   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5558                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5559                          "$src2, $src1", "$src1, $src2",
5560                          (_.VT (VecNode _.RC:$src1,
5561                                         (_.ScalarIntMemFrags addr:$src2)))>,
5562                          Sched<[sched.Folded, sched.ReadAfterFold]>;
5563   let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5564   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5565                          (ins _.FRC:$src1, _.FRC:$src2),
5566                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5567                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5568                           Sched<[sched]> {
5569     let isCommutable = IsCommutable;
5570   }
5571   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5572                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5573                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5574                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5575                          (_.ScalarLdFrag addr:$src2)))]>,
5576                          Sched<[sched.Folded, sched.ReadAfterFold]>;
5577   }
5578   }
5579 }
5581 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5582                                   SDNode VecNode, X86FoldableSchedWrite sched,
5583                                   bit IsCommutable = 0> {
5584   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5585   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5586                           (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5587                           "$rc, $src2, $src1", "$src1, $src2, $rc",
5588                           (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5589                           (i32 timm:$rc))>,
5590                           EVEX_B, EVEX_RC, Sched<[sched]>;
5591 }
5592 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5593                                 SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5594                                 X86FoldableSchedWrite sched, bit IsCommutable,
5595                                 string EVEX2VexOvrd> {
5596   let ExeDomain = _.ExeDomain in {
5597   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5598                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5599                            "$src2, $src1", "$src1, $src2",
5600                            (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5601                            Sched<[sched]>, SIMD_EXC;
5603   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5604                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5605                          "$src2, $src1", "$src1, $src2",
5606                          (_.VT (VecNode _.RC:$src1,
5607                                         (_.ScalarIntMemFrags addr:$src2)))>,
5608                          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
5610   let isCodeGenOnly = 1, Predicates = [HasAVX512],
5611       Uses = [MXCSR], mayRaiseFPException = 1 in {
5612   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5613                          (ins _.FRC:$src1, _.FRC:$src2),
5614                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5615                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5616                           Sched<[sched]>,
5617                           EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
5618     let isCommutable = IsCommutable;
5619   }
5620   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5621                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5622                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5623                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5624                          (_.ScalarLdFrag addr:$src2)))]>,
5625                          Sched<[sched.Folded, sched.ReadAfterFold]>,
5626                          EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
5627   }
5629   let Uses = [MXCSR] in
5630   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5631                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5632                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5633                             (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5634                             EVEX_B, Sched<[sched]>;
5635   }
5636 }
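// avx512_binop_s_round/_sae below combine the plain scalar forms with the
// embedded-rounding (AVX512RC) or {sae} variants for SS/SD, plus SH when
// FP16 is available.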
5638 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5639                                 SDNode VecNode, SDNode RndNode,
5640                                 X86SchedWriteSizes sched, bit IsCommutable> {
5641   defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5642                               sched.PS.Scl, IsCommutable>,
5643              avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5644                               sched.PS.Scl, IsCommutable>,
5645                               XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5646   defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5647                               sched.PD.Scl, IsCommutable>,
5648              avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5649                               sched.PD.Scl, IsCommutable>,
5650                               XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5651   let Predicates = [HasFP16] in
5652     defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
5653                                 VecNode, sched.PH.Scl, IsCommutable>,
5654                avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
5655                                 sched.PH.Scl, IsCommutable>,
5656                                 T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>;
5657 }
5659 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5660                               SDNode VecNode, SDNode SaeNode,
5661                               X86SchedWriteSizes sched, bit IsCommutable> {
5662   defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5663                               VecNode, SaeNode, sched.PS.Scl, IsCommutable,
5664                               NAME#"SS">,
5665                               XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5666   defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5667                               VecNode, SaeNode, sched.PD.Scl, IsCommutable,
5668                               NAME#"SD">,
5669                               XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5670   let Predicates = [HasFP16] in {
5671     defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
5672                                 VecNode, SaeNode, sched.PH.Scl, IsCommutable,
5673                                 NAME#"SH">,
5674                                 T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>,
5675                                 NotEVEX2VEXConvertible;
5676   }
5677 }
5678 defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
5679                                  SchedWriteFAddSizes, 1>;
5680 defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
5681                                  SchedWriteFMulSizes, 1>;
5682 defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
5683                                  SchedWriteFAddSizes, 0>;
5684 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
5685                                  SchedWriteFDivSizes, 0>;
5686 defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5687                                SchedWriteFCmpSizes, 0>;
5688 defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5689                                SchedWriteFCmpSizes, 0>;
5691 // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5692 // X86fminc and X86fmaxc instead of X86fmin and X86fmax.
5693 multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5694                                     X86VectorVTInfo _, SDNode OpNode,
5695                                     X86FoldableSchedWrite sched,
5696                                     string EVEX2VEXOvrd> {
5697   let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5698   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5699                          (ins _.FRC:$src1, _.FRC:$src2),
5700                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5701                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5702                           Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
5703     let isCommutable = 1;
5704   }
5705   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5706                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5707                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5708                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5709                          (_.ScalarLdFrag addr:$src2)))]>,
5710                          Sched<[sched.Folded, sched.ReadAfterFold]>,
5711                          EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
5712   }
5713 }
5714 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5715                                          SchedWriteFCmp.Scl, "VMINCSS">, XS,
5716                                          EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5718 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5719                                          SchedWriteFCmp.Scl, "VMINCSD">, XD,
5720                                          VEX_W, EVEX_4V, VEX_LIG,
5721                                          EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5723 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5724                                          SchedWriteFCmp.Scl, "VMAXCSS">, XS,
5725                                          EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5727 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5728                                          SchedWriteFCmp.Scl, "VMAXCSD">, XD,
5729                                          VEX_W, EVEX_4V, VEX_LIG,
5730                                          EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5732 defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
5733                                          SchedWriteFCmp.Scl, "VMINCSH">, T_MAP5XS,
5734                                          EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
5735                                          NotEVEX2VEXConvertible;
5736 defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
5737                                          SchedWriteFCmp.Scl, "VMAXCSH">, T_MAP5XS,
5738                                          EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
5739                                          NotEVEX2VEXConvertible;
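// Packed FP arithmetic. AVX512_maskable_split takes separate unmasked and
// masked patterns, so the instantiations below can pass the strict node
// (e.g. any_fadd) for the unmasked form and the plain node (fadd) for the
// masked one.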
5741 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5742                             SDPatternOperator MaskOpNode,
5743                             X86VectorVTInfo _, X86FoldableSchedWrite sched,
5744                             bit IsCommutable,
5745                             bit IsKCommutable = IsCommutable> {
5746   let ExeDomain = _.ExeDomain, hasSideEffects = 0,
5747       Uses = [MXCSR], mayRaiseFPException = 1 in {
5748   defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
5749                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5750                   "$src2, $src1", "$src1, $src2",
5751                   (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
5752                   (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
5753                   IsKCommutable, IsKCommutable>,
5754                   EVEX_4V, Sched<[sched]>;
5755   let mayLoad = 1 in {
5756     defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5757                     (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5758                     "$src2, $src1", "$src1, $src2",
5759                     (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5760                     (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5761                     EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5762     defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5763                      (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5764                      "${src2}"#_.BroadcastStr#", $src1",
5765                      "$src1, ${src2}"#_.BroadcastStr,
5766                      (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5767                      (MaskOpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5768                      EVEX_4V, EVEX_B,
5769                      Sched<[sched.Folded, sched.ReadAfterFold]>;
5770     }
5771   }
5772 }
5774 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5775                                   SDPatternOperator OpNodeRnd,
5776                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5777   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5778   defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5779                   (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#_.Suffix,
5780                   "$rc, $src2, $src1", "$src1, $src2, $rc",
5781                   (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
5782                   EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
5783 }
5785 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5786                                 SDPatternOperator OpNodeSAE,
5787                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5788   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5789   defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5790                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5791                   "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5792                   (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5793                   EVEX_4V, EVEX_B, Sched<[sched]>;
5794 }
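// avx512_fp_binop_p instantiates the PS/PD forms: 512-bit under 'prd'
// (HasAVX512, or HasDQI for the logic ops), 128/256-bit additionally under
// HasVLX. avx512_fp_binop_ph provides the FP16 counterparts under HasFP16.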
5796 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5797                              SDPatternOperator MaskOpNode,
5798                              Predicate prd, X86SchedWriteSizes sched,
5799                              bit IsCommutable = 0,
5800                              bit IsPD128Commutable = IsCommutable> {
5801   let Predicates = [prd] in {
5802   defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
5803                               sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
5804                               EVEX_CD8<32, CD8VF>;
5805   defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
5806                               sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
5807                               EVEX_CD8<64, CD8VF>;
5808   }
5810   // Define only if AVX512VL feature is present.
5811   let Predicates = [prd, HasVLX] in {
5812     defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
5813                                    sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
5814                                    EVEX_CD8<32, CD8VF>;
5815     defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
5816                                    sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
5817                                    EVEX_CD8<32, CD8VF>;
5818     defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
5819                                    sched.PD.XMM, IsPD128Commutable,
5820                                    IsCommutable>, EVEX_V128, PD, VEX_W,
5821                                    EVEX_CD8<64, CD8VF>;
5822     defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
5823                                    sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
5824                                    EVEX_CD8<64, CD8VF>;
5825   }
5826 }
5828 multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5829                               SDPatternOperator MaskOpNode,
5830                               X86SchedWriteSizes sched, bit IsCommutable = 0,
5831                               bit IsPD128Commutable = IsCommutable> {
5832   let Predicates = [HasFP16] in {
5833     defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
5834                                 sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5PS,
5835                                 EVEX_CD8<16, CD8VF>;
5836   }
5837   let Predicates = [HasVLX, HasFP16] in {
5838     defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
5839                                    sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5PS,
5840                                    EVEX_CD8<16, CD8VF>;
5841     defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
5842                                    sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5PS,
5843                                    EVEX_CD8<16, CD8VF>;
5844   }
5845 }
5847 let Uses = [MXCSR] in
5848 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5849                                    X86SchedWriteSizes sched> {
5850   let Predicates = [HasFP16] in {
5851     defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5852                                       v32f16_info>,
5853                                       EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
5854   }
5855   defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5856                                     v16f32_info>,
5857                                     EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5858   defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5859                                     v8f64_info>,
5860                                     EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
5861 }
5863 let Uses = [MXCSR] in
5864 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5865                                  X86SchedWriteSizes sched> {
5866   let Predicates = [HasFP16] in {
5867     defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5868                                     v32f16_info>,
5869                                     EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
5870   }
5871   defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5872                                   v16f32_info>,
5873                                   EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5874   defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5875                                   v8f64_info>,
5876                                   EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
5877 }
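// e.g. VADD below expands to vaddph/vaddps/vaddpd at every legal width plus
// the 512-bit embedded-rounding forms; VMIN/VMAX instead get {sae}
// (suppress-all-exceptions) forms, since min/max do not take a rounding mode.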
5879 defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
5880                               SchedWriteFAddSizes, 1>,
5881             avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
5882             avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5883 defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
5884                               SchedWriteFMulSizes, 1>,
5885             avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
5886             avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5887 defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
5888                               SchedWriteFAddSizes>,
5889             avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
5890             avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5891 defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
5892                               SchedWriteFDivSizes>,
5893             avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
5894             avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5895 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
5896                               SchedWriteFCmpSizes, 0>,
5897             avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin, SchedWriteFCmpSizes, 0>,
5898             avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5899 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
5900                               SchedWriteFCmpSizes, 0>,
5901             avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>,
5902             avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
5903 let isCodeGenOnly = 1 in {
5904   defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
5905                                  SchedWriteFCmpSizes, 1>,
5906                avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
5907                                  SchedWriteFCmpSizes, 1>;
5908   defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
5909                                  SchedWriteFCmpSizes, 1>,
5910                avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
5911                                  SchedWriteFCmpSizes, 1>;
5912 }
5913 let Uses = []<Register>, mayRaiseFPException = 0 in {
5914 defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
5915                                SchedWriteFLogicSizes, 1>;
5916 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
5917                                SchedWriteFLogicSizes, 0>;
5918 defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
5919                                SchedWriteFLogicSizes, 1>;
5920 defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
5921                                SchedWriteFLogicSizes, 1>;
5922 }
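// VSCALEF (x * 2^floor(y)): packed PH/PS/PD and scalar SH/SS/SD forms, with
// embedded-rounding and broadcast variants; the 128/256-bit packed forms
// require VLX.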
5924 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5925                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5926   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5927   defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5928                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5929                   "$src2, $src1", "$src1, $src2",
5930                   (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5931                   EVEX_4V, Sched<[sched]>;
5932   defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5933                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5934                   "$src2, $src1", "$src1, $src2",
5935                   (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5936                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5937   defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5938                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5939                    "${src2}"#_.BroadcastStr#", $src1",
5940                    "$src1, ${src2}"#_.BroadcastStr,
5941                    (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5942                    EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5943   }
5944 }
5946 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5947                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5948   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5949   defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5950                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5951                   "$src2, $src1", "$src1, $src2",
5952                   (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5953                   Sched<[sched]>;
5954   defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5955                   (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
5956                   "$src2, $src1", "$src1, $src2",
5957                   (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
5958                   Sched<[sched.Folded, sched.ReadAfterFold]>;
5959   }
5960 }
5962 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5963                                 X86SchedWriteWidths sched> {
5964   let Predicates = [HasFP16] in {
5965     defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
5966                avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
5967                                 EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
5968     defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
5969                avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
5970                              EVEX_4V, T_MAP6PD, EVEX_CD8<16, CD8VT1>;
5971   }
5972   defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
5973              avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
5974                               EVEX_V512, EVEX_CD8<32, CD8VF>, T8PD;
5975   defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
5976              avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
5977                               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
5978   defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
5979              avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
5980                                     X86scalefsRnd, sched.Scl>,
5981                                     EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8PD;
5982   defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
5983              avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
5984                                     X86scalefsRnd, sched.Scl>,
5985                                     EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W, T8PD;
5987   // Define only if AVX512VL feature is present.
5988   let Predicates = [HasVLX] in {
5989     defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
5990                                    EVEX_V128, EVEX_CD8<32, CD8VF>, T8PD;
5991     defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
5992                                    EVEX_V256, EVEX_CD8<32, CD8VF>, T8PD;
5993     defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
5994                                    EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
5995     defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
5996                                    EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
5997   }
5999   let Predicates = [HasFP16, HasVLX] in {
6000     defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
6001                                    EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6PD;
6002     defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
6003                                    EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6PD;
6004   }
6005 }
6006 defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
6007                                     SchedWriteFAdd>, NotEVEX2VEXConvertible;
6009 //===----------------------------------------------------------------------===//
6010 // AVX-512  VPTESTM instructions
6011 //===----------------------------------------------------------------------===//
6013 multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
6014                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
6015                          string Name> {
6016   // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
6017   // There are just too many permutations due to commutability and bitcasts.
6018   let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6019   defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
6020                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6021                       "$src2, $src1", "$src1, $src2",
6022                    (null_frag), (null_frag), 1>,
6023                    EVEX_4V, Sched<[sched]>;
6024   let mayLoad = 1 in
6025   defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
6026                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6027                        "$src2, $src1", "$src1, $src2",
6028                    (null_frag), (null_frag)>,
6029                    EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6030                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6031   }
6032 }
6034 multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
6035                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6036   let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
6037   defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
6038                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6039                     "${src2}"#_.BroadcastStr#", $src1",
6040                     "$src1, ${src2}"#_.BroadcastStr,
6041                     (null_frag), (null_frag)>,
6042                     EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6043                     Sched<[sched.Folded, sched.ReadAfterFold]>;
6044 }
6046 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
6047                                   X86SchedWriteWidths sched,
6048                                   AVX512VLVectorVTInfo _> {
6049   let Predicates  = [HasAVX512] in
6050   defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>,
6051            avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
6053   let Predicates = [HasAVX512, HasVLX] in {
6054   defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>,
6055               avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
6056   defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>,
6057               avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
6058   }
6059 }
6061 multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
6062                             X86SchedWriteWidths sched> {
6063   defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
6064                                  avx512vl_i32_info>;
6065   defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
6066                                  avx512vl_i64_info>, VEX_W;
6067 }
6069 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
6070                             X86SchedWriteWidths sched> {
6071   let Predicates = [HasBWI] in {
6072   defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
6073                             v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
6074   defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
6075                             v64i8_info, NAME#"B">, EVEX_V512;
6076   }
6077   let Predicates = [HasVLX, HasBWI] in {
6079   defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
6080                             v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
6081   defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
6082                             v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
6083   defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
6084                             v32i8x_info, NAME#"B">, EVEX_V256;
6085   defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
6086                             v16i8x_info, NAME#"B">, EVEX_V128;
6087   }
6088 }
6090 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
6091                                    X86SchedWriteWidths sched> :
6092   avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
6093   avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
6095 defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
6096                                          SchedWriteVecLogic>, T8PD;
6097 defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
6098                                          SchedWriteVecLogic>, T8XS;
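// For reference, vptestm{b,w,d,q} sets mask bit i when (src1[i] & src2[i]) is
// non-zero and vptestnm{b,w,d,q} sets it when the AND is zero, e.g.
// "vptestmd k1 {k2}, zmm2, dword ptr [rax]{1to16}".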
6100 //===----------------------------------------------------------------------===//
6101 // AVX-512  Shift instructions
6102 //===----------------------------------------------------------------------===//
6104 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
6105                             string OpcodeStr, SDNode OpNode,
6106                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6107   let ExeDomain = _.ExeDomain in {
6108   defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
6109                    (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
6110                       "$src2, $src1", "$src1, $src2",
6111                    (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
6112                    Sched<[sched]>;
6113   defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
6114                    (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
6115                        "$src2, $src1", "$src1, $src2",
6116                    (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
6117                           (i8 timm:$src2)))>,
6118                    Sched<[sched.Folded]>;
6119   }
6120 }
6122 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
6123                              string OpcodeStr, SDNode OpNode,
6124                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6125   let ExeDomain = _.ExeDomain in
6126   defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
6127                    (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
6128       "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
6129      (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
6130      EVEX_B, Sched<[sched.Folded]>;
6131 }
6133 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6134                             X86FoldableSchedWrite sched, ValueType SrcVT,
6135                             X86VectorVTInfo _> {
6136    // src2 is always 128-bit
6137   let ExeDomain = _.ExeDomain in {
6138   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6139                    (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
6140                       "$src2, $src1", "$src1, $src2",
6141                    (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
6142                    AVX512BIBase, EVEX_4V, Sched<[sched]>;
6143   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6144                    (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
6145                        "$src2, $src1", "$src1, $src2",
6146                    (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
6147                    AVX512BIBase,
6148                    EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6149   }
6150 }
6152 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6153                               X86SchedWriteWidths sched, ValueType SrcVT,
6154                               AVX512VLVectorVTInfo VTInfo,
6155                               Predicate prd> {
6156   let Predicates = [prd] in
6157   defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
6158                                VTInfo.info512>, EVEX_V512,
6159                                EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
6160   let Predicates = [prd, HasVLX] in {
6161   defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
6162                                VTInfo.info256>, EVEX_V256,
6163                                EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
6164   defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
6165                                VTInfo.info128>, EVEX_V128,
6166                                EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
6167   }
6168 }
6170 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
6171                               string OpcodeStr, SDNode OpNode,
6172                               X86SchedWriteWidths sched,
6173                               bit NotEVEX2VEXConvertibleQ = 0> {
6174   defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
6175                               avx512vl_i32_info, HasAVX512>;
6176   let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
6177   defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
6178                               avx512vl_i64_info, HasAVX512>, VEX_W;
6179   defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
6180                               avx512vl_i16_info, HasBWI>;
6181 }
6183 multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6184                                   string OpcodeStr, SDNode OpNode,
6185                                   X86SchedWriteWidths sched,
6186                                   AVX512VLVectorVTInfo VTInfo> {
6187   let Predicates = [HasAVX512] in
6188   defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6189                               sched.ZMM, VTInfo.info512>,
6190              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
6191                                VTInfo.info512>, EVEX_V512;
6192   let Predicates = [HasAVX512, HasVLX] in {
6193   defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6194                               sched.YMM, VTInfo.info256>,
6195              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
6196                                VTInfo.info256>, EVEX_V256;
6197   defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6198                               sched.XMM, VTInfo.info128>,
6199              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
6200                                VTInfo.info128>, EVEX_V128;
6201   }
6202 }
6204 multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
6205                               string OpcodeStr, SDNode OpNode,
6206                               X86SchedWriteWidths sched> {
6207   let Predicates = [HasBWI] in
6208   defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6209                                sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
6210   let Predicates = [HasVLX, HasBWI] in {
6211   defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6212                                sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
6213   defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6214                                sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
6215   }
6216 }
6218 multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
6219                                Format ImmFormR, Format ImmFormM,
6220                                string OpcodeStr, SDNode OpNode,
6221                                X86SchedWriteWidths sched,
6222                                bit NotEVEX2VEXConvertibleQ = 0> {
6223   defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
6224                                  sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
6225   let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
6226   defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
6227                                  sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
6228 }
6230 defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
6231                                  SchedWriteVecShiftImm>,
6232              avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
6233                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6235 defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
6236                                  SchedWriteVecShiftImm>,
6237              avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
6238                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6240 defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
6241                                  SchedWriteVecShiftImm, 1>,
6242              avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
6243                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6245 defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
6246                                  SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6247 defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
6248                                  SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
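// For reference, the immediate forms above shift or rotate every element by a
// constant, e.g. "vpsrld zmm1 {k1}, zmm2, 7", and the broadcast variants apply
// the immediate to a splatted memory source, e.g.
// "vprold zmm1, dword ptr [rax]{1to16}, 5".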
6250 defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
6251                                 SchedWriteVecShift>;
6252 defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
6253                                 SchedWriteVecShift, 1>;
6254 defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
6255                                 SchedWriteVecShift>;
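// For reference, these count forms shift all elements by the same amount taken
// from the low 64 bits of an XMM register or 128-bit memory operand, e.g.
// "vpsllq zmm1 {k1}, zmm2, xmm3".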
6257 // Use the 512-bit VPSRA/VPSRAI versions to implement v2i64/v4i64 when VLX is not available (NoVLX).
6258 let Predicates = [HasAVX512, NoVLX] in {
6259   def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
6260             (EXTRACT_SUBREG (v8i64
6261               (VPSRAQZrr
6262                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6263                  VR128X:$src2)), sub_ymm)>;
6265   def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6266             (EXTRACT_SUBREG (v8i64
6267               (VPSRAQZrr
6268                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6269                  VR128X:$src2)), sub_xmm)>;
6271   def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
6272             (EXTRACT_SUBREG (v8i64
6273               (VPSRAQZri
6274                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6275                  timm:$src2)), sub_ymm)>;
6277   def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
6278             (EXTRACT_SUBREG (v8i64
6279               (VPSRAQZri
6280                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6281                  timm:$src2)), sub_xmm)>;
6282 }
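// The patterns above widen the XMM/YMM operands into a ZMM register (the upper
// elements are undefined but unused), perform the shift with the 512-bit
// VPSRAQ, and extract the original-width subregister, since the 128/256-bit
// EVEX forms require AVX512VL.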
6284 //===-------------------------------------------------------------------===//
6285 // Variable Bit Shifts
6286 //===-------------------------------------------------------------------===//
6288 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
6289                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6290   let ExeDomain = _.ExeDomain in {
6291   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6292                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6293                       "$src2, $src1", "$src1, $src2",
6294                    (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
6295                    AVX5128IBase, EVEX_4V, Sched<[sched]>;
6296   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6297                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6298                        "$src2, $src1", "$src1, $src2",
6299                    (_.VT (OpNode _.RC:$src1,
6300                    (_.VT (_.LdFrag addr:$src2))))>,
6301                    AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6302                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6303   }
6304 }
6306 multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
6307                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6308   let ExeDomain = _.ExeDomain in
6309   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6310                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6311                     "${src2}"#_.BroadcastStr#", $src1",
6312                     "$src1, ${src2}"#_.BroadcastStr,
6313                     (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
6314                     AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6315                     Sched<[sched.Folded, sched.ReadAfterFold]>;
6316 }
6318 multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6319                                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
6320   let Predicates  = [HasAVX512] in
6321   defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
6322            avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
6324   let Predicates = [HasAVX512, HasVLX] in {
6325   defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
6326               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
6327   defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
6328               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
6329   }
6330 }
6332 multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
6333                                   SDNode OpNode, X86SchedWriteWidths sched> {
6334   defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
6335                                  avx512vl_i32_info>;
6336   defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
6337                                  avx512vl_i64_info>, VEX_W;
6338 }
6340 // Use the 512-bit version to implement the 128/256-bit forms when VLX is not available (NoVLX).
6341 multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6342                                      SDNode OpNode, list<Predicate> p> {
6343   let Predicates = p in {
6344   def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6345                                   (_.info256.VT _.info256.RC:$src2))),
6346             (EXTRACT_SUBREG
6347                 (!cast<Instruction>(OpcodeStr#"Zrr")
6348                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6349                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6350              sub_ymm)>;
6352   def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6353                                   (_.info128.VT _.info128.RC:$src2))),
6354             (EXTRACT_SUBREG
6355                 (!cast<Instruction>(OpcodeStr#"Zrr")
6356                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6357                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
6358              sub_xmm)>;
6359   }
6360 }
6361 multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6362                               SDNode OpNode, X86SchedWriteWidths sched> {
6363   let Predicates = [HasBWI] in
6364   defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6365               EVEX_V512, VEX_W;
6366   let Predicates = [HasVLX, HasBWI] in {
6368   defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6369               EVEX_V256, VEX_W;
6370   defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
6371               EVEX_V128, VEX_W;
6372   }
6373 }
6375 defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
6376               avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
6378 defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
6379               avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
6381 defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
6382               avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
6384 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6385 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
6387 defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
6388 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
6389 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
6390 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
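// As with the VPSRAQ patterns above, these lowerings widen the operands to
// 512 bits, use the Z-suffixed instruction, and extract the low subregister
// when the VLX (128/256-bit EVEX) forms are unavailable.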
6393 // Use the 512-bit VPROL/VPROLI versions to implement v2i64/v4i64 + v4i32/v8i32 when VLX is not available (NoVLX).
6394 let Predicates = [HasAVX512, NoVLX] in {
6395   def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6396             (EXTRACT_SUBREG (v8i64
6397               (VPROLVQZrr
6398                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6399                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6400                        sub_xmm)>;
6401   def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6402             (EXTRACT_SUBREG (v8i64
6403               (VPROLVQZrr
6404                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6405                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6406                        sub_ymm)>;
6408   def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6409             (EXTRACT_SUBREG (v16i32
6410               (VPROLVDZrr
6411                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6412                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6413                         sub_xmm)>;
6414   def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6415             (EXTRACT_SUBREG (v16i32
6416               (VPROLVDZrr
6417                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6418                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6419                         sub_ymm)>;
6421   def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
6422             (EXTRACT_SUBREG (v8i64
6423               (VPROLQZri
6424                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6425                         timm:$src2)), sub_xmm)>;
6426   def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
6427             (EXTRACT_SUBREG (v8i64
6428               (VPROLQZri
6429                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6430                        timm:$src2)), sub_ymm)>;
6432   def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
6433             (EXTRACT_SUBREG (v16i32
6434               (VPROLDZri
6435                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6436                         timm:$src2)), sub_xmm)>;
6437   def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
6438             (EXTRACT_SUBREG (v16i32
6439               (VPROLDZri
6440                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6441                        timm:$src2)), sub_ymm)>;
6442 }
6444 // Use the 512-bit VPROR/VPRORI versions to implement v2i64/v4i64 + v4i32/v8i32 when VLX is not available (NoVLX).
6445 let Predicates = [HasAVX512, NoVLX] in {
6446   def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6447             (EXTRACT_SUBREG (v8i64
6448               (VPRORVQZrr
6449                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6450                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6451                        sub_xmm)>;
6452   def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6453             (EXTRACT_SUBREG (v8i64
6454               (VPRORVQZrr
6455                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6456                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6457                        sub_ymm)>;
6459   def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6460             (EXTRACT_SUBREG (v16i32
6461               (VPRORVDZrr
6462                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6463                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6464                         sub_xmm)>;
6465   def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6466             (EXTRACT_SUBREG (v16i32
6467               (VPRORVDZrr
6468                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6469                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6470                         sub_ymm)>;
6472   def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
6473             (EXTRACT_SUBREG (v8i64
6474               (VPRORQZri
6475                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6476                         timm:$src2)), sub_xmm)>;
6477   def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
6478             (EXTRACT_SUBREG (v8i64
6479               (VPRORQZri
6480                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6481                        timm:$src2)), sub_ymm)>;
6483   def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
6484             (EXTRACT_SUBREG (v16i32
6485               (VPRORDZri
6486                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6487                         timm:$src2)), sub_xmm)>;
6488   def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
6489             (EXTRACT_SUBREG (v16i32
6490               (VPRORDZri
6491                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6492                         timm:$src2)), sub_ymm)>;
6493 }
6495 //===-------------------------------------------------------------------===//
6496 // 1-src variable permutation VPERMW/D/Q
6497 //===-------------------------------------------------------------------===//
6499 multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6500                                  X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6501   let Predicates  = [HasAVX512] in
6502   defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6503            avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6505   let Predicates = [HasAVX512, HasVLX] in
6506   defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6507               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
6508 }
6510 multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6511                                  string OpcodeStr, SDNode OpNode,
6512                                  X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6513   let Predicates = [HasAVX512] in
6514   defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6515                               sched, VTInfo.info512>,
6516              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6517                                sched, VTInfo.info512>, EVEX_V512;
6518   let Predicates = [HasAVX512, HasVLX] in
6519   defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6520                               sched, VTInfo.info256>,
6521              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6522                                sched, VTInfo.info256>, EVEX_V256;
6523 }
6525 multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6526                               Predicate prd, SDNode OpNode,
6527                               X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6528   let Predicates = [prd] in
6529   defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6530               EVEX_V512 ;
6531   let Predicates = [HasVLX, prd] in {
6532   defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6533               EVEX_V256 ;
6534   defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
6535               EVEX_V128 ;
6536   }
6537 }
6539 defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6540                                WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
6541 defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6542                                WriteVarShuffle256, avx512vl_i8_info>;
6544 defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6545                                     WriteVarShuffle256, avx512vl_i32_info>;
6546 defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6547                                     WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
6548 defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6549                                      WriteFVarShuffle256, avx512vl_f32_info>;
6550 defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6551                                      WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;
6553 defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6554                              X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6555                              EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6556 defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6557                              X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6558                              EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
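// For reference, the immediate forms permute 64-bit elements with a constant
// selector, e.g. "vpermq ymm1 {k1}, ymm2, 0x1b" reverses the four quadwords,
// while the variable forms defined earlier take per-element indices from the
// first source register, e.g. "vpermq zmm1, zmm2, zmm3" (indices in zmm2).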
6560 //===----------------------------------------------------------------------===//
6561 // AVX-512 - VPERMIL
6562 //===----------------------------------------------------------------------===//
6564 multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6565                              X86FoldableSchedWrite sched, X86VectorVTInfo _,
6566                              X86VectorVTInfo Ctrl> {
6567   defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6568                   (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6569                   "$src2, $src1", "$src1, $src2",
6570                   (_.VT (OpNode _.RC:$src1,
6571                                (Ctrl.VT Ctrl.RC:$src2)))>,
6572                   T8PD, EVEX_4V, Sched<[sched]>;
6573   defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6574                   (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6575                   "$src2, $src1", "$src1, $src2",
6576                   (_.VT (OpNode
6577                            _.RC:$src1,
6578                            (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6579                   T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6580                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6581   defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6582                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6583                    "${src2}"#_.BroadcastStr#", $src1",
6584                    "$src1, ${src2}"#_.BroadcastStr,
6585                    (_.VT (OpNode
6586                             _.RC:$src1,
6587                             (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
6588                    T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6589                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6590 }
6592 multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6593                                     X86SchedWriteWidths sched,
6594                                     AVX512VLVectorVTInfo _,
6595                                     AVX512VLVectorVTInfo Ctrl> {
6596   let Predicates = [HasAVX512] in {
6597     defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6598                                   _.info512, Ctrl.info512>, EVEX_V512;
6599   }
6600   let Predicates = [HasAVX512, HasVLX] in {
6601     defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6602                                   _.info128, Ctrl.info128>, EVEX_V128;
6603     defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6604                                   _.info256, Ctrl.info256>, EVEX_V256;
6605   }
6606 }
6608 multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6609                          AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6610   defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6611                                       _, Ctrl>;
6612   defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6613                                     X86VPermilpi, SchedWriteFShuffle, _>,
6614                     EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
6615 }
6617 let ExeDomain = SSEPackedSingle in
6618 defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6619                                avx512vl_i32_info>;
6620 let ExeDomain = SSEPackedDouble in
6621 defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6622                                avx512vl_i64_info>, VEX_W1X;
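// Note that vpermilps/vpermilpd shuffle elements only within each 128-bit
// lane, either by immediate (e.g. "vpermilps zmm1, zmm2, 0xb1") or by a
// per-element control vector (e.g. "vpermilpd zmm1 {k1}, zmm2, zmm3").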
6624 //===----------------------------------------------------------------------===//
6625 // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6626 //===----------------------------------------------------------------------===//
6628 defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6629                              X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6630                              EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6631 defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6632                                   X86PShufhw, SchedWriteShuffle>,
6633                                   EVEX, AVX512XSIi8Base;
6634 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6635                                   X86PShuflw, SchedWriteShuffle>,
6636                                   EVEX, AVX512XDIi8Base;
6638 //===----------------------------------------------------------------------===//
6639 // AVX-512 - VPSHUFB
6640 //===----------------------------------------------------------------------===//
6642 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6643                                X86SchedWriteWidths sched> {
6644   let Predicates = [HasBWI] in
6645   defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6646                               EVEX_V512;
6648   let Predicates = [HasVLX, HasBWI] in {
6649   defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6650                               EVEX_V256;
6651   defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6652                               EVEX_V128;
6653   }
6654 }
6656 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6657                                   SchedWriteVarShuffle>, VEX_WIG;
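// For reference, vpshufb selects bytes within each 128-bit lane: every control
// byte in src2 picks a byte of src1, and a control byte with its high bit set
// zeroes the corresponding destination byte, e.g. "vpshufb zmm1 {k1}, zmm2, zmm3".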
6659 //===----------------------------------------------------------------------===//
6660 // Move Low to High and High to Low packed FP Instructions
6661 //===----------------------------------------------------------------------===//
6663 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6664           (ins VR128X:$src1, VR128X:$src2),
6665           "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6666           [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6667           Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
6668 let isCommutable = 1 in
6669 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6670           (ins VR128X:$src1, VR128X:$src2),
6671           "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6672           [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6673           Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
6675 //===----------------------------------------------------------------------===//
6676 // VMOVHPS/PD VMOVLPS Instructions
6677 // All patterns were taken from the SSE implementation.
6678 //===----------------------------------------------------------------------===//
6680 multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6681                                   SDPatternOperator OpNode,
6682                                   X86VectorVTInfo _> {
6683   let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6684   def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6685                   (ins _.RC:$src1, f64mem:$src2),
6686                   !strconcat(OpcodeStr,
6687                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6688                   [(set _.RC:$dst,
6689                      (OpNode _.RC:$src1,
6690                        (_.VT (bitconvert
6691                          (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6692                   Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
6693 }
6695 // No patterns for MOVLPS/MOVHPS, as the Movlhps node should only be created
6696 // in SSE1, and the MOVLPS pattern is even more complex.
6697 defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6698                                   v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6699 defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6700                                   v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6701 defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6702                                   v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6703 defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6704                                   v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6706 let Predicates = [HasAVX512] in {
6707   // VMOVHPD patterns
6708   def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
6709             (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6711   // VMOVLPD patterns
6712   def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
6713             (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6714 }
6716 let SchedRW = [WriteFStore] in {
6717 let mayStore = 1, hasSideEffects = 0 in
6718 def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6719                        (ins f64mem:$dst, VR128X:$src),
6720                        "vmovhps\t{$src, $dst|$dst, $src}",
6721                        []>, EVEX, EVEX_CD8<32, CD8VT2>;
6722 def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6723                        (ins f64mem:$dst, VR128X:$src),
6724                        "vmovhpd\t{$src, $dst|$dst, $src}",
6725                        [(store (f64 (extractelt
6726                                      (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6727                                      (iPTR 0))), addr:$dst)]>,
6728                        EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6729 let mayStore = 1, hasSideEffects = 0 in
6730 def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6731                        (ins f64mem:$dst, VR128X:$src),
6732                        "vmovlps\t{$src, $dst|$dst, $src}",
6733                        []>, EVEX, EVEX_CD8<32, CD8VT2>;
6734 def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6735                        (ins f64mem:$dst, VR128X:$src),
6736                        "vmovlpd\t{$src, $dst|$dst, $src}",
6737                        [(store (f64 (extractelt (v2f64 VR128X:$src),
6738                                      (iPTR 0))), addr:$dst)]>,
6739                        EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6740 } // SchedRW
6742 let Predicates = [HasAVX512] in {
6743   // VMOVHPD patterns
6744   def : Pat<(store (f64 (extractelt
6745                            (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6746                            (iPTR 0))), addr:$dst),
6747            (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6748 }
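// The pattern above matches a store of the high f64 element extracted through
// a VPERMILPD-style swap and emits it as a plain vmovhpd store, e.g.
// "vmovhpd qword ptr [rdi], xmm0".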
6749 //===----------------------------------------------------------------------===//
6750 // FMA - Fused Multiply Operations
6751 //===----------------------------------------------------------------------===//
6753 multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6754                                SDNode MaskOpNode, X86FoldableSchedWrite sched,
6755                                X86VectorVTInfo _, string Suff> {
6756   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6757       Uses = [MXCSR], mayRaiseFPException = 1 in {
6758   defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6759           (ins _.RC:$src2, _.RC:$src3),
6760           OpcodeStr, "$src3, $src2", "$src2, $src3",
6761           (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
6762           (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6763           AVX512FMA3Base, Sched<[sched]>;
6765   defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6766           (ins _.RC:$src2, _.MemOp:$src3),
6767           OpcodeStr, "$src3, $src2", "$src2, $src3",
6768           (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
6769           (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6770           AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6772   defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6773             (ins _.RC:$src2, _.ScalarMemOp:$src3),
6774             OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6775             !strconcat("$src2, ${src3}", _.BroadcastStr ),
6776             (OpNode _.RC:$src2,
6777              _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
6778             (MaskOpNode _.RC:$src2,
6779              _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
6780             AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6781   }
6782 }
6784 multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6785                                  X86FoldableSchedWrite sched,
6786                                  X86VectorVTInfo _, string Suff> {
6787   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6788       Uses = [MXCSR] in
6789   defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6790           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6791           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6792           (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
6793           (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6794           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6795 }
6797 multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6798                                    SDNode MaskOpNode, SDNode OpNodeRnd,
6799                                    X86SchedWriteWidths sched,
6800                                    AVX512VLVectorVTInfo _, string Suff> {
6801   let Predicates = [HasAVX512] in {
6802     defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6803                                       sched.ZMM, _.info512, Suff>,
6804                   avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6805                                         _.info512, Suff>,
6806                               EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6807   }
6808   let Predicates = [HasVLX, HasAVX512] in {
6809     defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6810                                     sched.YMM, _.info256, Suff>,
6811                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6812     defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6813                                     sched.XMM, _.info128, Suff>,
6814                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6815   }
6816 }
6818 multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6819                               SDNode MaskOpNode, SDNode OpNodeRnd> {
6820     defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6821                                       OpNodeRnd, SchedWriteFMA,
6822                                       avx512vl_f32_info, "PS">;
6823     defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6824                                       OpNodeRnd, SchedWriteFMA,
6825                                       avx512vl_f64_info, "PD">, VEX_W;
6826 }
6828 defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
6829                                        fma, X86FmaddRnd>;
6830 defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
6831                                        X86Fmsub, X86FmsubRnd>;
6832 defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
6833                                        X86Fmaddsub, X86FmaddsubRnd>;
6834 defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
6835                                        X86Fmsubadd, X86FmsubaddRnd>;
6836 defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
6837                                        X86Fnmadd, X86FnmaddRnd>;
6838 defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
6839                                        X86Fnmsub, X86FnmsubRnd>;
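// For reference, the digits in the mnemonics name the operand roles:
// vfmadd213* computes dst = src2 * dst + src3, vfmadd231* computes
// dst = src2 * src3 + dst, and vfmadd132* computes dst = dst * src3 + src2
// (dst is tied to src1 in all three).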
6842 multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6843                                SDNode MaskOpNode, X86FoldableSchedWrite sched,
6844                                X86VectorVTInfo _, string Suff> {
6845   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6846       Uses = [MXCSR], mayRaiseFPException = 1 in {
6847   defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6848           (ins _.RC:$src2, _.RC:$src3),
6849           OpcodeStr, "$src3, $src2", "$src2, $src3",
6850           (null_frag),
6851           (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6852           AVX512FMA3Base, Sched<[sched]>;
6854   defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6855           (ins _.RC:$src2, _.MemOp:$src3),
6856           OpcodeStr, "$src3, $src2", "$src2, $src3",
6857           (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6858           (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6859           AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6861   defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6862          (ins _.RC:$src2, _.ScalarMemOp:$src3),
6863          OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6864          "$src2, ${src3}"#_.BroadcastStr,
6865          (_.VT (OpNode _.RC:$src2,
6866                       (_.VT (_.BroadcastLdFrag addr:$src3)),
6867                       _.RC:$src1)),
6868          (_.VT (MaskOpNode _.RC:$src2,
6869                            (_.VT (_.BroadcastLdFrag addr:$src3)),
6870                            _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
6871          Sched<[sched.Folded, sched.ReadAfterFold]>;
6872   }
6873 }
6875 multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6876                                  X86FoldableSchedWrite sched,
6877                                  X86VectorVTInfo _, string Suff> {
6878   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6879       Uses = [MXCSR] in
6880   defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6881           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6882           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6883           (null_frag),
6884           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6885           1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6886 }
6888 multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6889                                    SDNode MaskOpNode, SDNode OpNodeRnd,
6890                                    X86SchedWriteWidths sched,
6891                                    AVX512VLVectorVTInfo _, string Suff> {
6892   let Predicates = [HasAVX512] in {
6893     defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6894                                       sched.ZMM, _.info512, Suff>,
6895                   avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6896                                         _.info512, Suff>,
6897                               EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6898   }
6899   let Predicates = [HasVLX, HasAVX512] in {
6900     defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6901                                     sched.YMM, _.info256, Suff>,
6902                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6903     defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6904                                     sched.XMM, _.info128, Suff>,
6905                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6906   }
6907 }
6909 multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6910                               SDNode MaskOpNode, SDNode OpNodeRnd > {
6911     defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6912                                       OpNodeRnd, SchedWriteFMA,
6913                                       avx512vl_f32_info, "PS">;
6914     defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6915                                       OpNodeRnd, SchedWriteFMA,
6916                                       avx512vl_f64_info, "PD">, VEX_W;
6917 }
6919 defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
6920                                        fma, X86FmaddRnd>;
6921 defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
6922                                        X86Fmsub, X86FmsubRnd>;
6923 defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
6924                                        X86Fmaddsub, X86FmaddsubRnd>;
6925 defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
6926                                        X86Fmsubadd, X86FmsubaddRnd>;
6927 defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
6928                                        X86Fnmadd, X86FnmaddRnd>;
6929 defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
6930                                        X86Fnmsub, X86FnmsubRnd>;
6932 multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6933                                SDNode MaskOpNode, X86FoldableSchedWrite sched,
6934                                X86VectorVTInfo _, string Suff> {
6935   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6936       Uses = [MXCSR], mayRaiseFPException = 1 in {
6937   defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6938           (ins _.RC:$src2, _.RC:$src3),
6939           OpcodeStr, "$src3, $src2", "$src2, $src3",
6940           (null_frag),
6941           (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
6942           AVX512FMA3Base, Sched<[sched]>;
6944   // Pattern is in 312 order so that the load is in a different place from the
6945   // 213 and 231 patterns; this helps TableGen's duplicate pattern detection.
6946   defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6947           (ins _.RC:$src2, _.MemOp:$src3),
6948           OpcodeStr, "$src3, $src2", "$src2, $src3",
6949           (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
6950           (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
6951           AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6953   // Pattern is in 312 order so that the load is in a different place from the
6954   // 213 and 231 patterns; this helps TableGen's duplicate pattern detection.
6955   defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6956          (ins _.RC:$src2, _.ScalarMemOp:$src3),
6957          OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6958          "$src2, ${src3}"#_.BroadcastStr,
6959          (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6960                        _.RC:$src1, _.RC:$src2)),
6961          (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6962                            _.RC:$src1, _.RC:$src2)), 1, 0>,
6963          AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6964   }
6965 }
6967 multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6968                                  X86FoldableSchedWrite sched,
6969                                  X86VectorVTInfo _, string Suff> {
6970   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6971       Uses = [MXCSR] in
6972   defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6973           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6974           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6975           (null_frag),
6976           (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
6977           1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6978 }
6980 multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6981                                    SDNode MaskOpNode, SDNode OpNodeRnd,
6982                                    X86SchedWriteWidths sched,
6983                                    AVX512VLVectorVTInfo _, string Suff> {
6984   let Predicates = [HasAVX512] in {
6985     defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6986                                       sched.ZMM, _.info512, Suff>,
6987                   avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6988                                         _.info512, Suff>,
6989                               EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6990   }
6991   let Predicates = [HasVLX, HasAVX512] in {
6992     defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6993                                     sched.YMM, _.info256, Suff>,
6994                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6995     defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6996                                     sched.XMM, _.info128, Suff>,
6997                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6998   }
6999 }
7001 multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
7002                               SDNode MaskOpNode, SDNode OpNodeRnd > {
7003     defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
7004                                       OpNodeRnd, SchedWriteFMA,
7005                                       avx512vl_f32_info, "PS">;
7006     defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
7007                                       OpNodeRnd, SchedWriteFMA,
7008                                       avx512vl_f64_info, "PD">, VEX_W;
7009 }
7011 defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
7012                                        fma, X86FmaddRnd>;
7013 defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
7014                                        X86Fmsub, X86FmsubRnd>;
7015 defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
7016                                        X86Fmaddsub, X86FmaddsubRnd>;
7017 defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
7018                                        X86Fmsubadd, X86FmsubaddRnd>;
7019 defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
7020                                        X86Fnmadd, X86FnmaddRnd>;
7021 defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
7022                                        X86Fnmsub, X86FnmsubRnd>;
7024 // Scalar FMA
7025 multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7026                                dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
7027 let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
7028   defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7029           (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
7030           "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
7031           AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
7033   let mayLoad = 1 in
7034   defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7035           (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
7036           "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
7037           AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
7039   let Uses = [MXCSR] in
7040   defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7041          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
7042          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
7043          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
7045   let isCodeGenOnly = 1, isCommutable = 1 in {
7046     def r     : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
7047                      (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
7048                      !strconcat(OpcodeStr,
7049                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
7050                      !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
7051     def m     : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
7052                     (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
7053                     !strconcat(OpcodeStr,
7054                                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
7055                     [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
7057     let Uses = [MXCSR] in
7058     def rb    : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
7059                      (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
7060                      !strconcat(OpcodeStr,
7061                               "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
7062                      !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
7063                      Sched<[SchedWriteFMA.Scl]>;
7064   }// isCodeGenOnly = 1
7065 }// Constraints = "$src1 = $dst"
7066 }
7068 multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
7069                             string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
7070                             X86VectorVTInfo _, string SUFF> {
7071   let ExeDomain = _.ExeDomain in {
7072   defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
7073                 // Operands for intrinsic are in 123 order to preserve passthru
7074                 // semantics.
7075                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
7076                          _.FRC:$src3))),
7077                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
7078                          (_.ScalarLdFrag addr:$src3)))),
7079                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
7080                          _.FRC:$src3, (i32 timm:$rc)))), 0>;
7082   defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
7083                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
7084                                           _.FRC:$src1))),
7085                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
7086                             (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
7087                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
7088                          _.FRC:$src1, (i32 timm:$rc)))), 1>;
7090   // One pattern is in 312 order so that the load is in a different place from the
7091   // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
7092   defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
7093                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
7094                          _.FRC:$src2))),
7095                 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
7096                                  _.FRC:$src1, _.FRC:$src2))),
7097                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
7098                          _.FRC:$src2, (i32 timm:$rc)))), 1>;
7099   }
7100 }
7102 multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
7103                         string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
7104   let Predicates = [HasAVX512] in {
7105     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7106                                  OpNodeRnd, f32x_info, "SS">,
7107                                  EVEX_CD8<32, CD8VT1>, VEX_LIG;
7108     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7109                                  OpNodeRnd, f64x_info, "SD">,
7110                                  EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
7111   }
7112 }
7114 defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
7115 defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
7116 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
7117 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
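// Illustrative semantics of the scalar forms defined above (Intel syntax;
// only the low element is computed, the upper bits of the destination are
// left unchanged, and the _Int forms can additionally be write-masked):
//   vfmadd213ss xmm1, xmm2, xmm3    ; xmm1[31:0] = xmm2[31:0]*xmm1[31:0] + xmm3[31:0]
//   vfmsub231sd xmm1, xmm2, xmm3    ; xmm1[63:0] = xmm2[63:0]*xmm3[63:0] - xmm1[63:0]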
7119 multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
7120                                       SDNode RndOp, string Prefix,
7121                                       string Suffix, SDNode Move,
7122                                       X86VectorVTInfo _, PatLeaf ZeroFP> {
7123   let Predicates = [HasAVX512] in {
7124     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7125                 (Op _.FRC:$src2,
7126                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7127                     _.FRC:$src3))))),
7128               (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
7129                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7130                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7132     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7133                 (Op _.FRC:$src2, _.FRC:$src3,
7134                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7135               (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
7136                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7137                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7139     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7140                 (Op _.FRC:$src2,
7141                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7142                     (_.ScalarLdFrag addr:$src3)))))),
7143               (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
7144                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7145                addr:$src3)>;
7147     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7148                 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7149                     (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
7150               (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
7151                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7152                addr:$src3)>;
7154     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7155                 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7156                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7157               (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
7158                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7159                addr:$src3)>;
7161     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7162                (X86selects_mask VK1WM:$mask,
7163                 (MaskedOp _.FRC:$src2,
7164                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7165                     _.FRC:$src3),
7166                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7167               (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
7168                VR128X:$src1, VK1WM:$mask,
7169                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7170                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7172     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7173                (X86selects_mask VK1WM:$mask,
7174                 (MaskedOp _.FRC:$src2,
7175                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7176                     (_.ScalarLdFrag addr:$src3)),
7177                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7178               (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
7179                VR128X:$src1, VK1WM:$mask,
7180                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7182     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7183                (X86selects_mask VK1WM:$mask,
7184                 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7185                           (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
7186                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7187               (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
7188                VR128X:$src1, VK1WM:$mask,
7189                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7191     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7192                (X86selects_mask VK1WM:$mask,
7193                 (MaskedOp _.FRC:$src2, _.FRC:$src3,
7194                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7195                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7196               (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
7197                VR128X:$src1, VK1WM:$mask,
7198                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7199                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7201     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7202                (X86selects_mask VK1WM:$mask,
7203                 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7204                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7205                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7206               (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
7207                VR128X:$src1, VK1WM:$mask,
7208                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7210     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7211                (X86selects_mask VK1WM:$mask,
7212                 (MaskedOp _.FRC:$src2,
7213                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7214                           _.FRC:$src3),
7215                 (_.EltVT ZeroFP)))))),
7216               (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
7217                VR128X:$src1, VK1WM:$mask,
7218                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7219                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7221     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7222                (X86selects_mask VK1WM:$mask,
7223                 (MaskedOp _.FRC:$src2, _.FRC:$src3,
7224                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7225                 (_.EltVT ZeroFP)))))),
7226               (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
7227                VR128X:$src1, VK1WM:$mask,
7228                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7229                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7231     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7232                (X86selects_mask VK1WM:$mask,
7233                 (MaskedOp _.FRC:$src2,
7234                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7235                           (_.ScalarLdFrag addr:$src3)),
7236                 (_.EltVT ZeroFP)))))),
7237               (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
7238                VR128X:$src1, VK1WM:$mask,
7239                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7241     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7242                (X86selects_mask VK1WM:$mask,
7243                 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7244                           _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
7245                 (_.EltVT ZeroFP)))))),
7246               (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
7247                VR128X:$src1, VK1WM:$mask,
7248                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7250     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7251                (X86selects_mask VK1WM:$mask,
7252                 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7253                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7254                 (_.EltVT ZeroFP)))))),
7255               (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
7256                VR128X:$src1, VK1WM:$mask,
7257                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7259     // Patterns with rounding mode.
7260     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7261                 (RndOp _.FRC:$src2,
7262                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7263                        _.FRC:$src3, (i32 timm:$rc)))))),
7264               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
7265                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7266                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7268     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7269                 (RndOp _.FRC:$src2, _.FRC:$src3,
7270                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7271                        (i32 timm:$rc)))))),
7272               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
7273                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7274                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7276     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7277                (X86selects_mask VK1WM:$mask,
7278                 (RndOp _.FRC:$src2,
7279                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7280                        _.FRC:$src3, (i32 timm:$rc)),
7281                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7282               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
7283                VR128X:$src1, VK1WM:$mask,
7284                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7285                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7287     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7288                (X86selects_mask VK1WM:$mask,
7289                 (RndOp _.FRC:$src2, _.FRC:$src3,
7290                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7291                        (i32 timm:$rc)),
7292                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7293               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
7294                VR128X:$src1, VK1WM:$mask,
7295                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7296                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7298     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7299                (X86selects_mask VK1WM:$mask,
7300                 (RndOp _.FRC:$src2,
7301                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7302                        _.FRC:$src3, (i32 timm:$rc)),
7303                 (_.EltVT ZeroFP)))))),
7304               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
7305                VR128X:$src1, VK1WM:$mask,
7306                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7307                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7309     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7310                (X86selects_mask VK1WM:$mask,
7311                 (RndOp _.FRC:$src2, _.FRC:$src3,
7312                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7313                        (i32 timm:$rc)),
7314                 (_.EltVT ZeroFP)))))),
7315               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
7316                VR128X:$src1, VK1WM:$mask,
7317                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7318                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7319   }
7320 }
7322 defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7323                                   "SS", X86Movss, v4f32x_info, fp32imm0>;
7324 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7325                                   "SS", X86Movss, v4f32x_info, fp32imm0>;
7326 defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7327                                   "SS", X86Movss, v4f32x_info, fp32imm0>;
7328 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7329                                   "SS", X86Movss, v4f32x_info, fp32imm0>;
7331 defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7332                                   "SD", X86Movsd, v2f64x_info, fp64imm0>;
7333 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7334                                   "SD", X86Movsd, v2f64x_info, fp64imm0>;
7335 defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7336                                   "SD", X86Movsd, v2f64x_info, fp64imm0>;
7337 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7338                                   "SD", X86Movsd, v2f64x_info, fp64imm0>;
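// Note (illustrative, not normative): the patterns above match a scalar FMA
// of the low element of an XMM value, wrapped in X86Movss/X86Movsd, and
// re-select it onto the _Int instruction forms so the merge into $src1 is
// done by the FMA itself.  The X86selects_mask variants choose between the
// merge-masked (_Intk, passthru is the original low element) and zero-masked
// (_Intkz, passthru is ZeroFP) flavours, conceptually:
//   vfmadd213ss xmm1 {k1},    xmm2, xmm3   ; low lane keeps old xmm1 if !k1[0]
//   vfmadd213ss xmm1 {k1}{z}, xmm2, xmm3   ; low lane zeroed if !k1[0]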
7340 //===----------------------------------------------------------------------===//
7341 // AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52 Bits (IFMA)
7342 //===----------------------------------------------------------------------===//
7343 let Constraints = "$src1 = $dst" in {
7344 multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
7345                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
7346   // NOTE: The SDNode has the multiply operands first with the add last.
7347   // This enables commuted load patterns to be autogenerated by tablegen.
7348   let ExeDomain = _.ExeDomain in {
7349   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
7350           (ins _.RC:$src2, _.RC:$src3),
7351           OpcodeStr, "$src3, $src2", "$src2, $src3",
7352           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
7353          AVX512FMA3Base, Sched<[sched]>;
7355   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7356           (ins _.RC:$src2, _.MemOp:$src3),
7357           OpcodeStr, "$src3, $src2", "$src2, $src3",
7358           (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
7359           AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
7361   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7362             (ins _.RC:$src2, _.ScalarMemOp:$src3),
7363             OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
7364             !strconcat("$src2, ${src3}", _.BroadcastStr ),
7365             (OpNode _.RC:$src2,
7366                     (_.VT (_.BroadcastLdFrag addr:$src3)),
7367                     _.RC:$src1)>,
7368             AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
7369   }
7370 }
7371 } // Constraints = "$src1 = $dst"
7373 multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7374                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
7375   let Predicates = [HasIFMA] in {
7376     defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
7377                       EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7378   }
7379   let Predicates = [HasVLX, HasIFMA] in {
7380     defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
7381                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7382     defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
7383                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
7384   }
7385 }
7387 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
7388                                          SchedWriteVecIMul, avx512vl_i64_info>,
7389                                          VEX_W;
7390 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
7391                                          SchedWriteVecIMul, avx512vl_i64_info>,
7392                                          VEX_W;
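// Illustrative semantics per 64-bit lane (a sketch, not a normative
// description):
//   VPMADD52LUQ: dst.qword[i] += ZeroExtend64((src2[51:0] * src3[51:0])[51:0])
//   VPMADD52HUQ: dst.qword[i] += ZeroExtend64((src2[51:0] * src3[51:0])[103:52])
// The mb form broadcasts a single quadword from memory, e.g. (Intel syntax):
//   vpmadd52luq zmm1 {k1}, zmm2, qword ptr [rax]{1to8}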
7394 //===----------------------------------------------------------------------===//
7395 // AVX-512  Scalar convert from signed integer to float/double
7396 //===----------------------------------------------------------------------===//
7398 multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
7399                     RegisterClass SrcRC, X86VectorVTInfo DstVT,
7400                     X86MemOperand x86memop, PatFrag ld_frag, string asm,
7401                     string mem, list<Register> _Uses = [MXCSR],
7402                     bit _mayRaiseFPException = 1> {
7403 let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
7404     mayRaiseFPException = _mayRaiseFPException in {
7405   let hasSideEffects = 0, isCodeGenOnly = 1 in {
7406     def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7407               (ins DstVT.FRC:$src1, SrcRC:$src),
7408               !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7409               EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7410     let mayLoad = 1 in
7411       def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7412               (ins DstVT.FRC:$src1, x86memop:$src),
7413               asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
7414               EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7415   } // hasSideEffects = 0
7416   def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7417                 (ins DstVT.RC:$src1, SrcRC:$src2),
7418                 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7419                 [(set DstVT.RC:$dst,
7420                       (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
7421                EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7423   def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7424                 (ins DstVT.RC:$src1, x86memop:$src2),
7425                 asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7426                 [(set DstVT.RC:$dst,
7427                       (OpNode (DstVT.VT DstVT.RC:$src1),
7428                                (ld_frag addr:$src2)))]>,
7429                 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7431   def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7432                   (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
7433                   DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
7434 }
7436 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7437                                X86FoldableSchedWrite sched, RegisterClass SrcRC,
7438                                X86VectorVTInfo DstVT, string asm,
7439                                string mem> {
7440   let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
7441   def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7442               (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7443               !strconcat(asm,
7444                   "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7445               [(set DstVT.RC:$dst,
7446                     (OpNode (DstVT.VT DstVT.RC:$src1),
7447                              SrcRC:$src2,
7448                              (i32 timm:$rc)))]>,
7449               EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7450   def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
7451                   (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
7452                   DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
7453 }
7455 multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7456                                 X86FoldableSchedWrite sched,
7457                                 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7458                                 X86MemOperand x86memop, PatFrag ld_frag,
7459                                 string asm, string mem> {
7460   defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7461               avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7462                             ld_frag, asm, mem>, VEX_LIG;
7463 }
7465 let Predicates = [HasAVX512] in {
7466 defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7467                                  WriteCvtI2SS, GR32,
7468                                  v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7469                                  XS, EVEX_CD8<32, CD8VT1>;
7470 defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7471                                  WriteCvtI2SS, GR64,
7472                                  v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7473                                  XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7474 defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7475                                  v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
7476                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7477 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7478                                  WriteCvtI2SD, GR64,
7479                                  v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7480                                  XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7482 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7483               (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7484 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7485               (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7487 def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
7488           (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7489 def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
7490           (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7491 def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
7492           (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7493 def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
7494           (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7496 def : Pat<(f32 (any_sint_to_fp GR32:$src)),
7497           (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7498 def : Pat<(f32 (any_sint_to_fp GR64:$src)),
7499           (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7500 def : Pat<(f64 (any_sint_to_fp GR32:$src)),
7501           (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7502 def : Pat<(f64 (any_sint_to_fp GR64:$src)),
7503           (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7505 defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7506                                   WriteCvtI2SS, GR32,
7507                                   v4f32x_info, i32mem, loadi32,
7508                                   "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
7509 defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7510                                   WriteCvtI2SS, GR64,
7511                                   v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7512                                   XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7513 defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7514                                   i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
7515                                   XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7516 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7517                                   WriteCvtI2SD, GR64,
7518                                   v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7519                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7521 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7522               (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7523 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7524               (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7526 def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
7527           (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7528 def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
7529           (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7530 def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
7531           (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7532 def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
7533           (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7535 def : Pat<(f32 (any_uint_to_fp GR32:$src)),
7536           (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7537 def : Pat<(f32 (any_uint_to_fp GR64:$src)),
7538           (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7539 def : Pat<(f64 (any_uint_to_fp GR32:$src)),
7540           (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7541 def : Pat<(f64 (any_uint_to_fp GR64:$src)),
7542           (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7543 }
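// For reference (illustrative, Intel syntax): these conversions take a GPR or
// integer memory operand and write the converted value into the low element
// of the destination, copying the remaining elements from the first XMM
// source; the FRC-based rr/rm forms above are codegen-only and exist to
// select plain scalar sint/uint-to-fp nodes:
//   vcvtsi2sd  xmm1, xmm2, eax      ; xmm1[63:0] = (double)(int32_t)eax
//   vcvtusi2ss xmm1, xmm2, rax      ; xmm1[31:0] = (float)(uint64_t)rax
//                                   ; upper elements of xmm1 come from xmm2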
7545 //===----------------------------------------------------------------------===//
7546 // AVX-512  Scalar convert from float/double to integer
7547 //===----------------------------------------------------------------------===//
7549 multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7550                                   X86VectorVTInfo DstVT, SDNode OpNode,
7551                                   SDNode OpNodeRnd,
7552                                   X86FoldableSchedWrite sched, string asm,
7553                                   string aliasStr, Predicate prd = HasAVX512> {
7554   let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
7555     def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7556                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7557                 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7558                 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7559     let Uses = [MXCSR] in
7560     def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7561                  !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7562                  [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7563                  EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7564                  Sched<[sched]>;
7565     def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7566                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7567                 [(set DstVT.RC:$dst, (OpNode
7568                       (SrcVT.ScalarIntMemFrags addr:$src)))]>,
7569                 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7570   } // Predicates = [prd]
7572   def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7573           (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7574   def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7575           (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7576   def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7577           (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7578                                           SrcVT.IntScalarMemOp:$src), 0, "att">;
7579 }
7581 // Convert float/double to signed/unsigned int 32/64
7582 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7583                                    X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7584                                    XS, EVEX_CD8<32, CD8VT1>;
7585 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7586                                    X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7587                                    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7588 defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7589                                    X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7590                                    XS, EVEX_CD8<32, CD8VT1>;
7591 defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7592                                    X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7593                                    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7594 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7595                                    X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7596                                    XD, EVEX_CD8<64, CD8VT1>;
7597 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7598                                    X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7599                                    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7600 defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7601                                    X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7602                                    XD, EVEX_CD8<64, CD8VT1>;
7603 defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7604                                    X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7605                                    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
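// Note (illustrative): the rrb_Int forms above use EVEX.b on a
// register-register encoding to select a static rounding mode (rn/rd/ru/rz,
// each implying {sae}) that overrides MXCSR.RC; the OpNodeRnd nodes carry the
// mode as the i32 timm:$rc operand.  Conceptually, e.g.:
//   vcvtss2si eax, xmm0, {rz-sae}   ; convert with round-toward-zero,
//                                   ; exception flags suppressed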
7607 multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
7608                         X86VectorVTInfo DstVT, SDNode OpNode,
7609                         X86FoldableSchedWrite sched,
7610                         string aliasStr> {
7611   let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
7612     let isCodeGenOnly = 1 in {
7613     def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
7614                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7615                 [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
7616                 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7617     def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
7618                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7619                 [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
7620                 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7621     }
7622   } // Predicates = [HasAVX512]
7623 }
7625 defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
7626                        lrint, WriteCvtSS2I,
7627                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7628 defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
7629                        llrint, WriteCvtSS2I,
7630                        "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7631 defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
7632                        lrint, WriteCvtSD2I,
7633                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7634 defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
7635                        llrint, WriteCvtSD2I,
7636                        "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7638 let Predicates = [HasAVX512] in {
7639   def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
7640   def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
7642   def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
7643   def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
7644 }
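// Note (a sketch, not normative): lrint/llrint round using the current MXCSR
// rounding mode, which is exactly what cvtss2si/cvtsd2si do, so on 64-bit
// targets e.g.
//   long lrint(double x)            // lowers to: vcvtsd2si rax, xmm0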
7646 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
7647 // which produce unnecessary vmovs{s,d} instructions
7648 let Predicates = [HasAVX512] in {
7649 def : Pat<(v4f32 (X86Movss
7650                    (v4f32 VR128X:$dst),
7651                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
7652           (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7654 def : Pat<(v4f32 (X86Movss
7655                    (v4f32 VR128X:$dst),
7656                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
7657           (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7659 def : Pat<(v4f32 (X86Movss
7660                    (v4f32 VR128X:$dst),
7661                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
7662           (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7664 def : Pat<(v4f32 (X86Movss
7665                    (v4f32 VR128X:$dst),
7666                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
7667           (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7669 def : Pat<(v2f64 (X86Movsd
7670                    (v2f64 VR128X:$dst),
7671                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
7672           (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7674 def : Pat<(v2f64 (X86Movsd
7675                    (v2f64 VR128X:$dst),
7676                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
7677           (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7679 def : Pat<(v2f64 (X86Movsd
7680                    (v2f64 VR128X:$dst),
7681                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
7682           (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7684 def : Pat<(v2f64 (X86Movsd
7685                    (v2f64 VR128X:$dst),
7686                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
7687           (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7689 def : Pat<(v4f32 (X86Movss
7690                    (v4f32 VR128X:$dst),
7691                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
7692           (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7694 def : Pat<(v4f32 (X86Movss
7695                    (v4f32 VR128X:$dst),
7696                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
7697           (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7699 def : Pat<(v4f32 (X86Movss
7700                    (v4f32 VR128X:$dst),
7701                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
7702           (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7704 def : Pat<(v4f32 (X86Movss
7705                    (v4f32 VR128X:$dst),
7706                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
7707           (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7709 def : Pat<(v2f64 (X86Movsd
7710                    (v2f64 VR128X:$dst),
7711                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
7712           (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7714 def : Pat<(v2f64 (X86Movsd
7715                    (v2f64 VR128X:$dst),
7716                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
7717           (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7719 def : Pat<(v2f64 (X86Movsd
7720                    (v2f64 VR128X:$dst),
7721                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
7722           (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7724 def : Pat<(v2f64 (X86Movsd
7725                    (v2f64 VR128X:$dst),
7726                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
7727           (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7728 } // Predicates = [HasAVX512]
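// Illustrative example of the sequence being folded (a sketch, assuming the
// usual clang lowering of the conversion intrinsics): source such as
//   __m128 r = _mm_cvtsi32_ss(a, i);
// reaches isel as roughly (X86Movss a, (scalar_to_vector (sint_to_fp i))); the
// patterns above select that directly to VCVTSI2SSZrr_Int, so the merge into
// the upper elements of `a` is done by the convert itself instead of a
// vcvtsi2ss into a fresh register followed by a vmovss.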
7730 // Convert float/double to signed/unsigned int 32/64 with truncation
7731 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7732                             X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
7733                             SDNode OpNodeInt, SDNode OpNodeSAE,
7734                             X86FoldableSchedWrite sched, string aliasStr,
7735                             Predicate prd = HasAVX512> {
7736 let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
7737   let isCodeGenOnly = 1 in {
7738   def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7739               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7740               [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7741               EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7742   def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7743               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7744               [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7745               EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7746   }
7748   def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7749             !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7750            [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7751            EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7752   let Uses = [MXCSR] in
7753   def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7754             !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7755             [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7756                                   EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7757   def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7758               (ins _SrcRC.IntScalarMemOp:$src),
7759               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7760               [(set _DstRC.RC:$dst,
7761                 (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
7762               EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7763 } // Predicates = [prd]
7765   def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7766           (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7767   def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7768           (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7769   def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7770           (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7771                                           _SrcRC.IntScalarMemOp:$src), 0, "att">;
7772 }
7774 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7775                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7776                         "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7777 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7778                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7779                         "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7780 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7781                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7782                         "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7783 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7784                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7785                         "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7787 defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7788                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7789                         "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7790 defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7791                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7792                         "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
7793 defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7794                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7795                         "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7796 defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7797                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7798                         "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
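// Note (illustrative): the vcvtt* forms always truncate (round toward zero)
// regardless of MXCSR.RC, matching C cast semantics, so on AVX-512 targets
// typically:
//   int i = (int)f;                          // f32 -> vcvttss2si  eax, xmm0
//   unsigned long long u = (unsigned long long)d;  // f64 -> vcvttsd2usi rax, xmm0
// The rrb_Int forms add {sae} to suppress exception reporting; they still
// truncate.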
7800 //===----------------------------------------------------------------------===//
7801 // AVX-512  Convert from float to double and back
7802 //===----------------------------------------------------------------------===//
7804 let Uses = [MXCSR], mayRaiseFPException = 1 in
7805 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7806                                 X86VectorVTInfo _Src, SDNode OpNode,
7807                                 X86FoldableSchedWrite sched> {
7808   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7809                          (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7810                          "$src2, $src1", "$src1, $src2",
7811                          (_.VT (OpNode (_.VT _.RC:$src1),
7812                                        (_Src.VT _Src.RC:$src2)))>,
7813                          EVEX_4V, VEX_LIG, Sched<[sched]>;
7814   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7815                          (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7816                          "$src2, $src1", "$src1, $src2",
7817                          (_.VT (OpNode (_.VT _.RC:$src1),
7818                                   (_Src.ScalarIntMemFrags addr:$src2)))>,
7819                          EVEX_4V, VEX_LIG,
7820                          Sched<[sched.Folded, sched.ReadAfterFold]>;
7822   let isCodeGenOnly = 1, hasSideEffects = 0 in {
7823     def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7824                (ins _.FRC:$src1, _Src.FRC:$src2),
7825                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7826                EVEX_4V, VEX_LIG, Sched<[sched]>;
7827     let mayLoad = 1 in
7828     def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7829                (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7830                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7831                EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7832   }
7833 }
7835 // Scalar Conversion with SAE - suppress all exceptions
7836 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7837                                     X86VectorVTInfo _Src, SDNode OpNodeSAE,
7838                                     X86FoldableSchedWrite sched> {
7839   let Uses = [MXCSR] in
7840   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7841                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7842                         "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7843                         (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7844                                          (_Src.VT _Src.RC:$src2)))>,
7845                         EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
7846 }
7848 // Scalar Conversion with rounding control (RC)
7849 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7850                                    X86VectorVTInfo _Src, SDNode OpNodeRnd,
7851                                    X86FoldableSchedWrite sched> {
7852   let Uses = [MXCSR] in
7853   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7854                         (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7855                         "$rc, $src2, $src1", "$src1, $src2, $rc",
7856                         (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7857                                          (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
7858                         EVEX_4V, VEX_LIG, Sched<[sched]>,
7859                         EVEX_B, EVEX_RC;
7860 }
7861 multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
7862                                       SDNode OpNode, SDNode OpNodeRnd,
7863                                       X86FoldableSchedWrite sched,
7864                                       X86VectorVTInfo _src, X86VectorVTInfo _dst,
7865                                       Predicate prd = HasAVX512> {
7866   let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7867     defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7868              avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7869                                OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
7870   }
7871 }
7873 multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
7874                                        SDNode OpNode, SDNode OpNodeSAE,
7875                                        X86FoldableSchedWrite sched,
7876                                        X86VectorVTInfo _src, X86VectorVTInfo _dst,
7877                                        Predicate prd = HasAVX512> {
7878   let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7879     defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7880              avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7881              EVEX_CD8<_src.EltSize, CD8VT1>;
7882   }
7883 }
7884 defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
7885                                          X86froundsRnd, WriteCvtSD2SS, f64x_info,
7886                                          f32x_info>, XD, VEX_W;
7887 defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
7888                                           X86fpextsSAE, WriteCvtSS2SD, f32x_info,
7889                                           f64x_info>, XS;
7890 defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
7891                                           X86froundsRnd, WriteCvtSD2SS, f64x_info,
7892                                           f16x_info, HasFP16>, T_MAP5XD, VEX_W;
7893 defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
7894                                           X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7895                                           f64x_info, HasFP16>, T_MAP5XS;
7896 defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
7897                                           X86froundsRnd, WriteCvtSD2SS, f32x_info,
7898                                           f16x_info, HasFP16>, T_MAP5PS;
7899 defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
7900                                           X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7901                                           f32x_info, HasFP16>, T_MAP6PS;
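// Illustrative semantics (Intel syntax; low element only, upper elements of
// the destination come from the first XMM source; the *SH forms require
// AVX512-FP16):
//   vcvtsd2ss xmm1, xmm2, xmm3      ; xmm1[31:0] = (float)xmm3[63:0]
//   vcvtsh2sd xmm1, xmm2, xmm3      ; xmm1[63:0] = (double)xmm3[15:0]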
7903 def : Pat<(f64 (any_fpextend FR32X:$src)),
7904           (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7905           Requires<[HasAVX512]>;
7906 def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
7907           (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7908           Requires<[HasAVX512, OptForSize]>;
7910 def : Pat<(f32 (any_fpround FR64X:$src)),
7911           (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7912            Requires<[HasAVX512]>;
7914 def : Pat<(f32 (any_fpextend FR16X:$src)),
7915           (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
7916           Requires<[HasFP16]>;
7917 def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
7918           (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
7919           Requires<[HasFP16, OptForSize]>;
7921 def : Pat<(f64 (any_fpextend FR16X:$src)),
7922           (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
7923           Requires<[HasFP16]>;
7924 def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
7925           (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7926           Requires<[HasFP16, OptForSize]>;
7928 def : Pat<(f16 (any_fpround FR32X:$src)),
7929           (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
7930            Requires<[HasFP16]>;
7931 def : Pat<(f16 (any_fpround FR64X:$src)),
7932           (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
7933            Requires<[HasFP16]>;
7935 def : Pat<(v4f32 (X86Movss
7936                    (v4f32 VR128X:$dst),
7937                    (v4f32 (scalar_to_vector
7938                      (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7939           (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7940           Requires<[HasAVX512]>;
7942 def : Pat<(v2f64 (X86Movsd
7943                    (v2f64 VR128X:$dst),
7944                    (v2f64 (scalar_to_vector
7945                      (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7946           (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7947           Requires<[HasAVX512]>;
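// Note (illustrative): the two patterns above fold the DAG shape
//   (X86Movss dst, (scalar_to_vector (fpround (extractelt src, 0))))
// (and the Movsd/fpextend analogue) into the _Int register forms, so the
// convert itself performs the merge into the upper elements instead of a
// separate vmovss/vmovsd.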
7949 //===----------------------------------------------------------------------===//
7950 // AVX-512  Vector convert from signed/unsigned integer to float/double
7951 //          and from float/double to signed/unsigned integer
7952 //===----------------------------------------------------------------------===//
7954 multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7955                           X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode,
7956                           X86FoldableSchedWrite sched,
7957                           string Broadcast = _.BroadcastStr,
7958                           string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7959                           RegisterClass MaskRC = _.KRCWM,
7960                           dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
7961                           dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
7962 let Uses = [MXCSR], mayRaiseFPException = 1 in {
7963   defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
7964                          (ins _Src.RC:$src),
7965                          (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
7966                          (ins MaskRC:$mask, _Src.RC:$src),
7967                           OpcodeStr, "$src", "$src",
7968                          (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7969                          (vselect_mask MaskRC:$mask,
7970                                        (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7971                                        _.RC:$src0),
7972                          (vselect_mask MaskRC:$mask,
7973                                        (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7974                                        _.ImmAllZerosV)>,
7975                          EVEX, Sched<[sched]>;
7977   defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7978                          (ins MemOp:$src),
7979                          (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
7980                          (ins MaskRC:$mask, MemOp:$src),
7981                          OpcodeStr#Alias, "$src", "$src",
7982                          LdDAG,
7983                          (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
7984                          (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
7985                          EVEX, Sched<[sched.Folded]>;
7987   defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7988                          (ins _Src.ScalarMemOp:$src),
7989                          (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
7990                          (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
7991                          OpcodeStr,
7992                          "${src}"#Broadcast, "${src}"#Broadcast,
7993                          (_.VT (OpNode (_Src.VT
7994                                   (_Src.BroadcastLdFrag addr:$src))
7995                             )),
7996                          (vselect_mask MaskRC:$mask,
7997                                        (_.VT
7998                                         (MaskOpNode
7999                                          (_Src.VT
8000                                           (_Src.BroadcastLdFrag addr:$src)))),
8001                                        _.RC:$src0),
8002                          (vselect_mask MaskRC:$mask,
8003                                        (_.VT
8004                                         (MaskOpNode
8005                                          (_Src.VT
8006                                           (_Src.BroadcastLdFrag addr:$src)))),
8007                                        _.ImmAllZerosV)>,
8008                          EVEX, EVEX_B, Sched<[sched.Folded]>;
8009   }
8010 }
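// Note (illustrative): avx512_vcvt_fp instantiates three forms per width,
// rr (register), rm (full-vector load) and rmb (embedded broadcast of one
// scalar element), each with unmasked, merge-masked and zero-masked variants
// via vselect_mask.  A broadcasting, zero-masked use looks like, e.g.
// (Intel syntax, assuming a dword-element source as with vcvtdq2ps):
//   vcvtdq2ps zmm0 {k1}{z}, dword ptr [rax]{1to16}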
8011 // Conversion with SAE - suppress all exceptions
8012 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8013                               X86VectorVTInfo _Src, SDNode OpNodeSAE,
8014                               X86FoldableSchedWrite sched> {
8015   let Uses = [MXCSR] in
8016   defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8017                         (ins _Src.RC:$src), OpcodeStr,
8018                         "{sae}, $src", "$src, {sae}",
8019                         (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
8020                         EVEX, EVEX_B, Sched<[sched]>;
8021 }
8023 // Conversion with rounding control (RC)
8024 multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8025                          X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
8026                          X86FoldableSchedWrite sched> {
8027   let Uses = [MXCSR] in
8028   defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8029                         (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
8030                         "$rc, $src", "$src, $rc",
8031                         (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
8032                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
8033 }
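// For example, the rounding-control form of VCVTPD2PS (instantiated below via
// avx512_cvt_trunc) would be written in AT&T syntax roughly as
// "vcvtpd2ps {rn-sae}, %zmm1, %ymm0", with AVX512RC:$rc carrying the embedded
// rounding mode.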
8035 // Similar to avx512_vcvt_fp, but uses an extload for the memory form.
8036 multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8037                                 X86VectorVTInfo _Src, SDPatternOperator OpNode,
8038                                 SDNode MaskOpNode,
8039                                 X86FoldableSchedWrite sched,
8040                                 string Broadcast = _.BroadcastStr,
8041                                 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
8042                                 RegisterClass MaskRC = _.KRCWM>
8043   : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
8044                    Alias, MemOp, MaskRC,
8045                    (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
8046                    (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
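// The extending-load PatFrag ("extload" # _Src.VTName, e.g. extloadv8f16 as
// used in the VCVTPH2PD patterns below) lets the memory form fold a load that
// is widened as part of the conversion itself.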
8048 // Extend [Float to Double, Half to Float]
8049 multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
8050                              AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8051                              X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
8052   let Predicates = [prd] in {
8053     defm Z : avx512_vcvt_fpextend<opc, OpcodeStr,  _dst.info512, _src.info256,
8054                             any_fpextend, fpextend, sched.ZMM>,
8055              avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
8056                                 X86vfpextSAE, sched.ZMM>, EVEX_V512;
8057   }
8058   let Predicates = [prd, HasVLX] in {
8059     defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
8060                                X86any_vfpext, X86vfpext, sched.XMM,
8061                                _dst.info128.BroadcastStr,
8062                                "", f64mem>, EVEX_V128;
8063     defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
8064                                any_fpextend, fpextend, sched.YMM>, EVEX_V256;
8065   }
8066 }
8068 // Truncate [Double to Float, Float to Half]
8069 multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
8070                             AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8071                             X86SchedWriteWidths sched, Predicate prd = HasAVX512,
8072                             PatFrag bcast128 = _src.info128.BroadcastLdFrag,
8073                             PatFrag bcast256 = _src.info256.BroadcastLdFrag,
8074                             PatFrag bcast512 = _src.info512.BroadcastLdFrag,
8075                             PatFrag loadVT128 = _src.info128.LdFrag,
8076                             PatFrag loadVT256 = _src.info256.LdFrag,
8077                             PatFrag loadVT512 = _src.info512.LdFrag,
8078                             RegisterClass maskRC128 = _src.info128.KRCWM,
8079                             RegisterClass maskRC256 = _src.info256.KRCWM,
8080                             RegisterClass maskRC512 = _src.info512.KRCWM> {
8081   let Predicates = [prd] in {
8082     defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
8083                             X86any_vfpround, X86vfpround, sched.ZMM>,
8084              avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
8085                                X86vfproundRnd, sched.ZMM>, EVEX_V512;
8086   }
8087   let Predicates = [prd, HasVLX] in {
8088     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
8089                                null_frag, null_frag, sched.XMM,
8090                                _src.info128.BroadcastStr, "{x}",
8091                                f128mem, maskRC128>, EVEX_V128;
8092     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
8093                                X86any_vfpround, X86vfpround,
8094                                sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;
8096     // Special patterns to allow use of X86vmfpround for masking. Instruction
8097     // patterns have been disabled with null_frag.
8098     def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
8099               (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
8100     def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
8101                             maskRC128:$mask),
8102               (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
8103     def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
8104                             maskRC128:$mask),
8105               (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;
8107     def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
8108               (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
8109     def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
8110                             maskRC128:$mask),
8111               (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
8112     def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
8113                             maskRC128:$mask),
8114               (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;
8116     def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
8117               (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
8118     def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
8119                             (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
8120               (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
8121     def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
8122                             _dst.info128.ImmAllZerosV, maskRC128:$mask),
8123               (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
8124   }
8126   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8127                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
8128   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8129                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8130                   VK2WM:$mask, VR128X:$src), 0, "att">;
8131   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
8132                   "$dst {${mask}} {z}, $src}",
8133                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8134                   VK2WM:$mask, VR128X:$src), 0, "att">;
8135   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8136                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
8137   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8138                   "$dst {${mask}}, ${src}{1to2}}",
8139                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8140                   VK2WM:$mask, f64mem:$src), 0, "att">;
8141   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8142                   "$dst {${mask}} {z}, ${src}{1to2}}",
8143                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8144                   VK2WM:$mask, f64mem:$src), 0, "att">;
8146   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8147                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
8148   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8149                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8150                   VK4WM:$mask, VR256X:$src), 0, "att">;
8151   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8152                   "$dst {${mask}} {z}, $src}",
8153                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8154                   VK4WM:$mask, VR256X:$src), 0, "att">;
8155   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8156                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
8157   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8158                   "$dst {${mask}}, ${src}{1to4}}",
8159                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8160                   VK4WM:$mask, f64mem:$src), 0, "att">;
8161   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8162                   "$dst {${mask}} {z}, ${src}{1to4}}",
8163                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8164                   VK4WM:$mask, f64mem:$src), 0, "att">;
8165 }
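// Example of the "x"/"y" alias forms for VCVTPD2PS (instantiated just below),
// in AT&T syntax:
//   vcvtpd2psx %xmm1, %xmm0    (128-bit source)
//   vcvtpd2psy %ymm1, %xmm0    (256-bit source)
// The suffix disambiguates the source width, since both forms write an XMM
// result.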
8167 defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
8168                                   avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
8169                                   VEX_W, PD, EVEX_CD8<64, CD8VF>;
8170 defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
8171                                    avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
8172                                    PS, EVEX_CD8<32, CD8VH>;
8174 // Extend Half to Double
8175 multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
8176                             X86SchedWriteWidths sched> {
8177   let Predicates = [HasFP16] in {
8178     defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
8179                                   any_fpextend, fpextend, sched.ZMM>,
8180              avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
8181                                 X86vfpextSAE, sched.ZMM>, EVEX_V512;
8182     def : Pat<(v8f64 (extloadv8f16 addr:$src)),
8183                 (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
8184   }
8185   let Predicates = [HasFP16, HasVLX] in {
8186     defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
8187                                      X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
8188                                      f32mem>, EVEX_V128;
8189     defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
8190                                      X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
8191                                      f64mem>, EVEX_V256;
8192   }
8193 }
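// The 128-bit and 256-bit forms only consume the low 2 or 4 f16 elements of
// the v8f16 source, which is why the memory operands above are f32mem/f64mem
// and the broadcast strings are "{1to2}"/"{1to4}".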
8195 // Truncate Double to Half
8196 multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
8197   let Predicates = [HasFP16] in {
8198     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
8199                             X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
8200              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
8201                                X86vfproundRnd, sched.ZMM>, EVEX_V512;
8202   }
8203   let Predicates = [HasFP16, HasVLX] in {
8204     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
8205                                null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8206                                VK2WM>, EVEX_V128;
8207     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
8208                                null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
8209                                VK4WM>, EVEX_V256;
8210   }
8211   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8212                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8213                   VR128X:$src), 0, "att">;
8214   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8215                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8216                   VK2WM:$mask, VR128X:$src), 0, "att">;
8217   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8218                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8219                   VK2WM:$mask, VR128X:$src), 0, "att">;
8220   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8221                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8222                   i64mem:$src), 0, "att">;
8223   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8224                   "$dst {${mask}}, ${src}{1to2}}",
8225                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8226                   VK2WM:$mask, i64mem:$src), 0, "att">;
8227   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8228                   "$dst {${mask}} {z}, ${src}{1to2}}",
8229                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8230                   VK2WM:$mask, i64mem:$src), 0, "att">;
8232   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8233                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8234                   VR256X:$src), 0, "att">;
8235   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8236                   "$dst {${mask}}, $src}",
8237                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8238                   VK4WM:$mask, VR256X:$src), 0, "att">;
8239   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8240                   "$dst {${mask}} {z}, $src}",
8241                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8242                   VK4WM:$mask, VR256X:$src), 0, "att">;
8243   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8244                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8245                   i64mem:$src), 0, "att">;
8246   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8247                   "$dst {${mask}}, ${src}{1to4}}",
8248                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8249                   VK4WM:$mask, i64mem:$src), 0, "att">;
8250   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8251                   "$dst {${mask}} {z}, ${src}{1to4}}",
8252                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8253                   VK4WM:$mask, i64mem:$src), 0, "att">;
8255   def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
8256                   (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
8257                   VR512:$src), 0, "att">;
8258   def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
8259                   "$dst {${mask}}, $src}",
8260                   (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
8261                   VK8WM:$mask, VR512:$src), 0, "att">;
8262   def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
8263                   "$dst {${mask}} {z}, $src}",
8264                   (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
8265                   VK8WM:$mask, VR512:$src), 0, "att">;
8266   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
8267                   (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
8268                   i64mem:$src), 0, "att">;
8269   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
8270                   "$dst {${mask}}, ${src}{1to8}}",
8271                   (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
8272                   VK8WM:$mask, i64mem:$src), 0, "att">;
8273   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
8274                   "$dst {${mask}} {z}, ${src}{1to8}}",
8275                   (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
8276                   VK8WM:$mask, i64mem:$src), 0, "att">;
8277 }
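// As with the "x"/"y" aliases above, the "z" aliases let the assembler
// disambiguate the 512-bit source, e.g. (AT&T) "vcvtpd2phz %zmm1, %xmm0",
// since every source width produces a v8f16 result in an XMM register.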
8279 defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info,
8280                                    avx512vl_f32_info, SchedWriteCvtPD2PS,
8281                                    HasFP16>, T_MAP5PD, EVEX_CD8<32, CD8VF>;
8282 defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
8283                                     avx512vl_f16_info, SchedWriteCvtPS2PD,
8284                                     HasFP16>, T_MAP6PD, EVEX_CD8<16, CD8VH>;
8285 defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
8286                                  VEX_W, T_MAP5PD, EVEX_CD8<64, CD8VF>;
8287 defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
8288                                  T_MAP5PS, EVEX_CD8<16, CD8VQ>;
8290 let Predicates = [HasFP16, HasVLX] in {
8291   // Special patterns to allow use of X86vmfpround for masking. Instruction
8292   // patterns have been disabled with null_frag.
8293   def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
8294             (VCVTPD2PHZ256rr VR256X:$src)>;
8295   def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
8296                           VK4WM:$mask)),
8297             (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
8298   def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
8299                           VK4WM:$mask),
8300             (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
8302   def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
8303             (VCVTPD2PHZ256rm addr:$src)>;
8304   def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
8305                           VK4WM:$mask),
8306             (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8307   def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
8308                           VK4WM:$mask),
8309             (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;
8311   def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
8312             (VCVTPD2PHZ256rmb addr:$src)>;
8313   def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8314                           (v8f16 VR128X:$src0), VK4WM:$mask),
8315             (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8316   def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8317                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
8318             (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
8320   def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
8321             (VCVTPD2PHZ128rr VR128X:$src)>;
8322   def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
8323                           VK2WM:$mask),
8324             (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8325   def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
8326                           VK2WM:$mask),
8327             (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
8329   def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
8330             (VCVTPD2PHZ128rm addr:$src)>;
8331   def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
8332                           VK2WM:$mask),
8333             (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8334   def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
8335                           VK2WM:$mask),
8336             (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;
8338   def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
8339             (VCVTPD2PHZ128rmb addr:$src)>;
8340   def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8341                           (v8f16 VR128X:$src0), VK2WM:$mask),
8342             (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8343   def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8344                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
8345             (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
8346 }
8348 // Convert Signed/Unsigned Doubleword to Double
8349 let Uses = []<Register>, mayRaiseFPException = 0 in
8350 multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8351                            SDNode MaskOpNode, SDPatternOperator OpNode128,
8352                            SDNode MaskOpNode128,
8353                            X86SchedWriteWidths sched> {
8354   // No rounding in this op
8355   let Predicates = [HasAVX512] in
8356     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
8357                             MaskOpNode, sched.ZMM>, EVEX_V512;
8359   let Predicates = [HasVLX] in {
8360     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
8361                                OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
8362                                "", i64mem, VK2WM,
8363                                (v2f64 (OpNode128 (bc_v4i32
8364                                 (v2i64
8365                                  (scalar_to_vector (loadi64 addr:$src)))))),
8366                                (v2f64 (MaskOpNode128 (bc_v4i32
8367                                 (v2i64
8368                                  (scalar_to_vector (loadi64 addr:$src))))))>,
8369                                EVEX_V128;
8370     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
8371                                MaskOpNode, sched.YMM>, EVEX_V256;
8372   }
8373 }
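// Note the Z128 form above: only the low two i32 elements are converted, so
// the memory form loads just 64 bits (i64mem) and the patterns wrap loadi64
// in scalar_to_vector/bc_v4i32 before applying OpNode128/MaskOpNode128.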
8375 // Convert Signed/Unsigned Doubleword to Float
8376 multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8377                            SDNode MaskOpNode, SDNode OpNodeRnd,
8378                            X86SchedWriteWidths sched> {
8379   let Predicates = [HasAVX512] in
8380     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
8381                             MaskOpNode, sched.ZMM>,
8382              avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
8383                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8385   let Predicates = [HasVLX] in {
8386     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
8387                                MaskOpNode, sched.XMM>, EVEX_V128;
8388     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
8389                                MaskOpNode, sched.YMM>, EVEX_V256;
8390   }
8391 }
8393 // Convert Float to Signed/Unsigned Doubleword with truncation
8394 multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8395                             SDNode MaskOpNode,
8396                             SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8397   let Predicates = [HasAVX512] in {
8398     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8399                             MaskOpNode, sched.ZMM>,
8400              avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
8401                                 OpNodeSAE, sched.ZMM>, EVEX_V512;
8402   }
8403   let Predicates = [HasVLX] in {
8404     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8405                                MaskOpNode, sched.XMM>, EVEX_V128;
8406     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8407                                MaskOpNode, sched.YMM>, EVEX_V256;
8408   }
8409 }
8411 // Convert Float to Signed/Unsigned Doubleword
8412 multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8413                            SDNode MaskOpNode, SDNode OpNodeRnd,
8414                            X86SchedWriteWidths sched> {
8415   let Predicates = [HasAVX512] in {
8416     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8417                             MaskOpNode, sched.ZMM>,
8418              avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
8419                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
8420   }
8421   let Predicates = [HasVLX] in {
8422     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8423                                MaskOpNode, sched.XMM>, EVEX_V128;
8424     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8425                                MaskOpNode, sched.YMM>, EVEX_V256;
8426   }
8427 }
8429 // Convert Double to Signed/Unsigned Doubleword with truncation
8430 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8431                             SDNode MaskOpNode, SDNode OpNodeSAE,
8432                             X86SchedWriteWidths sched> {
8433   let Predicates = [HasAVX512] in {
8434     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8435                             MaskOpNode, sched.ZMM>,
8436              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
8437                                 OpNodeSAE, sched.ZMM>, EVEX_V512;
8438   }
8439   let Predicates = [HasVLX] in {
8440     // We need "x"/"y" suffixes to distinguish the 128-bit and 256-bit memory
8441     // forms of these instructions in the asm parser, since they share the same
8442     // dest type ('v4i32x_info'). The broadcast string is specified explicitly
8443     // for the same reason.
8444     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8445                                null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8446                                VK2WM>, EVEX_V128;
8447     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8448                                MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8449   }
8451   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8452                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8453                   VR128X:$src), 0, "att">;
8454   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8455                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8456                   VK2WM:$mask, VR128X:$src), 0, "att">;
8457   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8458                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8459                   VK2WM:$mask, VR128X:$src), 0, "att">;
8460   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8461                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8462                   f64mem:$src), 0, "att">;
8463   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8464                   "$dst {${mask}}, ${src}{1to2}}",
8465                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8466                   VK2WM:$mask, f64mem:$src), 0, "att">;
8467   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8468                   "$dst {${mask}} {z}, ${src}{1to2}}",
8469                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8470                   VK2WM:$mask, f64mem:$src), 0, "att">;
8472   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8473                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8474                   VR256X:$src), 0, "att">;
8475   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8476                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8477                   VK4WM:$mask, VR256X:$src), 0, "att">;
8478   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8479                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8480                   VK4WM:$mask, VR256X:$src), 0, "att">;
8481   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8482                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8483                   f64mem:$src), 0, "att">;
8484   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8485                   "$dst {${mask}}, ${src}{1to4}}",
8486                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8487                   VK4WM:$mask, f64mem:$src), 0, "att">;
8488   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8489                   "$dst {${mask}} {z}, ${src}{1to4}}",
8490                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8491                   VK4WM:$mask, f64mem:$src), 0, "att">;
8492 }
8494 // Convert Double to Signed/Unsigned Doubleword
8495 multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8496                            SDNode MaskOpNode, SDNode OpNodeRnd,
8497                            X86SchedWriteWidths sched> {
8498   let Predicates = [HasAVX512] in {
8499     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8500                             MaskOpNode, sched.ZMM>,
8501              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
8502                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8503   }
8504   let Predicates = [HasVLX] in {
8505     // We need "x"/"y" suffixes to distinguish the 128-bit and 256-bit memory
8506     // forms of these instructions in the asm parser, since they share the same
8507     // dest type ('v4i32x_info'). The broadcast string is specified explicitly
8508     // for the same reason.
8509     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8510                                null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8511                                VK2WM>, EVEX_V128;
8512     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8513                                MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8514   }
8516   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8517                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
8518   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8519                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8520                   VK2WM:$mask, VR128X:$src), 0, "att">;
8521   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8522                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8523                   VK2WM:$mask, VR128X:$src), 0, "att">;
8524   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8525                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8526                   f64mem:$src), 0, "att">;
8527   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8528                   "$dst {${mask}}, ${src}{1to2}}",
8529                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8530                   VK2WM:$mask, f64mem:$src), 0, "att">;
8531   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8532                   "$dst {${mask}} {z}, ${src}{1to2}}",
8533                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8534                   VK2WM:$mask, f64mem:$src), 0, "att">;
8536   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8537                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
8538   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8539                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8540                   VK4WM:$mask, VR256X:$src), 0, "att">;
8541   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8542                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8543                   VK4WM:$mask, VR256X:$src), 0, "att">;
8544   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8545                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8546                   f64mem:$src), 0, "att">;
8547   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8548                   "$dst {${mask}}, ${src}{1to4}}",
8549                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8550                   VK4WM:$mask, f64mem:$src), 0, "att">;
8551   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8552                   "$dst {${mask}} {z}, ${src}{1to4}}",
8553                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8554                   VK4WM:$mask, f64mem:$src), 0, "att">;
8555 }
8557 // Convert Double to Signed/Unsigned Quadword
8558 multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8559                            SDNode MaskOpNode, SDNode OpNodeRnd,
8560                            X86SchedWriteWidths sched> {
8561   let Predicates = [HasDQI] in {
8562     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8563                             MaskOpNode, sched.ZMM>,
8564              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
8565                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8566   }
8567   let Predicates = [HasDQI, HasVLX] in {
8568     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8569                                MaskOpNode, sched.XMM>, EVEX_V128;
8570     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8571                                MaskOpNode, sched.YMM>, EVEX_V256;
8572   }
8573 }
8575 // Convert Double to Signed/Unsigned Quadword with truncation
8576 multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8577                             SDNode MaskOpNode, SDNode OpNodeRnd,
8578                             X86SchedWriteWidths sched> {
8579   let Predicates = [HasDQI] in {
8580     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8581                             MaskOpNode, sched.ZMM>,
8582              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
8583                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
8584   }
8585   let Predicates = [HasDQI, HasVLX] in {
8586     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8587                                MaskOpNode, sched.XMM>, EVEX_V128;
8588     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8589                                MaskOpNode, sched.YMM>, EVEX_V256;
8590   }
8591 }
8593 // Convert Signed/Unsigned Quadword to Double
8594 multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8595                            SDNode MaskOpNode, SDNode OpNodeRnd,
8596                            X86SchedWriteWidths sched> {
8597   let Predicates = [HasDQI] in {
8598     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
8599                             MaskOpNode, sched.ZMM>,
8600              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
8601                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8602   }
8603   let Predicates = [HasDQI, HasVLX] in {
8604     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
8605                                MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
8606     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
8607                                MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
8608   }
8609 }
8611 // Convert Float to Signed/Unsigned Quadword
8612 multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8613                            SDNode MaskOpNode, SDNode OpNodeRnd,
8614                            X86SchedWriteWidths sched> {
8615   let Predicates = [HasDQI] in {
8616     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8617                             MaskOpNode, sched.ZMM>,
8618              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
8619                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8620   }
8621   let Predicates = [HasDQI, HasVLX] in {
8622     // The broadcast string is specified explicitly since we take only 2
8623     // elements from the v4f32x_info source.
8624     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8625                                MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8626                                (v2i64 (OpNode (bc_v4f32
8627                                 (v2f64
8628                                  (scalar_to_vector (loadf64 addr:$src)))))),
8629                                (v2i64 (MaskOpNode (bc_v4f32
8630                                 (v2f64
8631                                  (scalar_to_vector (loadf64 addr:$src))))))>,
8632                                EVEX_V128;
8633     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8634                                MaskOpNode, sched.YMM>, EVEX_V256;
8635   }
8636 }
8638 // Convert Float to Signed/Unsigned Quadword with truncation
8639 multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8640                             SDNode MaskOpNode, SDNode OpNodeRnd,
8641                             X86SchedWriteWidths sched> {
8642   let Predicates = [HasDQI] in {
8643     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8644                             MaskOpNode, sched.ZMM>,
8645              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
8646                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
8647   }
8648   let Predicates = [HasDQI, HasVLX] in {
8649     // The broadcast string is specified explicitly since we take only 2
8650     // elements from the v4f32x_info source.
8651     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8652                                MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8653                                (v2i64 (OpNode (bc_v4f32
8654                                 (v2f64
8655                                  (scalar_to_vector (loadf64 addr:$src)))))),
8656                                (v2i64 (MaskOpNode (bc_v4f32
8657                                 (v2f64
8658                                  (scalar_to_vector (loadf64 addr:$src))))))>,
8659                                EVEX_V128;
8660     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8661                                MaskOpNode, sched.YMM>, EVEX_V256;
8662   }
8663 }
8665 // Convert Signed/Unsigned Quadword to Float
8666 // Also convert Signed/Unsigned Doubleword to Half
8667 multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8668                                  SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
8669                                  SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
8670                                  AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8671                                  X86SchedWriteWidths sched, Predicate prd = HasDQI> {
8672   let Predicates = [prd] in {
8673     defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
8674                             MaskOpNode, sched.ZMM>,
8675              avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
8676                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8677   }
8678   let Predicates = [prd, HasVLX] in {
8679     // We need "x"/"y" suffixes to distinguish the 128-bit and 256-bit memory
8680     // forms of these instructions in the asm parser, since they share the same
8681     // 128-bit destination type. The broadcast string is specified explicitly
8682     // for the same reason.
8683     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
8684                                null_frag, sched.XMM, _src.info128.BroadcastStr,
8685                                "{x}", i128mem, _src.info128.KRCWM>,
8686                                EVEX_V128, NotEVEX2VEXConvertible;
8687     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
8688                                MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
8689                                "{y}">, EVEX_V256,
8690                                NotEVEX2VEXConvertible;
8692     // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
8693     // patterns have been disabled with null_frag.
8694     def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
8695               (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
8696     def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
8697                              _src.info128.KRCWM:$mask),
8698               (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
8699     def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
8700                              _src.info128.KRCWM:$mask),
8701               (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;
8703     def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
8704               (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
8705     def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
8706                              _src.info128.KRCWM:$mask),
8707               (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8708     def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
8709                              _src.info128.KRCWM:$mask),
8710               (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;
8712     def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
8713               (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
8714     def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8715                              (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
8716               (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8717     def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8718                              _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
8719               (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
8720   }
8722   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8723                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8724                   VR128X:$src), 0, "att">;
8725   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8726                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8727                   VK2WM:$mask, VR128X:$src), 0, "att">;
8728   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8729                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8730                   VK2WM:$mask, VR128X:$src), 0, "att">;
8731   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8732                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8733                   i64mem:$src), 0, "att">;
8734   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8735                   "$dst {${mask}}, ${src}{1to2}}",
8736                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8737                   VK2WM:$mask, i64mem:$src), 0, "att">;
8738   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8739                   "$dst {${mask}} {z}, ${src}{1to2}}",
8740                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8741                   VK2WM:$mask, i64mem:$src), 0, "att">;
8743   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8744                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8745                   VR256X:$src), 0, "att">;
8746   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8747                   "$dst {${mask}}, $src}",
8748                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8749                   VK4WM:$mask, VR256X:$src), 0, "att">;
8750   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8751                   "$dst {${mask}} {z}, $src}",
8752                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8753                   VK4WM:$mask, VR256X:$src), 0, "att">;
8754   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8755                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8756                   i64mem:$src), 0, "att">;
8757   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8758                   "$dst {${mask}}, ${src}{1to4}}",
8759                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8760                   VK4WM:$mask, i64mem:$src), 0, "att">;
8761   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8762                   "$dst {${mask}} {z}, ${src}{1to4}}",
8763                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8764                   VK4WM:$mask, i64mem:$src), 0, "att">;
8765 }
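// This multiclass is shared by the VCVT[U]DQ2PH and VCVT[U]QQ2PS definitions
// below; their 128-bit variants are matched through the explicit
// OpNode128/OpNode128M patterns above, since the instruction patterns were
// disabled with null_frag.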
8767 defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
8768                                  X86any_VSintToFP, X86VSintToFP,
8769                                  SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8771 defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
8772                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8773                                 PS, EVEX_CD8<32, CD8VF>;
8775 defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
8776                                  X86cvttp2si, X86cvttp2siSAE,
8777                                  SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>;
8779 defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
8780                                  X86cvttp2si, X86cvttp2siSAE,
8781                                  SchedWriteCvtPD2DQ>,
8782                                  PD, VEX_W, EVEX_CD8<64, CD8VF>;
8784 defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
8785                                  X86cvttp2ui, X86cvttp2uiSAE,
8786                                  SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>;
8788 defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
8789                                  X86cvttp2ui, X86cvttp2uiSAE,
8790                                  SchedWriteCvtPD2DQ>,
8791                                  PS, VEX_W, EVEX_CD8<64, CD8VF>;
8793 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
8794                                   uint_to_fp, X86any_VUintToFP, X86VUintToFP,
8795                                   SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8797 defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
8798                                  uint_to_fp, X86VUintToFpRnd,
8799                                  SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>;
8801 defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
8802                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8803                                  EVEX_CD8<32, CD8VF>;
8805 defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
8806                                  X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
8807                                  VEX_W, EVEX_CD8<64, CD8VF>;
8809 defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
8810                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8811                                  PS, EVEX_CD8<32, CD8VF>;
8813 defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
8814                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8815                                  PS, EVEX_CD8<64, CD8VF>;
8817 defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
8818                                  X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8819                                  PD, EVEX_CD8<64, CD8VF>;
8821 defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
8822                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8823                                  EVEX_CD8<32, CD8VH>;
8825 defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
8826                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8827                                  PD, EVEX_CD8<64, CD8VF>;
8829 defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
8830                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
8831                                  EVEX_CD8<32, CD8VH>;
8833 defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
8834                                  X86cvttp2si, X86cvttp2siSAE,
8835                                  SchedWriteCvtPD2DQ>, VEX_W,
8836                                  PD, EVEX_CD8<64, CD8VF>;
8838 defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
8839                                  X86cvttp2si, X86cvttp2siSAE,
8840                                  SchedWriteCvtPS2DQ>, PD,
8841                                  EVEX_CD8<32, CD8VH>;
8843 defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
8844                                  X86cvttp2ui, X86cvttp2uiSAE,
8845                                  SchedWriteCvtPD2DQ>, VEX_W,
8846                                  PD, EVEX_CD8<64, CD8VF>;
8848 defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8849                                  X86cvttp2ui, X86cvttp2uiSAE,
8850                                  SchedWriteCvtPS2DQ>, PD,
8851                                  EVEX_CD8<32, CD8VH>;
8853 defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
8854                             sint_to_fp, X86VSintToFpRnd,
8855                             SchedWriteCvtDQ2PD>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
8857 defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
8858                             uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
8859                             VEX_W, XS, EVEX_CD8<64, CD8VF>;
8861 defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp,
8862                             X86any_VSintToFP, X86VMSintToFP,
8863                             X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8864                             SchedWriteCvtDQ2PS, HasFP16>,
8865                             T_MAP5PS, EVEX_CD8<32, CD8VF>;
8867 defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp,
8868                             X86any_VUintToFP, X86VMUintToFP,
8869                             X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8870                             SchedWriteCvtDQ2PS, HasFP16>, T_MAP5XD,
8871                             EVEX_CD8<32, CD8VF>;
8873 defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp,
8874                             X86any_VSintToFP, X86VMSintToFP,
8875                             X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8876                             SchedWriteCvtDQ2PS>, VEX_W, PS,
8877                             EVEX_CD8<64, CD8VF>;
8879 defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp,
8880                             X86any_VUintToFP, X86VMUintToFP,
8881                             X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8882                             SchedWriteCvtDQ2PS>, VEX_W, XD,
8883                             EVEX_CD8<64, CD8VF>;
8885 let Predicates = [HasVLX] in {
8886   // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8887   // patterns have been disabled with null_frag.
8888   def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8889             (VCVTPD2DQZ128rr VR128X:$src)>;
8890   def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8891                           VK2WM:$mask),
8892             (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8893   def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8894                           VK2WM:$mask),
8895             (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8897   def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8898             (VCVTPD2DQZ128rm addr:$src)>;
8899   def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8900                           VK2WM:$mask),
8901             (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8902   def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8903                           VK2WM:$mask),
8904             (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8906   def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
8907             (VCVTPD2DQZ128rmb addr:$src)>;
8908   def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8909                           (v4i32 VR128X:$src0), VK2WM:$mask),
8910             (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8911   def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8912                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8913             (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8915   // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8916   // patterns have been disabled with null_frag.
8917   def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
8918             (VCVTTPD2DQZ128rr VR128X:$src)>;
8919   def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8920                           VK2WM:$mask),
8921             (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8922   def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8923                           VK2WM:$mask),
8924             (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8926   def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
8927             (VCVTTPD2DQZ128rm addr:$src)>;
8928   def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8929                           VK2WM:$mask),
8930             (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8931   def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8932                           VK2WM:$mask),
8933             (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8935   def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
8936             (VCVTTPD2DQZ128rmb addr:$src)>;
8937   def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8938                           (v4i32 VR128X:$src0), VK2WM:$mask),
8939             (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8940   def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8941                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8942             (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8944   // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8945   // patterns have been disabled with null_frag.
8946   def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8947             (VCVTPD2UDQZ128rr VR128X:$src)>;
8948   def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8949                            VK2WM:$mask),
8950             (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8951   def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8952                            VK2WM:$mask),
8953             (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8955   def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8956             (VCVTPD2UDQZ128rm addr:$src)>;
8957   def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8958                            VK2WM:$mask),
8959             (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8960   def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8961                            VK2WM:$mask),
8962             (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8964   def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
8965             (VCVTPD2UDQZ128rmb addr:$src)>;
8966   def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8967                            (v4i32 VR128X:$src0), VK2WM:$mask),
8968             (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8969   def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8970                            v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8971             (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8973   // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
8974   // patterns have been disabled with null_frag.
8975   def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
8976             (VCVTTPD2UDQZ128rr VR128X:$src)>;
8977   def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8978                           VK2WM:$mask),
8979             (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8980   def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8981                           VK2WM:$mask),
8982             (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8984   def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
8985             (VCVTTPD2UDQZ128rm addr:$src)>;
8986   def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8987                           VK2WM:$mask),
8988             (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8989   def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8990                           VK2WM:$mask),
8991             (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8993   def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
8994             (VCVTTPD2UDQZ128rmb addr:$src)>;
8995   def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8996                           (v4i32 VR128X:$src0), VK2WM:$mask),
8997             (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8998   def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8999                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
9000             (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
9003 let Predicates = [HasDQI, HasVLX] in {
9004   def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9005             (VCVTPS2QQZ128rm addr:$src)>;
9006   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9007                                  (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9008                                  VR128X:$src0)),
9009             (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9010   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9011                                  (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9012                                  v2i64x_info.ImmAllZerosV)),
9013             (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
9015   def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9016             (VCVTPS2UQQZ128rm addr:$src)>;
9017   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9018                                  (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9019                                  VR128X:$src0)),
9020             (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9021   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9022                                  (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9023                                  v2i64x_info.ImmAllZerosV)),
9024             (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
9026   def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9027             (VCVTTPS2QQZ128rm addr:$src)>;
9028   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9029                                  (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9030                                  VR128X:$src0)),
9031             (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9032   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9033                                  (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9034                                  v2i64x_info.ImmAllZerosV)),
9035             (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
9037   def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9038             (VCVTTPS2UQQZ128rm addr:$src)>;
9039   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9040                                  (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9041                                  VR128X:$src0)),
9042             (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9043   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9044                                  (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9045                                  v2i64x_info.ImmAllZerosV)),
9046             (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
9049 let Predicates = [HasVLX] in {
9050   def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9051             (VCVTDQ2PDZ128rm addr:$src)>;
9052   def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9053                                  (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9054                                  VR128X:$src0)),
9055             (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9056   def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9057                                  (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9058                                  v2f64x_info.ImmAllZerosV)),
9059             (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
9061   def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9062             (VCVTUDQ2PDZ128rm addr:$src)>;
9063   def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9064                                  (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9065                                  VR128X:$src0)),
9066             (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9067   def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9068                                  (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9069                                  v2f64x_info.ImmAllZerosV)),
9070             (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
9073 //===----------------------------------------------------------------------===//
9074 // Half precision conversion instructions
9075 //===----------------------------------------------------------------------===//
9077 let Uses = [MXCSR], mayRaiseFPException = 1 in
9078 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9079                            X86MemOperand x86memop, dag ld_dag,
9080                            X86FoldableSchedWrite sched> {
9081   defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
9082                             (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
9083                             (X86any_cvtph2ps (_src.VT _src.RC:$src)),
9084                             (X86cvtph2ps (_src.VT _src.RC:$src))>,
9085                             T8PD, Sched<[sched]>;
9086   defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
9087                             (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
9088                             (X86any_cvtph2ps (_src.VT ld_dag)),
9089                             (X86cvtph2ps (_src.VT ld_dag))>,
9090                             T8PD, Sched<[sched.Folded]>;
9093 multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9094                                X86FoldableSchedWrite sched> {
9095   let Uses = [MXCSR] in
9096   defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
9097                              (ins _src.RC:$src), "vcvtph2ps",
9098                              "{sae}, $src", "$src, {sae}",
9099                              (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
9100                              T8PD, EVEX_B, Sched<[sched]>;
9103 let Predicates = [HasAVX512] in
9104   defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
9105                                     (load addr:$src), WriteCvtPH2PSZ>,
9106                     avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
9107                     EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
9109 let Predicates = [HasVLX] in {
9110   defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
9111                        (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
9112                        EVEX_CD8<32, CD8VH>;
9113   defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
9114                        (bitconvert (v2i64 (X86vzload64 addr:$src))),
9115                        WriteCvtPH2PS>, EVEX, EVEX_V128,
9116                        EVEX_CD8<32, CD8VH>;
9118   // Pattern match vcvtph2ps of a scalar i64 load.
9119   def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
9120               (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
9121             (VCVTPH2PSZ128rm addr:$src)>;
9124 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9125                            X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
9126 let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9127   def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9128              (ins _src.RC:$src1, i32u8imm:$src2),
9129              "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9130              [(set _dest.RC:$dst,
9131                    (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
9132              Sched<[RR]>;
9133   let Constraints = "$src0 = $dst" in
9134   def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9135              (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9136              "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
9137              [(set _dest.RC:$dst,
9138                    (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
9139                                  _dest.RC:$src0, _src.KRCWM:$mask))]>,
9140              Sched<[RR]>, EVEX_K;
9141   def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9142              (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9143              "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
9144              [(set _dest.RC:$dst,
9145                    (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
9146                                  _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
9147              Sched<[RR]>, EVEX_KZ;
9148   let hasSideEffects = 0, mayStore = 1 in {
9149     def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
9150                (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
9151                "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9152                Sched<[MR]>;
9153     def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
9154                (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9155                "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
9156                 EVEX_K, Sched<[MR]>, NotMemoryFoldable;
9157   }
9161 multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9162                                SchedWrite Sched> {
9163   let hasSideEffects = 0, Uses = [MXCSR] in
9164   defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
9165                    (outs _dest.RC:$dst),
9166                    (ins _src.RC:$src1, i32u8imm:$src2),
9167                    "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
9168                    EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
9171 let Predicates = [HasAVX512] in {
9172   defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
9173                                     WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
9174                     avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
9175                                         EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
9177   def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
9178             (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
9181 let Predicates = [HasVLX] in {
9182   defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
9183                                        WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
9184                                        EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
9185   defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
9186                                        WriteCvtPS2PH, WriteCvtPS2PHSt>,
9187                                        EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
9189   def : Pat<(store (f64 (extractelt
9190                          (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9191                          (iPTR 0))), addr:$dst),
9192             (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9193   def : Pat<(store (i64 (extractelt
9194                          (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9195                          (iPTR 0))), addr:$dst),
9196             (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9197   def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
9198             (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
9201 //  Unordered/Ordered scalar FP compare with SAE, setting EFLAGS
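// The {sae} (EVEX.b) forms suppress all floating-point exceptions raised by
// the compare.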
9202 multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
9203                             string OpcodeStr, Domain d,
9204                             X86FoldableSchedWrite sched = WriteFComX> {
9205   let hasSideEffects = 0, Uses = [MXCSR] in
9206   def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
9207                   !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
9208                   EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
9211 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9212   defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
9213                                    AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9214   defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
9215                                    AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
9216   defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
9217                                    AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9218   defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
9219                                    AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
9222 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9223   defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
9224                                  "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9225                                  EVEX_CD8<32, CD8VT1>;
9226   defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
9227                                   "ucomisd", SSEPackedDouble>, PD, EVEX,
9228                                   VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
9229   defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
9230                                  "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9231                                  EVEX_CD8<32, CD8VT1>;
9232   defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
9233                                  "comisd", SSEPackedDouble>, PD, EVEX,
9234                                   VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
9235   let isCodeGenOnly = 1 in {
9236     defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
9237                           sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9238                           EVEX_CD8<32, CD8VT1>;
9239     defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
9240                           sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
9241                           VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
9243     defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
9244                           sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9245                           EVEX_CD8<32, CD8VT1>;
9246     defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
9247                           sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
9248                           VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
9249   }
9252 let Defs = [EFLAGS], Predicates = [HasFP16] in {
9253   defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
9254                                 SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
9255                                 EVEX_CD8<16, CD8VT1>;
9256   defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
9257                                 SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
9258                                 EVEX_CD8<16, CD8VT1>;
9259   defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
9260                                 "ucomish", SSEPackedSingle>, T_MAP5PS, EVEX,
9261                                 VEX_LIG, EVEX_CD8<16, CD8VT1>;
9262   defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
9263                                 "comish", SSEPackedSingle>, T_MAP5PS, EVEX,
9264                                 VEX_LIG, EVEX_CD8<16, CD8VT1>;
9265   let isCodeGenOnly = 1 in {
9266     defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
9267                                 sse_load_f16, "ucomish", SSEPackedSingle>,
9268                                 T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9270     defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
9271                                 sse_load_f16, "comish", SSEPackedSingle>,
9272                                 T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9273   }
9276 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
9277 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9278                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
9279                          Predicate prd = HasAVX512> {
9280   let Predicates = [prd], ExeDomain = _.ExeDomain in {
9281   defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9282                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9283                            "$src2, $src1", "$src1, $src2",
9284                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9285                            EVEX_4V, VEX_LIG, Sched<[sched]>;
9286   defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9287                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9288                          "$src2, $src1", "$src1, $src2",
9289                          (OpNode (_.VT _.RC:$src1),
9290                           (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG,
9291                           Sched<[sched.Folded, sched.ReadAfterFold]>;
9295 defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
9296                                f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
9297                                T_MAP6PD;
9298 defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
9299                                  SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
9300                                  EVEX_CD8<16, CD8VT1>, T_MAP6PD;
9301 let Uses = [MXCSR] in {
9302 defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
9303                                f32x_info>, EVEX_CD8<32, CD8VT1>,
9304                                T8PD;
9305 defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
9306                                f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
9307                                T8PD;
9308 defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
9309                                  SchedWriteFRsqrt.Scl, f32x_info>,
9310                                  EVEX_CD8<32, CD8VT1>, T8PD;
9311 defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
9312                                  SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
9313                                  EVEX_CD8<64, CD8VT1>, T8PD;
9316 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
9317 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
9318                          X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9319   let ExeDomain = _.ExeDomain in {
9320   defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9321                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
9322                          (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
9323                          Sched<[sched]>;
9324   defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9325                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9326                          (OpNode (_.VT
9327                            (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
9328                          Sched<[sched.Folded, sched.ReadAfterFold]>;
9329   defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9330                           (ins _.ScalarMemOp:$src), OpcodeStr,
9331                           "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9332                           (OpNode (_.VT
9333                             (_.BroadcastLdFrag addr:$src)))>,
9334                           EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9335   }
9338 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
9339                                 X86SchedWriteWidths sched> {
9340   let Uses = [MXCSR] in {
9341   defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
9342                              v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
9343   defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
9344                              v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
9345   }
9346   let Predicates = [HasFP16] in
9347   defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
9348                            v32f16_info>, EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9350   // Define only if the AVX512VL feature is present.
9351   let Predicates = [HasVLX], Uses = [MXCSR] in {
9352     defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9353                                   OpNode, sched.XMM, v4f32x_info>,
9354                                   EVEX_V128, EVEX_CD8<32, CD8VF>;
9355     defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9356                                   OpNode, sched.YMM, v8f32x_info>,
9357                                   EVEX_V256, EVEX_CD8<32, CD8VF>;
9358     defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9359                                   OpNode, sched.XMM, v2f64x_info>,
9360                                   EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
9361     defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9362                                   OpNode, sched.YMM, v4f64x_info>,
9363                                   EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
9364   }
9365   let Predicates = [HasFP16, HasVLX] in {
9366     defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9367                                 OpNode, sched.XMM, v8f16x_info>,
9368                                 EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9369     defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9370                                 OpNode, sched.YMM, v16f16x_info>,
9371                                 EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9372   }
9375 defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
9376 defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;
9378 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
9379 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9380                          SDNode OpNode, SDNode OpNodeSAE,
9381                          X86FoldableSchedWrite sched> {
9382   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
9383   defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9384                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9385                            "$src2, $src1", "$src1, $src2",
9386                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9387                            Sched<[sched]>, SIMD_EXC;
9389   defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9390                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9391                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
9392                             (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9393                             EVEX_B, Sched<[sched]>;
9395   defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9396                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9397                          "$src2, $src1", "$src1, $src2",
9398                          (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
9399                          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9400   }
9403 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9404                         SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9405   defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
9406                            sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD, EVEX_4V;
9407   defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
9408                            sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD, EVEX_4V;
9411 multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
9412                         SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9413   let Predicates = [HasFP16] in
9414   defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode,  OpNodeSAE, sched>,
9415                EVEX_CD8<16, CD8VT1>, T_MAP6PD, EVEX_4V;
9418 let Predicates = [HasERI] in {
9419   defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
9420                                SchedWriteFRcp.Scl>;
9421   defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
9422                                SchedWriteFRsqrt.Scl>;
9425 defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9426                               SchedWriteFRnd.Scl>,
9427                  avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9428                                   SchedWriteFRnd.Scl>;
9429 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
9431 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9432                          SDNode OpNode, X86FoldableSchedWrite sched> {
9433   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9434   defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9435                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
9436                          (OpNode (_.VT _.RC:$src))>,
9437                          Sched<[sched]>;
9439   defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9440                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9441                          (OpNode (_.VT
9442                              (bitconvert (_.LdFrag addr:$src))))>,
9443                           Sched<[sched.Folded, sched.ReadAfterFold]>;
9445   defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9446                          (ins _.ScalarMemOp:$src), OpcodeStr,
9447                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9448                          (OpNode (_.VT
9449                                   (_.BroadcastLdFrag addr:$src)))>,
9450                          EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9451   }
9453 multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9454                          SDNode OpNode, X86FoldableSchedWrite sched> {
9455   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
9456   defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9457                         (ins _.RC:$src), OpcodeStr,
9458                         "{sae}, $src", "$src, {sae}",
9459                         (OpNode (_.VT _.RC:$src))>,
9460                         EVEX_B, Sched<[sched]>;
9463 multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
9464                        SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9465    defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
9466               avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
9467               T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
9468    defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
9469               avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
9470               T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
9473 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
9474                                   SDNode OpNode, X86SchedWriteWidths sched> {
9475   // Define only if the AVX512VL feature is present.
9476   let Predicates = [HasVLX] in {
9477     defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
9478                                 sched.XMM>,
9479                                 EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
9480     defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
9481                                 sched.YMM>,
9482                                 EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
9483     defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
9484                                 sched.XMM>,
9485                                 EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
9486     defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
9487                                 sched.YMM>,
9488                                 EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
9489   }
9492 multiclass  avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
9493                        SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9494   let Predicates = [HasFP16] in
9495   defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
9496               avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
9497               T_MAP6PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
9498   let Predicates = [HasFP16, HasVLX] in {
9499     defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
9500                                      EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9501     defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
9502                                      EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9503   }
9505 let Predicates = [HasERI] in {
9506  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
9507                             SchedWriteFRsqrt>, EVEX;
9508  defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
9509                             SchedWriteFRcp>, EVEX;
9510  defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
9511                             SchedWriteFAdd>, EVEX;
9513 defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9514                             SchedWriteFRnd>,
9515                  avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9516                                      SchedWriteFRnd>,
9517                  avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
9518                                           SchedWriteFRnd>, EVEX;
9520 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
9521                                     X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9522   let ExeDomain = _.ExeDomain in
9523   defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9524                          (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
9525                          (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
9526                          EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
9529 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
9530                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9531   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9532   defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
9533                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
9534                          (_.VT (any_fsqrt _.RC:$src)),
9535                          (_.VT (fsqrt _.RC:$src))>, EVEX,
9536                          Sched<[sched]>;
9537   defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9538                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9539                          (any_fsqrt (_.VT (_.LdFrag addr:$src))),
9540                          (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
9541                          Sched<[sched.Folded, sched.ReadAfterFold]>;
9542   defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9543                           (ins _.ScalarMemOp:$src), OpcodeStr,
9544                           "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9545                           (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
9546                           (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
9547                           EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9548   }
9551 let Uses = [MXCSR], mayRaiseFPException = 1 in
9552 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
9553                                   X86SchedWriteSizes sched> {
9554   let Predicates = [HasFP16] in
9555   defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9556                                 sched.PH.ZMM, v32f16_info>,
9557                                 EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9558   let Predicates = [HasFP16, HasVLX] in {
9559     defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9560                                      sched.PH.XMM, v8f16x_info>,
9561                                      EVEX_V128, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9562     defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9563                                      sched.PH.YMM, v16f16x_info>,
9564                                      EVEX_V256, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9565   }
9566   defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9567                                 sched.PS.ZMM, v16f32_info>,
9568                                 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
9569   defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9570                                 sched.PD.ZMM, v8f64_info>,
9571                                 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
9572   // Define only if the AVX512VL feature is present.
9573   let Predicates = [HasVLX] in {
9574     defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9575                                      sched.PS.XMM, v4f32x_info>,
9576                                      EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
9577     defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9578                                      sched.PS.YMM, v8f32x_info>,
9579                                      EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
9580     defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9581                                      sched.PD.XMM, v2f64x_info>,
9582                                      EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
9583     defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9584                                      sched.PD.YMM, v4f64x_info>,
9585                                      EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
9586   }
9589 let Uses = [MXCSR] in
9590 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
9591                                         X86SchedWriteSizes sched> {
9592   let Predicates = [HasFP16] in
9593   defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
9594                                       sched.PH.ZMM, v32f16_info>,
9595                                       EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9596   defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
9597                                       sched.PS.ZMM, v16f32_info>,
9598                                       EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
9599   defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
9600                                       sched.PD.ZMM, v8f64_info>,
9601                                       EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
9604 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9605                               X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
9606   let ExeDomain = _.ExeDomain, Predicates = [prd] in {
9607     defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9608                          (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9609                          "$src2, $src1", "$src1, $src2",
9610                          (X86fsqrts (_.VT _.RC:$src1),
9611                                     (_.VT _.RC:$src2))>,
9612                          Sched<[sched]>, SIMD_EXC;
9613     defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9614                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9615                          "$src2, $src1", "$src1, $src2",
9616                          (X86fsqrts (_.VT _.RC:$src1),
9617                                     (_.ScalarIntMemFrags addr:$src2))>,
9618                          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9619     let Uses = [MXCSR] in
9620     defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9621                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
9622                          "$rc, $src2, $src1", "$src1, $src2, $rc",
9623                          (X86fsqrtRnds (_.VT _.RC:$src1),
9624                                      (_.VT _.RC:$src2),
9625                                      (i32 timm:$rc))>,
9626                          EVEX_B, EVEX_RC, Sched<[sched]>;
9628     let isCodeGenOnly = 1, hasSideEffects = 0 in {
9629       def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9630                 (ins _.FRC:$src1, _.FRC:$src2),
9631                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9632                 Sched<[sched]>, SIMD_EXC;
9633       let mayLoad = 1 in
9634         def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9635                   (ins _.FRC:$src1, _.ScalarMemOp:$src2),
9636                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9637                   Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9638     }
9639   }
9641   let Predicates = [prd] in {
9642     def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
9643               (!cast<Instruction>(Name#Zr)
9644                   (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
9645   }
9647   let Predicates = [prd, OptForSize] in {
9648     def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
9649               (!cast<Instruction>(Name#Zm)
9650                   (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
9651   }
9654 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
9655                                   X86SchedWriteSizes sched> {
9656   defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>,
9657                         EVEX_CD8<16, CD8VT1>, EVEX_4V, T_MAP5XS;
9658   defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
9659                         EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
9660   defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
9661                         EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
9664 defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
9665              avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
9667 defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
9669 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
9670                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9671   let ExeDomain = _.ExeDomain in {
9672   defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9673                            (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9674                            "$src3, $src2, $src1", "$src1, $src2, $src3",
9675                            (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9676                            (i32 timm:$src3)))>,
9677                            Sched<[sched]>, SIMD_EXC;
9679   let Uses = [MXCSR] in
9680   defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9681                          (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9682                          "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
9683                          (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9684                          (i32 timm:$src3)))>, EVEX_B,
9685                          Sched<[sched]>;
9687   defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9688                          (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
9689                          OpcodeStr,
9690                          "$src3, $src2, $src1", "$src1, $src2, $src3",
9691                          (_.VT (X86RndScales _.RC:$src1,
9692                                 (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
9693                          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9695   let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
9696     def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9697                (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
9698                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9699                []>, Sched<[sched]>, SIMD_EXC;
9701     let mayLoad = 1 in
9702       def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9703                  (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9704                  OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9705                  []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9706   }
9707   }
9709   let Predicates = [HasAVX512] in {
9710     def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
9711               (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
9712                _.FRC:$src1, timm:$src2))>;
9713   }
9715   let Predicates = [HasAVX512, OptForSize] in {
9716     def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
9717               (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
9718                addr:$src1, timm:$src2))>;
9719   }
9722 let Predicates = [HasFP16] in
9723 defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
9724                                            SchedWriteFRnd.Scl, f16x_info>,
9725                                            AVX512PSIi8Base, TA, EVEX_4V,
9726                                            EVEX_CD8<16, CD8VT1>;
9728 defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
9729                                            SchedWriteFRnd.Scl, f32x_info>,
9730                                            AVX512AIi8Base, EVEX_4V, VEX_LIG,
9731                                            EVEX_CD8<32, CD8VT1>;
9733 defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
9734                                            SchedWriteFRnd.Scl, f64x_info>,
9735                                            VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
9736                                            EVEX_CD8<64, CD8VT1>;
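// Fold a masked scalar op on the low element (an X86selects_mask merged back
// in with a Movss/Movsd/Movsh) into the masked or zero-masked *r_Int form of
// the corresponding instruction.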
9738 multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
9739                                 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
9740                                 dag OutMask, Predicate BasePredicate> {
9741   let Predicates = [BasePredicate] in {
9742     def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9743                (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9744                (extractelt _.VT:$dst, (iPTR 0))))),
9745               (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9746                _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
9748     def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9749                (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9750                ZeroFP))),
9751               (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9752                OutMask, _.VT:$src2, _.VT:$src1)>;
9753   }
9756 defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
9757                             (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info,
9758                             fp16imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasFP16>;
9759 defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9760                             (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9761                             fp32imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9762 defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9763                             (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9764                             fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9767 //-------------------------------------------------
9768 // Integer truncate and extend operations
9769 //-------------------------------------------------
9771 // PatFrags that contain a select and a truncate op. They take operands in the
9772 // same order as X86vmtrunc, X86vmtruncs and X86vmtruncus, so any of them can
9773 // be passed to the multiclasses.
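// For example, (select_trunc node:$src, node:$src0, node:$mask) expands to
// (vselect_mask $mask, (trunc $src), $src0), mirroring the
// (X86vmtrunc $src, $src0, $mask) operand order.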
9774 def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
9775                            (vselect_mask node:$mask,
9776                                          (trunc node:$src), node:$src0)>;
9777 def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
9778                             (vselect_mask node:$mask,
9779                                           (X86vtruncs node:$src), node:$src0)>;
9780 def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
9781                              (vselect_mask node:$mask,
9782                                            (X86vtruncus node:$src), node:$src0)>;
9784 multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9785                               SDPatternOperator MaskNode,
9786                               X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9787                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9788   let ExeDomain = DestInfo.ExeDomain in {
9789   def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9790              (ins SrcInfo.RC:$src),
9791              OpcodeStr # "\t{$src, $dst|$dst, $src}",
9792              [(set DestInfo.RC:$dst,
9793                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9794              EVEX, Sched<[sched]>;
9795   let Constraints = "$src0 = $dst" in
9796   def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9797              (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9798              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9799              [(set DestInfo.RC:$dst,
9800                    (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9801                              (DestInfo.VT DestInfo.RC:$src0),
9802                              SrcInfo.KRCWM:$mask))]>,
9803              EVEX, EVEX_K, Sched<[sched]>;
9804   def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9805              (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9806              OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9807              [(set DestInfo.RC:$dst,
9808                    (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9809                              DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9810              EVEX, EVEX_KZ, Sched<[sched]>;
9811   }
9813   let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9814     def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9815                (ins x86memop:$dst, SrcInfo.RC:$src),
9816                OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9817                EVEX, Sched<[sched.Folded]>;
9819     def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9820                (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9821                OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9822                EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
9823   }//mayStore = 1, hasSideEffects = 0
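// Store patterns: select the truncating-store (mr) and masked truncating-store
// (mrk) forms for plain and masked truncstore fragments.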
9826 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9827                                     X86VectorVTInfo DestInfo,
9828                                     PatFrag truncFrag, PatFrag mtruncFrag,
9829                                     string Name> {
9831   def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9832             (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
9833                                     addr:$dst, SrcInfo.RC:$src)>;
9835   def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9836                         SrcInfo.KRCWM:$mask),
9837             (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
9838                             addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
9841 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9842                         SDNode OpNode256, SDNode OpNode512,
9843                         SDPatternOperator MaskNode128,
9844                         SDPatternOperator MaskNode256,
9845                         SDPatternOperator MaskNode512,
9846                         X86FoldableSchedWrite sched,
9847                         AVX512VLVectorVTInfo VTSrcInfo,
9848                         X86VectorVTInfo DestInfoZ128,
9849                         X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9850                         X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9851                         X86MemOperand x86memopZ, PatFrag truncFrag,
9852                         PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9854   let Predicates = [HasVLX, prd] in {
9855     defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
9856                              VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9857                 avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
9858                              truncFrag, mtruncFrag, NAME>, EVEX_V128;
9860     defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
9861                              VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9862                 avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
9863                              truncFrag, mtruncFrag, NAME>, EVEX_V256;
9864   }
9865   let Predicates = [prd] in
9866     defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
9867                              VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9868                 avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
9869                              truncFrag, mtruncFrag, NAME>, EVEX_V512;
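// Instantiation helpers for each source/destination element-size pair:
// qb: i64->i8, qw: i64->i16, qd: i64->i32, db: i32->i8, dw: i32->i16,
// wb: i16->i8 (requires BWI).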
9872 multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9873                            SDPatternOperator MaskNode,
9874                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9875                            PatFrag MaskedStoreNode, SDNode InVecNode,
9876                            SDPatternOperator InVecMaskNode> {
9877   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9878                           InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9879                           avx512vl_i64_info, v16i8x_info, v16i8x_info,
9880                           v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9881                           MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
9884 multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9885                            SDPatternOperator MaskNode,
9886                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9887                            PatFrag MaskedStoreNode, SDNode InVecNode,
9888                            SDPatternOperator InVecMaskNode> {
9889   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9890                           InVecMaskNode, InVecMaskNode, MaskNode, sched,
9891                           avx512vl_i64_info, v8i16x_info, v8i16x_info,
9892                           v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9893                           MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
9896 multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9897                            SDPatternOperator MaskNode,
9898                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9899                            PatFrag MaskedStoreNode, SDNode InVecNode,
9900                            SDPatternOperator InVecMaskNode> {
9901   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9902                           InVecMaskNode, MaskNode, MaskNode, sched,
9903                           avx512vl_i64_info, v4i32x_info, v4i32x_info,
9904                           v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9905                           MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
9908 multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
9909                            SDPatternOperator MaskNode,
9910                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9911                            PatFrag MaskedStoreNode, SDNode InVecNode,
9912                            SDPatternOperator InVecMaskNode> {
9913   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9914                           InVecMaskNode, InVecMaskNode, MaskNode, sched,
9915                           avx512vl_i32_info, v16i8x_info, v16i8x_info,
9916                           v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
9917                           MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
9920 multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9921                            SDPatternOperator MaskNode,
9922                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9923                            PatFrag MaskedStoreNode, SDNode InVecNode,
9924                            SDPatternOperator InVecMaskNode> {
9925   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9926                           InVecMaskNode, MaskNode, MaskNode, sched,
9927                           avx512vl_i32_info, v8i16x_info, v8i16x_info,
9928                           v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
9929                           MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
9932 multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9933                            SDPatternOperator MaskNode,
9934                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9935                            PatFrag MaskedStoreNode, SDNode InVecNode,
9936                            SDPatternOperator InVecMaskNode> {
9937   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9938                           InVecMaskNode, MaskNode, MaskNode, sched,
9939                           avx512vl_i16_info, v16i8x_info, v16i8x_info,
9940                           v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
9941                           MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
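// Truncate instantiations. VPMOVxy performs a plain (modulo) truncation,
// VPMOVSxy a signed-saturating and VPMOVUSxy an unsigned-saturating
// truncation. Each covers 128/256/512-bit sources (the narrower forms under
// VLX) with register and truncating-store variants.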
9944 defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",   trunc, select_trunc,
9945                                   WriteShuffle256, truncstorevi8,
9946                                   masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9947 defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",  X86vtruncs, select_truncs,
9948                                   WriteShuffle256, truncstore_s_vi8,
9949                                   masked_truncstore_s_vi8, X86vtruncs,
9950                                   X86vmtruncs>;
9951 defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
9952                                   select_truncus, WriteShuffle256,
9953                                   truncstore_us_vi8, masked_truncstore_us_vi8,
9954                                   X86vtruncus, X86vmtruncus>;
9956 defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
9957                                   WriteShuffle256, truncstorevi16,
9958                                   masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9959 defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, select_truncs,
9960                                   WriteShuffle256, truncstore_s_vi16,
9961                                   masked_truncstore_s_vi16, X86vtruncs,
9962                                   X86vmtruncs>;
9963 defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
9964                                   select_truncus, WriteShuffle256,
9965                                   truncstore_us_vi16, masked_truncstore_us_vi16,
9966                                   X86vtruncus, X86vmtruncus>;
9968 defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
9969                                   WriteShuffle256, truncstorevi32,
9970                                   masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
9971 defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, select_truncs,
9972                                   WriteShuffle256, truncstore_s_vi32,
9973                                   masked_truncstore_s_vi32, X86vtruncs,
9974                                   X86vmtruncs>;
9975 defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
9976                                   select_truncus, WriteShuffle256,
9977                                   truncstore_us_vi32, masked_truncstore_us_vi32,
9978                                   X86vtruncus, X86vmtruncus>;
9980 defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
9981                                   WriteShuffle256, truncstorevi8,
9982                                   masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9983 defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
9984                                   WriteShuffle256, truncstore_s_vi8,
9985                                   masked_truncstore_s_vi8, X86vtruncs,
9986                                   X86vmtruncs>;
9987 defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus,
9988                                   select_truncus, WriteShuffle256,
9989                                   truncstore_us_vi8, masked_truncstore_us_vi8,
9990                                   X86vtruncus, X86vmtruncus>;
9992 defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
9993                                   WriteShuffle256, truncstorevi16,
9994                                   masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9995 defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
9996                                   WriteShuffle256, truncstore_s_vi16,
9997                                   masked_truncstore_s_vi16, X86vtruncs,
9998                                   X86vmtruncs>;
9999 defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
10000                                   select_truncus, WriteShuffle256,
10001                                   truncstore_us_vi16, masked_truncstore_us_vi16,
10002                                   X86vtruncus, X86vmtruncus>;
10004 defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
10005                                   WriteShuffle256, truncstorevi8,
10006                                   masked_truncstorevi8, X86vtrunc,
10007                                   X86vmtrunc>;
10008 defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
10009                                   WriteShuffle256, truncstore_s_vi8,
10010                                   masked_truncstore_s_vi8, X86vtruncs,
10011                                   X86vmtruncs>;
10012 defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
10013                                   select_truncus, WriteShuffle256,
10014                                   truncstore_us_vi8, masked_truncstore_us_vi8,
10015                                   X86vtruncus, X86vmtruncus>;
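// Without VLX, a 256-bit truncate is lowered by widening the source into a
// zmm register (INSERT_SUBREG into IMPLICIT_DEF), using the 512-bit VPMOV*
// instruction, and extracting the low xmm subvector of the result.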
10017 let Predicates = [HasAVX512, NoVLX] in {
10018 def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
10019          (v8i16 (EXTRACT_SUBREG
10020                  (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
10021                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
10022 def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
10023          (v4i32 (EXTRACT_SUBREG
10024                  (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
10025                                            VR256X:$src, sub_ymm)))), sub_xmm))>;
10028 let Predicates = [HasBWI, NoVLX] in {
10029 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
10030          (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
10031                                             VR256X:$src, sub_ymm))), sub_xmm))>;
10034 // Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
10035 multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
10036                            X86VectorVTInfo DestInfo,
10037                            X86VectorVTInfo SrcInfo> {
10038   def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
10039                                  DestInfo.RC:$src0,
10040                                  SrcInfo.KRCWM:$mask)),
10041             (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
10042                                                  SrcInfo.KRCWM:$mask,
10043                                                  SrcInfo.RC:$src)>;
10045   def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
10046                                  DestInfo.ImmAllZerosV,
10047                                  SrcInfo.KRCWM:$mask)),
10048             (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
10049                                                   SrcInfo.RC:$src)>;
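// For example, the VPMOVDWZ256 instantiation below maps an X86vmtrunc with a
// register pass-through operand to VPMOVDWZ256rrk (merge masking) and one
// with an all-zeros pass-through to VPMOVDWZ256rrkz (zero masking).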
10052 let Predicates = [HasVLX] in {
10053 defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
10054 defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
10055 defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
10058 let Predicates = [HasAVX512] in {
10059 defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
10060 defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
10061 defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
10063 defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
10064 defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
10065 defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
10067 defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
10068 defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
10069 defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
10072 multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
10073               X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
10074               X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
10075   let ExeDomain = DestInfo.ExeDomain in {
10076   defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10077                     (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
10078                     (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
10079                   EVEX, Sched<[sched]>;
10081   defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10082                   (ins x86memop:$src), OpcodeStr ,"$src", "$src",
10083                   (DestInfo.VT (LdFrag addr:$src))>,
10084                 EVEX, Sched<[sched.Folded]>;
10085   }
10088 multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
10089           SDNode OpNode, SDNode InVecNode, string ExtTy,
10090           X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10091   let Predicates = [HasVLX, HasBWI] in {
10092     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
10093                     v16i8x_info, i64mem, LdFrag, InVecNode>,
10094                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
10096     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
10097                     v16i8x_info, i128mem, LdFrag, OpNode>,
10098                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
10099   }
10100   let Predicates = [HasBWI] in {
10101     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
10102                     v32i8x_info, i256mem, LdFrag, OpNode>,
10103                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
10104   }
10107 multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
10108           SDNode OpNode, SDNode InVecNode, string ExtTy,
10109           X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10110   let Predicates = [HasVLX, HasAVX512] in {
10111     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
10112                    v16i8x_info, i32mem, LdFrag, InVecNode>,
10113                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
10115     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
10116                    v16i8x_info, i64mem, LdFrag, InVecNode>,
10117                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
10118   }
10119   let Predicates = [HasAVX512] in {
10120     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
10121                    v16i8x_info, i128mem, LdFrag, OpNode>,
10122                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
10123   }
10126 multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
10127           SDNode OpNode, SDNode InVecNode, string ExtTy,
10128           X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10129   let Predicates = [HasVLX, HasAVX512] in {
10130     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
10131                    v16i8x_info, i16mem, LdFrag, InVecNode>,
10132                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
10134     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
10135                    v16i8x_info, i32mem, LdFrag, InVecNode>,
10136                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
10137   }
10138   let Predicates = [HasAVX512] in {
10139     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
10140                    v16i8x_info, i64mem, LdFrag, InVecNode>,
10141                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
10142   }
10145 multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
10146          SDNode OpNode, SDNode InVecNode, string ExtTy,
10147          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
10148   let Predicates = [HasVLX, HasAVX512] in {
10149     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
10150                    v8i16x_info, i64mem, LdFrag, InVecNode>,
10151                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
10153     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
10154                    v8i16x_info, i128mem, LdFrag, OpNode>,
10155                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
10156   }
10157   let Predicates = [HasAVX512] in {
10158     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
10159                    v16i16x_info, i256mem, LdFrag, OpNode>,
10160                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
10161   }
10164 multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
10165          SDNode OpNode, SDNode InVecNode, string ExtTy,
10166          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
10167   let Predicates = [HasVLX, HasAVX512] in {
10168     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
10169                    v8i16x_info, i32mem, LdFrag, InVecNode>,
10170                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
10172     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
10173                    v8i16x_info, i64mem, LdFrag, InVecNode>,
10174                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
10175   }
10176   let Predicates = [HasAVX512] in {
10177     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
10178                    v8i16x_info, i128mem, LdFrag, OpNode>,
10179                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
10180   }
10183 multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
10184          SDNode OpNode, SDNode InVecNode, string ExtTy,
10185          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
10187   let Predicates = [HasVLX, HasAVX512] in {
10188     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
10189                    v4i32x_info, i64mem, LdFrag, InVecNode>,
10190                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
10192     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
10193                    v4i32x_info, i128mem, LdFrag, OpNode>,
10194                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
10195   }
10196   let Predicates = [HasAVX512] in {
10197     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
10198                    v8i32x_info, i256mem, LdFrag, OpNode>,
10199                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
10200   }
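// In the extend multiclasses above, OpNode (plain sext/zext) is used when
// every element of the source vector is extended, while InVecNode
// (sext_invec/zext_invec) models the *_vector_inreg forms that only extend
// the low elements of a 128-bit source.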
10203 defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
10204 defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
10205 defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
10206 defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
10207 defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
10208 defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;
10210 defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
10211 defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
10212 defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
10213 defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
10214 defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
10215 defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
10218 // Patterns for which we also need any-extend versions. aext_vector_inreg
10219 // is currently legalized to zext_vector_inreg.
10220 multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
10221   // 256-bit patterns
10222   let Predicates = [HasVLX, HasBWI] in {
10223     def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
10224               (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
10225   }
10227   let Predicates = [HasVLX] in {
10228     def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
10229               (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
10231     def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
10232               (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
10233   }
10235   // 512-bit patterns
10236   let Predicates = [HasBWI] in {
10237     def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
10238               (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
10239   }
10240   let Predicates = [HasAVX512] in {
10241     def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
10242               (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
10243     def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
10244               (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
10246     def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
10247               (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
10249     def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
10250               (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
10251   }
10254 multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
10255                                  SDNode InVecOp> :
10256     AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
10257   // 128-bit patterns
10258   let Predicates = [HasVLX, HasBWI] in {
10259   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10260             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10261   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10262             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10263   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10264             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10265   }
10266   let Predicates = [HasVLX] in {
10267   def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10268             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10269   def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10270             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10272   def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
10273             (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
10275   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10276             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10277   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10278             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10279   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10280             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10282   def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10283             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10284   def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
10285             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10287   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10288             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10289   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10290             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10291   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
10292             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10293   }
10294   let Predicates = [HasVLX] in {
10295   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10296             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10297   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
10298             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10299   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10300             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10302   def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10303             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10304   def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10305             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10307   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10308             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10309   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
10310             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10311   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10312             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10313   }
10314   // 512-bit patterns
10315   let Predicates = [HasAVX512] in {
10316   def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10317             (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10318   def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10319             (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10320   def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10321             (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10322   }
10325 defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
10326 defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
10328 // Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
10329 // ext+trunc aggressively, making it impossible to legalize the DAG to this
10330 // pattern directly.
10331 let Predicates = [HasAVX512, NoBWI] in {
10332 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
10333          (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
10334 def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
10335          (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
10338 //===----------------------------------------------------------------------===//
10339 // GATHER - SCATTER Operations
10341 // FIXME: Improve scheduling of gather/scatter instructions.
10342 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10343                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10344   let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
10345       ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
10346   def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
10347             (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
10348             !strconcat(OpcodeStr#_.Suffix,
10349             "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
10350             []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10351             Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
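// The constraint string above ties $src1 to $dst, so masked-off destination
// elements are preserved, and ties $mask to the $mask_wb output because the
// hardware clears each mask bit as the corresponding element is gathered.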
10354 multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
10355                         AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10356   defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
10357                                       vy512xmem>, EVEX_V512, VEX_W;
10358   defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
10359                                       vz512mem>, EVEX_V512, VEX_W;
10360 let Predicates = [HasVLX] in {
10361   defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10362                               vx256xmem>, EVEX_V256, VEX_W;
10363   defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
10364                               vy256xmem>, EVEX_V256, VEX_W;
10365   defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10366                               vx128xmem>, EVEX_V128, VEX_W;
10367   defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10368                               vx128xmem>, EVEX_V128, VEX_W;
10372 multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
10373                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10374   defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
10375                                        EVEX_V512;
10376   defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
10377                                        EVEX_V512;
10378 let Predicates = [HasVLX] in {
10379   defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10380                                           vy256xmem>, EVEX_V256;
10381   defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10382                                           vy128xmem>, EVEX_V256;
10383   defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10384                                           vx128xmem>, EVEX_V128;
10385   defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10386                                           vx64xmem, VK2WM>, EVEX_V128;
10391 defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
10392                avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
10394 defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
10395                 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
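// In the gather mnemonics the letter after "gather" gives the index width
// (d = dword, q = qword indices) and the trailing suffix the element type,
// e.g. VGATHERQPS gathers single-precision elements using 64-bit indices.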
10397 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10398                           X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10400 let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain, 
10401     hasSideEffects = 0 in
10403   def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
10404             (ins memop:$dst, MaskRC:$mask, _.RC:$src),
10405             !strconcat(OpcodeStr#_.Suffix,
10406             "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
10407             []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10408             Sched<[WriteStore]>;
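// Scatter produces no vector result: only the write-mask is defined as an
// output ($mask_wb), and its bits are cleared as the corresponding elements
// are stored.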
10411 multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
10412                         AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10413   defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
10414                                       vy512xmem>, EVEX_V512, VEX_W;
10415   defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
10416                                       vz512mem>, EVEX_V512, VEX_W;
10417 let Predicates = [HasVLX] in {
10418   defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10419                               vx256xmem>, EVEX_V256, VEX_W;
10420   defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
10421                               vy256xmem>, EVEX_V256, VEX_W;
10422   defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10423                               vx128xmem>, EVEX_V128, VEX_W;
10424   defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10425                               vx128xmem>, EVEX_V128, VEX_W;
10429 multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
10430                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10431   defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
10432                                        EVEX_V512;
10433   defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
10434                                        EVEX_V512;
10435 let Predicates = [HasVLX] in {
10436   defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10437                                           vy256xmem>, EVEX_V256;
10438   defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10439                                           vy128xmem>, EVEX_V256;
10440   defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10441                                           vx128xmem>, EVEX_V128;
10442   defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10443                                           vx64xmem, VK2WM>, EVEX_V128;
10447 defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
10448                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
10450 defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
10451                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
10453 // Gather/scatter prefetch instructions.
10454 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
10455                        RegisterClass KRC, X86MemOperand memop> {
10456   let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
10457   def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
10458             !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
10459             EVEX, EVEX_K, Sched<[WriteLoad]>;
10462 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
10463                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10465 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
10466                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10468 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
10469                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
10471 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
10472                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
10474 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
10475                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10477 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
10478                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10480 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
10481                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
10483 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
10484                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
10486 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
10487                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10489 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
10490                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10492 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
10493                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
10495 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
10496                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
10498 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
10499                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10501 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
10502                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10504 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
10505                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
10507 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
10508                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
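// The 0/1 in the prefetch mnemonics selects the locality hint (PF0 uses the
// T0 hint, PF1 the T1 hint). The shared multiclass conservatively marks
// these instructions as both mayLoad and mayStore.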
10510 multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
10511 def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
10512                   !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
10513                   [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
10514                   EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
10517 multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
10518                                  string OpcodeStr, Predicate prd> {
10519 let Predicates = [prd] in
10520   defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
10522   let Predicates = [prd, HasVLX] in {
10523     defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
10524     defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
10525   }
10528 defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
10529 defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
10530 defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
10531 defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
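// VPMOVM2{B,W,D,Q} materializes a mask register as a vector by
// sign-extending each mask bit into an all-ones or all-zeros element, as
// the sext pattern above shows.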
10533 multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
10534     def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
10535                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
10536                         [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
10537                         EVEX, Sched<[WriteMove]>;
10540 // Use the 512-bit version to implement the 128/256-bit forms under NoVLX.
10541 multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
10542                                            X86VectorVTInfo _,
10543                                            string Name> {
10545   def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
10546             (_.KVT (COPY_TO_REGCLASS
10547                      (!cast<Instruction>(Name#"Zrr")
10548                        (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
10549                                       _.RC:$src, _.SubRegIdx)),
10550                    _.KRC))>;
10553 multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
10554                                    AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10555   let Predicates = [prd] in
10556     defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
10557                                             EVEX_V512;
10559   let Predicates = [prd, HasVLX] in {
10560     defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
10561                                               EVEX_V256;
10562     defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
10563                                                EVEX_V128;
10564   }
10565   let Predicates = [prd, NoVLX] in {
10566     defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
10567     defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
10568   }
10571 defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
10572                                               avx512vl_i8_info, HasBWI>;
10573 defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
10574                                               avx512vl_i16_info, HasBWI>, VEX_W;
10575 defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
10576                                               avx512vl_i32_info, HasDQI>;
10577 defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
10578                                               avx512vl_i64_info, HasDQI>, VEX_W;
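// VPMOV{B,W,D,Q}2M extracts the sign bit of each vector element into a mask
// register; the pattern models this as the signed comparison 0 > element.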
10580 // Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
10581 // is available, but BWI is not. We can't handle this in lowering because
10582 // a target-independent DAG combine likes to combine sext and trunc.
10583 let Predicates = [HasDQI, NoBWI] in {
10584   def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
10585             (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
10586   def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
10587             (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
10590 let Predicates = [HasDQI, NoBWI, HasVLX] in {
10591   def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
10592             (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
10595 //===----------------------------------------------------------------------===//
10596 // AVX-512 - COMPRESS and EXPAND
10599 multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
10600                                  string OpcodeStr, X86FoldableSchedWrite sched> {
10601   defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
10602               (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10603               (null_frag)>, AVX5128IBase,
10604               Sched<[sched]>;
10606   let mayStore = 1, hasSideEffects = 0 in
10607   def mr : AVX5128I<opc, MRMDestMem, (outs),
10608               (ins _.MemOp:$dst, _.RC:$src),
10609               OpcodeStr # "\t{$src, $dst|$dst, $src}",
10610               []>, EVEX_CD8<_.EltSize, CD8VT1>,
10611               Sched<[sched.Folded]>;
10613   def mrk : AVX5128I<opc, MRMDestMem, (outs),
10614               (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
10615               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
10616               []>,
10617               EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10618               Sched<[sched.Folded]>;
10621 multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10622   def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
10623             (!cast<Instruction>(Name#_.ZSuffix#mrk)
10624                             addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
10626   def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10627             (!cast<Instruction>(Name#_.ZSuffix#rrk)
10628                             _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10629   def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10630             (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10631                             _.KRCWM:$mask, _.RC:$src)>;
10634 multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
10635                                  X86FoldableSchedWrite sched,
10636                                  AVX512VLVectorVTInfo VTInfo,
10637                                  Predicate Pred = HasAVX512> {
10638   let Predicates = [Pred] in
10639   defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
10640            compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10642   let Predicates = [Pred, HasVLX] in {
10643     defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
10644                 compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10645     defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
10646                 compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10647   }
10650 // FIXME: Is there a better scheduler class for VPCOMPRESS?
10651 defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
10652                                           avx512vl_i32_info>, EVEX, NotMemoryFoldable;
10653 defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
10654                                           avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
10655 defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
10656                                           avx512vl_f32_info>, EVEX, NotMemoryFoldable;
10657 defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
10658                                           avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
10660 // expand
10661 multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
10662                                  string OpcodeStr, X86FoldableSchedWrite sched> {
10663   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10664               (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10665               (null_frag)>, AVX5128IBase,
10666               Sched<[sched]>;
10668   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10669               (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
10670               (null_frag)>,
10671             AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
10672             Sched<[sched.Folded, sched.ReadAfterFold]>;
10675 multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10677   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
10678             (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10679                                         _.KRCWM:$mask, addr:$src)>;
10681   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
10682             (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10683                                         _.KRCWM:$mask, addr:$src)>;
10685   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
10686                                                (_.VT _.RC:$src0))),
10687             (!cast<Instruction>(Name#_.ZSuffix#rmk)
10688                             _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
10690   def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10691             (!cast<Instruction>(Name#_.ZSuffix#rrk)
10692                             _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10693   def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10694             (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10695                             _.KRCWM:$mask, _.RC:$src)>;
10698 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
10699                                X86FoldableSchedWrite sched,
10700                                AVX512VLVectorVTInfo VTInfo,
10701                                Predicate Pred = HasAVX512> {
10702   let Predicates = [Pred] in
10703   defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
10704            expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10706   let Predicates = [Pred, HasVLX] in {
10707     defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
10708                 expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10709     defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
10710                 expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10711   }
10714 // FIXME: Is there a better scheduler class for VPEXPAND?
10715 defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
10716                                       avx512vl_i32_info>, EVEX;
10717 defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
10718                                       avx512vl_i64_info>, EVEX, VEX_W;
10719 defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
10720                                       avx512vl_f32_info>, EVEX;
10721 defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
10722                                       avx512vl_f64_info>, EVEX, VEX_W;
10724 // Handle instructions of the form:  reg_vec1 = op(reg_vec, imm)
10725 //                                               op(mem_vec, imm)
10726 //                                               op(broadcast(eltVT), imm)
10727 // All of these instructions are created with FROUND_CURRENT.
10728 multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
10729                                       SDPatternOperator OpNode,
10730                                       SDPatternOperator MaskOpNode,
10731                                       X86FoldableSchedWrite sched,
10732                                       X86VectorVTInfo _> {
10733   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10734   defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
10735                       (ins _.RC:$src1, i32u8imm:$src2),
10736                       OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10737                       (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
10738                       (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
10739                       Sched<[sched]>;
10740   defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10741                     (ins _.MemOp:$src1, i32u8imm:$src2),
10742                     OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10743                     (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10744                             (i32 timm:$src2)),
10745                     (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10746                                 (i32 timm:$src2))>,
10747                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10748   defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10749                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10750                     OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
10751                     "${src1}"#_.BroadcastStr#", $src2",
10752                     (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10753                             (i32 timm:$src2)),
10754                     (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10755                                 (i32 timm:$src2))>, EVEX_B,
10756                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10757   }
10760 // Handle instructions of the form  reg_vec1 = op(reg_vec2, imm), {sae}
10761 multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10762                                           SDNode OpNode, X86FoldableSchedWrite sched,
10763                                           X86VectorVTInfo _> {
10764   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10765   defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10766                       (ins _.RC:$src1, i32u8imm:$src2),
10767                       OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
10768                       "$src1, {sae}, $src2",
10769                       (OpNode (_.VT _.RC:$src1),
10770                               (i32 timm:$src2))>,
10771                       EVEX_B, Sched<[sched]>;
10774 multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10775             AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
10776             SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
10777             Predicate prd>{
10778   let Predicates = [prd] in {
10779     defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10780                                            sched.ZMM, _.info512>,
10781                 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
10782                                                sched.ZMM, _.info512>, EVEX_V512;
10783   }
10784   let Predicates = [prd, HasVLX] in {
10785     defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10786                                            sched.XMM, _.info128>, EVEX_V128;
10787     defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10788                                            sched.YMM, _.info256>, EVEX_V256;
10789   }
10792 // Handle instructions of the form:  reg_vec1 = op(reg_vec2, reg_vec3, imm)
10793 //                                               op(reg_vec2, mem_vec, imm)
10794 //                                               op(reg_vec2, broadcast(eltVT), imm)
10795 // All of these instructions are created with FROUND_CURRENT.
10796 multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10797                                 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10798   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10799   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10800                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10801                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10802                       (OpNode (_.VT _.RC:$src1),
10803                               (_.VT _.RC:$src2),
10804                               (i32 timm:$src3))>,
10805                       Sched<[sched]>;
10806   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10807                     (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10808                     OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10809                     (OpNode (_.VT _.RC:$src1),
10810                             (_.VT (bitconvert (_.LdFrag addr:$src2))),
10811                             (i32 timm:$src3))>,
10812                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10813   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10814                     (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10815                     OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10816                     "$src1, ${src2}"#_.BroadcastStr#", $src3",
10817                     (OpNode (_.VT _.RC:$src1),
10818                             (_.VT (_.BroadcastLdFrag addr:$src2)),
10819                             (i32 timm:$src3))>, EVEX_B,
10820                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10821   }
10824 // Handle instructions of the form:  reg_vec1 = op(reg_vec2, reg_vec3, imm)
10825 //                                               op(reg_vec2, mem_vec, imm)
10826 multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10827                               X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10828                               X86VectorVTInfo SrcInfo>{
10829   let ExeDomain = DestInfo.ExeDomain in {
10830   defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10831                   (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10832                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10833                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10834                                (SrcInfo.VT SrcInfo.RC:$src2),
10835                                (i8 timm:$src3)))>,
10836                   Sched<[sched]>;
10837   defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10838                 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10839                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10840                 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10841                              (SrcInfo.VT (bitconvert
10842                                                 (SrcInfo.LdFrag addr:$src2))),
10843                              (i8 timm:$src3)))>,
10844                 Sched<[sched.Folded, sched.ReadAfterFold]>;
10845   }
10848 // Handle instructions of the form:  reg_vec1 = op(reg_vec2, reg_vec3, imm)
10849 //                                               op(reg_vec2, mem_vec, imm)
10850 //                                               op(reg_vec2, broadcast(eltVT), imm)
10851 multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10852                            X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10853   avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10855   let ExeDomain = _.ExeDomain in
10856   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10857                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10858                     OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10859                     "$src1, ${src2}"#_.BroadcastStr#", $src3",
10860                     (OpNode (_.VT _.RC:$src1),
10861                             (_.VT (_.BroadcastLdFrag addr:$src2)),
10862                             (i8 timm:$src3))>, EVEX_B,
10863                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10866 // Handle scalar instructions of the form:  reg_vec1 = op(reg_vec2, reg_vec3, imm)
10867 //                                                       op(reg_vec2, mem_scalar, imm)
10868 multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10869                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10870   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10871   defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10872                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10873                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10874                       (OpNode (_.VT _.RC:$src1),
10875                               (_.VT _.RC:$src2),
10876                               (i32 timm:$src3))>,
10877                       Sched<[sched]>;
10878   defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10879                     (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
10880                     OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10881                     (OpNode (_.VT _.RC:$src1),
10882                             (_.ScalarIntMemFrags addr:$src2),
10883                             (i32 timm:$src3))>,
10884                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10885   }
10888 // Handle instructions of the form  reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
10889 multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10890                                     SDNode OpNode, X86FoldableSchedWrite sched,
10891                                     X86VectorVTInfo _> {
10892   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10893   defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10894                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10895                       OpcodeStr, "$src3, {sae}, $src2, $src1",
10896                       "$src1, $src2, {sae}, $src3",
10897                       (OpNode (_.VT _.RC:$src1),
10898                               (_.VT _.RC:$src2),
10899                               (i32 timm:$src3))>,
10900                       EVEX_B, Sched<[sched]>;
10903 // Handle scalar instructions of the form  reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
10904 multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10905                                     X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10906   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10907   defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10908                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10909                       OpcodeStr, "$src3, {sae}, $src2, $src1",
10910                       "$src1, $src2, {sae}, $src3",
10911                       (OpNode (_.VT _.RC:$src1),
10912                               (_.VT _.RC:$src2),
10913                               (i32 timm:$src3))>,
10914                       EVEX_B, Sched<[sched]>;
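// The *_sae_* multiclasses above emit the EVEX.b "{sae}" forms: exceptions
// are suppressed, so they keep Uses = [MXCSR] but do not set
// mayRaiseFPException.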
10917 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
10918             AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10919             SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10920   let Predicates = [prd] in {
10921     defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10922                 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
10923                                   EVEX_V512;
10925   }
10926   let Predicates = [prd, HasVLX] in {
10927     defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10928                                   EVEX_V128;
10929     defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10930                                   EVEX_V256;
10931   }
10934 multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
10935                    X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
10936                    AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
10937   let Predicates = [Pred] in {
10938     defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
10939                            SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
10940   }
10941   let Predicates = [Pred, HasVLX] in {
10942     defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
10943                            SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
10944     defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
10945                            SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
10946   }
10947 }
10949 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
10950                                   bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
10951                                   Predicate Pred = HasAVX512> {
10952   let Predicates = [Pred] in {
10953     defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10954                                 EVEX_V512;
10955   }
10956   let Predicates = [Pred, HasVLX] in {
10957     defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10958                                 EVEX_V128;
10959     defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10960                                 EVEX_V256;
10961   }
10962 }
10964 multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
10965                   X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
10966                   SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
10967   let Predicates = [prd] in {
10968      defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
10969               avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
10970   }
10971 }
10973 multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
10974                     bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
10975                     SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
10976                     X86SchedWriteWidths sched, Predicate prd>{
10977   defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
10978                             opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
10979                             AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
10980   defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
10981                             opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10982                             AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
10983   defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
10984                             opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10985                             AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, VEX_W;
10986 }
10988 defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
10989                               X86VReduce, X86VReduce, X86VReduceSAE,
10990                               SchedWriteFRnd, HasDQI>;
10991 defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
10992                               X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
10993                               SchedWriteFRnd, HasAVX512>;
10994 defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
10995                               X86VGetMant, X86VGetMant, X86VGetMantSAE,
10996                               SchedWriteFRnd, HasAVX512>;
10998 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
10999                                                 0x50, X86VRange, X86VRangeSAE,
11000                                                 SchedWriteFAdd, HasDQI>,
11001       AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
11002 defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
11003                                                 0x50, X86VRange, X86VRangeSAE,
11004                                                 SchedWriteFAdd, HasDQI>,
11005       AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11007 defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
11008       f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
11009       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11010 defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
11011       0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
11012       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11014 defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
11015       0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
11016       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11017 defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
11018       0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
11019       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11020 defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
11021       0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
11022       AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
11024 defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
11025       0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
11026       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11027 defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
11028       0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
11029       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11030 defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
11031       0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
11032       AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
11034 multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
11035                                           X86FoldableSchedWrite sched,
11036                                           X86VectorVTInfo _,
11037                                           X86VectorVTInfo CastInfo,
11038                                           string EVEX2VEXOvrd> {
11039   let ExeDomain = _.ExeDomain in {
11040   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11041                   (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
11042                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11043                   (_.VT (bitconvert
11044                          (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
11045                                                   (i8 timm:$src3)))))>,
11046                   Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
11047   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11048                 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
11049                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11050                 (_.VT
11051                  (bitconvert
11052                   (CastInfo.VT (X86Shuf128 _.RC:$src1,
11053                                            (CastInfo.LdFrag addr:$src2),
11054                                            (i8 timm:$src3)))))>,
11055                 Sched<[sched.Folded, sched.ReadAfterFold]>,
11056                 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
11057   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11058                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11059                     OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
11060                     "$src1, ${src2}"#_.BroadcastStr#", $src3",
11061                     (_.VT
11062                      (bitconvert
11063                       (CastInfo.VT
11064                        (X86Shuf128 _.RC:$src1,
11065                                    (_.BroadcastLdFrag addr:$src2),
11066                                    (i8 timm:$src3)))))>, EVEX_B,
11067                     Sched<[sched.Folded, sched.ReadAfterFold]>;
11068   }
11069 }
11071 multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
11072                                    AVX512VLVectorVTInfo _,
11073                                    AVX512VLVectorVTInfo CastInfo, bits<8> opc,
11074                                    string EVEX2VEXOvrd>{
11075   let Predicates = [HasAVX512] in
11076   defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
11077                                           _.info512, CastInfo.info512, "">, EVEX_V512;
11079   let Predicates = [HasAVX512, HasVLX] in
11080   defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
11081                                              _.info256, CastInfo.info256,
11082                                              EVEX2VEXOvrd>, EVEX_V256;
11083 }
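// Note: the X86Shuf128 node is matched on CastInfo's 64-bit-element type and
// the result is bitconverted back to the instruction's own type, presumably so
// the 32x4 and 64x2 variants below share one canonical shuffle representation.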
11085 defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
11086       avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11087 defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
11088       avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
11089 defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
11090       avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11091 defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
11092       avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
11094 multiclass avx512_valign<bits<8> opc, string OpcodeStr,
11095                          X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11096   // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
11097   // instantiation of this class.
11098   let ExeDomain = _.ExeDomain in {
11099   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11100                   (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
11101                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11102                   (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
11103                   Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
11104   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11105                 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
11106                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11107                 (_.VT (X86VAlign _.RC:$src1,
11108                                  (bitconvert (_.LdFrag addr:$src2)),
11109                                  (i8 timm:$src3)))>,
11110                 Sched<[sched.Folded, sched.ReadAfterFold]>,
11111                 EVEX2VEXOverride<"VPALIGNRrmi">;
11113   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11114                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11115                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
11116                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
11117                    (X86VAlign _.RC:$src1,
11118                               (_.VT (_.BroadcastLdFrag addr:$src2)),
11119                               (i8 timm:$src3))>, EVEX_B,
11120                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11121   }
11122 }
11124 multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
11125                                 AVX512VLVectorVTInfo _> {
11126   let Predicates = [HasAVX512] in {
11127     defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
11128                                 AVX512AIi8Base, EVEX_4V, EVEX_V512;
11129   }
11130   let Predicates = [HasAVX512, HasVLX] in {
11131     defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
11132                                 AVX512AIi8Base, EVEX_4V, EVEX_V128;
11133     // We can't really override the 256-bit version so change it back to unset.
11134     let EVEX2VEXOverride = ? in
11135     defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
11136                                 AVX512AIi8Base, EVEX_4V, EVEX_V256;
11137   }
11138 }
11140 defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
11141                                    avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11142 defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
11143                                    avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
11144                                    VEX_W;
11146 defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
11147                                          SchedWriteShuffle, avx512vl_i8_info,
11148                                          avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
11150 // Fragments to help convert valignq into masked valignd, or valignq/valignd
11151 // into vpalignr.
11152 def ValignqImm32XForm : SDNodeXForm<timm, [{
11153   return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
11154 }]>;
11155 def ValignqImm8XForm : SDNodeXForm<timm, [{
11156   return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
11157 }]>;
11158 def ValigndImm8XForm : SDNodeXForm<timm, [{
11159   return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
11160 }]>;
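// The scale factors above are the element-size ratios: a valignq immediate
// counts 64-bit elements, so the equivalent valignd immediate is imm*2
// (32-bit elements) and the equivalent vpalignr immediate is imm*8 (bytes);
// a valignd immediate scales by 4 to get bytes. E.g. valignq imm=1 becomes
// valignd imm=2 or vpalignr imm=8.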
11162 multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
11163                                         X86VectorVTInfo From, X86VectorVTInfo To,
11164                                         SDNodeXForm ImmXForm> {
11165   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11166                                  (bitconvert
11167                                   (From.VT (OpNode From.RC:$src1, From.RC:$src2,
11168                                                    timm:$src3))),
11169                                  To.RC:$src0)),
11170             (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
11171                                                   To.RC:$src1, To.RC:$src2,
11172                                                   (ImmXForm timm:$src3))>;
11174   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11175                                  (bitconvert
11176                                   (From.VT (OpNode From.RC:$src1, From.RC:$src2,
11177                                                    timm:$src3))),
11178                                  To.ImmAllZerosV)),
11179             (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
11180                                                    To.RC:$src1, To.RC:$src2,
11181                                                    (ImmXForm timm:$src3))>;
11183   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11184                                  (bitconvert
11185                                   (From.VT (OpNode From.RC:$src1,
11186                                                    (From.LdFrag addr:$src2),
11187                                            timm:$src3))),
11188                                  To.RC:$src0)),
11189             (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
11190                                                   To.RC:$src1, addr:$src2,
11191                                                   (ImmXForm timm:$src3))>;
11193   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11194                                  (bitconvert
11195                                   (From.VT (OpNode From.RC:$src1,
11196                                                    (From.LdFrag addr:$src2),
11197                                            timm:$src3))),
11198                                  To.ImmAllZerosV)),
11199             (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
11200                                                    To.RC:$src1, addr:$src2,
11201                                                    (ImmXForm timm:$src3))>;
11202 }
11204 multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
11205                                            X86VectorVTInfo From,
11206                                            X86VectorVTInfo To,
11207                                            SDNodeXForm ImmXForm> :
11208       avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
11209   def : Pat<(From.VT (OpNode From.RC:$src1,
11210                              (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
11211                              timm:$src3)),
11212             (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
11213                                                   (ImmXForm timm:$src3))>;
11215   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11216                                  (bitconvert
11217                                   (From.VT (OpNode From.RC:$src1,
11218                                            (bitconvert
11219                                             (To.VT (To.BroadcastLdFrag addr:$src2))),
11220                                            timm:$src3))),
11221                                  To.RC:$src0)),
11222             (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
11223                                                    To.RC:$src1, addr:$src2,
11224                                                    (ImmXForm timm:$src3))>;
11226   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11227                                  (bitconvert
11228                                   (From.VT (OpNode From.RC:$src1,
11229                                            (bitconvert
11230                                             (To.VT (To.BroadcastLdFrag addr:$src2))),
11231                                            timm:$src3))),
11232                                  To.ImmAllZerosV)),
11233             (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
11234                                                     To.RC:$src1, addr:$src2,
11235                                                     (ImmXForm timm:$src3))>;
11236 }
11238 let Predicates = [HasAVX512] in {
11239   // For 512-bit we lower to the widest element type we can. So we only need
11240   // to handle converting valignq to valignd.
11241   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
11242                                          v16i32_info, ValignqImm32XForm>;
11243 }
11245 let Predicates = [HasVLX] in {
11246   // For 128-bit we lower to the widest element type we can. So we only need
11247   // to handle converting valignq to valignd.
11248   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
11249                                          v4i32x_info, ValignqImm32XForm>;
11250   // For 256-bit we lower to the widest element type we can. So we only need
11251   // to handle converting valignq to valignd.
11252   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
11253                                          v8i32x_info, ValignqImm32XForm>;
11254 }
11256 let Predicates = [HasVLX, HasBWI] in {
11257   // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
11258   defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
11259                                       v16i8x_info, ValignqImm8XForm>;
11260   defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
11261                                       v16i8x_info, ValigndImm8XForm>;
11262 }
11264 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
11265                 SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
11266                 EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
11268 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
11269                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11270   let ExeDomain = _.ExeDomain in {
11271   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11272                     (ins _.RC:$src1), OpcodeStr,
11273                     "$src1", "$src1",
11274                     (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
11275                     Sched<[sched]>;
11277   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11278                   (ins _.MemOp:$src1), OpcodeStr,
11279                   "$src1", "$src1",
11280                   (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
11281             EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
11282             Sched<[sched.Folded]>;
11283   }
11284 }
11286 multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
11287                             X86FoldableSchedWrite sched, X86VectorVTInfo _> :
11288            avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
11289   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11290                   (ins _.ScalarMemOp:$src1), OpcodeStr,
11291                   "${src1}"#_.BroadcastStr,
11292                   "${src1}"#_.BroadcastStr,
11293                   (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
11294              EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
11295              Sched<[sched.Folded]>;
11296 }
11298 multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11299                               X86SchedWriteWidths sched,
11300                               AVX512VLVectorVTInfo VTInfo, Predicate prd> {
11301   let Predicates = [prd] in
11302     defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11303                              EVEX_V512;
11305   let Predicates = [prd, HasVLX] in {
11306     defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11307                               EVEX_V256;
11308     defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
11309                               EVEX_V128;
11310   }
11311 }
11313 multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11314                                X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
11315                                Predicate prd> {
11316   let Predicates = [prd] in
11317     defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11318                               EVEX_V512;
11320   let Predicates = [prd, HasVLX] in {
11321     defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11322                                  EVEX_V256;
11323     defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
11324                                  EVEX_V128;
11325   }
11326 }
11328 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
11329                                  SDNode OpNode, X86SchedWriteWidths sched,
11330                                  Predicate prd> {
11331   defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
11332                                avx512vl_i64_info, prd>, VEX_W;
11333   defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
11334                                avx512vl_i32_info, prd>;
11335 }
11337 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
11338                                  SDNode OpNode, X86SchedWriteWidths sched,
11339                                  Predicate prd> {
11340   defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
11341                               avx512vl_i16_info, prd>, VEX_WIG;
11342   defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
11343                               avx512vl_i8_info, prd>, VEX_WIG;
11344 }
11346 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
11347                                   bits<8> opc_d, bits<8> opc_q,
11348                                   string OpcodeStr, SDNode OpNode,
11349                                   X86SchedWriteWidths sched> {
11350   defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
11351                                     HasAVX512>,
11352               avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
11353                                     HasBWI>;
11354 }
11356 defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
11357                                     SchedWriteVecALU>;
11359 // VPABS: Use the 512-bit version to implement 128/256-bit ops when VLX is unavailable (NoVLX).
11360 let Predicates = [HasAVX512, NoVLX] in {
11361   def : Pat<(v4i64 (abs VR256X:$src)),
11362             (EXTRACT_SUBREG
11363                 (VPABSQZrr
11364                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
11365              sub_ymm)>;
11366   def : Pat<(v2i64 (abs VR128X:$src)),
11367             (EXTRACT_SUBREG
11368                 (VPABSQZrr
11369                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
11370              sub_xmm)>;
11371 }
11373 // Use the 512-bit version to implement the 128/256-bit operations.
11374 multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
11375                                  AVX512VLVectorVTInfo _, Predicate prd> {
11376   let Predicates = [prd, NoVLX] in {
11377     def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
11378               (EXTRACT_SUBREG
11379                 (!cast<Instruction>(InstrStr # "Zrr")
11380                   (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11381                                  _.info256.RC:$src1,
11382                                  _.info256.SubRegIdx)),
11383               _.info256.SubRegIdx)>;
11385     def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
11386               (EXTRACT_SUBREG
11387                 (!cast<Instruction>(InstrStr # "Zrr")
11388                   (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11389                                  _.info128.RC:$src1,
11390                                  _.info128.SubRegIdx)),
11391               _.info128.SubRegIdx)>;
11392   }
11393 }
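// The patterns above widen the 128/256-bit source into an undefined 512-bit
// register (INSERT_SUBREG into IMPLICIT_DEF), execute the Z-suffixed
// instruction, and take back the low subregister; the extra elements computed
// in the upper part are simply discarded by the EXTRACT_SUBREG.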
11395 defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
11396                                         SchedWriteVecIMul, HasCDI>;
11398 // FIXME: Is there a better scheduler class for VPCONFLICT?
11399 defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
11400                                         SchedWriteVecALU, HasCDI>;
11402 // VPLZCNT: Use the 512-bit version to implement 128/256-bit ops when VLX is unavailable (NoVLX).
11403 defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
11404 defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
11406 //===---------------------------------------------------------------------===//
11407 // Counts number of ones - VPOPCNTD and VPOPCNTQ
11408 //===---------------------------------------------------------------------===//
11410 // FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
11411 defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
11412                                      SchedWriteVecALU, HasVPOPCNTDQ>;
11414 defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
11415 defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
11417 //===---------------------------------------------------------------------===//
11418 // Replicate Single FP - MOVSHDUP and MOVSLDUP
11419 //===---------------------------------------------------------------------===//
11421 multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
11422                             X86SchedWriteWidths sched> {
11423   defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
11424                                       avx512vl_f32_info, HasAVX512>, XS;
11425 }
11427 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
11428                                   SchedWriteFShuffle>;
11429 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
11430                                   SchedWriteFShuffle>;
11432 //===----------------------------------------------------------------------===//
11433 // AVX-512 - MOVDDUP
11434 //===----------------------------------------------------------------------===//
11436 multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
11437                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11438   let ExeDomain = _.ExeDomain in {
11439   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11440                    (ins _.RC:$src), OpcodeStr, "$src", "$src",
11441                    (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
11442                    Sched<[sched]>;
11443   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11444                  (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
11445                  (_.VT (_.BroadcastLdFrag addr:$src))>,
11446                  EVEX, EVEX_CD8<_.EltSize, CD8VH>,
11447                  Sched<[sched.Folded]>;
11448   }
11449 }
11451 multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
11452                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
11453   defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
11454                            VTInfo.info512>, EVEX_V512;
11456   let Predicates = [HasAVX512, HasVLX] in {
11457     defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
11458                                 VTInfo.info256>, EVEX_V256;
11459     defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
11460                                    VTInfo.info128>, EVEX_V128;
11461   }
11462 }
11464 multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
11465                           X86SchedWriteWidths sched> {
11466   defm NAME:      avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
11467                                         avx512vl_f64_info>, XD, VEX_W;
11468 }
11470 defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
11472 let Predicates = [HasVLX] in {
11473 def : Pat<(v2f64 (X86VBroadcast f64:$src)),
11474           (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11476 def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11477                         (v2f64 VR128X:$src0)),
11478           (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
11479                            (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11480 def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11481                         immAllZerosV),
11482           (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11483 }
11485 //===----------------------------------------------------------------------===//
11486 // AVX-512 - Unpack Instructions
11487 //===----------------------------------------------------------------------===//
11489 let Uses = []<Register>, mayRaiseFPException = 0 in {
11490 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
11491                                  SchedWriteFShuffleSizes, 0, 1>;
11492 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
11493                                  SchedWriteFShuffleSizes>;
11494 }
11496 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
11497                                        SchedWriteShuffle, HasBWI>;
11498 defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
11499                                        SchedWriteShuffle, HasBWI>;
11500 defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
11501                                        SchedWriteShuffle, HasBWI>;
11502 defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
11503                                        SchedWriteShuffle, HasBWI>;
11505 defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
11506                                        SchedWriteShuffle, HasAVX512>;
11507 defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
11508                                        SchedWriteShuffle, HasAVX512>;
11509 defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
11510                                         SchedWriteShuffle, HasAVX512>;
11511 defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
11512                                         SchedWriteShuffle, HasAVX512>;
11514 //===----------------------------------------------------------------------===//
11515 // AVX-512 - Extract & Insert Integer Instructions
11516 //===----------------------------------------------------------------------===//
11518 multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11519                                                             X86VectorVTInfo _> {
11520   def mr : AVX512Ii8<opc, MRMDestMem, (outs),
11521               (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11522               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11523               [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
11524                        addr:$dst)]>,
11525               EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
11526 }
11528 multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
11529   let Predicates = [HasBWI] in {
11530     def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
11531                   (ins _.RC:$src1, u8imm:$src2),
11532                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11533                   [(set GR32orGR64:$dst,
11534                         (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
11535                   EVEX, TAPD, Sched<[WriteVecExtract]>;
11537     defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
11538   }
11539 }
11541 multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
11542   let Predicates = [HasBWI] in {
11543     def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
11544                   (ins _.RC:$src1, u8imm:$src2),
11545                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11546                   [(set GR32orGR64:$dst,
11547                         (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
11548                   EVEX, PD, Sched<[WriteVecExtract]>;
11550     let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
11551     def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
11552                    (ins _.RC:$src1, u8imm:$src2),
11553                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
11554                    EVEX, TAPD, FoldGenData<NAME#rr>,
11555                    Sched<[WriteVecExtract]>;
11557     defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
11558   }
11559 }
11561 multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
11562                                                             RegisterClass GRC> {
11563   let Predicates = [HasDQI] in {
11564     def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
11565                   (ins _.RC:$src1, u8imm:$src2),
11566                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11567                   [(set GRC:$dst,
11568                       (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
11569                   EVEX, TAPD, Sched<[WriteVecExtract]>;
11571     def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
11572                 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11573                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11574                 [(store (extractelt (_.VT _.RC:$src1),
11575                                     imm:$src2),addr:$dst)]>,
11576                 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
11577                 Sched<[WriteVecExtractSt]>;
11578   }
11579 }
11581 defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
11582 defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
11583 defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
11584 defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
11586 multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11587                                             X86VectorVTInfo _, PatFrag LdFrag,
11588                                             SDPatternOperator immoperator> {
11589   def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
11590       (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
11591       OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11592       [(set _.RC:$dst,
11593           (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
11594       EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
11595 }
11597 multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
11598                                             X86VectorVTInfo _, PatFrag LdFrag> {
11599   let Predicates = [HasBWI] in {
11600     def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11601         (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
11602         OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11603         [(set _.RC:$dst,
11604             (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX_4V,
11605         Sched<[WriteVecInsert]>;
11607     defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
11608   }
11609 }
11611 multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
11612                                          X86VectorVTInfo _, RegisterClass GRC> {
11613   let Predicates = [HasDQI] in {
11614     def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11615         (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
11616         OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11617         [(set _.RC:$dst,
11618             (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
11619         EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
11621     defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
11622                                     _.ScalarLdFrag, imm>, TAPD;
11623   }
11624 }
11626 defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
11627                                      extloadi8>, TAPD, VEX_WIG;
11628 defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
11629                                      extloadi16>, PD, VEX_WIG;
11630 defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
11631 defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
11633 //===----------------------------------------------------------------------===//
11634 // VSHUFPS - VSHUFPD Operations
11635 //===----------------------------------------------------------------------===//
11637 multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
11638                         AVX512VLVectorVTInfo VTInfo_FP>{
11639   defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
11640                                     SchedWriteFShuffle>,
11641                                     EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
11642                                     AVX512AIi8Base, EVEX_4V;
11643 }
11645 defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
11646 defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
11648 //===----------------------------------------------------------------------===//
11649 // AVX-512 - Byte shift Left/Right
11650 //===----------------------------------------------------------------------===//
11652 multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
11653                                Format MRMm, string OpcodeStr,
11654                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11655   def ri : AVX512<opc, MRMr,
11656              (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
11657              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11658              [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
11659              Sched<[sched]>;
11660   def mi : AVX512<opc, MRMm,
11661            (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
11662            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11663            [(set _.RC:$dst,(_.VT (OpNode
11664                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
11665                                  (i8 timm:$src2))))]>,
11666            Sched<[sched.Folded, sched.ReadAfterFold]>;
11667 }
11669 multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
11670                                    Format MRMm, string OpcodeStr,
11671                                    X86SchedWriteWidths sched, Predicate prd>{
11672   let Predicates = [prd] in
11673     defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11674                                  sched.ZMM, v64i8_info>, EVEX_V512;
11675   let Predicates = [prd, HasVLX] in {
11676     defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11677                                     sched.YMM, v32i8x_info>, EVEX_V256;
11678     defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11679                                     sched.XMM, v16i8x_info>, EVEX_V128;
11680   }
11681 }
11682 defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
11683                                        SchedWriteShuffle, HasBWI>,
11684                                        AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11685 defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
11686                                        SchedWriteShuffle, HasBWI>,
11687                                        AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11689 multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
11690                                 string OpcodeStr, X86FoldableSchedWrite sched,
11691                                 X86VectorVTInfo _dst, X86VectorVTInfo _src> {
11692   let isCommutable = 1 in
11693   def rr : AVX512BI<opc, MRMSrcReg,
11694              (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
11695              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11696              [(set _dst.RC:$dst,(_dst.VT
11697                                 (OpNode (_src.VT _src.RC:$src1),
11698                                         (_src.VT _src.RC:$src2))))]>,
11699              Sched<[sched]>;
11700   def rm : AVX512BI<opc, MRMSrcMem,
11701            (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
11702            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11703            [(set _dst.RC:$dst,(_dst.VT
11704                               (OpNode (_src.VT _src.RC:$src1),
11705                               (_src.VT (bitconvert
11706                                         (_src.LdFrag addr:$src2))))))]>,
11707            Sched<[sched.Folded, sched.ReadAfterFold]>;
11708 }
11710 multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11711                                     string OpcodeStr, X86SchedWriteWidths sched,
11712                                     Predicate prd> {
11713   let Predicates = [prd] in
11714     defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11715                                   v8i64_info, v64i8_info>, EVEX_V512;
11716   let Predicates = [prd, HasVLX] in {
11717     defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11718                                      v4i64x_info, v32i8x_info>, EVEX_V256;
11719     defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11720                                      v2i64x_info, v16i8x_info>, EVEX_V128;
11721   }
11722 }
11724 defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11725                                         SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
11727 // Transforms to swizzle an immediate to enable better matching when
11728 // the memory operand isn't in the right place.
11729 def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
11730   // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
11731   uint8_t Imm = N->getZExtValue();
11732   // Swap bits 1/4 and 3/6.
11733   uint8_t NewImm = Imm & 0xa5;
11734   if (Imm & 0x02) NewImm |= 0x10;
11735   if (Imm & 0x10) NewImm |= 0x02;
11736   if (Imm & 0x08) NewImm |= 0x40;
11737   if (Imm & 0x40) NewImm |= 0x08;
11738   return getI8Imm(NewImm, SDLoc(N));
11739 }]>;
11740 def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
11741   // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
11742   uint8_t Imm = N->getZExtValue();
11743   // Swap bits 2/4 and 3/5.
11744   uint8_t NewImm = Imm & 0xc3;
11745   if (Imm & 0x04) NewImm |= 0x10;
11746   if (Imm & 0x10) NewImm |= 0x04;
11747   if (Imm & 0x08) NewImm |= 0x20;
11748   if (Imm & 0x20) NewImm |= 0x08;
11749   return getI8Imm(NewImm, SDLoc(N));
11750 }]>;
11751 def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
11752   // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11753   uint8_t Imm = N->getZExtValue();
11754   // Swap bits 1/2 and 5/6.
11755   uint8_t NewImm = Imm & 0x99;
11756   if (Imm & 0x02) NewImm |= 0x04;
11757   if (Imm & 0x04) NewImm |= 0x02;
11758   if (Imm & 0x20) NewImm |= 0x40;
11759   if (Imm & 0x40) NewImm |= 0x20;
11760   return getI8Imm(NewImm, SDLoc(N));
11761 }]>;
11762 def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
11763   // Convert a VPTERNLOG immediate by moving operand 1 to the end.
11764   uint8_t Imm = N->getZExtValue();
11765   // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
11766   uint8_t NewImm = Imm & 0x81;
11767   if (Imm & 0x02) NewImm |= 0x04;
11768   if (Imm & 0x04) NewImm |= 0x10;
11769   if (Imm & 0x08) NewImm |= 0x40;
11770   if (Imm & 0x10) NewImm |= 0x02;
11771   if (Imm & 0x20) NewImm |= 0x08;
11772   if (Imm & 0x40) NewImm |= 0x20;
11773   return getI8Imm(NewImm, SDLoc(N));
11774 }]>;
11775 def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
11776   // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
11777   uint8_t Imm = N->getZExtValue();
11778   // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
11779   uint8_t NewImm = Imm & 0x81;
11780   if (Imm & 0x02) NewImm |= 0x10;
11781   if (Imm & 0x04) NewImm |= 0x02;
11782   if (Imm & 0x08) NewImm |= 0x20;
11783   if (Imm & 0x10) NewImm |= 0x04;
11784   if (Imm & 0x20) NewImm |= 0x40;
11785   if (Imm & 0x40) NewImm |= 0x08;
11786   return getI8Imm(NewImm, SDLoc(N));
11787 }]>;
11789 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11790                           X86FoldableSchedWrite sched, X86VectorVTInfo _,
11791                           string Name>{
11792   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11793   defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11794                       (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11795                       OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11796                       (OpNode (_.VT _.RC:$src1),
11797                               (_.VT _.RC:$src2),
11798                               (_.VT _.RC:$src3),
11799                               (i8 timm:$src4)), 1, 1>,
11800                       AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
11801   defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11802                     (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11803                     OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11804                     (OpNode (_.VT _.RC:$src1),
11805                             (_.VT _.RC:$src2),
11806                             (_.VT (bitconvert (_.LdFrag addr:$src3))),
11807                             (i8 timm:$src4)), 1, 0>,
11808                     AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11809                     Sched<[sched.Folded, sched.ReadAfterFold]>;
11810   defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11811                     (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11812                     OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
11813                     "$src2, ${src3}"#_.BroadcastStr#", $src4",
11814                     (OpNode (_.VT _.RC:$src1),
11815                             (_.VT _.RC:$src2),
11816                             (_.VT (_.BroadcastLdFrag addr:$src3)),
11817                             (i8 timm:$src4)), 1, 0>, EVEX_B,
11818                     AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11819                     Sched<[sched.Folded, sched.ReadAfterFold]>;
11820   }// Constraints = "$src1 = $dst"
11822   // Additional patterns for matching passthru operand in other positions.
11823   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11824                    (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11825                    _.RC:$src1)),
11826             (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11827              _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11828   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11829                    (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
11830                    _.RC:$src1)),
11831             (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11832              _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11834   // Additional patterns for matching zero masking with loads in other
11835   // positions.
11836   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11837                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
11838                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11839                    _.ImmAllZerosV)),
11840             (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11841              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11842   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11843                    (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11844                     _.RC:$src2, (i8 timm:$src4)),
11845                    _.ImmAllZerosV)),
11846             (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11847              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11849   // Additional patterns for matching masked loads with different
11850   // operand orders.
11851   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11852                    (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11853                     _.RC:$src2, (i8 timm:$src4)),
11854                    _.RC:$src1)),
11855             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11856              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11857   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11858                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
11859                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11860                    _.RC:$src1)),
11861             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11862              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11863   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11864                    (OpNode _.RC:$src2, _.RC:$src1,
11865                     (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
11866                    _.RC:$src1)),
11867             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11868              _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11869   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11870                    (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11871                     _.RC:$src1, (i8 timm:$src4)),
11872                    _.RC:$src1)),
11873             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11874              _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11875   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11876                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
11877                     _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11878                    _.RC:$src1)),
11879             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11880              _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11882   // Additional patterns for matching zero masking with broadcasts in other
11883   // positions.
11884   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11885                    (OpNode (_.BroadcastLdFrag addr:$src3),
11886                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11887                    _.ImmAllZerosV)),
11888             (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11889              _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11890              (VPTERNLOG321_imm8 timm:$src4))>;
11891   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11892                    (OpNode _.RC:$src1,
11893                     (_.BroadcastLdFrag addr:$src3),
11894                     _.RC:$src2, (i8 timm:$src4)),
11895                    _.ImmAllZerosV)),
11896             (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11897              _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11898              (VPTERNLOG132_imm8 timm:$src4))>;
11900   // Additional patterns for matching masked broadcasts with different
11901   // operand orders.
11902   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11903                    (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
11904                     _.RC:$src2, (i8 timm:$src4)),
11905                    _.RC:$src1)),
11906             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11907              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11908   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11909                    (OpNode (_.BroadcastLdFrag addr:$src3),
11910                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11911                    _.RC:$src1)),
11912             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11913              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11914   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11915                    (OpNode _.RC:$src2, _.RC:$src1,
11916                     (_.BroadcastLdFrag addr:$src3),
11917                     (i8 timm:$src4)), _.RC:$src1)),
11918             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11919              _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11920   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11921                    (OpNode _.RC:$src2,
11922                     (_.BroadcastLdFrag addr:$src3),
11923                     _.RC:$src1, (i8 timm:$src4)),
11924                    _.RC:$src1)),
11925             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11926              _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11927   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11928                    (OpNode (_.BroadcastLdFrag addr:$src3),
11929                     _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11930                    _.RC:$src1)),
11931             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11932              _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11933 }
11935 multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
11936                                  AVX512VLVectorVTInfo _> {
11937   let Predicates = [HasAVX512] in
11938     defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
11939                                _.info512, NAME>, EVEX_V512;
11940   let Predicates = [HasAVX512, HasVLX] in {
11941     defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
11942                                _.info128, NAME>, EVEX_V128;
11943     defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
11944                                _.info256, NAME>, EVEX_V256;
11945   }
11946 }
11948 defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
11949                                         avx512vl_i32_info>;
11950 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
11951                                         avx512vl_i64_info>, VEX_W;
11953 // Patterns to implement vnot using vpternlog instead of creating all ones
11954 // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11955 // so that the result is only dependent on src0. But we use the same source
11956 // for all operands to prevent a false dependency.
11957 // TODO: We should maybe have a more generalized algorithm for folding to
11958 // vpternlog.
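// (Immediate 15 = 0x0F sets exactly the truth-table rows where operand 0 is
// zero, so the ternlog computes NOT src0 independent of the other operands.)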
11959 let Predicates = [HasAVX512] in {
11960   def : Pat<(v64i8 (vnot VR512:$src)),
11961             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11962   def : Pat<(v32i16 (vnot VR512:$src)),
11963             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11964   def : Pat<(v16i32 (vnot VR512:$src)),
11965             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11966   def : Pat<(v8i64 (vnot VR512:$src)),
11967             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11968 }
11970 let Predicates = [HasAVX512, NoVLX] in {
11971   def : Pat<(v16i8 (vnot VR128X:$src)),
11972             (EXTRACT_SUBREG
11973              (VPTERNLOGQZrri
11974               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11975               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11976               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11977               (i8 15)), sub_xmm)>;
11978   def : Pat<(v8i16 (vnot VR128X:$src)),
11979             (EXTRACT_SUBREG
11980              (VPTERNLOGQZrri
11981               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11982               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11983               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11984               (i8 15)), sub_xmm)>;
11985   def : Pat<(v4i32 (vnot VR128X:$src)),
11986             (EXTRACT_SUBREG
11987              (VPTERNLOGQZrri
11988               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11989               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11990               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11991               (i8 15)), sub_xmm)>;
11992   def : Pat<(v2i64 (vnot VR128X:$src)),
11993             (EXTRACT_SUBREG
11994              (VPTERNLOGQZrri
11995               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11996               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11997               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11998               (i8 15)), sub_xmm)>;
12000   def : Pat<(v32i8 (vnot VR256X:$src)),
12001             (EXTRACT_SUBREG
12002              (VPTERNLOGQZrri
12003               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12004               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12005               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12006               (i8 15)), sub_ymm)>;
12007   def : Pat<(v16i16 (vnot VR256X:$src)),
12008             (EXTRACT_SUBREG
12009              (VPTERNLOGQZrri
12010               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12011               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12012               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12013               (i8 15)), sub_ymm)>;
12014   def : Pat<(v8i32 (vnot VR256X:$src)),
12015             (EXTRACT_SUBREG
12016              (VPTERNLOGQZrri
12017               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12018               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12019               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12020               (i8 15)), sub_ymm)>;
12021   def : Pat<(v4i64 (vnot VR256X:$src)),
12022             (EXTRACT_SUBREG
12023              (VPTERNLOGQZrri
12024               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12025               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12026               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12027               (i8 15)), sub_ymm)>;
12028 }
12030 let Predicates = [HasVLX] in {
12031   def : Pat<(v16i8 (vnot VR128X:$src)),
12032             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12033   def : Pat<(v8i16 (vnot VR128X:$src)),
12034             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12035   def : Pat<(v4i32 (vnot VR128X:$src)),
12036             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12037   def : Pat<(v2i64 (vnot VR128X:$src)),
12038             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12040   def : Pat<(v32i8 (vnot VR256X:$src)),
12041             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12042   def : Pat<(v16i16 (vnot VR256X:$src)),
12043             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12044   def : Pat<(v8i32 (vnot VR256X:$src)),
12045             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12046   def : Pat<(v4i64 (vnot VR256X:$src)),
12047             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12048 }
12050 //===----------------------------------------------------------------------===//
12051 // AVX-512 - FixupImm
12052 //===----------------------------------------------------------------------===//
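// vfixupimm classifies each element of the second source operand and, using a
// per-element lookup table supplied in the third operand, can replace special
// values (e.g. NaN, zero, infinity) in the result; the immediate controls
// exception reporting for some of those responses.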
12054 multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
12055                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
12056                                   X86VectorVTInfo TblVT>{
12057   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
12058       Uses = [MXCSR], mayRaiseFPException = 1 in {
12059     defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12060                         (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12061                          OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12062                         (X86VFixupimm (_.VT _.RC:$src1),
12063                                       (_.VT _.RC:$src2),
12064                                       (TblVT.VT _.RC:$src3),
12065                                       (i32 timm:$src4))>, Sched<[sched]>;
12066     defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12067                       (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
12068                       OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12069                       (X86VFixupimm (_.VT _.RC:$src1),
12070                                     (_.VT _.RC:$src2),
12071                                     (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
12072                                     (i32 timm:$src4))>,
12073                       Sched<[sched.Folded, sched.ReadAfterFold]>;
12074     defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12075                       (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
12076                     OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
12077                     "$src2, ${src3}"#_.BroadcastStr#", $src4",
12078                       (X86VFixupimm (_.VT _.RC:$src1),
12079                                     (_.VT _.RC:$src2),
12080                                     (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
12081                                     (i32 timm:$src4))>,
12082                     EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
12083   } // Constraints = "$src1 = $dst"
12084 }
12086 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
12087                                       X86FoldableSchedWrite sched,
12088                                       X86VectorVTInfo _, X86VectorVTInfo TblVT>
12089   : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
12090 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
12091   defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12092                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12093                       OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
12094                       "$src2, $src3, {sae}, $src4",
12095                       (X86VFixupimmSAE (_.VT _.RC:$src1),
12096                                        (_.VT _.RC:$src2),
12097                                        (TblVT.VT _.RC:$src3),
12098                                        (i32 timm:$src4))>,
12099                       EVEX_B, Sched<[sched]>;
12100   }
12101 }
12103 multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
12104                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
12105                                   X86VectorVTInfo _src3VT> {
12106   let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
12107       ExeDomain = _.ExeDomain in {
12108     defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
12109                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12110                       OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12111                       (X86VFixupimms (_.VT _.RC:$src1),
12112                                      (_.VT _.RC:$src2),
12113                                      (_src3VT.VT _src3VT.RC:$src3),
12114                                      (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
12115     let Uses = [MXCSR] in
12116     defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
12117                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12118                       OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
12119                       "$src2, $src3, {sae}, $src4",
12120                       (X86VFixupimmSAEs (_.VT _.RC:$src1),
12121                                         (_.VT _.RC:$src2),
12122                                         (_src3VT.VT _src3VT.RC:$src3),
12123                                         (i32 timm:$src4))>,
12124                       EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
12125     defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
12126                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
12127                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12128                      (X86VFixupimms (_.VT _.RC:$src1),
12129                                     (_.VT _.RC:$src2),
12130                                     (_src3VT.VT (scalar_to_vector
12131                                               (_src3VT.ScalarLdFrag addr:$src3))),
12132                                     (i32 timm:$src4))>,
12133                      Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
12134   }
12135 }
12137 multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
12138                                       AVX512VLVectorVTInfo _Vec,
12139                                       AVX512VLVectorVTInfo _Tbl> {
12140   let Predicates = [HasAVX512] in
12141     defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
12142                                 _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
12143                                 EVEX_4V, EVEX_V512;
12144   let Predicates = [HasAVX512, HasVLX] in {
12145     defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
12146                             _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
12147                             EVEX_4V, EVEX_V128;
12148     defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
12149                             _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
12150                             EVEX_4V, EVEX_V256;
12151   }
12152 }
12154 defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
12155                                            SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
12156                           AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
12157 defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
12158                                            SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
12159                           AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
12160 defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
12161                          avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12162 defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
12163                          avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
12165 // Patterns used to select SSE scalar fp arithmetic instructions from
12166 // either:
12168 // (1) a scalar fp operation followed by a blend
12170 // The effect is that the backend no longer emits unnecessary vector
12171 // insert instructions immediately after SSE scalar fp instructions
12172 // like addss or mulss.
12174 // For example, given the following code:
12175 //   __m128 foo(__m128 A, __m128 B) {
12176 //     A[0] += B[0];
12177 //     return A;
12178 //   }
12180 // Previously we generated:
12181 //   addss %xmm0, %xmm1
12182 //   movss %xmm1, %xmm0
12184 // We now generate:
12185 //   addss %xmm1, %xmm0
12187 // (2) a vector packed single/double fp operation followed by a vector insert
12189 // The effect is that the backend converts the packed fp instruction
12190 // followed by a vector insert into a single SSE scalar fp instruction.
12192 // For example, given the following code:
12193 //   __m128 foo(__m128 A, __m128 B) {
12194 //     __m128 C = A + B;
12195 //     return (__m128) {C[0], A[1], A[2], A[3]};
12196 //   }
12198 // Previously we generated:
12199 //   addps %xmm0, %xmm1
12200 //   movss %xmm1, %xmm0
12202 // We now generate:
12203 //   addss %xmm1, %xmm0
12205 // TODO: Some canonicalization in lowering would simplify the number of
12206 // patterns we have to try to match.
12207 multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
12208                                           string OpcPrefix, SDNode MoveNode,
12209                                           X86VectorVTInfo _, PatLeaf ZeroFP> {
12210   let Predicates = [HasAVX512] in {
12211     // extracted scalar math op with insert via movss
12212     def : Pat<(MoveNode
12213                (_.VT VR128X:$dst),
12214                (_.VT (scalar_to_vector
12215                       (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12216                           _.FRC:$src)))),
12217               (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
12218                (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
12219     def : Pat<(MoveNode
12220                (_.VT VR128X:$dst),
12221                (_.VT (scalar_to_vector
12222                       (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12223                           (_.ScalarLdFrag addr:$src))))),
12224               (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;
12226     // extracted masked scalar math op with insert via movss
12227     def : Pat<(MoveNode (_.VT VR128X:$src1),
12228                (scalar_to_vector
12229                 (X86selects_mask VK1WM:$mask,
12230                             (MaskedOp (_.EltVT
12231                                        (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12232                                       _.FRC:$src2),
12233                             _.FRC:$src0))),
12234               (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
12235                (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12236                VK1WM:$mask, _.VT:$src1,
12237                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12238     def : Pat<(MoveNode (_.VT VR128X:$src1),
12239                (scalar_to_vector
12240                 (X86selects_mask VK1WM:$mask,
12241                             (MaskedOp (_.EltVT
12242                                        (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12243                                       (_.ScalarLdFrag addr:$src2)),
12244                             _.FRC:$src0))),
12245               (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
12246                (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12247                VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12249     // extracted masked scalar math op with insert via movss
12250     def : Pat<(MoveNode (_.VT VR128X:$src1),
12251                (scalar_to_vector
12252                 (X86selects_mask VK1WM:$mask,
12253                             (MaskedOp (_.EltVT
12254                                        (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12255                                       _.FRC:$src2), (_.EltVT ZeroFP)))),
12256       (!cast<I>("V"#OpcPrefix#"Zrr_Intkz")
12257           VK1WM:$mask, _.VT:$src1,
12258           (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12259     def : Pat<(MoveNode (_.VT VR128X:$src1),
12260                (scalar_to_vector
12261                 (X86selects_mask VK1WM:$mask,
12262                             (MaskedOp (_.EltVT
12263                                        (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12264                                       (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
12265       (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12266   }
12267 }
12269 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
12270 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
12271 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
12272 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
12274 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
12275 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
12276 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
12277 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
12279 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>;
12280 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>;
12281 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>;
12282 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>;
12284 multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix,
12285                                              SDNode Move, X86VectorVTInfo _> {
12286   let Predicates = [HasAVX512] in {
12287     def : Pat<(_.VT (Move _.VT:$dst,
12288                      (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
12289               (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
12290   }
12291 }
12293 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
12294 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
12295 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>;
12297 //===----------------------------------------------------------------------===//
12298 // AES instructions
12299 //===----------------------------------------------------------------------===//
12301 multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
12302   let Predicates = [HasVLX, HasVAES] in {
12303     defm Z128 : AESI_binop_rm_int<Op, OpStr,
12304                                   !cast<Intrinsic>(IntPrefix),
12305                                   loadv2i64, 0, VR128X, i128mem>,
12306                   EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
12307     defm Z256 : AESI_binop_rm_int<Op, OpStr,
12308                                   !cast<Intrinsic>(IntPrefix#"_256"),
12309                                   loadv4i64, 0, VR256X, i256mem>,
12310                   EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
12311     }
12312     let Predicates = [HasAVX512, HasVAES] in
12313     defm Z    : AESI_binop_rm_int<Op, OpStr,
12314                                   !cast<Intrinsic>(IntPrefix#"_512"),
12315                                   loadv8i64, 0, VR512, i512mem>,
12316                   EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
12317 }
12319 defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
12320 defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
12321 defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
12322 defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
12324 //===----------------------------------------------------------------------===//
12325 // PCLMUL instructions - Carry-less multiplication
12326 //===----------------------------------------------------------------------===//
12328 let Predicates = [HasAVX512, HasVPCLMULQDQ] in
12329 defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
12330                               EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
12332 let Predicates = [HasVLX, HasVPCLMULQDQ] in {
12333 defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
12334                               EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
12336 defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
12337                                 int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
12338                                 EVEX_CD8<64, CD8VF>, VEX_WIG;
12339 }
12341 // Aliases
12342 defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
12343 defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
12344 defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
12346 //===----------------------------------------------------------------------===//
12347 // VBMI2
12348 //===----------------------------------------------------------------------===//
12350 multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
12351                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12352   let Constraints = "$src1 = $dst",
12353       ExeDomain   = VTI.ExeDomain in {
12354     defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12355                 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12356                 "$src3, $src2", "$src2, $src3",
12357                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
12358                 AVX512FMA3Base, Sched<[sched]>;
12359     defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12360                 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12361                 "$src3, $src2", "$src2, $src3",
12362                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12363                         (VTI.VT (VTI.LdFrag addr:$src3))))>,
12364                 AVX512FMA3Base,
12365                 Sched<[sched.Folded, sched.ReadAfterFold]>;
12366   }
12367 }
12369 multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12370                                X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
12371          : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
12372   let Constraints = "$src1 = $dst",
12373       ExeDomain   = VTI.ExeDomain in
12374   defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12375               (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
12376               "${src3}"#VTI.BroadcastStr#", $src2",
12377               "$src2, ${src3}"#VTI.BroadcastStr,
12378               (OpNode VTI.RC:$src1, VTI.RC:$src2,
12379                (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12380               AVX512FMA3Base, EVEX_B,
12381               Sched<[sched.Folded, sched.ReadAfterFold]>;
12382 }
12384 multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
12385                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12386   let Predicates = [HasVBMI2] in
12387   defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12388                                    EVEX_V512;
12389   let Predicates = [HasVBMI2, HasVLX] in {
12390     defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12391                                    EVEX_V256;
12392     defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12393                                    EVEX_V128;
12394   }
12395 }
12397 multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
12398                                       X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12399   let Predicates = [HasVBMI2] in
12400   defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12401                                     EVEX_V512;
12402   let Predicates = [HasVBMI2, HasVLX] in {
12403     defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12404                                     EVEX_V256;
12405     defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12406                                     EVEX_V128;
12407   }
12408 }
12409 multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
12410                            SDNode OpNode, X86SchedWriteWidths sched> {
12411   defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
12412              avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
12413   defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
12414              avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12415   defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
12416              avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
12417 }
12419 multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
12420                            SDNode OpNode, X86SchedWriteWidths sched> {
12421   defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
12422              avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
12423              VEX_W, EVEX_CD8<16, CD8VF>;
12424   defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
12425              OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
12426   defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
12427              sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
12428 }
12430 // Concat & Shift
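// vpshld*/vpshrd* concatenate corresponding elements of the two sources into a
// double-width value and shift it, returning one half, i.e. funnel shifts.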
12431 defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
12432 defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
12433 defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
12434 defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
12436 // Compress
12437 defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
12438                                          avx512vl_i8_info, HasVBMI2>, EVEX,
12439                                          NotMemoryFoldable;
12440 defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
12441                                           avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
12442                                           NotMemoryFoldable;
12443 // Expand
12444 defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
12445                                       avx512vl_i8_info, HasVBMI2>, EVEX;
12446 defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
12447                                       avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
12449 //===----------------------------------------------------------------------===//
12450 // VNNI
12451 //===----------------------------------------------------------------------===//
12453 let Constraints = "$src1 = $dst" in
12454 multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12455                     X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12456                     bit IsCommutable> {
12457   let ExeDomain = VTI.ExeDomain in {
12458   defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12459                                    (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12460                                    "$src3, $src2", "$src2, $src3",
12461                                    (VTI.VT (OpNode VTI.RC:$src1,
12462                                             VTI.RC:$src2, VTI.RC:$src3)),
12463                                    IsCommutable, IsCommutable>,
12464                                    EVEX_4V, T8PD, Sched<[sched]>;
12465   defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12466                                    (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12467                                    "$src3, $src2", "$src2, $src3",
12468                                    (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12469                                             (VTI.VT (VTI.LdFrag addr:$src3))))>,
12470                                    EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
12471                                    Sched<[sched.Folded, sched.ReadAfterFold]>;
12472   defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12473                                    (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
12474                                    OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
12475                                    "$src2, ${src3}"#VTI.BroadcastStr,
12476                                    (OpNode VTI.RC:$src1, VTI.RC:$src2,
12477                                     (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12478                                    EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
12479                                    T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
12480   }
12481 }
12483 multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
12484                        X86SchedWriteWidths sched, bit IsCommutable> {
12485   let Predicates = [HasVNNI] in
12486   defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
12487                            IsCommutable>, EVEX_V512;
12488   let Predicates = [HasVNNI, HasVLX] in {
12489     defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
12490                            IsCommutable>, EVEX_V256;
12491     defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
12492                            IsCommutable>, EVEX_V128;
12493   }
12494 }
12496 // FIXME: Is there a better scheduler class for VPDP?
12497 defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
12498 defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
12499 defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
12500 defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
12502 // Patterns to match VPDPWSSD from existing instructions/intrinsics.
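// vpdpwssd computes dst = src1 + vpmaddwd(src2, src3) per dword element, so an
// add whose other operand is a single-use vpmaddwd (the _su fragment) can be
// folded into a single instruction.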
12503 let Predicates = [HasVNNI] in {
12504   def : Pat<(v16i32 (add VR512:$src1,
12505                          (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
12506             (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
12507   def : Pat<(v16i32 (add VR512:$src1,
12508                          (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
12509             (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
12510 }
12511 let Predicates = [HasVNNI,HasVLX] in {
12512   def : Pat<(v8i32 (add VR256X:$src1,
12513                         (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
12514             (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
12515   def : Pat<(v8i32 (add VR256X:$src1,
12516                         (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
12517             (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
12518   def : Pat<(v4i32 (add VR128X:$src1,
12519                         (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
12520             (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
12521   def : Pat<(v4i32 (add VR128X:$src1,
12522                         (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
12523             (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
12524 }
12526 //===----------------------------------------------------------------------===//
12527 // Bit Algorithms
12528 //===----------------------------------------------------------------------===//
12530 // FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
12531 defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
12532                                    avx512vl_i8_info, HasBITALG>;
12533 defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
12534                                    avx512vl_i16_info, HasBITALG>, VEX_W;
12536 defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
12537 defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
12539 def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
12540                                  (X86Vpshufbitqmb node:$src1, node:$src2), [{
12541   return N->hasOneUse();
12542 }]>;
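// The _su (single-use) fragment above only matches when the vpshufbitqmb node
// has one use; it is used as the masked-pattern operand below so the compare is
// only folded into a masked instruction when its result has no other users.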
12544 multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12545   defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
12546                                 (ins VTI.RC:$src1, VTI.RC:$src2),
12547                                 "vpshufbitqmb",
12548                                 "$src2, $src1", "$src1, $src2",
12549                                 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12550                                 (VTI.VT VTI.RC:$src2)),
12551                                 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12552                                 (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
12553                                 Sched<[sched]>;
12554   defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
12555                                 (ins VTI.RC:$src1, VTI.MemOp:$src2),
12556                                 "vpshufbitqmb",
12557                                 "$src2, $src1", "$src1, $src2",
12558                                 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12559                                 (VTI.VT (VTI.LdFrag addr:$src2))),
12560                                 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12561                                 (VTI.VT (VTI.LdFrag addr:$src2)))>,
12562                                 EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
12563                                 Sched<[sched.Folded, sched.ReadAfterFold]>;
12564 }
12566 multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12567   let Predicates = [HasBITALG] in
12568   defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
12569   let Predicates = [HasBITALG, HasVLX] in {
12570     defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
12571     defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
12572   }
12573 }
12575 // FIXME: Is there a better scheduler class for VPSHUFBITQMB?
12576 defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
12578 //===----------------------------------------------------------------------===//
12579 // GFNI
12580 //===----------------------------------------------------------------------===//
12582 multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12583                                    X86SchedWriteWidths sched> {
12584   let Predicates = [HasGFNI, HasAVX512, HasBWI] in
12585   defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
12586                                 EVEX_V512;
12587   let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12588     defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
12589                                 EVEX_V256;
12590     defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
12591                                 EVEX_V128;
12592   }
12593 }
12595 defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
12596                                           SchedWriteVecALU>,
12597                                           EVEX_CD8<8, CD8VF>, T8PD;
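// The affine instructions interpret the second source as packed 8x8 bit
// matrices (qword elements), so the broadcast form below matches a 64-bit
// broadcast load rather than a byte broadcast.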
12599 multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
12600                                       X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12601                                       X86VectorVTInfo BcstVTI>
12602            : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
12603   let ExeDomain = VTI.ExeDomain in
12604   defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12605                 (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
12606                 OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
12607                 "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
12608                 (OpNode (VTI.VT VTI.RC:$src1),
12609                  (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
12610                  (i8 timm:$src3))>, EVEX_B,
12611                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12612 }
12614 multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12615                                      X86SchedWriteWidths sched> {
12616   let Predicates = [HasGFNI, HasAVX512, HasBWI] in
12617   defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
12618                                            v64i8_info, v8i64_info>, EVEX_V512;
12619   let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12620     defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
12621                                            v32i8x_info, v4i64x_info>, EVEX_V256;
12622     defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
12623                                            v16i8x_info, v2i64x_info>, EVEX_V128;
12624   }
12625 }
12627 defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12628                          X86GF2P8affineinvqb, SchedWriteVecIMul>,
12629                          EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12630 defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12631                          X86GF2P8affineqb, SchedWriteVecIMul>,
12632                          EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12635 //===----------------------------------------------------------------------===//
12636 // AVX5124FMAPS
12637 //===----------------------------------------------------------------------===//
12639 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12640     Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
12641 defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12642                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12643                     "v4fmaddps", "$src3, $src2", "$src2, $src3",
12644                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12645                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12647 defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12648                      (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12649                      "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12650                      []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12651                      Sched<[SchedWriteFMA.ZMM.Folded]>;
12653 defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12654                     (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
12655                     "v4fmaddss", "$src3, $src2", "$src2, $src3",
12656                     []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12657                     Sched<[SchedWriteFMA.Scl.Folded]>;
12659 defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12660                      (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12661                      "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12662                      []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12663                      Sched<[SchedWriteFMA.Scl.Folded]>;
12664 }
12666 //===----------------------------------------------------------------------===//
12667 // AVX5124VNNIW
12668 //===----------------------------------------------------------------------===//
12670 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12671     Constraints = "$src1 = $dst" in {
12672 defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12673                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12674                      "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12675                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12676                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12678 defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12679                      (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12680                      "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12681                      []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12682                      Sched<[SchedWriteFMA.ZMM.Folded]>;
12683 }
12685 let hasSideEffects = 0 in {
12686   let mayStore = 1, SchedRW = [WriteFStoreX] in
12687   def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12688   let mayLoad = 1, SchedRW = [WriteFLoadX] in
12689   def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12690 }
12692 //===----------------------------------------------------------------------===//
12693 // VP2INTERSECT
12694 //===----------------------------------------------------------------------===//
12696 multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
12697   def rr : I<0x68, MRMSrcReg,
12698                   (outs _.KRPC:$dst),
12699                   (ins _.RC:$src1, _.RC:$src2),
12700                   !strconcat("vp2intersect", _.Suffix,
12701                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12702                   [(set _.KRPC:$dst, (X86vp2intersect
12703                             _.RC:$src1, (_.VT _.RC:$src2)))]>,
12704                   EVEX_4V, T8XD, Sched<[sched]>;
12706   def rm : I<0x68, MRMSrcMem,
12707                   (outs _.KRPC:$dst),
12708                   (ins  _.RC:$src1, _.MemOp:$src2),
12709                   !strconcat("vp2intersect", _.Suffix,
12710                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12711                   [(set _.KRPC:$dst, (X86vp2intersect
12712                             _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12713                   EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>,
12714                   Sched<[sched.Folded, sched.ReadAfterFold]>;
12716   def rmb : I<0x68, MRMSrcMem,
12717                   (outs _.KRPC:$dst),
12718                   (ins _.RC:$src1, _.ScalarMemOp:$src2),
12719                   !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12720                              ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12721                   [(set _.KRPC:$dst, (X86vp2intersect
12722                              _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
12723                   EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
12724                   Sched<[sched.Folded, sched.ReadAfterFold]>;
12725 }
12727 multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
12728   let Predicates  = [HasAVX512, HasVP2INTERSECT] in
12729     defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
12731   let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12732     defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
12733     defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
12734   }
12735 }
12737 defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
12738 defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, VEX_W;
12740 multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
12741                              X86SchedWriteWidths sched,
12742                              AVX512VLVectorVTInfo _SrcVTInfo,
12743                              AVX512VLVectorVTInfo _DstVTInfo,
12744                              SDNode OpNode, Predicate prd,
12745                              bit IsCommutable = 0> {
12746   let Predicates = [prd] in
12747     defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
12748                                    _SrcVTInfo.info512, _DstVTInfo.info512,
12749                                    _SrcVTInfo.info512, IsCommutable>,
12750                                    EVEX_V512, EVEX_CD8<32, CD8VF>;
12751   let Predicates = [HasVLX, prd] in {
12752     defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
12753                                       _SrcVTInfo.info256, _DstVTInfo.info256,
12754                                       _SrcVTInfo.info256, IsCommutable>,
12755                                      EVEX_V256, EVEX_CD8<32, CD8VF>;
12756     defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
12757                                       _SrcVTInfo.info128, _DstVTInfo.info128,
12758                                       _SrcVTInfo.info128, IsCommutable>,
12759                                       EVEX_V128, EVEX_CD8<32, CD8VF>;
12760   }
12761 }
12763 let ExeDomain = SSEPackedSingle in
12764 defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12765                                         SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
12766                                         avx512vl_f32_info, avx512vl_i16_info,
12767                                         X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
12769 // Truncate Float to BFloat16
12770 multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12771                              X86SchedWriteWidths sched> {
12772   let ExeDomain = SSEPackedSingle in {
12773   let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
12774     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
12775                             X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12776   }
12777   let Predicates = [HasBF16, HasVLX] in {
12778     let Uses = []<Register>, mayRaiseFPException = 0 in {
12779     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
12780                                null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12781                                VK4WM>, EVEX_V128;
12782     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
12783                                X86cvtneps2bf16, X86cvtneps2bf16,
12784                                sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12785     }
12786   } // Predicates = [HasBF16, HasVLX]
12787   } // ExeDomain = SSEPackedSingle
12789   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12790                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12791                   VR128X:$src), 0>;
12792   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12793                   (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12794                   f128mem:$src), 0, "intel">;
12795   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12796                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12797                   VR256X:$src), 0>;
12798   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12799                   (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12800                   f256mem:$src), 0, "intel">;
12801 }
12803 defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
12804                                        SchedWriteCvtPD2PS>, T8XS,
12805                                        EVEX_CD8<32, CD8VF>;
12807 let Predicates = [HasBF16, HasVLX] in {
12808   // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
12809   // patterns have been disabled with null_frag.
12810   def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
12811             (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12812   def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
12813                               VK4WM:$mask),
12814             (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
12815   def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
12816                               VK4WM:$mask),
12817             (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
12819   def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
12820             (VCVTNEPS2BF16Z128rm addr:$src)>;
12821   def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
12822                               VK4WM:$mask),
12823             (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12824   def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
12825                               VK4WM:$mask),
12826             (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
12828   def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
12829                                      (X86VBroadcastld32 addr:$src)))),
12830             (VCVTNEPS2BF16Z128rmb addr:$src)>;
12831   def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12832                               (v8i16 VR128X:$src0), VK4WM:$mask),
12833             (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12834   def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12835                               v8i16x_info.ImmAllZerosV, VK4WM:$mask),
12836             (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
12837 }
12839 let Constraints = "$src1 = $dst" in {
12840 multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
12841                               X86FoldableSchedWrite sched,
12842                               X86VectorVTInfo _, X86VectorVTInfo src_v> {
12843   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12844                            (ins src_v.RC:$src2, src_v.RC:$src3),
12845                            OpcodeStr, "$src3, $src2", "$src2, $src3",
12846                            (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
12847                            EVEX_4V, Sched<[sched]>;
12849   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12850                                (ins src_v.RC:$src2, src_v.MemOp:$src3),
12851                                OpcodeStr, "$src3, $src2", "$src2, $src3",
12852                                (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12853                                (src_v.LdFrag addr:$src3)))>, EVEX_4V,
12854                                Sched<[sched.Folded, sched.ReadAfterFold]>;
12856   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12857                   (ins src_v.RC:$src2, src_v.ScalarMemOp:$src3),
12858                   OpcodeStr,
12859                   !strconcat("${src3}", _.BroadcastStr,", $src2"),
12860                   !strconcat("$src2, ${src3}", _.BroadcastStr),
12861                   (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12862                   (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
12863                   EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
12864 }
12866 } // Constraints = "$src1 = $dst"
12868 multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
12869                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
12870                                  AVX512VLVectorVTInfo src_v, Predicate prd> {
12871   let Predicates = [prd] in {
12872     defm Z    : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
12873                                    src_v.info512>, EVEX_V512;
12874   }
12875   let Predicates = [HasVLX, prd] in {
12876     defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
12877                                    src_v.info256>, EVEX_V256;
12878     defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
12879                                    src_v.info128>, EVEX_V128;
12880   }
12881 }
12883 let ExeDomain = SSEPackedSingle in
12884 defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
12885                                        avx512vl_f32_info, avx512vl_i32_info,
12886                                        HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
12888 //===----------------------------------------------------------------------===//
12889 // AVX512FP16
12890 //===----------------------------------------------------------------------===//
12892 let Predicates = [HasFP16] in {
12893 // Move word (r/m16) to packed word register
12894 def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
12895                       "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
12896 def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
12897                       "vmovw\t{$src, $dst|$dst, $src}",
12898                       [(set VR128X:$dst,
12899                         (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
12900                       T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;
12902 def : Pat<(f16 (bitconvert GR16:$src)),
12903           (f16 (COPY_TO_REGCLASS
12904                 (VMOVW2SHrr
12905                  (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
12906                 FR16X))>;
12907 def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))),
12908           (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
12909 def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))),
12910           (VMOVW2SHrr GR32:$src)>;
12911 // FIXME: We should really find a way to improve these patterns.
12912 def : Pat<(v8i32 (X86vzmovl
12913                   (insert_subvector undef,
12914                                     (v4i32 (scalar_to_vector
12915                                             (and GR32:$src, 0xffff))),
12916                                     (iPTR 0)))),
12917           (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
12918 def : Pat<(v16i32 (X86vzmovl
12919                    (insert_subvector undef,
12920                                      (v4i32 (scalar_to_vector
12921                                              (and GR32:$src, 0xffff))),
12922                                      (iPTR 0)))),
12923           (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
12925 def : Pat<(v8i16 (X86vzmovl (v8i16 (scalar_to_vector (i16 (trunc GR32:$src)))))),
12926           (VMOVW2SHrr GR32:$src)>;
12928 // The 128-bit vmovw instruction writes zeros to the upper bits of the destination register.
12929 def : Pat<(v8i16 (X86vzload16 addr:$src)),
12930           (VMOVWrm addr:$src)>;
12931 def : Pat<(v16i16 (X86vzload16 addr:$src)),
12932           (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
12934 // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
12935 def : Pat<(v32i16 (X86vzload16 addr:$src)),
12936           (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
12938 def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))),
12939           (VMOVWrm addr:$src)>;
12940 def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))),
12941           (VMOVWrm addr:$src)>;
12942 def : Pat<(v8i32 (X86vzmovl
12943                   (insert_subvector undef,
12944                                     (v4i32 (scalar_to_vector
12945                                             (i32 (zextloadi16 addr:$src)))),
12946                                     (iPTR 0)))),
12947           (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
12948 def : Pat<(v16i32 (X86vzmovl
12949                    (insert_subvector undef,
12950                                      (v4i32 (scalar_to_vector
12951                                              (i32 (zextloadi16 addr:$src)))),
12952                                      (iPTR 0)))),
12953           (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
12955 // Move word from xmm register to r/m16
12956 def VMOVSH2Wrr  : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
12957                        "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveToGpr]>;
12958 def VMOVWmr  : AVX512<0x7E, MRMDestMem, (outs),
12959                        (ins i16mem:$dst, VR128X:$src),
12960                        "vmovw\t{$src, $dst|$dst, $src}",
12961                        [(store (i16 (extractelt (v8i16 VR128X:$src),
12962                                      (iPTR 0))), addr:$dst)]>,
12963                        T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;
12965 def : Pat<(i16 (bitconvert FR16X:$src)),
12966           (i16 (EXTRACT_SUBREG
12967                 (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)),
12968                 sub_16bit))>;
12969 def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
12970           (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>;
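// Illustrative lowering of (i16 (bitconvert FR16X:$src)) with the patterns
// above (AT&T syntax, register names are only an example):
//   vmovw %xmm0, %eax        // VMOVSH2Wrr
// followed by reading the low 16 bits of %eax (sub_16bit).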
12973 // Allow "vmovw" to use GR64
12974 let hasSideEffects = 0 in {
12975   def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
12976                      "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
12977   def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
12978                      "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>;
12979 }
12981 // Convert 16-bit float to i16/u16
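// The 512-bit form below is gated on HasFP16 and also gets a rounding-control
// variant; the 128/256-bit forms additionally require HasVLX.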
12982 multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12983                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12984                           AVX512VLVectorVTInfo _Dst,
12985                           AVX512VLVectorVTInfo _Src,
12986                           X86SchedWriteWidths sched> {
12987   let Predicates = [HasFP16] in {
12988     defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
12989                             OpNode, MaskOpNode, sched.ZMM>,
12990              avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
12991                                OpNodeRnd, sched.ZMM>, EVEX_V512;
12992   }
12993   let Predicates = [HasFP16, HasVLX] in {
12994     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
12995                                OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
12996     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
12997                                OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
12998   }
12999 }
13001 // Convert 16-bit float to i16/u16 with truncation
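// Same structure as above, except the 512-bit form gets a SAE
// (suppress-all-exceptions) variant instead of a rounding-control variant.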
13002 multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13003                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13004                            AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src,
13005                            X86SchedWriteWidths sched> {
13006   let Predicates = [HasFP16] in {
13007     defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
13008                             OpNode, MaskOpNode, sched.ZMM>,
13009              avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
13010                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13011   }
13012   let Predicates = [HasFP16, HasVLX] in {
13013     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
13014                                OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
13015     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
13016                                OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
13017   }
13018 }
13020 defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
13021                                 X86cvtp2UIntRnd, avx512vl_i16_info,
13022                                 avx512vl_f16_info, SchedWriteCvtPD2DQ>,
13023                                 T_MAP5PS, EVEX_CD8<16, CD8VF>;
13024 defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
13025                                 X86VUintToFpRnd, avx512vl_f16_info,
13026                                 avx512vl_i16_info, SchedWriteCvtPD2DQ>,
13027                                 T_MAP5XD, EVEX_CD8<16, CD8VF>;
13028 defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si,
13029                                 X86cvttp2si, X86cvttp2siSAE,
13030                                 avx512vl_i16_info, avx512vl_f16_info,
13031                                 SchedWriteCvtPD2DQ>, T_MAP5PD, EVEX_CD8<16, CD8VF>;
13032 defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui,
13033                                 X86cvttp2ui, X86cvttp2uiSAE,
13034                                 avx512vl_i16_info, avx512vl_f16_info,
13035                                 SchedWriteCvtPD2DQ>, T_MAP5PS, EVEX_CD8<16, CD8VF>;
13036 defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
13037                                 X86cvtp2IntRnd, avx512vl_i16_info,
13038                                 avx512vl_f16_info, SchedWriteCvtPD2DQ>,
13039                                 T_MAP5PD, EVEX_CD8<16, CD8VF>;
13040 defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
13041                                 X86VSintToFpRnd, avx512vl_f16_info,
13042                                 avx512vl_i16_info, SchedWriteCvtPD2DQ>,
13043                                 T_MAP5XS, EVEX_CD8<16, CD8VF>;
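// For example (illustrative, AT&T syntax): "vcvtph2w %zmm1, %zmm0" converts
// 32 packed half-precision values in %zmm1 to 32 signed 16-bit integers.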
13045 // Convert Half to Signed/Unsigned Doubleword
13046 multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13047                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13048                            X86SchedWriteWidths sched> {
13049   let Predicates = [HasFP16] in {
13050     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
13051                             MaskOpNode, sched.ZMM>,
13052              avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
13053                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
13054   }
13055   let Predicates = [HasFP16, HasVLX] in {
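    // The 128-bit form only converts the low 4 halves of the v8f16 source,
    // hence the f64mem operand and the explicit "{1to4}" broadcast string.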
13056     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
13057                                MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
13058     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
13059                                MaskOpNode, sched.YMM>, EVEX_V256;
13060   }
13061 }
13063 // Convert Half to Signed/Unsigned Doubleword with truncation
13064 multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13065                             SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13066                             X86SchedWriteWidths sched> {
13067   let Predicates = [HasFP16] in {
13068     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
13069                             MaskOpNode, sched.ZMM>,
13070              avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
13071                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
13072   }
13073   let Predicates = [HasFP16, HasVLX] in {
13074     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
13075                                MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
13076     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
13077                                MaskOpNode, sched.YMM>, EVEX_V256;
13078   }
13079 }
13082 defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
13083                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
13084                                  EVEX_CD8<16, CD8VH>;
13085 defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
13086                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PS,
13087                                  EVEX_CD8<16, CD8VH>;
13089 defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
13090                                 X86cvttp2si, X86cvttp2siSAE,
13091                                 SchedWriteCvtPS2DQ>, T_MAP5XS,
13092                                 EVEX_CD8<16, CD8VH>;
13094 defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
13095                                  X86cvttp2ui, X86cvttp2uiSAE,
13096                                  SchedWriteCvtPS2DQ>, T_MAP5PS,
13097                                  EVEX_CD8<16, CD8VH>;
13099 // Convert Half to Signed/Unsigned Quadword
13100 multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13101                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13102                            X86SchedWriteWidths sched> {
13103   let Predicates = [HasFP16] in {
13104     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
13105                             MaskOpNode, sched.ZMM>,
13106              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
13107                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13108   }
13109   let Predicates = [HasFP16, HasVLX] in {
13110     // Explicitly specified broadcast string, since we take only 2 elements
13111     // from v8f16x_info source
13112     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
13113                                MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
13114                                EVEX_V128;
13115     // Explicitly specified broadcast string, since we take only 4 elements
13116     // from v8f16x_info source
13117     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
13118                                MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
13119                                EVEX_V256;
13120   }
13121 }
13123 // Convert Half to Signed/Unsigned Quadword with truncation
13124 multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13125                             SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13126                             X86SchedWriteWidths sched> {
13127   let Predicates = [HasFP16] in {
13128     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
13129                             MaskOpNode, sched.ZMM>,
13130              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
13131                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
13132   }
13133   let Predicates = [HasFP16, HasVLX] in {
13134     // Explicitly specified broadcast string, since we take only 2 elements
13135     // from v8f16x_info source
13136     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
13137                                MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
13138     // Explicitly specified broadcast string, since we take only 4 elements
13139     // from v8f16x_info source
13140     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
13141                                MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
13142   }
13143 }
13145 defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
13146                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
13147                                  EVEX_CD8<16, CD8VQ>;
13149 defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
13150                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
13151                                  EVEX_CD8<16, CD8VQ>;
13153 defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
13154                                  X86cvttp2si, X86cvttp2siSAE,
13155                                  SchedWriteCvtPS2DQ>, T_MAP5PD,
13156                                  EVEX_CD8<16, CD8VQ>;
13158 defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
13159                                  X86cvttp2ui, X86cvttp2uiSAE,
13160                                  SchedWriteCvtPS2DQ>, T_MAP5PD,
13161                                  EVEX_CD8<16, CD8VQ>;
13163 // Convert Signed/Unsigned Quadword to Half
13164 multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13165                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13166                            X86SchedWriteWidths sched> {
13167   // We need "x"/"y"/"z" suffixes in order to distinguish between the 128-,
13168   // 256- and 512-bit memory forms of these instructions in the asm parser,
13169   // since they all have the same destination type, 'v8f16x_info'. We also
13170   // specify the broadcast string explicitly for the same reason.
13171   let Predicates = [HasFP16] in {
13172     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
13173                             MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
13174              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
13175                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13176   }
13177   let Predicates = [HasFP16, HasVLX] in {
13178     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
13179                                null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
13180                                i128mem, VK2WM>,
13181                                EVEX_V128, NotEVEX2VEXConvertible;
13182     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
13183                                null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
13184                                i256mem, VK4WM>,
13185                                EVEX_V256, NotEVEX2VEXConvertible;
13186   }
13188   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
13189                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
13190                   VR128X:$src), 0, "att">;
13191   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
13192                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
13193                   VK2WM:$mask, VR128X:$src), 0, "att">;
13194   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
13195                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
13196                   VK2WM:$mask, VR128X:$src), 0, "att">;
13197   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
13198                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
13199                   i64mem:$src), 0, "att">;
13200   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
13201                   "$dst {${mask}}, ${src}{1to2}}",
13202                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
13203                   VK2WM:$mask, i64mem:$src), 0, "att">;
13204   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
13205                   "$dst {${mask}} {z}, ${src}{1to2}}",
13206                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
13207                   VK2WM:$mask, i64mem:$src), 0, "att">;
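  // e.g. (illustrative, AT&T syntax): "vcvtqq2phx %xmm1, %xmm0" selects the
  // Z128 register form, while "vcvtqq2phx (%rax){1to2}, %xmm0" selects the
  // Z128 broadcast form.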
13209   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
13210                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
13211                   VR256X:$src), 0, "att">;
13212   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
13213                   "$dst {${mask}}, $src}",
13214                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
13215                   VK4WM:$mask, VR256X:$src), 0, "att">;
13216   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
13217                   "$dst {${mask}} {z}, $src}",
13218                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
13219                   VK4WM:$mask, VR256X:$src), 0, "att">;
13220   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
13221                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
13222                   i64mem:$src), 0, "att">;
13223   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
13224                   "$dst {${mask}}, ${src}{1to4}}",
13225                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
13226                   VK4WM:$mask, i64mem:$src), 0, "att">;
13227   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
13228                   "$dst {${mask}} {z}, ${src}{1to4}}",
13229                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
13230                   VK4WM:$mask, i64mem:$src), 0, "att">;
13232   def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
13233                   (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
13234                   VR512:$src), 0, "att">;
13235   def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
13236                   "$dst {${mask}}, $src}",
13237                   (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
13238                   VK8WM:$mask, VR512:$src), 0, "att">;
13239   def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
13240                   "$dst {${mask}} {z}, $src}",
13241                   (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
13242                   VK8WM:$mask, VR512:$src), 0, "att">;
13243   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
13244                   (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
13245                   i64mem:$src), 0, "att">;
13246   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
13247                   "$dst {${mask}}, ${src}{1to8}}",
13248                   (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
13249                   VK8WM:$mask, i64mem:$src), 0, "att">;
13250   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
13251                   "$dst {${mask}} {z}, ${src}{1to8}}",
13252                   (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
13253                   VK8WM:$mask, i64mem:$src), 0, "att">;
13254 }
13256 defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
13257                             X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5PS,
13258                             EVEX_CD8<64, CD8VF>;
13260 defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
13261                             X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5XD,
13262                             EVEX_CD8<64, CD8VF>;
13264 // Convert half to signed/unsigned int 32/64
13265 defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
13266                                    X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
13267                                    T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13268 defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
13269                                    X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
13270                                    T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
13271 defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
13272                                    X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
13273                                    T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13274 defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
13275                                    X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
13276                                    T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
13278 defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
13279                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13280                         "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13281 defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
13282                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13283                         "{q}", HasFP16>, VEX_W, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13284 defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
13285                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13286                         "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13287 defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
13288                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13289                         "{q}", HasFP16>, T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
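// e.g. (illustrative, AT&T syntax): "vcvttsh2si (%rax), %ecx" truncates a
// half-precision value loaded from memory to a signed 32-bit integer.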
13291 let Predicates = [HasFP16] in {
13292   defm VCVTSI2SHZ  : avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
13293                                    v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
13294                                    T_MAP5XS, EVEX_CD8<32, CD8VT1>;
13295   defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
13296                                    v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
13297                                    T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
13298   defm VCVTUSI2SHZ   : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
13299                                     v8f16x_info, i32mem, loadi32,
13300                                     "cvtusi2sh","l">, T_MAP5XS, EVEX_CD8<32, CD8VT1>;
13301   defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
13302                                     v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
13303                                     T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
13304   def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13305               (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13307   def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13308               (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
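  // With no explicit size suffix, the memory forms of vcvtsi2sh/vcvtusi2sh
  // are treated as the 32-bit-integer forms by these aliases.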
13311   def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
13312             (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13313   def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
13314             (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13316   def : Pat<(f16 (any_sint_to_fp GR32:$src)),
13317             (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13318   def : Pat<(f16 (any_sint_to_fp GR64:$src)),
13319             (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13321   def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
13322             (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13323   def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
13324             (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13326   def : Pat<(f16 (any_uint_to_fp GR32:$src)),
13327             (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13328   def : Pat<(f16 (any_uint_to_fp GR64:$src)),
13329             (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13331   // Patterns used for matching vcvtsi2sh intrinsic sequences from clang,
13332   // which produce unnecessary vmovsh instructions.
13333   def : Pat<(v8f16 (X86Movsh
13334                      (v8f16 VR128X:$dst),
13335                      (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
13336             (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13338   def : Pat<(v8f16 (X86Movsh
13339                      (v8f16 VR128X:$dst),
13340                      (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
13341             (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13343   def : Pat<(v8f16 (X86Movsh
13344                      (v8f16 VR128X:$dst),
13345                      (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
13346             (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13348   def : Pat<(v8f16 (X86Movsh
13349                      (v8f16 VR128X:$dst),
13350                      (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
13351             (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13353   def : Pat<(v8f16 (X86Movsh
13354                      (v8f16 VR128X:$dst),
13355                      (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
13356             (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13358   def : Pat<(v8f16 (X86Movsh
13359                      (v8f16 VR128X:$dst),
13360                      (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
13361             (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13363   def : Pat<(v8f16 (X86Movsh
13364                      (v8f16 VR128X:$dst),
13365                      (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
13366             (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13368   def : Pat<(v8f16 (X86Movsh
13369                      (v8f16 VR128X:$dst),
13370                      (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
13371             (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
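  // The *_Int forms insert the converted scalar into the destination vector
  // themselves, so the extra vmovsh from the intrinsic lowering is folded away.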
13372 } // Predicates = [HasFP16]
13374 let Predicates = [HasFP16, HasVLX] in {
13375   // Special patterns to allow use of X86VMSintToFP for masking. Instruction
13376   // patterns have been disabled with null_frag.
13377   def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
13378             (VCVTQQ2PHZ256rr VR256X:$src)>;
13379   def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13380                            VK4WM:$mask),
13381             (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13382   def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13383                            VK4WM:$mask),
13384             (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13386   def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
13387             (VCVTQQ2PHZ256rm addr:$src)>;
13388   def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13389                            VK4WM:$mask),
13390             (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13391   def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13392                            VK4WM:$mask),
13393             (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13395   def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13396             (VCVTQQ2PHZ256rmb addr:$src)>;
13397   def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13398                            (v8f16 VR128X:$src0), VK4WM:$mask),
13399             (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13400   def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13401                            v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13402             (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13404   def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
13405             (VCVTQQ2PHZ128rr VR128X:$src)>;
13406   def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13407                            VK2WM:$mask),
13408             (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13409   def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13410                            VK2WM:$mask),
13411             (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13413   def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
13414             (VCVTQQ2PHZ128rm addr:$src)>;
13415   def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13416                            VK2WM:$mask),
13417             (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13418   def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13419                            VK2WM:$mask),
13420             (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13422   def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13423             (VCVTQQ2PHZ128rmb addr:$src)>;
13424   def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13425                            (v8f16 VR128X:$src0), VK2WM:$mask),
13426             (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13427   def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13428                            v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13429             (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
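  // The same set of patterns is repeated below for the unsigned conversions
  // (X86VMUintToFP / VCVTUQQ2PH).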
13431   // Special patterns to allow use of X86VMUintToFP for masking. Instruction
13432   // patterns have been disabled with null_frag.
13433   def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
13434             (VCVTUQQ2PHZ256rr VR256X:$src)>;
13435   def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13436                            VK4WM:$mask),
13437             (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13438   def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13439                            VK4WM:$mask),
13440             (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13442   def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
13443             (VCVTUQQ2PHZ256rm addr:$src)>;
13444   def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13445                            VK4WM:$mask),
13446             (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13447   def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13448                            VK4WM:$mask),
13449             (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13451   def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13452             (VCVTUQQ2PHZ256rmb addr:$src)>;
13453   def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13454                            (v8f16 VR128X:$src0), VK4WM:$mask),
13455             (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13456   def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13457                            v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13458             (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13460   def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
13461             (VCVTUQQ2PHZ128rr VR128X:$src)>;
13462   def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13463                            VK2WM:$mask),
13464             (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13465   def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13466                            VK2WM:$mask),
13467             (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13469   def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
13470             (VCVTUQQ2PHZ128rm addr:$src)>;
13471   def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13472                            VK2WM:$mask),
13473             (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13474   def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13475                            VK2WM:$mask),
13476             (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13478   def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13479             (VCVTUQQ2PHZ128rmb addr:$src)>;
13480   def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13481                            (v8f16 VR128X:$src0), VK2WM:$mask),
13482             (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13483   def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13484                            v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13485             (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;