//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// Group template arguments that can be derived from the vector type (EltNum x
// EltVT).  These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types; in that case numelts is 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding mask register pair class.
  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The mask VT.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for the vector VT. For vector types it will be
  // v # NumElts # EltVT, so for a vector of 8 elements of i32 it will be v8i32.
  // It is a little more complex for scalar types, where NumElts = 1.
  // In that case we build v8f16, v4f32 or v2f64.
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 16), 8,
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts))), NumElts) # EltVT;

  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);

  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  string EltSizeName = !subst("i", "", !subst("f", "", !subst("b", "", EltTypeName)));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types
  string TypeVariantName = !subst("b", "", !subst(EltSizeName, "", EltTypeName));

  // Size of RC in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(!subst("b", "", EltTypeName) # "mem");
  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f16"), !cast<Operand>("shmem"),
                           !if (!eq (EltTypeName, "bf16"), !cast<Operand>("shmem"),
                           !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?))));

  // Load patterns
  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);

  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);

  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # !subst("b", "", EltTypeName));
  PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);

  PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f16"), !cast<PatFrags>("sse_load_f16"),
                               !if (!eq (EltTypeName, "bf16"), !cast<PatFrags>("sse_load_f16"),
                               !if (!eq (EltTypeName, "f32"), !cast<PatFrags>("sse_load_f32"),
                               !if (!eq (EltTypeName, "f64"), !cast<PatFrags>("sse_load_f64"), ?))));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format.  This is only
  // defined for NumElts <= 8.
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     !if (!eq (EltTypeName, "f16"), SSEPackedSingle, // FIXME?
                     !if (!eq (EltTypeName, "bf16"), SSEPackedSingle, // FIXME?
                     SSEPackedInt))));

  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X,
                      !if (!eq (EltTypeName, "f16"), FR16X,
                      !if (!eq (EltTypeName, "bf16"), FR16X,
                      FR64X)));

  dag ImmAllZerosV = (VT immAllZerosV);

  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}

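// For illustration: with the defs below, an instantiation such as
// X86VectorVTInfo<16, i32, VR512, "d"> resolves (assuming the usual X86
// register and pattern-fragment definitions) to VT = v16i32, KRC/KRCWM =
// VK16/VK16WM, KVT = v16i1, EltSize = 32, Size = 512, MemOp = i512mem,
// LdFrag = loadv16i32, BroadcastStr = "{1to16}" and ZSuffix = "Z"; these are
// exactly the names the masking multiclasses below expect to exist.
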
def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v32f16_info : X86VectorVTInfo<32, f16, VR512, "ph">;
def v32bf16_info: X86VectorVTInfo<32, bf16, VR512, "pbf">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;

// "x" in v32i8x_info means RC = VR256X
def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v16f16x_info : X86VectorVTInfo<16, f16, VR256X, "ph">;
def v16bf16x_info: X86VectorVTInfo<16, bf16, VR256X, "pbf">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v8f16x_info  : X86VectorVTInfo<8,  f16, VR128X, "ph">;
def v8bf16x_info : X86VectorVTInfo<8,  bf16, VR128X, "pbf">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;

// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows us to use the same masking logic.
def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
def f16x_info    : X86VectorVTInfo<1,  f16, VR128X, "sh">;
def bf16x_info   : X86VectorVTInfo<1,  bf16, VR128X, "sbf">;
def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;

class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}

def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                             v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                             v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                             v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                             v2i64x_info>;
def avx512vl_f16_info : AVX512VLVectorVTInfo<v32f16_info, v16f16x_info,
                                             v8f16x_info>;
def avx512vl_bf16_info : AVX512VLVectorVTInfo<v32bf16_info, v16bf16x_info,
                                             v8bf16x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                             v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                             v2f64x_info>;

class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;
  RegisterClass KRCWM = _krcwm;
  ValueType KVT = _vt;
}

def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;

// Used for matching masked operations. Ensures the operation part only has a
// single use.
def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
                           (vselect node:$mask, node:$src1, node:$src2), [{
  return isProfitableToFormMaskedOp(N);
}]>;

def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
                              (X86selects node:$mask, node:$src1, node:$src2), [{
  return isProfitableToFormMaskedOp(N);
}]>;

// This multiclass generates the masking variants from the non-masking
// variant.  It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking, which can be provided as
// template arguments.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable,
                                  string ClobberConstraint = ""> {
  let isCommutable = IsCommutable, Constraints = ClobberConstraint in
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>,
              EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
                               !if(!eq(MaskingConstraint, ""), ClobberConstraint,
                                   !strconcat(ClobberConstraint, ", ", MaskingConstraint)));
    }

  // Zero-masking does not add any restrictions to the operand-commuting
  // transformation, so it is OK to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
      Constraints = ClobberConstraint in
    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                       ZeroMaskingPattern>,
              EVEX_KZ;
}

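// Sketch of what an instantiation expands to (hypothetical records, not defs
// from this file): a use such as
//   defm VFOOPSZ : AVX512_maskable_custom<...>;
// produces three instructions, VFOOPSZ, VFOOPSZk and VFOOPSZkz, whose AT&T
// assembly renders roughly as
//   vfoops <srcs>, $dst
//   vfoops <srcs>, $dst {%k1}        (merge-masking, EVEX_K)
//   vfoops <srcs>, $dst {%k1} {z}    (zero-masking, EVEX_KZ)
// with the ISel behaviour supplied entirely by the Pattern, MaskingPattern
// and ZeroMaskingPattern template arguments.
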
// Common base class of AVX512_maskable and AVX512_maskable_3src.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDPatternOperator Select = vselect_mask,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable,
                                  string ClobberConstraint = ""> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag MaskRHS,
                           string ClobberConstraint = "",
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                          [(set _.RC:$dst,
                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                          "$src0 = $dst", IsCommutable, IsKCommutable,
                          IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDPatternOperator Select = vselect_mask,
                           string ClobberConstraint = ""> :
   AVX512_maskable_common<O, F, _, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                          IsKZCommutable, ClobberConstraint>;

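// A minimal sketch of a typical use (hypothetical opcode and names, not a def
// from this file): a packed 512-bit operation could be wrapped as
//   defm VFOOPSZ : AVX512_maskable<0x58, MRMSrcReg, v16f32_info,
//                    (outs VR512:$dst), (ins VR512:$src1, VR512:$src2),
//                    "vfoops", "$src2, $src1", "$src1, $src2",
//                    (v16f32 (fadd VR512:$src1, VR512:$src2)), 1>;
// From the single RHS dag this derives the unmasked pattern, the merge-masked
// pattern (Select $mask, RHS, $src0) with the "$src0 = $dst" tie, and the
// zero-masked pattern selecting against ImmAllZerosV.
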
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS> :
   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                   RHS, 0, 0, 0, X86selects_mask>;

// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDPatternOperator Select = vselect_mask,
                                bit MaskOnly = 0> :
   AVX512_maskable_common<O, F, _, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          !if(MaskOnly, (null_frag), RHS),
                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                          Select, "", IsCommutable, IsKCommutable>;

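// Sketch of the intended use (assumed shape, not a def from this file):
// because $src1 is prepended to NonTiedIns and also serves as the fall-through
// value, an instantiation along the lines of
//   defm rr : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
//                                  (ins _.RC:$src2, _.RC:$src3), ...>;
// yields a merge-masked pattern (Select $mask, RHS, $src1): masked-off lanes
// keep the value of the tied operand. The "$src1 = $dst" tie itself is
// expected to come from the instantiation site (e.g. a surrounding
// let Constraints), as the comment above notes.
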
// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
   AVX512_maskable_common<O, F, OutVT, Outs,
                          !con((ins InVT.RC:$src1), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                          (vselect_mask InVT.KRCWM:$mask, RHS,
                           (bitconvert InVT.RC:$src1)),
                           vselect_mask, "", IsCommutable>;

multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS,
                                     bit IsCommutable = 0,
                                     bit IsKCommutable = 0,
                                     bit MaskOnly = 0> :
   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                        X86selects_mask, MaskOnly>;

multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "$src0 = $dst">;

multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "">;

// Instructions with a mask that put their result in a mask register,
// like "compare" and "vptest".
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  bit IsCommutable = 0> {
    let isCommutable = IsCommutable in {
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
    }
}

multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.KRC:$dst, RHS)],
                         [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;

multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag RHS_su, bit IsCommutable = 0> :
   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (and _.KRCWM:$mask, RHS_su), IsCommutable>;

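// Note on the masked compare form above: the masked pattern is written as an
// explicit AND of the writemask with the compare result, (and $mask, RHS_su),
// because for compare-into-mask instructions the writemask zeroes the result
// bits rather than preserving them. RHS_su is expected to be a single-use
// ("_su") variant of RHS supplied by the caller so the unmasked node is not
// duplicated during selection.
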
// Used by conversion instructions.
multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst, ZeroMaskingRHS)],
                         "$src0 = $dst">;

multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag NonTiedIns, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, bit IsCommutable,
                               bit IsKCommutable> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
                          [(set _.RC:$dst,
                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
                          "", IsCommutable, IsKCommutable>;

// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
}

// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all-ones and all-zeros elements. A pseudo is used here to force
// the same register to be used as the input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                      (v16i32 immAllOnesV),
                                                      (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                           (v8i64 immAllOnesV),
                                           (v8i64 immAllZerosV)))]>;
}

let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
}

// Alias instructions that map fld0 to xorps for SSE or vxorps for AVX.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
                          [(set FR16X:$dst, fp16imm0)]>;
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fp64imm0)]>;
  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                            [(set VR128X:$dst, fp128imm0)]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//

// Supports two different pattern operators for masked and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                               (From.VT (From.LdFrag addr:$src2)),
                               (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                               (From.VT (From.LdFrag addr:$src2)),
                               (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                       X86VectorVTInfo To, PatFrag vinsert_insert,
                       SDNodeXForm INSERT_get_vinsert_imm, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1),
                  (From.VT (From.LdFrag addr:$src2)),
                  (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                  To.RC:$src1, addr:$src2,
                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}

multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, REX_W, EVEX_V512;

  // Even with DQI we'd like to use these instructions only for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   null_frag, vinsert128_insert, sched>,
                                   VEX_W1X, EVEX_V256;

  // Even with DQI we'd like to use these instructions only for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 REX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   null_frag, vinsert256_insert, sched>,
                                   EVEX_V512;
  }
}

// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;

// Codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen patterns with the alternative types: insert VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen patterns with the alternative types: insert VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: insert VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT
                                                  (bitconvert
                                                   (From.LdFrag addr:$src2))),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}

defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// vinsertps - insert an f32 into an XMM register
let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          timm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---

// Supports two different pattern operators for masked and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                             addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, To.KRCWM:$mask,
                                        From.RC:$src1, u8imm:$idx),
                     "vextract" # To.EltTypeName # "x" # To.NumElts #
                          "\t{$idx, $src1, $dst {${mask}}|"
                          "$dst {${mask}}, $src1, $idx}", []>,
                    EVEX_K, EVEX, Sched<[SchedMR]>;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;

// Codegen patterns for the alternative types.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                X86VectorVTInfo To, PatFrag vextract_extract,
                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
     def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
               (To.VT (!cast<Instruction>(InstrStr#"rr")
                          From.RC:$src1,
                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;
     def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                              (iPTR imm))), addr:$dst),
               (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
                (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}

multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   vextract128_extract, SchedRR, SchedMR>,
                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                                   X86VectorVTInfo< 8, EltVT64, VR512>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   vextract256_extract, SchedRR, SchedMR>,
                                       REX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to use these instructions only for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to use these instructions only for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     REX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}

958 // TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
959 defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
960 defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
962 // extract_subvector codegen patterns with the alternative types.
963 // Even with AVX512DQ we'll still use these for unmasked operations.
964 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
965           vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
966 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
967           vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
969 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
970           vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
971 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
972           vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
974 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
975           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
976 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
977           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
979 // Codegen pattern with the alternative types extract VEC128 from VEC256
980 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
981           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
982 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
983           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
984 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info,
985           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
987 // Codegen pattern with the alternative types extract VEC128 from VEC512
988 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
989                  vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
990 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
991                  vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
992 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info,
993                  vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
994 // Codegen patterns with alternative types: extract VEC256 from VEC512.
995 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
996                  vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
997 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
998                  vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
999 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,
1000                  vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
1003 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
1004 // smaller extract to enable EVEX->VEX.
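// For example (register assignments illustrative only),
//   (v2i64 (extract_subvector (v8i64 %zmm0), (iPTR 2)))
// becomes "vextracti128 $1, %ymm0, %xmm0", reading the low ymm alias of the zmm
// source instead of emitting a zmm-sourced EVEX extract.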
1005 let Predicates = [NoVLX] in {
1006 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
1007           (v2i64 (VEXTRACTI128rr
1008                   (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
1009                   (iPTR 1)))>;
1010 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
1011           (v2f64 (VEXTRACTF128rr
1012                   (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
1013                   (iPTR 1)))>;
1014 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
1015           (v4i32 (VEXTRACTI128rr
1016                   (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
1017                   (iPTR 1)))>;
1018 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
1019           (v4f32 (VEXTRACTF128rr
1020                   (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
1021                   (iPTR 1)))>;
1022 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
1023           (v8i16 (VEXTRACTI128rr
1024                   (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
1025                   (iPTR 1)))>;
1026 def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
1027           (v8f16 (VEXTRACTF128rr
1028                   (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
1029                   (iPTR 1)))>;
1030 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
1031           (v16i8 (VEXTRACTI128rr
1032                   (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
1033                   (iPTR 1)))>;
1034 }
1036 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
1037 // smaller extract to enable EVEX->VEX.
1038 let Predicates = [HasVLX] in {
1039 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
1040           (v2i64 (VEXTRACTI32x4Z256rr
1041                   (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
1042                   (iPTR 1)))>;
1043 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
1044           (v2f64 (VEXTRACTF32x4Z256rr
1045                   (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
1046                   (iPTR 1)))>;
1047 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
1048           (v4i32 (VEXTRACTI32x4Z256rr
1049                   (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
1050                   (iPTR 1)))>;
1051 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
1052           (v4f32 (VEXTRACTF32x4Z256rr
1053                   (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
1054                   (iPTR 1)))>;
1055 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
1056           (v8i16 (VEXTRACTI32x4Z256rr
1057                   (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
1058                   (iPTR 1)))>;
1059 def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
1060           (v8f16 (VEXTRACTF32x4Z256rr
1061                   (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
1062                   (iPTR 1)))>;
1063 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
1064           (v16i8 (VEXTRACTI32x4Z256rr
1065                   (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
1066                   (iPTR 1)))>;
1067 }
1070 // Additional patterns for handling a bitcast between the vselect and the
1071 // extract_subvector.
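// For example (types taken from the VEXTRACTF32x4Z256 instantiation below), a
// masked extract such as
//   (v4f32 (vselect_mask K, (bitcast (v2f64 (extract_subvector (v4f64 V), 2))), P))
// is matched to the masked VEXTRACTF32x4Z256rrk form even though the select type
// differs from the extracted type.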
1072 multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
1073                                   X86VectorVTInfo To, X86VectorVTInfo Cast,
1074                                   PatFrag vextract_extract,
1075                                   SDNodeXForm EXTRACT_get_vextract_imm,
1076                                   list<Predicate> p> {
1077 let Predicates = p in {
1078   def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
1079                                    (bitconvert
1080                                     (To.VT (vextract_extract:$ext
1081                                             (From.VT From.RC:$src), (iPTR imm)))),
1082                                    To.RC:$src0)),
1083             (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
1084                       Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
1085                       (EXTRACT_get_vextract_imm To.RC:$ext)))>;
1087   def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
1088                                    (bitconvert
1089                                     (To.VT (vextract_extract:$ext
1090                                             (From.VT From.RC:$src), (iPTR imm)))),
1091                                    Cast.ImmAllZerosV)),
1092             (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
1093                       Cast.KRCWM:$mask, From.RC:$src,
1094                       (EXTRACT_get_vextract_imm To.RC:$ext)))>;
1095 }
1096 }
1098 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
1099                               v4f32x_info, vextract128_extract,
1100                               EXTRACT_get_vextract128_imm, [HasVLX]>;
1101 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
1102                               v2f64x_info, vextract128_extract,
1103                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1105 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
1106                               v4i32x_info, vextract128_extract,
1107                               EXTRACT_get_vextract128_imm, [HasVLX]>;
1108 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
1109                               v4i32x_info, vextract128_extract,
1110                               EXTRACT_get_vextract128_imm, [HasVLX]>;
1111 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
1112                               v4i32x_info, vextract128_extract,
1113                               EXTRACT_get_vextract128_imm, [HasVLX]>;
1114 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
1115                               v2i64x_info, vextract128_extract,
1116                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1117 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
1118                               v2i64x_info, vextract128_extract,
1119                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1120 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
1121                               v2i64x_info, vextract128_extract,
1122                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1124 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
1125                               v4f32x_info, vextract128_extract,
1126                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
1127 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
1128                               v2f64x_info, vextract128_extract,
1129                               EXTRACT_get_vextract128_imm, [HasDQI]>;
1131 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
1132                               v4i32x_info, vextract128_extract,
1133                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
1134 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
1135                               v4i32x_info, vextract128_extract,
1136                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
1137 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
1138                               v4i32x_info, vextract128_extract,
1139                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
1140 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
1141                               v2i64x_info, vextract128_extract,
1142                               EXTRACT_get_vextract128_imm, [HasDQI]>;
1143 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
1144                               v2i64x_info, vextract128_extract,
1145                               EXTRACT_get_vextract128_imm, [HasDQI]>;
1146 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
1147                               v2i64x_info, vextract128_extract,
1148                               EXTRACT_get_vextract128_imm, [HasDQI]>;
1150 defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
1151                               v8f32x_info, vextract256_extract,
1152                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1153 defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
1154                               v4f64x_info, vextract256_extract,
1155                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1157 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
1158                               v8i32x_info, vextract256_extract,
1159                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1160 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
1161                               v8i32x_info, vextract256_extract,
1162                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1163 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
1164                               v8i32x_info, vextract256_extract,
1165                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1166 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
1167                               v4i64x_info, vextract256_extract,
1168                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1169 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
1170                               v4i64x_info, vextract256_extract,
1171                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1172 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
1173                               v4i64x_info, vextract256_extract,
1174                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1176 // vextractps - extract a 32-bit element from an XMM register into a GPR or memory
1177 def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
1178       (ins VR128X:$src1, u8imm:$src2),
1179       "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1180       [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
1181       EVEX, WIG, Sched<[WriteVecExtract]>;
1183 def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1184       (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1185       "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1186       [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1187                           addr:$dst)]>,
1188       EVEX, WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
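// For example (illustrative), "vextractps $2, %xmm1, %eax" copies the raw 32-bit
// pattern of element 2 of the v4f32 source into a GPR; the memory form stores the
// selected element directly without going through a GPR.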
1190 //===---------------------------------------------------------------------===//
1191 // AVX-512 BROADCAST
1192 //---
1193 // Broadcast patterns for a scalar (FRC) source argument.
1194 multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
1195                                    X86VectorVTInfo SrcInfo> {
1196   def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1197             (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
1198              (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1199   def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1200                                        (X86VBroadcast SrcInfo.FRC:$src),
1201                                        DestInfo.RC:$src0)),
1202             (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
1203              DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1204              (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1205   def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1206                                        (X86VBroadcast SrcInfo.FRC:$src),
1207                                        DestInfo.ImmAllZerosV)),
1208             (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
1209              DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1210 }
1212 // Split version to allow mask and broadcast node to be different types. This
1213 // helps support the 32x2 broadcasts.
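// For example, VBROADCASTI32X2 broadcasts 64-bit chunks (DestInfo/SrcInfo are i64
// types) while its write-mask is applied per 32-bit element (MaskInfo is an i32
// type), so the two VT infos must be allowed to differ.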
1214 multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1215                                      SchedWrite SchedRR, SchedWrite SchedRM,
1216                                      X86VectorVTInfo MaskInfo,
1217                                      X86VectorVTInfo DestInfo,
1218                                      X86VectorVTInfo SrcInfo,
1219                                      bit IsConvertibleToThreeAddress,
1220                                      SDPatternOperator UnmaskedOp = X86VBroadcast,
1221                                      SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
1222   let hasSideEffects = 0 in
1223   def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
1224                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1225                     [(set MaskInfo.RC:$dst,
1226                       (MaskInfo.VT
1227                        (bitconvert
1228                         (DestInfo.VT
1229                          (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
1230                     DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
1231   def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1232                       (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
1233                       !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1234                        "${dst} {${mask}} {z}, $src}"),
1235                        [(set MaskInfo.RC:$dst,
1236                          (vselect_mask MaskInfo.KRCWM:$mask,
1237                           (MaskInfo.VT
1238                            (bitconvert
1239                             (DestInfo.VT
1240                              (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1241                           MaskInfo.ImmAllZerosV))],
1242                        DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
1243   let Constraints = "$src0 = $dst" in
1244   def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1245                      (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1246                           SrcInfo.RC:$src),
1247                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1248                      "${dst} {${mask}}, $src}"),
1249                      [(set MaskInfo.RC:$dst,
1250                        (vselect_mask MaskInfo.KRCWM:$mask,
1251                         (MaskInfo.VT
1252                          (bitconvert
1253                           (DestInfo.VT
1254                            (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1255                         MaskInfo.RC:$src0))],
1256                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;
1258   let hasSideEffects = 0, mayLoad = 1 in
1259   def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1260                     (ins SrcInfo.ScalarMemOp:$src),
1261                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1262                     [(set MaskInfo.RC:$dst,
1263                       (MaskInfo.VT
1264                        (bitconvert
1265                         (DestInfo.VT
1266                          (UnmaskedBcastOp addr:$src)))))],
1267                     DestInfo.ExeDomain>, T8PD, EVEX,
1268                     EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1270   def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1271                       (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
1272                       !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1273                        "${dst} {${mask}} {z}, $src}"),
1274                        [(set MaskInfo.RC:$dst,
1275                          (vselect_mask MaskInfo.KRCWM:$mask,
1276                           (MaskInfo.VT
1277                            (bitconvert
1278                             (DestInfo.VT
1279                              (SrcInfo.BroadcastLdFrag addr:$src)))),
1280                           MaskInfo.ImmAllZerosV))],
1281                        DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
1282                        EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1284   let Constraints = "$src0 = $dst",
1285       isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
1286   def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1287                      (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1288                           SrcInfo.ScalarMemOp:$src),
1289                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1290                      "${dst} {${mask}}, $src}"),
1291                      [(set MaskInfo.RC:$dst,
1292                        (vselect_mask MaskInfo.KRCWM:$mask,
1293                         (MaskInfo.VT
1294                          (bitconvert
1295                           (DestInfo.VT
1296                            (SrcInfo.BroadcastLdFrag addr:$src)))),
1297                         MaskInfo.RC:$src0))],
1298                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
1299                       EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1300 }
1302 // Helper multiclass to force the mask and the broadcast result to the same type.
1303 multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
1304                                SchedWrite SchedRR, SchedWrite SchedRM,
1305                                X86VectorVTInfo DestInfo,
1306                                X86VectorVTInfo SrcInfo,
1307                                bit IsConvertibleToThreeAddress> :
1308   avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
1309                             DestInfo, DestInfo, SrcInfo,
1310                             IsConvertibleToThreeAddress>;
1312 multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1313                                   AVX512VLVectorVTInfo _> {
1314   let Predicates = [HasAVX512] in {
1315     defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1316                                   WriteFShuffle256Ld, _.info512, _.info128, 1>,
1317               avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1318               EVEX_V512;
1319   }
1321   let Predicates = [HasVLX] in {
1322     defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1323                                      WriteFShuffle256Ld, _.info256, _.info128, 1>,
1324                  avx512_broadcast_scalar<NAME, _.info256, _.info128>,
1325                  EVEX_V256;
1326   }
1327 }
1329 multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1330                                   AVX512VLVectorVTInfo _> {
1331   let Predicates = [HasAVX512] in {
1332     defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1333                                   WriteFShuffle256Ld, _.info512, _.info128, 1>,
1334               avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1335               EVEX_V512;
1336   }
1338   let Predicates = [HasVLX] in {
1339     defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1340                                      WriteFShuffle256Ld, _.info256, _.info128, 1>,
1341                  avx512_broadcast_scalar<NAME, _.info256, _.info128>,
1342                  EVEX_V256;
1343     defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1344                                      WriteFShuffle256Ld, _.info128, _.info128, 1>,
1345                  avx512_broadcast_scalar<NAME, _.info128, _.info128>,
1346                  EVEX_V128;
1347   }
1348 }
1349 defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1350                                        avx512vl_f32_info>;
1351 defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1352                                        avx512vl_f64_info>, VEX_W1X;
1354 multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1355                                     X86VectorVTInfo _, SDPatternOperator OpNode,
1356                                     RegisterClass SrcRC> {
1357   // Fold with a mask even if the broadcast has multiple uses, since it is cheap.
1358   let ExeDomain = _.ExeDomain in
1359   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1360                           (ins SrcRC:$src),
1361                           "vpbroadcast"#_.Suffix, "$src", "$src",
1362                           (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
1363                           /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
1364                           T8PD, EVEX, Sched<[SchedRR]>;
1365 }
1367 multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1368                                     X86VectorVTInfo _, SDPatternOperator OpNode,
1369                                     RegisterClass SrcRC, SubRegIndex Subreg> {
1370   let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1371   defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
1372                          (outs _.RC:$dst), (ins GR32:$src),
1373                          !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1374                          !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1375                          "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
1376                          "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
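  // The instruction only encodes a GR32 source, so widen the GR8/GR16 input into
  // an undef GR32 with INSERT_SUBREG before broadcasting.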
1378   def : Pat <(_.VT (OpNode SrcRC:$src)),
1379              (!cast<Instruction>(Name#rr)
1380               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1382   // Fold with a mask even if the broadcast has multiple uses, since it is cheap.
1383   def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1384              (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
1385               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1387   def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1388              (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
1389               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1390 }
1392 multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1393                       AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1394                       RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1395   let Predicates = [prd] in
1396     defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1397               OpNode, SrcRC, Subreg>, EVEX_V512;
1398   let Predicates = [prd, HasVLX] in {
1399     defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1400               _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1401     defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1402               _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
1403   }
1404 }
1406 multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1407                                        SDPatternOperator OpNode,
1408                                        RegisterClass SrcRC, Predicate prd> {
1409   let Predicates = [prd] in
1410     defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1411                                       SrcRC>, EVEX_V512;
1412   let Predicates = [prd, HasVLX] in {
1413     defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1414                                          SrcRC>, EVEX_V256;
1415     defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
1416                                          SrcRC>, EVEX_V128;
1417   }
1418 }
1420 defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1421                        avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1422 defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1423                        avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1424                        HasBWI>;
1425 defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1426                                                  X86VBroadcast, GR32, HasAVX512>;
1427 defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1428                                                  X86VBroadcast, GR64, HasAVX512>, REX_W;
1430 multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1431                                       AVX512VLVectorVTInfo _, Predicate prd,
1432                                       bit IsConvertibleToThreeAddress> {
1433   let Predicates = [prd] in {
1434     defm Z :   avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1435                                    WriteShuffle256Ld, _.info512, _.info128,
1436                                    IsConvertibleToThreeAddress>,
1437                                   EVEX_V512;
1438   }
1439   let Predicates = [prd, HasVLX] in {
1440     defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1441                                     WriteShuffle256Ld, _.info256, _.info128,
1442                                     IsConvertibleToThreeAddress>,
1443                                  EVEX_V256;
1444     defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
1445                                     WriteShuffleXLd, _.info128, _.info128,
1446                                     IsConvertibleToThreeAddress>,
1447                                  EVEX_V128;
1448   }
1449 }
1451 defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1452                                            avx512vl_i8_info, HasBWI, 0>;
1453 defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1454                                            avx512vl_i16_info, HasBWI, 0>;
1455 defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1456                                            avx512vl_i32_info, HasAVX512, 1>;
1457 defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1458                                            avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;
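// Broadcast a 128-bit or 256-bit subvector from memory into every corresponding
// lane of the destination register.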
1460 multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1461                                       SDPatternOperator OpNode,
1462                                       X86VectorVTInfo _Dst,
1463                                       X86VectorVTInfo _Src> {
1464   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1465                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1466                            (_Dst.VT (OpNode addr:$src))>,
1467                            Sched<[SchedWriteShuffle.YMM.Folded]>,
1468                            AVX5128IBase, EVEX;
1469 }
1471 // This should be used for the AVX512DQ broadcast instructions. It disables
1472 // the unmasked patterns so that we use the DQ instructions only when masking
1473 // is requested.
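// Unmasked subvector broadcast loads are instead selected to the AVX512F
// VBROADCAST*32X4/*64X4 patterns elsewhere in this file.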
1474 multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1475                                          SDPatternOperator OpNode,
1476                                          X86VectorVTInfo _Dst,
1477                                          X86VectorVTInfo _Src> {
1478   let hasSideEffects = 0, mayLoad = 1 in
1479   defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1480                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1481                            (null_frag),
1482                            (_Dst.VT (OpNode addr:$src))>,
1483                            Sched<[SchedWriteShuffle.YMM.Folded]>,
1484                            AVX5128IBase, EVEX;
1485 }
1486 let Predicates = [HasBWI] in {
1487   def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
1488             (VPBROADCASTWZrm addr:$src)>;
1490   def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
1491             (VPBROADCASTWZrr VR128X:$src)>;
1492   def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
1493             (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1494 }
1495 let Predicates = [HasVLX, HasBWI] in {
1496   def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
1497             (VPBROADCASTWZ128rm addr:$src)>;
1498   def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
1499             (VPBROADCASTWZ256rm addr:$src)>;
1501   def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
1502             (VPBROADCASTWZ128rr VR128X:$src)>;
1503   def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
1504             (VPBROADCASTWZ256rr VR128X:$src)>;
1506   def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
1507             (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1508   def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
1509             (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1510 }
1512 //===----------------------------------------------------------------------===//
1513 // AVX-512 BROADCAST SUBVECTORS
1516 defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1517                        X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
1518                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
1519 defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1520                        X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
1521                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
1522 defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1523                        X86SubVBroadcastld256, v8i64_info, v4i64x_info>, REX_W,
1524                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
1525 defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1526                        X86SubVBroadcastld256, v8f64_info, v4f64x_info>, REX_W,
1527                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
1529 let Predicates = [HasAVX512] in {
1530 def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
1531           (VBROADCASTF64X4rm addr:$src)>;
1532 def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
1533           (VBROADCASTF64X4rm addr:$src)>;
1534 def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
1535           (VBROADCASTF64X4rm addr:$src)>;
1536 def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
1537           (VBROADCASTI64X4rm addr:$src)>;
1538 def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
1539           (VBROADCASTI64X4rm addr:$src)>;
1540 def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
1541           (VBROADCASTI64X4rm addr:$src)>;
1542 def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
1543           (VBROADCASTI64X4rm addr:$src)>;
1545 def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
1546           (VBROADCASTF32X4rm addr:$src)>;
1547 def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
1548           (VBROADCASTF32X4rm addr:$src)>;
1549 def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
1550           (VBROADCASTF32X4rm addr:$src)>;
1551 def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
1552           (VBROADCASTI32X4rm addr:$src)>;
1553 def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
1554           (VBROADCASTI32X4rm addr:$src)>;
1555 def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
1556           (VBROADCASTI32X4rm addr:$src)>;
1557 def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
1558           (VBROADCASTI32X4rm addr:$src)>;
1560 // Patterns for selects of bitcasted operations.
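// The broadcast may have been formed with 64-bit elements and bitcast to the
// 32-bit element type of the select (e.g. a v8f64 128-bit subvector broadcast
// masked as v16f32), so these patterns look through the bitconvert.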
1561 def : Pat<(vselect_mask VK16WM:$mask,
1562                         (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1563                         (v16f32 immAllZerosV)),
1564           (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1565 def : Pat<(vselect_mask VK16WM:$mask,
1566                         (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1567                         VR512:$src0),
1568           (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1569 def : Pat<(vselect_mask VK16WM:$mask,
1570                         (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1571                         (v16i32 immAllZerosV)),
1572           (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1573 def : Pat<(vselect_mask VK16WM:$mask,
1574                         (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1575                         VR512:$src0),
1576           (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1578 def : Pat<(vselect_mask VK8WM:$mask,
1579                         (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1580                         (v8f64 immAllZerosV)),
1581           (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1582 def : Pat<(vselect_mask VK8WM:$mask,
1583                         (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1584                         VR512:$src0),
1585           (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1586 def : Pat<(vselect_mask VK8WM:$mask,
1587                         (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1588                         (v8i64 immAllZerosV)),
1589           (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1590 def : Pat<(vselect_mask VK8WM:$mask,
1591                         (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1592                         VR512:$src0),
1593           (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1594 }
1596 let Predicates = [HasVLX] in {
1597 defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1598                            X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
1599                            EVEX_V256, EVEX_CD8<32, CD8VT4>;
1600 defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1601                            X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
1602                            EVEX_V256, EVEX_CD8<32, CD8VT4>;
1604 def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
1605           (VBROADCASTF32X4Z256rm addr:$src)>;
1606 def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
1607           (VBROADCASTF32X4Z256rm addr:$src)>;
1608 def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
1609           (VBROADCASTF32X4Z256rm addr:$src)>;
1610 def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
1611           (VBROADCASTI32X4Z256rm addr:$src)>;
1612 def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
1613           (VBROADCASTI32X4Z256rm addr:$src)>;
1614 def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
1615           (VBROADCASTI32X4Z256rm addr:$src)>;
1616 def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
1617           (VBROADCASTI32X4Z256rm addr:$src)>;
1619 // Patterns for selects of bitcasted operations.
1620 def : Pat<(vselect_mask VK8WM:$mask,
1621                         (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1622                         (v8f32 immAllZerosV)),
1623           (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1624 def : Pat<(vselect_mask VK8WM:$mask,
1625                         (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1626                         VR256X:$src0),
1627           (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1628 def : Pat<(vselect_mask VK8WM:$mask,
1629                         (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1630                         (v8i32 immAllZerosV)),
1631           (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1632 def : Pat<(vselect_mask VK8WM:$mask,
1633                         (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1634                         VR256X:$src0),
1635           (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1636 }
1638 let Predicates = [HasVLX, HasDQI] in {
1639 defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1640                            X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X,
1641                            EVEX_V256, EVEX_CD8<64, CD8VT2>;
1642 defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1643                            X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, VEX_W1X,
1644                            EVEX_V256, EVEX_CD8<64, CD8VT2>;
1646 // Patterns for selects of bitcasted operations.
1647 def : Pat<(vselect_mask VK4WM:$mask,
1648                         (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1649                         (v4f64 immAllZerosV)),
1650           (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1651 def : Pat<(vselect_mask VK4WM:$mask,
1652                         (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1653                         VR256X:$src0),
1654           (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1655 def : Pat<(vselect_mask VK4WM:$mask,
1656                         (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1657                         (v4i64 immAllZerosV)),
1658           (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1659 def : Pat<(vselect_mask VK4WM:$mask,
1660                         (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1661                         VR256X:$src0),
1662           (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1663 }
1665 let Predicates = [HasDQI] in {
1666 defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1667                        X86SubVBroadcastld128, v8i64_info, v2i64x_info>, REX_W,
1668                        EVEX_V512, EVEX_CD8<64, CD8VT2>;
1669 defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1670                        X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
1671                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
1672 defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1673                        X86SubVBroadcastld128, v8f64_info, v2f64x_info>, REX_W,
1674                        EVEX_V512, EVEX_CD8<64, CD8VT2>;
1675 defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1676                        X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
1677                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
1679 // Patterns for selects of bitcasted operations.
1680 def : Pat<(vselect_mask VK16WM:$mask,
1681                         (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1682                         (v16f32 immAllZerosV)),
1683           (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1684 def : Pat<(vselect_mask VK16WM:$mask,
1685                         (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1686                         VR512:$src0),
1687           (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1688 def : Pat<(vselect_mask VK16WM:$mask,
1689                         (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1690                         (v16i32 immAllZerosV)),
1691           (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1692 def : Pat<(vselect_mask VK16WM:$mask,
1693                         (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1694                         VR512:$src0),
1695           (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1697 def : Pat<(vselect_mask VK8WM:$mask,
1698                         (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1699                         (v8f64 immAllZerosV)),
1700           (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1701 def : Pat<(vselect_mask VK8WM:$mask,
1702                         (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1703                         VR512:$src0),
1704           (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1705 def : Pat<(vselect_mask VK8WM:$mask,
1706                         (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1707                         (v8i64 immAllZerosV)),
1708           (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1709 def : Pat<(vselect_mask VK8WM:$mask,
1710                         (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1711                         VR512:$src0),
1712           (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1713 }
1715 multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1716                                         AVX512VLVectorVTInfo _Dst,
1717                                         AVX512VLVectorVTInfo _Src> {
1718   let Predicates = [HasDQI] in
1719     defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1720                                           WriteShuffle256Ld, _Dst.info512,
1721                                           _Src.info512, _Src.info128, 0, null_frag, null_frag>,
1722                                           EVEX_V512;
1723   let Predicates = [HasDQI, HasVLX] in
1724     defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1725                                           WriteShuffle256Ld, _Dst.info256,
1726                                           _Src.info256, _Src.info128, 0, null_frag, null_frag>,
1727                                           EVEX_V256;
1728 }
1730 multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1731                                          AVX512VLVectorVTInfo _Dst,
1732                                          AVX512VLVectorVTInfo _Src> :
1733   avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1735   let Predicates = [HasDQI, HasVLX] in
1736     defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
1737                                           WriteShuffleXLd, _Dst.info128,
1738                                           _Src.info128, _Src.info128, 0, null_frag, null_frag>,
1739                                           EVEX_V128;
1740 }
1742 defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1743                                           avx512vl_i32_info, avx512vl_i64_info>;
1744 defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1745                                           avx512vl_f32_info, avx512vl_f64_info>;
1747 //===----------------------------------------------------------------------===//
1748 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
1749 //---
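// VPBROADCASTMW2D / VPBROADCASTMB2Q broadcast the mask register value,
// zero-extended to the element width, into every element of the destination.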
1750 multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1751                                   X86VectorVTInfo _, RegisterClass KRC> {
1752   def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1753                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1754                   [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1755                   EVEX, Sched<[WriteShuffle]>;
1756 }
1758 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1759                                  AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1760   let Predicates = [HasCDI] in
1761     defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1762   let Predicates = [HasCDI, HasVLX] in {
1763     defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1764     defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1765   }
1766 }
1768 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1769                                                avx512vl_i32_info, VK16>;
1770 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1771                                                avx512vl_i64_info, VK8>, REX_W;
1773 //===----------------------------------------------------------------------===//
1774 // -- VPERMI2 - 3 source operands form --
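// The index operand ($src1) is tied to the destination and is overwritten with
// the permuted result; $src2 and $src3 supply the two data table operands.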
1775 multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1776                          X86FoldableSchedWrite sched,
1777                          X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1778 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1779     hasSideEffects = 0 in {
1780   defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1781           (ins _.RC:$src2, _.RC:$src3),
1782           OpcodeStr, "$src3, $src2", "$src2, $src3",
1783           (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1784           EVEX_4V, AVX5128IBase, Sched<[sched]>;
1786   let mayLoad = 1 in
1787   defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1788             (ins _.RC:$src2, _.MemOp:$src3),
1789             OpcodeStr, "$src3, $src2", "$src2, $src3",
1790             (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1791                    (_.VT (_.LdFrag addr:$src3)))), 1>,
1792             EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1793   }
1794 }
1796 multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1797                             X86FoldableSchedWrite sched,
1798                             X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1799   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1800       hasSideEffects = 0, mayLoad = 1 in
1801   defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1802               (ins _.RC:$src2, _.ScalarMemOp:$src3),
1803               OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1804               !strconcat("$src2, ${src3}", _.BroadcastStr ),
1805               (_.VT (X86VPermt2 _.RC:$src2,
1806                IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1807               AVX5128IBase, EVEX_4V, EVEX_B,
1808               Sched<[sched.Folded, sched.ReadAfterFold]>;
1809 }
1811 multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1812                                X86FoldableSchedWrite sched,
1813                                AVX512VLVectorVTInfo VTInfo,
1814                                AVX512VLVectorVTInfo ShuffleMask> {
1815   defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1816                            ShuffleMask.info512>,
1817             avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1818                              ShuffleMask.info512>, EVEX_V512;
1819   let Predicates = [HasVLX] in {
1820   defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1821                                ShuffleMask.info128>,
1822                  avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1823                                   ShuffleMask.info128>, EVEX_V128;
1824   defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1825                                ShuffleMask.info256>,
1826                  avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1827                                   ShuffleMask.info256>, EVEX_V256;
1828   }
1829 }
1831 multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1832                                   X86FoldableSchedWrite sched,
1833                                   AVX512VLVectorVTInfo VTInfo,
1834                                   AVX512VLVectorVTInfo Idx,
1835                                   Predicate Prd> {
1836   let Predicates = [Prd] in
1837   defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1838                            Idx.info512>, EVEX_V512;
1839   let Predicates = [Prd, HasVLX] in {
1840   defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1841                                Idx.info128>, EVEX_V128;
1842   defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1843                                Idx.info256>,  EVEX_V256;
1844   }
1845 }
1847 defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1848                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1849 defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1850                   avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1851 defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1852                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1853                   REX_W, EVEX_CD8<16, CD8VF>;
1854 defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1855                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1856                   EVEX_CD8<8, CD8VF>;
1857 defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1858                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1859 defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1860                   avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1862 // Extra patterns to deal with extra bitcasts due to passthru and index being
1863 // different types on the fp versions.
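// For example (types from the VPERMI2PS instantiation below), a single v8i64
// value may be bitcast both to the v16i32 index operand and to the v16f32
// passthru of a masked VPERMI2PS; these patterns look through both casts.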
1864 multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1865                                   X86VectorVTInfo IdxVT,
1866                                   X86VectorVTInfo CastVT> {
1867   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1868                                 (X86VPermt2 (_.VT _.RC:$src2),
1869                                             (IdxVT.VT (bitconvert
1870                                                        (CastVT.VT _.RC:$src1))),
1871                                             _.RC:$src3),
1872                                 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1873             (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1874                                                 _.RC:$src2, _.RC:$src3)>;
1875   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1876                                 (X86VPermt2 _.RC:$src2,
1877                                             (IdxVT.VT (bitconvert
1878                                                        (CastVT.VT _.RC:$src1))),
1879                                             (_.LdFrag addr:$src3)),
1880                                 (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1881             (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1882                                                 _.RC:$src2, addr:$src3)>;
1883   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1884                                  (X86VPermt2 _.RC:$src2,
1885                                              (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1886                                              (_.BroadcastLdFrag addr:$src3)),
1887                                  (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1888             (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1889                                                  _.RC:$src2, addr:$src3)>;
1890 }
1892 // TODO: Should we add more casts? The vXi64 case is common due to ABI.
1893 defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
1894 defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
1895 defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
1897 // VPERMT2 - 3 source operands form; the first table operand ($src1) is tied to $dst.
1898 multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1899                          X86FoldableSchedWrite sched,
1900                          X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1901 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1902   defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1903           (ins IdxVT.RC:$src2, _.RC:$src3),
1904           OpcodeStr, "$src3, $src2", "$src2, $src3",
1905           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1906           EVEX_4V, AVX5128IBase, Sched<[sched]>;
1908   defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1909             (ins IdxVT.RC:$src2, _.MemOp:$src3),
1910             OpcodeStr, "$src3, $src2", "$src2, $src3",
1911             (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1912                    (_.LdFrag addr:$src3))), 1>,
1913             EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1914   }
1915 }
1916 multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1917                             X86FoldableSchedWrite sched,
1918                             X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1919   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1920   defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1921               (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1922               OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1923               !strconcat("$src2, ${src3}", _.BroadcastStr ),
1924               (_.VT (X86VPermt2 _.RC:$src1,
1925                IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1926               AVX5128IBase, EVEX_4V, EVEX_B,
1927               Sched<[sched.Folded, sched.ReadAfterFold]>;
1928 }
1930 multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1931                                X86FoldableSchedWrite sched,
1932                                AVX512VLVectorVTInfo VTInfo,
1933                                AVX512VLVectorVTInfo ShuffleMask> {
1934   defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1935                               ShuffleMask.info512>,
1936             avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1937                               ShuffleMask.info512>, EVEX_V512;
1938   let Predicates = [HasVLX] in {
1939   defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1940                               ShuffleMask.info128>,
1941                  avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1942                               ShuffleMask.info128>, EVEX_V128;
1943   defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1944                               ShuffleMask.info256>,
1945                  avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1946                               ShuffleMask.info256>, EVEX_V256;
1947   }
1948 }
1950 multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1951                                   X86FoldableSchedWrite sched,
1952                                   AVX512VLVectorVTInfo VTInfo,
1953                                   AVX512VLVectorVTInfo Idx, Predicate Prd> {
1954   let Predicates = [Prd] in
1955   defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1956                            Idx.info512>, EVEX_V512;
1957   let Predicates = [Prd, HasVLX] in {
1958   defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1959                                Idx.info128>, EVEX_V128;
1960   defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1961                                Idx.info256>, EVEX_V256;
1962   }
1963 }
1965 defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1966                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1967 defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1968                   avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1969 defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1970                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1971                   REX_W, EVEX_CD8<16, CD8VF>;
1972 defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1973                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1974                   EVEX_CD8<8, CD8VF>;
1975 defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1976                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1977 defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1978                   avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1980 //===----------------------------------------------------------------------===//
1981 // AVX-512 - BLEND using mask
1984 multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1985                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1986   let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1987   def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1988              (ins _.RC:$src1, _.RC:$src2),
1989              !strconcat(OpcodeStr,
1990              "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1991              EVEX_4V, Sched<[sched]>;
1992   def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1993              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1994              !strconcat(OpcodeStr,
1995              "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1996              []>, EVEX_4V, EVEX_K, Sched<[sched]>;
1997   def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1998              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1999              !strconcat(OpcodeStr,
2000              "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
2001              []>, EVEX_4V, EVEX_KZ, Sched<[sched]>;
2002   let mayLoad = 1 in {
2003   def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2004              (ins _.RC:$src1, _.MemOp:$src2),
2005              !strconcat(OpcodeStr,
2006              "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
2007              []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
2008              Sched<[sched.Folded, sched.ReadAfterFold]>;
2009   def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2010              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2011              !strconcat(OpcodeStr,
2012              "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
2013              []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
2014              Sched<[sched.Folded, sched.ReadAfterFold]>;
2015   def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2016              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2017              !strconcat(OpcodeStr,
2018              "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
2019              []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
2020              Sched<[sched.Folded, sched.ReadAfterFold]>;
2021   }
2022   }
2023 }
2024 multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
2025                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2026   let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
2027   def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2028       (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
2029        !strconcat(OpcodeStr,
2030             "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2031             "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2032       EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2033       Sched<[sched.Folded, sched.ReadAfterFold]>;
2035   def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2036       (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
2037        !strconcat(OpcodeStr,
2038             "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
2039             "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2040       EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2041       Sched<[sched.Folded, sched.ReadAfterFold]>;
2043   def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2044       (ins _.RC:$src1, _.ScalarMemOp:$src2),
2045        !strconcat(OpcodeStr,
2046             "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
2047             "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2048       EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2049       Sched<[sched.Folded, sched.ReadAfterFold]>;
2050   }
2051 }
2053 multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2054                         AVX512VLVectorVTInfo VTInfo> {
2055   defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2056            WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2057                                  EVEX_V512;
2059   let Predicates = [HasVLX] in {
2060     defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2061                 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2062                                       EVEX_V256;
2063     defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2064                 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2065                                       EVEX_V128;
2066   }
2067 }
2069 multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2070                         AVX512VLVectorVTInfo VTInfo> {
2071   let Predicates = [HasBWI] in
2072     defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2073                                EVEX_V512;
2075   let Predicates = [HasBWI, HasVLX] in {
2076     defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2077                                   EVEX_V256;
2078     defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2079                                   EVEX_V128;
2080   }
2081 }
2083 defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
2084                               avx512vl_f32_info>;
2085 defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
2086                               avx512vl_f64_info>, REX_W;
2087 defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
2088                               avx512vl_i32_info>;
2089 defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
2090                               avx512vl_i64_info>, REX_W;
2091 defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
2092                               avx512vl_i8_info>;
2093 defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
2094                               avx512vl_i16_info>, REX_W;
2096 //===----------------------------------------------------------------------===//
2097 // Compare Instructions
2098 //===----------------------------------------------------------------------===//
2100 // avx512_cmp_scalar - AVX512 CMPSS, CMPSD and CMPSH
2102 multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
2103                              PatFrag OpNode_su, PatFrag OpNodeSAE_su,
2104                              X86FoldableSchedWrite sched> {
2105   defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2106                       (outs _.KRC:$dst),
2107                       (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2108                       "vcmp"#_.Suffix,
2109                       "$cc, $src2, $src1", "$src1, $src2, $cc",
2110                       (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2111                       (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2112                                  timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2113   let mayLoad = 1 in
2114   defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2115                     (outs _.KRC:$dst),
2116                     (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
2117                     "vcmp"#_.Suffix,
2118                     "$cc, $src2, $src1", "$src1, $src2, $cc",
2119                     (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
2120                         timm:$cc),
2121                     (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
2122                         timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2123                     Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2125   let Uses = [MXCSR] in
2126   defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2127                      (outs _.KRC:$dst),
2128                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2129                      "vcmp"#_.Suffix,
2130                      "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
2131                      (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2132                                 timm:$cc),
2133                      (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2134                                    timm:$cc)>,
2135                      EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
2137   let isCodeGenOnly = 1 in {
2138     let isCommutable = 1 in
2139     def rr : AVX512Ii8<0xC2, MRMSrcReg,
2140                 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
2141                 !strconcat("vcmp", _.Suffix,
2142                            "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2143                 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2144                                           _.FRC:$src2,
2145                                           timm:$cc))]>,
2146                 EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2147     def rm : AVX512Ii8<0xC2, MRMSrcMem,
2148               (outs _.KRC:$dst),
2149               (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2150               !strconcat("vcmp", _.Suffix,
2151                          "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2152               [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2153                                         (_.ScalarLdFrag addr:$src2),
2154                                         timm:$cc))]>,
2155               EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2156               Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2157   }
2158 }
2160 def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2161                           (X86cmpms node:$src1, node:$src2, node:$cc), [{
2162   return N->hasOneUse();
2163 }]>;
2164 def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2165                           (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
2166   return N->hasOneUse();
2167 }]>;
2169 let Predicates = [HasAVX512] in {
2170   let ExeDomain = SSEPackedSingle in
2171   defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
2172                                    X86cmpms_su, X86cmpmsSAE_su,
2173                                    SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2174   let ExeDomain = SSEPackedDouble in
2175   defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
2176                                    X86cmpms_su, X86cmpmsSAE_su,
2177                                    SchedWriteFCmp.Scl>, AVX512XDIi8Base, REX_W;
2178 }
2179 let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
2180   defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
2181                                    X86cmpms_su, X86cmpmsSAE_su,
2182                                    SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA;
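// avx512_icmp_packed - AVX512 integer equality/greater-than compares
// (VPCMPEQ*, VPCMPGT*) that write their result to a mask register.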
2184 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
2185                               X86FoldableSchedWrite sched,
2186                               X86VectorVTInfo _, bit IsCommutable> {
2187   let isCommutable = IsCommutable, hasSideEffects = 0 in
2188   def rr : AVX512BI<opc, MRMSrcReg,
2189              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2190              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2191              []>, EVEX_4V, Sched<[sched]>;
2192   let mayLoad = 1, hasSideEffects = 0 in
2193   def rm : AVX512BI<opc, MRMSrcMem,
2194              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2195              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2196              []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2197   let isCommutable = IsCommutable, hasSideEffects = 0 in
2198   def rrk : AVX512BI<opc, MRMSrcReg,
2199               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2200               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2201                           "$dst {${mask}}, $src1, $src2}"),
2202               []>, EVEX_4V, EVEX_K, Sched<[sched]>;
2203   let mayLoad = 1, hasSideEffects = 0 in
2204   def rmk : AVX512BI<opc, MRMSrcMem,
2205               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2206               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2207                           "$dst {${mask}}, $src1, $src2}"),
2208               []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2209 }
2211 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
2212                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
2213                                   bit IsCommutable> :
2214            avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
2215   let mayLoad = 1, hasSideEffects = 0 in {
2216   def rmb : AVX512BI<opc, MRMSrcMem,
2217               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2218               !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2219                                     "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2220               []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2221   def rmbk : AVX512BI<opc, MRMSrcMem,
2222                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2223                                        _.ScalarMemOp:$src2),
2224                !strconcat(OpcodeStr,
2225                           "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2226                           "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2227                []>, EVEX_4V, EVEX_K, EVEX_B,
2228                Sched<[sched.Folded, sched.ReadAfterFold]>;
2229   }
2230 }
2232 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
2233                                  X86SchedWriteWidths sched,
2234                                  AVX512VLVectorVTInfo VTInfo, Predicate prd,
2235                                  bit IsCommutable = 0> {
2236   let Predicates = [prd] in
2237   defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
2238                               VTInfo.info512, IsCommutable>, EVEX_V512;
2240   let Predicates = [prd, HasVLX] in {
2241     defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
2242                                    VTInfo.info256, IsCommutable>, EVEX_V256;
2243     defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
2244                                    VTInfo.info128, IsCommutable>, EVEX_V128;
2245   }
2246 }
2248 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2249                                      X86SchedWriteWidths sched,
2250                                      AVX512VLVectorVTInfo VTInfo,
2251                                      Predicate prd, bit IsCommutable = 0> {
2252   let Predicates = [prd] in
2253   defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
2254                                   VTInfo.info512, IsCommutable>, EVEX_V512;
2256   let Predicates = [prd, HasVLX] in {
2257     defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
2258                                        VTInfo.info256, IsCommutable>, EVEX_V256;
2259     defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
2260                                        VTInfo.info128, IsCommutable>, EVEX_V128;
2261   }
2262 }
2264 // This fragment treats X86cmpm as commutable to help match loads in both
2265 // operands for PCMPEQ.
2266 def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
2267 def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
2268                          (setcc node:$src1, node:$src2, SETGT)>;
2270 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2271 // increase the pattern complexity the way an immediate would.
2272 let AddedComplexity = 2 in {
2273 // FIXME: Is there a better scheduler class for VPCMP?
2274 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
2275                       SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2276                 EVEX_CD8<8, CD8VF>, WIG;
2278 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
2279                       SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2280                 EVEX_CD8<16, CD8VF>, WIG;
2282 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
2283                       SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2284                 EVEX_CD8<32, CD8VF>;
2286 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
2287                       SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2288                 T8PD, REX_W, EVEX_CD8<64, CD8VF>;
2290 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
2291                       SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2292                 EVEX_CD8<8, CD8VF>, WIG;
2294 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
2295                       SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2296                 EVEX_CD8<16, CD8VF>, WIG;
2298 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
2299                       SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2300                 EVEX_CD8<32, CD8VF>;
2302 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
2303                       SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2304                 T8PD, REX_W, EVEX_CD8<64, CD8VF>;
2305 }
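// Convert a setcc condition code into the immediate operand expected by VPCMP.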
2307 def X86pcmpm_imm : SDNodeXForm<setcc, [{
2308   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2309   uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2310   return getI8Imm(SSECC, SDLoc(N));
2311 }]>;
2313 // Swapped operand version of the above.
2314 def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
2315   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2316   uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2317   SSECC = X86::getSwappedVPCMPImm(SSECC);
2318   return getI8Imm(SSECC, SDLoc(N));
2319 }]>;
2321 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2322                           PatFrag Frag_su,
2323                           X86FoldableSchedWrite sched,
2324                           X86VectorVTInfo _, string Name> {
2325   let isCommutable = 1 in
2326   def rri : AVX512AIi8<opc, MRMSrcReg,
2327              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2328              !strconcat("vpcmp", Suffix,
2329                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2330              [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2331                                                 (_.VT _.RC:$src2),
2332                                                 cond)))]>,
2333              EVEX_4V, Sched<[sched]>;
2334   def rmi : AVX512AIi8<opc, MRMSrcMem,
2335              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2336              !strconcat("vpcmp", Suffix,
2337                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2338              [(set _.KRC:$dst, (_.KVT
2339                                 (Frag:$cc
2340                                  (_.VT _.RC:$src1),
2341                                  (_.VT (_.LdFrag addr:$src2)),
2342                                  cond)))]>,
2343              EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2344   let isCommutable = 1 in
2345   def rrik : AVX512AIi8<opc, MRMSrcReg,
2346               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2347                                       u8imm:$cc),
2348               !strconcat("vpcmp", Suffix,
2349                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
2350                          "$dst {${mask}}, $src1, $src2, $cc}"),
2351               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2352                                      (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2353                                                          (_.VT _.RC:$src2),
2354                                                          cond))))]>,
2355               EVEX_4V, EVEX_K, Sched<[sched]>;
2356   def rmik : AVX512AIi8<opc, MRMSrcMem,
2357               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2358                                     u8imm:$cc),
2359               !strconcat("vpcmp", Suffix,
2360                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
2361                          "$dst {${mask}}, $src1, $src2, $cc}"),
2362               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2363                                      (_.KVT
2364                                       (Frag_su:$cc
2365                                        (_.VT _.RC:$src1),
2366                                        (_.VT (_.LdFrag addr:$src2)),
2367                                        cond))))]>,
2368               EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2370   def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2),
2371                              (_.VT _.RC:$src1), cond)),
2372             (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2373              _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2375   def : Pat<(and _.KRCWM:$mask,
2376                  (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2),
2377                                      (_.VT _.RC:$src1), cond))),
2378             (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2379              _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2380              (X86pcmpm_imm_commute $cc))>;
2381 }
2383 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2384                               PatFrag Frag_su, X86FoldableSchedWrite sched,
2385                               X86VectorVTInfo _, string Name> :
2386            avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> {
2387   def rmib : AVX512AIi8<opc, MRMSrcMem,
2388              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2389                                      u8imm:$cc),
2390              !strconcat("vpcmp", Suffix,
2391                         "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2392                         "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2393              [(set _.KRC:$dst, (_.KVT (Frag:$cc
2394                                        (_.VT _.RC:$src1),
2395                                        (_.BroadcastLdFrag addr:$src2),
2396                                        cond)))]>,
2397              EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2398   def rmibk : AVX512AIi8<opc, MRMSrcMem,
2399               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2400                                        _.ScalarMemOp:$src2, u8imm:$cc),
2401               !strconcat("vpcmp", Suffix,
2402                   "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2403                   "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2404               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2405                                      (_.KVT (Frag_su:$cc
2406                                              (_.VT _.RC:$src1),
2407                                              (_.BroadcastLdFrag addr:$src2),
2408                                              cond))))]>,
2409               EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2411   def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
2412                     (_.VT _.RC:$src1), cond)),
2413             (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2414              _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2416   def : Pat<(and _.KRCWM:$mask,
2417                  (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2),
2418                                      (_.VT _.RC:$src1), cond))),
2419             (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2420              _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2421              (X86pcmpm_imm_commute $cc))>;
2422 }
2424 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2425                              PatFrag Frag_su, X86SchedWriteWidths sched,
2426                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2427   let Predicates = [prd] in
2428   defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2429                           sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2431   let Predicates = [prd, HasVLX] in {
2432     defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2433                                sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2434     defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2435                                sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2436   }
2437 }
2439 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2440                                  PatFrag Frag_su, X86SchedWriteWidths sched,
2441                                  AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2442   let Predicates = [prd] in
2443   defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2444                               sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2446   let Predicates = [prd, HasVLX] in {
2447     defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2448                                    sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2449     defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2450                                    sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2451   }
2452 }
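// X86pcmpm matches signed integer compares and X86pcmpum unsigned ones; the
// _su variants additionally require a single use so the compare can be merged
// with a mask operation.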
2454 def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2455                        (setcc node:$src1, node:$src2, node:$cc), [{
2456   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2457   return !ISD::isUnsignedIntSetCC(CC);
2458 }], X86pcmpm_imm>;
2460 def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2461                           (setcc node:$src1, node:$src2, node:$cc), [{
2462   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2463   return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2464 }], X86pcmpm_imm>;
2466 def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2467                         (setcc node:$src1, node:$src2, node:$cc), [{
2468   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2469   return ISD::isUnsignedIntSetCC(CC);
2470 }], X86pcmpm_imm>;
2472 def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2473                            (setcc node:$src1, node:$src2, node:$cc), [{
2474   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2475   return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2476 }], X86pcmpm_imm>;
2478 // FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
2479 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2480                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2481                                 EVEX_CD8<8, CD8VF>;
2482 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2483                                  SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2484                                  EVEX_CD8<8, CD8VF>;
2486 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2487                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2488                                 REX_W, EVEX_CD8<16, CD8VF>;
2489 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2490                                  SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2491                                  REX_W, EVEX_CD8<16, CD8VF>;
2493 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2494                                     SchedWriteVecALU, avx512vl_i32_info,
2495                                     HasAVX512>, EVEX_CD8<32, CD8VF>;
2496 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2497                                      SchedWriteVecALU, avx512vl_i32_info,
2498                                      HasAVX512>, EVEX_CD8<32, CD8VF>;
2500 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2501                                     SchedWriteVecALU, avx512vl_i64_info,
2502                                     HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
2503 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2504                                      SchedWriteVecALU, avx512vl_i64_info,
2505                                      HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
2507 def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2508                          (X86cmpm node:$src1, node:$src2, node:$cc), [{
2509   return N->hasOneUse();
2510 }]>;
2512 def X86cmpm_imm_commute : SDNodeXForm<timm, [{
2513   uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
2514   return getI8Imm(Imm, SDLoc(N));
2515 }]>;
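// avx512_vcmp_common - packed FP compares (register, memory and broadcast
// forms) plus patterns that commute the immediate when the load is in the
// first operand.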
2517 multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2518                               string Name> {
2519 let Uses = [MXCSR], mayRaiseFPException = 1 in {
2520   defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2521                    (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2522                    "vcmp"#_.Suffix,
2523                    "$cc, $src2, $src1", "$src1, $src2, $cc",
2524                    (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2525                    (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2526                    1>, Sched<[sched]>;
2528   defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2529                 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2530                 "vcmp"#_.Suffix,
2531                 "$cc, $src2, $src1", "$src1, $src2, $cc",
2532                 (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2533                              timm:$cc),
2534                 (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2535                             timm:$cc)>,
2536                 Sched<[sched.Folded, sched.ReadAfterFold]>;
2538   defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2539                 (outs _.KRC:$dst),
2540                 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2541                 "vcmp"#_.Suffix,
2542                 "$cc, ${src2}"#_.BroadcastStr#", $src1",
2543                 "$src1, ${src2}"#_.BroadcastStr#", $cc",
2544                 (X86any_cmpm (_.VT _.RC:$src1),
2545                              (_.VT (_.BroadcastLdFrag addr:$src2)),
2546                              timm:$cc),
2547                 (X86cmpm_su (_.VT _.RC:$src1),
2548                             (_.VT (_.BroadcastLdFrag addr:$src2)),
2549                             timm:$cc)>,
2550                 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2551   }
2553   // Patterns for selecting with the load in the other operand.
2554   def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2555                          timm:$cc),
2556             (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2557                                                       (X86cmpm_imm_commute timm:$cc))>;
2559   def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2560                                             (_.VT _.RC:$src1),
2561                                             timm:$cc)),
2562             (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2563                                                        _.RC:$src1, addr:$src2,
2564                                                        (X86cmpm_imm_commute timm:$cc))>;
2566   def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
2567                          (_.VT _.RC:$src1), timm:$cc),
2568             (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2569                                                        (X86cmpm_imm_commute timm:$cc))>;
2571   def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
2572                                             (_.VT _.RC:$src1),
2573                                             timm:$cc)),
2574             (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2575                                                         _.RC:$src1, addr:$src2,
2576                                                         (X86cmpm_imm_commute timm:$cc))>;
2578   // Patterns for mask intrinsics.
2579   def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
2580                       (_.KVT immAllOnesV)),
2581             (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;
2583   def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
2584             (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
2585                                                        _.RC:$src2, timm:$cc)>;
2587   def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2588                       (_.KVT immAllOnesV)),
2589             (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;
2591   def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2592                       _.KRCWM:$mask),
2593             (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
2594                                                        addr:$src2, timm:$cc)>;
2596   def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2597                       (_.KVT immAllOnesV)),
2598             (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;
2600   def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2601                       _.KRCWM:$mask),
2602             (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
2603                                                         addr:$src2, timm:$cc)>;
2605   // Patterns for mask intrinsics with the load in the other operand.
2606   def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2607                       (_.KVT immAllOnesV)),
2608             (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2609                                                       (X86cmpm_imm_commute timm:$cc))>;
2611   def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2612                       _.KRCWM:$mask),
2613             (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2614                                                        _.RC:$src1, addr:$src2,
2615                                                        (X86cmpm_imm_commute timm:$cc))>;
2617   def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2618                       (_.KVT immAllOnesV)),
2619             (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2620                                                        (X86cmpm_imm_commute timm:$cc))>;
2622   def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2623                       _.KRCWM:$mask),
2624             (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2625                                                         _.RC:$src1, addr:$src2,
2626                                                         (X86cmpm_imm_commute timm:$cc))>;
2627 }
2629 multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2630   // Comparison code form (VCMP[EQ/LT/LE/...]).
2631   let Uses = [MXCSR] in
2632   defm  rrib  : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
2633                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2634                      (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
2635                      "vcmp"#_.Suffix,
2636                      "$cc, {sae}, $src2, $src1",
2637                      "$src1, $src2, {sae}, $cc",
2638                      [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2639                                         (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
2640                      [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2641                                         (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
2642                      EVEX_B, Sched<[sched]>;
2643 }
2645 multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
2646                        Predicate Pred = HasAVX512> {
2647   let Predicates = [Pred] in {
2648     defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2649                 avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2651   }
2652   let Predicates = [Pred,HasVLX] in {
2653    defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2654    defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
2655   }
2656 }
2658 defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2659                           AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
2660 defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2661                           AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2662 defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
2663                           AVX512PSIi8Base, EVEX_4V, EVEX_CD8<16, CD8VF>, TA;
2665 // Patterns to select fp compares with a load as the first operand.
2666 let Predicates = [HasAVX512] in {
2667   def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
2668             (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2670   def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
2671             (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2672 }
2674 let Predicates = [HasFP16] in {
2675   def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
2676             (VCMPSHZrm FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2677 }
2679 // ----------------------------------------------------------------
2680 // FPClass
2682 def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
2683                               (X86Vfpclasss node:$src1, node:$src2), [{
2684   return N->hasOneUse();
2685 }]>;
2687 def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
2688                              (X86Vfpclass node:$src1, node:$src2), [{
2689   return N->hasOneUse();
2690 }]>;
2692 // Handle the scalar fpclass instruction:  mask = op(reg_scalar, imm)
2693 //                                                 op(mem_scalar, imm)
2694 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2695                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2696                                  Predicate prd> {
2697   let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2698       def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2699                       (ins _.RC:$src1, i32u8imm:$src2),
2700                       OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2701                       [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2702                               (i32 timm:$src2)))]>,
2703                       Sched<[sched]>;
2704       def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2705                       (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2706                       OpcodeStr#_.Suffix#
2707                       "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2708                       [(set _.KRC:$dst,(and _.KRCWM:$mask,
2709                                       (X86Vfpclasss_su (_.VT _.RC:$src1),
2710                                       (i32 timm:$src2))))]>,
2711                       EVEX_K, Sched<[sched]>;
2712     def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2713                     (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2714                     OpcodeStr#_.Suffix#
2715                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2716                     [(set _.KRC:$dst,
2717                           (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
2718                                         (i32 timm:$src2)))]>,
2719                     Sched<[sched.Folded, sched.ReadAfterFold]>;
2720     def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2721                     (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2722                     OpcodeStr#_.Suffix#
2723                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2724                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
2725                         (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
2726                             (i32 timm:$src2))))]>,
2727                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2728   }
2729 }
2731 // Handle the vector fpclass instruction:  mask = fpclass(reg_vec, imm)
2732 //                                                 fpclass(mem_vec, imm)
2733 //                                                 fpclass(broadcast(eltVt), imm)
2734 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2735                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2736                                  string mem>{
2737   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2738   def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2739                       (ins _.RC:$src1, i32u8imm:$src2),
2740                       OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2741                       [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2742                                        (i32 timm:$src2)))]>,
2743                       Sched<[sched]>;
2744   def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2745                       (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2746                       OpcodeStr#_.Suffix#
2747                       "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2748                       [(set _.KRC:$dst,(and _.KRCWM:$mask,
2749                                        (X86Vfpclass_su (_.VT _.RC:$src1),
2750                                        (i32 timm:$src2))))]>,
2751                       EVEX_K, Sched<[sched]>;
2752   def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2753                     (ins _.MemOp:$src1, i32u8imm:$src2),
2754                     OpcodeStr#_.Suffix#"{"#mem#"}"#
2755                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2756                     [(set _.KRC:$dst,(X86Vfpclass
2757                                      (_.VT (_.LdFrag addr:$src1)),
2758                                      (i32 timm:$src2)))]>,
2759                     Sched<[sched.Folded, sched.ReadAfterFold]>;
2760   def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2761                     (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2762                     OpcodeStr#_.Suffix#"{"#mem#"}"#
2763                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2764                     [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2765                                   (_.VT (_.LdFrag addr:$src1)),
2766                                   (i32 timm:$src2))))]>,
2767                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2768   def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2769                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2770                     OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2771                                       _.BroadcastStr#", $dst|$dst, ${src1}"
2772                                                   #_.BroadcastStr#", $src2}",
2773                     [(set _.KRC:$dst,(X86Vfpclass
2774                                      (_.VT (_.BroadcastLdFrag addr:$src1)),
2775                                      (i32 timm:$src2)))]>,
2776                     EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2777   def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2778                     (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2779                     OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2780                           _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
2781                                                    _.BroadcastStr#", $src2}",
2782                     [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2783                                      (_.VT (_.BroadcastLdFrag addr:$src1)),
2784                                      (i32 timm:$src2))))]>,
2785                     EVEX_B, EVEX_K,  Sched<[sched.Folded, sched.ReadAfterFold]>;
2786   }
2788   // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
2789   // the memory form.
2790   def : InstAlias<OpcodeStr#_.Suffix#mem#
2791                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2792                   (!cast<Instruction>(NAME#"rr")
2793                    _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2794   def : InstAlias<OpcodeStr#_.Suffix#mem#
2795                   "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2796                   (!cast<Instruction>(NAME#"rrk")
2797                    _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2798   def : InstAlias<OpcodeStr#_.Suffix#mem#
2799                   "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2800                   _.BroadcastStr#", $src2}",
2801                   (!cast<Instruction>(NAME#"rmb")
2802                    _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2803   def : InstAlias<OpcodeStr#_.Suffix#mem#
2804                   "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2805                   "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2806                   (!cast<Instruction>(NAME#"rmbk")
2807                    _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2808 }
2810 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2811                                      bits<8> opc, X86SchedWriteWidths sched,
2812                                      Predicate prd>{
2813   let Predicates = [prd] in {
2814     defm Z    : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2815                                       _.info512, "z">, EVEX_V512;
2816   }
2817   let Predicates = [prd, HasVLX] in {
2818     defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2819                                       _.info128, "x">, EVEX_V128;
2820     defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2821                                       _.info256, "y">, EVEX_V256;
2822   }
2823 }
2825 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2826                                  bits<8> opcScalar, X86SchedWriteWidths sched> {
2827   defm PH : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f16_info, opcVec,
2828                                       sched, HasFP16>,
2829                                       EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
2830   defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2831                                    sched.Scl, f16x_info, HasFP16>,
2832                                    EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;
2833   defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
2834                                       sched, HasDQI>,
2835                                       EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
2836   defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
2837                                       sched, HasDQI>,
2838                                       EVEX_CD8<64, CD8VF>, AVX512AIi8Base, REX_W;
2839   defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2840                                    sched.Scl, f32x_info, HasDQI>, VEX_LIG,
2841                                    EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
2842   defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2843                                    sched.Scl, f64x_info, HasDQI>, VEX_LIG,
2844                                    EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, REX_W;
2845 }
2847 defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;
2849 //-----------------------------------------------------------------
2850 // Mask register copy, including
2851 // - copy between mask registers
2852 // - load/store mask registers
2853 // - copy from GPR to mask register and vice versa
2855 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2856                          string OpcodeStr, RegisterClass KRC,
2857                          ValueType vvt, X86MemOperand x86memop> {
2858   let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
2859   def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2860              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2861              Sched<[WriteMove]>;
2862   def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2863              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2864              [(set KRC:$dst, (vvt (load addr:$src)))]>,
2865              Sched<[WriteLoad]>;
2866   def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2867              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2868              [(store KRC:$src, addr:$dst)]>,
2869              Sched<[WriteStore]>;
2870 }
2872 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2873                              string OpcodeStr,
2874                              RegisterClass KRC, RegisterClass GRC> {
2875   let hasSideEffects = 0 in {
2876     def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2877                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2878                Sched<[WriteMove]>;
2879     def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2880                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2881                Sched<[WriteMove]>;
2882   }
2883 }
2885 let Predicates = [HasDQI] in
2886   defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2887                avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2888                VEX, PD;
2890 let Predicates = [HasAVX512] in
2891   defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2892                avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2893                VEX, PS;
2895 let Predicates = [HasBWI] in {
2896   defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2897                VEX, PD, REX_W;
2898   defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2899                VEX, XD;
2900   defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2901                VEX, PS, REX_W;
2902   defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2903                VEX, XD, REX_W;
2904 }
2906 // GR from/to mask register
2907 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2908           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2909 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2910           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2911 def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
2912           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>;
2914 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2915           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2916 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2917           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
2919 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2920           (KMOVWrk VK16:$src)>;
2921 def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2922           (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
2923 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2924           (COPY_TO_REGCLASS VK16:$src, GR32)>;
2925 def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2926           (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
2928 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2929           (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2930 def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2931           (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
2932 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2933           (COPY_TO_REGCLASS VK8:$src, GR32)>;
2934 def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2935           (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
2937 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2938           (COPY_TO_REGCLASS GR32:$src, VK32)>;
2939 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2940           (COPY_TO_REGCLASS VK32:$src, GR32)>;
2941 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2942           (COPY_TO_REGCLASS GR64:$src, VK64)>;
2943 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2944           (COPY_TO_REGCLASS VK64:$src, GR64)>;
2946 // Load/store kreg
2947 let Predicates = [HasDQI] in {
2948   def : Pat<(v1i1 (load addr:$src)),
2949             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2950   def : Pat<(v2i1 (load addr:$src)),
2951             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2952   def : Pat<(v4i1 (load addr:$src)),
2953             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2954 }
2956 let Predicates = [HasAVX512] in {
2957   def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2958             (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2959   def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2960             (KMOVWkm addr:$src)>;
2961 }
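// X86kextract matches EXTRACT_VECTOR_ELT of an i1 vector with an i8 result,
// used below to move the low mask bit into a GPR.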
2963 def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2964                          SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2965                                               SDTCVecEltisVT<1, i1>,
2966                                               SDTCisPtrTy<2>]>>;
2968 let Predicates = [HasAVX512] in {
2969   multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2970     def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2971               (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2973     def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2974               (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2976     def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2977               (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2979     def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2980               (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2981   }
2983   defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
2984   defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
2985   defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
2986   defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
2987   defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
2988   defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
2989   defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;
2991   def : Pat<(insert_subvector (v16i1 immAllZerosV),
2992                               (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2993             (KMOVWkr (AND32ri
2994                       (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2995                       (i32 1)))>;
2996 }
2998 // Mask unary operation
2999 // - KNOT
3000 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
3001                             RegisterClass KRC, SDPatternOperator OpNode,
3002                             X86FoldableSchedWrite sched, Predicate prd> {
3003   let Predicates = [prd] in
3004     def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
3005                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3006                [(set KRC:$dst, (OpNode KRC:$src))]>,
3007                Sched<[sched]>;
3008 }
3010 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
3011                                 SDPatternOperator OpNode,
3012                                 X86FoldableSchedWrite sched> {
3013   defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3014                             sched, HasDQI>, VEX, PD;
3015   defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3016                             sched, HasAVX512>, VEX, PS;
3017   defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3018                             sched, HasBWI>, VEX, PD, REX_W;
3019   defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3020                             sched, HasBWI>, VEX, PS, REX_W;
3021 }
3023 // TODO - do we need an X86SchedWriteWidths::KMASK type?
3024 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
3026 // KNL does not support KMOVB, so an 8-bit mask is promoted to a 16-bit mask.
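// The COPY_TO_REGCLASS nodes below only change the register class, so each of
// these NOTs is emitted as a single KNOTW on the containing 16-bit mask
// register; the bits it sets in the unused upper lanes are never read.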
3027 let Predicates = [HasAVX512, NoDQI] in
3028 def : Pat<(vnot VK8:$src),
3029           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
3031 def : Pat<(vnot VK4:$src),
3032           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
3033 def : Pat<(vnot VK2:$src),
3034           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
3035 def : Pat<(vnot VK1:$src),
3036           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;
3038 // Mask binary operation
3039 // - KAND, KANDN, KOR, KXNOR, KXOR
3040 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
3041                            RegisterClass KRC, SDPatternOperator OpNode,
3042                            X86FoldableSchedWrite sched, Predicate prd,
3043                            bit IsCommutable> {
3044   let Predicates = [prd], isCommutable = IsCommutable in
3045     def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
3046                !strconcat(OpcodeStr,
3047                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3048                [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
3049                Sched<[sched]>;
3050 }
3052 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
3053                                  SDPatternOperator OpNode,
3054                                  X86FoldableSchedWrite sched, bit IsCommutable,
3055                                  Predicate prdW = HasAVX512> {
3056   defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3057                              sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
3058   defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3059                              sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
3060   defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3061                              sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, REX_W, PD;
3062   defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3063                              sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, REX_W, PS;
3064 }
3066 // These nodes use 'vnot' instead of 'not' to support vectors.
3067 def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
3068 def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
3070 // TODO - do we need an X86SchedWriteWidths::KMASK type?
3071 defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
3072 defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
3073 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
3074 defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
3075 defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
3076 defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
3078 multiclass avx512_binop_pat<SDPatternOperator VOpNode,
3079                             Instruction Inst> {
3080   // With AVX512F an 8-bit mask is promoted to a 16-bit mask; with DQI the
3081   // 8-bit type is legal and the KxxxB instructions are used directly.
3082   let Predicates = [NoDQI] in
3083   def : Pat<(VOpNode VK8:$src1, VK8:$src2),
3084             (COPY_TO_REGCLASS
3085               (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
3086                     (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
3088   // Mask types with fewer than 8 elements require conversion anyway.
3089   def : Pat<(VOpNode VK1:$src1, VK1:$src2),
3090         (COPY_TO_REGCLASS (Inst
3091                            (COPY_TO_REGCLASS VK1:$src1, VK16),
3092                            (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
3093   def : Pat<(VOpNode VK2:$src1, VK2:$src2),
3094         (COPY_TO_REGCLASS (Inst
3095                            (COPY_TO_REGCLASS VK2:$src1, VK16),
3096                            (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
3097   def : Pat<(VOpNode VK4:$src1, VK4:$src2),
3098         (COPY_TO_REGCLASS (Inst
3099                            (COPY_TO_REGCLASS VK4:$src1, VK16),
3100                            (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
3101 }
3103 defm : avx512_binop_pat<and,   KANDWrr>;
3104 defm : avx512_binop_pat<vandn, KANDNWrr>;
3105 defm : avx512_binop_pat<or,    KORWrr>;
3106 defm : avx512_binop_pat<vxnor, KXNORWrr>;
3107 defm : avx512_binop_pat<xor,   KXORWrr>;
3109 // Mask unpacking
3110 multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
3111                              X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
3112                              Predicate prd> {
3113   let Predicates = [prd] in {
3114     let hasSideEffects = 0 in
3115     def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
3116                (ins Src.KRC:$src1, Src.KRC:$src2),
3117                "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
3118                VEX_4V, VEX_L, Sched<[sched]>;
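    // The first source operand of KUNPCK supplies the high half of the result,
    // so the concat_vectors operands (low half listed first) are passed to the
    // instruction in swapped order.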
3120     def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
3121               (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
3122   }
3123 }
3125 defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, PD;
3126 defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
3127 defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, REX_W;
3129 // Mask bit testing
3130 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3131                               SDNode OpNode, X86FoldableSchedWrite sched,
3132                               Predicate prd> {
3133   let Predicates = [prd], Defs = [EFLAGS] in
3134     def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
3135                !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
3136                [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
3137                Sched<[sched]>;
3138 }
3140 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
3141                                 X86FoldableSchedWrite sched,
3142                                 Predicate prdW = HasAVX512> {
3143   defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
3144                                                                 VEX, PD;
3145   defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
3146                                                                 VEX, PS;
3147   defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
3148                                                                 VEX, PS, REX_W;
3149   defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
3150                                                                 VEX, PD, REX_W;
3151 }
3153 // TODO - do we need an X86SchedWriteWidths::KMASK type?
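// KORTEST sets ZF when the OR of the two masks is all zeros and CF when it is
// all ones; KTEST sets ZF and CF from the AND and ANDN of the masks.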
3154 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
3155 defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
3157 // Mask shift
3158 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3159                                SDNode OpNode, X86FoldableSchedWrite sched> {
3160   let Predicates = [HasAVX512] in
3161     def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
3162                  !strconcat(OpcodeStr,
3163                             "\t{$imm, $src, $dst|$dst, $src, $imm}"),
3164                             [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
3165                  Sched<[sched]>;
3166 }
3168 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
3169                                  SDNode OpNode, X86FoldableSchedWrite sched> {
3170   defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3171                                sched>, VEX, TAPD, REX_W;
3172   let Predicates = [HasDQI] in
3173   defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3174                                sched>, VEX, TAPD;
3175   let Predicates = [HasBWI] in {
3176   defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3177                                sched>, VEX, TAPD, REX_W;
3178   defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3179                                sched>, VEX, TAPD;
3180   }
3181 }
3183 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
3184 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
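// KSHIFTL/KSHIFTR shift the whole mask register by an immediate and shift in
// zeros; besides explicit shifts, they are used when inserting or extracting
// mask subvectors at non-zero positions.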
3186 // Patterns for comparing 128/256-bit integer vectors using a 512-bit instruction.
3187 multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3188                                                  string InstStr,
3189                                                  X86VectorVTInfo Narrow,
3190                                                  X86VectorVTInfo Wide> {
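// The narrow operands are widened by inserting them into an IMPLICIT_DEF
// 512-bit register, the compare is performed at full width, and the low bits
// of the wide mask are extracted with a register-class copy; the undefined
// upper lanes only produce mask bits that are never read.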
3191 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3192                                 (Narrow.VT Narrow.RC:$src2), cond)),
3193           (COPY_TO_REGCLASS
3194            (!cast<Instruction>(InstStr#"Zrri")
3195             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3196             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3197             (X86pcmpm_imm $cc)), Narrow.KRC)>;
3199 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3200                            (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3201                                                     (Narrow.VT Narrow.RC:$src2),
3202                                                     cond)))),
3203           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3204            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3205            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3206            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3207            (X86pcmpm_imm $cc)), Narrow.KRC)>;
3208 }
3210 multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3211                                                      string InstStr,
3212                                                      X86VectorVTInfo Narrow,
3213                                                      X86VectorVTInfo Wide> {
3214 // Broadcast load.
3215 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3216                                 (Narrow.BroadcastLdFrag addr:$src2), cond)),
3217           (COPY_TO_REGCLASS
3218            (!cast<Instruction>(InstStr#"Zrmib")
3219             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3220             addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
3222 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3223                            (Narrow.KVT
3224                             (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3225                                          (Narrow.BroadcastLdFrag addr:$src2),
3226                                          cond)))),
3227           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3228            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3229            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3230            addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
3232 // Commuted with broadcast load.
3233 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
3234                                 (Narrow.VT Narrow.RC:$src1),
3235                                 cond)),
3236           (COPY_TO_REGCLASS
3237            (!cast<Instruction>(InstStr#"Zrmib")
3238             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3239             addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3241 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3242                            (Narrow.KVT
3243                             (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
3244                                          (Narrow.VT Narrow.RC:$src1),
3245                                          cond)))),
3246           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3247            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3248            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3249            addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3250 }
3252 // Same as above, but for FP types, which don't use PatFrags.
3253 multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
3254                                                 X86VectorVTInfo Narrow,
3255                                                 X86VectorVTInfo Wide> {
3256 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3257                                (Narrow.VT Narrow.RC:$src2), timm:$cc)),
3258           (COPY_TO_REGCLASS
3259            (!cast<Instruction>(InstStr#"Zrri")
3260             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3261             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3262             timm:$cc), Narrow.KRC)>;
3264 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3265                            (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3266                                        (Narrow.VT Narrow.RC:$src2), timm:$cc))),
3267           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3268            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3269            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3270            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3271            timm:$cc), Narrow.KRC)>;
3273 // Broadcast load.
3274 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3275                                (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
3276           (COPY_TO_REGCLASS
3277            (!cast<Instruction>(InstStr#"Zrmbi")
3278             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3279             addr:$src2, timm:$cc), Narrow.KRC)>;
3281 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3282                            (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3283                                        (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
3284           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3285            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3286            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3287            addr:$src2, timm:$cc), Narrow.KRC)>;
3289 // Commuted with broadcast load.
3290 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3291                                (Narrow.VT Narrow.RC:$src1), timm:$cc)),
3292           (COPY_TO_REGCLASS
3293            (!cast<Instruction>(InstStr#"Zrmbi")
3294             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3295             addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3297 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3298                            (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3299                                        (Narrow.VT Narrow.RC:$src1), timm:$cc))),
3300           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3301            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3302            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3303            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3304 }
3306 let Predicates = [HasAVX512, NoVLX] in {
3307   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3308   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3310   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3311   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3313   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3314   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3316   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3317   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3319   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3320   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3322   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3323   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3325   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3326   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3328   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3329   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3331   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
3332   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
3333   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
3334   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
3335 }
3337 let Predicates = [HasBWI, NoVLX] in {
3338   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3339   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3341   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3342   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3344   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3345   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3347   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3348   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3349 }
3351 // Mask setting all 0s or 1s
3352 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
3353   let Predicates = [HasAVX512] in
3354     let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3355         SchedRW = [WriteZero] in
3356       def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3357                      [(set KRC:$dst, (VT Val))]>;
3358 }
3360 multiclass avx512_mask_setop_w<SDPatternOperator Val> {
3361   defm W : avx512_mask_setop<VK16, v16i1, Val>;
3362   defm D : avx512_mask_setop<VK32,  v32i1, Val>;
3363   defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3364 }
3366 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3367 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
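// KSET0*/KSET1* are pseudos; they are expanded after register allocation (in
// expandPostRAPseudo) to a kxor/kxnor of a mask register with itself.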
3369 // With AVX-512 alone, an 8-bit mask is promoted to a 16-bit mask.
3370 let Predicates = [HasAVX512] in {
3371   def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3372   def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3373   def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3374   def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3375   def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
3376   def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
3377   def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
3378   def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
3379 }
3381 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
3382 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3383                                              RegisterClass RC, ValueType VT> {
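  // At index 0 both directions are pure register-class copies: the narrow mask
  // lives in the low bits of the wider k register, so no instruction is
  // emitted.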
3384   def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3385             (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3387   def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3388             (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
3389 }
3390 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
3391 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
3392 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
3393 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
3394 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
3395 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;
3397 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
3398 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
3399 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
3400 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
3401 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;
3403 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
3404 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
3405 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
3406 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;
3408 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
3409 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
3410 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;
3412 defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3413 defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3415 defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3417 //===----------------------------------------------------------------------===//
3418 // AVX-512 - Aligned and unaligned load and store
3419 //===----------------------------------------------------------------------===//
3421 multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3422                        X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3423                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3424                        bit NoRMPattern = 0,
3425                        SDPatternOperator SelectOprr = vselect> {
3426   let hasSideEffects = 0 in {
3427   let isMoveReg = 1 in
3428   def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3429                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3430                     _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
3431                     EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
3432   def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3433                       (ins _.KRCWM:$mask,  _.RC:$src),
3434                       !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3435                        "${dst} {${mask}} {z}, $src}"),
3436                        [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3437                                            (_.VT _.RC:$src),
3438                                            _.ImmAllZerosV)))], _.ExeDomain>,
3439                        EVEX, EVEX_KZ, Sched<[Sched.RR]>;
3441   let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3442   def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3443                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3444                     !if(NoRMPattern, [],
3445                         [(set _.RC:$dst,
3446                           (_.VT (ld_frag addr:$src)))]),
3447                     _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
3448                     EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
3450   let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3451     def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3452                       (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3453                       !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3454                       "${dst} {${mask}}, $src1}"),
3455                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3456                                           (_.VT _.RC:$src1),
3457                                           (_.VT _.RC:$src0))))], _.ExeDomain>,
3458                        EVEX, EVEX_K, Sched<[Sched.RR]>;
3459     def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3460                      (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3461                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3462                       "${dst} {${mask}}, $src1}"),
3463                      [(set _.RC:$dst, (_.VT
3464                          (vselect_mask _.KRCWM:$mask,
3465                           (_.VT (ld_frag addr:$src1)),
3466                            (_.VT _.RC:$src0))))], _.ExeDomain>,
3467                      EVEX, EVEX_K, Sched<[Sched.RM]>;
3468   }
3469   def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3470                   (ins _.KRCWM:$mask, _.MemOp:$src),
3471                   OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3472                                 "${dst} {${mask}} {z}, $src}",
3473                   [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
3474                     (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3475                   _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
3476   }
3477   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3478             (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3480   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3481             (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3483   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3484             (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
3485              _.KRCWM:$mask, addr:$ptr)>;
3486 }
3488 multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3489                                  AVX512VLVectorVTInfo _, Predicate prd,
3490                                  X86SchedWriteMoveLSWidths Sched,
3491                                  string EVEX2VEXOvrd, bit NoRMPattern = 0> {
3492   let Predicates = [prd] in
3493   defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3494                        _.info512.AlignedLdFrag, masked_load_aligned,
3495                        Sched.ZMM, "", NoRMPattern>, EVEX_V512;
3497   let Predicates = [prd, HasVLX] in {
3498   defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3499                           _.info256.AlignedLdFrag, masked_load_aligned,
3500                           Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
3501   defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3502                           _.info128.AlignedLdFrag, masked_load_aligned,
3503                           Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
3504   }
3505 }
3507 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3508                           AVX512VLVectorVTInfo _, Predicate prd,
3509                           X86SchedWriteMoveLSWidths Sched,
3510                           string EVEX2VEXOvrd, bit NoRMPattern = 0,
3511                           SDPatternOperator SelectOprr = vselect> {
3512   let Predicates = [prd] in
3513   defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3514                        masked_load, Sched.ZMM, "",
3515                        NoRMPattern, SelectOprr>, EVEX_V512;
3517   let Predicates = [prd, HasVLX] in {
3518   defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3519                          masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
3520                          NoRMPattern, SelectOprr>, EVEX_V256;
3521   defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3522                          masked_load, Sched.XMM, EVEX2VEXOvrd,
3523                          NoRMPattern, SelectOprr>, EVEX_V128;
3524   }
3525 }
3527 multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3528                         X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3529                         X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3530                         bit NoMRPattern = 0> {
3531   let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3532   let isMoveReg = 1 in
3533   def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3534                          OpcodeStr # "\t{$src, $dst|$dst, $src}",
3535                          [], _.ExeDomain>, EVEX,
3536                          Sched<[Sched.RR]>,
3537                          EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
3538   def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3539                          (ins _.KRCWM:$mask, _.RC:$src),
3540                          OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3541                          "${dst} {${mask}}, $src}",
3542                          [], _.ExeDomain>,  EVEX, EVEX_K,
3543                          Sched<[Sched.RR]>;
3544   def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3545                           (ins _.KRCWM:$mask, _.RC:$src),
3546                           OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3547                           "${dst} {${mask}} {z}, $src}",
3548                           [], _.ExeDomain>, EVEX, EVEX_KZ,
3549                           Sched<[Sched.RR]>;
3550   }
3552   let hasSideEffects = 0, mayStore = 1 in
3553   def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3554                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3555                     !if(NoMRPattern, [],
3556                         [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3557                     _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
3558                     EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
3559   def mrk : AVX512PI<opc, MRMDestMem, (outs),
3560                      (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3561               OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3562                [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>;
3564   def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3565            (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3566                                                         _.KRCWM:$mask, _.RC:$src)>;
3568   def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3569                   (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3570                    _.RC:$dst, _.RC:$src), 0>;
3571   def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3572                   (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3573                    _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3574   def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3575                   (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3576                    _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3577 }
3579 multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3580                             AVX512VLVectorVTInfo _, Predicate prd,
3581                             X86SchedWriteMoveLSWidths Sched,
3582                             string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3583   let Predicates = [prd] in
3584   defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3585                         masked_store, Sched.ZMM, "",
3586                         NoMRPattern>, EVEX_V512;
3587   let Predicates = [prd, HasVLX] in {
3588     defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3589                              masked_store, Sched.YMM,
3590                              EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3591     defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3592                              masked_store, Sched.XMM, EVEX2VEXOvrd,
3593                              NoMRPattern>, EVEX_V128;
3594   }
3595 }
3597 multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3598                                   AVX512VLVectorVTInfo _, Predicate prd,
3599                                   X86SchedWriteMoveLSWidths Sched,
3600                                   string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3601   let Predicates = [prd] in
3602   defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3603                         masked_store_aligned, Sched.ZMM, "",
3604                         NoMRPattern>, EVEX_V512;
3606   let Predicates = [prd, HasVLX] in {
3607     defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3608                              masked_store_aligned, Sched.YMM,
3609                              EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3610     defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3611                              masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
3612                              NoMRPattern>, EVEX_V128;
3613   }
3614 }
3616 defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3617                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3618                avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3619                                       HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3620                PS, EVEX_CD8<32, CD8VF>;
3622 defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3623                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3624                avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3625                                       HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3626                PD, REX_W, EVEX_CD8<64, CD8VF>;
3628 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3629                               SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
3630                avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3631                                SchedWriteFMoveLS, "VMOVUPS">,
3632                                PS, EVEX_CD8<32, CD8VF>;
3634 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3635                               SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
3636                avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3637                                SchedWriteFMoveLS, "VMOVUPD">,
3638                PD, REX_W, EVEX_CD8<64, CD8VF>;
3640 defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3641                                        HasAVX512, SchedWriteVecMoveLS,
3642                                        "VMOVDQA", 1>,
3643                  avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3644                                         HasAVX512, SchedWriteVecMoveLS,
3645                                         "VMOVDQA", 1>,
3646                  PD, EVEX_CD8<32, CD8VF>;
3648 defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3649                                        HasAVX512, SchedWriteVecMoveLS,
3650                                        "VMOVDQA">,
3651                  avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3652                                         HasAVX512, SchedWriteVecMoveLS,
3653                                         "VMOVDQA">,
3654                  PD, REX_W, EVEX_CD8<64, CD8VF>;
3656 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3657                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
3658                 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3659                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3660                 XD, EVEX_CD8<8, CD8VF>;
3662 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3663                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3664                  avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3665                                  SchedWriteVecMoveLS, "VMOVDQU", 1>,
3666                  XD, REX_W, EVEX_CD8<16, CD8VF>;
3668 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3669                                 SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
3670                  avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3671                                  SchedWriteVecMoveLS, "VMOVDQU", 1>,
3672                  XS, EVEX_CD8<32, CD8VF>;
3674 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3675                                 SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
3676                  avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3677                                  SchedWriteVecMoveLS, "VMOVDQU">,
3678                  XS, REX_W, EVEX_CD8<64, CD8VF>;
3680 // Special instructions to help with spilling when we don't have VLX. We need
3681 // to load or store from a ZMM register instead. These are converted in
3682 // expandPostRAPseudos.
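// Roughly, each pseudo is rewritten to the corresponding ZMM-register move
// with its XMM/YMM operand replaced by the containing ZMM register.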
3683 let isReMaterializable = 1, canFoldAsLoad = 1,
3684     isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3685 def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3686                             "", []>, Sched<[WriteFLoadX]>;
3687 def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3688                             "", []>, Sched<[WriteFLoadY]>;
3689 def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3690                             "", []>, Sched<[WriteFLoadX]>;
3691 def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3692                             "", []>, Sched<[WriteFLoadY]>;
3693 }
3695 let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3696 def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3697                             "", []>, Sched<[WriteFStoreX]>;
3698 def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3699                             "", []>, Sched<[WriteFStoreY]>;
3700 def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3701                             "", []>, Sched<[WriteFStoreX]>;
3702 def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3703                             "", []>, Sched<[WriteFStoreY]>;
3704 }
3706 def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3707                           (v8i64 VR512:$src))),
3708    (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3709                                               VK8), VR512:$src)>;
3711 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3712                            (v16i32 VR512:$src))),
3713                   (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
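// A vselect that picks the all-zeros vector when a mask bit is set is rewritten
// as a zero-masking move with the inverted mask: select(m, 0, x) == select(~m, x, 0).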
3715 // These patterns exist to prevent the above patterns from introducing a second
3716 // mask inversion when one already exists.
3717 def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
3718                           (v8i64 immAllZerosV),
3719                           (v8i64 VR512:$src))),
3720                  (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3721 def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
3722                            (v16i32 immAllZerosV),
3723                            (v16i32 VR512:$src))),
3724                   (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3726 multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3727                               X86VectorVTInfo Wide> {
3728  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3729                                Narrow.RC:$src1, Narrow.RC:$src0)),
3730            (EXTRACT_SUBREG
3731             (Wide.VT
3732              (!cast<Instruction>(InstrStr#"rrk")
3733               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3734               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3735               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3736             Narrow.SubRegIdx)>;
3738  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3739                                Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3740            (EXTRACT_SUBREG
3741             (Wide.VT
3742              (!cast<Instruction>(InstrStr#"rrkz")
3743               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3744               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3745             Narrow.SubRegIdx)>;
3746 }
3748 // Patterns for handling selects of 128/256-bit vectors when VLX isn't
3749 // available. Use a 512-bit operation and extract.
3750 let Predicates = [HasAVX512, NoVLX] in {
3751   defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3752   defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3753   defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3754   defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3756   defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3757   defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3758   defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3759   defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
3760 }
3762 let Predicates = [HasBWI, NoVLX] in {
3763   defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3764   defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3766   defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3767   defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
3769   defm : mask_move_lowering<"VMOVDQU16Z", v8f16x_info, v32f16_info>;
3770   defm : mask_move_lowering<"VMOVDQU16Z", v16f16x_info, v32f16_info>;
3772   defm : mask_move_lowering<"VMOVDQU16Z", v8bf16x_info, v32bf16_info>;
3773   defm : mask_move_lowering<"VMOVDQU16Z", v16bf16x_info, v32bf16_info>;
3774 }
3776 let Predicates = [HasAVX512] in {
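  // Plain (unmasked) whole-register loads and stores are type-agnostic, so
  // element types without a dedicated move instruction (i8/i16/f16/bf16
  // vectors) are mapped onto the 64-bit-element and PS forms below.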
3777   // 512-bit load.
3778   def : Pat<(alignedloadv16i32 addr:$src),
3779             (VMOVDQA64Zrm addr:$src)>;
3780   def : Pat<(alignedloadv32i16 addr:$src),
3781             (VMOVDQA64Zrm addr:$src)>;
3782   def : Pat<(alignedloadv32f16 addr:$src),
3783             (VMOVAPSZrm addr:$src)>;
3784   def : Pat<(alignedloadv32bf16 addr:$src),
3785             (VMOVAPSZrm addr:$src)>;
3786   def : Pat<(alignedloadv64i8 addr:$src),
3787             (VMOVDQA64Zrm addr:$src)>;
3788   def : Pat<(loadv16i32 addr:$src),
3789             (VMOVDQU64Zrm addr:$src)>;
3790   def : Pat<(loadv32i16 addr:$src),
3791             (VMOVDQU64Zrm addr:$src)>;
3792   def : Pat<(loadv32f16 addr:$src),
3793             (VMOVUPSZrm addr:$src)>;
3794   def : Pat<(loadv32bf16 addr:$src),
3795             (VMOVUPSZrm addr:$src)>;
3796   def : Pat<(loadv64i8 addr:$src),
3797             (VMOVDQU64Zrm addr:$src)>;
3799   // 512-bit store.
3800   def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3801             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3802   def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3803             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3804   def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
3805             (VMOVAPSZmr addr:$dst, VR512:$src)>;
3806   def : Pat<(alignedstore (v32bf16 VR512:$src), addr:$dst),
3807             (VMOVAPSZmr addr:$dst, VR512:$src)>;
3808   def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3809             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3810   def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3811             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3812   def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3813             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3814   def : Pat<(store (v32f16 VR512:$src), addr:$dst),
3815             (VMOVUPSZmr addr:$dst, VR512:$src)>;
3816   def : Pat<(store (v32bf16 VR512:$src), addr:$dst),
3817             (VMOVUPSZmr addr:$dst, VR512:$src)>;
3818   def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3819             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3820 }
3822 let Predicates = [HasVLX] in {
3823   // 128-bit load.
3824   def : Pat<(alignedloadv4i32 addr:$src),
3825             (VMOVDQA64Z128rm addr:$src)>;
3826   def : Pat<(alignedloadv8i16 addr:$src),
3827             (VMOVDQA64Z128rm addr:$src)>;
3828   def : Pat<(alignedloadv8f16 addr:$src),
3829             (VMOVAPSZ128rm addr:$src)>;
3830   def : Pat<(alignedloadv8bf16 addr:$src),
3831             (VMOVAPSZ128rm addr:$src)>;
3832   def : Pat<(alignedloadv16i8 addr:$src),
3833             (VMOVDQA64Z128rm addr:$src)>;
3834   def : Pat<(loadv4i32 addr:$src),
3835             (VMOVDQU64Z128rm addr:$src)>;
3836   def : Pat<(loadv8i16 addr:$src),
3837             (VMOVDQU64Z128rm addr:$src)>;
3838   def : Pat<(loadv8f16 addr:$src),
3839             (VMOVUPSZ128rm addr:$src)>;
3840   def : Pat<(loadv8bf16 addr:$src),
3841             (VMOVUPSZ128rm addr:$src)>;
3842   def : Pat<(loadv16i8 addr:$src),
3843             (VMOVDQU64Z128rm addr:$src)>;
3845   // 128-bit store.
3846   def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3847             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3848   def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3849             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3850   def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
3851             (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3852   def : Pat<(alignedstore (v8bf16 VR128X:$src), addr:$dst),
3853             (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3854   def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3855             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3856   def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3857             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3858   def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3859             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3860   def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
3861             (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3862   def : Pat<(store (v8bf16 VR128X:$src), addr:$dst),
3863             (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3864   def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3865             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3867   // 256-bit load.
3868   def : Pat<(alignedloadv8i32 addr:$src),
3869             (VMOVDQA64Z256rm addr:$src)>;
3870   def : Pat<(alignedloadv16i16 addr:$src),
3871             (VMOVDQA64Z256rm addr:$src)>;
3872   def : Pat<(alignedloadv16f16 addr:$src),
3873             (VMOVAPSZ256rm addr:$src)>;
3874   def : Pat<(alignedloadv16bf16 addr:$src),
3875             (VMOVAPSZ256rm addr:$src)>;
3876   def : Pat<(alignedloadv32i8 addr:$src),
3877             (VMOVDQA64Z256rm addr:$src)>;
3878   def : Pat<(loadv8i32 addr:$src),
3879             (VMOVDQU64Z256rm addr:$src)>;
3880   def : Pat<(loadv16i16 addr:$src),
3881             (VMOVDQU64Z256rm addr:$src)>;
3882   def : Pat<(loadv16f16 addr:$src),
3883             (VMOVUPSZ256rm addr:$src)>;
3884   def : Pat<(loadv16bf16 addr:$src),
3885             (VMOVUPSZ256rm addr:$src)>;
3886   def : Pat<(loadv32i8 addr:$src),
3887             (VMOVDQU64Z256rm addr:$src)>;
3889   // 256-bit store.
3890   def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3891             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3892   def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3893             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3894   def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
3895             (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3896   def : Pat<(alignedstore (v16bf16 VR256X:$src), addr:$dst),
3897             (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3898   def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3899             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3900   def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3901             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3902   def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3903             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3904   def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
3905             (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3906   def : Pat<(store (v16bf16 VR256X:$src), addr:$dst),
3907             (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3908   def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3909             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3910 }
3912 multiclass mask_move_lowering_f16_bf16<AVX512VLVectorVTInfo _> {
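// There is no dedicated f16/bf16 masked move, so selects and masked loads and
// stores of these types reuse the VMOVDQU16 forms; the mask granularity (one
// bit per 16-bit element) is the same either way.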
3913 let Predicates = [HasBWI] in {
3914   def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), (_.info512.VT VR512:$src0))),
3915             (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
3916   def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), _.info512.ImmAllZerosV)),
3917             (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
3918   def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3919                      (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), (_.info512.VT VR512:$src0))),
3920             (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3921   def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3922                      (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), _.info512.ImmAllZerosV)),
3923             (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3924   def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3925                      (_.info512.VT (_.info512.LdFrag addr:$src)), (_.info512.VT VR512:$src0))),
3926             (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3927   def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3928                      (_.info512.VT (_.info512.LdFrag addr:$src)), _.info512.ImmAllZerosV)),
3929             (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3930   def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, (_.info512.VT VR512:$src0))),
3931             (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3932   def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, undef)),
3933             (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3934   def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, _.info512.ImmAllZerosV)),
3935             (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3937   def : Pat<(masked_store (_.info512.VT VR512:$src), addr:$dst, VK32WM:$mask),
3938             (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
3939 }
3940 let Predicates = [HasBWI, HasVLX] in {
3941   def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src0))),
3942             (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
3943   def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), _.info256.ImmAllZerosV)),
3944             (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
3945   def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3946                      (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
3947             (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3948   def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3949                      (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), _.info256.ImmAllZerosV)),
3950             (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3951   def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3952                      (_.info256.VT (_.info256.LdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
3953             (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3954   def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3955                      (_.info256.VT (_.info256.LdFrag addr:$src)), _.info256.ImmAllZerosV)),
3956             (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3957   def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, (_.info256.VT VR256X:$src0))),
3958             (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3959   def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, undef)),
3960             (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3961   def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, _.info256.ImmAllZerosV)),
3962             (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3964   def : Pat<(masked_store (_.info256.VT VR256X:$src), addr:$dst, VK16WM:$mask),
3965             (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;
3967   def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), (_.info128.VT VR128X:$src0))),
3968             (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
3969   def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), _.info128.ImmAllZerosV)),
3970             (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
3971   def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3972                      (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
3973             (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3974   def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3975                      (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), _.info128.ImmAllZerosV)),
3976             (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3977   def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3978                      (_.info128.VT (_.info128.LdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
3979             (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3980   def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3981                      (_.info128.VT (_.info128.LdFrag addr:$src)), _.info128.ImmAllZerosV)),
3982             (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3983   def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, (_.info128.VT VR128X:$src0))),
3984             (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3985   def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, undef)),
3986             (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3987   def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, _.info128.ImmAllZerosV)),
3988             (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3990   def : Pat<(masked_store (_.info128.VT VR128X:$src), addr:$dst, VK8WM:$mask),
3991             (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
3992 }
3993 }
3995 defm : mask_move_lowering_f16_bf16<avx512vl_f16_info>;
3996 defm : mask_move_lowering_f16_bf16<avx512vl_bf16_info>;
3998 // Move Int Doubleword to Packed Double Int
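// Note: vmovd/vmovq from a GPR zero the upper elements of the destination XMM
// register, although the scalar_to_vector patterns below only require element
// 0 to be defined.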
4000 let ExeDomain = SSEPackedInt in {
4001 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
4002                       "vmovd\t{$src, $dst|$dst, $src}",
4003                       [(set VR128X:$dst,
4004                         (v4i32 (scalar_to_vector GR32:$src)))]>,
4005                         EVEX, Sched<[WriteVecMoveFromGpr]>;
4006 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
4007                       "vmovd\t{$src, $dst|$dst, $src}",
4008                       [(set VR128X:$dst,
4009                         (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
4010                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
4011 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
4012                       "vmovq\t{$src, $dst|$dst, $src}",
4013                         [(set VR128X:$dst,
4014                           (v2i64 (scalar_to_vector GR64:$src)))]>,
4015                       EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
4016 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
4017 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
4018                       (ins i64mem:$src),
4019                       "vmovq\t{$src, $dst|$dst, $src}", []>,
4020                       EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
4021 let isCodeGenOnly = 1 in {
4022 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
4023                        "vmovq\t{$src, $dst|$dst, $src}",
4024                        [(set FR64X:$dst, (bitconvert GR64:$src))]>,
4025                        EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
4026 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
4027                          "vmovq\t{$src, $dst|$dst, $src}",
4028                          [(set GR64:$dst, (bitconvert FR64X:$src))]>,
4029                          EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
}
4031 } // ExeDomain = SSEPackedInt
4033 // Move Int Doubleword to Single Scalar
4035 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
4036 def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
4037                       "vmovd\t{$src, $dst|$dst, $src}",
4038                       [(set FR32X:$dst, (bitconvert GR32:$src))]>,
4039                       EVEX, Sched<[WriteVecMoveFromGpr]>;
4040 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
4042 // Move doubleword from xmm register to r/m32
4044 let ExeDomain = SSEPackedInt in {
4045 def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
4046                        "vmovd\t{$src, $dst|$dst, $src}",
4047                        [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
4048                                         (iPTR 0)))]>,
4049                        EVEX, Sched<[WriteVecMoveToGpr]>;
4050 def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
4051                        (ins i32mem:$dst, VR128X:$src),
4052                        "vmovd\t{$src, $dst|$dst, $src}",
4053                        [(store (i32 (extractelt (v4i32 VR128X:$src),
4054                                      (iPTR 0))), addr:$dst)]>,
4055                        EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
4056 } // ExeDomain = SSEPackedInt
4058 // Move quadword from xmm1 register to r/m64
4060 let ExeDomain = SSEPackedInt in {
4061 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
4062                       "vmovq\t{$src, $dst|$dst, $src}",
4063                       [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
4064                                                    (iPTR 0)))]>,
4065                       PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>,
4066                       Requires<[HasAVX512]>;
4068 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
4069 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
4070                       "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
4071                       EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecStore]>,
4072                       Requires<[HasAVX512, In64BitMode]>;
4074 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
4075                       (ins i64mem:$dst, VR128X:$src),
4076                       "vmovq\t{$src, $dst|$dst, $src}",
4077                       [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
4078                               addr:$dst)]>,
4079                       EVEX, PD, REX_W, EVEX_CD8<64, CD8VT1>,
4080                       Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
4082 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
4083 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
4084                              (ins VR128X:$src),
4085                              "vmovq\t{$src, $dst|$dst, $src}", []>,
4086                              EVEX, REX_W, Sched<[SchedWriteVecLogic.XMM]>;
4087 } // ExeDomain = SSEPackedInt
4089 def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
4090                 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
4092 let Predicates = [HasAVX512] in {
4093   def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
4094             (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
}
4097 // Move Scalar Single to Double Int
4099 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
4100 def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
4101                       (ins FR32X:$src),
4102                       "vmovd\t{$src, $dst|$dst, $src}",
4103                       [(set GR32:$dst, (bitconvert FR32X:$src))]>,
4104                       EVEX, Sched<[WriteVecMoveToGpr]>;
4105 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
4107 // Move Quadword Int to Packed Quadword Int
4109 let ExeDomain = SSEPackedInt in {
4110 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
4111                       (ins i64mem:$src),
4112                       "vmovq\t{$src, $dst|$dst, $src}",
4113                       [(set VR128X:$dst,
4114                         (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
4115                       EVEX, REX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
4116 } // ExeDomain = SSEPackedInt
4118 // Allow "vmovd" but print "vmovq".
4119 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
4120                 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
4121 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
4122                 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
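// For example, "vmovd %rax, %xmm0" is accepted by the assembler through the
// aliases above, but the instruction is always printed back as
// "vmovq %rax, %xmm0".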
4124 // Conversions between masks and scalar fp.
4125 def : Pat<(v32i1 (bitconvert FR32X:$src)),
4126           (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
4127 def : Pat<(f32 (bitconvert VK32:$src)),
4128           (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
4130 def : Pat<(v64i1 (bitconvert FR64X:$src)),
4131           (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
4132 def : Pat<(f64 (bitconvert VK64:$src)),
4133           (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
4135 //===----------------------------------------------------------------------===//
4136 // AVX-512  MOVSH, MOVSS, MOVSD
4137 //===----------------------------------------------------------------------===//
4139 multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
4140                               X86VectorVTInfo _, Predicate prd = HasAVX512> {
4141   let Predicates = !if (!eq (prd, HasFP16), [HasFP16], [prd, OptForSize]) in
4142   def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4143              (ins _.RC:$src1, _.RC:$src2),
4144              !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4145              [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
4146              _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
4147   let Predicates = [prd] in {
4148   def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4149               (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
4150               !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
4151               "$dst {${mask}} {z}, $src1, $src2}"),
4152               [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
4153                                       (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4154                                       _.ImmAllZerosV)))],
4155               _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
4156   let Constraints = "$src0 = $dst"  in
4157   def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4158              (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
4159              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
4160              "$dst {${mask}}, $src1, $src2}"),
4161              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
4162                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4163                                      (_.VT _.RC:$src0))))],
4164              _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
4165   let canFoldAsLoad = 1, isReMaterializable = 1 in {
4166   def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
4167              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4168              [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
4169              _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
4170   // _alt version uses FR32/FR64 register class.
4171   let isCodeGenOnly = 1 in
4172   def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
4173                  !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4174                  [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
4175                  _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
4176   }
4177   let mayLoad = 1, hasSideEffects = 0 in {
4178     let Constraints = "$src0 = $dst" in
4179     def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
4180                (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
4181                !strconcat(asm, "\t{$src, $dst {${mask}}|",
4182                "$dst {${mask}}, $src}"),
4183                [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
4184     def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
4185                (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
4186                !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
4187                "$dst {${mask}} {z}, $src}"),
4188                [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
4189   }
4190   def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
4191              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4192              [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
4193              EVEX, Sched<[WriteFStore]>;
4194   let mayStore = 1, hasSideEffects = 0 in
4195   def mrk: AVX512PI<0x11, MRMDestMem, (outs),
4196               (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
4197               !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
4198               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>;
4199   }
}
4202 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
4203                                   VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
4205 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
4206                                   VEX_LIG, XD, REX_W, EVEX_CD8<64, CD8VT1>;
4208 defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
4209                                   HasFP16>,
4210                                   VEX_LIG, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
4212 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
4213                                        PatLeaf ZeroFP, X86VectorVTInfo _> {
4215 def : Pat<(_.VT (OpNode _.RC:$src0,
4216                         (_.VT (scalar_to_vector
4217                                   (_.EltVT (X86selects VK1WM:$mask,
4218                                                        (_.EltVT _.FRC:$src1),
4219                                                        (_.EltVT _.FRC:$src2))))))),
4220           (!cast<Instruction>(InstrStr#rrk)
4221                         (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
4222                         VK1WM:$mask,
4223                         (_.VT _.RC:$src0),
4224                         (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
4226 def : Pat<(_.VT (OpNode _.RC:$src0,
4227                         (_.VT (scalar_to_vector
4228                                   (_.EltVT (X86selects VK1WM:$mask,
4229                                                        (_.EltVT _.FRC:$src1),
4230                                                        (_.EltVT ZeroFP))))))),
4231           (!cast<Instruction>(InstrStr#rrkz)
4232                         VK1WM:$mask,
4233                         (_.VT _.RC:$src0),
4234                         (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
}
4237 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4238                                         dag Mask, RegisterClass MaskRC> {
4240 def : Pat<(masked_store
4241              (_.info512.VT (insert_subvector undef,
4242                                (_.info128.VT _.info128.RC:$src),
4243                                (iPTR 0))), addr:$dst, Mask),
4244           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4245                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4246                       _.info128.RC:$src)>;
}
4250 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
4251                                                AVX512VLVectorVTInfo _,
4252                                                dag Mask, RegisterClass MaskRC,
4253                                                SubRegIndex subreg> {
4255 def : Pat<(masked_store
4256              (_.info512.VT (insert_subvector undef,
4257                                (_.info128.VT _.info128.RC:$src),
4258                                (iPTR 0))), addr:$dst, Mask),
4259           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4260                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4261                       _.info128.RC:$src)>;
}
4265 // This matches the more recent codegen from clang that avoids emitting a
4266 // 512-bit masked store directly. Codegen will widen a 128-bit masked store to
4267 // 512 bits on AVX512F-only targets.
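// As an illustrative sketch (one possible source form, not the lowering
// itself), a recent clang emits such a 128-bit masked store for the
// _mm_mask_store_ss intrinsic:
//
//   #include <immintrin.h>
//   void store_low(float *p, __mmask8 k, __m128 v) {
//     _mm_mask_store_ss(p, k, v);  // 128-bit masked scalar store
//   }
//
// On AVX512F-only targets (no VLX) the store is widened to 512 bits and is
// matched by the Mask512 pattern below; with AVX512VL the 128-bit Mask128
// pattern applies.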
4268 multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4269                                                AVX512VLVectorVTInfo _,
4270                                                dag Mask512, dag Mask128,
4271                                                RegisterClass MaskRC,
4272                                                SubRegIndex subreg> {
4274 // AVX512F pattern.
4275 def : Pat<(masked_store
4276              (_.info512.VT (insert_subvector undef,
4277                                (_.info128.VT _.info128.RC:$src),
4278                                (iPTR 0))), addr:$dst, Mask512),
4279           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4280                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4281                       _.info128.RC:$src)>;
4283 // AVX512VL pattern.
4284 def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4285           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4286                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4287                       _.info128.RC:$src)>;
}
4290 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4291                                        dag Mask, RegisterClass MaskRC> {
4293 def : Pat<(_.info128.VT (extract_subvector
4294                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
4295                                         _.info512.ImmAllZerosV)),
4296                            (iPTR 0))),
4297           (!cast<Instruction>(InstrStr#rmkz)
4298                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4299                       addr:$srcAddr)>;
4301 def : Pat<(_.info128.VT (extract_subvector
4302                 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4303                       (_.info512.VT (insert_subvector undef,
4304                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4305                             (iPTR 0))))),
4306                 (iPTR 0))),
4307           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4308                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4309                       addr:$srcAddr)>;
}
4313 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4314                                               AVX512VLVectorVTInfo _,
4315                                               dag Mask, RegisterClass MaskRC,
4316                                               SubRegIndex subreg> {
4318 def : Pat<(_.info128.VT (extract_subvector
4319                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
4320                                         _.info512.ImmAllZerosV)),
4321                            (iPTR 0))),
4322           (!cast<Instruction>(InstrStr#rmkz)
4323                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4324                       addr:$srcAddr)>;
4326 def : Pat<(_.info128.VT (extract_subvector
4327                 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4328                       (_.info512.VT (insert_subvector undef,
4329                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4330                             (iPTR 0))))),
4331                 (iPTR 0))),
4332           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4333                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4334                       addr:$srcAddr)>;
}
4338 // This matches the more recent codegen from clang that avoids emitting a
4339 // 512-bit masked load directly. Codegen will widen a 128-bit masked load to
4340 // 512 bits on AVX512F-only targets.
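// Similarly (an illustrative sketch), a recent clang lowers the
// _mm_maskz_load_ss intrinsic to a 128-bit masked load:
//
//   #include <immintrin.h>
//   __m128 load_low(const float *p, __mmask8 k) {
//     return _mm_maskz_load_ss(k, p);  // 128-bit masked scalar load
//   }
//
// On AVX512F-only targets the load is widened to 512 bits (Mask512 patterns);
// with AVX512VL the 128-bit Mask128 patterns match it directly.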
4341 multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4342                                               AVX512VLVectorVTInfo _,
4343                                               dag Mask512, dag Mask128,
4344                                               RegisterClass MaskRC,
4345                                               SubRegIndex subreg> {
4346 // AVX512F patterns.
4347 def : Pat<(_.info128.VT (extract_subvector
4348                          (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4349                                         _.info512.ImmAllZerosV)),
4350                            (iPTR 0))),
4351           (!cast<Instruction>(InstrStr#rmkz)
4352                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4353                       addr:$srcAddr)>;
4355 def : Pat<(_.info128.VT (extract_subvector
4356                 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4357                       (_.info512.VT (insert_subvector undef,
4358                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4359                             (iPTR 0))))),
4360                 (iPTR 0))),
4361           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4362                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4363                       addr:$srcAddr)>;
4365 // AVX512VL patterns.
4366 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4367                          _.info128.ImmAllZerosV)),
4368           (!cast<Instruction>(InstrStr#rmkz)
4369                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4370                       addr:$srcAddr)>;
4372 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4373                          (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4374           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4375                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4376                       addr:$srcAddr)>;
}
4379 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4380 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4382 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4383                    (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4384 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4385                    (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4386 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4387                    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4389 let Predicates = [HasFP16] in {
4390 defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
4391 defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4392                    (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4393 defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4394                    (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4395 defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4396                    (v32i1 (insert_subvector
4397                            (v32i1 immAllZerosV),
4398                            (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4399                            (iPTR 0))),
4400                    (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4401                    GR8, sub_8bit>;
4403 defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4404                    (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4405 defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4406                    (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4407 defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4408                    (v32i1 (insert_subvector
4409                            (v32i1 immAllZerosV),
4410                            (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4411                            (iPTR 0))),
4412                    (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4413                    GR8, sub_8bit>;
4415 def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
4416           (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
4417            (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
4418            VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4419            (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4421 def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
4422           (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4423            (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
}
4426 defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4427                    (v16i1 (insert_subvector
4428                            (v16i1 immAllZerosV),
4429                            (v4i1 (extract_subvector
4430                                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4431                                   (iPTR 0))),
4432                            (iPTR 0))),
4433                    (v4i1 (extract_subvector
4434                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4435                           (iPTR 0))), GR8, sub_8bit>;
4436 defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4437                    (v8i1
4438                     (extract_subvector
4439                      (v16i1
4440                       (insert_subvector
4441                        (v16i1 immAllZerosV),
4442                        (v2i1 (extract_subvector
4443                               (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4444                               (iPTR 0))),
4445                        (iPTR 0))),
4446                      (iPTR 0))),
4447                    (v2i1 (extract_subvector
4448                           (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4449                           (iPTR 0))), GR8, sub_8bit>;
4451 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4452                    (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4453 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4454                    (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4455 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4456                    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4458 defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4459                    (v16i1 (insert_subvector
4460                            (v16i1 immAllZerosV),
4461                            (v4i1 (extract_subvector
4462                                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4463                                   (iPTR 0))),
4464                            (iPTR 0))),
4465                    (v4i1 (extract_subvector
4466                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4467                           (iPTR 0))), GR8, sub_8bit>;
4468 defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4469                    (v8i1
4470                     (extract_subvector
4471                      (v16i1
4472                       (insert_subvector
4473                        (v16i1 immAllZerosV),
4474                        (v2i1 (extract_subvector
4475                               (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4476                               (iPTR 0))),
4477                        (iPTR 0))),
4478                      (iPTR 0))),
4479                    (v2i1 (extract_subvector
4480                           (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4481                           (iPTR 0))), GR8, sub_8bit>;
4483 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4484           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4485            (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4486            VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4487            (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4489 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4490           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4491            (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4493 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
4494           (COPY_TO_REGCLASS
4495            (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
4496                                                        VK1WM:$mask, addr:$src)),
4497            FR32X)>;
4498 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
4499           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
4501 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4502           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4503            (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4504            VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4505            (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4507 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
4508           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4509            (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4511 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
4512           (COPY_TO_REGCLASS
4513            (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
4514                                                        VK1WM:$mask, addr:$src)),
4515            FR64X)>;
4516 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
4517           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
4520 def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
4521           (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4522 def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
4523           (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4525 def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
4526           (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4527 def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
4528           (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4530 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
4531   let Predicates = [HasFP16] in {
4532     def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4533         (ins VR128X:$src1, VR128X:$src2),
4534         "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4535         []>, T_MAP5XS, EVEX_4V, VEX_LIG,
4536         Sched<[SchedWriteFShuffle.XMM]>;
4538     let Constraints = "$src0 = $dst" in
4539     def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4540         (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
4541          VR128X:$src1, VR128X:$src2),
4542         "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
4543           "$dst {${mask}}, $src1, $src2}",
4544         []>, T_MAP5XS, EVEX_K, EVEX_4V, VEX_LIG,
4545         Sched<[SchedWriteFShuffle.XMM]>;
4547     def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4548         (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4549         "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
4550           "$dst {${mask}} {z}, $src1, $src2}",
4551         []>, EVEX_KZ, T_MAP5XS, EVEX_4V, VEX_LIG,
4552         Sched<[SchedWriteFShuffle.XMM]>;
4553   }
4554   def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4555                            (ins VR128X:$src1, VR128X:$src2),
4556                            "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4557                            []>, XS, EVEX_4V, VEX_LIG,
4558                            Sched<[SchedWriteFShuffle.XMM]>;
4560   let Constraints = "$src0 = $dst" in
4561   def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4562                              (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4563                                                    VR128X:$src1, VR128X:$src2),
4564                              "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4565                                         "$dst {${mask}}, $src1, $src2}",
4566                              []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
4567                              Sched<[SchedWriteFShuffle.XMM]>;
4569   def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4570                          (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4571                          "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4572                                     "$dst {${mask}} {z}, $src1, $src2}",
4573                          []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
4574                          Sched<[SchedWriteFShuffle.XMM]>;
4576   def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4577                            (ins VR128X:$src1, VR128X:$src2),
4578                            "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4579                            []>, XD, EVEX_4V, VEX_LIG, REX_W,
4580                            Sched<[SchedWriteFShuffle.XMM]>;
4582   let Constraints = "$src0 = $dst" in
4583   def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4584                              (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4585                                                    VR128X:$src1, VR128X:$src2),
4586                              "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4587                                         "$dst {${mask}}, $src1, $src2}",
4588                              []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
4589                              REX_W, Sched<[SchedWriteFShuffle.XMM]>;
4591   def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4592                               (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4593                                                           VR128X:$src2),
4594                               "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4595                                          "$dst {${mask}} {z}, $src1, $src2}",
4596                               []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
4597                               REX_W, Sched<[SchedWriteFShuffle.XMM]>;
}
4600 def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4601                 (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4602 def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
4603                              "$dst {${mask}}, $src1, $src2}",
4604                 (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
4605                                 VR128X:$src1, VR128X:$src2), 0>;
4606 def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4607                              "$dst {${mask}} {z}, $src1, $src2}",
4608                 (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
4609                                  VR128X:$src1, VR128X:$src2), 0>;
4610 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4611                 (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4612 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4613                              "$dst {${mask}}, $src1, $src2}",
4614                 (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4615                                 VR128X:$src1, VR128X:$src2), 0>;
4616 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4617                              "$dst {${mask}} {z}, $src1, $src2}",
4618                 (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4619                                  VR128X:$src1, VR128X:$src2), 0>;
4620 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4621                 (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4622 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4623                              "$dst {${mask}}, $src1, $src2}",
4624                 (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4625                                 VR128X:$src1, VR128X:$src2), 0>;
4626 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4627                              "$dst {${mask}} {z}, $src1, $src2}",
4628                 (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4629                                  VR128X:$src1, VR128X:$src2), 0>;
4631 let Predicates = [HasAVX512, OptForSize] in {
4632   def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4633             (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4634   def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4635             (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4637   // Move low f32 and clear high bits.
4638   def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4639             (SUBREG_TO_REG (i32 0),
4640              (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4641               (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4642   def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4643             (SUBREG_TO_REG (i32 0),
4644              (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4645               (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4647   def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4648             (SUBREG_TO_REG (i32 0),
4649              (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4650               (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4651   def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4652             (SUBREG_TO_REG (i32 0),
4653              (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4654               (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
}
4657 // Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4658 // VMOVSS/SD. Unfortunately, this loses the ability to use XMM16-31.
4659 let Predicates = [HasAVX512, OptForSpeed] in {
4660   def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4661             (SUBREG_TO_REG (i32 0),
4662              (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4663                           (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4664                           (i8 1))), sub_xmm)>;
4665   def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4666             (SUBREG_TO_REG (i32 0),
4667              (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4668                           (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4669                           (i8 3))), sub_xmm)>;
}
4672 let Predicates = [HasAVX512] in {
4673   def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4674             (VMOVSSZrm addr:$src)>;
4675   def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4676             (VMOVSDZrm addr:$src)>;
4678   // Represent the same patterns above but in the form they appear for
4679   // 256-bit types
4680   def : Pat<(v8f32 (X86vzload32 addr:$src)),
4681             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4682   def : Pat<(v4f64 (X86vzload64 addr:$src)),
4683             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4685   // Represent the same patterns above but in the form they appear for
4686   // 512-bit types
4687   def : Pat<(v16f32 (X86vzload32 addr:$src)),
4688             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4689   def : Pat<(v8f64 (X86vzload64 addr:$src)),
4690             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}
4692 let Predicates = [HasFP16] in {
4693   def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
4694             (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
4695   def : Pat<(v8i16 (X86vzmovl (v8i16 VR128X:$src))),
4696             (VMOVSHZrr (v8i16 (AVX512_128_SET0)), VR128X:$src)>;
4698   // FIXME we need better canonicalization in dag combine
4699   def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
4700             (SUBREG_TO_REG (i32 0),
4701              (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4702               (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4703   def : Pat<(v16i16 (X86vzmovl (v16i16 VR256X:$src))),
4704             (SUBREG_TO_REG (i32 0),
4705              (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
4706               (v8i16 (EXTRACT_SUBREG (v16i16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4708   // FIXME we need better canonicalization in dag combine
4709   def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
4710             (SUBREG_TO_REG (i32 0),
4711              (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4712               (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
4713   def : Pat<(v32i16 (X86vzmovl (v32i16 VR512:$src))),
4714             (SUBREG_TO_REG (i32 0),
4715              (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
4716               (v8i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_xmm)))), sub_xmm)>;
4718   def : Pat<(v8f16 (X86vzload16 addr:$src)),
4719             (VMOVSHZrm addr:$src)>;
4721   def : Pat<(v16f16 (X86vzload16 addr:$src)),
4722             (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4724   def : Pat<(v32f16 (X86vzload16 addr:$src)),
4725             (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
}
4728 let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4729 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4730                                 (ins VR128X:$src),
4731                                 "vmovq\t{$src, $dst|$dst, $src}",
4732                                 [(set VR128X:$dst, (v2i64 (X86vzmovl
4733                                                    (v2i64 VR128X:$src))))]>,
4734                                 EVEX, REX_W;
}
4737 let Predicates = [HasAVX512] in {
4738   def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
4739             (VMOVDI2PDIZrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
4740                                               GR8:$src, sub_8bit)))>;
4741   def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4742             (VMOVDI2PDIZrr GR32:$src)>;
4744   def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4745             (VMOV64toPQIZrr GR64:$src)>;
4747   // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
4748   def : Pat<(v4i32 (X86vzload32 addr:$src)),
4749             (VMOVDI2PDIZrm addr:$src)>;
4750   def : Pat<(v8i32 (X86vzload32 addr:$src)),
4751             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
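  // Illustrative sketch (assuming the _mm_cvtsi32_si128 and
  // _mm256_zextsi128_si256 intrinsics): a zero-extended 32-bit load such as
  //
  //   #include <immintrin.h>
  //   __m256i load_zext32(const int *p) {
  //     return _mm256_zextsi128_si256(_mm_cvtsi32_si128(*p));
  //   }
  //
  // can be selected to a single vmovd load, since the instruction already
  // clears the upper elements of the destination register.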
4752   def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4753             (VMOVZPQILo2PQIZrr VR128X:$src)>;
4754   def : Pat<(v2i64 (X86vzload64 addr:$src)),
4755             (VMOVQI2PQIZrm addr:$src)>;
4756   def : Pat<(v4i64 (X86vzload64 addr:$src)),
4757             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4759   // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4760   def : Pat<(v16i32 (X86vzload32 addr:$src)),
4761             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4762   def : Pat<(v8i64 (X86vzload64 addr:$src)),
4763             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4765   def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4766             (SUBREG_TO_REG (i32 0),
4767              (v2f64 (VMOVZPQILo2PQIZrr
4768                      (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4769              sub_xmm)>;
4770   def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4771             (SUBREG_TO_REG (i32 0),
4772              (v2i64 (VMOVZPQILo2PQIZrr
4773                      (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4774              sub_xmm)>;
4776   def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4777             (SUBREG_TO_REG (i32 0),
4778              (v2f64 (VMOVZPQILo2PQIZrr
4779                      (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4780              sub_xmm)>;
4781   def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4782             (SUBREG_TO_REG (i32 0),
4783              (v2i64 (VMOVZPQILo2PQIZrr
4784                      (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4785              sub_xmm)>;
}
4788 //===----------------------------------------------------------------------===//
4789 // AVX-512 - Non-temporals
4790 //===----------------------------------------------------------------------===//
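// For reference (an illustrative sketch using the standard intrinsic names),
// these correspond to the user-level streaming load/store intrinsics:
//
//   #include <immintrin.h>
//   void copy_nt(void *dst, const void *src) {
//     __m512i v = _mm512_stream_load_si512((void *)src);  // vmovntdqa
//     _mm512_stream_si512((__m512i *)dst, v);              // vmovntdq
//   }
//
// Both accesses must be 64-byte aligned; the aligned nontemporal load/store
// fragments below only match aligned accesses.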
4792 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4793                       (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4794                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4795                       EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4797 let Predicates = [HasVLX] in {
4798   def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4799                        (ins i256mem:$src),
4800                        "vmovntdqa\t{$src, $dst|$dst, $src}",
4801                        [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4802                        EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4804   def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4805                       (ins i128mem:$src),
4806                       "vmovntdqa\t{$src, $dst|$dst, $src}",
4807                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4808                       EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}
4811 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4812                         X86SchedWriteMoveLS Sched,
4813                         PatFrag st_frag = alignednontemporalstore> {
4814   let SchedRW = [Sched.MR], AddedComplexity = 400 in
4815   def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4816                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4817                     [(st_frag (_.VT _.RC:$src), addr:$dst)],
4818                     _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}
4821 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4822                            AVX512VLVectorVTInfo VTInfo,
4823                            X86SchedWriteMoveLSWidths Sched> {
4824   let Predicates = [HasAVX512] in
4825     defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4827   let Predicates = [HasAVX512, HasVLX] in {
4828     defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4829     defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
4830   }
}
4833 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4834                                 SchedWriteVecMoveLSNT>, PD;
4835 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4836                                 SchedWriteFMoveLSNT>, PD, REX_W;
4837 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4838                                 SchedWriteFMoveLSNT>, PS;
4840 let Predicates = [HasAVX512], AddedComplexity = 400 in {
4841   def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4842             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4843   def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4844             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4845   def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4846             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4848   def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4849             (VMOVNTDQAZrm addr:$src)>;
4850   def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4851             (VMOVNTDQAZrm addr:$src)>;
4852   def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4853             (VMOVNTDQAZrm addr:$src)>;
4854   def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4855             (VMOVNTDQAZrm addr:$src)>;
4856   def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4857             (VMOVNTDQAZrm addr:$src)>;
4858   def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4859             (VMOVNTDQAZrm addr:$src)>;
}
4862 let Predicates = [HasVLX], AddedComplexity = 400 in {
4863   def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4864             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4865   def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4866             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4867   def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4868             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4870   def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4871             (VMOVNTDQAZ256rm addr:$src)>;
4872   def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4873             (VMOVNTDQAZ256rm addr:$src)>;
4874   def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4875             (VMOVNTDQAZ256rm addr:$src)>;
4876   def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4877             (VMOVNTDQAZ256rm addr:$src)>;
4878   def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4879             (VMOVNTDQAZ256rm addr:$src)>;
4880   def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4881             (VMOVNTDQAZ256rm addr:$src)>;
4883   def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4884             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4885   def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4886             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4887   def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4888             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4890   def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4891             (VMOVNTDQAZ128rm addr:$src)>;
4892   def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4893             (VMOVNTDQAZ128rm addr:$src)>;
4894   def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4895             (VMOVNTDQAZ128rm addr:$src)>;
4896   def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4897             (VMOVNTDQAZ128rm addr:$src)>;
4898   def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4899             (VMOVNTDQAZ128rm addr:$src)>;
4900   def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4901             (VMOVNTDQAZ128rm addr:$src)>;
}
4904 //===----------------------------------------------------------------------===//
4905 // AVX-512 - Integer arithmetic
//===----------------------------------------------------------------------===//
4907 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4908                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
4909                            bit IsCommutable = 0> {
4910   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4911                     (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4912                     "$src2, $src1", "$src1, $src2",
4913                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4914                     IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
4915                     Sched<[sched]>;
4917   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4918                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4919                   "$src2, $src1", "$src1, $src2",
4920                   (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4921                   AVX512BIBase, EVEX_4V,
4922                   Sched<[sched.Folded, sched.ReadAfterFold]>;
}
4925 multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4926                             X86VectorVTInfo _, X86FoldableSchedWrite sched,
4927                             bit IsCommutable = 0> :
4928            avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4929   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4930                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4931                   "${src2}"#_.BroadcastStr#", $src1",
4932                   "$src1, ${src2}"#_.BroadcastStr,
4933                   (_.VT (OpNode _.RC:$src1,
4934                                 (_.BroadcastLdFrag addr:$src2)))>,
4935                   AVX512BIBase, EVEX_4V, EVEX_B,
4936                   Sched<[sched.Folded, sched.ReadAfterFold]>;
}
4939 multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4940                               AVX512VLVectorVTInfo VTInfo,
4941                               X86SchedWriteWidths sched, Predicate prd,
4942                               bit IsCommutable = 0> {
4943   let Predicates = [prd] in
4944     defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4945                              IsCommutable>, EVEX_V512;
4947   let Predicates = [prd, HasVLX] in {
4948     defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4949                                 sched.YMM, IsCommutable>, EVEX_V256;
4950     defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4951                                 sched.XMM, IsCommutable>, EVEX_V128;
4952   }
}
4955 multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4956                                AVX512VLVectorVTInfo VTInfo,
4957                                X86SchedWriteWidths sched, Predicate prd,
4958                                bit IsCommutable = 0> {
4959   let Predicates = [prd] in
4960     defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4961                              IsCommutable>, EVEX_V512;
4963   let Predicates = [prd, HasVLX] in {
4964     defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4965                                  sched.YMM, IsCommutable>, EVEX_V256;
4966     defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4967                                  sched.XMM, IsCommutable>, EVEX_V128;
4968   }
}
4971 multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4972                                 X86SchedWriteWidths sched, Predicate prd,
4973                                 bit IsCommutable = 0> {
4974   defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4975                                   sched, prd, IsCommutable>,
4976                                   REX_W, EVEX_CD8<64, CD8VF>;
}
4979 multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4980                                 X86SchedWriteWidths sched, Predicate prd,
4981                                 bit IsCommutable = 0> {
4982   defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4983                                   sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}
4986 multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4987                                 X86SchedWriteWidths sched, Predicate prd,
4988                                 bit IsCommutable = 0> {
4989   defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4990                                  sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4991                                  WIG;
}
4994 multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4995                                 X86SchedWriteWidths sched, Predicate prd,
4996                                 bit IsCommutable = 0> {
4997   defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4998                                  sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
4999                                  WIG;
}
5002 multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
5003                                  SDNode OpNode, X86SchedWriteWidths sched,
5004                                  Predicate prd, bit IsCommutable = 0> {
5005   defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
5006                                    IsCommutable>;
5008   defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
5009                                    IsCommutable>;
}
5012 multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
5013                                  SDNode OpNode, X86SchedWriteWidths sched,
5014                                  Predicate prd, bit IsCommutable = 0> {
5015   defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
5016                                    IsCommutable>;
5018   defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
5019                                    IsCommutable>;
}
5022 multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
5023                                   bits<8> opc_d, bits<8> opc_q,
5024                                   string OpcodeStr, SDNode OpNode,
5025                                   X86SchedWriteWidths sched,
5026                                   bit IsCommutable = 0> {
5027   defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
5028                                     sched, HasAVX512, IsCommutable>,
5029               avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
5030                                     sched, HasBWI, IsCommutable>;
}
5033 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
5034                             X86FoldableSchedWrite sched,
5035                             SDNode OpNode,X86VectorVTInfo _Src,
5036                             X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
5037                             bit IsCommutable = 0> {
5038   defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
5039                             (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
5040                             "$src2, $src1","$src1, $src2",
5041                             (_Dst.VT (OpNode
5042                                          (_Src.VT _Src.RC:$src1),
5043                                          (_Src.VT _Src.RC:$src2))),
5044                             IsCommutable>,
5045                             AVX512BIBase, EVEX_4V, Sched<[sched]>;
5046   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5047                         (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
5048                         "$src2, $src1", "$src1, $src2",
5049                         (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
5050                                       (_Src.LdFrag addr:$src2)))>,
5051                         AVX512BIBase, EVEX_4V,
5052                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5054   defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5055                     (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
5056                     OpcodeStr,
5057                     "${src2}"#_Brdct.BroadcastStr#", $src1",
5058                      "$src1, ${src2}"#_Brdct.BroadcastStr,
5059                     (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
5060                                  (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
5061                     AVX512BIBase, EVEX_4V, EVEX_B,
5062                     Sched<[sched.Folded, sched.ReadAfterFold]>;
}
5063 }
5065 defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
5066                                     SchedWriteVecALU, 1>;
5067 defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
5068                                     SchedWriteVecALU, 0>;
5069 defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
5070                                     SchedWriteVecALU, HasBWI, 1>;
5071 defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
5072                                     SchedWriteVecALU, HasBWI, 0>;
5073 defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
5074                                      SchedWriteVecALU, HasBWI, 1>;
5075 defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
5076                                      SchedWriteVecALU, HasBWI, 0>;
5077 defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
5078                                     SchedWritePMULLD, HasAVX512, 1>, T8PD;
5079 defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
5080                                     SchedWriteVecIMul, HasBWI, 1>;
5081 defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
5082                                     SchedWriteVecIMul, HasDQI, 1>, T8PD,
5083                                     NotEVEX2VEXConvertible;
5084 defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
5085                                     HasBWI, 1>;
5086 defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
5087                                      HasBWI, 1>;
5088 defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
5089                                       SchedWriteVecIMul, HasBWI, 1>, T8PD;
5090 defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu,
5091                                    SchedWriteVecALU, HasBWI, 1>;
5092 defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
5093                                     SchedWriteVecIMul, HasAVX512, 1>, T8PD;
5094 defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
5095                                      SchedWriteVecIMul, HasAVX512, 1>;
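// Note: X86pmuldq and X86pmuludq model the usual PMUL(U)DQ behaviour of
// multiplying the low 32 bits of every 64-bit lane (signed resp. unsigned)
// into a full 64-bit product, which is why the qword multiclass is used here
// even though the inputs are conceptually 32-bit.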
5097 multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
5098                             X86SchedWriteWidths sched,
5099                             AVX512VLVectorVTInfo _SrcVTInfo,
5100                             AVX512VLVectorVTInfo _DstVTInfo,
5101                             SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
5102   let Predicates = [prd] in
5103     defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
5104                                  _SrcVTInfo.info512, _DstVTInfo.info512,
5105                                  v8i64_info, IsCommutable>,
5106                                   EVEX_V512, EVEX_CD8<64, CD8VF>, REX_W;
5107   let Predicates = [HasVLX, prd] in {
5108     defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
5109                                       _SrcVTInfo.info256, _DstVTInfo.info256,
5110                                       v4i64x_info, IsCommutable>,
5111                                       EVEX_V256, EVEX_CD8<64, CD8VF>, REX_W;
5112     defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
5113                                       _SrcVTInfo.info128, _DstVTInfo.info128,
5114                                       v2i64x_info, IsCommutable>,
5115                                      EVEX_V128, EVEX_CD8<64, CD8VF>, REX_W;
5116   }
5117 }
5119 defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
5120                                 avx512vl_i8_info, avx512vl_i8_info,
5121                                 X86multishift, HasVBMI, 0>, T8PD;
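// Rough semantics for reference: VPMULTISHIFTQB (VBMI) forms each destination
// byte from an unaligned 8-bit bitfield of the corresponding source quadword,
// with the bit offset taken from the matching control byte; the qword
// (v8i64/v4i64/v2i64) info passed above is what supplies the 64-bit broadcast
// forms.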
5123 multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5124                             X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
5125                             X86FoldableSchedWrite sched> {
5126   defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5127                     (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
5128                     OpcodeStr,
5129                     "${src2}"#_Src.BroadcastStr#", $src1",
5130                      "$src1, ${src2}"#_Src.BroadcastStr,
5131                     (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
5132                                  (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
5133                     EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
5134                     Sched<[sched.Folded, sched.ReadAfterFold]>;
5135 }
5137 multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
5138                             SDNode OpNode, X86VectorVTInfo _Src,
5139                             X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
5140                             bit IsCommutable = 0> {
5141   defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
5142                             (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
5143                             "$src2, $src1","$src1, $src2",
5144                             (_Dst.VT (OpNode
5145                                          (_Src.VT _Src.RC:$src1),
5146                                          (_Src.VT _Src.RC:$src2))),
5147                             IsCommutable, IsCommutable>,
5148                             EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
5149   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5150                         (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
5151                         "$src2, $src1", "$src1, $src2",
5152                         (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
5153                                       (_Src.LdFrag addr:$src2)))>,
5154                          EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
5155                          Sched<[sched.Folded, sched.ReadAfterFold]>;
5156 }
5158 multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
5159                                     SDNode OpNode> {
5160   let Predicates = [HasBWI] in
5161   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
5162                                  v32i16_info, SchedWriteShuffle.ZMM>,
5163                 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
5164                                  v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
5165   let Predicates = [HasBWI, HasVLX] in {
5166     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
5167                                      v16i16x_info, SchedWriteShuffle.YMM>,
5168                      avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
5169                                       v16i16x_info, SchedWriteShuffle.YMM>,
5170                                       EVEX_V256;
5171     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
5172                                      v8i16x_info, SchedWriteShuffle.XMM>,
5173                      avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
5174                                       v8i16x_info, SchedWriteShuffle.XMM>,
5175                                       EVEX_V128;
5176   }
5177 }
5178 multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
5179                             SDNode OpNode> {
5180   let Predicates = [HasBWI] in
5181   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
5182                                 SchedWriteShuffle.ZMM>, EVEX_V512, WIG;
5183   let Predicates = [HasBWI, HasVLX] in {
5184     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
5185                                      v32i8x_info, SchedWriteShuffle.YMM>,
5186                                      EVEX_V256, WIG;
5187     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
5188                                      v16i8x_info, SchedWriteShuffle.XMM>,
5189                                      EVEX_V128, WIG;
5190   }
5191 }
5193 multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
5194                             SDNode OpNode, AVX512VLVectorVTInfo _Src,
5195                             AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
5196   let Predicates = [HasBWI] in
5197   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
5198                                 _Dst.info512, SchedWriteVecIMul.ZMM,
5199                                 IsCommutable>, EVEX_V512;
5200   let Predicates = [HasBWI, HasVLX] in {
5201     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
5202                                      _Dst.info256, SchedWriteVecIMul.YMM,
5203                                      IsCommutable>, EVEX_V256;
5204     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
5205                                      _Dst.info128, SchedWriteVecIMul.XMM,
5206                                      IsCommutable>, EVEX_V128;
5207   }
5208 }
5210 defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
5211 defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
5212 defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
5213 defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
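// The pack instructions above narrow their sources with signed (X86Packss) or
// unsigned (X86Packus) saturation, e.g. vpackssdw narrows i32 elements to i16
// and vpacksswb narrows i16 to i8, concatenating the two sources within each
// 128-bit lane.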
5215 defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
5216                      avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, WIG;
5217 defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
5218                      avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, WIG;
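// For reference: vpmaddubsw multiplies unsigned bytes from the first source by
// signed bytes from the second and sums adjacent pairs into saturated i16
// results, while vpmaddwd multiplies i16 pairs and sums adjacent products into
// i32 results; only the latter is commutable, matching the defms above.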
5220 defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
5221                                     SchedWriteVecALU, HasBWI, 1>, T8PD;
5222 defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
5223                                     SchedWriteVecALU, HasBWI, 1>;
5224 defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
5225                                     SchedWriteVecALU, HasAVX512, 1>, T8PD;
5226 defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
5227                                     SchedWriteVecALU, HasAVX512, 1>, T8PD,
5228                                     NotEVEX2VEXConvertible;
5230 defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
5231                                     SchedWriteVecALU, HasBWI, 1>;
5232 defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
5233                                     SchedWriteVecALU, HasBWI, 1>, T8PD;
5234 defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
5235                                     SchedWriteVecALU, HasAVX512, 1>, T8PD;
5236 defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
5237                                     SchedWriteVecALU, HasAVX512, 1>, T8PD,
5238                                     NotEVEX2VEXConvertible;
5240 defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
5241                                     SchedWriteVecALU, HasBWI, 1>, T8PD;
5242 defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
5243                                     SchedWriteVecALU, HasBWI, 1>;
5244 defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
5245                                     SchedWriteVecALU, HasAVX512, 1>, T8PD;
5246 defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
5247                                     SchedWriteVecALU, HasAVX512, 1>, T8PD,
5248                                     NotEVEX2VEXConvertible;
5250 defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
5251                                     SchedWriteVecALU, HasBWI, 1>;
5252 defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
5253                                     SchedWriteVecALU, HasBWI, 1>, T8PD;
5254 defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
5255                                     SchedWriteVecALU, HasAVX512, 1>, T8PD;
5256 defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
5257                                     SchedWriteVecALU, HasAVX512, 1>, T8PD,
5258                                     NotEVEX2VEXConvertible;
5260 // PMULLQ: Use the 512-bit version to implement the 128/256-bit forms when VLX is not available.
5261 let Predicates = [HasDQI, NoVLX] in {
5262   def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5263             (EXTRACT_SUBREG
5264                 (VPMULLQZrr
5265                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5266                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5267              sub_ymm)>;
5268   def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5269             (EXTRACT_SUBREG
5270                 (VPMULLQZrmb
5271                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5272                     addr:$src2),
5273              sub_ymm)>;
5275   def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5276             (EXTRACT_SUBREG
5277                 (VPMULLQZrr
5278                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5279                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5280              sub_xmm)>;
5281   def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5282             (EXTRACT_SUBREG
5283                 (VPMULLQZrmb
5284                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5285                     addr:$src2),
5286              sub_xmm)>;
5287 }
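// These widenings are safe because the extra lanes of the 512-bit operation
// are never observed: the inputs are inserted into IMPLICIT_DEF vectors and
// the EXTRACT_SUBREG reads back only the original xmm/ymm lanes, so whatever
// vpmullq computes in the upper elements is simply dropped.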
5289 multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
5290   def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
5291             (EXTRACT_SUBREG
5292                 (!cast<Instruction>(Instr#"rr")
5293                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5294                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5295              sub_ymm)>;
5296   def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5297             (EXTRACT_SUBREG
5298                 (!cast<Instruction>(Instr#"rmb")
5299                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5300                     addr:$src2),
5301              sub_ymm)>;
5303   def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
5304             (EXTRACT_SUBREG
5305                 (!cast<Instruction>(Instr#"rr")
5306                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5307                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5308              sub_xmm)>;
5309   def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5310             (EXTRACT_SUBREG
5311                 (!cast<Instruction>(Instr#"rmb")
5312                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5313                     addr:$src2),
5314              sub_xmm)>;
5315 }
5317 let Predicates = [HasAVX512, NoVLX] in {
5318   defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
5319   defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
5320   defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
5321   defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
5322 }
5324 //===----------------------------------------------------------------------===//
5325 // AVX-512  Logical Instructions
5326 //===----------------------------------------------------------------------===//
5328 defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
5329                                    SchedWriteVecLogic, HasAVX512, 1>;
5330 defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
5331                                   SchedWriteVecLogic, HasAVX512, 1>;
5332 defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
5333                                    SchedWriteVecLogic, HasAVX512, 1>;
5334 defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5335                                     SchedWriteVecLogic, HasAVX512>;
5337 let Predicates = [HasVLX] in {
5338   def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
5339             (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5340   def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
5341             (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5343   def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
5344             (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5345   def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
5346             (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5348   def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
5349             (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5350   def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
5351             (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5353   def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
5354             (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5355   def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
5356             (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5358   def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
5359             (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5360   def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
5361             (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5363   def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
5364             (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5365   def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
5366             (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5368   def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
5369             (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5370   def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
5371             (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5373   def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
5374             (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5375   def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
5376             (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5378   def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5379             (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5380   def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5381             (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5383   def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5384             (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5385   def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5386             (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5388   def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5389             (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5390   def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5391             (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5393   def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5394             (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5395   def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5396             (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5398   def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5399             (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5400   def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5401             (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5403   def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5404             (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5405   def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5406             (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5408   def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5409             (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5410   def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5411             (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5413   def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5414             (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5415   def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5416             (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5417 }
5419 let Predicates = [HasAVX512] in {
5420   def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5421             (VPANDQZrr VR512:$src1, VR512:$src2)>;
5422   def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5423             (VPANDQZrr VR512:$src1, VR512:$src2)>;
5425   def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5426             (VPORQZrr VR512:$src1, VR512:$src2)>;
5427   def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5428             (VPORQZrr VR512:$src1, VR512:$src2)>;
5430   def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5431             (VPXORQZrr VR512:$src1, VR512:$src2)>;
5432   def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5433             (VPXORQZrr VR512:$src1, VR512:$src2)>;
5435   def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5436             (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5437   def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5438             (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5440   def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5441             (VPANDQZrm VR512:$src1, addr:$src2)>;
5442   def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5443             (VPANDQZrm VR512:$src1, addr:$src2)>;
5445   def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5446             (VPORQZrm VR512:$src1, addr:$src2)>;
5447   def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5448             (VPORQZrm VR512:$src1, addr:$src2)>;
5450   def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5451             (VPXORQZrm VR512:$src1, addr:$src2)>;
5452   def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5453             (VPXORQZrm VR512:$src1, addr:$src2)>;
5455   def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5456             (VPANDNQZrm VR512:$src1, addr:$src2)>;
5457   def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5458             (VPANDNQZrm VR512:$src1, addr:$src2)>;
5459 }
5461 // Patterns to catch vselect nodes whose type differs from that of the logic op.
5462 multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5463                                     X86VectorVTInfo _,
5464                                     X86VectorVTInfo IntInfo> {
5465   // Masked register-register logical operations.
5466   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5467                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5468                    _.RC:$src0)),
5469             (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5470              _.RC:$src1, _.RC:$src2)>;
5472   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5473                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5474                    _.ImmAllZerosV)),
5475             (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5476              _.RC:$src2)>;
5478   // Masked register-memory logical operations.
5479   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5480                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5481                                             (load addr:$src2)))),
5482                    _.RC:$src0)),
5483             (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5484              _.RC:$src1, addr:$src2)>;
5485   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5486                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5487                                             (load addr:$src2)))),
5488                    _.ImmAllZerosV)),
5489             (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5490              addr:$src2)>;
5491 }
5493 multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5494                                          X86VectorVTInfo _,
5495                                          X86VectorVTInfo IntInfo> {
5496   // Register-broadcast logical operations.
5497   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5498                    (bitconvert
5499                     (IntInfo.VT (OpNode _.RC:$src1,
5500                                  (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5501                    _.RC:$src0)),
5502             (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5503              _.RC:$src1, addr:$src2)>;
5504   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5505                    (bitconvert
5506                     (IntInfo.VT (OpNode _.RC:$src1,
5507                                  (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5508                    _.ImmAllZerosV)),
5509             (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
5510              _.RC:$src1, addr:$src2)>;
5511 }
5513 multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5514                                          AVX512VLVectorVTInfo SelectInfo,
5515                                          AVX512VLVectorVTInfo IntInfo> {
5516 let Predicates = [HasVLX] in {
5517   defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5518                                  IntInfo.info128>;
5519   defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5520                                  IntInfo.info256>;
5521 }
5522 let Predicates = [HasAVX512] in {
5523   defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
5524                                  IntInfo.info512>;
5525 }
5526 }
5528 multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5529                                                AVX512VLVectorVTInfo SelectInfo,
5530                                                AVX512VLVectorVTInfo IntInfo> {
5531 let Predicates = [HasVLX] in {
5532   defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5533                                        SelectInfo.info128, IntInfo.info128>;
5534   defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5535                                        SelectInfo.info256, IntInfo.info256>;
5536 }
5537 let Predicates = [HasAVX512] in {
5538   defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5539                                        SelectInfo.info512, IntInfo.info512>;
5540 }
5541 }
5543 multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5544   // i64 vselect with i32/i16/i8 logic op
5545   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5546                                        avx512vl_i32_info>;
5547   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5548                                        avx512vl_i16_info>;
5549   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5550                                        avx512vl_i8_info>;
5552   // i32 vselect with i64/i16/i8 logic op
5553   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5554                                        avx512vl_i64_info>;
5555   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5556                                        avx512vl_i16_info>;
5557   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5558                                        avx512vl_i8_info>;
5560   // f32 vselect with i64/i32/i16/i8 logic op
5561   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5562                                        avx512vl_i64_info>;
5563   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5564                                        avx512vl_i32_info>;
5565   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5566                                        avx512vl_i16_info>;
5567   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5568                                        avx512vl_i8_info>;
5570   // f64 vselect with i64/i32/i16/i8 logic op
5571   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5572                                        avx512vl_i64_info>;
5573   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5574                                        avx512vl_i32_info>;
5575   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5576                                        avx512vl_i16_info>;
5577   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5578                                        avx512vl_i8_info>;
5580   defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5581                                              avx512vl_f32_info,
5582                                              avx512vl_i32_info>;
5583   defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
5584                                              avx512vl_f64_info,
5585                                              avx512vl_i64_info>;
5586 }
5588 defm : avx512_logical_lowering_types<"VPAND", and>;
5589 defm : avx512_logical_lowering_types<"VPOR",  or>;
5590 defm : avx512_logical_lowering_types<"VPXOR", xor>;
5591 defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
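// As an example of what the lowering above is meant to catch: a 32-bit 'and'
// that was wrapped in a v8i64 vselect (via bitcasts) should still select the
// masked VPANDQ forms, and conversely for the D forms, i.e. the granularity of
// the select mask, not the type the logic op was performed in, decides which
// instruction is used.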
5593 //===----------------------------------------------------------------------===//
5594 // AVX-512  FP arithmetic
5595 //===----------------------------------------------------------------------===//
5597 multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
5598                             SDPatternOperator OpNode, SDNode VecNode,
5599                             X86FoldableSchedWrite sched, bit IsCommutable> {
5600   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5601   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5602                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5603                            "$src2, $src1", "$src1, $src2",
5604                            (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5605                            Sched<[sched]>;
5607   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5608                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5609                          "$src2, $src1", "$src1, $src2",
5610                          (_.VT (VecNode _.RC:$src1,
5611                                         (_.ScalarIntMemFrags addr:$src2)))>,
5612                          Sched<[sched.Folded, sched.ReadAfterFold]>;
5613   let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5614   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5615                          (ins _.FRC:$src1, _.FRC:$src2),
5616                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5617                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5618                           Sched<[sched]> {
5619     let isCommutable = IsCommutable;
5620   }
5621   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5622                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5623                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5624                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5625                          (_.ScalarLdFrag addr:$src2)))]>,
5626                          Sched<[sched.Folded, sched.ReadAfterFold]>;
5627   }
5628   }
5629 }
5631 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
5632                                   SDNode VecNode, X86FoldableSchedWrite sched> {
5633   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5634   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5635                           (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5636                           "$rc, $src2, $src1", "$src1, $src2, $rc",
5637                           (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5638                           (i32 timm:$rc))>,
5639                           EVEX_B, EVEX_RC, Sched<[sched]>;
5640 }
5641 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
5642                                 SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5643                                 X86FoldableSchedWrite sched, bit IsCommutable,
5644                                 string EVEX2VexOvrd> {
5645   let ExeDomain = _.ExeDomain in {
5646   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5647                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5648                            "$src2, $src1", "$src1, $src2",
5649                            (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5650                            Sched<[sched]>, SIMD_EXC;
5652   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5653                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5654                          "$src2, $src1", "$src1, $src2",
5655                          (_.VT (VecNode _.RC:$src1,
5656                                         (_.ScalarIntMemFrags addr:$src2)))>,
5657                          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
5659   let isCodeGenOnly = 1, Predicates = [HasAVX512],
5660       Uses = [MXCSR], mayRaiseFPException = 1 in {
5661   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5662                          (ins _.FRC:$src1, _.FRC:$src2),
5663                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5664                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5665                           Sched<[sched]>,
5666                           EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
5667     let isCommutable = IsCommutable;
5668   }
5669   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5670                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5671                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5672                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5673                          (_.ScalarLdFrag addr:$src2)))]>,
5674                          Sched<[sched.Folded, sched.ReadAfterFold]>,
5675                          EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
5676   }
5678   let Uses = [MXCSR] in
5679   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5680                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5681                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5682                             (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5683                             EVEX_B, Sched<[sched]>;
5684   }
5685 }
5687 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5688                                 SDNode VecNode, SDNode RndNode,
5689                                 X86SchedWriteSizes sched, bit IsCommutable> {
5690   defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5691                               sched.PS.Scl, IsCommutable>,
5692              avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5693                               sched.PS.Scl>,
5694                               XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5695   defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5696                               sched.PD.Scl, IsCommutable>,
5697              avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5698                               sched.PD.Scl>,
5699                               XD, REX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5700   let Predicates = [HasFP16] in
5701     defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
5702                                 VecNode, sched.PH.Scl, IsCommutable>,
5703                avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
5704                                 sched.PH.Scl>,
5705                                 T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>;
5706 }
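// Sketch of what one instantiation provides (record names approximate): the
// 'defm VADD' below should yield intrinsic forms along the lines of
// VADDSSZrr_Int / VADDSSZrm_Int, a static-rounding VADDSSZrrb_Int taking an
// AVX512RC operand, and the isCodeGenOnly VADDSSZrr / VADDSSZrm forms used for
// plain scalar fadd, with matching SD and (under HasFP16) SH variants.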
5708 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5709                               SDNode VecNode, SDNode SaeNode,
5710                               X86SchedWriteSizes sched, bit IsCommutable> {
5711   defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5712                               VecNode, SaeNode, sched.PS.Scl, IsCommutable,
5713                               NAME#"SS">,
5714                               XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5715   defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5716                               VecNode, SaeNode, sched.PD.Scl, IsCommutable,
5717                               NAME#"SD">,
5718                               XD, REX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5719   let Predicates = [HasFP16] in {
5720     defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
5721                                 VecNode, SaeNode, sched.PH.Scl, IsCommutable,
5722                                 NAME#"SH">,
5723                                 T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>,
5724                                 NotEVEX2VEXConvertible;
5725   }
5726 }
5727 defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
5728                                  SchedWriteFAddSizes, 1>;
5729 defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
5730                                  SchedWriteFMulSizes, 1>;
5731 defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
5732                                  SchedWriteFAddSizes, 0>;
5733 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
5734                                  SchedWriteFDivSizes, 0>;
5735 defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5736                                SchedWriteFCmpSizes, 0>;
5737 defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5738                                SchedWriteFCmpSizes, 0>;
5740 // MIN/MAX nodes are commutable when "unsafe-fp-math" is in effect. In that
5741 // case we use X86fminc and X86fmaxc instead of X86fmin and X86fmax.
5742 multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5743                                     X86VectorVTInfo _, SDNode OpNode,
5744                                     X86FoldableSchedWrite sched,
5745                                     string EVEX2VEXOvrd> {
5746   let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5747   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5748                          (ins _.FRC:$src1, _.FRC:$src2),
5749                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5750                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5751                           Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
5752     let isCommutable = 1;
5753   }
5754   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5755                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5756                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5757                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5758                          (_.ScalarLdFrag addr:$src2)))]>,
5759                          Sched<[sched.Folded, sched.ReadAfterFold]>,
5760                          EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
5761   }
5762 }
5763 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5764                                          SchedWriteFCmp.Scl, "VMINCSS">, XS,
5765                                          EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5767 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5768                                          SchedWriteFCmp.Scl, "VMINCSD">, XD,
5769                                          REX_W, EVEX_4V, VEX_LIG,
5770                                          EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5772 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5773                                          SchedWriteFCmp.Scl, "VMAXCSS">, XS,
5774                                          EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5776 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5777                                          SchedWriteFCmp.Scl, "VMAXCSD">, XD,
5778                                          REX_W, EVEX_4V, VEX_LIG,
5779                                          EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5781 defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
5782                                          SchedWriteFCmp.Scl, "VMINCSH">, T_MAP5XS,
5783                                          EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
5784                                          NotEVEX2VEXConvertible;
5785 defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
5786                                          SchedWriteFCmp.Scl, "VMAXCSH">, T_MAP5XS,
5787                                          EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
5788                                          NotEVEX2VEXConvertible;
5790 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5791                             SDPatternOperator MaskOpNode,
5792                             X86VectorVTInfo _, X86FoldableSchedWrite sched,
5793                             bit IsCommutable,
5794                             bit IsKCommutable = IsCommutable,
5795                             string suffix = _.Suffix,
5796                             string ClobberConstraint = "",
5797                             bit MayRaiseFPException = 1> {
5798   let ExeDomain = _.ExeDomain, hasSideEffects = 0,
5799       Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in {
5800   defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
5801                                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix,
5802                                  "$src2, $src1", "$src1, $src2",
5803                                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
5804                                  (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint,
5805                                  IsCommutable, IsKCommutable, IsKCommutable>, EVEX_4V, Sched<[sched]>;
5806   let mayLoad = 1 in {
5807     defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5808                                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix,
5809                                    "$src2, $src1", "$src1, $src2",
5810                                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5811                                    (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5812                                    ClobberConstraint>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5813     defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5814                                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix,
5815                                     "${src2}"#_.BroadcastStr#", $src1",
5816                                     "$src1, ${src2}"#_.BroadcastStr,
5817                                     (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5818                                     (MaskOpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5819                                     ClobberConstraint>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5820     }
5821   }
5822 }
5824 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5825                                   SDPatternOperator OpNodeRnd,
5826                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
5827                                   string suffix = _.Suffix,
5828                                   string ClobberConstraint = ""> {
5829   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5830   defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5831                   (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix,
5832                   "$rc, $src2, $src1", "$src1, $src2, $rc",
5833                   (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))),
5834                   0, 0, 0, vselect_mask, ClobberConstraint>,
5835                   EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
5836 }
5838 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5839                                 SDPatternOperator OpNodeSAE,
5840                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5841   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5842   defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5843                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5844                   "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5845                   (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5846                   EVEX_4V, EVEX_B, Sched<[sched]>;
5847 }
5849 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5850                              SDPatternOperator MaskOpNode,
5851                              Predicate prd, X86SchedWriteSizes sched,
5852                              bit IsCommutable = 0,
5853                              bit IsPD128Commutable = IsCommutable> {
5854   let Predicates = [prd] in {
5855   defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
5856                               sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
5857                               EVEX_CD8<32, CD8VF>;
5858   defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
5859                               sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, REX_W,
5860                               EVEX_CD8<64, CD8VF>;
5861   }
5863   // Define only if AVX512VL feature is present.
5864   let Predicates = [prd, HasVLX] in {
5865     defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
5866                                    sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
5867                                    EVEX_CD8<32, CD8VF>;
5868     defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
5869                                    sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
5870                                    EVEX_CD8<32, CD8VF>;
5871     defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
5872                                    sched.PD.XMM, IsPD128Commutable,
5873                                    IsCommutable>, EVEX_V128, PD, REX_W,
5874                                    EVEX_CD8<64, CD8VF>;
5875     defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
5876                                    sched.PD.YMM, IsCommutable>, EVEX_V256, PD, REX_W,
5877                                    EVEX_CD8<64, CD8VF>;
5878   }
5879 }
5881 multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5882                               SDPatternOperator MaskOpNode,
5883                               X86SchedWriteSizes sched, bit IsCommutable = 0> {
5884   let Predicates = [HasFP16] in {
5885     defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
5886                                 sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5PS,
5887                                 EVEX_CD8<16, CD8VF>;
5888   }
5889   let Predicates = [HasVLX, HasFP16] in {
5890     defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
5891                                    sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5PS,
5892                                    EVEX_CD8<16, CD8VF>;
5893     defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
5894                                    sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5PS,
5895                                    EVEX_CD8<16, CD8VF>;
5896   }
5897 }
5899 let Uses = [MXCSR] in
5900 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5901                                    X86SchedWriteSizes sched> {
5902   let Predicates = [HasFP16] in {
5903     defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5904                                       v32f16_info>,
5905                                       EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
5906   }
5907   defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5908                                     v16f32_info>,
5909                                     EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5910   defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5911                                     v8f64_info>,
5912                                     EVEX_V512, PD, REX_W, EVEX_CD8<64, CD8VF>;
5913 }
5915 let Uses = [MXCSR] in
5916 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5917                                  X86SchedWriteSizes sched> {
5918   let Predicates = [HasFP16] in {
5919     defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5920                                     v32f16_info>,
5921                                     EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
5922   }
5923   defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5924                                   v16f32_info>,
5925                                   EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5926   defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5927                                   v8f64_info>,
5928                                   EVEX_V512, PD, REX_W, EVEX_CD8<64, CD8VF>;
5929 }
5931 defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
5932                               SchedWriteFAddSizes, 1>,
5933             avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
5934             avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5935 defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
5936                               SchedWriteFMulSizes, 1>,
5937             avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
5938             avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5939 defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
5940                               SchedWriteFAddSizes>,
5941             avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
5942             avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5943 defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
5944                               SchedWriteFDivSizes>,
5945             avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
5946             avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5947 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
5948                               SchedWriteFCmpSizes, 0>,
5949             avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin, SchedWriteFCmpSizes, 0>,
5950             avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5951 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
5952                               SchedWriteFCmpSizes, 0>,
5953             avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>,
5954             avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
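// Each of the defms above composes three pieces: avx512_fp_binop_p for the
// PS/PD packed forms (plus the VLX subsets), avx512_fp_binop_ph for the FP16
// packed forms, and either the _round or the _sae multiclass for the 512-bit
// embedded-rounding / suppress-all-exceptions encodings.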
5955 let isCodeGenOnly = 1 in {
5956   defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
5957                                  SchedWriteFCmpSizes, 1>,
5958                avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
5959                                  SchedWriteFCmpSizes, 1>;
5960   defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
5961                                  SchedWriteFCmpSizes, 1>,
5962                avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
5963                                  SchedWriteFCmpSizes, 1>;
5964 }
5965 let Uses = []<Register>, mayRaiseFPException = 0 in {
5966 defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
5967                                SchedWriteFLogicSizes, 1>;
5968 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
5969                                SchedWriteFLogicSizes, 0>;
5970 defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
5971                                SchedWriteFLogicSizes, 1>;
5972 defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
5973                                SchedWriteFLogicSizes, 1>;
5974 }
5976 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5977                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5978   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5979   defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5980                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5981                   "$src2, $src1", "$src1, $src2",
5982                   (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5983                   EVEX_4V, Sched<[sched]>;
5984   defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5985                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5986                   "$src2, $src1", "$src1, $src2",
5987                   (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5988                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5989   defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5990                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5991                    "${src2}"#_.BroadcastStr#", $src1",
5992                    "$src1, ${src2}"#_.BroadcastStr,
5993                    (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5994                    EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5995   }
5996 }
5998 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5999                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6000   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
6001   defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6002                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
6003                   "$src2, $src1", "$src1, $src2",
6004                   (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
6005                   Sched<[sched]>;
6006   defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6007                   (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
6008                   "$src2, $src1", "$src1, $src2",
6009                   (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
6010                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6011   }
6012 }
6014 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
6015                                 X86SchedWriteWidths sched> {
6016   let Predicates = [HasFP16] in {
6017     defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
6018                avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
6019                                 EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
6020     defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
6021                avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
6022                              EVEX_4V, T_MAP6PD, EVEX_CD8<16, CD8VT1>;
6023   }
6024   defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
6025              avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
6026                               EVEX_V512, EVEX_CD8<32, CD8VF>, T8PD;
6027   defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
6028              avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
6029                               EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>, T8PD;
6030   defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
6031              avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
6032                                     X86scalefsRnd, sched.Scl>,
6033                                     EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8PD;
6034   defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
6035              avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
6036                                     X86scalefsRnd, sched.Scl>,
6037                                     EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, REX_W, T8PD;
6039   // Define only if the AVX512VL feature is present.
6040   let Predicates = [HasVLX] in {
6041     defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
6042                                    EVEX_V128, EVEX_CD8<32, CD8VF>, T8PD;
6043     defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
6044                                    EVEX_V256, EVEX_CD8<32, CD8VF>, T8PD;
6045     defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
6046                                    EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>, T8PD;
6047     defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
6048                                    EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>, T8PD;
6049   }
6051   let Predicates = [HasFP16, HasVLX] in {
6052     defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
6053                                    EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6PD;
6054     defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
6055                                    EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6PD;
6056   }
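// VSCALEF multiplies each element of the first source by two raised to the
// floor of the corresponding element of the second source, e.g.
//   vscalefps zmm1, zmm2, zmm3   ; zmm1[i] = zmm2[i] * 2^floor(zmm3[i])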
6058 defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
6059                                     SchedWriteFAdd>, NotEVEX2VEXConvertible;
6061 //===----------------------------------------------------------------------===//
6062 // AVX-512  VPTESTM instructions
6063 //===----------------------------------------------------------------------===//
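// vptestm sets mask bit i when the bitwise AND of the two source elements is
// nonzero; vptestnm sets it when the AND is zero, e.g.
//   vptestmd k1 {k2}, zmm2, zmm3   ; k1[i] = k2[i] && ((zmm2[i] & zmm3[i]) != 0)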
6065 multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
6066                          X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6067   // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
6068   // There are just too many permutations due to commutability and bitcasts.
6069   let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6070   defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
6071                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6072                       "$src2, $src1", "$src1, $src2",
6073                    (null_frag), (null_frag), 1>,
6074                    EVEX_4V, Sched<[sched]>;
6075   let mayLoad = 1 in
6076   defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
6077                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6078                        "$src2, $src1", "$src1, $src2",
6079                    (null_frag), (null_frag)>,
6080                    EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6081                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6082   }
6085 multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
6086                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6087   let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
6088   defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
6089                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6090                     "${src2}"#_.BroadcastStr#", $src1",
6091                     "$src1, ${src2}"#_.BroadcastStr,
6092                     (null_frag), (null_frag)>,
6093                     EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6094                     Sched<[sched.Folded, sched.ReadAfterFold]>;
6097 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
6098                                   X86SchedWriteWidths sched,
6099                                   AVX512VLVectorVTInfo _> {
6100   let Predicates  = [HasAVX512] in
6101   defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>,
6102            avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
6104   let Predicates = [HasAVX512, HasVLX] in {
6105   defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>,
6106               avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
6107   defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>,
6108               avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
6109   }
6112 multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
6113                             X86SchedWriteWidths sched> {
6114   defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
6115                                  avx512vl_i32_info>;
6116   defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
6117                                  avx512vl_i64_info>, REX_W;
6120 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
6121                             X86SchedWriteWidths sched> {
6122   let Predicates = [HasBWI] in {
6123   defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
6124                             v32i16_info>, EVEX_V512, REX_W;
6125   defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
6126                             v64i8_info>, EVEX_V512;
6127   }
6129   let Predicates = [HasVLX, HasBWI] in {
6130   defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
6131                             v16i16x_info>, EVEX_V256, REX_W;
6132   defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
6133                             v8i16x_info>, EVEX_V128, REX_W;
6134   defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
6135                             v32i8x_info>, EVEX_V256;
6136   defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
6137                             v16i8x_info>, EVEX_V128;
6138   }
6141 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
6142                                    X86SchedWriteWidths sched> :
6143   avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
6144   avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
6146 defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
6147                                          SchedWriteVecLogic>, T8PD;
6148 defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
6149                                          SchedWriteVecLogic>, T8XS;
6151 //===----------------------------------------------------------------------===//
6152 // AVX-512  Shift instructions
6153 //===----------------------------------------------------------------------===//
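// Three forms are defined below: avx512_shift_rmi shifts by an 8-bit
// immediate, avx512_shift_rmbi is the immediate form with a broadcast memory
// source, and avx512_shift_rrm takes the count from the low 64 bits of an XMM
// register (or a 128-bit memory operand), e.g.
//   vpslld zmm1, zmm2, 7      ; immediate count
//   vpslld zmm1, zmm2, xmm3   ; count from xmm3[63:0]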
6155 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
6156                             string OpcodeStr, SDNode OpNode,
6157                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6158   let ExeDomain = _.ExeDomain in {
6159   defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
6160                    (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
6161                       "$src2, $src1", "$src1, $src2",
6162                    (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
6163                    Sched<[sched]>;
6164   defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
6165                    (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
6166                        "$src2, $src1", "$src1, $src2",
6167                    (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
6168                           (i8 timm:$src2)))>,
6169                    Sched<[sched.Folded]>;
6170   }
6173 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
6174                              string OpcodeStr, SDNode OpNode,
6175                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6176   let ExeDomain = _.ExeDomain in
6177   defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
6178                    (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
6179       "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
6180      (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
6181      EVEX_B, Sched<[sched.Folded]>;
6184 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6185                             X86FoldableSchedWrite sched, ValueType SrcVT,
6186                             X86VectorVTInfo _> {
6187    // src2 is always 128-bit
6188   let ExeDomain = _.ExeDomain in {
6189   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6190                    (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
6191                       "$src2, $src1", "$src1, $src2",
6192                    (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
6193                    AVX512BIBase, EVEX_4V, Sched<[sched]>;
6194   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6195                    (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
6196                        "$src2, $src1", "$src1, $src2",
6197                    (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
6198                    AVX512BIBase,
6199                    EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6200   }
6203 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6204                               X86SchedWriteWidths sched, ValueType SrcVT,
6205                               AVX512VLVectorVTInfo VTInfo,
6206                               Predicate prd> {
6207   let Predicates = [prd] in
6208   defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
6209                                VTInfo.info512>, EVEX_V512,
6210                                EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
6211   let Predicates = [prd, HasVLX] in {
6212   defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
6213                                VTInfo.info256>, EVEX_V256,
6214                                EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
6215   defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
6216                                VTInfo.info128>, EVEX_V128,
6217                                EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
6218   }
6221 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
6222                               string OpcodeStr, SDNode OpNode,
6223                               X86SchedWriteWidths sched,
6224                               bit NotEVEX2VEXConvertibleQ = 0> {
6225   defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
6226                               avx512vl_i32_info, HasAVX512>;
6227   let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
6228   defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
6229                               avx512vl_i64_info, HasAVX512>, REX_W;
6230   defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
6231                               avx512vl_i16_info, HasBWI>;
6234 multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6235                                   string OpcodeStr, SDNode OpNode,
6236                                   X86SchedWriteWidths sched,
6237                                   AVX512VLVectorVTInfo VTInfo> {
6238   let Predicates = [HasAVX512] in
6239   defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6240                               sched.ZMM, VTInfo.info512>,
6241              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
6242                                VTInfo.info512>, EVEX_V512;
6243   let Predicates = [HasAVX512, HasVLX] in {
6244   defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6245                               sched.YMM, VTInfo.info256>,
6246              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
6247                                VTInfo.info256>, EVEX_V256;
6248   defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6249                               sched.XMM, VTInfo.info128>,
6250              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
6251                                VTInfo.info128>, EVEX_V128;
6252   }
6255 multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
6256                               string OpcodeStr, SDNode OpNode,
6257                               X86SchedWriteWidths sched> {
6258   let Predicates = [HasBWI] in
6259   defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6260                                sched.ZMM, v32i16_info>, EVEX_V512, WIG;
6261   let Predicates = [HasVLX, HasBWI] in {
6262   defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6263                                sched.YMM, v16i16x_info>, EVEX_V256, WIG;
6264   defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6265                                sched.XMM, v8i16x_info>, EVEX_V128, WIG;
6266   }
6269 multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
6270                                Format ImmFormR, Format ImmFormM,
6271                                string OpcodeStr, SDNode OpNode,
6272                                X86SchedWriteWidths sched,
6273                                bit NotEVEX2VEXConvertibleQ = 0> {
6274   defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
6275                                  sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
6276   let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
6277   defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
6278                                  sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;
6281 defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
6282                                  SchedWriteVecShiftImm>,
6283              avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
6284                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6286 defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
6287                                  SchedWriteVecShiftImm>,
6288              avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
6289                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6291 defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
6292                                  SchedWriteVecShiftImm, 1>,
6293              avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
6294                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6296 defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
6297                                  SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6298 defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
6299                                  SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6301 defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
6302                                 SchedWriteVecShift>;
6303 defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
6304                                 SchedWriteVecShift, 1>;
6305 defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
6306                                 SchedWriteVecShift>;
6308 // Use the 512-bit VPSRA/VPSRAI versions to implement v2i64/v4i64 when VLX is not available.
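// The narrower value is widened with an INSERT_SUBREG into an IMPLICIT_DEF
// ZMM register, the 512-bit instruction is applied, and the original width is
// extracted again; the extra elements are never observed.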
6309 let Predicates = [HasAVX512, NoVLX] in {
6310   def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
6311             (EXTRACT_SUBREG (v8i64
6312               (VPSRAQZrr
6313                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6314                  VR128X:$src2)), sub_ymm)>;
6316   def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6317             (EXTRACT_SUBREG (v8i64
6318               (VPSRAQZrr
6319                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6320                  VR128X:$src2)), sub_xmm)>;
6322   def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
6323             (EXTRACT_SUBREG (v8i64
6324               (VPSRAQZri
6325                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6326                  timm:$src2)), sub_ymm)>;
6328   def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
6329             (EXTRACT_SUBREG (v8i64
6330               (VPSRAQZri
6331                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6332                  timm:$src2)), sub_xmm)>;
6335 //===-------------------------------------------------------------------===//
6336 // Variable Bit Shifts
6337 //===-------------------------------------------------------------------===//
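// VPSLLV/VPSRLV/VPSRAV shift each element by the count held in the
// corresponding element of the second source, e.g.
//   vpsllvd zmm1, zmm2, zmm3   ; zmm1[i] = zmm2[i] << zmm3[i]
// Counts of the element width or more yield 0 (all sign bits for vpsrav).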
6339 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
6340                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6341   let ExeDomain = _.ExeDomain in {
6342   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6343                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6344                       "$src2, $src1", "$src1, $src2",
6345                    (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
6346                    AVX5128IBase, EVEX_4V, Sched<[sched]>;
6347   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6348                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6349                        "$src2, $src1", "$src1, $src2",
6350                    (_.VT (OpNode _.RC:$src1,
6351                    (_.VT (_.LdFrag addr:$src2))))>,
6352                    AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6353                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6354   }
6357 multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
6358                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6359   let ExeDomain = _.ExeDomain in
6360   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6361                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6362                     "${src2}"#_.BroadcastStr#", $src1",
6363                     "$src1, ${src2}"#_.BroadcastStr,
6364                     (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
6365                     AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6366                     Sched<[sched.Folded, sched.ReadAfterFold]>;
6369 multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6370                                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
6371   let Predicates  = [HasAVX512] in
6372   defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
6373            avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
6375   let Predicates = [HasAVX512, HasVLX] in {
6376   defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
6377               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
6378   defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
6379               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
6380   }
6383 multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
6384                                   SDNode OpNode, X86SchedWriteWidths sched> {
6385   defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
6386                                  avx512vl_i32_info>;
6387   defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
6388                                  avx512vl_i64_info>, REX_W;
6391 // Use the 512-bit version to implement the 128/256-bit forms when VLX is not available.
6392 multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6393                                      SDNode OpNode, list<Predicate> p> {
6394   let Predicates = p in {
6395   def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6396                                   (_.info256.VT _.info256.RC:$src2))),
6397             (EXTRACT_SUBREG
6398                 (!cast<Instruction>(OpcodeStr#"Zrr")
6399                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6400                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6401              sub_ymm)>;
6403   def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6404                                   (_.info128.VT _.info128.RC:$src2))),
6405             (EXTRACT_SUBREG
6406                 (!cast<Instruction>(OpcodeStr#"Zrr")
6407                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6408                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
6409              sub_xmm)>;
6410   }
6412 multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6413                               SDNode OpNode, X86SchedWriteWidths sched> {
6414   let Predicates = [HasBWI] in
6415   defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6416               EVEX_V512, REX_W;
6417   let Predicates = [HasVLX, HasBWI] in {
6419   defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6420               EVEX_V256, REX_W;
6421   defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
6422               EVEX_V128, REX_W;
6423   }
6426 defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
6427               avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
6429 defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
6430               avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
6432 defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
6433               avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
6435 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6436 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
6438 defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
6439 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
6440 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
6441 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
6444 // Use the 512-bit VPROL/VPROLI versions to implement v2i64/v4i64 and v4i32/v8i32 when VLX is not available.
6445 let Predicates = [HasAVX512, NoVLX] in {
6446   def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6447             (EXTRACT_SUBREG (v8i64
6448               (VPROLVQZrr
6449                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6450                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6451                        sub_xmm)>;
6452   def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6453             (EXTRACT_SUBREG (v8i64
6454               (VPROLVQZrr
6455                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6456                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6457                        sub_ymm)>;
6459   def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6460             (EXTRACT_SUBREG (v16i32
6461               (VPROLVDZrr
6462                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6463                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6464                         sub_xmm)>;
6465   def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6466             (EXTRACT_SUBREG (v16i32
6467               (VPROLVDZrr
6468                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6469                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6470                         sub_ymm)>;
6472   def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
6473             (EXTRACT_SUBREG (v8i64
6474               (VPROLQZri
6475                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6476                         timm:$src2)), sub_xmm)>;
6477   def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
6478             (EXTRACT_SUBREG (v8i64
6479               (VPROLQZri
6480                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6481                        timm:$src2)), sub_ymm)>;
6483   def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
6484             (EXTRACT_SUBREG (v16i32
6485               (VPROLDZri
6486                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6487                         timm:$src2)), sub_xmm)>;
6488   def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
6489             (EXTRACT_SUBREG (v16i32
6490               (VPROLDZri
6491                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6492                         timm:$src2)), sub_ymm)>;
6495 // Use the 512-bit VPROR/VPRORI versions to implement v2i64/v4i64 and v4i32/v8i32 when VLX is not available.
6496 let Predicates = [HasAVX512, NoVLX] in {
6497   def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6498             (EXTRACT_SUBREG (v8i64
6499               (VPRORVQZrr
6500                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6501                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6502                        sub_xmm)>;
6503   def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6504             (EXTRACT_SUBREG (v8i64
6505               (VPRORVQZrr
6506                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6507                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6508                        sub_ymm)>;
6510   def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6511             (EXTRACT_SUBREG (v16i32
6512               (VPRORVDZrr
6513                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6514                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6515                         sub_xmm)>;
6516   def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6517             (EXTRACT_SUBREG (v16i32
6518               (VPRORVDZrr
6519                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6520                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6521                         sub_ymm)>;
6523   def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
6524             (EXTRACT_SUBREG (v8i64
6525               (VPRORQZri
6526                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6527                         timm:$src2)), sub_xmm)>;
6528   def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
6529             (EXTRACT_SUBREG (v8i64
6530               (VPRORQZri
6531                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6532                        timm:$src2)), sub_ymm)>;
6534   def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
6535             (EXTRACT_SUBREG (v16i32
6536               (VPRORDZri
6537                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6538                         timm:$src2)), sub_xmm)>;
6539   def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
6540             (EXTRACT_SUBREG (v16i32
6541               (VPRORDZri
6542                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6543                         timm:$src2)), sub_ymm)>;
6546 //===-------------------------------------------------------------------===//
6547 // 1-src variable permutation VPERMW/D/Q
6548 //===-------------------------------------------------------------------===//
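// VPERMW/B/D/Q/PS/PD reuse the two-source reg/reg, reg/mem and reg/broadcast
// forms of avx512_var_shift/avx512_var_shift_mb above: the first source
// supplies the element indices and the second (foldable) source supplies the
// data being permuted.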
6550 multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6551                                  X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6552   let Predicates  = [HasAVX512] in
6553   defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6554            avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6556   let Predicates = [HasAVX512, HasVLX] in
6557   defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6558               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
6561 multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6562                                  string OpcodeStr, SDNode OpNode,
6563                                  X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6564   let Predicates = [HasAVX512] in
6565   defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6566                               sched, VTInfo.info512>,
6567              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6568                                sched, VTInfo.info512>, EVEX_V512;
6569   let Predicates = [HasAVX512, HasVLX] in
6570   defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6571                               sched, VTInfo.info256>,
6572              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6573                                sched, VTInfo.info256>, EVEX_V256;
6576 multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6577                               Predicate prd, SDNode OpNode,
6578                               X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6579   let Predicates = [prd] in
6580   defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6581               EVEX_V512 ;
6582   let Predicates = [HasVLX, prd] in {
6583   defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6584               EVEX_V256 ;
6585   defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
6586               EVEX_V128 ;
6587   }
6590 defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6591                                WriteVarShuffle256, avx512vl_i16_info>, REX_W;
6592 defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6593                                WriteVarShuffle256, avx512vl_i8_info>;
6595 defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6596                                     WriteVarShuffle256, avx512vl_i32_info>;
6597 defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6598                                     WriteVarShuffle256, avx512vl_i64_info>, REX_W;
6599 defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6600                                      WriteFVarShuffle256, avx512vl_f32_info>;
6601 defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6602                                      WriteFVarShuffle256, avx512vl_f64_info>, REX_W;
6604 defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6605                              X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6606                              EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
6607 defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6608                              X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6609                              EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
6611 //===----------------------------------------------------------------------===//
6612 // AVX-512 - VPERMIL
6613 //===----------------------------------------------------------------------===//
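// VPERMILPS/VPERMILPD permute elements within each 128-bit lane. The variable
// form below takes a per-element selector vector; the immediate form reuses
// the shift-by-immediate multiclasses, e.g.
//   vpermilps zmm1, zmm2, zmm3
//   vpermilps zmm1, zmm2, 0xb1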
6615 multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6616                              X86FoldableSchedWrite sched, X86VectorVTInfo _,
6617                              X86VectorVTInfo Ctrl> {
6618   defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6619                   (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6620                   "$src2, $src1", "$src1, $src2",
6621                   (_.VT (OpNode _.RC:$src1,
6622                                (Ctrl.VT Ctrl.RC:$src2)))>,
6623                   T8PD, EVEX_4V, Sched<[sched]>;
6624   defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6625                   (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6626                   "$src2, $src1", "$src1, $src2",
6627                   (_.VT (OpNode
6628                            _.RC:$src1,
6629                            (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6630                   T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6631                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6632   defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6633                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6634                    "${src2}"#_.BroadcastStr#", $src1",
6635                    "$src1, ${src2}"#_.BroadcastStr,
6636                    (_.VT (OpNode
6637                             _.RC:$src1,
6638                             (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
6639                    T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6640                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6643 multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6644                                     X86SchedWriteWidths sched,
6645                                     AVX512VLVectorVTInfo _,
6646                                     AVX512VLVectorVTInfo Ctrl> {
6647   let Predicates = [HasAVX512] in {
6648     defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6649                                   _.info512, Ctrl.info512>, EVEX_V512;
6650   }
6651   let Predicates = [HasAVX512, HasVLX] in {
6652     defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6653                                   _.info128, Ctrl.info128>, EVEX_V128;
6654     defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6655                                   _.info256, Ctrl.info256>, EVEX_V256;
6656   }
6659 multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6660                          AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6661   defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6662                                       _, Ctrl>;
6663   defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6664                                     X86VPermilpi, SchedWriteFShuffle, _>,
6665                     EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
6668 let ExeDomain = SSEPackedSingle in
6669 defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6670                                avx512vl_i32_info>;
6671 let ExeDomain = SSEPackedDouble in
6672 defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6673                                avx512vl_i64_info>, VEX_W1X;
6675 //===----------------------------------------------------------------------===//
6676 // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6677 //===----------------------------------------------------------------------===//
6679 defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6680                              X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6681                              EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6682 defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6683                                   X86PShufhw, SchedWriteShuffle>,
6684                                   EVEX, AVX512XSIi8Base;
6685 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6686                                   X86PShuflw, SchedWriteShuffle>,
6687                                   EVEX, AVX512XDIi8Base;
6689 //===----------------------------------------------------------------------===//
6690 // AVX-512 - VPSHUFB
6691 //===----------------------------------------------------------------------===//
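// VPSHUFB shuffles bytes within each 128-bit lane: the low four bits of each
// control byte select a source byte and a set sign bit zeroes the result
// byte. The avx512_var_shift multiclass provides the reg/reg and reg/mem
// forms.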
6693 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6694                                X86SchedWriteWidths sched> {
6695   let Predicates = [HasBWI] in
6696   defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6697                               EVEX_V512;
6699   let Predicates = [HasVLX, HasBWI] in {
6700   defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6701                               EVEX_V256;
6702   defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6703                               EVEX_V128;
6704   }
6707 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6708                                   SchedWriteVarShuffle>, WIG;
6710 //===----------------------------------------------------------------------===//
6711 // Move Low to High and High to Low packed FP Instructions
6712 //===----------------------------------------------------------------------===//
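// vmovlhps copies the low 64 bits of the second source into the high half of
// the destination (the low half comes from the first source); vmovhlps copies
// the high 64 bits of the second source into the low half, e.g.
//   vmovlhps xmm1, xmm2, xmm3   ; xmm1[63:0] = xmm2[63:0], xmm1[127:64] = xmm3[63:0]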
6714 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6715           (ins VR128X:$src1, VR128X:$src2),
6716           "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6717           [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6718           Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
6719 let isCommutable = 1 in
6720 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6721           (ins VR128X:$src1, VR128X:$src2),
6722           "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6723           [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6724           Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
6726 //===----------------------------------------------------------------------===//
6727 // VMOVHPS/PD VMOVLPS Instructions
6728 // All patterns were taken from the SSE implementation.
6729 //===----------------------------------------------------------------------===//
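// vmovhps/vmovhpd load 64 bits from memory into the high half of the
// destination (the low half comes from the first source); vmovlps/vmovlpd
// load into the low half. The store forms below write the corresponding half
// of the source register back to memory.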
6731 multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6732                                   SDPatternOperator OpNode,
6733                                   X86VectorVTInfo _> {
6734   let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6735   def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6736                   (ins _.RC:$src1, f64mem:$src2),
6737                   !strconcat(OpcodeStr,
6738                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6739                   [(set _.RC:$dst,
6740                      (OpNode _.RC:$src1,
6741                        (_.VT (bitconvert
6742                          (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6743                   Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
6746 // No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
6747 // SSE1. And the MOVLPS pattern is even more complex.
6748 defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6749                                   v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6750 defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6751                                   v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, REX_W;
6752 defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6753                                   v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6754 defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6755                                   v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, REX_W;
6757 let Predicates = [HasAVX512] in {
6758   // VMOVHPD patterns
6759   def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
6760             (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6762   // VMOVLPD patterns
6763   def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
6764             (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6767 let SchedRW = [WriteFStore] in {
6768 let mayStore = 1, hasSideEffects = 0 in
6769 def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6770                        (ins f64mem:$dst, VR128X:$src),
6771                        "vmovhps\t{$src, $dst|$dst, $src}",
6772                        []>, EVEX, EVEX_CD8<32, CD8VT2>;
6773 def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6774                        (ins f64mem:$dst, VR128X:$src),
6775                        "vmovhpd\t{$src, $dst|$dst, $src}",
6776                        [(store (f64 (extractelt
6777                                      (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6778                                      (iPTR 0))), addr:$dst)]>,
6779                        EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
6780 let mayStore = 1, hasSideEffects = 0 in
6781 def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6782                        (ins f64mem:$dst, VR128X:$src),
6783                        "vmovlps\t{$src, $dst|$dst, $src}",
6784                        []>, EVEX, EVEX_CD8<32, CD8VT2>;
6785 def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6786                        (ins f64mem:$dst, VR128X:$src),
6787                        "vmovlpd\t{$src, $dst|$dst, $src}",
6788                        [(store (f64 (extractelt (v2f64 VR128X:$src),
6789                                      (iPTR 0))), addr:$dst)]>,
6790                        EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
6791 } // SchedRW
6793 let Predicates = [HasAVX512] in {
6794   // VMOVHPD patterns
6795   def : Pat<(store (f64 (extractelt
6796                            (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6797                            (iPTR 0))), addr:$dst),
6798            (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6800 //===----------------------------------------------------------------------===//
6801 // FMA - Fused Multiply Operations
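// The digits in the mnemonic give the operand order of the multiply-add,
// counting the destination as operand 1:
//   vfmadd213ps zmm1, zmm2, zmm3   ; zmm1 = (zmm2 * zmm1) + zmm3
//   vfmadd231ps zmm1, zmm2, zmm3   ; zmm1 = (zmm2 * zmm3) + zmm1
//   vfmadd132ps zmm1, zmm2, zmm3   ; zmm1 = (zmm1 * zmm3) + zmm2
// The three orderings exist so that any of the three logical operands can be
// the one loaded from memory (always the last assembler operand).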
6804 multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6805                                SDNode MaskOpNode, X86FoldableSchedWrite sched,
6806                                X86VectorVTInfo _> {
6807   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6808       Uses = [MXCSR], mayRaiseFPException = 1 in {
6809   defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6810           (ins _.RC:$src2, _.RC:$src3),
6811           OpcodeStr, "$src3, $src2", "$src2, $src3",
6812           (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
6813           (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6814           EVEX_4V, Sched<[sched]>;
6816   defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6817           (ins _.RC:$src2, _.MemOp:$src3),
6818           OpcodeStr, "$src3, $src2", "$src2, $src3",
6819           (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
6820           (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6821           EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
6822                           sched.ReadAfterFold]>;
6824   defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6825             (ins _.RC:$src2, _.ScalarMemOp:$src3),
6826             OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6827             !strconcat("$src2, ${src3}", _.BroadcastStr ),
6828             (OpNode _.RC:$src2,
6829              _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
6830             (MaskOpNode _.RC:$src2,
6831              _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
6832             EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
6833                                     sched.ReadAfterFold]>;
6834   }
6837 multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6838                                  X86FoldableSchedWrite sched,
6839                                  X86VectorVTInfo _> {
6840   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6841       Uses = [MXCSR] in
6842   defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6843           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6844           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6845           (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
6846           (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6847           EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
6850 multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6851                                    SDNode MaskOpNode, SDNode OpNodeRnd,
6852                                    X86SchedWriteWidths sched,
6853                                    AVX512VLVectorVTInfo _,
6854                                    Predicate prd = HasAVX512> {
6855   let Predicates = [prd] in {
6856     defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6857                                       sched.ZMM, _.info512>,
6858                   avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6859                                         _.info512>,
6860                               EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6861   }
6862   let Predicates = [HasVLX, prd] in {
6863     defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6864                                     sched.YMM, _.info256>,
6865                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6866     defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6867                                     sched.XMM, _.info128>,
6868                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6869   }
6872 multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6873                               SDNode MaskOpNode, SDNode OpNodeRnd> {
6874     defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6875                                       OpNodeRnd, SchedWriteFMA,
6876                                       avx512vl_f16_info, HasFP16>, T_MAP6PD;
6877     defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6878                                       OpNodeRnd, SchedWriteFMA,
6879                                       avx512vl_f32_info>, T8PD;
6880     defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6881                                       OpNodeRnd, SchedWriteFMA,
6882                                       avx512vl_f64_info>, T8PD, REX_W;
6885 defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
6886                                        fma, X86FmaddRnd>;
6887 defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
6888                                        X86Fmsub, X86FmsubRnd>;
6889 defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
6890                                        X86Fmaddsub, X86FmaddsubRnd>;
6891 defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
6892                                        X86Fmsubadd, X86FmsubaddRnd>;
6893 defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
6894                                        X86Fnmadd, X86FnmaddRnd>;
6895 defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
6896                                        X86Fnmsub, X86FnmsubRnd>;
6899 multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6900                                SDNode MaskOpNode, X86FoldableSchedWrite sched,
6901                                X86VectorVTInfo _> {
6902   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6903       Uses = [MXCSR], mayRaiseFPException = 1 in {
6904   defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6905           (ins _.RC:$src2, _.RC:$src3),
6906           OpcodeStr, "$src3, $src2", "$src2, $src3",
6907           (null_frag),
6908           (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6909           EVEX_4V, Sched<[sched]>;
6911   defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6912           (ins _.RC:$src2, _.MemOp:$src3),
6913           OpcodeStr, "$src3, $src2", "$src2, $src3",
6914           (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6915           (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6916           EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
6917                           sched.ReadAfterFold]>;
6919   defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6920          (ins _.RC:$src2, _.ScalarMemOp:$src3),
6921          OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6922          "$src2, ${src3}"#_.BroadcastStr,
6923          (_.VT (OpNode _.RC:$src2,
6924                       (_.VT (_.BroadcastLdFrag addr:$src3)),
6925                       _.RC:$src1)),
6926          (_.VT (MaskOpNode _.RC:$src2,
6927                            (_.VT (_.BroadcastLdFrag addr:$src3)),
6928                            _.RC:$src1)), 1, 0>, EVEX_4V, EVEX_B,
6929          Sched<[sched.Folded, sched.ReadAfterFold,
6930                 sched.ReadAfterFold]>;
6931   }
6934 multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6935                                  X86FoldableSchedWrite sched,
6936                                  X86VectorVTInfo _> {
6937   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6938       Uses = [MXCSR] in
6939   defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6940           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6941           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6942           (null_frag),
6943           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6944           1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
6947 multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6948                                    SDNode MaskOpNode, SDNode OpNodeRnd,
6949                                    X86SchedWriteWidths sched,
6950                                    AVX512VLVectorVTInfo _,
6951                                    Predicate prd = HasAVX512> {
6952   let Predicates = [prd] in {
6953     defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6954                                       sched.ZMM, _.info512>,
6955                   avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6956                                         _.info512>,
6957                               EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6958   }
6959   let Predicates = [HasVLX, prd] in {
6960     defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6961                                     sched.YMM, _.info256>,
6962                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6963     defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6964                                     sched.XMM, _.info128>,
6965                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6966   }
6969 multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6970                               SDNode MaskOpNode, SDNode OpNodeRnd > {
6971     defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6972                                       OpNodeRnd, SchedWriteFMA,
6973                                       avx512vl_f16_info, HasFP16>, T_MAP6PD;
6974     defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6975                                       OpNodeRnd, SchedWriteFMA,
6976                                       avx512vl_f32_info>, T8PD;
6977     defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6978                                       OpNodeRnd, SchedWriteFMA,
6979                                       avx512vl_f64_info>, T8PD, REX_W;
6982 defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
6983                                        fma, X86FmaddRnd>;
6984 defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
6985                                        X86Fmsub, X86FmsubRnd>;
6986 defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
6987                                        X86Fmaddsub, X86FmaddsubRnd>;
6988 defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
6989                                        X86Fmsubadd, X86FmsubaddRnd>;
6990 defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
6991                                        X86Fnmadd, X86FnmaddRnd>;
6992 defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
6993                                        X86Fnmsub, X86FnmsubRnd>;
6995 multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6996                                SDNode MaskOpNode, X86FoldableSchedWrite sched,
6997                                X86VectorVTInfo _> {
6998   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6999       Uses = [MXCSR], mayRaiseFPException = 1 in {
7000   defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
7001           (ins _.RC:$src2, _.RC:$src3),
7002           OpcodeStr, "$src3, $src2", "$src2, $src3",
7003           (null_frag),
7004           (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
7005           EVEX_4V, Sched<[sched]>;
7007   // Pattern is in 312 order so that the load is in a different place from the
7008   // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
7009   defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
7010           (ins _.RC:$src2, _.MemOp:$src3),
7011           OpcodeStr, "$src3, $src2", "$src2, $src3",
7012           (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
7013           (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
7014           EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
7015                           sched.ReadAfterFold]>;
7017   // Pattern is in 312 order so that the load is in a different place from the
7018   // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
7019   defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
7020          (ins _.RC:$src2, _.ScalarMemOp:$src3),
7021          OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
7022          "$src2, ${src3}"#_.BroadcastStr,
7023          (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
7024                        _.RC:$src1, _.RC:$src2)),
7025          (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
7026                            _.RC:$src1, _.RC:$src2)), 1, 0>,
7027          EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
7028                                  sched.ReadAfterFold]>;
7029   }
7032 multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
7033                                  X86FoldableSchedWrite sched,
7034                                  X86VectorVTInfo _> {
7035   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
7036       Uses = [MXCSR] in
7037   defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
7038           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
7039           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
7040           (null_frag),
7041           (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
7042           1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
7045 multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
7046                                    SDNode MaskOpNode, SDNode OpNodeRnd,
7047                                    X86SchedWriteWidths sched,
7048                                    AVX512VLVectorVTInfo _,
7049                                    Predicate prd = HasAVX512> {
7050   let Predicates = [prd] in {
7051     defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
7052                                       sched.ZMM, _.info512>,
7053                   avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
7054                                         _.info512>,
7055                               EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7056   }
7057   let Predicates = [HasVLX, prd] in {
7058     defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
7059                                     sched.YMM, _.info256>,
7060                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7061     defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
7062                                     sched.XMM, _.info128>,
7063                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
7064   }
7065 }
7067 multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
7068                               SDNode MaskOpNode, SDNode OpNodeRnd > {
7069     defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
7070                                       OpNodeRnd, SchedWriteFMA,
7071                                       avx512vl_f16_info, HasFP16>, T_MAP6PD;
7072     defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
7073                                       OpNodeRnd, SchedWriteFMA,
7074                                       avx512vl_f32_info>, T8PD;
7075     defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
7076                                       OpNodeRnd, SchedWriteFMA,
7077                                       avx512vl_f64_info>, T8PD, REX_W;
7078 }
7080 defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
7081                                        fma, X86FmaddRnd>;
7082 defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
7083                                        X86Fmsub, X86FmsubRnd>;
7084 defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
7085                                        X86Fmaddsub, X86FmaddsubRnd>;
7086 defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
7087                                        X86Fmsubadd, X86FmsubaddRnd>;
7088 defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
7089                                        X86Fnmadd, X86FnmaddRnd>;
7090 defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
7091                                        X86Fnmsub, X86FnmsubRnd>;
7093 // Scalar FMA
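// The digits in the mnemonic give the operand order of the multiply-add, with
// $src1 tied to $dst: the 213 form computes dst = src2 * src1 + src3, the 231
// form computes dst = src2 * src3 + src1, and the 132 form computes
// dst = src1 * src3 + src2.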
7094 multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7095                                dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
7096 let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
7097   defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7098           (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
7099           "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
7100           EVEX_4V, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
7102   let mayLoad = 1 in
7103   defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7104           (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
7105           "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
7106           EVEX_4V, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
7107                           SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
7109   let Uses = [MXCSR] in
7110   defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7111          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
7112          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
7113          EVEX_4V, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
7115   let isCodeGenOnly = 1, isCommutable = 1 in {
7116     def r     : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
7117                      (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
7118                      !strconcat(OpcodeStr,
7119                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
7120                      !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX_4V, SIMD_EXC;
7121     def m     : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
7122                     (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
7123                     !strconcat(OpcodeStr,
7124                                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
7125                     [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
7126                                      SchedWriteFMA.Scl.ReadAfterFold]>, EVEX_4V, SIMD_EXC;
7128     let Uses = [MXCSR] in
7129     def rb    : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
7130                      (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
7131                      !strconcat(OpcodeStr,
7132                               "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
7133                      !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
7134                      Sched<[SchedWriteFMA.Scl]>, EVEX_4V;
7135   }// isCodeGenOnly = 1
7136 }// Constraints = "$src1 = $dst"
7137 }
7139 multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
7140                             string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
7141                             X86VectorVTInfo _, string SUFF> {
7142   let ExeDomain = _.ExeDomain in {
7143   defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
7144                 // Operands for the intrinsic are in 123 order to preserve passthru
7145                 // semantics.
7146                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
7147                          _.FRC:$src3))),
7148                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
7149                          (_.ScalarLdFrag addr:$src3)))),
7150                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
7151                          _.FRC:$src3, (i32 timm:$rc)))), 0>;
7153   defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
7154                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
7155                                           _.FRC:$src1))),
7156                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
7157                             (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
7158                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
7159                          _.FRC:$src1, (i32 timm:$rc)))), 1>;
7161   // One pattern is in 312 order so that the load is in a different place from the
7162   // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
7163   defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
7164                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
7165                          _.FRC:$src2))),
7166                 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
7167                                  _.FRC:$src1, _.FRC:$src2))),
7168                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
7169                          _.FRC:$src2, (i32 timm:$rc)))), 1>;
7170   }
7171 }
7173 multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
7174                         string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
7175   let Predicates = [HasAVX512] in {
7176     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7177                                  OpNodeRnd, f32x_info, "SS">,
7178                                  EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD;
7179     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7180                                  OpNodeRnd, f64x_info, "SD">,
7181                                  EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8PD;
7182   }
7183   let Predicates = [HasFP16] in {
7184     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7185                                  OpNodeRnd, f16x_info, "SH">,
7186                                  EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6PD;
7187   }
7188 }
7190 defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
7191 defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
7192 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
7193 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
7195 multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
7196                                       SDNode RndOp, string Prefix,
7197                                       string Suffix, SDNode Move,
7198                                       X86VectorVTInfo _, PatLeaf ZeroFP,
7199                                       Predicate prd = HasAVX512> {
7200   let Predicates = [prd] in {
7201     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7202                 (Op _.FRC:$src2,
7203                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7204                     _.FRC:$src3))))),
7205               (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
7206                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7207                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7209     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7210                 (Op _.FRC:$src2, _.FRC:$src3,
7211                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7212               (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
7213                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7214                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7216     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7217                 (Op _.FRC:$src2,
7218                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7219                     (_.ScalarLdFrag addr:$src3)))))),
7220               (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
7221                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7222                addr:$src3)>;
7224     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7225                 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7226                     (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
7227               (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
7228                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7229                addr:$src3)>;
7231     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7232                 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7233                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7234               (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
7235                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7236                addr:$src3)>;
7238     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7239                (X86selects_mask VK1WM:$mask,
7240                 (MaskedOp _.FRC:$src2,
7241                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7242                     _.FRC:$src3),
7243                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7244               (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
7245                VR128X:$src1, VK1WM:$mask,
7246                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7247                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7249     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7250                (X86selects_mask VK1WM:$mask,
7251                 (MaskedOp _.FRC:$src2,
7252                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7253                     (_.ScalarLdFrag addr:$src3)),
7254                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7255               (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
7256                VR128X:$src1, VK1WM:$mask,
7257                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7259     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7260                (X86selects_mask VK1WM:$mask,
7261                 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7262                           (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
7263                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7264               (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
7265                VR128X:$src1, VK1WM:$mask,
7266                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7268     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7269                (X86selects_mask VK1WM:$mask,
7270                 (MaskedOp _.FRC:$src2, _.FRC:$src3,
7271                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7272                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7273               (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
7274                VR128X:$src1, VK1WM:$mask,
7275                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7276                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7278     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7279                (X86selects_mask VK1WM:$mask,
7280                 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7281                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7282                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7283               (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
7284                VR128X:$src1, VK1WM:$mask,
7285                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7287     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7288                (X86selects_mask VK1WM:$mask,
7289                 (MaskedOp _.FRC:$src2,
7290                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7291                           _.FRC:$src3),
7292                 (_.EltVT ZeroFP)))))),
7293               (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
7294                VR128X:$src1, VK1WM:$mask,
7295                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7296                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7298     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7299                (X86selects_mask VK1WM:$mask,
7300                 (MaskedOp _.FRC:$src2, _.FRC:$src3,
7301                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7302                 (_.EltVT ZeroFP)))))),
7303               (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
7304                VR128X:$src1, VK1WM:$mask,
7305                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7306                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7308     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7309                (X86selects_mask VK1WM:$mask,
7310                 (MaskedOp _.FRC:$src2,
7311                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7312                           (_.ScalarLdFrag addr:$src3)),
7313                 (_.EltVT ZeroFP)))))),
7314               (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
7315                VR128X:$src1, VK1WM:$mask,
7316                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7318     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7319                (X86selects_mask VK1WM:$mask,
7320                 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7321                           _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
7322                 (_.EltVT ZeroFP)))))),
7323               (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
7324                VR128X:$src1, VK1WM:$mask,
7325                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7327     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7328                (X86selects_mask VK1WM:$mask,
7329                 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7330                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7331                 (_.EltVT ZeroFP)))))),
7332               (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
7333                VR128X:$src1, VK1WM:$mask,
7334                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7336     // Patterns with rounding mode.
7337     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7338                 (RndOp _.FRC:$src2,
7339                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7340                        _.FRC:$src3, (i32 timm:$rc)))))),
7341               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
7342                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7343                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7345     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7346                 (RndOp _.FRC:$src2, _.FRC:$src3,
7347                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7348                        (i32 timm:$rc)))))),
7349               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
7350                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7351                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7353     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7354                (X86selects_mask VK1WM:$mask,
7355                 (RndOp _.FRC:$src2,
7356                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7357                        _.FRC:$src3, (i32 timm:$rc)),
7358                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7359               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
7360                VR128X:$src1, VK1WM:$mask,
7361                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7362                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7364     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7365                (X86selects_mask VK1WM:$mask,
7366                 (RndOp _.FRC:$src2, _.FRC:$src3,
7367                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7368                        (i32 timm:$rc)),
7369                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7370               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
7371                VR128X:$src1, VK1WM:$mask,
7372                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7373                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7375     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7376                (X86selects_mask VK1WM:$mask,
7377                 (RndOp _.FRC:$src2,
7378                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7379                        _.FRC:$src3, (i32 timm:$rc)),
7380                 (_.EltVT ZeroFP)))))),
7381               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
7382                VR128X:$src1, VK1WM:$mask,
7383                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7384                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7386     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7387                (X86selects_mask VK1WM:$mask,
7388                 (RndOp _.FRC:$src2, _.FRC:$src3,
7389                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7390                        (i32 timm:$rc)),
7391                 (_.EltVT ZeroFP)))))),
7392               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
7393                VR128X:$src1, VK1WM:$mask,
7394                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7395                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7396   }
7397 }
7398 defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
7399                                   X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7400 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
7401                                   X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7402 defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
7403                                   X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7404 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
7405                                   X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7407 defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7408                                   "SS", X86Movss, v4f32x_info, fp32imm0>;
7409 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7410                                   "SS", X86Movss, v4f32x_info, fp32imm0>;
7411 defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7412                                   "SS", X86Movss, v4f32x_info, fp32imm0>;
7413 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7414                                   "SS", X86Movss, v4f32x_info, fp32imm0>;
7416 defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7417                                   "SD", X86Movsd, v2f64x_info, fp64imm0>;
7418 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7419                                   "SD", X86Movsd, v2f64x_info, fp64imm0>;
7420 defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7421                                   "SD", X86Movsd, v2f64x_info, fp64imm0>;
7422 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7423                                   "SD", X86Movsd, v2f64x_info, fp64imm0>;
7425 //===----------------------------------------------------------------------===//
7426 // AVX-512  IFMA - Packed Multiply of Unsigned 52-bit Integers and Add the Low/High 52 Bits
7427 //===----------------------------------------------------------------------===//
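// VPMADD52LUQ adds the low 52 bits of the 104-bit product of the low 52 bits
// of the two multiplicands to the 64-bit accumulator in $src1/$dst;
// VPMADD52HUQ adds the high 52 bits of the same product.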
7428 let Constraints = "$src1 = $dst" in {
7429 multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
7430                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
7431   // NOTE: The SDNode has the multiply operands first, with the addend last.
7432   // This enables commuted load patterns to be autogenerated by tablegen.
7433   let ExeDomain = _.ExeDomain in {
7434   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
7435           (ins _.RC:$src2, _.RC:$src3),
7436           OpcodeStr, "$src3, $src2", "$src2, $src3",
7437           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
7438           T8PD, EVEX_4V, Sched<[sched]>;
7440   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7441           (ins _.RC:$src2, _.MemOp:$src3),
7442           OpcodeStr, "$src3, $src2", "$src2, $src3",
7443           (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
7444           T8PD, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
7445                                 sched.ReadAfterFold]>;
7447   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7448             (ins _.RC:$src2, _.ScalarMemOp:$src3),
7449             OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
7450             !strconcat("$src2, ${src3}", _.BroadcastStr ),
7451             (OpNode _.RC:$src2,
7452                     (_.VT (_.BroadcastLdFrag addr:$src3)),
7453                     _.RC:$src1)>,
7454             T8PD, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
7455                                           sched.ReadAfterFold]>;
7456   }
7457 }
7458 } // Constraints = "$src1 = $dst"
7460 multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7461                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
7462   let Predicates = [HasIFMA] in {
7463     defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
7464                       EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7465   }
7466   let Predicates = [HasVLX, HasIFMA] in {
7467     defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
7468                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7469     defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
7470                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
7471   }
7472 }
7474 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
7475                                          SchedWriteVecIMul, avx512vl_i64_info>,
7476                                          REX_W;
7477 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
7478                                          SchedWriteVecIMul, avx512vl_i64_info>,
7479                                          REX_W;
7481 //===----------------------------------------------------------------------===//
7482 // AVX-512  Scalar convert from signed/unsigned integer to float/double
7483 //===----------------------------------------------------------------------===//
7485 multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
7486                     RegisterClass SrcRC, X86VectorVTInfo DstVT,
7487                     X86MemOperand x86memop, PatFrag ld_frag, string asm,
7488                     string mem, list<Register> _Uses = [MXCSR],
7489                     bit _mayRaiseFPException = 1> {
7490 let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
7491     mayRaiseFPException = _mayRaiseFPException in {
7492   let hasSideEffects = 0, isCodeGenOnly = 1 in {
7493     def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7494               (ins DstVT.FRC:$src1, SrcRC:$src),
7495               !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7496               EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7497     let mayLoad = 1 in
7498       def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7499               (ins DstVT.FRC:$src1, x86memop:$src),
7500               asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
7501               EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7502   } // hasSideEffects = 0
7503   def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7504                 (ins DstVT.RC:$src1, SrcRC:$src2),
7505                 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7506                 [(set DstVT.RC:$dst,
7507                       (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
7508                EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7510   def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7511                 (ins DstVT.RC:$src1, x86memop:$src2),
7512                 asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7513                 [(set DstVT.RC:$dst,
7514                       (OpNode (DstVT.VT DstVT.RC:$src1),
7515                                (ld_frag addr:$src2)))]>,
7516                 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7518   def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7519                   (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
7520                   DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
7523 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7524                                X86FoldableSchedWrite sched, RegisterClass SrcRC,
7525                                X86VectorVTInfo DstVT, string asm,
7526                                string mem> {
7527   let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
7528   def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7529               (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7530               !strconcat(asm,
7531                   "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7532               [(set DstVT.RC:$dst,
7533                     (OpNode (DstVT.VT DstVT.RC:$src1),
7534                              SrcRC:$src2,
7535                              (i32 timm:$rc)))]>,
7536               EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7537   def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
7538                   (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
7539                   DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
7540 }
7542 multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7543                                 X86FoldableSchedWrite sched,
7544                                 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7545                                 X86MemOperand x86memop, PatFrag ld_frag,
7546                                 string asm, string mem> {
7547   defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7548               avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7549                             ld_frag, asm, mem>, VEX_LIG;
7550 }
7552 let Predicates = [HasAVX512] in {
7553 defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7554                                  WriteCvtI2SS, GR32,
7555                                  v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7556                                  XS, EVEX_CD8<32, CD8VT1>;
7557 defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7558                                  WriteCvtI2SS, GR64,
7559                                  v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7560                                  XS, REX_W, EVEX_CD8<64, CD8VT1>;
7561 defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7562                                  v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
7563                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7564 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7565                                  WriteCvtI2SD, GR64,
7566                                  v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7567                                  XD, REX_W, EVEX_CD8<64, CD8VT1>;
7569 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7570               (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7571 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7572               (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7574 def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
7575           (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7576 def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
7577           (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7578 def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
7579           (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7580 def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
7581           (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7583 def : Pat<(f32 (any_sint_to_fp GR32:$src)),
7584           (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7585 def : Pat<(f32 (any_sint_to_fp GR64:$src)),
7586           (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7587 def : Pat<(f64 (any_sint_to_fp GR32:$src)),
7588           (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7589 def : Pat<(f64 (any_sint_to_fp GR64:$src)),
7590           (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7592 defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7593                                   WriteCvtI2SS, GR32,
7594                                   v4f32x_info, i32mem, loadi32,
7595                                   "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
7596 defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7597                                   WriteCvtI2SS, GR64,
7598                                   v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7599                                   XS, REX_W, EVEX_CD8<64, CD8VT1>;
7600 defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7601                                   i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
7602                                   XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7603 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7604                                   WriteCvtI2SD, GR64,
7605                                   v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7606                                   XD, REX_W, EVEX_CD8<64, CD8VT1>;
7608 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7609               (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7610 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7611               (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7613 def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
7614           (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7615 def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
7616           (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7617 def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
7618           (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7619 def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
7620           (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7622 def : Pat<(f32 (any_uint_to_fp GR32:$src)),
7623           (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7624 def : Pat<(f32 (any_uint_to_fp GR64:$src)),
7625           (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7626 def : Pat<(f64 (any_uint_to_fp GR32:$src)),
7627           (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7628 def : Pat<(f64 (any_uint_to_fp GR64:$src)),
7629           (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7630 }
7632 //===----------------------------------------------------------------------===//
7633 // AVX-512  Scalar convert from float/double to integer
7634 //===----------------------------------------------------------------------===//
7636 multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7637                                   X86VectorVTInfo DstVT, SDNode OpNode,
7638                                   SDNode OpNodeRnd,
7639                                   X86FoldableSchedWrite sched, string asm,
7640                                   string aliasStr, Predicate prd = HasAVX512> {
7641   let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
7642     def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7643                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7644                 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7645                 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7646     let Uses = [MXCSR] in
7647     def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7648                  !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7649                  [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7650                  EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7651                  Sched<[sched]>;
7652     def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7653                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7654                 [(set DstVT.RC:$dst, (OpNode
7655                       (SrcVT.ScalarIntMemFrags addr:$src)))]>,
7656                 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7657   } // Predicates = [prd]
7659   def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7660           (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7661   def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7662           (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7663   def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7664           (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7665                                           SrcVT.IntScalarMemOp:$src), 0, "att">;
7666 }
7668 // Convert float/double to signed/unsigned int 32/64
7669 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7670                                    X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7671                                    XS, EVEX_CD8<32, CD8VT1>;
7672 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7673                                    X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7674                                    XS, REX_W, EVEX_CD8<32, CD8VT1>;
7675 defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7676                                    X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7677                                    XS, EVEX_CD8<32, CD8VT1>;
7678 defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7679                                    X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7680                                    XS, REX_W, EVEX_CD8<32, CD8VT1>;
7681 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7682                                    X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7683                                    XD, EVEX_CD8<64, CD8VT1>;
7684 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7685                                    X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7686                                    XD, REX_W, EVEX_CD8<64, CD8VT1>;
7687 defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7688                                    X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7689                                    XD, EVEX_CD8<64, CD8VT1>;
7690 defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7691                                    X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7692                                    XD, REX_W, EVEX_CD8<64, CD8VT1>;
7694 multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
7695                         X86VectorVTInfo DstVT, SDNode OpNode,
7696                         X86FoldableSchedWrite sched> {
7697   let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
7698     let isCodeGenOnly = 1 in {
7699     def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
7700                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7701                 [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
7702                 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7703     def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
7704                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7705                 [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
7706                 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7707     }
7708   } // Predicates = [HasAVX512]
7709 }
7711 defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
7712                        lrint, WriteCvtSS2I>, XS, EVEX_CD8<32, CD8VT1>;
7713 defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
7714                        llrint, WriteCvtSS2I>, REX_W, XS, EVEX_CD8<32, CD8VT1>;
7715 defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
7716                        lrint, WriteCvtSD2I>, XD, EVEX_CD8<64, CD8VT1>;
7717 defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
7718                        llrint, WriteCvtSD2I>, REX_W, XD, EVEX_CD8<64, CD8VT1>;
7720 let Predicates = [HasAVX512] in {
7721   def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
7722   def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
7724   def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
7725   def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
7726 }
7728 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang,
7729 // which would otherwise produce unnecessary vmovs{s,d} instructions.
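// For example, (X86Movss $dst, (scalar_to_vector (any_sint_to_fp GR32:$src)))
// is selected directly to VCVTSI2SSZrr_Int, so no separate VMOVSS is needed to
// merge the converted value into the destination vector.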
7730 let Predicates = [HasAVX512] in {
7731 def : Pat<(v4f32 (X86Movss
7732                    (v4f32 VR128X:$dst),
7733                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
7734           (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7736 def : Pat<(v4f32 (X86Movss
7737                    (v4f32 VR128X:$dst),
7738                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
7739           (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7741 def : Pat<(v4f32 (X86Movss
7742                    (v4f32 VR128X:$dst),
7743                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
7744           (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7746 def : Pat<(v4f32 (X86Movss
7747                    (v4f32 VR128X:$dst),
7748                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
7749           (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7751 def : Pat<(v2f64 (X86Movsd
7752                    (v2f64 VR128X:$dst),
7753                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
7754           (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7756 def : Pat<(v2f64 (X86Movsd
7757                    (v2f64 VR128X:$dst),
7758                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
7759           (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7761 def : Pat<(v2f64 (X86Movsd
7762                    (v2f64 VR128X:$dst),
7763                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
7764           (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7766 def : Pat<(v2f64 (X86Movsd
7767                    (v2f64 VR128X:$dst),
7768                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
7769           (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7771 def : Pat<(v4f32 (X86Movss
7772                    (v4f32 VR128X:$dst),
7773                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
7774           (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7776 def : Pat<(v4f32 (X86Movss
7777                    (v4f32 VR128X:$dst),
7778                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
7779           (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7781 def : Pat<(v4f32 (X86Movss
7782                    (v4f32 VR128X:$dst),
7783                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
7784           (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7786 def : Pat<(v4f32 (X86Movss
7787                    (v4f32 VR128X:$dst),
7788                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
7789           (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7791 def : Pat<(v2f64 (X86Movsd
7792                    (v2f64 VR128X:$dst),
7793                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
7794           (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7796 def : Pat<(v2f64 (X86Movsd
7797                    (v2f64 VR128X:$dst),
7798                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
7799           (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7801 def : Pat<(v2f64 (X86Movsd
7802                    (v2f64 VR128X:$dst),
7803                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
7804           (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7806 def : Pat<(v2f64 (X86Movsd
7807                    (v2f64 VR128X:$dst),
7808                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
7809           (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7810 } // Predicates = [HasAVX512]
7812 // Convert float/double to signed/unsigned int 32/64 with truncation
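// The truncating forms (vcvtt*) always round toward zero regardless of
// MXCSR.RC, matching the semantics of fp_to_sint/fp_to_uint.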
7813 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7814                             X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
7815                             SDNode OpNodeInt, SDNode OpNodeSAE,
7816                             X86FoldableSchedWrite sched, string aliasStr,
7817                             Predicate prd = HasAVX512> {
7818 let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
7819   let isCodeGenOnly = 1 in {
7820   def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7821               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7822               [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7823               EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7824   def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7825               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7826               [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7827               EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7828   }
7830   def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7831             !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7832            [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7833            EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7834   let Uses = [MXCSR] in
7835   def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7836             !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7837             [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7838                                   EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7839   def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7840               (ins _SrcRC.IntScalarMemOp:$src),
7841               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7842               [(set _DstRC.RC:$dst,
7843                 (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
7844               EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7845 } // Predicates = [prd]
7847   def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7848           (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7849   def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7850           (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7851   def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7852           (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7853                                           _SrcRC.IntScalarMemOp:$src), 0, "att">;
7854 }
7856 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7857                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7858                         "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7859 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7860                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7861                         "{q}">, REX_W, XS, EVEX_CD8<32, CD8VT1>;
7862 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7863                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7864                         "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7865 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7866                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7867                         "{q}">, REX_W, XD, EVEX_CD8<64, CD8VT1>;
7869 defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7870                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7871                         "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7872 defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7873                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7874                         "{q}">, XS,REX_W, EVEX_CD8<32, CD8VT1>;
7875 defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7876                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7877                         "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7878 defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7879                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7880                         "{q}">, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7882 //===----------------------------------------------------------------------===//
7883 // AVX-512  Convert from float to double and back
7884 //===----------------------------------------------------------------------===//
7886 let Uses = [MXCSR], mayRaiseFPException = 1 in
7887 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7888                                 X86VectorVTInfo _Src, SDNode OpNode,
7889                                 X86FoldableSchedWrite sched> {
7890   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7891                          (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7892                          "$src2, $src1", "$src1, $src2",
7893                          (_.VT (OpNode (_.VT _.RC:$src1),
7894                                        (_Src.VT _Src.RC:$src2)))>,
7895                          EVEX_4V, VEX_LIG, Sched<[sched]>;
7896   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7897                          (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7898                          "$src2, $src1", "$src1, $src2",
7899                          (_.VT (OpNode (_.VT _.RC:$src1),
7900                                   (_Src.ScalarIntMemFrags addr:$src2)))>,
7901                          EVEX_4V, VEX_LIG,
7902                          Sched<[sched.Folded, sched.ReadAfterFold]>;
7904   let isCodeGenOnly = 1, hasSideEffects = 0 in {
7905     def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7906                (ins _.FRC:$src1, _Src.FRC:$src2),
7907                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7908                EVEX_4V, VEX_LIG, Sched<[sched]>;
7909     let mayLoad = 1 in
7910     def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7911                (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7912                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7913                EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7914   }
7915 }
7917 // Scalar Conversion with SAE - suppress all exceptions
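// {sae} is encoded by setting EVEX.b on the register form; the conversion then
// behaves as if all floating-point exceptions were masked and updates no
// MXCSR status flags.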
7918 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7919                                     X86VectorVTInfo _Src, SDNode OpNodeSAE,
7920                                     X86FoldableSchedWrite sched> {
7921   let Uses = [MXCSR] in
7922   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7923                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7924                         "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7925                         (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7926                                          (_Src.VT _Src.RC:$src2)))>,
7927                         EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
7928 }
7930 // Scalar Conversion with rounding control (RC)
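// The $rc operand selects a static rounding mode ({rn,rd,ru,rz}-sae), encoded
// in EVEX.RC, that overrides MXCSR.RC for this instruction; embedded rounding
// also implies {sae}.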
7931 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7932                                    X86VectorVTInfo _Src, SDNode OpNodeRnd,
7933                                    X86FoldableSchedWrite sched> {
7934   let Uses = [MXCSR] in
7935   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7936                         (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7937                         "$rc, $src2, $src1", "$src1, $src2, $rc",
7938                         (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7939                                          (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
7940                         EVEX_4V, VEX_LIG, Sched<[sched]>,
7941                         EVEX_B, EVEX_RC;
7942 }
7943 multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
7944                                       SDNode OpNode, SDNode OpNodeRnd,
7945                                       X86FoldableSchedWrite sched,
7946                                       X86VectorVTInfo _src, X86VectorVTInfo _dst,
7947                                       Predicate prd = HasAVX512> {
7948   let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7949     defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7950              avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7951                                OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
7952   }
7953 }
7955 multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
7956                                        SDNode OpNode, SDNode OpNodeSAE,
7957                                        X86FoldableSchedWrite sched,
7958                                        X86VectorVTInfo _src, X86VectorVTInfo _dst,
7959                                        Predicate prd = HasAVX512> {
7960   let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7961     defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7962              avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7963              EVEX_CD8<_src.EltSize, CD8VT1>;
7964   }
7965 }
7966 defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
7967                                          X86froundsRnd, WriteCvtSD2SS, f64x_info,
7968                                          f32x_info>, XD, REX_W;
7969 defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
7970                                           X86fpextsSAE, WriteCvtSS2SD, f32x_info,
7971                                           f64x_info>, XS;
7972 defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
7973                                           X86froundsRnd, WriteCvtSD2SS, f64x_info,
7974                                           f16x_info, HasFP16>, T_MAP5XD, REX_W;
7975 defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
7976                                           X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7977                                           f64x_info, HasFP16>, T_MAP5XS;
7978 defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
7979                                           X86froundsRnd, WriteCvtSD2SS, f32x_info,
7980                                           f16x_info, HasFP16>, T_MAP5PS;
7981 defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
7982                                           X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7983                                           f32x_info, HasFP16>, T_MAP6PS;
7985 def : Pat<(f64 (any_fpextend FR32X:$src)),
7986           (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7987           Requires<[HasAVX512]>;
7988 def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
7989           (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7990           Requires<[HasAVX512, OptForSize]>;
7992 def : Pat<(f32 (any_fpround FR64X:$src)),
7993           (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7994            Requires<[HasAVX512]>;
7996 def : Pat<(f32 (any_fpextend FR16X:$src)),
7997           (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
7998           Requires<[HasFP16]>;
7999 def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
8000           (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
8001           Requires<[HasFP16, OptForSize]>;
8003 def : Pat<(f64 (any_fpextend FR16X:$src)),
8004           (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
8005           Requires<[HasFP16]>;
8006 def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
8007           (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
8008           Requires<[HasFP16, OptForSize]>;
8010 def : Pat<(f16 (any_fpround FR32X:$src)),
8011           (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
8012            Requires<[HasFP16]>;
8013 def : Pat<(f16 (any_fpround FR64X:$src)),
8014           (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
8015            Requires<[HasFP16]>;
8017 def : Pat<(v4f32 (X86Movss
8018                    (v4f32 VR128X:$dst),
8019                    (v4f32 (scalar_to_vector
8020                      (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
8021           (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
8022           Requires<[HasAVX512]>;
8024 def : Pat<(v2f64 (X86Movsd
8025                    (v2f64 VR128X:$dst),
8026                    (v2f64 (scalar_to_vector
8027                      (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
8028           (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
8029           Requires<[HasAVX512]>;
8031 //===----------------------------------------------------------------------===//
8032 // AVX-512  Vector convert from signed/unsigned integer to float/double
8033 //          and from float/double to signed/unsigned integer
8034 //===----------------------------------------------------------------------===//
8036 multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8037                           X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode,
8038                           X86FoldableSchedWrite sched,
8039                           string Broadcast = _.BroadcastStr,
8040                           string Alias = "", X86MemOperand MemOp = _Src.MemOp,
8041                           RegisterClass MaskRC = _.KRCWM,
8042                           dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
8043                           dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
8044 let Uses = [MXCSR], mayRaiseFPException = 1 in {
8045   defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
8046                          (ins _Src.RC:$src),
8047                          (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
8048                          (ins MaskRC:$mask, _Src.RC:$src),
8049                           OpcodeStr, "$src", "$src",
8050                          (_.VT (OpNode (_Src.VT _Src.RC:$src))),
8051                          (vselect_mask MaskRC:$mask,
8052                                        (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
8053                                        _.RC:$src0),
8054                          (vselect_mask MaskRC:$mask,
8055                                        (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
8056                                        _.ImmAllZerosV)>,
8057                          EVEX, Sched<[sched]>;
8059   defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
8060                          (ins MemOp:$src),
8061                          (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
8062                          (ins MaskRC:$mask, MemOp:$src),
8063                          OpcodeStr#Alias, "$src", "$src",
8064                          LdDAG,
8065                          (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
8066                          (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
8067                          EVEX, Sched<[sched.Folded]>;
8069   defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
8070                          (ins _Src.ScalarMemOp:$src),
8071                          (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
8072                          (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
8073                          OpcodeStr,
8074                          "${src}"#Broadcast, "${src}"#Broadcast,
8075                          (_.VT (OpNode (_Src.VT
8076                                   (_Src.BroadcastLdFrag addr:$src))
8077                             )),
8078                          (vselect_mask MaskRC:$mask,
8079                                        (_.VT
8080                                         (MaskOpNode
8081                                          (_Src.VT
8082                                           (_Src.BroadcastLdFrag addr:$src)))),
8083                                        _.RC:$src0),
8084                          (vselect_mask MaskRC:$mask,
8085                                        (_.VT
8086                                         (MaskOpNode
8087                                          (_Src.VT
8088                                           (_Src.BroadcastLdFrag addr:$src)))),
8089                                        _.ImmAllZerosV)>,
8090                          EVEX, EVEX_B, Sched<[sched.Folded]>;
8091   }
8092 }
8093 // Conversion with SAE - suppress all exceptions
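// (e.g. in AT&T syntax, "vcvttps2dq {sae}, %zmm1, %zmm0" performs the
// conversion with all floating-point exceptions suppressed.)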
8094 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8095                               X86VectorVTInfo _Src, SDNode OpNodeSAE,
8096                               X86FoldableSchedWrite sched> {
8097   let Uses = [MXCSR] in
8098   defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8099                         (ins _Src.RC:$src), OpcodeStr,
8100                         "{sae}, $src", "$src, {sae}",
8101                         (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
8102                         EVEX, EVEX_B, Sched<[sched]>;
8103 }
8105 // Conversion with rounding control (RC)
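// (e.g. in AT&T syntax, "vcvtdq2ps {rn-sae}, %zmm1, %zmm0" uses round-to-nearest;
// the embedded rounding mode overrides MXCSR and implies SAE.)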
8106 multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8107                          X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
8108                          X86FoldableSchedWrite sched> {
8109   let Uses = [MXCSR] in
8110   defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8111                         (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
8112                         "$rc, $src", "$src, $rc",
8113                         (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
8114                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
8115 }
8117 // Similar to avx512_vcvt_fp, but uses an extload for the memory form.
8118 multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8119                                 X86VectorVTInfo _Src, SDPatternOperator OpNode,
8120                                 SDNode MaskOpNode,
8121                                 X86FoldableSchedWrite sched,
8122                                 string Broadcast = _.BroadcastStr,
8123                                 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
8124                                 RegisterClass MaskRC = _.KRCWM>
8125   : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
8126                    Alias, MemOp, MaskRC,
8127                    (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
8128                    (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
8130 // Extend [Float to Double, Half to Float]
8131 multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
8132                              AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8133                              X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
8134   let Predicates = [prd] in {
8135     defm Z : avx512_vcvt_fpextend<opc, OpcodeStr,  _dst.info512, _src.info256,
8136                             any_fpextend, fpextend, sched.ZMM>,
8137              avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
8138                                 X86vfpextSAE, sched.ZMM>, EVEX_V512;
8139   }
8140   let Predicates = [prd, HasVLX] in {
8141     defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
8142                                X86any_vfpext, X86vfpext, sched.XMM,
8143                                _dst.info128.BroadcastStr,
8144                                "", f64mem>, EVEX_V128;
8145     defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
8146                                any_fpextend, fpextend, sched.YMM>, EVEX_V256;
8147   }
8148 }
8150 // Truncate [Double to Float, Float to Half]
8151 multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
8152                             AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8153                             X86SchedWriteWidths sched, Predicate prd = HasAVX512,
8154                             PatFrag bcast128 = _src.info128.BroadcastLdFrag,
8155                             PatFrag loadVT128 = _src.info128.LdFrag,
8156                             RegisterClass maskRC128 = _src.info128.KRCWM> {
8157   let Predicates = [prd] in {
8158     defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
8159                             X86any_vfpround, X86vfpround, sched.ZMM>,
8160              avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
8161                                X86vfproundRnd, sched.ZMM>, EVEX_V512;
8162   }
8163   let Predicates = [prd, HasVLX] in {
8164     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
8165                                null_frag, null_frag, sched.XMM,
8166                                _src.info128.BroadcastStr, "{x}",
8167                                f128mem, maskRC128>, EVEX_V128;
8168     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
8169                                X86any_vfpround, X86vfpround,
8170                                sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;
8172     // Special patterns to allow use of X86vmfpround for masking. Instruction
8173     // patterns have been disabled with null_frag.
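    // (For the Z128 form the write-mask has only as many elements as the source
    // vector, fewer than the destination vector holds, so the generic
    // vselect_mask-based masking in avx512_vcvt_fp cannot describe it;
    // X86vmfpround carries the mask and passthru operands explicitly.)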
8174     def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
8175               (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
8176     def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
8177                             maskRC128:$mask),
8178               (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
8179     def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
8180                             maskRC128:$mask),
8181               (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;
8183     def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
8184               (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
8185     def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
8186                             maskRC128:$mask),
8187               (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
8188     def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
8189                             maskRC128:$mask),
8190               (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;
8192     def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
8193               (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
8194     def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
8195                             (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
8196               (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
8197     def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
8198                             _dst.info128.ImmAllZerosV, maskRC128:$mask),
8199               (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
8200   }
8202   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8203                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
8204   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8205                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8206                   VK2WM:$mask, VR128X:$src), 0, "att">;
8207   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
8208                   "$dst {${mask}} {z}, $src}",
8209                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8210                   VK2WM:$mask, VR128X:$src), 0, "att">;
8211   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8212                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
8213   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8214                   "$dst {${mask}}, ${src}{1to2}}",
8215                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8216                   VK2WM:$mask, f64mem:$src), 0, "att">;
8217   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8218                   "$dst {${mask}} {z}, ${src}{1to2}}",
8219                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8220                   VK2WM:$mask, f64mem:$src), 0, "att">;
8222   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8223                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
8224   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8225                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8226                   VK4WM:$mask, VR256X:$src), 0, "att">;
8227   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8228                   "$dst {${mask}} {z}, $src}",
8229                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8230                   VK4WM:$mask, VR256X:$src), 0, "att">;
8231   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8232                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
8233   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8234                   "$dst {${mask}}, ${src}{1to4}}",
8235                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8236                   VK4WM:$mask, f64mem:$src), 0, "att">;
8237   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8238                   "$dst {${mask}} {z}, ${src}{1to4}}",
8239                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8240                   VK4WM:$mask, f64mem:$src), 0, "att">;
8241 }
8243 defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
8244                                   avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
8245                                   REX_W, PD, EVEX_CD8<64, CD8VF>;
8246 defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
8247                                    avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
8248                                    PS, EVEX_CD8<32, CD8VH>;
8250 // Extend Half to Double
8251 multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
8252                             X86SchedWriteWidths sched> {
8253   let Predicates = [HasFP16] in {
8254     defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
8255                                   any_fpextend, fpextend, sched.ZMM>,
8256              avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
8257                                 X86vfpextSAE, sched.ZMM>, EVEX_V512;
8258     def : Pat<(v8f64 (extloadv8f16 addr:$src)),
8259                 (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
8260   }
8261   let Predicates = [HasFP16, HasVLX] in {
8262     defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
8263                                      X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
8264                                      f32mem>, EVEX_V128;
8265     defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
8266                                      X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
8267                                      f64mem>, EVEX_V256;
8268   }
8269 }
8271 // Truncate Double to Half
8272 multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
8273   let Predicates = [HasFP16] in {
8274     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
8275                             X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
8276              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
8277                                X86vfproundRnd, sched.ZMM>, EVEX_V512;
8278   }
8279   let Predicates = [HasFP16, HasVLX] in {
8280     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
8281                                null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8282                                VK2WM>, EVEX_V128;
8283     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
8284                                null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
8285                                VK4WM>, EVEX_V256;
8286   }
8287   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8288                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8289                   VR128X:$src), 0, "att">;
8290   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8291                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8292                   VK2WM:$mask, VR128X:$src), 0, "att">;
8293   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8294                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8295                   VK2WM:$mask, VR128X:$src), 0, "att">;
8296   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8297                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8298                   i64mem:$src), 0, "att">;
8299   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8300                   "$dst {${mask}}, ${src}{1to2}}",
8301                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8302                   VK2WM:$mask, i64mem:$src), 0, "att">;
8303   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8304                   "$dst {${mask}} {z}, ${src}{1to2}}",
8305                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8306                   VK2WM:$mask, i64mem:$src), 0, "att">;
8308   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8309                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8310                   VR256X:$src), 0, "att">;
8311   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8312                   "$dst {${mask}}, $src}",
8313                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8314                   VK4WM:$mask, VR256X:$src), 0, "att">;
8315   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8316                   "$dst {${mask}} {z}, $src}",
8317                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8318                   VK4WM:$mask, VR256X:$src), 0, "att">;
8319   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8320                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8321                   i64mem:$src), 0, "att">;
8322   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8323                   "$dst {${mask}}, ${src}{1to4}}",
8324                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8325                   VK4WM:$mask, i64mem:$src), 0, "att">;
8326   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8327                   "$dst {${mask}} {z}, ${src}{1to4}}",
8328                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8329                   VK4WM:$mask, i64mem:$src), 0, "att">;
8331   def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
8332                   (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
8333                   VR512:$src), 0, "att">;
8334   def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
8335                   "$dst {${mask}}, $src}",
8336                   (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
8337                   VK8WM:$mask, VR512:$src), 0, "att">;
8338   def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
8339                   "$dst {${mask}} {z}, $src}",
8340                   (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
8341                   VK8WM:$mask, VR512:$src), 0, "att">;
8342   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
8343                   (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
8344                   i64mem:$src), 0, "att">;
8345   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
8346                   "$dst {${mask}}, ${src}{1to8}}",
8347                   (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
8348                   VK8WM:$mask, i64mem:$src), 0, "att">;
8349   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
8350                   "$dst {${mask}} {z}, ${src}{1to8}}",
8351                   (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
8352                   VK8WM:$mask, i64mem:$src), 0, "att">;
8353 }
8355 defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info,
8356                                    avx512vl_f32_info, SchedWriteCvtPD2PS,
8357                                    HasFP16>, T_MAP5PD, EVEX_CD8<32, CD8VF>;
8358 defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
8359                                     avx512vl_f16_info, SchedWriteCvtPS2PD,
8360                                     HasFP16>, T_MAP6PD, EVEX_CD8<16, CD8VH>;
8361 defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
8362                                  REX_W, T_MAP5PD, EVEX_CD8<64, CD8VF>;
8363 defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
8364                                  T_MAP5PS, EVEX_CD8<16, CD8VQ>;
8366 let Predicates = [HasFP16, HasVLX] in {
8367   // Special patterns to allow use of X86vmfpround for masking. Instruction
8368   // patterns have been disabled with null_frag.
8369   def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
8370             (VCVTPD2PHZ256rr VR256X:$src)>;
8371   def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
8372                           VK4WM:$mask)),
8373             (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
8374   def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
8375                           VK4WM:$mask),
8376             (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
8378   def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
8379             (VCVTPD2PHZ256rm addr:$src)>;
8380   def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
8381                           VK4WM:$mask),
8382             (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8383   def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
8384                           VK4WM:$mask),
8385             (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;
8387   def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
8388             (VCVTPD2PHZ256rmb addr:$src)>;
8389   def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8390                           (v8f16 VR128X:$src0), VK4WM:$mask),
8391             (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8392   def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8393                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
8394             (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
8396   def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
8397             (VCVTPD2PHZ128rr VR128X:$src)>;
8398   def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
8399                           VK2WM:$mask),
8400             (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8401   def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
8402                           VK2WM:$mask),
8403             (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
8405   def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
8406             (VCVTPD2PHZ128rm addr:$src)>;
8407   def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
8408                           VK2WM:$mask),
8409             (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8410   def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
8411                           VK2WM:$mask),
8412             (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;
8414   def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
8415             (VCVTPD2PHZ128rmb addr:$src)>;
8416   def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8417                           (v8f16 VR128X:$src0), VK2WM:$mask),
8418             (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8419   def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8420                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
8421             (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
8422 }
8424 // Convert Signed/Unsigned Doubleword to Double
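// i32 -> f64 is always exact (f64 has a 53-bit significand), so these
// conversions neither consult MXCSR nor raise floating-point exceptions.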
8425 let Uses = []<Register>, mayRaiseFPException = 0 in
8426 multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8427                            SDNode MaskOpNode, SDPatternOperator OpNode128,
8428                            SDNode MaskOpNode128,
8429                            X86SchedWriteWidths sched> {
8430   // No rounding in this op
8431   let Predicates = [HasAVX512] in
8432     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
8433                             MaskOpNode, sched.ZMM>, EVEX_V512;
8435   let Predicates = [HasVLX] in {
8436     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
8437                                OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
8438                                "", i64mem, VK2WM,
8439                                (v2f64 (OpNode128 (bc_v4i32
8440                                 (v2i64
8441                                  (scalar_to_vector (loadi64 addr:$src)))))),
8442                                (v2f64 (MaskOpNode128 (bc_v4i32
8443                                 (v2i64
8444                                  (scalar_to_vector (loadi64 addr:$src))))))>,
8445                                EVEX_V128;
8446     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
8447                                MaskOpNode, sched.YMM>, EVEX_V256;
8448   }
8449 }
8451 // Convert Signed/Unsigned Doubleword to Float
8452 multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8453                            SDNode MaskOpNode, SDNode OpNodeRnd,
8454                            X86SchedWriteWidths sched> {
8455   let Predicates = [HasAVX512] in
8456     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
8457                             MaskOpNode, sched.ZMM>,
8458              avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
8459                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8461   let Predicates = [HasVLX] in {
8462     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
8463                                MaskOpNode, sched.XMM>, EVEX_V128;
8464     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
8465                                MaskOpNode, sched.YMM>, EVEX_V256;
8466   }
8467 }
8469 // Convert Float to Signed/Unsigned Doubleword with truncation
8470 multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8471                             SDNode MaskOpNode,
8472                             SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8473   let Predicates = [HasAVX512] in {
8474     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8475                             MaskOpNode, sched.ZMM>,
8476              avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
8477                                 OpNodeSAE, sched.ZMM>, EVEX_V512;
8478   }
8479   let Predicates = [HasVLX] in {
8480     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8481                                MaskOpNode, sched.XMM>, EVEX_V128;
8482     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8483                                MaskOpNode, sched.YMM>, EVEX_V256;
8484   }
8485 }
8487 // Convert Float to Signed/Unsigned Doubleword
8488 multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8489                            SDNode MaskOpNode, SDNode OpNodeRnd,
8490                            X86SchedWriteWidths sched> {
8491   let Predicates = [HasAVX512] in {
8492     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8493                             MaskOpNode, sched.ZMM>,
8494              avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
8495                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
8496   }
8497   let Predicates = [HasVLX] in {
8498     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8499                                MaskOpNode, sched.XMM>, EVEX_V128;
8500     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8501                                MaskOpNode, sched.YMM>, EVEX_V256;
8502   }
8503 }
8505 // Convert Double to Signed/Unsigned Doubleword with truncation
8506 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8507                             SDNode MaskOpNode, SDNode OpNodeSAE,
8508                             X86SchedWriteWidths sched> {
8509   let Predicates = [HasAVX512] in {
8510     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8511                             MaskOpNode, sched.ZMM>,
8512              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
8513                                 OpNodeSAE, sched.ZMM>, EVEX_V512;
8514   }
8515   let Predicates = [HasVLX] in {
8516     // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8517     // memory forms of these instructions in Asm Parser. They have the same
8518     // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8519     // due to the same reason.
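    // E.g. in AT&T syntax "vcvttpd2dqx (%rax), %xmm0" reads 128 bits of memory,
    // while "vcvttpd2dqy (%rax), %xmm0" reads 256 bits.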
8520     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8521                                null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8522                                VK2WM>, EVEX_V128;
8523     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8524                                MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8525   }
8527   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8528                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8529                   VR128X:$src), 0, "att">;
8530   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8531                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8532                   VK2WM:$mask, VR128X:$src), 0, "att">;
8533   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8534                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8535                   VK2WM:$mask, VR128X:$src), 0, "att">;
8536   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8537                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8538                   f64mem:$src), 0, "att">;
8539   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8540                   "$dst {${mask}}, ${src}{1to2}}",
8541                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8542                   VK2WM:$mask, f64mem:$src), 0, "att">;
8543   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8544                   "$dst {${mask}} {z}, ${src}{1to2}}",
8545                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8546                   VK2WM:$mask, f64mem:$src), 0, "att">;
8548   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8549                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8550                   VR256X:$src), 0, "att">;
8551   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8552                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8553                   VK4WM:$mask, VR256X:$src), 0, "att">;
8554   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8555                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8556                   VK4WM:$mask, VR256X:$src), 0, "att">;
8557   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8558                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8559                   f64mem:$src), 0, "att">;
8560   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8561                   "$dst {${mask}}, ${src}{1to4}}",
8562                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8563                   VK4WM:$mask, f64mem:$src), 0, "att">;
8564   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8565                   "$dst {${mask}} {z}, ${src}{1to4}}",
8566                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8567                   VK4WM:$mask, f64mem:$src), 0, "att">;
8568 }
8570 // Convert Double to Signed/Unsigned Doubleword
8571 multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8572                            SDNode MaskOpNode, SDNode OpNodeRnd,
8573                            X86SchedWriteWidths sched> {
8574   let Predicates = [HasAVX512] in {
8575     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8576                             MaskOpNode, sched.ZMM>,
8577              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
8578                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8579   }
8580   let Predicates = [HasVLX] in {
8581     // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8582     // memory forms of these instructions in Asm Parser. They have the same
8583     // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8584     // due to the same reason.
8585     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8586                                null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8587                                VK2WM>, EVEX_V128;
8588     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8589                                MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8590   }
8592   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8593                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
8594   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8595                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8596                   VK2WM:$mask, VR128X:$src), 0, "att">;
8597   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8598                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8599                   VK2WM:$mask, VR128X:$src), 0, "att">;
8600   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8601                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8602                   f64mem:$src), 0, "att">;
8603   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8604                   "$dst {${mask}}, ${src}{1to2}}",
8605                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8606                   VK2WM:$mask, f64mem:$src), 0, "att">;
8607   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8608                   "$dst {${mask}} {z}, ${src}{1to2}}",
8609                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8610                   VK2WM:$mask, f64mem:$src), 0, "att">;
8612   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8613                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
8614   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8615                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8616                   VK4WM:$mask, VR256X:$src), 0, "att">;
8617   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8618                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8619                   VK4WM:$mask, VR256X:$src), 0, "att">;
8620   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8621                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8622                   f64mem:$src), 0, "att">;
8623   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8624                   "$dst {${mask}}, ${src}{1to4}}",
8625                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8626                   VK4WM:$mask, f64mem:$src), 0, "att">;
8627   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8628                   "$dst {${mask}} {z}, ${src}{1to4}}",
8629                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8630                   VK4WM:$mask, f64mem:$src), 0, "att">;
8631 }
8633 // Convert Double to Signed/Unsigned Quadword
8634 multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8635                            SDNode MaskOpNode, SDNode OpNodeRnd,
8636                            X86SchedWriteWidths sched> {
8637   let Predicates = [HasDQI] in {
8638     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8639                             MaskOpNode, sched.ZMM>,
8640              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
8641                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8642   }
8643   let Predicates = [HasDQI, HasVLX] in {
8644     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8645                                MaskOpNode, sched.XMM>, EVEX_V128;
8646     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8647                                MaskOpNode, sched.YMM>, EVEX_V256;
8648   }
8649 }
8651 // Convert Double to Signed/Unsigned Quadword with truncation
8652 multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8653                             SDNode MaskOpNode, SDNode OpNodeRnd,
8654                             X86SchedWriteWidths sched> {
8655   let Predicates = [HasDQI] in {
8656     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8657                             MaskOpNode, sched.ZMM>,
8658              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
8659                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
8660   }
8661   let Predicates = [HasDQI, HasVLX] in {
8662     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8663                                MaskOpNode, sched.XMM>, EVEX_V128;
8664     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8665                                MaskOpNode, sched.YMM>, EVEX_V256;
8666   }
8667 }
8669 // Convert Signed/Unsigned Quadword to Double
8670 multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8671                            SDNode MaskOpNode, SDNode OpNodeRnd,
8672                            X86SchedWriteWidths sched> {
8673   let Predicates = [HasDQI] in {
8674     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
8675                             MaskOpNode, sched.ZMM>,
8676              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
8677                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8678   }
8679   let Predicates = [HasDQI, HasVLX] in {
8680     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
8681                                MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
8682     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
8683                                MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
8684   }
8685 }
8687 // Convert Float to Signed/Unsigned Quadword
8688 multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8689                            SDNode MaskOpNode, SDNode OpNodeRnd,
8690                            X86SchedWriteWidths sched> {
8691   let Predicates = [HasDQI] in {
8692     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8693                             MaskOpNode, sched.ZMM>,
8694              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
8695                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8696   }
8697   let Predicates = [HasDQI, HasVLX] in {
8698     // Explicitly specified broadcast string, since we take only 2 elements
8699     // from v4f32x_info source
8700     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8701                                MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8702                                (v2i64 (OpNode (bc_v4f32
8703                                 (v2f64
8704                                  (scalar_to_vector (loadf64 addr:$src)))))),
8705                                (v2i64 (MaskOpNode (bc_v4f32
8706                                 (v2f64
8707                                  (scalar_to_vector (loadf64 addr:$src))))))>,
8708                                EVEX_V128;
8709     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8710                                MaskOpNode, sched.YMM>, EVEX_V256;
8711   }
8712 }
8714 // Convert Float to Signed/Unsigned Quadword with truncation
8715 multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8716                             SDNode MaskOpNode, SDNode OpNodeRnd,
8717                             X86SchedWriteWidths sched> {
8718   let Predicates = [HasDQI] in {
8719     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8720                             MaskOpNode, sched.ZMM>,
8721              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
8722                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
8723   }
8724   let Predicates = [HasDQI, HasVLX] in {
8725     // Explicitly specified broadcast string, since we take only 2 elements
8726     // from v4f32x_info source
8727     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8728                                MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8729                                (v2i64 (OpNode (bc_v4f32
8730                                 (v2f64
8731                                  (scalar_to_vector (loadf64 addr:$src)))))),
8732                                (v2i64 (MaskOpNode (bc_v4f32
8733                                 (v2f64
8734                                  (scalar_to_vector (loadf64 addr:$src))))))>,
8735                                EVEX_V128;
8736     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8737                                MaskOpNode, sched.YMM>, EVEX_V256;
8738   }
8739 }
8741 // Convert Signed/Unsigned Quadword to Float
8742 // Also Convert Signed/Unsigned Doubleword to Half
8743 multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8744                                  SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
8745                                  SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
8746                                  AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8747                                  X86SchedWriteWidths sched, Predicate prd = HasDQI> {
8748   let Predicates = [prd] in {
8749     defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
8750                             MaskOpNode, sched.ZMM>,
8751              avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
8752                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8753   }
8754   let Predicates = [prd, HasVLX] in {
8755     // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8756     // memory forms of these instructions in Asm Parser. They have the same
8757     // dest type - '_dst.info128'. We also specify the broadcast string explicitly
8758     // due to the same reason.
8759     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
8760                                null_frag, sched.XMM, _src.info128.BroadcastStr,
8761                                "{x}", i128mem, _src.info128.KRCWM>,
8762                                EVEX_V128, NotEVEX2VEXConvertible;
8763     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
8764                                MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
8765                                "{y}">, EVEX_V256,
8766                                NotEVEX2VEXConvertible;
8768     // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
8769     // patterns have been disabled with null_frag.
8770     def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
8771               (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
8772     def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
8773                              _src.info128.KRCWM:$mask),
8774               (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
8775     def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
8776                              _src.info128.KRCWM:$mask),
8777               (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;
8779     def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
8780               (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
8781     def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
8782                              _src.info128.KRCWM:$mask),
8783               (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8784     def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
8785                              _src.info128.KRCWM:$mask),
8786               (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;
8788     def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
8789               (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
8790     def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8791                              (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
8792               (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8793     def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8794                              _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
8795               (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
8796   }
8798   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8799                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8800                   VR128X:$src), 0, "att">;
8801   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8802                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8803                   VK2WM:$mask, VR128X:$src), 0, "att">;
8804   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8805                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8806                   VK2WM:$mask, VR128X:$src), 0, "att">;
8807   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8808                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8809                   i64mem:$src), 0, "att">;
8810   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8811                   "$dst {${mask}}, ${src}{1to2}}",
8812                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8813                   VK2WM:$mask, i64mem:$src), 0, "att">;
8814   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8815                   "$dst {${mask}} {z}, ${src}{1to2}}",
8816                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8817                   VK2WM:$mask, i64mem:$src), 0, "att">;
8819   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8820                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8821                   VR256X:$src), 0, "att">;
8822   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8823                   "$dst {${mask}}, $src}",
8824                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8825                   VK4WM:$mask, VR256X:$src), 0, "att">;
8826   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8827                   "$dst {${mask}} {z}, $src}",
8828                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8829                   VK4WM:$mask, VR256X:$src), 0, "att">;
8830   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8831                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8832                   i64mem:$src), 0, "att">;
8833   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8834                   "$dst {${mask}}, ${src}{1to4}}",
8835                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8836                   VK4WM:$mask, i64mem:$src), 0, "att">;
8837   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8838                   "$dst {${mask}} {z}, ${src}{1to4}}",
8839                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8840                   VK4WM:$mask, i64mem:$src), 0, "att">;
8841 }
8843 defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
8844                                  X86any_VSintToFP, X86VSintToFP,
8845                                  SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8847 defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
8848                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8849                                 PS, EVEX_CD8<32, CD8VF>;
8851 defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
8852                                  X86cvttp2si, X86cvttp2siSAE,
8853                                  SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>;
8855 defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
8856                                  X86cvttp2si, X86cvttp2siSAE,
8857                                  SchedWriteCvtPD2DQ>,
8858                                  PD, REX_W, EVEX_CD8<64, CD8VF>;
8860 defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
8861                                  X86cvttp2ui, X86cvttp2uiSAE,
8862                                  SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>;
8864 defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
8865                                  X86cvttp2ui, X86cvttp2uiSAE,
8866                                  SchedWriteCvtPD2DQ>,
8867                                  PS, REX_W, EVEX_CD8<64, CD8VF>;
8869 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
8870                                   uint_to_fp, X86any_VUintToFP, X86VUintToFP,
8871                                   SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8873 defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
8874                                  uint_to_fp, X86VUintToFpRnd,
8875                                  SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>;
8877 defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
8878                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8879                                  EVEX_CD8<32, CD8VF>;
8881 defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
8882                                  X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
8883                                  REX_W, EVEX_CD8<64, CD8VF>;
8885 defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
8886                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8887                                  PS, EVEX_CD8<32, CD8VF>;
8889 defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
8890                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W,
8891                                  PS, EVEX_CD8<64, CD8VF>;
8893 defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
8894                                  X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, REX_W,
8895                                  PD, EVEX_CD8<64, CD8VF>;
8897 defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
8898                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8899                                  EVEX_CD8<32, CD8VH>;
8901 defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
8902                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W,
8903                                  PD, EVEX_CD8<64, CD8VF>;
8905 defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
8906                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
8907                                  EVEX_CD8<32, CD8VH>;
8909 defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
8910                                  X86cvttp2si, X86cvttp2siSAE,
8911                                  SchedWriteCvtPD2DQ>, REX_W,
8912                                  PD, EVEX_CD8<64, CD8VF>;
8914 defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
8915                                  X86cvttp2si, X86cvttp2siSAE,
8916                                  SchedWriteCvtPS2DQ>, PD,
8917                                  EVEX_CD8<32, CD8VH>;
8919 defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
8920                                  X86cvttp2ui, X86cvttp2uiSAE,
8921                                  SchedWriteCvtPD2DQ>, REX_W,
8922                                  PD, EVEX_CD8<64, CD8VF>;
8924 defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8925                                  X86cvttp2ui, X86cvttp2uiSAE,
8926                                  SchedWriteCvtPS2DQ>, PD,
8927                                  EVEX_CD8<32, CD8VH>;
8929 defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
8930                             sint_to_fp, X86VSintToFpRnd,
8931                             SchedWriteCvtDQ2PD>, REX_W, XS, EVEX_CD8<64, CD8VF>;
8933 defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
8934                             uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
8935                             REX_W, XS, EVEX_CD8<64, CD8VF>;
8937 defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp,
8938                             X86any_VSintToFP, X86VMSintToFP,
8939                             X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8940                             SchedWriteCvtDQ2PS, HasFP16>,
8941                             T_MAP5PS, EVEX_CD8<32, CD8VF>;
8943 defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp,
8944                             X86any_VUintToFP, X86VMUintToFP,
8945                             X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8946                             SchedWriteCvtDQ2PS, HasFP16>, T_MAP5XD,
8947                             EVEX_CD8<32, CD8VF>;
8949 defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp,
8950                             X86any_VSintToFP, X86VMSintToFP,
8951                             X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8952                             SchedWriteCvtDQ2PS>, REX_W, PS,
8953                             EVEX_CD8<64, CD8VF>;
8955 defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp,
8956                             X86any_VUintToFP, X86VMUintToFP,
8957                             X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8958                             SchedWriteCvtDQ2PS>, REX_W, XD,
8959                             EVEX_CD8<64, CD8VF>;
8961 let Predicates = [HasVLX] in {
8962   // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8963   // patterns have been disabled with null_frag.
8964   def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8965             (VCVTPD2DQZ128rr VR128X:$src)>;
8966   def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8967                           VK2WM:$mask),
8968             (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8969   def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8970                           VK2WM:$mask),
8971             (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8973   def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8974             (VCVTPD2DQZ128rm addr:$src)>;
8975   def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8976                           VK2WM:$mask),
8977             (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8978   def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8979                           VK2WM:$mask),
8980             (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8982   def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
8983             (VCVTPD2DQZ128rmb addr:$src)>;
8984   def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8985                           (v4i32 VR128X:$src0), VK2WM:$mask),
8986             (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8987   def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8988                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8989             (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8991   // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8992   // patterns have been disabled with null_frag.
8993   def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
8994             (VCVTTPD2DQZ128rr VR128X:$src)>;
8995   def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8996                           VK2WM:$mask),
8997             (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8998   def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8999                           VK2WM:$mask),
9000             (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
9002   def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
9003             (VCVTTPD2DQZ128rm addr:$src)>;
9004   def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
9005                           VK2WM:$mask),
9006             (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9007   def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
9008                           VK2WM:$mask),
9009             (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
9011   def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
9012             (VCVTTPD2DQZ128rmb addr:$src)>;
9013   def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
9014                           (v4i32 VR128X:$src0), VK2WM:$mask),
9015             (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9016   def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
9017                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
9018             (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
9020   // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
9021   // patterns have been disabled with null_frag.
9022   def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
9023             (VCVTPD2UDQZ128rr VR128X:$src)>;
9024   def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
9025                            VK2WM:$mask),
9026             (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
9027   def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
9028                            VK2WM:$mask),
9029             (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
9031   def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
9032             (VCVTPD2UDQZ128rm addr:$src)>;
9033   def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
9034                            VK2WM:$mask),
9035             (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9036   def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
9037                            VK2WM:$mask),
9038             (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
9040   def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
9041             (VCVTPD2UDQZ128rmb addr:$src)>;
9042   def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
9043                            (v4i32 VR128X:$src0), VK2WM:$mask),
9044             (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9045   def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
9046                            v4i32x_info.ImmAllZerosV, VK2WM:$mask),
9047             (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
9049   // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
9050   // patterns have been disabled with null_frag.
9051   def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
9052             (VCVTTPD2UDQZ128rr VR128X:$src)>;
9053   def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
9054                           VK2WM:$mask),
9055             (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
9056   def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
9057                           VK2WM:$mask),
9058             (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
9060   def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
9061             (VCVTTPD2UDQZ128rm addr:$src)>;
9062   def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
9063                           VK2WM:$mask),
9064             (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9065   def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
9066                           VK2WM:$mask),
9067             (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
9069   def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
9070             (VCVTTPD2UDQZ128rmb addr:$src)>;
9071   def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
9072                           (v4i32 VR128X:$src0), VK2WM:$mask),
9073             (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9074   def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
9075                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
9076             (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
9079 let Predicates = [HasDQI, HasVLX] in {
9080   def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9081             (VCVTPS2QQZ128rm addr:$src)>;
9082   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9083                                  (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9084                                  VR128X:$src0)),
9085             (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9086   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9087                                  (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9088                                  v2i64x_info.ImmAllZerosV)),
9089             (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
9091   def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9092             (VCVTPS2UQQZ128rm addr:$src)>;
9093   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9094                                  (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9095                                  VR128X:$src0)),
9096             (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9097   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9098                                  (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9099                                  v2i64x_info.ImmAllZerosV)),
9100             (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
9102   def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9103             (VCVTTPS2QQZ128rm addr:$src)>;
9104   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9105                                  (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9106                                  VR128X:$src0)),
9107             (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9108   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9109                                  (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9110                                  v2i64x_info.ImmAllZerosV)),
9111             (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
9113   def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9114             (VCVTTPS2UQQZ128rm addr:$src)>;
9115   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9116                                  (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9117                                  VR128X:$src0)),
9118             (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9119   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9120                                  (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9121                                  v2i64x_info.ImmAllZerosV)),
9122             (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
9125 let Predicates = [HasVLX] in {
9126   def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9127             (VCVTDQ2PDZ128rm addr:$src)>;
9128   def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9129                                  (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9130                                  VR128X:$src0)),
9131             (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9132   def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9133                                  (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9134                                  v2f64x_info.ImmAllZerosV)),
9135             (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
9137   def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9138             (VCVTUDQ2PDZ128rm addr:$src)>;
9139   def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9140                                  (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9141                                  VR128X:$src0)),
9142             (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9143   def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9144                                  (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9145                                  v2f64x_info.ImmAllZerosV)),
9146             (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
9149 //===----------------------------------------------------------------------===//
9150 // Half precision conversion instructions
9151 //===----------------------------------------------------------------------===//
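// vcvtph2ps: packed half-to-single conversion. Register and folded-load forms
// with merge/zero masking; the {sae} form is added by avx512_cvtph2ps_sae below.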
9153 let Uses = [MXCSR], mayRaiseFPException = 1 in
9154 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9155                            X86MemOperand x86memop, dag ld_dag,
9156                            X86FoldableSchedWrite sched> {
9157   defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
9158                             (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
9159                             (X86any_cvtph2ps (_src.VT _src.RC:$src)),
9160                             (X86cvtph2ps (_src.VT _src.RC:$src))>,
9161                             T8PD, Sched<[sched]>;
9162   defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
9163                             (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
9164                             (X86any_cvtph2ps (_src.VT ld_dag)),
9165                             (X86cvtph2ps (_src.VT ld_dag))>,
9166                             T8PD, Sched<[sched.Folded]>;
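// Register-only {sae} form of vcvtph2ps.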
9169 multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9170                                X86FoldableSchedWrite sched> {
9171   let Uses = [MXCSR] in
9172   defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
9173                              (ins _src.RC:$src), "vcvtph2ps",
9174                              "{sae}, $src", "$src, {sae}",
9175                              (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
9176                              T8PD, EVEX_B, Sched<[sched]>;
9179 let Predicates = [HasAVX512] in
9180   defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
9181                                     (load addr:$src), WriteCvtPH2PSZ>,
9182                     avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
9183                     EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
9185 let Predicates = [HasVLX] in {
9186   defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
9187                        (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
9188                        EVEX_CD8<32, CD8VH>;
9189   defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
9190                        (bitconvert (v2i64 (X86vzload64 addr:$src))),
9191                        WriteCvtPH2PS>, EVEX, EVEX_V128,
9192                        EVEX_CD8<32, CD8VH>;
9194   // Pattern match vcvtph2ps of a scalar i64 load.
9195   def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
9196               (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
9197             (VCVTPH2PSZ128rm addr:$src)>;
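// vcvtps2ph: packed single-to-half conversion with an immediate rounding-control
// operand. The register forms carry masked patterns; the store forms have no
// patterns and are matched by the store Pats further below.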
9200 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9201                            X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
9202 let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9203   def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9204              (ins _src.RC:$src1, i32u8imm:$src2),
9205              "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9206              [(set _dest.RC:$dst,
9207                    (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
9208              Sched<[RR]>;
9209   let Constraints = "$src0 = $dst" in
9210   def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9211              (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9212              "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
9213              [(set _dest.RC:$dst,
9214                    (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
9215                                  _dest.RC:$src0, _src.KRCWM:$mask))]>,
9216              Sched<[RR]>, EVEX_K;
9217   def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9218              (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9219              "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
9220              [(set _dest.RC:$dst,
9221                    (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
9222                                  _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
9223              Sched<[RR]>, EVEX_KZ;
9224   let hasSideEffects = 0, mayStore = 1 in {
9225     def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
9226                (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
9227                "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9228                Sched<[MR]>;
9229     def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
9230                (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9231                "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
9232                 EVEX_K, Sched<[MR]>;
9233   }
9237 multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9238                                SchedWrite Sched> {
9239   let hasSideEffects = 0, Uses = [MXCSR] in {
9240     def rrb : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9241               (ins _src.RC:$src1, i32u8imm:$src2),
9242               "vcvtps2ph\t{$src2, {sae}, $src1, $dst|$dst, $src1, {sae}, $src2}",
9243               [(set _dest.RC:$dst,
9244                     (X86cvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
9245               EVEX_B, Sched<[Sched]>;
9246     let Constraints = "$src0 = $dst" in
9247     def rrbk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9248               (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9249               "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}}|$dst {${mask}}, $src1, {sae}, $src2}",
9250               [(set _dest.RC:$dst,
9251                     (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
9252                                   _dest.RC:$src0, _src.KRCWM:$mask))]>,
9253               EVEX_B, Sched<[Sched]>, EVEX_K;
9254     def rrbkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9255               (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9256               "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, {sae}, $src2}",
9257               [(set _dest.RC:$dst,
9258                     (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
9259                                   _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
9260               EVEX_B, Sched<[Sched]>, EVEX_KZ;
9264 let Predicates = [HasAVX512] in {
9265   defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
9266                                     WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
9267                     avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
9268                                         EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
9270   def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
9271             (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
9274 let Predicates = [HasVLX] in {
9275   defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
9276                                        WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
9277                                        EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
9278   defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
9279                                        WriteCvtPS2PH, WriteCvtPS2PHSt>,
9280                                        EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
9282   def : Pat<(store (f64 (extractelt
9283                          (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9284                          (iPTR 0))), addr:$dst),
9285             (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9286   def : Pat<(store (i64 (extractelt
9287                          (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9288                          (iPTR 0))), addr:$dst),
9289             (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9290   def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
9291             (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
9294 //  Unordered/ordered scalar fp compares with SAE that set EFLAGS
9295 multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
9296                               string OpcodeStr, Domain d,
9297                               X86FoldableSchedWrite sched = WriteFComX> {
9298   let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in
9299   def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
9300                   !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
9301                   EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
9304 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9305   defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
9306                                    AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9307   defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
9308                                    AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
9309   defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
9310                                    AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9311   defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
9312                                    AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
9315 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9316   defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
9317                                  "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9318                                  EVEX_CD8<32, CD8VT1>;
9319   defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
9320                                   "ucomisd", SSEPackedDouble>, PD, EVEX,
9321                                   VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9322   defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
9323                                  "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9324                                  EVEX_CD8<32, CD8VT1>;
9325   defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
9326                                  "comisd", SSEPackedDouble>, PD, EVEX,
9327                                   VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9328   let isCodeGenOnly = 1 in {
9329     defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
9330                           sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9331                           EVEX_CD8<32, CD8VT1>;
9332     defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
9333                           sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
9334                           VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9336     defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
9337                           sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9338                           EVEX_CD8<32, CD8VT1>;
9339     defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
9340                           sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
9341                           VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9342   }
9345 let Defs = [EFLAGS], Predicates = [HasFP16] in {
9346   defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
9347                                 SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
9348                                 EVEX_CD8<16, CD8VT1>;
9349   defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
9350                                 SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
9351                                 EVEX_CD8<16, CD8VT1>;
9352   defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
9353                                 "ucomish", SSEPackedSingle>, T_MAP5PS, EVEX,
9354                                 VEX_LIG, EVEX_CD8<16, CD8VT1>;
9355   defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
9356                                 "comish", SSEPackedSingle>, T_MAP5PS, EVEX,
9357                                 VEX_LIG, EVEX_CD8<16, CD8VT1>;
9358   let isCodeGenOnly = 1 in {
9359     defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
9360                                 sse_load_f16, "ucomish", SSEPackedSingle>,
9361                                 T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9363     defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
9364                                 sse_load_f16, "comish", SSEPackedSingle>,
9365                                 T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9366   }
9369 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
9370 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9371                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
9372                          Predicate prd = HasAVX512> {
9373   let Predicates = [prd], ExeDomain = _.ExeDomain in {
9374   defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9375                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9376                            "$src2, $src1", "$src1, $src2",
9377                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9378                            EVEX_4V, VEX_LIG, Sched<[sched]>;
9379   defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9380                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9381                          "$src2, $src1", "$src1, $src2",
9382                          (OpNode (_.VT _.RC:$src1),
9383                           (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG,
9384                           Sched<[sched.Folded, sched.ReadAfterFold]>;
9388 defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
9389                                f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
9390                                T_MAP6PD;
9391 defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
9392                                  SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
9393                                  EVEX_CD8<16, CD8VT1>, T_MAP6PD;
9394 let Uses = [MXCSR] in {
9395 defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
9396                                f32x_info>, EVEX_CD8<32, CD8VT1>,
9397                                T8PD;
9398 defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
9399                                f64x_info>, REX_W, EVEX_CD8<64, CD8VT1>,
9400                                T8PD;
9401 defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
9402                                  SchedWriteFRsqrt.Scl, f32x_info>,
9403                                  EVEX_CD8<32, CD8VT1>, T8PD;
9404 defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
9405                                  SchedWriteFRsqrt.Scl, f64x_info>, REX_W,
9406                                  EVEX_CD8<64, CD8VT1>, T8PD;
9409 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
9410 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
9411                          X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9412   let ExeDomain = _.ExeDomain in {
9413   defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9414                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
9415                          (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
9416                          Sched<[sched]>;
9417   defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9418                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9419                          (OpNode (_.VT
9420                            (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
9421                          Sched<[sched.Folded, sched.ReadAfterFold]>;
9422   defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9423                           (ins _.ScalarMemOp:$src), OpcodeStr,
9424                           "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9425                           (OpNode (_.VT
9426                             (_.BroadcastLdFrag addr:$src)))>,
9427                           EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9428   }
9431 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
9432                                 X86SchedWriteWidths sched> {
9433   let Uses = [MXCSR] in {
9434   defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
9435                              v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
9436   defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
9437                              v8f64_info>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
9438   }
9439   let Predicates = [HasFP16] in
9440   defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
9441                            v32f16_info>, EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9443   // Define only if the AVX512VL feature is present.
9444   let Predicates = [HasVLX], Uses = [MXCSR] in {
9445     defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9446                                   OpNode, sched.XMM, v4f32x_info>,
9447                                   EVEX_V128, EVEX_CD8<32, CD8VF>;
9448     defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9449                                   OpNode, sched.YMM, v8f32x_info>,
9450                                   EVEX_V256, EVEX_CD8<32, CD8VF>;
9451     defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9452                                   OpNode, sched.XMM, v2f64x_info>,
9453                                   EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>;
9454     defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9455                                   OpNode, sched.YMM, v4f64x_info>,
9456                                   EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>;
9457   }
9458   let Predicates = [HasFP16, HasVLX] in {
9459     defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9460                                 OpNode, sched.XMM, v8f16x_info>,
9461                                 EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9462     defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9463                                 OpNode, sched.YMM, v16f16x_info>,
9464                                 EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9465   }
9468 defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
9469 defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;
9471 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
9472 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9473                          SDNode OpNode, SDNode OpNodeSAE,
9474                          X86FoldableSchedWrite sched> {
9475   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
9476   defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9477                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9478                            "$src2, $src1", "$src1, $src2",
9479                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9480                            Sched<[sched]>, SIMD_EXC;
9482   defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9483                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9484                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
9485                             (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9486                             EVEX_B, Sched<[sched]>;
9488   defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9489                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9490                          "$src2, $src1", "$src1, $src2",
9491                          (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
9492                          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9493   }
9496 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9497                         SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9498   defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
9499                            sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD, EVEX_4V;
9500   defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
9501                            sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8PD, EVEX_4V;
9504 multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
9505                         SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9506   let Predicates = [HasFP16] in
9507   defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode,  OpNodeSAE, sched>,
9508                EVEX_CD8<16, CD8VT1>, T_MAP6PD, EVEX_4V;
9511 let Predicates = [HasERI] in {
9512   defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
9513                                SchedWriteFRcp.Scl>;
9514   defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
9515                                SchedWriteFRsqrt.Scl>;
9518 defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9519                               SchedWriteFRnd.Scl>,
9520                  avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9521                                   SchedWriteFRnd.Scl>;
9522 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
9524 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9525                          SDNode OpNode, X86FoldableSchedWrite sched> {
9526   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9527   defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9528                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
9529                          (OpNode (_.VT _.RC:$src))>,
9530                          Sched<[sched]>;
9532   defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9533                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9534                          (OpNode (_.VT
9535                              (bitconvert (_.LdFrag addr:$src))))>,
9536                           Sched<[sched.Folded, sched.ReadAfterFold]>;
9538   defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9539                          (ins _.ScalarMemOp:$src), OpcodeStr,
9540                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9541                          (OpNode (_.VT
9542                                   (_.BroadcastLdFrag addr:$src)))>,
9543                          EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9544   }
9546 multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9547                          SDNode OpNode, X86FoldableSchedWrite sched> {
9548   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
9549   defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9550                         (ins _.RC:$src), OpcodeStr,
9551                         "{sae}, $src", "$src, {sae}",
9552                         (OpNode (_.VT _.RC:$src))>,
9553                         EVEX_B, Sched<[sched]>;
9556 multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
9557                        SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9558    defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
9559               avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
9560               T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
9561    defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
9562               avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
9563               T8PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
9566 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
9567                                   SDNode OpNode, X86SchedWriteWidths sched> {
9568   // Define only if the AVX512VL feature is present.
9569   let Predicates = [HasVLX] in {
9570     defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
9571                                 sched.XMM>,
9572                                 EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
9573     defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
9574                                 sched.YMM>,
9575                                 EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
9576     defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
9577                                 sched.XMM>,
9578                                 EVEX_V128, REX_W, T8PD, EVEX_CD8<64, CD8VF>;
9579     defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
9580                                 sched.YMM>,
9581                                 EVEX_V256, REX_W, T8PD, EVEX_CD8<64, CD8VF>;
9582   }
9585 multiclass  avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
9586                        SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9587   let Predicates = [HasFP16] in
9588   defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
9589               avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
9590               T_MAP6PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
9591   let Predicates = [HasFP16, HasVLX] in {
9592     defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
9593                                      EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9594     defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
9595                                      EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9596   }
9598 let Predicates = [HasERI] in {
9599  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
9600                             SchedWriteFRsqrt>, EVEX;
9601  defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
9602                             SchedWriteFRcp>, EVEX;
9603  defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
9604                             SchedWriteFAdd>, EVEX;
9606 defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9607                             SchedWriteFRnd>,
9608                  avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9609                                      SchedWriteFRnd>,
9610                  avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
9611                                           SchedWriteFRnd>, EVEX;
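// Packed fsqrt with embedded rounding control (AVX512RC operand, EVEX.B set).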
9613 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
9614                                     X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9615   let ExeDomain = _.ExeDomain in
9616   defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9617                          (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
9618                          (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
9619                          EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
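// Packed fsqrt: register, load, and broadcast-load forms with masking.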
9622 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
9623                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9624   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9625   defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
9626                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
9627                          (_.VT (any_fsqrt _.RC:$src)),
9628                          (_.VT (fsqrt _.RC:$src))>, EVEX,
9629                          Sched<[sched]>;
9630   defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9631                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9632                          (any_fsqrt (_.VT (_.LdFrag addr:$src))),
9633                          (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
9634                          Sched<[sched.Folded, sched.ReadAfterFold]>;
9635   defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9636                           (ins _.ScalarMemOp:$src), OpcodeStr,
9637                           "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9638                           (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
9639                           (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
9640                           EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9641   }
9644 let Uses = [MXCSR], mayRaiseFPException = 1 in
9645 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
9646                                   X86SchedWriteSizes sched> {
9647   let Predicates = [HasFP16] in
9648   defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9649                                 sched.PH.ZMM, v32f16_info>,
9650                                 EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9651   let Predicates = [HasFP16, HasVLX] in {
9652     defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9653                                      sched.PH.XMM, v8f16x_info>,
9654                                      EVEX_V128, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9655     defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9656                                      sched.PH.YMM, v16f16x_info>,
9657                                      EVEX_V256, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9658   }
9659   defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9660                                 sched.PS.ZMM, v16f32_info>,
9661                                 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
9662   defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9663                                 sched.PD.ZMM, v8f64_info>,
9664                                 EVEX_V512, REX_W, PD, EVEX_CD8<64, CD8VF>;
9665   // Define only if the AVX512VL feature is present.
9666   let Predicates = [HasVLX] in {
9667     defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9668                                      sched.PS.XMM, v4f32x_info>,
9669                                      EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
9670     defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9671                                      sched.PS.YMM, v8f32x_info>,
9672                                      EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
9673     defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9674                                      sched.PD.XMM, v2f64x_info>,
9675                                      EVEX_V128, REX_W, PD, EVEX_CD8<64, CD8VF>;
9676     defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9677                                      sched.PD.YMM, v4f64x_info>,
9678                                      EVEX_V256, REX_W, PD, EVEX_CD8<64, CD8VF>;
9679   }
9682 let Uses = [MXCSR] in
9683 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
9684                                         X86SchedWriteSizes sched> {
9685   let Predicates = [HasFP16] in
9686   defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
9687                                       sched.PH.ZMM, v32f16_info>,
9688                                       EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9689   defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
9690                                       sched.PS.ZMM, v16f32_info>,
9691                                       EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
9692   defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
9693                                       sched.PD.ZMM, v8f64_info>,
9694                                       EVEX_V512, REX_W, PD, EVEX_CD8<64, CD8VF>;
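// Scalar fsqrt: masked intrinsic (_Int) forms, a register-only form with
// rounding control, and codegen-only FR forms selected by the patterns below.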
9697 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9698                               X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
9699   let ExeDomain = _.ExeDomain, Predicates = [prd] in {
9700     defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9701                          (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9702                          "$src2, $src1", "$src1, $src2",
9703                          (X86fsqrts (_.VT _.RC:$src1),
9704                                     (_.VT _.RC:$src2))>,
9705                          Sched<[sched]>, SIMD_EXC;
9706     defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9707                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9708                          "$src2, $src1", "$src1, $src2",
9709                          (X86fsqrts (_.VT _.RC:$src1),
9710                                     (_.ScalarIntMemFrags addr:$src2))>,
9711                          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9712     let Uses = [MXCSR] in
9713     defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9714                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
9715                          "$rc, $src2, $src1", "$src1, $src2, $rc",
9716                          (X86fsqrtRnds (_.VT _.RC:$src1),
9717                                      (_.VT _.RC:$src2),
9718                                      (i32 timm:$rc))>,
9719                          EVEX_B, EVEX_RC, Sched<[sched]>;
9721     let isCodeGenOnly = 1, hasSideEffects = 0 in {
9722       def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9723                 (ins _.FRC:$src1, _.FRC:$src2),
9724                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9725                 Sched<[sched]>, SIMD_EXC;
9726       let mayLoad = 1 in
9727         def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9728                   (ins _.FRC:$src1, _.ScalarMemOp:$src2),
9729                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9730                   Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9731     }
9732   }
9734   let Predicates = [prd] in {
9735     def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
9736               (!cast<Instruction>(Name#Zr)
9737                   (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
9738   }
9740   let Predicates = [prd, OptForSize] in {
9741     def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
9742               (!cast<Instruction>(Name#Zm)
9743                   (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
9744   }
9747 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
9748                                   X86SchedWriteSizes sched> {
9749   defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>,
9750                         EVEX_CD8<16, CD8VT1>, EVEX_4V, T_MAP5XS;
9751   defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
9752                         EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
9753   defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
9754                         EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, REX_W;
9757 defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
9758              avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
9760 defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
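// Scalar rndscale: masked intrinsic (_Int) forms, an {sae} form, and
// codegen-only forms used by the X86any_VRndScale patterns below.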
9762 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
9763                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9764   let ExeDomain = _.ExeDomain in {
9765   defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9766                            (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9767                            "$src3, $src2, $src1", "$src1, $src2, $src3",
9768                            (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9769                            (i32 timm:$src3)))>,
9770                            Sched<[sched]>, SIMD_EXC;
9772   let Uses = [MXCSR] in
9773   defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9774                          (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9775                          "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
9776                          (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9777                          (i32 timm:$src3)))>, EVEX_B,
9778                          Sched<[sched]>;
9780   defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9781                          (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
9782                          OpcodeStr,
9783                          "$src3, $src2, $src1", "$src1, $src2, $src3",
9784                          (_.VT (X86RndScales _.RC:$src1,
9785                                 (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
9786                          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9788   let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
9789     def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9790                (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
9791                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9792                []>, Sched<[sched]>, SIMD_EXC;
9794     let mayLoad = 1 in
9795       def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9796                  (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9797                  OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9798                  []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9799   }
9800   }
9802   let Predicates = [HasAVX512] in {
9803     def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
9804               (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
9805                _.FRC:$src1, timm:$src2))>;
9806   }
9808   let Predicates = [HasAVX512, OptForSize] in {
9809     def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
9810               (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
9811                addr:$src1, timm:$src2))>;
9812   }
9815 let Predicates = [HasFP16] in
9816 defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
9817                                            SchedWriteFRnd.Scl, f16x_info>,
9818                                            AVX512PSIi8Base, TA, EVEX_4V,
9819                                            EVEX_CD8<16, CD8VT1>;
9821 defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
9822                                            SchedWriteFRnd.Scl, f32x_info>,
9823                                            AVX512AIi8Base, EVEX_4V, VEX_LIG,
9824                                            EVEX_CD8<32, CD8VT1>;
9826 defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
9827                                            SchedWriteFRnd.Scl, f64x_info>,
9828                                            REX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
9829                                            EVEX_CD8<64, CD8VT1>;
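// Match a scalar op under X86selects_mask, re-inserted with a scalar move node,
// and select the masked _Intk/_Intkz instruction forms.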
9831 multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
9832                                 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
9833                                 dag OutMask, Predicate BasePredicate> {
9834   let Predicates = [BasePredicate] in {
9835     def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9836                (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9837                (extractelt _.VT:$dst, (iPTR 0))))),
9838               (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9839                _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
9841     def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9842                (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9843                ZeroFP))),
9844               (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9845                OutMask, _.VT:$src2, _.VT:$src1)>;
9846   }
9849 defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
9850                             (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info,
9851                             fp16imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasFP16>;
9852 defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9853                             (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9854                             fp32imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9855 defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9856                             (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9857                             fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9860 //===----------------------------------------------------------------------===//
9861 // Integer truncate and extend operations
9862 //===----------------------------------------------------------------------===//
9864 // PatFrags that contain a select and a truncate op. They take operands in the
9865 // same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
9866 // either to the multiclasses.
9867 def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
9868                            (vselect_mask node:$mask,
9869                                          (trunc node:$src), node:$src0)>;
9870 def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
9871                             (vselect_mask node:$mask,
9872                                           (X86vtruncs node:$src), node:$src0)>;
9873 def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
9874                              (vselect_mask node:$mask,
9875                                            (X86vtruncus node:$src), node:$src0)>;
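// Register and store truncate forms (rr/rrk/rrkz, mr/mrk). The store forms
// have no patterns; they are matched via avx512_trunc_mr_lowering.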
9877 multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9878                               SDPatternOperator MaskNode,
9879                               X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9880                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9881   let ExeDomain = DestInfo.ExeDomain in {
9882   def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9883              (ins SrcInfo.RC:$src),
9884              OpcodeStr # "\t{$src, $dst|$dst, $src}",
9885              [(set DestInfo.RC:$dst,
9886                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9887              EVEX, Sched<[sched]>;
9888   let Constraints = "$src0 = $dst" in
9889   def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9890              (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9891              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9892              [(set DestInfo.RC:$dst,
9893                    (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9894                              (DestInfo.VT DestInfo.RC:$src0),
9895                              SrcInfo.KRCWM:$mask))]>,
9896              EVEX, EVEX_K, Sched<[sched]>;
9897   def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9898              (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9899              OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9900              [(set DestInfo.RC:$dst,
9901                    (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9902                              DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9903              EVEX, EVEX_KZ, Sched<[sched]>;
9904   }
9906   let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9907     def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9908                (ins x86memop:$dst, SrcInfo.RC:$src),
9909                OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9910                EVEX, Sched<[sched.Folded]>;
9912     def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9913                (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9914                OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9915                EVEX, EVEX_K, Sched<[sched.Folded]>;
9916   } // mayStore = 1, hasSideEffects = 0
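// Lower truncating stores and masked truncating stores to the mr/mrk forms.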
9919 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9920                                     PatFrag truncFrag, PatFrag mtruncFrag,
9921                                     string Name> {
9923   def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9924             (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
9925                                     addr:$dst, SrcInfo.RC:$src)>;
9927   def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9928                         SrcInfo.KRCWM:$mask),
9929             (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
9930                             addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
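// Instantiate the 128/256/512-bit truncate forms; the 128/256-bit variants
// additionally require VLX.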
9933 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9934                         SDNode OpNode256, SDNode OpNode512,
9935                         SDPatternOperator MaskNode128,
9936                         SDPatternOperator MaskNode256,
9937                         SDPatternOperator MaskNode512,
9938                         X86SchedWriteWidths sched,
9939                         AVX512VLVectorVTInfo VTSrcInfo,
9940                         X86VectorVTInfo DestInfoZ128,
9941                         X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9942                         X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9943                         X86MemOperand x86memopZ, PatFrag truncFrag,
9944                         PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9946   let Predicates = [HasVLX, prd] in {
9947     defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched.XMM,
9948                              VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9949                 avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag,
9950                                          mtruncFrag, NAME>, EVEX_V128;
9952     defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched.YMM,
9953                              VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9954                 avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag,
9955                                          mtruncFrag, NAME>, EVEX_V256;
9956   }
9957   let Predicates = [prd] in
9958     defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched.ZMM,
9959                              VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9960                 avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag,
9961                              mtruncFrag, NAME>, EVEX_V512;
9962 }
9964 multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr,
9965                            X86SchedWriteWidths sched, PatFrag StoreNode,
9966                            PatFrag MaskedStoreNode, SDNode InVecNode,
9967                            SDPatternOperator InVecMaskNode> {
9968   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9969                           InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9970                           avx512vl_i64_info, v16i8x_info, v16i8x_info,
9971                           v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9972                           MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
9973 }
9975 multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9976                            SDPatternOperator MaskNode,
9977                            X86SchedWriteWidths sched, PatFrag StoreNode,
9978                            PatFrag MaskedStoreNode, SDNode InVecNode,
9979                            SDPatternOperator InVecMaskNode> {
9980   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9981                           InVecMaskNode, InVecMaskNode, MaskNode, sched,
9982                           avx512vl_i64_info, v8i16x_info, v8i16x_info,
9983                           v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9984                           MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
9985 }
9987 multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9988                            SDPatternOperator MaskNode,
9989                            X86SchedWriteWidths sched, PatFrag StoreNode,
9990                            PatFrag MaskedStoreNode, SDNode InVecNode,
9991                            SDPatternOperator InVecMaskNode> {
9992   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9993                           InVecMaskNode, MaskNode, MaskNode, sched,
9994                           avx512vl_i64_info, v4i32x_info, v4i32x_info,
9995                           v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9996                           MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
9997 }
9999 multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
10000                            SDPatternOperator MaskNode,
10001                            X86SchedWriteWidths sched, PatFrag StoreNode,
10002                            PatFrag MaskedStoreNode, SDNode InVecNode,
10003                            SDPatternOperator InVecMaskNode> {
10004   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
10005                           InVecMaskNode, InVecMaskNode, MaskNode, sched,
10006                           avx512vl_i32_info, v16i8x_info, v16i8x_info,
10007                           v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
10008                           MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
10009 }
10011 multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
10012                            SDPatternOperator MaskNode,
10013                            X86SchedWriteWidths sched, PatFrag StoreNode,
10014                            PatFrag MaskedStoreNode, SDNode InVecNode,
10015                            SDPatternOperator InVecMaskNode> {
10016   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
10017                           InVecMaskNode, MaskNode, MaskNode, sched,
10018                           avx512vl_i32_info, v8i16x_info, v8i16x_info,
10019                           v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
10020                           MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
10021 }
10023 multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
10024                            SDPatternOperator MaskNode,
10025                            X86SchedWriteWidths sched, PatFrag StoreNode,
10026                            PatFrag MaskedStoreNode, SDNode InVecNode,
10027                            SDPatternOperator InVecMaskNode> {
10028   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
10029                           InVecMaskNode, MaskNode, MaskNode, sched,
10030                           avx512vl_i16_info, v16i8x_info, v16i8x_info,
10031                           v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
10032                           MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
10033 }
10035 defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",
10036                                   SchedWriteVecTruncate, truncstorevi8,
10037                                   masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
10038 defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",
10039                                   SchedWriteVecTruncate, truncstore_s_vi8,
10040                                   masked_truncstore_s_vi8, X86vtruncs,
10041                                   X86vmtruncs>;
10042 defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb",
10043                                   SchedWriteVecTruncate, truncstore_us_vi8,
10044                                   masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>;
10046 defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
10047                                   SchedWriteVecTruncate, truncstorevi16,
10048                                   masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
10049 defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, select_truncs,
10050                                   SchedWriteVecTruncate, truncstore_s_vi16,
10051                                   masked_truncstore_s_vi16, X86vtruncs,
10052                                   X86vmtruncs>;
10053 defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
10054                                   select_truncus, SchedWriteVecTruncate,
10055                                   truncstore_us_vi16, masked_truncstore_us_vi16,
10056                                   X86vtruncus, X86vmtruncus>;
10058 defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
10059                                   SchedWriteVecTruncate, truncstorevi32,
10060                                   masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
10061 defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, select_truncs,
10062                                   SchedWriteVecTruncate, truncstore_s_vi32,
10063                                   masked_truncstore_s_vi32, X86vtruncs,
10064                                   X86vmtruncs>;
10065 defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
10066                                   select_truncus, SchedWriteVecTruncate,
10067                                   truncstore_us_vi32, masked_truncstore_us_vi32,
10068                                   X86vtruncus, X86vmtruncus>;
10070 defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
10071                                   SchedWriteVecTruncate, truncstorevi8,
10072                                   masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
10073 defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
10074                                   SchedWriteVecTruncate, truncstore_s_vi8,
10075                                   masked_truncstore_s_vi8, X86vtruncs,
10076                                   X86vmtruncs>;
10077 defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus,
10078                                   select_truncus, SchedWriteVecTruncate,
10079                                   truncstore_us_vi8, masked_truncstore_us_vi8,
10080                                   X86vtruncus, X86vmtruncus>;
10082 defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
10083                                   SchedWriteVecTruncate, truncstorevi16,
10084                                   masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
10085 defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
10086                                   SchedWriteVecTruncate, truncstore_s_vi16,
10087                                   masked_truncstore_s_vi16, X86vtruncs,
10088                                   X86vmtruncs>;
10089 defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
10090                                   select_truncus, SchedWriteVecTruncate,
10091                                   truncstore_us_vi16, masked_truncstore_us_vi16,
10092                                   X86vtruncus, X86vmtruncus>;
10094 defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
10095                                   SchedWriteVecTruncate, truncstorevi8,
10096                                   masked_truncstorevi8, X86vtrunc,
10097                                   X86vmtrunc>;
10098 defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
10099                                   SchedWriteVecTruncate, truncstore_s_vi8,
10100                                   masked_truncstore_s_vi8, X86vtruncs,
10101                                   X86vmtruncs>;
10102 defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
10103                                   select_truncus, SchedWriteVecTruncate,
10104                                   truncstore_us_vi8, masked_truncstore_us_vi8,
10105                                   X86vtruncus, X86vmtruncus>;
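// For reference, each truncate defm above expands (via avx512_trunc_common) to
// rr/rrk/rrkz register forms plus mr/mrk truncating-store forms, e.g. VPMOVQB
// yields VPMOVQBZ128rr/rrk/rrkz and VPMOVQBZ128mr/mrk (plus Z256 and Z
// variants), matching assembly such as:  vpmovqb %zmm0, %xmm1 {%k1} {z}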
10107 let Predicates = [HasAVX512, NoVLX] in {
10108 def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
10109          (v8i16 (EXTRACT_SUBREG
10110                  (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
10111                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
10112 def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
10113          (v4i32 (EXTRACT_SUBREG
10114                  (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
10115                                            VR256X:$src, sub_ymm)))), sub_xmm))>;
10116 }
10118 let Predicates = [HasBWI, NoVLX] in {
10119 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
10120          (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
10121                                             VR256X:$src, sub_ymm))), sub_xmm))>;
10122 }
10124 // Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
10125 multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
10126                            X86VectorVTInfo DestInfo,
10127                            X86VectorVTInfo SrcInfo> {
10128   def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
10129                                  DestInfo.RC:$src0,
10130                                  SrcInfo.KRCWM:$mask)),
10131             (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
10132                                                  SrcInfo.KRCWM:$mask,
10133                                                  SrcInfo.RC:$src)>;
10135   def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
10136                                  DestInfo.ImmAllZerosV,
10137                                  SrcInfo.KRCWM:$mask)),
10138             (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
10139                                                   SrcInfo.RC:$src)>;
10140 }
10142 let Predicates = [HasVLX] in {
10143 defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
10144 defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
10145 defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
10146 }
10148 let Predicates = [HasAVX512] in {
10149 defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
10150 defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
10151 defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
10153 defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
10154 defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
10155 defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
10157 defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
10158 defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
10159 defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
10160 }
10162 multiclass avx512_pmovx_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
10163               X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
10164               X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
10165   let ExeDomain = DestInfo.ExeDomain in {
10166   defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10167                     (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
10168                     (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
10169                   EVEX, Sched<[sched]>;
10171   defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10172                   (ins x86memop:$src), OpcodeStr ,"$src", "$src",
10173                   (DestInfo.VT (LdFrag addr:$src))>,
10174                 EVEX, Sched<[sched.Folded]>;
10175   }
10176 }
10178 multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
10179           SDNode OpNode, SDNode InVecNode, string ExtTy,
10180           X86SchedWriteWidths sched,
10181           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10182   let Predicates = [HasVLX, HasBWI] in {
10183     defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v8i16x_info,
10184                     v16i8x_info, i64mem, LdFrag, InVecNode>,
10185                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, WIG;
10187     defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v16i16x_info,
10188                     v16i8x_info, i128mem, LdFrag, OpNode>,
10189                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, WIG;
10190   }
10191   let Predicates = [HasBWI] in {
10192     defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v32i16_info,
10193                     v32i8x_info, i256mem, LdFrag, OpNode>,
10194                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, WIG;
10195   }
10196 }
10198 multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
10199           SDNode OpNode, SDNode InVecNode, string ExtTy,
10200           X86SchedWriteWidths sched,
10201           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10202   let Predicates = [HasVLX, HasAVX512] in {
10203     defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
10204                    v16i8x_info, i32mem, LdFrag, InVecNode>,
10205                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, WIG;
10207     defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
10208                    v16i8x_info, i64mem, LdFrag, InVecNode>,
10209                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, WIG;
10210   }
10211   let Predicates = [HasAVX512] in {
10212     defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
10213                    v16i8x_info, i128mem, LdFrag, OpNode>,
10214                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, WIG;
10215   }
10216 }
10218 multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
10219                               SDNode InVecNode, string ExtTy,
10220                               X86SchedWriteWidths sched,
10221                               PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10222   let Predicates = [HasVLX, HasAVX512] in {
10223     defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
10224                    v16i8x_info, i16mem, LdFrag, InVecNode>,
10225                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, WIG;
10227     defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
10228                    v16i8x_info, i32mem, LdFrag, InVecNode>,
10229                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, WIG;
10230   }
10231   let Predicates = [HasAVX512] in {
10232     defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
10233                    v16i8x_info, i64mem, LdFrag, InVecNode>,
10234                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, WIG;
10235   }
10236 }
10238 multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
10239          SDNode OpNode, SDNode InVecNode, string ExtTy,
10240          X86SchedWriteWidths sched,
10241          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
10242   let Predicates = [HasVLX, HasAVX512] in {
10243     defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
10244                    v8i16x_info, i64mem, LdFrag, InVecNode>,
10245                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, WIG;
10247     defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
10248                    v8i16x_info, i128mem, LdFrag, OpNode>,
10249                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, WIG;
10250   }
10251   let Predicates = [HasAVX512] in {
10252     defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
10253                    v16i16x_info, i256mem, LdFrag, OpNode>,
10254                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, WIG;
10255   }
10256 }
10258 multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
10259          SDNode OpNode, SDNode InVecNode, string ExtTy,
10260          X86SchedWriteWidths sched,
10261          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
10262   let Predicates = [HasVLX, HasAVX512] in {
10263     defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
10264                    v8i16x_info, i32mem, LdFrag, InVecNode>,
10265                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, WIG;
10267     defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
10268                    v8i16x_info, i64mem, LdFrag, InVecNode>,
10269                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, WIG;
10270   }
10271   let Predicates = [HasAVX512] in {
10272     defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
10273                    v8i16x_info, i128mem, LdFrag, OpNode>,
10274                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, WIG;
10275   }
10276 }
10278 multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
10279          SDNode OpNode, SDNode InVecNode, string ExtTy,
10280          X86SchedWriteWidths sched,
10281          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
10283   let Predicates = [HasVLX, HasAVX512] in {
10284     defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
10285                    v4i32x_info, i64mem, LdFrag, InVecNode>,
10286                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
10288     defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
10289                    v4i32x_info, i128mem, LdFrag, OpNode>,
10290                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
10291   }
10292   let Predicates = [HasAVX512] in {
10293     defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
10294                    v8i32x_info, i256mem, LdFrag, OpNode>,
10295                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
10296   }
10297 }
10299 defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z", SchedWriteVecExtend>;
10300 defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z", SchedWriteVecExtend>;
10301 defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq",       zext_invec, "z", SchedWriteVecExtend>;
10302 defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z", SchedWriteVecExtend>;
10303 defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z", SchedWriteVecExtend>;
10304 defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z", SchedWriteVecExtend>;
10306 defm VPMOVSXBW: avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s", SchedWriteVecExtend>;
10307 defm VPMOVSXBD: avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s", SchedWriteVecExtend>;
10308 defm VPMOVSXBQ: avx512_pmovx_bq<0x22, "vpmovsxbq",       sext_invec, "s", SchedWriteVecExtend>;
10309 defm VPMOVSXWD: avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s", SchedWriteVecExtend>;
10310 defm VPMOVSXWQ: avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s", SchedWriteVecExtend>;
10311 defm VPMOVSXDQ: avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s", SchedWriteVecExtend>;
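// For reference, each extension defm above yields maskable rr/rm forms at each
// supported width (e.g. VPMOVZXBWZ128rm, VPMOVZXBWZ256rr, VPMOVZXBWZrr),
// matching assembly such as:  vpmovzxbw %xmm0, %ymm1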
10314 // Patterns that we also need any-extend versions of. aext_vector_inreg
10315 // is currently legalized to zext_vector_inreg.
10316 multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
10317   // 256-bit patterns
10318   let Predicates = [HasVLX, HasBWI] in {
10319     def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
10320               (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
10321   }
10323   let Predicates = [HasVLX] in {
10324     def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
10325               (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
10327     def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
10328               (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
10329   }
10331   // 512-bit patterns
10332   let Predicates = [HasBWI] in {
10333     def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
10334               (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
10335   }
10336   let Predicates = [HasAVX512] in {
10337     def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
10338               (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
10339     def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
10340               (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
10342     def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
10343               (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
10345     def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
10346               (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
10347   }
10348 }
10350 multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
10351                                  SDNode InVecOp> :
10352     AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
10353   // 128-bit patterns
10354   let Predicates = [HasVLX, HasBWI] in {
10355   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10356             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10357   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10358             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10359   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10360             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10361   }
10362   let Predicates = [HasVLX] in {
10363   def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10364             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10365   def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10366             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10368   def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
10369             (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
10371   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10372             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10373   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10374             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10375   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10376             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10378   def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10379             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10380   def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
10381             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10383   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10384             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10385   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10386             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10387   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
10388             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10389   }
10390   let Predicates = [HasVLX] in {
10391   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10392             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10393   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
10394             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10395   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10396             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10398   def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10399             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10400   def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10401             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10403   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10404             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10405   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
10406             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10407   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10408             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10409   }
10410   // 512-bit patterns
10411   let Predicates = [HasAVX512] in {
10412   def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10413             (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10414   def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10415             (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10416   def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10417             (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10418   }
10419 }
10421 defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
10422 defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
10424 // Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
10425 // ext+trunc aggressively making it impossible to legalize the DAG to this
10426 // pattern directly.
10427 let Predicates = [HasAVX512, NoBWI] in {
10428 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
10429          (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
10430 def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
10431          (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
10432 }
10434 //===----------------------------------------------------------------------===//
10435 // GATHER - SCATTER Operations
10437 // FIXME: Improve scheduling of gather/scatter instructions.
10438 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10439                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10440   let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
10441       ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
10442   def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
10443             (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
10444             !strconcat(OpcodeStr#_.Suffix,
10445             "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
10446             []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10447             Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
10448 }
10450 multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
10451                         AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10452   defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
10453                                       vy512xmem>, EVEX_V512, REX_W;
10454   defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
10455                                       vz512mem>, EVEX_V512, REX_W;
10456 let Predicates = [HasVLX] in {
10457   defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10458                               vx256xmem>, EVEX_V256, REX_W;
10459   defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
10460                               vy256xmem>, EVEX_V256, REX_W;
10461   defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10462                               vx128xmem>, EVEX_V128, REX_W;
10463   defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10464                               vx128xmem>, EVEX_V128, REX_W;
10465 }
10466 }
10468 multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
10469                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10470   defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
10471                                        EVEX_V512;
10472   defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
10473                                        EVEX_V512;
10474 let Predicates = [HasVLX] in {
10475   defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10476                                           vy256xmem>, EVEX_V256;
10477   defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10478                                           vy128xmem>, EVEX_V256;
10479   defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10480                                           vx128xmem>, EVEX_V128;
10481   defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10482                                           vx64xmem, VK2WM>, EVEX_V128;
10483 }
10484 }
10487 defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
10488                avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
10490 defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
10491                 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
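// For reference, these expand to masked gather loads such as VGATHERDPSZrm and
// VPGATHERQQZ128rm; in assembly the writemask is mandatory, e.g.:
//   vgatherdps (%rax,%zmm1,4), %zmm0 {%k1}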
10493 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10494                           X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10496 let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
10497     hasSideEffects = 0 in
10499   def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
10500             (ins memop:$dst, MaskRC:$mask, _.RC:$src),
10501             !strconcat(OpcodeStr#_.Suffix,
10502             "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
10503             []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10504             Sched<[WriteStore]>;
10505 }
10507 multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
10508                         AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10509   defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
10510                                       vy512xmem>, EVEX_V512, REX_W;
10511   defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
10512                                       vz512mem>, EVEX_V512, REX_W;
10513 let Predicates = [HasVLX] in {
10514   defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10515                               vx256xmem>, EVEX_V256, REX_W;
10516   defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
10517                               vy256xmem>, EVEX_V256, REX_W;
10518   defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10519                               vx128xmem>, EVEX_V128, REX_W;
10520   defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10521                               vx128xmem>, EVEX_V128, REX_W;
10522 }
10523 }
10525 multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
10526                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10527   defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
10528                                        EVEX_V512;
10529   defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
10530                                        EVEX_V512;
10531 let Predicates = [HasVLX] in {
10532   defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10533                                           vy256xmem>, EVEX_V256;
10534   defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10535                                           vy128xmem>, EVEX_V256;
10536   defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10537                                           vx128xmem>, EVEX_V128;
10538   defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10539                                           vx64xmem, VK2WM>, EVEX_V128;
10540 }
10541 }
10543 defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
10544                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
10546 defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
10547                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
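// Likewise, the scatter defms expand to masked store forms such as
// VSCATTERDPSZmr and VPSCATTERQQZ128mr, e.g.:
//   vscatterdps %zmm0, (%rax,%zmm1,4) {%k1}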
10549 // prefetch
10550 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
10551                        RegisterClass KRC, X86MemOperand memop> {
10552   let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
10553   def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
10554             !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
10555             EVEX, EVEX_K, Sched<[WriteLoad]>;
10556 }
10558 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
10559                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10561 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
10562                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10564 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
10565                      VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10567 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
10568                      VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10570 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
10571                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10573 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
10574                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10576 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
10577                      VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10579 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
10580                      VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10582 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
10583                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10585 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
10586                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10588 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
10589                      VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10591 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
10592                      VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10594 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
10595                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10597 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
10598                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10600 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
10601                      VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10603 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
10604                      VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
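// The gather/scatter prefetches above take only a vector memory operand and a
// writemask (no destination register) and are gated on HasPFI, e.g.:
//   vgatherpf0dps (%rax,%zmm1,4) {%k1}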
10606 multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr, SchedWrite Sched> {
10607 def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
10608                   !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
10609                   [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
10610                   EVEX, Sched<[Sched]>;
10611 }
10613 multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
10614                                  string OpcodeStr, Predicate prd> {
10615 let Predicates = [prd] in
10616   defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteVecMoveZ>, EVEX_V512;
10618   let Predicates = [prd, HasVLX] in {
10619     defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr, WriteVecMoveY>, EVEX_V256;
10620     defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr, WriteVecMoveX>, EVEX_V128;
10621   }
10622 }
10624 defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
10625 defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , REX_W;
10626 defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
10627 defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , REX_W;
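// VPMOVM2* copies each mask bit to a whole vector element (all-ones for a set
// bit, all-zeros otherwise), modelled above as a sext of the vXi1 mask, e.g.:
//   vpmovm2d %k1, %zmm0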
10629 multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
10630     def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
10631                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
10632                         [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
10633                         EVEX, Sched<[WriteMove]>;
10634 }
10636 // Use the 512-bit version to implement 128/256-bit operations when VLX is not available (NoVLX).
10637 multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
10638                                            X86VectorVTInfo _,
10639                                            string Name> {
10641   def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
10642             (_.KVT (COPY_TO_REGCLASS
10643                      (!cast<Instruction>(Name#"Zrr")
10644                        (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
10645                                       _.RC:$src, _.SubRegIdx)),
10646                    _.KRC))>;
10647 }
10649 multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
10650                                    AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10651   let Predicates = [prd] in
10652     defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
10653                                             EVEX_V512;
10655   let Predicates = [prd, HasVLX] in {
10656     defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
10657                                               EVEX_V256;
10658     defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
10659                                                EVEX_V128;
10660   }
10661   let Predicates = [prd, NoVLX] in {
10662     defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
10663     defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
10664   }
10665 }
10667 defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
10668                                               avx512vl_i8_info, HasBWI>;
10669 defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
10670                                               avx512vl_i16_info, HasBWI>, REX_W;
10671 defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
10672                                               avx512vl_i32_info, HasDQI>;
10673 defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
10674                                               avx512vl_i64_info, HasDQI>, REX_W;
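// VPMOV*2M goes the other way: the sign bit of each element becomes a mask
// bit, matched above as X86pcmpgtm of zero against the source, e.g.:
//   vpmovd2m %zmm0, %k1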
10676 // Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
10677 // is available, but BWI is not. We can't handle this in lowering because
10678 // a target independent DAG combine likes to combine sext and trunc.
10679 let Predicates = [HasDQI, NoBWI] in {
10680   def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
10681             (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
10682   def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
10683             (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
10684 }
10686 let Predicates = [HasDQI, NoBWI, HasVLX] in {
10687   def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
10688             (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
10689 }
10691 //===----------------------------------------------------------------------===//
10692 // AVX-512 - COMPRESS and EXPAND
10695 multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
10696                                  string OpcodeStr, X86FoldableSchedWrite sched> {
10697   defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
10698               (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10699               (null_frag)>, AVX5128IBase,
10700               Sched<[sched]>;
10702   let mayStore = 1, hasSideEffects = 0 in
10703   def mr : AVX5128I<opc, MRMDestMem, (outs),
10704               (ins _.MemOp:$dst, _.RC:$src),
10705               OpcodeStr # "\t{$src, $dst|$dst, $src}",
10706               []>, EVEX_CD8<_.EltSize, CD8VT1>,
10707               Sched<[sched.Folded]>;
10709   def mrk : AVX5128I<opc, MRMDestMem, (outs),
10710               (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
10711               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
10712               []>,
10713               EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10714               Sched<[sched.Folded]>;
10715 }
10717 multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10718   def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
10719             (!cast<Instruction>(Name#_.ZSuffix#mrk)
10720                             addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
10722   def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10723             (!cast<Instruction>(Name#_.ZSuffix#rrk)
10724                             _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10725   def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10726             (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10727                             _.KRCWM:$mask, _.RC:$src)>;
10728 }
10730 multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
10731                                  X86FoldableSchedWrite sched,
10732                                  AVX512VLVectorVTInfo VTInfo,
10733                                  Predicate Pred = HasAVX512> {
10734   let Predicates = [Pred] in
10735   defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
10736            compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10738   let Predicates = [Pred, HasVLX] in {
10739     defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
10740                 compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10741     defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
10742                 compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10743   }
10744 }
10746 // FIXME: Is there a better scheduler class for VPCOMPRESS?
10747 defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
10748                                           avx512vl_i32_info>, EVEX;
10749 defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
10750                                           avx512vl_i64_info>, EVEX, REX_W;
10751 defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
10752                                           avx512vl_f32_info>, EVEX;
10753 defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
10754                                           avx512vl_f64_info>, EVEX, REX_W;
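// Compress packs the elements selected by the writemask contiguously into the
// low part of the destination (or into memory for the mr/mrk forms), e.g.:
//   vcompressps %zmm0, (%rax) {%k1}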
10756 // expand
10757 multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
10758                                  string OpcodeStr, X86FoldableSchedWrite sched> {
10759   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10760               (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10761               (null_frag)>, AVX5128IBase,
10762               Sched<[sched]>;
10764   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10765               (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
10766               (null_frag)>,
10767             AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
10768             Sched<[sched.Folded, sched.ReadAfterFold]>;
10769 }
10771 multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10773   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
10774             (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10775                                         _.KRCWM:$mask, addr:$src)>;
10777   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
10778             (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10779                                         _.KRCWM:$mask, addr:$src)>;
10781   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
10782                                                (_.VT _.RC:$src0))),
10783             (!cast<Instruction>(Name#_.ZSuffix#rmk)
10784                             _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
10786   def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10787             (!cast<Instruction>(Name#_.ZSuffix#rrk)
10788                             _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10789   def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10790             (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10791                             _.KRCWM:$mask, _.RC:$src)>;
10792 }
10794 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
10795                                X86FoldableSchedWrite sched,
10796                                AVX512VLVectorVTInfo VTInfo,
10797                                Predicate Pred = HasAVX512> {
10798   let Predicates = [Pred] in
10799   defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
10800            expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10802   let Predicates = [Pred, HasVLX] in {
10803     defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
10804                 expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10805     defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
10806                 expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10807   }
10808 }
10810 // FIXME: Is there a better scheduler class for VPEXPAND?
10811 defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
10812                                       avx512vl_i32_info>, EVEX;
10813 defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
10814                                       avx512vl_i64_info>, EVEX, REX_W;
10815 defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
10816                                       avx512vl_f32_info>, EVEX;
10817 defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
10818                                       avx512vl_f64_info>, EVEX, REX_W;
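// Expand is the inverse: consecutive source elements (from a register or from
// memory) are placed into the destination lanes selected by the mask, e.g.:
//   vpexpandd (%rax), %zmm0 {%k1} {z}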
10820 //handle instruction  reg_vec1 = op(reg_vec,imm)
10821 //                               op(mem_vec,imm)
10822 //                               op(broadcast(eltVt),imm)
10823 // All instructions are created with FROUND_CURRENT.
10824 multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
10825                                       SDPatternOperator OpNode,
10826                                       SDPatternOperator MaskOpNode,
10827                                       X86FoldableSchedWrite sched,
10828                                       X86VectorVTInfo _> {
10829   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10830   defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
10831                       (ins _.RC:$src1, i32u8imm:$src2),
10832                       OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10833                       (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
10834                       (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
10835                       Sched<[sched]>;
10836   defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10837                     (ins _.MemOp:$src1, i32u8imm:$src2),
10838                     OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10839                     (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10840                             (i32 timm:$src2)),
10841                     (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10842                                 (i32 timm:$src2))>,
10843                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10844   defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10845                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10846                     OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
10847                     "${src1}"#_.BroadcastStr#", $src2",
10848                     (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10849                             (i32 timm:$src2)),
10850                     (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10851                                 (i32 timm:$src2))>, EVEX_B,
10852                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10853   }
10854 }
10856 //handle instruction  reg_vec1 = op(reg_vec2,imm),{sae}
10857 multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10858                                           SDNode OpNode, X86FoldableSchedWrite sched,
10859                                           X86VectorVTInfo _> {
10860   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10861   defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10862                       (ins _.RC:$src1, i32u8imm:$src2),
10863                       OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
10864                       "$src1, {sae}, $src2",
10865                       (OpNode (_.VT _.RC:$src1),
10866                               (i32 timm:$src2))>,
10867                       EVEX_B, Sched<[sched]>;
10868 }
10870 multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10871             AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
10872             SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
10873             Predicate prd>{
10874   let Predicates = [prd] in {
10875     defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10876                                            sched.ZMM, _.info512>,
10877                 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
10878                                                sched.ZMM, _.info512>, EVEX_V512;
10879   }
10880   let Predicates = [prd, HasVLX] in {
10881     defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10882                                            sched.XMM, _.info128>, EVEX_V128;
10883     defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10884                                            sched.YMM, _.info256>, EVEX_V256;
10885   }
10886 }
10888 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10889 //                               op(reg_vec2,mem_vec,imm)
10890 //                               op(reg_vec2,broadcast(eltVt),imm)
10891 // All instructions are created with FROUND_CURRENT.
10892 multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10893                                 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10894   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10895   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10896                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10897                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10898                       (OpNode (_.VT _.RC:$src1),
10899                               (_.VT _.RC:$src2),
10900                               (i32 timm:$src3))>,
10901                       Sched<[sched]>;
10902   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10903                     (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10904                     OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10905                     (OpNode (_.VT _.RC:$src1),
10906                             (_.VT (bitconvert (_.LdFrag addr:$src2))),
10907                             (i32 timm:$src3))>,
10908                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10909   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10910                     (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10911                     OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10912                     "$src1, ${src2}"#_.BroadcastStr#", $src3",
10913                     (OpNode (_.VT _.RC:$src1),
10914                             (_.VT (_.BroadcastLdFrag addr:$src2)),
10915                             (i32 timm:$src3))>, EVEX_B,
10916                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10917   }
10918 }
10920 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10921 //                               op(reg_vec2,mem_vec,imm)
10922 multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10923                               X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10924                               X86VectorVTInfo SrcInfo>{
10925   let ExeDomain = DestInfo.ExeDomain in {
10926   defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10927                   (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10928                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10929                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10930                                (SrcInfo.VT SrcInfo.RC:$src2),
10931                                (i8 timm:$src3)))>,
10932                   Sched<[sched]>;
10933   defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10934                 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10935                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10936                 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10937                              (SrcInfo.VT (bitconvert
10938                                                 (SrcInfo.LdFrag addr:$src2))),
10939                              (i8 timm:$src3)))>,
10940                 Sched<[sched.Folded, sched.ReadAfterFold]>;
10941   }
10942 }
10944 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10945 //                               op(reg_vec2,mem_vec,imm)
10946 //                               op(reg_vec2,broadcast(eltVt),imm)
10947 multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10948                            X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10949   avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10951   let ExeDomain = _.ExeDomain in
10952   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10953                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10954                     OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10955                     "$src1, ${src2}"#_.BroadcastStr#", $src3",
10956                     (OpNode (_.VT _.RC:$src1),
10957                             (_.VT (_.BroadcastLdFrag addr:$src2)),
10958                             (i8 timm:$src3))>, EVEX_B,
10959                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10960 }
10962 //handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10963 //                                      op(reg_vec2,mem_scalar,imm)
10964 multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10965                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10966   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10967   defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10968                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10969                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10970                       (OpNode (_.VT _.RC:$src1),
10971                               (_.VT _.RC:$src2),
10972                               (i32 timm:$src3))>,
10973                       Sched<[sched]>;
10974   defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10975                     (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
10976                     OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10977                     (OpNode (_.VT _.RC:$src1),
10978                             (_.ScalarIntMemFrags addr:$src2),
10979                             (i32 timm:$src3))>,
10980                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10981   }
10982 }
10984 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
10985 multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10986                                     SDNode OpNode, X86FoldableSchedWrite sched,
10987                                     X86VectorVTInfo _> {
10988   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10989   defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10990                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10991                       OpcodeStr, "$src3, {sae}, $src2, $src1",
10992                       "$src1, $src2, {sae}, $src3",
10993                       (OpNode (_.VT _.RC:$src1),
10994                               (_.VT _.RC:$src2),
10995                               (i32 timm:$src3))>,
10996                       EVEX_B, Sched<[sched]>;
10997 }
10999 //handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
11000 multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
11001                                     X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11002   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
11003   defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11004                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
11005                       OpcodeStr, "$src3, {sae}, $src2, $src1",
11006                       "$src1, $src2, {sae}, $src3",
11007                       (OpNode (_.VT _.RC:$src1),
11008                               (_.VT _.RC:$src2),
11009                               (i32 timm:$src3))>,
11010                       EVEX_B, Sched<[sched]>;
11011 }
11013 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
11014             AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
11015             SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
11016   let Predicates = [prd] in {
11017     defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
11018                 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
11019                                   EVEX_V512;
11021   }
11022   let Predicates = [prd, HasVLX] in {
11023     defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
11024                                   EVEX_V128;
11025     defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
11026                                   EVEX_V256;
11027   }
11028 }
11030 multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
11031                    X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
11032                    AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
11033   let Predicates = [Pred] in {
11034     defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
11035                            SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
11036   }
11037   let Predicates = [Pred, HasVLX] in {
11038     defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
11039                            SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
11040     defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
11041                            SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
11042   }
11043 }
11045 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
11046                                   bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
11047                                   Predicate Pred = HasAVX512> {
11048   let Predicates = [Pred] in {
11049     defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
11050                                 EVEX_V512;
11051   }
11052   let Predicates = [Pred, HasVLX] in {
11053     defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
11054                                 EVEX_V128;
11055     defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
11056                                 EVEX_V256;
11057   }
11058 }
11060 multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
11061                   X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
11062                   SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
11063   let Predicates = [prd] in {
11064      defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
11065               avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
11066   }
11067 }
11069 multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
11070                     bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
11071                     SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
11072                     X86SchedWriteWidths sched, Predicate prd>{
11073   defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
11074                             opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
11075                             AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
11076   defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
11077                             opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
11078                             AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
11079   defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
11080                             opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
11081                             AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, REX_W;
11082 }
11084 defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
11085                               X86VReduce, X86VReduce, X86VReduceSAE,
11086                               SchedWriteFRnd, HasDQI>;
11087 defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
11088                               X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
11089                               SchedWriteFRnd, HasAVX512>;
11090 defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
11091                               X86VGetMant, X86VGetMant, X86VGetMantSAE,
11092                               SchedWriteFRnd, HasAVX512>;
11094 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
11095                                                 0x50, X86VRange, X86VRangeSAE,
11096                                                 SchedWriteFAdd, HasDQI>,
11097       AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
11098 defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
11099                                                 0x50, X86VRange, X86VRangeSAE,
11100                                                 SchedWriteFAdd, HasDQI>,
11101       AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11103 defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
11104       f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
11105       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W;
11106 defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
11107       0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
11108       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11110 defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
11111       0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
11112       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W;
11113 defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
11114       0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
11115       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11116 defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
11117       0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
11118       AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
11120 defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
11121       0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
11122       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W;
11123 defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
11124       0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
11125       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11126 defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
11127       0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
11128       AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
11130 multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
11131                                           X86FoldableSchedWrite sched,
11132                                           X86VectorVTInfo _,
11133                                           X86VectorVTInfo CastInfo,
11134                                           string EVEX2VEXOvrd> {
11135   let ExeDomain = _.ExeDomain in {
11136   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11137                   (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
11138                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11139                   (_.VT (bitconvert
11140                          (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
11141                                                   (i8 timm:$src3)))))>,
11142                   Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
11143   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11144                 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
11145                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11146                 (_.VT
11147                  (bitconvert
11148                   (CastInfo.VT (X86Shuf128 _.RC:$src1,
11149                                            (CastInfo.LdFrag addr:$src2),
11150                                            (i8 timm:$src3)))))>,
11151                 Sched<[sched.Folded, sched.ReadAfterFold]>,
11152                 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
11153   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11154                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11155                     OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
11156                     "$src1, ${src2}"#_.BroadcastStr#", $src3",
11157                     (_.VT
11158                      (bitconvert
11159                       (CastInfo.VT
11160                        (X86Shuf128 _.RC:$src1,
11161                                    (_.BroadcastLdFrag addr:$src2),
11162                                    (i8 timm:$src3)))))>, EVEX_B,
11163                     Sched<[sched.Folded, sched.ReadAfterFold]>;
11164   }
11165 }
11167 multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
11168                                    AVX512VLVectorVTInfo _,
11169                                    AVX512VLVectorVTInfo CastInfo, bits<8> opc,
11170                                    string EVEX2VEXOvrd>{
11171   let Predicates = [HasAVX512] in
11172   defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
11173                                           _.info512, CastInfo.info512, "">, EVEX_V512;
11175   let Predicates = [HasAVX512, HasVLX] in
11176   defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
11177                                              _.info256, CastInfo.info256,
11178                                              EVEX2VEXOvrd>, EVEX_V256;
11179 }
11181 defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
11182       avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11183 defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
11184       avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
11185 defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
11186       avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11187 defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
11188       avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
11190 multiclass avx512_valign<bits<8> opc, string OpcodeStr,
11191                          X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11192   // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
11193   // instantiation of this class.
11194   let ExeDomain = _.ExeDomain in {
11195   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11196                   (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
11197                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11198                   (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
11199                   Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
11200   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11201                 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
11202                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11203                 (_.VT (X86VAlign _.RC:$src1,
11204                                  (bitconvert (_.LdFrag addr:$src2)),
11205                                  (i8 timm:$src3)))>,
11206                 Sched<[sched.Folded, sched.ReadAfterFold]>,
11207                 EVEX2VEXOverride<"VPALIGNRrmi">;
11209   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11210                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11211                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
11212                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
11213                    (X86VAlign _.RC:$src1,
11214                               (_.VT (_.BroadcastLdFrag addr:$src2)),
11215                               (i8 timm:$src3))>, EVEX_B,
11216                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11217   }
11218 }
11220 multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
11221                                 AVX512VLVectorVTInfo _> {
11222   let Predicates = [HasAVX512] in {
11223     defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
11224                                 AVX512AIi8Base, EVEX_4V, EVEX_V512;
11225   }
11226   let Predicates = [HasAVX512, HasVLX] in {
11227     defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
11228                                 AVX512AIi8Base, EVEX_4V, EVEX_V128;
11229     // We can't really override the 256-bit version so change it back to unset.
11230     let EVEX2VEXOverride = ? in
11231     defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
11232                                 AVX512AIi8Base, EVEX_4V, EVEX_V256;
11233   }
11234 }
11236 defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
11237                                    avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11238 defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
11239                                    avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
11240                                    REX_W;
11242 defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
11243                                          SchedWriteShuffle, avx512vl_i8_info,
11244                                          avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
11246 // Immediate transforms (SDNodeXForms) that help convert valignq into masked
11247 // valignd, or valignq/valignd into vpalignr.
11248 def ValignqImm32XForm : SDNodeXForm<timm, [{
11249   return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
11250 }]>;
11251 def ValignqImm8XForm : SDNodeXForm<timm, [{
11252   return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
11253 }]>;
11254 def ValigndImm8XForm : SDNodeXForm<timm, [{
11255   return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
11256 }]>;
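// For example (illustrative): a valignq immediate counts 64-bit elements, so it is
// scaled by 2 to get an equivalent valignd (32-bit element) count and by 8 to get a
// vpalignr byte count; a valignd immediate is scaled by 4 to get a byte count.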
11258 multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
11259                                         X86VectorVTInfo From, X86VectorVTInfo To,
11260                                         SDNodeXForm ImmXForm> {
11261   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11262                                  (bitconvert
11263                                   (From.VT (OpNode From.RC:$src1, From.RC:$src2,
11264                                                    timm:$src3))),
11265                                  To.RC:$src0)),
11266             (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
11267                                                   To.RC:$src1, To.RC:$src2,
11268                                                   (ImmXForm timm:$src3))>;
11270   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11271                                  (bitconvert
11272                                   (From.VT (OpNode From.RC:$src1, From.RC:$src2,
11273                                                    timm:$src3))),
11274                                  To.ImmAllZerosV)),
11275             (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
11276                                                    To.RC:$src1, To.RC:$src2,
11277                                                    (ImmXForm timm:$src3))>;
11279   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11280                                  (bitconvert
11281                                   (From.VT (OpNode From.RC:$src1,
11282                                                    (From.LdFrag addr:$src2),
11283                                            timm:$src3))),
11284                                  To.RC:$src0)),
11285             (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
11286                                                   To.RC:$src1, addr:$src2,
11287                                                   (ImmXForm timm:$src3))>;
11289   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11290                                  (bitconvert
11291                                   (From.VT (OpNode From.RC:$src1,
11292                                                    (From.LdFrag addr:$src2),
11293                                            timm:$src3))),
11294                                  To.ImmAllZerosV)),
11295             (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
11296                                                    To.RC:$src1, addr:$src2,
11297                                                    (ImmXForm timm:$src3))>;
11298 }
11300 multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
11301                                            X86VectorVTInfo From,
11302                                            X86VectorVTInfo To,
11303                                            SDNodeXForm ImmXForm> :
11304       avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
11305   def : Pat<(From.VT (OpNode From.RC:$src1,
11306                              (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
11307                              timm:$src3)),
11308             (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
11309                                                   (ImmXForm timm:$src3))>;
11311   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11312                                  (bitconvert
11313                                   (From.VT (OpNode From.RC:$src1,
11314                                            (bitconvert
11315                                             (To.VT (To.BroadcastLdFrag addr:$src2))),
11316                                            timm:$src3))),
11317                                  To.RC:$src0)),
11318             (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
11319                                                    To.RC:$src1, addr:$src2,
11320                                                    (ImmXForm timm:$src3))>;
11322   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11323                                  (bitconvert
11324                                   (From.VT (OpNode From.RC:$src1,
11325                                            (bitconvert
11326                                             (To.VT (To.BroadcastLdFrag addr:$src2))),
11327                                            timm:$src3))),
11328                                  To.ImmAllZerosV)),
11329             (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
11330                                                     To.RC:$src1, addr:$src2,
11331                                                     (ImmXForm timm:$src3))>;
11332 }
11334 let Predicates = [HasAVX512] in {
11335   // For 512-bit we lower to the widest element type we can. So we only need
11336   // to handle converting valignq to valignd.
11337   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
11338                                          v16i32_info, ValignqImm32XForm>;
11339 }
11341 let Predicates = [HasVLX] in {
11342   // For 128-bit we lower to the widest element type we can. So we only need
11343   // to handle converting valignq to valignd.
11344   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
11345                                          v4i32x_info, ValignqImm32XForm>;
11346   // For 256-bit we lower to the widest element type we can. So we only need
11347   // to handle converting valignq to valignd.
11348   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
11349                                          v8i32x_info, ValignqImm32XForm>;
11350 }
11352 let Predicates = [HasVLX, HasBWI] in {
11353   // We can turn 128-bit VALIGND/VALIGNQ into VPALIGNR, which shuffles within 128-bit lanes.
11354   defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
11355                                       v16i8x_info, ValignqImm8XForm>;
11356   defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
11357                                       v16i8x_info, ValigndImm8XForm>;
11358 }
11360 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
11361                 SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
11362                 EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
11364 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
11365                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11366   let ExeDomain = _.ExeDomain in {
11367   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11368                     (ins _.RC:$src1), OpcodeStr,
11369                     "$src1", "$src1",
11370                     (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
11371                     Sched<[sched]>;
11373   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11374                   (ins _.MemOp:$src1), OpcodeStr,
11375                   "$src1", "$src1",
11376                   (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
11377             EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
11378             Sched<[sched.Folded]>;
11379   }
11380 }
11382 multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
11383                             X86FoldableSchedWrite sched, X86VectorVTInfo _> :
11384            avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
11385   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11386                   (ins _.ScalarMemOp:$src1), OpcodeStr,
11387                   "${src1}"#_.BroadcastStr,
11388                   "${src1}"#_.BroadcastStr,
11389                   (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
11390              EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
11391              Sched<[sched.Folded]>;
11392 }
11394 multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11395                               X86SchedWriteWidths sched,
11396                               AVX512VLVectorVTInfo VTInfo, Predicate prd> {
11397   let Predicates = [prd] in
11398     defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11399                              EVEX_V512;
11401   let Predicates = [prd, HasVLX] in {
11402     defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11403                               EVEX_V256;
11404     defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
11405                               EVEX_V128;
11406   }
11407 }
11409 multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11410                                X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
11411                                Predicate prd> {
11412   let Predicates = [prd] in
11413     defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11414                               EVEX_V512;
11416   let Predicates = [prd, HasVLX] in {
11417     defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11418                                  EVEX_V256;
11419     defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
11420                                  EVEX_V128;
11421   }
11422 }
11424 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
11425                                  SDNode OpNode, X86SchedWriteWidths sched,
11426                                  Predicate prd> {
11427   defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
11428                                avx512vl_i64_info, prd>, REX_W;
11429   defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
11430                                avx512vl_i32_info, prd>;
11431 }
11433 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
11434                                  SDNode OpNode, X86SchedWriteWidths sched,
11435                                  Predicate prd> {
11436   defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
11437                               avx512vl_i16_info, prd>, WIG;
11438   defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
11439                               avx512vl_i8_info, prd>, WIG;
11440 }
11442 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
11443                                   bits<8> opc_d, bits<8> opc_q,
11444                                   string OpcodeStr, SDNode OpNode,
11445                                   X86SchedWriteWidths sched> {
11446   defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
11447                                     HasAVX512>,
11448               avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
11449                                     HasBWI>;
11450 }
11452 defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
11453                                     SchedWriteVecALU>;
11455 // VPABS: Use the 512-bit version to implement the 128/256-bit forms when VLX is not available (NoVLX).
11456 let Predicates = [HasAVX512, NoVLX] in {
11457   def : Pat<(v4i64 (abs VR256X:$src)),
11458             (EXTRACT_SUBREG
11459                 (VPABSQZrr
11460                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
11461              sub_ymm)>;
11462   def : Pat<(v2i64 (abs VR128X:$src)),
11463             (EXTRACT_SUBREG
11464                 (VPABSQZrr
11465                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
11466              sub_xmm)>;
11467 }
11469 // Use the 512-bit version to implement the 128/256-bit forms when VLX is not available.
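// For example (illustrative): a 256-bit operation is widened by inserting the ymm source
// into an undef zmm (INSERT_SUBREG of IMPLICIT_DEF), executing the 512-bit instruction,
// and extracting the low ymm from the result.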
11470 multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
11471                                  AVX512VLVectorVTInfo _, Predicate prd> {
11472   let Predicates = [prd, NoVLX] in {
11473     def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
11474               (EXTRACT_SUBREG
11475                 (!cast<Instruction>(InstrStr # "Zrr")
11476                   (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11477                                  _.info256.RC:$src1,
11478                                  _.info256.SubRegIdx)),
11479               _.info256.SubRegIdx)>;
11481     def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
11482               (EXTRACT_SUBREG
11483                 (!cast<Instruction>(InstrStr # "Zrr")
11484                   (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11485                                  _.info128.RC:$src1,
11486                                  _.info128.SubRegIdx)),
11487               _.info128.SubRegIdx)>;
11488   }
11489 }
11491 defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
11492                                         SchedWriteVecIMul, HasCDI>;
11494 // FIXME: Is there a better scheduler class for VPCONFLICT?
11495 defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
11496                                         SchedWriteVecALU, HasCDI>;
11498 // VPLZCNT: Use the 512-bit version to implement the 128/256-bit forms when VLX is not available (NoVLX).
11499 defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
11500 defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
11502 //===---------------------------------------------------------------------===//
11503 // Counts number of ones - VPOPCNTD and VPOPCNTQ
11504 //===---------------------------------------------------------------------===//
11506 // FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
11507 defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
11508                                      SchedWriteVecALU, HasVPOPCNTDQ>;
11510 defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
11511 defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
11513 //===---------------------------------------------------------------------===//
11514 // Replicate Single FP - MOVSHDUP and MOVSLDUP
11515 //===---------------------------------------------------------------------===//
11517 multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
11518                             X86SchedWriteWidths sched> {
11519   defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
11520                                       avx512vl_f32_info, HasAVX512>, XS;
11521 }
11523 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
11524                                   SchedWriteFShuffle>;
11525 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
11526                                   SchedWriteFShuffle>;
11528 //===----------------------------------------------------------------------===//
11529 // AVX-512 - MOVDDUP
11530 //===----------------------------------------------------------------------===//
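// Note: the 128-bit form of vmovddup reads only 64 bits of memory, so the memory
// pattern below is modeled with a scalar memory operand and a broadcast load fragment.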
11532 multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
11533                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11534   let ExeDomain = _.ExeDomain in {
11535   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11536                    (ins _.RC:$src), OpcodeStr, "$src", "$src",
11537                    (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
11538                    Sched<[sched]>;
11539   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11540                  (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
11541                  (_.VT (_.BroadcastLdFrag addr:$src))>,
11542                  EVEX, EVEX_CD8<_.EltSize, CD8VH>,
11543                  Sched<[sched.Folded]>;
11544   }
11545 }
11547 multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
11548                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
11549   defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
11550                            VTInfo.info512>, EVEX_V512;
11552   let Predicates = [HasAVX512, HasVLX] in {
11553     defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
11554                                 VTInfo.info256>, EVEX_V256;
11555     defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
11556                                    VTInfo.info128>, EVEX_V128;
11557   }
11558 }
11560 multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
11561                           X86SchedWriteWidths sched> {
11562   defm NAME:      avx512_movddup_common<opc, OpcodeStr, sched,
11563                                         avx512vl_f64_info>, XD, REX_W;
11564 }
11566 defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;
11568 let Predicates = [HasVLX] in {
11569 def : Pat<(v2f64 (X86VBroadcast f64:$src)),
11570           (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11572 def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11573                         (v2f64 VR128X:$src0)),
11574           (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
11575                            (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11576 def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11577                         immAllZerosV),
11578           (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11579 }
11581 //===----------------------------------------------------------------------===//
11582 // AVX-512 - Unpack Instructions
11583 //===----------------------------------------------------------------------===//
11585 let Uses = []<Register>, mayRaiseFPException = 0 in {
11586 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
11587                                  SchedWriteFShuffleSizes, 0, 1>;
11588 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
11589                                  SchedWriteFShuffleSizes>;
11590 }
11592 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
11593                                        SchedWriteShuffle, HasBWI>;
11594 defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
11595                                        SchedWriteShuffle, HasBWI>;
11596 defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
11597                                        SchedWriteShuffle, HasBWI>;
11598 defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
11599                                        SchedWriteShuffle, HasBWI>;
11601 defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
11602                                        SchedWriteShuffle, HasAVX512>;
11603 defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
11604                                        SchedWriteShuffle, HasAVX512>;
11605 defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
11606                                         SchedWriteShuffle, HasAVX512>;
11607 defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
11608                                         SchedWriteShuffle, HasAVX512>;
11610 //===----------------------------------------------------------------------===//
11611 // AVX-512 - Extract & Insert Integer Instructions
11612 //===----------------------------------------------------------------------===//
11614 multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11615                                                             X86VectorVTInfo _> {
11616   def mr : AVX512Ii8<opc, MRMDestMem, (outs),
11617               (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11618               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11619               [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
11620                        addr:$dst)]>,
11621               EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
11622 }
11624 multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
11625   let Predicates = [HasBWI] in {
11626     def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
11627                   (ins _.RC:$src1, u8imm:$src2),
11628                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11629                   [(set GR32orGR64:$dst,
11630                         (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
11631                   EVEX, TAPD, Sched<[WriteVecExtract]>;
11633     defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
11634   }
11635 }
11637 multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
11638   let Predicates = [HasBWI] in {
11639     def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
11640                   (ins _.RC:$src1, u8imm:$src2),
11641                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11642                   [(set GR32orGR64:$dst,
11643                         (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
11644                   EVEX, PD, Sched<[WriteVecExtract]>;
11646     let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
11647     def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
11648                    (ins _.RC:$src1, u8imm:$src2),
11649                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
11650                    EVEX, TAPD, Sched<[WriteVecExtract]>;
11652     defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
11653   }
11654 }
11656 multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
11657                                                             RegisterClass GRC> {
11658   let Predicates = [HasDQI] in {
11659     def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
11660                   (ins _.RC:$src1, u8imm:$src2),
11661                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11662                   [(set GRC:$dst,
11663                       (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
11664                   EVEX, TAPD, Sched<[WriteVecExtract]>;
11666     def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
11667                 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11668                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11669                 [(store (extractelt (_.VT _.RC:$src1),
11670                                     imm:$src2),addr:$dst)]>,
11671                 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
11672                 Sched<[WriteVecExtractSt]>;
11673   }
11674 }
11676 defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, WIG;
11677 defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, WIG;
11678 defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
11679 defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, REX_W;
11681 multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11682                                             X86VectorVTInfo _, PatFrag LdFrag,
11683                                             SDPatternOperator immoperator> {
11684   def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
11685       (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
11686       OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11687       [(set _.RC:$dst,
11688           (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
11689       EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
11690 }
11692 multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
11693                                             X86VectorVTInfo _, PatFrag LdFrag> {
11694   let Predicates = [HasBWI] in {
11695     def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11696         (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
11697         OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11698         [(set _.RC:$dst,
11699             (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX_4V,
11700         Sched<[WriteVecInsert]>;
11702     defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
11703   }
11704 }
11706 multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
11707                                          X86VectorVTInfo _, RegisterClass GRC> {
11708   let Predicates = [HasDQI] in {
11709     def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11710         (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
11711         OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11712         [(set _.RC:$dst,
11713             (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
11714         EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
11716     defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
11717                                     _.ScalarLdFrag, imm>, TAPD;
11718   }
11719 }
11721 defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
11722                                      extloadi8>, TAPD, WIG;
11723 defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
11724                                      extloadi16>, PD, WIG;
11725 defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
11726 defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, REX_W;
11728 let Predicates = [HasAVX512, NoBWI] in {
11729   def : Pat<(X86pinsrb VR128:$src1,
11730                        (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
11731                        timm:$src3),
11732             (VPINSRBrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
11733                        timm:$src3)>;
11734 }
11736 let Predicates = [HasBWI] in {
11737   def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
11738             (VPINSRBZrr VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
11739                         GR8:$src2, sub_8bit), timm:$src3)>;
11740   def : Pat<(X86pinsrb VR128:$src1,
11741                        (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
11742                        timm:$src3),
11743             (VPINSRBZrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
11744                         timm:$src3)>;
11745 }
11747 // Prefer native FP16 instructions when available; these VPINSRW/VPEXTRW-based f16 patterns are a fallback (hence the negative AddedComplexity).
11748 let Predicates = [HasBWI], AddedComplexity = -10 in {
11749   def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (VPINSRWZrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16X)>;
11750   def : Pat<(store f16:$src, addr:$dst), (VPEXTRWZmr addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>;
11751   def : Pat<(i16 (bitconvert f16:$src)), (EXTRACT_SUBREG (VPEXTRWZrr (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0), sub_16bit)>;
11752   def : Pat<(f16 (bitconvert i16:$src)), (COPY_TO_REGCLASS (VPINSRWZrr (v8i16 (IMPLICIT_DEF)), (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit), 0), FR16X)>;
11753 }
11755 //===----------------------------------------------------------------------===//
11756 // VSHUFPS - VSHUFPD Operations
11757 //===----------------------------------------------------------------------===//
11759 multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
11760   defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
11761                                     SchedWriteFShuffle>,
11762                                     EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
11763                                     AVX512AIi8Base, EVEX_4V;
11764 }
11766 defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, PS;
11767 defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, PD, REX_W;
11769 //===----------------------------------------------------------------------===//
11770 // AVX-512 - Byte shift Left/Right
11771 //===----------------------------------------------------------------------===//
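// Note: vpslldq/vpsrldq shift by a byte count given in the immediate, and the shift is
// performed independently within each 128-bit lane.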
11773 multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
11774                                Format MRMm, string OpcodeStr,
11775                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11776   def ri : AVX512<opc, MRMr,
11777              (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
11778              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11779              [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
11780              Sched<[sched]>;
11781   def mi : AVX512<opc, MRMm,
11782            (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
11783            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11784            [(set _.RC:$dst,(_.VT (OpNode
11785                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
11786                                  (i8 timm:$src2))))]>,
11787            Sched<[sched.Folded, sched.ReadAfterFold]>;
11788 }
11790 multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
11791                                    Format MRMm, string OpcodeStr,
11792                                    X86SchedWriteWidths sched, Predicate prd>{
11793   let Predicates = [prd] in
11794     defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11795                                  sched.ZMM, v64i8_info>, EVEX_V512;
11796   let Predicates = [prd, HasVLX] in {
11797     defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11798                                     sched.YMM, v32i8x_info>, EVEX_V256;
11799     defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11800                                     sched.XMM, v16i8x_info>, EVEX_V128;
11801   }
11802 }
11803 defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
11804                                        SchedWriteShuffle, HasBWI>,
11805                                        AVX512PDIi8Base, EVEX_4V, WIG;
11806 defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
11807                                        SchedWriteShuffle, HasBWI>,
11808                                        AVX512PDIi8Base, EVEX_4V, WIG;
11810 multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
11811                                 string OpcodeStr, X86FoldableSchedWrite sched,
11812                                 X86VectorVTInfo _dst, X86VectorVTInfo _src> {
11813   let isCommutable = 1 in
11814   def rr : AVX512BI<opc, MRMSrcReg,
11815              (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
11816              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11817              [(set _dst.RC:$dst,(_dst.VT
11818                                 (OpNode (_src.VT _src.RC:$src1),
11819                                         (_src.VT _src.RC:$src2))))]>,
11820              Sched<[sched]>;
11821   def rm : AVX512BI<opc, MRMSrcMem,
11822            (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
11823            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11824            [(set _dst.RC:$dst,(_dst.VT
11825                               (OpNode (_src.VT _src.RC:$src1),
11826                               (_src.VT (bitconvert
11827                                         (_src.LdFrag addr:$src2))))))]>,
11828            Sched<[sched.Folded, sched.ReadAfterFold]>;
11829 }
11831 multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11832                                     string OpcodeStr, X86SchedWriteWidths sched,
11833                                     Predicate prd> {
11834   let Predicates = [prd] in
11835     defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11836                                   v8i64_info, v64i8_info>, EVEX_V512;
11837   let Predicates = [prd, HasVLX] in {
11838     defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11839                                      v4i64x_info, v32i8x_info>, EVEX_V256;
11840     defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11841                                      v2i64x_info, v16i8x_info>, EVEX_V128;
11842   }
11843 }
11845 defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11846                                         SchedWritePSADBW, HasBWI>, EVEX_4V, WIG;
11848 // Transforms to swizzle an immediate to enable better matching when the
11849 // memory operand isn't in the expected position.
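// The VPTERNLOG imm8 is an 8-entry truth table: each result bit is
// imm8[(src1_bit << 2) | (src2_bit << 1) | src3_bit], so permuting the source operands
// simply permutes the truth-table bits, which is what the transforms below compute.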
11850 def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
11851   // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
11852   uint8_t Imm = N->getZExtValue();
11853   // Swap bits 1/4 and 3/6.
11854   uint8_t NewImm = Imm & 0xa5;
11855   if (Imm & 0x02) NewImm |= 0x10;
11856   if (Imm & 0x10) NewImm |= 0x02;
11857   if (Imm & 0x08) NewImm |= 0x40;
11858   if (Imm & 0x40) NewImm |= 0x08;
11859   return getI8Imm(NewImm, SDLoc(N));
11860 }]>;
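// Worked example (illustrative): imm8 0xCA encodes src1 ? src2 : src3; VPTERNLOG321_imm8
// converts it to 0xD8, which encodes the same operation with the first and third operands
// exchanged (src3 ? src2 : src1).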
11861 def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
11862   // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
11863   uint8_t Imm = N->getZExtValue();
11864   // Swap bits 2/4 and 3/5.
11865   uint8_t NewImm = Imm & 0xc3;
11866   if (Imm & 0x04) NewImm |= 0x10;
11867   if (Imm & 0x10) NewImm |= 0x04;
11868   if (Imm & 0x08) NewImm |= 0x20;
11869   if (Imm & 0x20) NewImm |= 0x08;
11870   return getI8Imm(NewImm, SDLoc(N));
11871 }]>;
11872 def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
11873   // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11874   uint8_t Imm = N->getZExtValue();
11875   // Swap bits 1/2 and 5/6.
11876   uint8_t NewImm = Imm & 0x99;
11877   if (Imm & 0x02) NewImm |= 0x04;
11878   if (Imm & 0x04) NewImm |= 0x02;
11879   if (Imm & 0x20) NewImm |= 0x40;
11880   if (Imm & 0x40) NewImm |= 0x20;
11881   return getI8Imm(NewImm, SDLoc(N));
11882 }]>;
11883 def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
11884   // Convert a VPTERNLOG immediate by moving operand 1 to the end.
11885   uint8_t Imm = N->getZExtValue();
11886   // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
11887   uint8_t NewImm = Imm & 0x81;
11888   if (Imm & 0x02) NewImm |= 0x04;
11889   if (Imm & 0x04) NewImm |= 0x10;
11890   if (Imm & 0x08) NewImm |= 0x40;
11891   if (Imm & 0x10) NewImm |= 0x02;
11892   if (Imm & 0x20) NewImm |= 0x08;
11893   if (Imm & 0x40) NewImm |= 0x20;
11894   return getI8Imm(NewImm, SDLoc(N));
11895 }]>;
11896 def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
11897   // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
11898   uint8_t Imm = N->getZExtValue();
11899   // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
11900   uint8_t NewImm = Imm & 0x81;
11901   if (Imm & 0x02) NewImm |= 0x10;
11902   if (Imm & 0x04) NewImm |= 0x02;
11903   if (Imm & 0x08) NewImm |= 0x20;
11904   if (Imm & 0x10) NewImm |= 0x04;
11905   if (Imm & 0x20) NewImm |= 0x40;
11906   if (Imm & 0x40) NewImm |= 0x08;
11907   return getI8Imm(NewImm, SDLoc(N));
11908 }]>;
11910 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11911                           X86FoldableSchedWrite sched, X86VectorVTInfo _,
11912                           string Name>{
11913   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11914   defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11915                       (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11916                       OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11917                       (OpNode (_.VT _.RC:$src1),
11918                               (_.VT _.RC:$src2),
11919                               (_.VT _.RC:$src3),
11920                               (i8 timm:$src4)), 1, 1>,
11921                       AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
11922   defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11923                     (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11924                     OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11925                     (OpNode (_.VT _.RC:$src1),
11926                             (_.VT _.RC:$src2),
11927                             (_.VT (bitconvert (_.LdFrag addr:$src3))),
11928                             (i8 timm:$src4)), 1, 0>,
11929                     AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11930                     Sched<[sched.Folded, sched.ReadAfterFold]>;
11931   defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11932                     (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11933                     OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
11934                     "$src2, ${src3}"#_.BroadcastStr#", $src4",
11935                     (OpNode (_.VT _.RC:$src1),
11936                             (_.VT _.RC:$src2),
11937                             (_.VT (_.BroadcastLdFrag addr:$src3)),
11938                             (i8 timm:$src4)), 1, 0>, EVEX_B,
11939                     AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11940                     Sched<[sched.Folded, sched.ReadAfterFold]>;
11941   }// Constraints = "$src1 = $dst"
11943   // Additional patterns for matching the passthru operand in other positions.
11944   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11945                    (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11946                    _.RC:$src1)),
11947             (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11948              _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11949   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11950                    (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
11951                    _.RC:$src1)),
11952             (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11953              _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11955   // Additional patterns for matching zero masking with loads in other
11956   // positions.
11957   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11958                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
11959                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11960                    _.ImmAllZerosV)),
11961             (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11962              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11963   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11964                    (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11965                     _.RC:$src2, (i8 timm:$src4)),
11966                    _.ImmAllZerosV)),
11967             (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11968              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11970   // Additional patterns for matching masked loads with different
11971   // operand orders.
11972   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11973                    (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11974                     _.RC:$src2, (i8 timm:$src4)),
11975                    _.RC:$src1)),
11976             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11977              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11978   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11979                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
11980                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11981                    _.RC:$src1)),
11982             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11983              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11984   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11985                    (OpNode _.RC:$src2, _.RC:$src1,
11986                     (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
11987                    _.RC:$src1)),
11988             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11989              _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11990   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11991                    (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11992                     _.RC:$src1, (i8 timm:$src4)),
11993                    _.RC:$src1)),
11994             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11995              _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11996   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11997                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
11998                     _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11999                    _.RC:$src1)),
12000             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
12001              _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
12003   // Additional patterns for matching zero masking with broadcasts in other
12004   // positions.
12005   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
12006                    (OpNode (_.BroadcastLdFrag addr:$src3),
12007                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
12008                    _.ImmAllZerosV)),
12009             (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
12010              _.KRCWM:$mask, _.RC:$src2, addr:$src3,
12011              (VPTERNLOG321_imm8 timm:$src4))>;
12012   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
12013                    (OpNode _.RC:$src1,
12014                     (_.BroadcastLdFrag addr:$src3),
12015                     _.RC:$src2, (i8 timm:$src4)),
12016                    _.ImmAllZerosV)),
12017             (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
12018              _.KRCWM:$mask, _.RC:$src2, addr:$src3,
12019              (VPTERNLOG132_imm8 timm:$src4))>;
12021   // Additional patterns for matching masked broadcasts with different
12022   // operand orders.
12023   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
12024                    (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
12025                     _.RC:$src2, (i8 timm:$src4)),
12026                    _.RC:$src1)),
12027             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
12028              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
12029   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
12030                    (OpNode (_.BroadcastLdFrag addr:$src3),
12031                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
12032                    _.RC:$src1)),
12033             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
12034              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
12035   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
12036                    (OpNode _.RC:$src2, _.RC:$src1,
12037                     (_.BroadcastLdFrag addr:$src3),
12038                     (i8 timm:$src4)), _.RC:$src1)),
12039             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
12040              _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
12041   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
12042                    (OpNode _.RC:$src2,
12043                     (_.BroadcastLdFrag addr:$src3),
12044                     _.RC:$src1, (i8 timm:$src4)),
12045                    _.RC:$src1)),
12046             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
12047              _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
12048   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
12049                    (OpNode (_.BroadcastLdFrag addr:$src3),
12050                     _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
12051                    _.RC:$src1)),
12052             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
12053              _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
12054 }
12056 multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
12057                                  AVX512VLVectorVTInfo _> {
12058   let Predicates = [HasAVX512] in
12059     defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
12060                                _.info512, NAME>, EVEX_V512;
12061   let Predicates = [HasAVX512, HasVLX] in {
12062     defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
12063                                _.info128, NAME>, EVEX_V128;
12064     defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
12065                                _.info256, NAME>, EVEX_V256;
12066   }
12067 }
12069 defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
12070                                         avx512vl_i32_info>;
12071 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
12072                                         avx512vl_i64_info>, REX_W;
12074 // Patterns to implement vnot using vpternlog instead of creating all ones
12075 // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
12076 // so that the result depends only on src0. But we use the same source
12077 // for all operands to prevent a false dependency.
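// (Imm 15 = 0x0F sets exactly the four truth-table rows in which operand 1 is
// zero, so the selected function is the bitwise NOT of operand 1.)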
12078 // TODO: We should maybe have a more generalized algorithm for folding to
12079 // vpternlog.
12080 let Predicates = [HasAVX512] in {
12081   def : Pat<(v64i8 (vnot VR512:$src)),
12082             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
12083   def : Pat<(v32i16 (vnot VR512:$src)),
12084             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
12085   def : Pat<(v16i32 (vnot VR512:$src)),
12086             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
12087   def : Pat<(v8i64 (vnot VR512:$src)),
12088             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
12089 }
12091 let Predicates = [HasAVX512, NoVLX] in {
12092   def : Pat<(v16i8 (vnot VR128X:$src)),
12093             (EXTRACT_SUBREG
12094              (VPTERNLOGQZrri
12095               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12096               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12097               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12098               (i8 15)), sub_xmm)>;
12099   def : Pat<(v8i16 (vnot VR128X:$src)),
12100             (EXTRACT_SUBREG
12101              (VPTERNLOGQZrri
12102               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12103               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12104               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12105               (i8 15)), sub_xmm)>;
12106   def : Pat<(v4i32 (vnot VR128X:$src)),
12107             (EXTRACT_SUBREG
12108              (VPTERNLOGQZrri
12109               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12110               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12111               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12112               (i8 15)), sub_xmm)>;
12113   def : Pat<(v2i64 (vnot VR128X:$src)),
12114             (EXTRACT_SUBREG
12115              (VPTERNLOGQZrri
12116               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12117               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12118               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12119               (i8 15)), sub_xmm)>;
12121   def : Pat<(v32i8 (vnot VR256X:$src)),
12122             (EXTRACT_SUBREG
12123              (VPTERNLOGQZrri
12124               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12125               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12126               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12127               (i8 15)), sub_ymm)>;
12128   def : Pat<(v16i16 (vnot VR256X:$src)),
12129             (EXTRACT_SUBREG
12130              (VPTERNLOGQZrri
12131               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12132               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12133               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12134               (i8 15)), sub_ymm)>;
12135   def : Pat<(v8i32 (vnot VR256X:$src)),
12136             (EXTRACT_SUBREG
12137              (VPTERNLOGQZrri
12138               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12139               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12140               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12141               (i8 15)), sub_ymm)>;
12142   def : Pat<(v4i64 (vnot VR256X:$src)),
12143             (EXTRACT_SUBREG
12144              (VPTERNLOGQZrri
12145               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12146               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12147               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12148               (i8 15)), sub_ymm)>;
12149 }
12151 let Predicates = [HasVLX] in {
12152   def : Pat<(v16i8 (vnot VR128X:$src)),
12153             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12154   def : Pat<(v8i16 (vnot VR128X:$src)),
12155             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12156   def : Pat<(v4i32 (vnot VR128X:$src)),
12157             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12158   def : Pat<(v2i64 (vnot VR128X:$src)),
12159             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12161   def : Pat<(v32i8 (vnot VR256X:$src)),
12162             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12163   def : Pat<(v16i16 (vnot VR256X:$src)),
12164             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12165   def : Pat<(v8i32 (vnot VR256X:$src)),
12166             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12167   def : Pat<(v4i64 (vnot VR256X:$src)),
12168             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12169 }
12171 //===----------------------------------------------------------------------===//
12172 // AVX-512 - FixupImm
12173 //===----------------------------------------------------------------------===//
12175 multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
12176                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
12177                                   X86VectorVTInfo TblVT>{
12178   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
12179       Uses = [MXCSR], mayRaiseFPException = 1 in {
12180     defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12181                         (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12182                          OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12183                         (X86VFixupimm (_.VT _.RC:$src1),
12184                                       (_.VT _.RC:$src2),
12185                                       (TblVT.VT _.RC:$src3),
12186                                       (i32 timm:$src4))>, Sched<[sched]>;
12187     defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12188                       (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
12189                       OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12190                       (X86VFixupimm (_.VT _.RC:$src1),
12191                                     (_.VT _.RC:$src2),
12192                                     (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
12193                                     (i32 timm:$src4))>,
12194                       Sched<[sched.Folded, sched.ReadAfterFold]>;
12195     defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12196                       (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
12197                     OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
12198                     "$src2, ${src3}"#_.BroadcastStr#", $src4",
12199                       (X86VFixupimm (_.VT _.RC:$src1),
12200                                     (_.VT _.RC:$src2),
12201                                     (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
12202                                     (i32 timm:$src4))>,
12203                     EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
12204   } // Constraints = "$src1 = $dst"
12205 }
12207 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
12208                                       X86FoldableSchedWrite sched,
12209                                       X86VectorVTInfo _, X86VectorVTInfo TblVT>
12210   : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
12211 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
12212   defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12213                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12214                       OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
12215                       "$src2, $src3, {sae}, $src4",
12216                       (X86VFixupimmSAE (_.VT _.RC:$src1),
12217                                        (_.VT _.RC:$src2),
12218                                        (TblVT.VT _.RC:$src3),
12219                                        (i32 timm:$src4))>,
12220                       EVEX_B, Sched<[sched]>;
12221   }
12222 }
12224 multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
12225                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
12226                                   X86VectorVTInfo _src3VT> {
12227   let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
12228       ExeDomain = _.ExeDomain in {
12229     defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
12230                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12231                       OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12232                       (X86VFixupimms (_.VT _.RC:$src1),
12233                                      (_.VT _.RC:$src2),
12234                                      (_src3VT.VT _src3VT.RC:$src3),
12235                                      (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
12236     let Uses = [MXCSR] in
12237     defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
12238                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12239                       OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
12240                       "$src2, $src3, {sae}, $src4",
12241                       (X86VFixupimmSAEs (_.VT _.RC:$src1),
12242                                         (_.VT _.RC:$src2),
12243                                         (_src3VT.VT _src3VT.RC:$src3),
12244                                         (i32 timm:$src4))>,
12245                       EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
12246     defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
12247                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
12248                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12249                      (X86VFixupimms (_.VT _.RC:$src1),
12250                                     (_.VT _.RC:$src2),
12251                                     (_src3VT.VT (scalar_to_vector
12252                                               (_src3VT.ScalarLdFrag addr:$src3))),
12253                                     (i32 timm:$src4))>,
12254                      Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
12255   }
12256 }
12258 multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
12259                                       AVX512VLVectorVTInfo _Vec,
12260                                       AVX512VLVectorVTInfo _Tbl> {
12261   let Predicates = [HasAVX512] in
12262     defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
12263                                 _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
12264                                 EVEX_4V, EVEX_V512;
12265   let Predicates = [HasAVX512, HasVLX] in {
12266     defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
12267                             _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
12268                             EVEX_4V, EVEX_V128;
12269     defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
12270                             _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
12271                             EVEX_4V, EVEX_V256;
12272   }
12273 }
12275 defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
12276                                            SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
12277                           AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
12278 defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
12279                                            SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
12280                           AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W;
12281 defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
12282                          avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12283 defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
12284                          avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;
12286 // Patterns used to select SSE scalar fp arithmetic instructions from
12287 // either:
12289 // (1) a scalar fp operation followed by a blend
12291 // The effect is that the backend no longer emits unnecessary vector
12292 // insert instructions immediately after SSE scalar fp instructions
12293 // like addss or mulss.
12295 // For example, given the following code:
12296 //   __m128 foo(__m128 A, __m128 B) {
12297 //     A[0] += B[0];
12298 //     return A;
12299 //   }
12301 // Previously we generated:
12302 //   addss %xmm0, %xmm1
12303 //   movss %xmm1, %xmm0
12305 // We now generate:
12306 //   addss %xmm1, %xmm0
12308 // (2) a vector packed single/double fp operation followed by a vector insert
12310 // The effect is that the backend converts the packed fp instruction
12311 // followed by a vector insert into a single SSE scalar fp instruction.
12313 // For example, given the following code:
12314 //   __m128 foo(__m128 A, __m128 B) {
12315 //     __m128 C = A + B;
12316 //     return (__m128) {C[0], A[1], A[2], A[3]};
12317 //   }
12319 // Previously we generated:
12320 //   addps %xmm0, %xmm1
12321 //   movss %xmm1, %xmm0
12323 // We now generate:
12324 //   addss %xmm1, %xmm0
12326 // TODO: Some canonicalization in lowering would simplify the number of
12327 // patterns we have to try to match.
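// For instance, with Op = fadd and MoveNode = X86Movss, case (1) above appears
// in the DAG roughly as
//   (X86Movss $dst, (scalar_to_vector (fadd (extractelt $dst, 0), $src)))
// and the first pattern below selects it to VADDSSZrr_Int.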
12328 multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
12329                                           string OpcPrefix, SDNode MoveNode,
12330                                           X86VectorVTInfo _, PatLeaf ZeroFP> {
12331   let Predicates = [HasAVX512] in {
12332     // extracted scalar math op with insert via movss
12333     def : Pat<(MoveNode
12334                (_.VT VR128X:$dst),
12335                (_.VT (scalar_to_vector
12336                       (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12337                           _.FRC:$src)))),
12338               (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
12339                (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
12340     def : Pat<(MoveNode
12341                (_.VT VR128X:$dst),
12342                (_.VT (scalar_to_vector
12343                       (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12344                           (_.ScalarLdFrag addr:$src))))),
12345               (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;
12347     // extracted masked scalar math op with insert via movss
12348     def : Pat<(MoveNode (_.VT VR128X:$src1),
12349                (scalar_to_vector
12350                 (X86selects_mask VK1WM:$mask,
12351                             (MaskedOp (_.EltVT
12352                                        (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12353                                       _.FRC:$src2),
12354                             _.FRC:$src0))),
12355               (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
12356                (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12357                VK1WM:$mask, _.VT:$src1,
12358                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12359     def : Pat<(MoveNode (_.VT VR128X:$src1),
12360                (scalar_to_vector
12361                 (X86selects_mask VK1WM:$mask,
12362                             (MaskedOp (_.EltVT
12363                                        (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12364                                       (_.ScalarLdFrag addr:$src2)),
12365                             _.FRC:$src0))),
12366               (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
12367                (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12368                VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12370     // extracted zero-masked scalar math op with insert via movss
12371     def : Pat<(MoveNode (_.VT VR128X:$src1),
12372                (scalar_to_vector
12373                 (X86selects_mask VK1WM:$mask,
12374                             (MaskedOp (_.EltVT
12375                                        (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12376                                       _.FRC:$src2), (_.EltVT ZeroFP)))),
12377       (!cast<I>("V"#OpcPrefix#"Zrr_Intkz")
12378           VK1WM:$mask, _.VT:$src1,
12379           (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12380     def : Pat<(MoveNode (_.VT VR128X:$src1),
12381                (scalar_to_vector
12382                 (X86selects_mask VK1WM:$mask,
12383                             (MaskedOp (_.EltVT
12384                                        (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12385                                       (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
12386       (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12387   }
12388 }
12390 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
12391 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
12392 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
12393 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
12395 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
12396 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
12397 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
12398 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
12400 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>;
12401 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>;
12402 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>;
12403 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>;
12405 multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix,
12406                                              SDNode Move, X86VectorVTInfo _> {
12407   let Predicates = [HasAVX512] in {
12408     def : Pat<(_.VT (Move _.VT:$dst,
12409                      (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
12410               (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
12411   }
12412 }
12414 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
12415 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
12416 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>;
12418 //===----------------------------------------------------------------------===//
12419 // AES instructions
12420 //===----------------------------------------------------------------------===//
12422 multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
12423   let Predicates = [HasVLX, HasVAES] in {
12424     defm Z128 : AESI_binop_rm_int<Op, OpStr,
12425                                   !cast<Intrinsic>(IntPrefix),
12426                                   loadv2i64, 0, VR128X, i128mem>,
12427                   EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, WIG;
12428     defm Z256 : AESI_binop_rm_int<Op, OpStr,
12429                                   !cast<Intrinsic>(IntPrefix#"_256"),
12430                                   loadv4i64, 0, VR256X, i256mem>,
12431                   EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, WIG;
12432     }
12433     let Predicates = [HasAVX512, HasVAES] in
12434     defm Z    : AESI_binop_rm_int<Op, OpStr,
12435                                   !cast<Intrinsic>(IntPrefix#"_512"),
12436                                   loadv8i64, 0, VR512, i512mem>,
12437                   EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, WIG;
12438 }
12440 defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
12441 defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
12442 defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
12443 defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
12445 //===----------------------------------------------------------------------===//
12446 // PCLMUL instructions - Carry less multiplication
12447 //===----------------------------------------------------------------------===//
12449 let Predicates = [HasAVX512, HasVPCLMULQDQ] in
12450 defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
12451                               EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, WIG;
12453 let Predicates = [HasVLX, HasVPCLMULQDQ] in {
12454 defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
12455                               EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, WIG;
12457 defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
12458                                 int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
12459                                 EVEX_CD8<64, CD8VF>, WIG;
12460 }
12462 // Aliases
12463 defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
12464 defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
12465 defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
12467 //===----------------------------------------------------------------------===//
12468 // VBMI2
12469 //===----------------------------------------------------------------------===//
12471 multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
12472                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12473   let Constraints = "$src1 = $dst",
12474       ExeDomain   = VTI.ExeDomain in {
12475     defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12476                 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12477                 "$src3, $src2", "$src2, $src3",
12478                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
12479                 T8PD, EVEX_4V, Sched<[sched]>;
12480     defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12481                 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12482                 "$src3, $src2", "$src2, $src3",
12483                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12484                         (VTI.VT (VTI.LdFrag addr:$src3))))>,
12485                 T8PD, EVEX_4V,
12486                 Sched<[sched.Folded, sched.ReadAfterFold]>;
12487   }
12488 }
12490 multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12491                                X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
12492          : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
12493   let Constraints = "$src1 = $dst",
12494       ExeDomain   = VTI.ExeDomain in
12495   defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12496               (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
12497               "${src3}"#VTI.BroadcastStr#", $src2",
12498               "$src2, ${src3}"#VTI.BroadcastStr,
12499               (OpNode VTI.RC:$src1, VTI.RC:$src2,
12500                (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12501               T8PD, EVEX_4V, EVEX_B,
12502               Sched<[sched.Folded, sched.ReadAfterFold]>;
12503 }
12505 multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
12506                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12507   let Predicates = [HasVBMI2] in
12508   defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12509                                    EVEX_V512;
12510   let Predicates = [HasVBMI2, HasVLX] in {
12511     defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12512                                    EVEX_V256;
12513     defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12514                                    EVEX_V128;
12515   }
12516 }
12518 multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
12519                                       X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12520   let Predicates = [HasVBMI2] in
12521   defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12522                                     EVEX_V512;
12523   let Predicates = [HasVBMI2, HasVLX] in {
12524     defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12525                                     EVEX_V256;
12526     defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12527                                     EVEX_V128;
12528   }
12529 }
12530 multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
12531                            SDNode OpNode, X86SchedWriteWidths sched> {
12532   defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
12533              avx512vl_i16_info>, REX_W, EVEX_CD8<16, CD8VF>;
12534   defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
12535              avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12536   defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
12537              avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
12538 }
12540 multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
12541                            SDNode OpNode, X86SchedWriteWidths sched> {
12542   defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
12543              avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
12544              REX_W, EVEX_CD8<16, CD8VF>;
12545   defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
12546              OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
12547   defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
12548              sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
12549 }
12551 // Concat & Shift
12552 defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
12553 defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
12554 defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
12555 defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
12557 // Compress
12558 defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
12559                                          avx512vl_i8_info, HasVBMI2>, EVEX;
12560 defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
12561                                           avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
12562 // Expand
12563 defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
12564                                       avx512vl_i8_info, HasVBMI2>, EVEX;
12565 defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
12566                                       avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
12568 //===----------------------------------------------------------------------===//
12569 // VNNI
12570 //===----------------------------------------------------------------------===//
12572 let Constraints = "$src1 = $dst" in
12573 multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12574                     X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12575                     bit IsCommutable> {
12576   let ExeDomain = VTI.ExeDomain in {
12577   defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12578                                    (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12579                                    "$src3, $src2", "$src2, $src3",
12580                                    (VTI.VT (OpNode VTI.RC:$src1,
12581                                             VTI.RC:$src2, VTI.RC:$src3)),
12582                                    IsCommutable, IsCommutable>,
12583                                    EVEX_4V, T8PD, Sched<[sched]>;
12584   defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12585                                    (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12586                                    "$src3, $src2", "$src2, $src3",
12587                                    (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12588                                             (VTI.VT (VTI.LdFrag addr:$src3))))>,
12589                                    EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
12590                                    Sched<[sched.Folded, sched.ReadAfterFold,
12591                                           sched.ReadAfterFold]>;
12592   defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12593                                    (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
12594                                    OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
12595                                    "$src2, ${src3}"#VTI.BroadcastStr,
12596                                    (OpNode VTI.RC:$src1, VTI.RC:$src2,
12597                                     (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12598                                    EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
12599                                    T8PD, Sched<[sched.Folded, sched.ReadAfterFold,
12600                                                 sched.ReadAfterFold]>;
12601   }
12602 }
12604 multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
12605                        X86SchedWriteWidths sched, bit IsCommutable> {
12606   let Predicates = [HasVNNI] in
12607   defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
12608                            IsCommutable>, EVEX_V512;
12609   let Predicates = [HasVNNI, HasVLX] in {
12610     defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
12611                            IsCommutable>, EVEX_V256;
12612     defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
12613                            IsCommutable>, EVEX_V128;
12614   }
12615 }
12617 // FIXME: Is there a better scheduler class for VPDP?
12618 defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
12619 defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
12620 defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
12621 defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
12623 // Patterns to match VPDPWSSD from existing instructions/intrinsics.
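// i.e. an add of a single-use vpmaddwd result (the _su fragment) folds
// directly: (add X, (X86vpmaddwd_su Y, Z)) => (VPDPWSSDZr X, Y, Z), and
// likewise for the load and VLX variants below.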
12624 let Predicates = [HasVNNI] in {
12625   def : Pat<(v16i32 (add VR512:$src1,
12626                          (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
12627             (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
12628   def : Pat<(v16i32 (add VR512:$src1,
12629                          (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
12630             (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
12631 }
12632 let Predicates = [HasVNNI,HasVLX] in {
12633   def : Pat<(v8i32 (add VR256X:$src1,
12634                         (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
12635             (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
12636   def : Pat<(v8i32 (add VR256X:$src1,
12637                         (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
12638             (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
12639   def : Pat<(v4i32 (add VR128X:$src1,
12640                         (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
12641             (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
12642   def : Pat<(v4i32 (add VR128X:$src1,
12643                         (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
12644             (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
12645 }
12647 //===----------------------------------------------------------------------===//
12648 // Bit Algorithms
12649 //===----------------------------------------------------------------------===//
12651 // FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
12652 defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
12653                                    avx512vl_i8_info, HasBITALG>;
12654 defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
12655                                    avx512vl_i16_info, HasBITALG>, REX_W;
12657 defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
12658 defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
12660 def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
12661                                  (X86Vpshufbitqmb node:$src1, node:$src2), [{
12662   return N->hasOneUse();
12663 }]>;
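// The single-use (_su) form is used for the masked patterns below, so the
// compare is only folded under a mask when its result has no other users.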
12665 multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12666   defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
12667                                 (ins VTI.RC:$src1, VTI.RC:$src2),
12668                                 "vpshufbitqmb",
12669                                 "$src2, $src1", "$src1, $src2",
12670                                 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12671                                 (VTI.VT VTI.RC:$src2)),
12672                                 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12673                                 (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
12674                                 Sched<[sched]>;
12675   defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
12676                                 (ins VTI.RC:$src1, VTI.MemOp:$src2),
12677                                 "vpshufbitqmb",
12678                                 "$src2, $src1", "$src1, $src2",
12679                                 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12680                                 (VTI.VT (VTI.LdFrag addr:$src2))),
12681                                 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12682                                 (VTI.VT (VTI.LdFrag addr:$src2)))>,
12683                                 EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
12684                                 Sched<[sched.Folded, sched.ReadAfterFold]>;
12685 }
12687 multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12688   let Predicates = [HasBITALG] in
12689   defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
12690   let Predicates = [HasBITALG, HasVLX] in {
12691     defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
12692     defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
12693   }
12694 }
12696 // FIXME: Is there a better scheduler class for VPSHUFBITQMB?
12697 defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
12699 //===----------------------------------------------------------------------===//
12700 // GFNI
12701 //===----------------------------------------------------------------------===//
12703 multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12704                                    X86SchedWriteWidths sched> {
12705   let Predicates = [HasGFNI, HasAVX512] in
12706   defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
12707                                 EVEX_V512;
12708   let Predicates = [HasGFNI, HasVLX] in {
12709     defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
12710                                 EVEX_V256;
12711     defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
12712                                 EVEX_V128;
12713   }
12714 }
12716 defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
12717                                           SchedWriteVecALU>,
12718                                           EVEX_CD8<8, CD8VF>, T8PD;
12720 multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
12721                                       X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12722                                       X86VectorVTInfo BcstVTI>
12723            : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
12724   let ExeDomain = VTI.ExeDomain in
12725   defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12726                 (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
12727                 OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
12728                 "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
12729                 (OpNode (VTI.VT VTI.RC:$src1),
12730                  (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
12731                  (i8 timm:$src3))>, EVEX_B,
12732                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12733 }
12735 multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12736                                      X86SchedWriteWidths sched> {
12737   let Predicates = [HasGFNI, HasAVX512] in
12738   defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
12739                                            v64i8_info, v8i64_info>, EVEX_V512;
12740   let Predicates = [HasGFNI, HasVLX] in {
12741     defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
12742                                            v32i8x_info, v4i64x_info>, EVEX_V256;
12743     defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
12744                                            v16i8x_info, v2i64x_info>, EVEX_V128;
12745   }
12746 }
12748 defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12749                          X86GF2P8affineinvqb, SchedWriteVecIMul>,
12750                          EVEX_4V, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
12751 defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12752                          X86GF2P8affineqb, SchedWriteVecIMul>,
12753                          EVEX_4V, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
12756 //===----------------------------------------------------------------------===//
12757 // AVX5124FMAPS
12758 //===----------------------------------------------------------------------===//
12760 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12761     Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
12762 defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12763                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12764                     "v4fmaddps", "$src3, $src2", "$src2, $src3",
12765                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12766                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12768 defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12769                      (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12770                      "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12771                      []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12772                      Sched<[SchedWriteFMA.ZMM.Folded]>;
12774 defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12775                     (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
12776                     "v4fmaddss", "$src3, $src2", "$src2, $src3",
12777                     []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12778                     Sched<[SchedWriteFMA.Scl.Folded]>;
12780 defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12781                      (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12782                      "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12783                      []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12784                      Sched<[SchedWriteFMA.Scl.Folded]>;
12785 }
12787 //===----------------------------------------------------------------------===//
12788 // AVX5124VNNIW
12789 //===----------------------------------------------------------------------===//
12791 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12792     Constraints = "$src1 = $dst" in {
12793 defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12794                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12795                      "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12796                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12797                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12799 defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12800                      (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12801                      "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12802                      []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12803                      Sched<[SchedWriteFMA.ZMM.Folded]>;
12804 }
12806 let hasSideEffects = 0 in {
12807   let mayStore = 1, SchedRW = [WriteFStoreX] in
12808   def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12809   let mayLoad = 1, SchedRW = [WriteFLoadX] in
12810   def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12811 }
12813 //===----------------------------------------------------------------------===//
12814 // VP2INTERSECT
12815 //===----------------------------------------------------------------------===//
12817 multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
12818   def rr : I<0x68, MRMSrcReg,
12819                   (outs _.KRPC:$dst),
12820                   (ins _.RC:$src1, _.RC:$src2),
12821                   !strconcat("vp2intersect", _.Suffix,
12822                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12823                   [(set _.KRPC:$dst, (X86vp2intersect
12824                             _.RC:$src1, (_.VT _.RC:$src2)))]>,
12825                   EVEX_4V, T8XD, Sched<[sched]>;
12827   def rm : I<0x68, MRMSrcMem,
12828                   (outs _.KRPC:$dst),
12829                   (ins  _.RC:$src1, _.MemOp:$src2),
12830                   !strconcat("vp2intersect", _.Suffix,
12831                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12832                   [(set _.KRPC:$dst, (X86vp2intersect
12833                             _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12834                   EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>,
12835                   Sched<[sched.Folded, sched.ReadAfterFold]>;
12837   def rmb : I<0x68, MRMSrcMem,
12838                   (outs _.KRPC:$dst),
12839                   (ins _.RC:$src1, _.ScalarMemOp:$src2),
12840                   !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12841                              ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12842                   [(set _.KRPC:$dst, (X86vp2intersect
12843                              _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
12844                   EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
12845                   Sched<[sched.Folded, sched.ReadAfterFold]>;
12846 }
12848 multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
12849   let Predicates  = [HasAVX512, HasVP2INTERSECT] in
12850     defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
12852   let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12853     defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
12854     defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
12855   }
12856 }
12858 defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
12859 defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, REX_W;
12861 multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
12862                              X86SchedWriteWidths sched,
12863                              AVX512VLVectorVTInfo _SrcVTInfo,
12864                              AVX512VLVectorVTInfo _DstVTInfo,
12865                              SDNode OpNode, Predicate prd,
12866                              bit IsCommutable = 0> {
12867   let Predicates = [prd] in
12868     defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
12869                                    _SrcVTInfo.info512, _DstVTInfo.info512,
12870                                    _SrcVTInfo.info512, IsCommutable>,
12871                                    EVEX_V512, EVEX_CD8<32, CD8VF>;
12872   let Predicates = [HasVLX, prd] in {
12873     defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
12874                                       _SrcVTInfo.info256, _DstVTInfo.info256,
12875                                       _SrcVTInfo.info256, IsCommutable>,
12876                                      EVEX_V256, EVEX_CD8<32, CD8VF>;
12877     defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
12878                                       _SrcVTInfo.info128, _DstVTInfo.info128,
12879                                       _SrcVTInfo.info128, IsCommutable>,
12880                                       EVEX_V128, EVEX_CD8<32, CD8VF>;
12881   }
12882 }
12884 let ExeDomain = SSEPackedSingle in
12885 defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12886                                         SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
12887                                         avx512vl_f32_info, avx512vl_bf16_info,
12888                                         X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
12890 // Truncate Float to BFloat16
12891 multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12892                              X86SchedWriteWidths sched> {
12893   let ExeDomain = SSEPackedSingle in {
12894   let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
12895     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16bf16x_info, v16f32_info,
12896                             X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12897   }
12898   let Predicates = [HasBF16, HasVLX] in {
12899     let Uses = []<Register>, mayRaiseFPException = 0 in {
12900     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v4f32x_info,
12901                                null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12902                                VK4WM>, EVEX_V128;
12903     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v8f32x_info,
12904                                X86cvtneps2bf16, X86cvtneps2bf16,
12905                                sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12906     }
12907   } // Predicates = [HasBF16, HasVLX]
12908   } // ExeDomain = SSEPackedSingle
12910   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12911                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12912                   VR128X:$src), 0>;
12913   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12914                   (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12915                   f128mem:$src), 0, "intel">;
12916   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12917                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12918                   VR256X:$src), 0>;
12919   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12920                   (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12921                   f256mem:$src), 0, "intel">;
12922 }
12924 defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
12925                                        SchedWriteCvtPD2PS>, T8XS,
12926                                        EVEX_CD8<32, CD8VF>;
12928 let Predicates = [HasBF16, HasVLX] in {
12929   // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
12930   // patterns have been disabled with null_frag.
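  // The unmasked and masked selections are therefore spelled out explicitly
  // here, with X86mcvtneps2bf16 carrying the passthru/zero operand for the
  // masked forms.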
12931   def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
12932             (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12933   def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8bf16 VR128X:$src0),
12934                               VK4WM:$mask),
12935             (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
12936   def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8bf16x_info.ImmAllZerosV,
12937                               VK4WM:$mask),
12938             (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
12940   def : Pat<(v8bf16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
12941             (VCVTNEPS2BF16Z128rm addr:$src)>;
12942   def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8bf16 VR128X:$src0),
12943                               VK4WM:$mask),
12944             (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12945   def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8bf16x_info.ImmAllZerosV,
12946                               VK4WM:$mask),
12947             (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
12949   def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32
12950                                      (X86VBroadcastld32 addr:$src)))),
12951             (VCVTNEPS2BF16Z128rmb addr:$src)>;
12952   def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12953                               (v8bf16 VR128X:$src0), VK4WM:$mask),
12954             (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12955   def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12956                               v8bf16x_info.ImmAllZerosV, VK4WM:$mask),
12957             (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
12959   def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (v4f32 VR128X:$src))),
12960             (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12961   def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src))),
12962             (VCVTNEPS2BF16Z128rm addr:$src)>;
12964   def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (v8f32 VR256X:$src))),
12965             (VCVTNEPS2BF16Z256rr VR256X:$src)>;
12966   def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src))),
12967             (VCVTNEPS2BF16Z256rm addr:$src)>;
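  // bf16 broadcasts and splats reuse the 16-bit integer broadcast instructions,
  // since the element width matches and the bit pattern is unchanged.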
12969   def : Pat<(v8bf16 (X86VBroadcastld16 addr:$src)),
12970             (VPBROADCASTWZ128rm addr:$src)>;
12971   def : Pat<(v16bf16 (X86VBroadcastld16 addr:$src)),
12972             (VPBROADCASTWZ256rm addr:$src)>;
12974   def : Pat<(v8bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12975             (VPBROADCASTWZ128rr VR128X:$src)>;
12976   def : Pat<(v16bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12977             (VPBROADCASTWZ256rr VR128X:$src)>;
12979   def : Pat<(v8bf16 (X86vfpround (v8f32 VR256X:$src))),
12980             (VCVTNEPS2BF16Z256rr VR256X:$src)>;
12981   def : Pat<(v8bf16 (X86vfpround (loadv8f32 addr:$src))),
12982             (VCVTNEPS2BF16Z256rm addr:$src)>;
12984   // TODO: No scalar broadcast, since we do not support a legal scalar bf16 type so far.
12985 }
12987 let Predicates = [HasBF16] in {
12988   def : Pat<(v32bf16 (X86VBroadcastld16 addr:$src)),
12989             (VPBROADCASTWZrm addr:$src)>;
12991   def : Pat<(v32bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12992             (VPBROADCASTWZrr VR128X:$src)>;
12994   def : Pat<(v16bf16 (X86vfpround (v16f32 VR512:$src))),
12995             (VCVTNEPS2BF16Zrr VR512:$src)>;
12996   def : Pat<(v16bf16 (X86vfpround (loadv16f32 addr:$src))),
12997             (VCVTNEPS2BF16Zrm addr:$src)>;
12998   // TODO: No scalar broadcast, since we do not support a legal scalar bf16 type so far.
12999 }
13001 let Constraints = "$src1 = $dst" in {
13002 multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
13003                               X86FoldableSchedWrite sched,
13004                               X86VectorVTInfo _, X86VectorVTInfo src_v> {
13005   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13006                            (ins src_v.RC:$src2, src_v.RC:$src3),
13007                            OpcodeStr, "$src3, $src2", "$src2, $src3",
13008                            (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
13009                            EVEX_4V, Sched<[sched]>;
13011   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13012                                (ins src_v.RC:$src2, src_v.MemOp:$src3),
13013                                OpcodeStr, "$src3, $src2", "$src2, $src3",
13014                                (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
13015                                (src_v.LdFrag addr:$src3)))>, EVEX_4V,
13016                                Sched<[sched.Folded, sched.ReadAfterFold]>;
13018   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13019                   (ins src_v.RC:$src2, f32mem:$src3),
13020                   OpcodeStr,
13021                   !strconcat("${src3}", _.BroadcastStr,", $src2"),
13022                   !strconcat("$src2, ${src3}", _.BroadcastStr),
13023                   (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
13024                   (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
13025                   EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
13026 }
13028 } // Constraints = "$src1 = $dst"
13030 multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
13031                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
13032                                  AVX512VLVectorVTInfo src_v, Predicate prd> {
13033   let Predicates = [prd] in {
13034     defm Z    : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
13035                                    src_v.info512>, EVEX_V512;
13036   }
13037   let Predicates = [HasVLX, prd] in {
13038     defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
13039                                    src_v.info256>, EVEX_V256;
13040     defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
13041                                    src_v.info128>, EVEX_V128;
13042   }
13043 }
13045 let ExeDomain = SSEPackedSingle in
13046 defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
13047                                        avx512vl_f32_info, avx512vl_bf16_info,
13048                                        HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
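// vdpbf16ps multiplies pairs of adjacent BF16 elements from the two sources and
// accumulates the results into the packed single precision destination, e.g.
// "vdpbf16ps %zmm2, %zmm1, %zmm0" (AT&T syntax, for illustration).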
13050 //===----------------------------------------------------------------------===//
13051 // AVX512FP16
13052 //===----------------------------------------------------------------------===//
13054 let Predicates = [HasFP16] in {
13055 // Move word (r/m16) to packed word
13056 def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
13057                       "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
13058 def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
13059                       "vmovw\t{$src, $dst|$dst, $src}",
13060                       [(set VR128X:$dst,
13061                         (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
13062                       T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;
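// For illustration: "vmovw (%rdi), %xmm0" (AT&T syntax) loads a 16-bit value into
// the low word of %xmm0 and zeroes the remaining bits, which is what the
// zero-extending patterns below rely on.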
13064 def : Pat<(f16 (bitconvert GR16:$src)),
13065           (f16 (COPY_TO_REGCLASS
13066                 (VMOVW2SHrr
13067                  (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
13068                 FR16X))>;
13069 def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))),
13070           (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
13071 def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))),
13072           (VMOVW2SHrr GR32:$src)>;
13073 // FIXME: We should really find a way to improve these patterns.
13074 def : Pat<(v8i32 (X86vzmovl
13075                   (insert_subvector undef,
13076                                     (v4i32 (scalar_to_vector
13077                                             (and GR32:$src, 0xffff))),
13078                                     (iPTR 0)))),
13079           (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
13080 def : Pat<(v16i32 (X86vzmovl
13081                    (insert_subvector undef,
13082                                      (v4i32 (scalar_to_vector
13083                                              (and GR32:$src, 0xffff))),
13084                                      (iPTR 0)))),
13085           (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
13087 def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))),
13088           (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
13090 // The AVX512 128-bit vmovw instruction writes zeros in the high 128-bit part.
13091 def : Pat<(v8i16 (X86vzload16 addr:$src)),
13092           (VMOVWrm addr:$src)>;
13093 def : Pat<(v16i16 (X86vzload16 addr:$src)),
13094           (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
13096 // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
13097 def : Pat<(v32i16 (X86vzload16 addr:$src)),
13098           (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
13100 def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))),
13101           (VMOVWrm addr:$src)>;
13102 def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))),
13103           (VMOVWrm addr:$src)>;
13104 def : Pat<(v8i32 (X86vzmovl
13105                   (insert_subvector undef,
13106                                     (v4i32 (scalar_to_vector
13107                                             (i32 (zextloadi16 addr:$src)))),
13108                                     (iPTR 0)))),
13109           (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
13110 def : Pat<(v16i32 (X86vzmovl
13111                    (insert_subvector undef,
13112                                      (v4i32 (scalar_to_vector
13113                                              (i32 (zextloadi16 addr:$src)))),
13114                                      (iPTR 0)))),
13115           (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
13117 // Move word from xmm register to r/m16
13118 def VMOVSH2Wrr  : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
13119                        "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveToGpr]>;
13120 def VMOVWmr  : AVX512<0x7E, MRMDestMem, (outs),
13121                        (ins i16mem:$dst, VR128X:$src),
13122                        "vmovw\t{$src, $dst|$dst, $src}",
13123                        [(store (i16 (extractelt (v8i16 VR128X:$src),
13124                                      (iPTR 0))), addr:$dst)]>,
13125                        T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;
13127 def : Pat<(i16 (bitconvert FR16X:$src)),
13128           (i16 (EXTRACT_SUBREG
13129                 (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)),
13130                 sub_16bit))>;
13131 def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
13132           (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>;
13134 // Allow "vmovw" to use GR64
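// These GR64 forms carry no ISel patterns; presumably they exist so the assembler
// and disassembler accept the REX.W-encoded variants, and only the low 16 bits of
// the GPR are meaningful.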
13135 let hasSideEffects = 0 in {
13136   def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
13137                      "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
13138   def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
13139                      "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>;
13140 }
13141 } // Predicates = [HasFP16]
13143 // Convert 16-bit float to i16/u16
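// In the conversion multiclasses below, OpNode drives the plain (unmasked) ISel
// patterns, MaskOpNode the masked ones, and OpNodeRnd the 512-bit form with an
// explicit rounding mode or SAE.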
13144 multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13145                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13146                           AVX512VLVectorVTInfo _Dst,
13147                           AVX512VLVectorVTInfo _Src,
13148                           X86SchedWriteWidths sched> {
13149   let Predicates = [HasFP16] in {
13150     defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
13151                             OpNode, MaskOpNode, sched.ZMM>,
13152              avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
13153                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13154   }
13155   let Predicates = [HasFP16, HasVLX] in {
13156     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
13157                                OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
13158     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
13159                                OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
13160   }
13161 }
13163 // Convert 16-bit float to i16/u16 with truncation
13164 multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13165                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13166                            AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src,
13167                            X86SchedWriteWidths sched> {
13168   let Predicates = [HasFP16] in {
13169     defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
13170                             OpNode, MaskOpNode, sched.ZMM>,
13171              avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
13172                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13173   }
13174   let Predicates = [HasFP16, HasVLX] in {
13175     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
13176                                OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
13177     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
13178                                OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
13179   }
13180 }
13182 defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
13183                                 X86cvtp2UIntRnd, avx512vl_i16_info,
13184                                 avx512vl_f16_info, SchedWriteCvtPD2DQ>,
13185                                 T_MAP5PS, EVEX_CD8<16, CD8VF>;
13186 defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
13187                                 X86VUintToFpRnd, avx512vl_f16_info,
13188                                 avx512vl_i16_info, SchedWriteCvtPD2DQ>,
13189                                 T_MAP5XD, EVEX_CD8<16, CD8VF>;
13190 defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si,
13191                                 X86cvttp2si, X86cvttp2siSAE,
13192                                 avx512vl_i16_info, avx512vl_f16_info,
13193                                 SchedWriteCvtPD2DQ>, T_MAP5PD, EVEX_CD8<16, CD8VF>;
13194 defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui,
13195                                 X86cvttp2ui, X86cvttp2uiSAE,
13196                                 avx512vl_i16_info, avx512vl_f16_info,
13197                                 SchedWriteCvtPD2DQ>, T_MAP5PS, EVEX_CD8<16, CD8VF>;
13198 defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
13199                                 X86cvtp2IntRnd, avx512vl_i16_info,
13200                                 avx512vl_f16_info, SchedWriteCvtPD2DQ>,
13201                                 T_MAP5PD, EVEX_CD8<16, CD8VF>;
13202 defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
13203                                 X86VSintToFpRnd, avx512vl_f16_info,
13204                                 avx512vl_i16_info, SchedWriteCvtPD2DQ>,
13205                                 T_MAP5XS, EVEX_CD8<16, CD8VF>;
13207 // Convert Half to Signed/Unsigned Doubleword
13208 multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13209                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13210                            X86SchedWriteWidths sched> {
13211   let Predicates = [HasFP16] in {
13212     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
13213                             MaskOpNode, sched.ZMM>,
13214              avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
13215                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
13216   }
13217   let Predicates = [HasFP16, HasVLX] in {
13218     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
13219                                MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
13220     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
13221                                MaskOpNode, sched.YMM>, EVEX_V256;
13222   }
13223 }
13225 // Convert Half to Signed/Unsigned Doubleword with truncation
13226 multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13227                             SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13228                             X86SchedWriteWidths sched> {
13229   let Predicates = [HasFP16] in {
13230     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
13231                             MaskOpNode, sched.ZMM>,
13232              avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
13233                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
13234   }
13235   let Predicates = [HasFP16, HasVLX] in {
13236     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
13237                                MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
13238     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
13239                                MaskOpNode, sched.YMM>, EVEX_V256;
13240   }
13241 }
13244 defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
13245                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
13246                                  EVEX_CD8<16, CD8VH>;
13247 defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
13248                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PS,
13249                                  EVEX_CD8<16, CD8VH>;
13251 defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
13252                                 X86cvttp2si, X86cvttp2siSAE,
13253                                 SchedWriteCvtPS2DQ>, T_MAP5XS,
13254                                 EVEX_CD8<16, CD8VH>;
13256 defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
13257                                  X86cvttp2ui, X86cvttp2uiSAE,
13258                                  SchedWriteCvtPS2DQ>, T_MAP5PS,
13259                                  EVEX_CD8<16, CD8VH>;
13261 // Convert Half to Signed/Unsigned Quadword
13262 multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13263                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13264                            X86SchedWriteWidths sched> {
13265   let Predicates = [HasFP16] in {
13266     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
13267                             MaskOpNode, sched.ZMM>,
13268              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
13269                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13270   }
13271   let Predicates = [HasFP16, HasVLX] in {
13272     // Explicitly specified broadcast string, since we take only 2 elements
13273     // from v8f16x_info source
13274     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
13275                                MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
13276                                EVEX_V128;
13277     // Explicitly specified broadcast string, since we take only 4 elements
13278     // from v8f16x_info source
13279     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
13280                                MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
13281                                EVEX_V256;
13282   }
13283 }
13285 // Convert Half to Signed/Unsigned Quadword with truncation
13286 multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13287                             SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13288                             X86SchedWriteWidths sched> {
13289   let Predicates = [HasFP16] in {
13290     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
13291                             MaskOpNode, sched.ZMM>,
13292              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
13293                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
13294   }
13295   let Predicates = [HasFP16, HasVLX] in {
13296     // Explicitly specified broadcast string, since we take only 2 elements
13297     // from v8f16x_info source
13298     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
13299                                MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
13300     // Explicitly specified broadcast string, since we take only 4 elements
13301     // from v8f16x_info source
13302     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
13303                                MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
13304   }
13305 }
13307 defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
13308                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
13309                                  EVEX_CD8<16, CD8VQ>;
13311 defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
13312                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
13313                                  EVEX_CD8<16, CD8VQ>;
13315 defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
13316                                  X86cvttp2si, X86cvttp2siSAE,
13317                                  SchedWriteCvtPS2DQ>, T_MAP5PD,
13318                                  EVEX_CD8<16, CD8VQ>;
13320 defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
13321                                  X86cvttp2ui, X86cvttp2uiSAE,
13322                                  SchedWriteCvtPS2DQ>, T_MAP5PD,
13323                                  EVEX_CD8<16, CD8VQ>;
13325 // Convert Signed/Unsigned Quadword to Half
13326 multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13327                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13328                            X86SchedWriteWidths sched> {
13329   // We need "x"/"y"/"z" suffixes in order to distinguish between the 128-, 256- and
13330   // 512-bit memory forms of these instructions in the asm parser, since they all have
13331   // the same destination type, 'v8f16x_info'. We also specify the broadcast string
13332   // explicitly for the same reason.
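  // For illustration: with a memory source, "vcvtqq2phx", "vcvtqq2phy" and
  // "vcvtqq2phz" select the 128-, 256- and 512-bit forms respectively; register
  // sources are already disambiguated by the register class.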
13333   let Predicates = [HasFP16] in {
13334     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
13335                             MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
13336              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
13337                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13338   }
13339   let Predicates = [HasFP16, HasVLX] in {
13340     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
13341                                null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
13342                                i128mem, VK2WM>,
13343                                EVEX_V128, NotEVEX2VEXConvertible;
13344     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
13345                                null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
13346                                i256mem, VK4WM>,
13347                                EVEX_V256, NotEVEX2VEXConvertible;
13348   }
13350   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
13351                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
13352                   VR128X:$src), 0, "att">;
13353   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
13354                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
13355                   VK2WM:$mask, VR128X:$src), 0, "att">;
13356   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
13357                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
13358                   VK2WM:$mask, VR128X:$src), 0, "att">;
13359   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
13360                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
13361                   i64mem:$src), 0, "att">;
13362   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
13363                   "$dst {${mask}}, ${src}{1to2}}",
13364                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
13365                   VK2WM:$mask, i64mem:$src), 0, "att">;
13366   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
13367                   "$dst {${mask}} {z}, ${src}{1to2}}",
13368                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
13369                   VK2WM:$mask, i64mem:$src), 0, "att">;
13371   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
13372                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
13373                   VR256X:$src), 0, "att">;
13374   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
13375                   "$dst {${mask}}, $src}",
13376                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
13377                   VK4WM:$mask, VR256X:$src), 0, "att">;
13378   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
13379                   "$dst {${mask}} {z}, $src}",
13380                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
13381                   VK4WM:$mask, VR256X:$src), 0, "att">;
13382   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
13383                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
13384                   i64mem:$src), 0, "att">;
13385   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
13386                   "$dst {${mask}}, ${src}{1to4}}",
13387                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
13388                   VK4WM:$mask, i64mem:$src), 0, "att">;
13389   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
13390                   "$dst {${mask}} {z}, ${src}{1to4}}",
13391                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
13392                   VK4WM:$mask, i64mem:$src), 0, "att">;
13394   def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
13395                   (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
13396                   VR512:$src), 0, "att">;
13397   def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
13398                   "$dst {${mask}}, $src}",
13399                   (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
13400                   VK8WM:$mask, VR512:$src), 0, "att">;
13401   def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
13402                   "$dst {${mask}} {z}, $src}",
13403                   (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
13404                   VK8WM:$mask, VR512:$src), 0, "att">;
13405   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
13406                   (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
13407                   i64mem:$src), 0, "att">;
13408   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
13409                   "$dst {${mask}}, ${src}{1to8}}",
13410                   (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
13411                   VK8WM:$mask, i64mem:$src), 0, "att">;
13412   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
13413                   "$dst {${mask}} {z}, ${src}{1to8}}",
13414                   (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
13415                   VK8WM:$mask, i64mem:$src), 0, "att">;
13416 }
13418 defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
13419                             X86VSintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5PS,
13420                             EVEX_CD8<64, CD8VF>;
13422 defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
13423                             X86VUintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5XD,
13424                             EVEX_CD8<64, CD8VF>;
13426 // Convert half to signed/unsigned int 32/64
13427 defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
13428                                    X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
13429                                    T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13430 defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
13431                                    X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
13432                                    T_MAP5XS, REX_W, EVEX_CD8<16, CD8VT1>;
13433 defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
13434                                    X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
13435                                    T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13436 defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
13437                                    X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
13438                                    T_MAP5XS, REX_W, EVEX_CD8<16, CD8VT1>;
13440 defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
13441                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13442                         "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13443 defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
13444                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13445                         "{q}", HasFP16>, REX_W, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13446 defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
13447                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13448                         "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13449 defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
13450                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13451                         "{q}", HasFP16>, T_MAP5XS, REX_W, EVEX_CD8<16, CD8VT1>;
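// For illustration: "vcvtsh2si %xmm0, %eax" converts the low FP16 element using
// the current rounding mode, while "vcvttsh2si %xmm0, %eax" truncates toward zero
// (AT&T syntax).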
13453 let Predicates = [HasFP16] in {
13454   defm VCVTSI2SHZ  : avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
13455                                    v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
13456                                    T_MAP5XS, EVEX_CD8<32, CD8VT1>;
13457   defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
13458                                    v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
13459                                    T_MAP5XS, REX_W, EVEX_CD8<64, CD8VT1>;
13460   defm VCVTUSI2SHZ   : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
13461                                     v8f16x_info, i32mem, loadi32,
13462                                     "cvtusi2sh","l">, T_MAP5XS, EVEX_CD8<32, CD8VT1>;
13463   defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
13464                                     v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
13465                                     T_MAP5XS, REX_W, EVEX_CD8<64, CD8VT1>;
13466   def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13467               (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13469   def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13470               (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13473   def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
13474             (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13475   def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
13476             (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13478   def : Pat<(f16 (any_sint_to_fp GR32:$src)),
13479             (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13480   def : Pat<(f16 (any_sint_to_fp GR64:$src)),
13481             (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13483   def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
13484             (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13485   def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
13486             (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13488   def : Pat<(f16 (any_uint_to_fp GR32:$src)),
13489             (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13490   def : Pat<(f16 (any_uint_to_fp GR64:$src)),
13491             (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13493   // Patterns used for matching vcvtsi2sh intrinsic sequences from clang,
13494   // which would otherwise produce unnecessary vmovsh instructions.
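  // i.e. a DAG of the form (X86Movsh $dst, (scalar_to_vector (sint_to_fp $x))) is
  // selected directly to the corresponding _Int instruction, so the intermediate
  // vmovsh folds away.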
13495   def : Pat<(v8f16 (X86Movsh
13496                      (v8f16 VR128X:$dst),
13497                      (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
13498             (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13500   def : Pat<(v8f16 (X86Movsh
13501                      (v8f16 VR128X:$dst),
13502                      (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
13503             (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13505   def : Pat<(v8f16 (X86Movsh
13506                      (v8f16 VR128X:$dst),
13507                      (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
13508             (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13510   def : Pat<(v8f16 (X86Movsh
13511                      (v8f16 VR128X:$dst),
13512                      (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
13513             (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13515   def : Pat<(v8f16 (X86Movsh
13516                      (v8f16 VR128X:$dst),
13517                      (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
13518             (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13520   def : Pat<(v8f16 (X86Movsh
13521                      (v8f16 VR128X:$dst),
13522                      (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
13523             (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13525   def : Pat<(v8f16 (X86Movsh
13526                      (v8f16 VR128X:$dst),
13527                      (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
13528             (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13530   def : Pat<(v8f16 (X86Movsh
13531                      (v8f16 VR128X:$dst),
13532                      (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
13533             (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13534 } // Predicates = [HasFP16]
13536 let Predicates = [HasFP16, HasVLX] in {
13537   // Special patterns to allow use of X86VMSintToFP for masking. Instruction
13538   // patterns have been disabled with null_frag.
13539   def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
13540             (VCVTQQ2PHZ256rr VR256X:$src)>;
13541   def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13542                            VK4WM:$mask),
13543             (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13544   def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13545                            VK4WM:$mask),
13546             (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13548   def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
13549             (VCVTQQ2PHZ256rm addr:$src)>;
13550   def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13551                            VK4WM:$mask),
13552             (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13553   def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13554                            VK4WM:$mask),
13555             (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13557   def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13558             (VCVTQQ2PHZ256rmb addr:$src)>;
13559   def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13560                            (v8f16 VR128X:$src0), VK4WM:$mask),
13561             (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13562   def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13563                            v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13564             (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13566   def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
13567             (VCVTQQ2PHZ128rr VR128X:$src)>;
13568   def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13569                            VK2WM:$mask),
13570             (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13571   def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13572                            VK2WM:$mask),
13573             (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13575   def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
13576             (VCVTQQ2PHZ128rm addr:$src)>;
13577   def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13578                            VK2WM:$mask),
13579             (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13580   def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13581                            VK2WM:$mask),
13582             (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13584   def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13585             (VCVTQQ2PHZ128rmb addr:$src)>;
13586   def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13587                            (v8f16 VR128X:$src0), VK2WM:$mask),
13588             (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13589   def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13590                            v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13591             (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
13593   // Special patterns to allow use of X86VMUintToFP for masking. Instruction
13594   // patterns have been disabled with null_frag.
13595   def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
13596             (VCVTUQQ2PHZ256rr VR256X:$src)>;
13597   def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13598                            VK4WM:$mask),
13599             (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13600   def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13601                            VK4WM:$mask),
13602             (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13604   def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
13605             (VCVTUQQ2PHZ256rm addr:$src)>;
13606   def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13607                            VK4WM:$mask),
13608             (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13609   def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13610                            VK4WM:$mask),
13611             (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13613   def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13614             (VCVTUQQ2PHZ256rmb addr:$src)>;
13615   def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13616                            (v8f16 VR128X:$src0), VK4WM:$mask),
13617             (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13618   def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13619                            v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13620             (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13622   def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
13623             (VCVTUQQ2PHZ128rr VR128X:$src)>;
13624   def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13625                            VK2WM:$mask),
13626             (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13627   def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13628                            VK2WM:$mask),
13629             (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13631   def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
13632             (VCVTUQQ2PHZ128rm addr:$src)>;
13633   def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13634                            VK2WM:$mask),
13635             (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13636   def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13637                            VK2WM:$mask),
13638             (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13640   def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13641             (VCVTUQQ2PHZ128rmb addr:$src)>;
13642   def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13643                            (v8f16 VR128X:$src0), VK2WM:$mask),
13644             (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13645   def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13646                            v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13647             (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
13648 }
13650 let Constraints = "@earlyclobber $dst, $src1 = $dst" in {
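  // $dst is marked earlyclobber so register allocation never assigns it the same
  // register as $src2 or $src3; the complex multiply-add instructions are not
  // allowed to have the destination overlap those sources.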
13651   multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> {
13652     defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13653             (ins _.RC:$src2, _.RC:$src3),
13654             OpcodeStr, "$src3, $src2", "$src2, $src3",
13655             (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX_4V;
13657     defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13658             (ins _.RC:$src2, _.MemOp:$src3),
13659             OpcodeStr, "$src3, $src2", "$src2, $src3",
13660             (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX_4V;
13662     defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13663             (ins _.RC:$src2, _.ScalarMemOp:$src3),
13664             OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr),
13665             (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX_4V;
13666   }
13667 } // Constraints = "@earlyclobber $dst, $src1 = $dst"
13669 multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
13670                                  X86VectorVTInfo _> {
13671   let Constraints = "@earlyclobber $dst, $src1 = $dst" in
13672   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13673           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
13674           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
13675           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>,
13676           EVEX_4V, EVEX_B, EVEX_RC;
13677 }
13680 multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> {
13681   let Predicates = [HasFP16] in {
13682     defm Z    : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>,
13683                 avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
13684                       EVEX_V512, Sched<[WriteFMAZ]>;
13685   }
13686   let Predicates = [HasVLX, HasFP16] in {
13687     defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>;
13688     defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>;
13689   }
13690 }
13692 multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13693                                  SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> {
13694   let Predicates = [HasFP16] in {
13695     defm Z    : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
13696                                  WriteFMAZ, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>,
13697                 avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info,
13698                                        "", "@earlyclobber $dst">, EVEX_V512;
13699   }
13700   let Predicates = [HasVLX, HasFP16] in {
13701     defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
13702                                  WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256;
13703     defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
13704                                  WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128;
13705   }
13706 }
13709 let Uses = [MXCSR] in {
13710   defm VFMADDCPH  : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>,
13711                                     T_MAP6XS, EVEX_CD8<32, CD8VF>;
13712   defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>,
13713                                     T_MAP6XD, EVEX_CD8<32, CD8VF>;
13715   defm VFMULCPH  : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc,
13716                                          x86vfmulcRnd, 1>, T_MAP6XS, EVEX_CD8<32, CD8VF>;
13717   defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc,
13718                                          x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6XD, EVEX_CD8<32, CD8VF>;
13719 }
13722 multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
13723                                    bit IsCommutable> {
13724   let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13725     defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13726                         (ins VR128X:$src2, VR128X:$src3), OpcodeStr,
13727                         "$src3, $src2", "$src2, $src3",
13728                         (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
13729                         Sched<[WriteFMAX]>;
13730     defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
13731                         (ins VR128X:$src2, ssmem:$src3), OpcodeStr,
13732                         "$src3, $src2", "$src2, $src3",
13733                         (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
13734                         Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
13735     defm rb : AVX512_maskable_3src<opc,  MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13736                         (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
13737                         "$rc, $src3, $src2", "$src2, $src3, $rc",
13738                         (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
13739                         EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
13740   }
13741 }
13743 multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13744                                      SDNode OpNodeRnd, bit IsCommutable> {
13745   let Predicates = [HasFP16] in {
13746     defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13747                         (ins VR128X:$src1, VR128X:$src2), OpcodeStr,
13748                         "$src2, $src1", "$src1, $src2",
13749                         (v4f32 (OpNode VR128X:$src1, VR128X:$src2)),
13750                         IsCommutable, IsCommutable, IsCommutable,
13751                         X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>;
13752     defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
13753                         (ins VR128X:$src1, ssmem:$src2), OpcodeStr,
13754                         "$src2, $src1", "$src1, $src2",
13755                         (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))),
13756                         0, 0, 0, X86selects, "@earlyclobber $dst">,
13757                         Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
13758     defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13759                         (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr,
13760                         "$rc, $src2, $src1", "$src1, $src2, $rc",
13761                         (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)),
13762                         0, 0, 0, X86selects, "@earlyclobber $dst">,
13763                         EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
13764   }
13765 }
13767 let Uses = [MXCSR] in {
13768   defm VFMADDCSHZ  : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>,
13769                                     T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
13770   defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>,
13771                                     T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
13773   defm VFMULCSHZ  : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>,
13774                                     T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
13775   defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>,
13776                                     T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;