[X86][APX] Do not emit {evex} prefix for memory variant (#109759)
[llvm-project.git] / llvm / lib / Target / X86 / X86InstrAVX512.td
blobcc1f9090c11acc486517d04af2e5821ce7121b47
1 //===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file describes the X86 AVX512 instruction set, defining the
10 // instructions, and properties of the instructions which are needed for code
11 // generation, machine code emission, and analysis.
13 //===----------------------------------------------------------------------===//
// AVX512_maskable_custom defines three records per instantiation:
//   NAME    - unmasked form; operands Ins, selected by Pattern.
//   NAME#k  - masked form (EVEX_K); operands MaskingIns, selected by
//             MaskingPattern.
//   NAME#kz - zero-masked form (EVEX_KZ); operands ZeroMaskingIns, selected
//             by ZeroMaskingPattern.
// Each asm string is assembled from OpcodeStr plus a "{...|...}" pair whose
// first alternative uses AttSrcAsm and whose second uses IntelSrcAsm.
15 // This multiclass generates the masking variants from the non-masking
16 // variant.  It only provides the assembly pieces for the masking variants.
17 // It assumes custom ISel patterns for masking which can be provided as
18 // template arguments.
19 multiclass AVX512_maskable_custom<bits<8> O, Format F,
20                                   dag Outs,
21                                   dag Ins, dag MaskingIns, dag ZeroMaskingIns,
22                                   string OpcodeStr,
23                                   string AttSrcAsm, string IntelSrcAsm,
24                                   list<dag> Pattern,
25                                   list<dag> MaskingPattern,
26                                   list<dag> ZeroMaskingPattern,
27                                   string MaskingConstraint = "",
28                                   bit IsCommutable = 0,
29                                   bit IsKCommutable = 0,
30                                   bit IsKZCommutable = IsCommutable,
31                                   string ClobberConstraint = ""> {
  // Unmasked record: only ClobberConstraint applies here.
32   let isCommutable = IsCommutable, Constraints = ClobberConstraint in
33     def NAME: AVX512<O, F, Outs, Ins,
34                        OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
35                                      "$dst, "#IntelSrcAsm#"}",
36                        Pattern>;
  // Masked record: the asm string appends "{${mask}}" to $dst.
38   // Prefer over VMOV*rrk Pat<>
39   let isCommutable = IsKCommutable in
40     def NAME#k: AVX512<O, F, Outs, MaskingIns,
41                        OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
42                                      "$dst {${mask}}, "#IntelSrcAsm#"}",
43                        MaskingPattern>,
44               EVEX_K {
45       // In case of the 3src subclass this is overridden with a let.
      // Constraints is MaskingConstraint when ClobberConstraint is empty,
      // ClobberConstraint when MaskingConstraint is empty, and otherwise
      // the two joined as "ClobberConstraint, MaskingConstraint".
46       string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
47                                !if(!eq(MaskingConstraint, ""), ClobberConstraint,
48                                    !strconcat(ClobberConstraint, ", ", MaskingConstraint)));
49     }
  // Zero-masked record: the asm string appends "{${mask}} {z}" to $dst and
  // only ClobberConstraint applies (MaskingConstraint is not used here).
51   // Zero mask does not add any restrictions to commute operands transformation.
52   // So, it is Ok to use IsCommutable instead of IsKCommutable.
53   let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
54       Constraints = ClobberConstraint in
55     def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
56                        OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
57                                      "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
58                        ZeroMaskingPattern>,
59               EVEX_KZ;
// Wraps AVX512_maskable_custom by turning result dags into pattern lists:
//   unmasked     -> (set _.RC:$dst, RHS)
//   masked       -> (set _.RC:$dst, MaskingRHS)
//   zero-masked  -> (set _.RC:$dst, (Select $mask, RHS, all-zeros))
// Select defaults to vselect_mask; the remaining arguments are forwarded
// unchanged.
63 // Common base class of AVX512_maskable and AVX512_maskable_3src.
64 multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
65                                   dag Outs,
66                                   dag Ins, dag MaskingIns, dag ZeroMaskingIns,
67                                   string OpcodeStr,
68                                   string AttSrcAsm, string IntelSrcAsm,
69                                   dag RHS, dag MaskingRHS,
70                                   SDPatternOperator Select = vselect_mask,
71                                   string MaskingConstraint = "",
72                                   bit IsCommutable = 0,
73                                   bit IsKCommutable = 0,
74                                   bit IsKZCommutable = IsCommutable,
75                                   string ClobberConstraint = ""> :
76   AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
77                          AttSrcAsm, IntelSrcAsm,
78                          [(set _.RC:$dst, RHS)],
79                          [(set _.RC:$dst, MaskingRHS)],
80                          [(set _.RC:$dst,
81                                (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
82                          MaskingConstraint, IsCommutable,
83                          IsKCommutable, IsKZCommutable, ClobberConstraint>;
// Operand lists are derived from Ins: the masked form prepends
// ($src0, $mask) and the zero-masked form prepends ($mask).
// RHS is used only for the unmasked pattern; both masked patterns wrap
// MaskRHS in vselect_mask, falling back to $src0 or to all-zeros.
// The masking constraint passed down is "$src0 = $dst".
// Note the parameter order: ClobberConstraint precedes the commutable bits.
85 // This multiclass generates the unconditional/non-masking, the masking and
86 // the zero-masking variant of the vector instruction.  In the masking case, the
87 // preserved vector elements come from a new dummy input operand tied to $dst.
88 // This version uses a separate dag for non-masking and masking.
89 multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
90                            dag Outs, dag Ins, string OpcodeStr,
91                            string AttSrcAsm, string IntelSrcAsm,
92                            dag RHS, dag MaskRHS,
93                            string ClobberConstraint = "",
94                            bit IsCommutable = 0, bit IsKCommutable = 0,
95                            bit IsKZCommutable = IsCommutable> :
96    AVX512_maskable_custom<O, F, Outs, Ins,
97                           !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
98                           !con((ins _.KRCWM:$mask), Ins),
99                           OpcodeStr, AttSrcAsm, IntelSrcAsm,
100                           [(set _.RC:$dst, RHS)],
101                           [(set _.RC:$dst,
102                               (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
103                           [(set _.RC:$dst,
104                               (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
105                           "$src0 = $dst", IsCommutable, IsKCommutable,
106                           IsKZCommutable, ClobberConstraint>;
// Same operand-list construction as the split variant (masked form prepends
// ($src0, $mask), zero-masked form prepends ($mask)), but a single RHS dag
// serves all three patterns. The masked result is (Select $mask, RHS, $src0)
// and the masking constraint passed down is "$src0 = $dst".
108 // This multiclass generates the unconditional/non-masking, the masking and
109 // the zero-masking variant of the vector instruction.  In the masking case, the
110 // preserved vector elements come from a new dummy input operand tied to $dst.
111 multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
112                            dag Outs, dag Ins, string OpcodeStr,
113                            string AttSrcAsm, string IntelSrcAsm,
114                            dag RHS,
115                            bit IsCommutable = 0, bit IsKCommutable = 0,
116                            bit IsKZCommutable = IsCommutable,
117                            SDPatternOperator Select = vselect_mask,
118                            string ClobberConstraint = ""> :
119    AVX512_maskable_common<O, F, _, Outs, Ins,
120                           !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
121                           !con((ins _.KRCWM:$mask), Ins),
122                           OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
123                           (Select _.KRCWM:$mask, RHS, _.RC:$src0),
124                           Select, "$src0 = $dst", IsCommutable, IsKCommutable,
125                           IsKZCommutable, ClobberConstraint>;
// Thin wrapper over AVX512_maskable: all three commutable bits are passed
// as 0 and X86selects_mask replaces the default vselect_mask as the select
// operator.
127 // This multiclass generates the unconditional/non-masking, the masking and
128 // the zero-masking variant of the scalar instruction.
129 multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
130                            dag Outs, dag Ins, string OpcodeStr,
131                            string AttSrcAsm, string IntelSrcAsm,
132                            dag RHS> :
133    AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
134                    RHS, 0, 0, 0, X86selects_mask>;
// $src1 is prepended to NonTiedIns for all three forms; the two masked forms
// additionally take $mask right after $src1.
// When MaskOnly is set the unmasked pattern is replaced by null_frag, while
// the masked pattern still uses RHS: (Select $mask, RHS, $src1).
// The masking constraint is passed as "" here.
136 // Similar to AVX512_maskable but in this case one of the source operands
137 // ($src1) is already tied to $dst so we just use that for the preserved
138 // vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
139 // $src1.
140 multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
141                                 dag Outs, dag NonTiedIns, string OpcodeStr,
142                                 string AttSrcAsm, string IntelSrcAsm,
143                                 dag RHS,
144                                 bit IsCommutable = 0,
145                                 bit IsKCommutable = 0,
146                                 SDPatternOperator Select = vselect_mask,
147                                 bit MaskOnly = 0> :
148    AVX512_maskable_common<O, F, _, Outs,
149                           !con((ins _.RC:$src1), NonTiedIns),
150                           !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
151                           !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
152                           OpcodeStr, AttSrcAsm, IntelSrcAsm,
153                           !if(MaskOnly, (null_frag), RHS),
154                           (Select _.KRCWM:$mask, RHS, _.RC:$src1),
155                           Select, "", IsCommutable, IsKCommutable>;
// The result patterns are typed with OutVT, while $src1 and $mask use InVT's
// register and mask classes. The unmasked RHS is always null_frag, and the
// masked pattern falls back to (bitconvert InVT.RC:$src1).
157 // Similar to AVX512_maskable_3src but in this case the input VT for the tied
158 // operand differs from the output VT. This requires a bitconvert on
159 // the preserved vector going into the vselect.
160 // NOTE: The unmasked pattern is disabled.
161 multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
162                                      X86VectorVTInfo InVT,
163                                      dag Outs, dag NonTiedIns, string OpcodeStr,
164                                      string AttSrcAsm, string IntelSrcAsm,
165                                      dag RHS, bit IsCommutable = 0> :
166    AVX512_maskable_common<O, F, OutVT, Outs,
167                           !con((ins InVT.RC:$src1), NonTiedIns),
168                           !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
169                           !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
170                           OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
171                           (vselect_mask InVT.KRCWM:$mask, RHS,
172                            (bitconvert InVT.RC:$src1)),
173                            vselect_mask, "", IsCommutable>;
// Scalar flavour of AVX512_maskable_3src: forwards every argument unchanged
// and passes X86selects_mask as the select operator.
175 multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
176                                      dag Outs, dag NonTiedIns, string OpcodeStr,
177                                      string AttSrcAsm, string IntelSrcAsm,
178                                      dag RHS,
179                                      bit IsCommutable = 0,
180                                      bit IsKCommutable = 0,
181                                      bit MaskOnly = 0> :
182    AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
183                         IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
184                         X86selects_mask, MaskOnly>;
// Defines the unmasked, masked and zero-masked records, but only the
// unmasked one receives a selection pattern (Pattern); the two masked
// records get empty pattern lists. Operand lists are built like
// AVX512_maskable (prepending ($src0, $mask) / ($mask)), with the masking
// constraint "$src0 = $dst".
186 multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
187                                   dag Outs, dag Ins,
188                                   string OpcodeStr,
189                                   string AttSrcAsm, string IntelSrcAsm,
190                                   list<dag> Pattern> :
191    AVX512_maskable_custom<O, F, Outs, Ins,
192                           !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
193                           !con((ins _.KRCWM:$mask), Ins),
194                           OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
195                           "$src0 = $dst">;
// 3-source counterpart of AVX512_maskable_in_asm: $src1 is prepended to
// NonTiedIns for every form (plus $mask for the masked forms), only the
// unmasked record gets Pattern, and the masking constraint is passed as "".
197 multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
198                                        dag Outs, dag NonTiedIns,
199                                        string OpcodeStr,
200                                        string AttSrcAsm, string IntelSrcAsm,
201                                        list<dag> Pattern> :
202    AVX512_maskable_custom<O, F, Outs,
203                           !con((ins _.RC:$src1), NonTiedIns),
204                           !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
205                           !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
206                           OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
207                           "">;
// Defines only two records: NAME (unmasked) and NAME#k (masked, EVEX_K).
// No zero-masked (kz) record is produced here. Both records share the same
// isCommutable setting taken from IsCommutable.
209 // Instruction with mask that puts result in mask register,
210 // like "compare" and "vptest"
211 multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
212                                   dag Outs,
213                                   dag Ins, dag MaskingIns,
214                                   string OpcodeStr,
215                                   string AttSrcAsm, string IntelSrcAsm,
216                                   list<dag> Pattern,
217                                   list<dag> MaskingPattern,
218                                   bit IsCommutable = 0> {
219     let isCommutable = IsCommutable in {
220     def NAME: AVX512<O, F, Outs, Ins,
221                        OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
222                                      "$dst, "#IntelSrcAsm#"}",
223                        Pattern>;
225     def NAME#k: AVX512<O, F, Outs, MaskingIns,
226                        OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
227                                      "$dst {${mask}}, "#IntelSrcAsm#"}",
228                        MaskingPattern>, EVEX_K;
229     }
// Builds the two pattern lists for AVX512_maskable_custom_cmp from result
// dags. The destination is the mask register class _.KRC:$dst, set from RHS
// (unmasked) or MaskingRHS (masked).
232 multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
233                                   dag Outs,
234                                   dag Ins, dag MaskingIns,
235                                   string OpcodeStr,
236                                   string AttSrcAsm, string IntelSrcAsm,
237                                   dag RHS, dag MaskingRHS,
238                                   bit IsCommutable = 0> :
239   AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
240                          AttSrcAsm, IntelSrcAsm,
241                          [(set _.KRC:$dst, RHS)],
242                          [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
// Mask-producing instruction with an optional write-mask: the masked form
// prepends $mask to Ins and its pattern is (and $mask, RHS_su). RHS is used
// for the unmasked pattern only; RHS_su is supplied separately by the caller.
244 multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
245                            dag Outs, dag Ins, string OpcodeStr,
246                            string AttSrcAsm, string IntelSrcAsm,
247                            dag RHS, dag RHS_su, bit IsCommutable = 0> :
248    AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
249                           !con((ins _.KRCWM:$mask), Ins),
250                           OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
251                           (and _.KRCWM:$mask, RHS_su), IsCommutable>;
// Unlike AVX512_maskable, the caller provides all three operand lists and
// all three result dags (RHS, MaskingRHS, ZeroMaskingRHS) explicitly; each
// result dag is wrapped in (set _.RC:$dst, ...). The masking constraint is
// fixed to "$src0 = $dst" and the commutable bits keep their defaults.
253 // Used by conversion instructions.
254 multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
255                                   dag Outs,
256                                   dag Ins, dag MaskingIns, dag ZeroMaskingIns,
257                                   string OpcodeStr,
258                                   string AttSrcAsm, string IntelSrcAsm,
259                                   dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
260   AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
261                          AttSrcAsm, IntelSrcAsm,
262                          [(set _.RC:$dst, RHS)],
263                          [(set _.RC:$dst, MaskingRHS)],
264                          [(set _.RC:$dst, ZeroMaskingRHS)],
265                          "$src0 = $dst">;
// 3-source form with separate unmasked and masked result dags: $src1 is
// prepended to NonTiedIns for every form (plus $mask for the masked forms).
// RHS drives the unmasked pattern; MaskingRHS is wrapped in vselect_mask
// with $src1 (masked) or all-zeros (zero-masked) as the fallback.
// IsCommutable and IsKCommutable have no defaults and must be supplied.
267 multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
268                                dag Outs, dag NonTiedIns, string OpcodeStr,
269                                string AttSrcAsm, string IntelSrcAsm,
270                                dag RHS, dag MaskingRHS, bit IsCommutable,
271                                bit IsKCommutable> :
272    AVX512_maskable_custom<O, F, Outs,
273                           !con((ins _.RC:$src1), NonTiedIns),
274                           !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
275                           !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
276                           OpcodeStr, AttSrcAsm, IntelSrcAsm,
277                           [(set _.RC:$dst, RHS)],
278                           [(set _.RC:$dst,
279                             (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
280                           [(set _.RC:$dst,
281                             (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
282                           "", IsCommutable, IsKCommutable>;
// Two operand-less VR512 pseudos, both patterned on v16i32: AVX512_512_SET0
// matches immAllZerosV and AVX512_512_SETALLONES matches immAllOnesV.
284 // Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
285 // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
286 // swizzled by ExecutionDomainFix to pxor.
287 // We set canFoldAsLoad because this can be converted to a constant-pool
288 // load of an all-zeros value if folding it would be beneficial.
289 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
290     isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
291 def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
292                [(set VR512:$dst, (v16i32 immAllZerosV))]>;
293 def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
294                [(set VR512:$dst, (v16i32 immAllOnesV))]>;
// The pseudo above is patterned on v16i32 only; these patterns route the
// all-zeros constant of the other 512-bit vector types to the same pseudo.
297 let Predicates = [HasAVX512] in {
298 def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
299 def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
300 def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
301 def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
302 def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
303 def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
// Each pseudo takes only a write-mask operand and matches a vselect between
// an all-ones and an all-zeros vector: _32 uses a v16i1 mask over v16i32,
// _64 uses a v8i1 mask over v8i64.
306 // Alias instructions that allow VPTERNLOG to be used with a mask to create
307 // a mix of all ones and all zeros elements. This is done this way to force
308 // the same register to be used as input for all three sources.
309 let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
310 def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
311                                 (ins VK16WM:$mask), "",
312                            [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
313                                                       (v16i32 immAllOnesV),
314                                                       (v16i32 immAllZerosV)))]>;
315 def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
316                                 (ins VK8WM:$mask), "",
317                 [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
318                                            (v8i64 immAllOnesV),
319                                            (v8i64 immAllZerosV)))]>;
// Operand-less all-zeros pseudos for the 128-bit (VR128X, patterned on v4i32)
// and 256-bit (VR256X, patterned on v8i32) register classes, carrying the
// same flags as the 512-bit AVX512_512_SET0 pseudo.
322 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
323     isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
324 def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
325                [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
326 def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
327                [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
// Route the all-zeros constant of the remaining 128-bit and 256-bit vector
// types to the two pseudos defined above.
330 let Predicates = [HasAVX512] in {
331 def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
332 def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
333 def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
334 def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
335 def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
336 def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
337 def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
338 def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
339 def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
340 def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
341 def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
342 def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
// Operand-less pseudos matching a floating-point zero immediate, one per
// register class / immediate pair: FR16X/fp16imm0, FR32X/fp32imm0,
// FR64X/fp64imm0 and VR128X/fp128imm0. All require HasAVX512.
345 // Alias instructions that map fld0 to xorps for sse or vxorps for avx.
346 // This is expanded by ExpandPostRAPseudos.
347 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
348     isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
349   def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
350                           [(set FR16X:$dst, fp16imm0)]>;
351   def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
352                           [(set FR32X:$dst, fp32imm0)]>;
353   def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
354                           [(set FR64X:$dst, fp64imm0)]>;
355   def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
356                             [(set VR128X:$dst, fp128imm0)]>;
359 //===----------------------------------------------------------------------===//
360 // AVX-512 - VECTOR INSERT
// vinsert_for_size_split defines a register-source (rr) and a memory-source
// (rm) family, each expanded through AVX512_maskable_split into unmasked,
// masked and zero-masked records.
//   From / To        - type info of the inserted subvector and of the full
//                      vector that receives it.
//   vinsert_insert   - operator used for the unmasked pattern.
//   vinsert_for_mask - operator used for the masked patterns.
// The mnemonic is "vinsert" # From.EltTypeName # "x" # From.NumElts, and
// $src3 is the u8imm immediate operand.
363 // Supports two different pattern operators for mask and unmasked ops. Allows
364 // null_frag to be passed for one.
365 multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
366                                   X86VectorVTInfo To,
367                                   SDPatternOperator vinsert_insert,
368                                   SDPatternOperator vinsert_for_mask,
369                                   X86FoldableSchedWrite sched> {
370   let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register form: $src2 is a From.RC register.
371     defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
372                    (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
373                    "vinsert" # From.EltTypeName # "x" # From.NumElts,
374                    "$src3, $src2, $src1", "$src1, $src2, $src3",
375                    (vinsert_insert:$src3 (To.VT To.RC:$src1),
376                                          (From.VT From.RC:$src2),
377                                          (iPTR imm)),
378                    (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
379                                            (From.VT From.RC:$src2),
380                                            (iPTR imm))>,
381                    AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>;
    // Memory form: $src2 is a From.MemOp operand loaded via From.LdFrag; it
    // adds EVEX_CD8 and uses the folded scheduling classes.
382     let mayLoad = 1 in
383     defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
384                    (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
385                    "vinsert" # From.EltTypeName # "x" # From.NumElts,
386                    "$src3, $src2, $src1", "$src1, $src2, $src3",
387                    (vinsert_insert:$src3 (To.VT To.RC:$src1),
388                                (From.VT (From.LdFrag addr:$src2)),
389                                (iPTR imm)),
390                    (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
391                                (From.VT (From.LdFrag addr:$src2)),
392                                (iPTR imm))>, AVX512AIi8Base, EVEX, VVVV,
393                    EVEX_CD8<From.EltSize, From.CD8TupleForm>,
394                    Sched<[sched.Folded, sched.ReadAfterFold]>;
395   }
// Convenience wrapper: forwards to vinsert_for_size_split with
// vinsert_insert supplied as both the unmasked and the masked operator.
398 // Passes the same pattern operator for masked and unmasked ops.
399 multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
400                             X86VectorVTInfo To,
401                             SDPatternOperator vinsert_insert,
402                             X86FoldableSchedWrite sched> :
403   vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;
// Adds unmasked selection patterns that map a vinsert_insert node on the
// From/To types onto an already-defined instruction, looked up by name as
// InstrStr#"rr" (register source) and InstrStr#"rm" (loaded source).
// INSERT_get_vinsert_imm is applied to the matched node ($ins) to produce
// the instruction's immediate operand. The patterns are guarded by the
// predicate list p.
405 multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
406                        X86VectorVTInfo To, PatFrag vinsert_insert,
407                        SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
408   let Predicates = p in {
409     def : Pat<(vinsert_insert:$ins
410                      (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
411               (To.VT (!cast<Instruction>(InstrStr#"rr")
412                      To.RC:$src1, From.RC:$src2,
413                      (INSERT_get_vinsert_imm To.RC:$ins)))>;
415     def : Pat<(vinsert_insert:$ins
416                   (To.VT To.RC:$src1),
417                   (From.VT (From.LdFrag addr:$src2)),
418                   (iPTR imm)),
419               (To.VT (!cast<Instruction>(InstrStr#"rm")
420                   To.RC:$src1, addr:$src2,
421                   (INSERT_get_vinsert_imm To.RC:$ins)))>;
422   }
// Instantiates the vinsert instruction families for one element-type pair
// (EltVT32 / EltVT64) and the two opcodes:
//   32x4Z256 - 128-bit into 256-bit, Opcode128, requires HasVLX.
//   32x4Z    - 128-bit into 512-bit, Opcode128.
//   64x4Z    - 256-bit into 512-bit, Opcode256, REX_W.
//   64x2Z256 - 128-bit into 256-bit, Opcode128, REX_W, HasVLX + HasDQI.
//   64x2Z    - 128-bit into 512-bit, Opcode128, REX_W, HasDQI.
//   32x8Z    - 256-bit into 512-bit, Opcode256, HasDQI.
// The last three pass null_frag as the unmasked operator, so only their
// masked records carry selection patterns.
425 multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
426                             ValueType EltVT64, int Opcode256,
427                             X86FoldableSchedWrite sched> {
429   let Predicates = [HasVLX] in
430     defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
431                                  X86VectorVTInfo< 4, EltVT32, VR128X>,
432                                  X86VectorVTInfo< 8, EltVT32, VR256X>,
433                                  vinsert128_insert, sched>, EVEX_V256;
435   defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
436                                  X86VectorVTInfo< 4, EltVT32, VR128X>,
437                                  X86VectorVTInfo<16, EltVT32, VR512>,
438                                  vinsert128_insert, sched>, EVEX_V512;
440   defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
441                                  X86VectorVTInfo< 4, EltVT64, VR256X>,
442                                  X86VectorVTInfo< 8, EltVT64, VR512>,
443                                  vinsert256_insert, sched>, REX_W, EVEX_V512;
445   // Even with DQI we'd like to only use these instructions for masking.
446   let Predicates = [HasVLX, HasDQI] in
447     defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
448                                    X86VectorVTInfo< 2, EltVT64, VR128X>,
449                                    X86VectorVTInfo< 4, EltVT64, VR256X>,
450                                    null_frag, vinsert128_insert, sched>,
451                                    EVEX_V256, REX_W;
453   // Even with DQI we'd like to only use these instructions for masking.
454   let Predicates = [HasDQI] in {
455     defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
456                                  X86VectorVTInfo< 2, EltVT64, VR128X>,
457                                  X86VectorVTInfo< 8, EltVT64, VR512>,
458                                  null_frag, vinsert128_insert, sched>,
459                                  REX_W, EVEX_V512;
461     defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
462                                    X86VectorVTInfo< 8, EltVT32, VR256X>,
463                                    X86VectorVTInfo<16, EltVT32, VR512>,
464                                    null_frag, vinsert256_insert, sched>,
465                                    EVEX_V512;
466   }
// Instantiate the floating-point (VINSERTF) and integer (VINSERTI) families.
// Both use opcodes 0x18/0x38 for the 128-bit insert and 0x1a/0x3a for the
// 256-bit insert respectively.
469 // FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
470 defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
471 defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
// The instantiations below add unmasked patterns for element types other
// than the one an instruction was defined with: 64-bit element types are
// mapped onto the 32x4 instructions and 32-bit element types onto the 64x4
// instructions.
473 // Codegen pattern with the alternative types,
474 // Even with AVX512DQ we'll still use these for unmasked operations.
475 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
476               vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
477 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
478               vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
480 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
481               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
482 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
483               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
485 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
486               vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
487 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
488               vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
// 16-bit and 8-bit integer vectors use the VINSERTI* instructions; f16 and
// bf16 vectors use the VINSERTF* instructions.
490 // Codegen pattern with the alternative types insert VEC128 into VEC256
491 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
492               vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
493 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
494               vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
495 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
496               vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
497 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8bf16x_info, v16bf16x_info,
498               vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
499 // Codegen pattern with the alternative types insert VEC128 into VEC512
500 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
501               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
502 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
503                vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
504 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
505               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
506 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8bf16x_info, v32bf16_info,
507               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
508 // Codegen pattern with the alternative types insert VEC256 into VEC512
509 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
510               vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
511 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
512               vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
513 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
514               vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
515 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16bf16x_info, v32bf16_info,
516               vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
// Adds masked selection patterns for the case where the insert is performed
// on the From/To types but its result is bitconverted to Cast.VT before the
// vselect_mask. Four patterns are emitted, targeting InstrStr#"rrk",
// InstrStr#"rmk", InstrStr#"rrkz" and InstrStr#"rmkz":
//   *k  forms fall back to Cast.RC:$src0 and pass it as the first operand.
//   *kz forms fall back to Cast.ImmAllZerosV.
// The mask register class comes from Cast (Cast.KRCWM), and
// INSERT_get_vinsert_imm turns the matched node ($ins) into the immediate.
519 multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
520                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
521                                  PatFrag vinsert_insert,
522                                  SDNodeXForm INSERT_get_vinsert_imm,
523                                  list<Predicate> p> {
524 let Predicates = p in {
  // Masked, register source.
525   def : Pat<(Cast.VT
526              (vselect_mask Cast.KRCWM:$mask,
527                            (bitconvert
528                             (vinsert_insert:$ins (To.VT To.RC:$src1),
529                                                  (From.VT From.RC:$src2),
530                                                  (iPTR imm))),
531                            Cast.RC:$src0)),
532             (!cast<Instruction>(InstrStr#"rrk")
533              Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
534              (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Masked, memory source.
  // NOTE(review): this pattern wraps the load in an extra bitconvert, while
  // the zero-masked memory pattern further down matches the load directly;
  // confirm whether the difference is intentional.
535   def : Pat<(Cast.VT
536              (vselect_mask Cast.KRCWM:$mask,
537                            (bitconvert
538                             (vinsert_insert:$ins (To.VT To.RC:$src1),
539                                                  (From.VT
540                                                   (bitconvert
541                                                    (From.LdFrag addr:$src2))),
542                                                  (iPTR imm))),
543                            Cast.RC:$src0)),
544             (!cast<Instruction>(InstrStr#"rmk")
545              Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
546              (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Zero-masked, register source.
548   def : Pat<(Cast.VT
549              (vselect_mask Cast.KRCWM:$mask,
550                            (bitconvert
551                             (vinsert_insert:$ins (To.VT To.RC:$src1),
552                                                  (From.VT From.RC:$src2),
553                                                  (iPTR imm))),
554                            Cast.ImmAllZerosV)),
555             (!cast<Instruction>(InstrStr#"rrkz")
556              Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
557              (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Zero-masked, memory source.
558   def : Pat<(Cast.VT
559              (vselect_mask Cast.KRCWM:$mask,
560                            (bitconvert
561                             (vinsert_insert:$ins (To.VT To.RC:$src1),
562                                                  (From.VT (From.LdFrag addr:$src2)),
563                                                  (iPTR imm))),
564                            Cast.ImmAllZerosV)),
565             (!cast<Instruction>(InstrStr#"rmkz")
566              Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
567              (INSERT_get_vinsert_imm To.RC:$ins))>;
// Masked 128-bit inserts into 256-bit vectors where the insert's element type
// differs from the type the mask applies to (the third type argument). The
// instruction is picked by the mask type: 32x4 for v8f32/v8i32 masks, 64x2 for
// v4f64 masks. The 64x2 form additionally requires HasDQI.
571 defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
572                              v8f32x_info, vinsert128_insert,
573                              INSERT_get_vinsert128_imm, [HasVLX]>;
574 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
575                              v4f64x_info, vinsert128_insert,
576                              INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
// Integer sources selected under a v8i32 mask.
578 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
579                              v8i32x_info, vinsert128_insert,
580                              INSERT_get_vinsert128_imm, [HasVLX]>;
581 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
582                              v8i32x_info, vinsert128_insert,
583                              INSERT_get_vinsert128_imm, [HasVLX]>;
584 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
585                              v8i32x_info, vinsert128_insert,
586                              INSERT_get_vinsert128_imm, [HasVLX]>;
// Masked 128-bit inserts of integer vectors into 256-bit vectors, selected
// under a v4i64 mask. Integer element types use VINSERTI64x2Z256, matching the
// VINSERTI64x2Z patterns used for the 512-bit destinations and the
// VEXTRACTI64x2Z256 patterns used for the corresponding extracts (these
// previously named the floating-point VINSERTF64x2Z256 instruction).
587 defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v4i32x_info, v8i32x_info,
588                              v4i64x_info, vinsert128_insert,
589                              INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
590 defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v8i16x_info, v16i16x_info,
591                              v4i64x_info, vinsert128_insert,
592                              INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
593 defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v16i8x_info, v32i8x_info,
594                              v4i64x_info, vinsert128_insert,
595                              INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
// Masked 128-bit inserts into 512-bit vectors where the insert's element type
// differs from the mask type. 32x4 forms are used with v16f32/v16i32 masks
// under HasAVX512; 64x2 forms are used with v8f64/v8i64 masks under HasDQI.
597 defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
598                              v16f32_info, vinsert128_insert,
599                              INSERT_get_vinsert128_imm, [HasAVX512]>;
600 defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
601                              v8f64_info, vinsert128_insert,
602                              INSERT_get_vinsert128_imm, [HasDQI]>;
604 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
605                              v16i32_info, vinsert128_insert,
606                              INSERT_get_vinsert128_imm, [HasAVX512]>;
607 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
608                              v16i32_info, vinsert128_insert,
609                              INSERT_get_vinsert128_imm, [HasAVX512]>;
610 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
611                              v16i32_info, vinsert128_insert,
612                              INSERT_get_vinsert128_imm, [HasAVX512]>;
613 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
614                              v8i64_info, vinsert128_insert,
615                              INSERT_get_vinsert128_imm, [HasDQI]>;
616 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
617                              v8i64_info, vinsert128_insert,
618                              INSERT_get_vinsert128_imm, [HasDQI]>;
619 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
620                              v8i64_info, vinsert128_insert,
621                              INSERT_get_vinsert128_imm, [HasDQI]>;
// Masked 256-bit inserts into 512-bit vectors. 32x8 forms are used with
// v16f32/v16i32 masks under HasDQI; 64x4 forms are used with v8f64/v8i64
// masks under HasAVX512.
623 defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
624                              v16f32_info, vinsert256_insert,
625                              INSERT_get_vinsert256_imm, [HasDQI]>;
626 defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
627                              v8f64_info, vinsert256_insert,
628                              INSERT_get_vinsert256_imm, [HasAVX512]>;
630 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
631                              v16i32_info, vinsert256_insert,
632                              INSERT_get_vinsert256_imm, [HasDQI]>;
633 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
634                              v16i32_info, vinsert256_insert,
635                              INSERT_get_vinsert256_imm, [HasDQI]>;
636 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
637                              v16i32_info, vinsert256_insert,
638                              INSERT_get_vinsert256_imm, [HasDQI]>;
639 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
640                              v8i64_info, vinsert256_insert,
641                              INSERT_get_vinsert256_imm, [HasAVX512]>;
642 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
643                              v8i64_info, vinsert256_insert,
644                              INSERT_get_vinsert256_imm, [HasAVX512]>;
645 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
646                              v8i64_info, vinsert256_insert,
647                              INSERT_get_vinsert256_imm, [HasAVX512]>;
649 // vinsertps - insert f32 to XMM
// EVEX-encoded register and memory forms operating on VR128X. Both select the
// X86insertps node with the control byte taken from the immediate $src3.
650 let ExeDomain = SSEPackedSingle in {
// Register form; marked commutable.
651 let isCommutable = 1 in
652 def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
653       (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
654       "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
655       [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
656       EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>;
// Memory form: the second source is a scalar f32 load wrapped in
// scalar_to_vector, so only a 32-bit memory operand (f32mem) is read.
657 def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
658       (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
659       "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
660       [(set VR128X:$dst, (X86insertps VR128X:$src1,
661                           (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
662                           timm:$src3))]>,
663       EVEX, VVVV, EVEX_CD8<32, CD8VT1>,
664       Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
667 //===----------------------------------------------------------------------===//
668 // AVX-512 VECTOR EXTRACT
669 //---
671 // Supports two different pattern operators for mask and unmasked ops. Allows
672 // null_frag to be passed for one.
//   Opcode            - opcode shared by the rr, mr and mrk forms.
//   From / To         - type info of the source vector and of the extracted
//                       subvector.
//   vextract_extract  - operator used by the unmasked rr pattern and by the
//                       mr store pattern.
//   vextract_for_mask - operator used by the masked rr patterns.
//   SchedRR / SchedMR - scheduling classes for the register and store forms.
673 multiclass vextract_for_size_split<int Opcode,
674                                    X86VectorVTInfo From, X86VectorVTInfo To,
675                                    SDPatternOperator vextract_extract,
676                                    SDPatternOperator vextract_for_mask,
677                                    SchedWrite SchedRR, SchedWrite SchedMR> {
679   let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register destination; AVX512_maskable_split supplies the masked
    // variants from the two pattern operators.
680     defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
681                 (ins From.RC:$src1, u8imm:$idx),
682                 "vextract" # To.EltTypeName # "x" # To.NumElts,
683                 "$idx, $src1", "$src1, $idx",
684                 (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
685                 (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
686                 AVX512AIi8Base, EVEX, Sched<[SchedRR]>;
    // Unmasked store of the extracted subvector to memory.
688     def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
689                     (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
690                     "vextract" # To.EltTypeName # "x" # To.NumElts #
691                         "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
692                     [(store (To.VT (vextract_extract:$idx
693                                     (From.VT From.RC:$src1), (iPTR imm))),
694                              addr:$dst)]>, EVEX,
695                     Sched<[SchedMR]>;
    // Masked store form. It has an empty pattern list, so mayStore is set
    // explicitly rather than derived from a pattern.
697     let mayStore = 1, hasSideEffects = 0 in
698     def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
699                     (ins To.MemOp:$dst, To.KRCWM:$mask,
700                                         From.RC:$src1, u8imm:$idx),
701                      "vextract" # To.EltTypeName # "x" # To.NumElts #
702                           "\t{$idx, $src1, $dst {${mask}}|"
703                           "$dst {${mask}}, $src1, $idx}", []>,
704                     EVEX_K, EVEX, Sched<[SchedMR]>;
705   }
708 // Passes the same pattern operator for masked and unmasked ops.
// Thin wrapper over vextract_for_size_split that forwards vextract_extract as
// both the unmasked and the masked operator.
709 multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
710                              X86VectorVTInfo To,
711                              SDPatternOperator vextract_extract,
712                              SchedWrite SchedRR, SchedWrite SchedMR> :
713   vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;
715 // Codegen pattern for the alternative types
// Maps an unmasked subvector extract, and a store of such an extract, onto
// the "rr" and "mr" forms of an existing instruction.
//   InstrStr                 - instruction name prefix ("rr"/"mr" appended).
//   From / To                - type info of the source vector and of the
//                              extracted subvector.
//   vextract_extract         - fragment matching the extract node ($ext).
//   EXTRACT_get_vextract_imm - transform applied to $ext to produce the
//                              instruction's immediate operand.
//   p                        - predicates guarding both patterns.
716 multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
717                 X86VectorVTInfo To, PatFrag vextract_extract,
718                 SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
719   let Predicates = p in {
     // Extract to a register.
720      def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
721                (To.VT (!cast<Instruction>(InstrStr#"rr")
722                           From.RC:$src1,
723                           (EXTRACT_get_vextract_imm To.RC:$ext)))>;
     // Extract folded into a store.
724      def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
725                               (iPTR imm))), addr:$dst),
726                (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
727                 (EXTRACT_get_vextract_imm To.RC:$ext))>;
728   }
// Instantiates the subvector-extract instructions for one pair of 32-bit and
// 64-bit element types.
//   EltVT32 / EltVT64     - element types for the 32-bit and 64-bit variants.
//   Opcode128 / Opcode256 - opcodes of the 128-bit and 256-bit extracts.
//   SchedRR / SchedMR     - scheduling classes for register and store forms.
// Record names are NAME followed by "32x4Z", "64x4Z", "32x4Z256", "64x2Z256",
// "64x2Z" or "32x8Z".
731 multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
732                              ValueType EltVT64, int Opcode256,
733                              SchedWrite SchedRR, SchedWrite SchedMR> {
734   let Predicates = [HasAVX512] in {
    // 128-bit extract of 4 x 32-bit elements from a 512-bit vector.
735     defm NAME # "32x4Z" : vextract_for_size<Opcode128,
736                                    X86VectorVTInfo<16, EltVT32, VR512>,
737                                    X86VectorVTInfo< 4, EltVT32, VR128X>,
738                                    vextract128_extract, SchedRR, SchedMR>,
739                                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
    // 256-bit extract of 4 x 64-bit elements from a 512-bit vector.
740     defm NAME # "64x4Z" : vextract_for_size<Opcode256,
741                                    X86VectorVTInfo< 8, EltVT64, VR512>,
742                                    X86VectorVTInfo< 4, EltVT64, VR256X>,
743                                    vextract256_extract, SchedRR, SchedMR>,
744                                        REX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
745   }
  // 128-bit extract of 4 x 32-bit elements from a 256-bit vector.
746   let Predicates = [HasVLX] in
747     defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
748                                  X86VectorVTInfo< 8, EltVT32, VR256X>,
749                                  X86VectorVTInfo< 4, EltVT32, VR128X>,
750                                  vextract128_extract, SchedRR, SchedMR>,
751                                      EVEX_V256, EVEX_CD8<32, CD8VT4>;
753   // Even with DQI we'd like to only use these instructions for masking.
  // null_frag is passed as the unmasked operator, so only the masked patterns
  // are generated for this variant.
754   let Predicates = [HasVLX, HasDQI] in
755     defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
756                                  X86VectorVTInfo< 4, EltVT64, VR256X>,
757                                  X86VectorVTInfo< 2, EltVT64, VR128X>,
758                                  null_frag, vextract128_extract, SchedRR, SchedMR>,
759                                     EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
761   // Even with DQI we'd like to only use these instructions for masking.
  // As above, null_frag disables the unmasked patterns for both variants.
762   let Predicates = [HasDQI] in {
763     defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
764                                  X86VectorVTInfo< 8, EltVT64, VR512>,
765                                  X86VectorVTInfo< 2, EltVT64, VR128X>,
766                                  null_frag, vextract128_extract, SchedRR, SchedMR>,
767                                      REX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
768     defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
769                                  X86VectorVTInfo<16, EltVT32, VR512>,
770                                  X86VectorVTInfo< 8, EltVT32, VR256X>,
771                                  null_frag, vextract256_extract, SchedRR, SchedMR>,
772                                      EVEX_V512, EVEX_CD8<32, CD8VT8>;
773   }
776 // TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
// Floating-point (f32/f64, opcodes 0x19/0x1b) and integer (i32/i64, opcodes
// 0x39/0x3b) families of subvector-extract instructions.
777 defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
778 defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
780 // extract_subvector codegen patterns with the alternative types.
781 // Even with AVX512DQ we'll still use these for unmasked operations.
// 128-bit extracts of 64-bit element vectors from 512-bit sources reuse the
// 32x4 instructions.
782 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
783           vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
784 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
785           vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// 256-bit extracts of 32-bit element vectors from 512-bit sources reuse the
// 64x4 instructions.
787 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
788           vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
789 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
790           vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
// 128-bit extracts of 64-bit element vectors from 256-bit sources (HasVLX).
792 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
793           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
794 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
795           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
797 // Codegen pattern with the alternative types extract VEC128 from VEC256
798 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
799           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
800 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
801           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
802 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info,
803           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
804 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16bf16x_info, v8bf16x_info,
805           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
807 // Codegen pattern with the alternative types extract VEC128 from VEC512
808 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
809                  vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
810 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
811                  vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
812 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info,
813                  vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
814 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32bf16_info, v8bf16x_info,
815                  vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
816 // Codegen pattern with the alternative types extract VEC256 from VEC512
817 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
818                  vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
819 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
820                  vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
821 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,
822                  vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
823 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32bf16_info, v16bf16x_info,
824                  vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
827 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
828 // smaller extract to enable EVEX->VEX.
// Under NoVLX the VEXTRACTI128rr / VEXTRACTF128rr instructions are selected
// directly: the 512-bit source is narrowed to its sub_ymm subregister and the
// extract uses immediate 1. The matched element index (2, 4, 8 or 16) is the
// one that corresponds to bit 128 for each element size.
829 let Predicates = [NoVLX, HasEVEX512] in {
830 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
831           (v2i64 (VEXTRACTI128rr
832                   (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
833                   (iPTR 1)))>;
834 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
835           (v2f64 (VEXTRACTF128rr
836                   (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
837                   (iPTR 1)))>;
838 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
839           (v4i32 (VEXTRACTI128rr
840                   (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
841                   (iPTR 1)))>;
842 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
843           (v4f32 (VEXTRACTF128rr
844                   (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
845                   (iPTR 1)))>;
846 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
847           (v8i16 (VEXTRACTI128rr
848                   (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
849                   (iPTR 1)))>;
850 def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
851           (v8f16 (VEXTRACTF128rr
852                   (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
853                   (iPTR 1)))>;
854 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
855           (v16i8 (VEXTRACTI128rr
856                   (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
857                   (iPTR 1)))>;
860 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
861 // smaller extract to enable EVEX->VEX.
// Under HasVLX the 256-bit VEXTRACTI32x4Z256rr / VEXTRACTF32x4Z256rr forms are
// selected on the sub_ymm subregister of the 512-bit source, with immediate 1.
862 let Predicates = [HasVLX] in {
863 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
864           (v2i64 (VEXTRACTI32x4Z256rr
865                   (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
866                   (iPTR 1)))>;
867 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
868           (v2f64 (VEXTRACTF32x4Z256rr
869                   (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
870                   (iPTR 1)))>;
871 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
872           (v4i32 (VEXTRACTI32x4Z256rr
873                   (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
874                   (iPTR 1)))>;
875 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
876           (v4f32 (VEXTRACTF32x4Z256rr
877                   (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
878                   (iPTR 1)))>;
879 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
880           (v8i16 (VEXTRACTI32x4Z256rr
881                   (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
882                   (iPTR 1)))>;
883 def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
884           (v8f16 (VEXTRACTF32x4Z256rr
885                   (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
886                   (iPTR 1)))>;
887 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
888           (v16i8 (VEXTRACTI32x4Z256rr
889                   (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
890                   (iPTR 1)))>;
894 // Additional patterns for handling a bitcast between the vselect and the
895 // extract_subvector.
// The extract is expressed on From.VT producing To.VT, while the mask and the
// select operate on Cast.VT.
//   InstrStr                 - instruction name prefix; "rrk" and "rrkz" are
//                              appended to choose the variant.
//   From / To                - type info of the source vector and of the
//                              extracted subvector.
//   Cast                     - type info of the select result and of the mask.
//   vextract_extract         - fragment matching the extract node ($ext).
//   EXTRACT_get_vextract_imm - transform applied to $ext to produce the
//                              instruction's immediate operand.
//   p                        - predicates guarding both patterns.
896 multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
897                                   X86VectorVTInfo To, X86VectorVTInfo Cast,
898                                   PatFrag vextract_extract,
899                                   SDNodeXForm EXTRACT_get_vextract_imm,
900                                   list<Predicate> p> {
901 let Predicates = p in {
  // Masked with pass-through $src0. The pass-through is an operand of the
  // Cast.VT select, so it is bound with Cast.RC in both the source and the
  // result pattern.
902   def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
903                                    (bitconvert
904                                     (To.VT (vextract_extract:$ext
905                                             (From.VT From.RC:$src), (iPTR imm)))),
906                                    Cast.RC:$src0)),
907             (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
908                       Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
909                       (EXTRACT_get_vextract_imm To.RC:$ext)))>;
  // Zero-masked (false operand is Cast.ImmAllZerosV).
911   def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
912                                    (bitconvert
913                                     (To.VT (vextract_extract:$ext
914                                             (From.VT From.RC:$src), (iPTR imm)))),
915                                    Cast.ImmAllZerosV)),
916             (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
917                       Cast.KRCWM:$mask, From.RC:$src,
918                       (EXTRACT_get_vextract_imm To.RC:$ext)))>;
// Masked 128-bit extracts from 256-bit vectors where the extract's element
// type differs from the mask type (the third type argument). 32x4 forms are
// used with v4f32/v4i32 masks under HasVLX; 64x2 forms are used with
// v2f64/v2i64 masks and additionally require HasDQI.
922 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
923                               v4f32x_info, vextract128_extract,
924                               EXTRACT_get_vextract128_imm, [HasVLX]>;
925 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
926                               v2f64x_info, vextract128_extract,
927                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
929 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
930                               v4i32x_info, vextract128_extract,
931                               EXTRACT_get_vextract128_imm, [HasVLX]>;
932 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
933                               v4i32x_info, vextract128_extract,
934                               EXTRACT_get_vextract128_imm, [HasVLX]>;
935 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
936                               v4i32x_info, vextract128_extract,
937                               EXTRACT_get_vextract128_imm, [HasVLX]>;
938 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
939                               v2i64x_info, vextract128_extract,
940                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
941 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
942                               v2i64x_info, vextract128_extract,
943                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
944 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
945                               v2i64x_info, vextract128_extract,
946                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
// Masked 128-bit extracts from 512-bit vectors. 32x4 forms are guarded by
// HasAVX512, 64x2 forms by HasDQI.
948 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
949                               v4f32x_info, vextract128_extract,
950                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
951 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
952                               v2f64x_info, vextract128_extract,
953                               EXTRACT_get_vextract128_imm, [HasDQI]>;
955 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
956                               v4i32x_info, vextract128_extract,
957                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
958 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
959                               v4i32x_info, vextract128_extract,
960                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
961 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
962                               v4i32x_info, vextract128_extract,
963                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
964 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
965                               v2i64x_info, vextract128_extract,
966                               EXTRACT_get_vextract128_imm, [HasDQI]>;
967 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
968                               v2i64x_info, vextract128_extract,
969                               EXTRACT_get_vextract128_imm, [HasDQI]>;
970 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
971                               v2i64x_info, vextract128_extract,
972                               EXTRACT_get_vextract128_imm, [HasDQI]>;
// Masked 256-bit extracts from 512-bit vectors. 32x8 forms are guarded by
// HasDQI, 64x4 forms by HasAVX512.
974 defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
975                               v8f32x_info, vextract256_extract,
976                               EXTRACT_get_vextract256_imm, [HasDQI]>;
977 defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
978                               v4f64x_info, vextract256_extract,
979                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
981 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
982                               v8i32x_info, vextract256_extract,
983                               EXTRACT_get_vextract256_imm, [HasDQI]>;
984 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
985                               v8i32x_info, vextract256_extract,
986                               EXTRACT_get_vextract256_imm, [HasDQI]>;
987 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
988                               v8i32x_info, vextract256_extract,
989                               EXTRACT_get_vextract256_imm, [HasDQI]>;
990 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
991                               v4i64x_info, vextract256_extract,
992                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
993 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
994                               v4i64x_info, vextract256_extract,
995                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
996 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
997                               v4i64x_info, vextract256_extract,
998                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1000 // vextractps - extract 32 bits from XMM
// Register form: the v4f32 source is viewed as v4i32 (bc_v4i32) and the
// element selected by the immediate is written to a GR32orGR64 destination.
1001 def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
1002       (ins VR128X:$src1, u8imm:$src2),
1003       "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1004       [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
1005       EVEX, WIG, Sched<[WriteVecExtract]>;
// Memory form: the same extract folded into a store to a 32-bit memory
// operand (f32mem).
1007 def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1008       (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1009       "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1010       [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1011                           addr:$dst)]>,
1012       EVEX, WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1014 //===---------------------------------------------------------------------===//
1015 // AVX-512 BROADCAST
1016 //---
1017 // broadcast with a scalar argument.
// Selects X86VBroadcast of a scalar held in SrcInfo.FRC by first copying the
// scalar into SrcInfo.RC (COPY_TO_REGCLASS) and then using the register forms
// of the broadcast instruction named Name # DestInfo.ZSuffix.
//   Name     - instruction name prefix.
//   DestInfo - type info of the broadcast result (and of the mask).
//   SrcInfo  - type info supplying the scalar and vector register classes of
//              the source.
1018 multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
1019                                    X86VectorVTInfo SrcInfo> {
  // Unmasked.
1020   def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1021             (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
1022              (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  // Masked with pass-through $src0.
1023   def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1024                                        (X86VBroadcast SrcInfo.FRC:$src),
1025                                        DestInfo.RC:$src0)),
1026             (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
1027              DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1028              (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  // Zero-masked (false operand is DestInfo.ImmAllZerosV).
1029   def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1030                                        (X86VBroadcast SrcInfo.FRC:$src),
1031                                        DestInfo.ImmAllZerosV)),
1032             (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
1033              DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1036 // Split version to allow mask and broadcast node to be different types. This
1037 // helps support the 32x2 broadcasts.
1038 multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1039                                      SchedWrite SchedRR, SchedWrite SchedRM,
1040                                      X86VectorVTInfo MaskInfo,
1041                                      X86VectorVTInfo DestInfo,
1042                                      X86VectorVTInfo SrcInfo,
1043                                      bit IsConvertibleToThreeAddress,
1044                                      SDPatternOperator UnmaskedOp = X86VBroadcast,
1045                                      SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
1046   let hasSideEffects = 0 in
1047   def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
1048                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1049                     [(set MaskInfo.RC:$dst,
1050                       (MaskInfo.VT
1051                        (bitconvert
1052                         (DestInfo.VT
1053                          (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
1054                     DestInfo.ExeDomain>, T8, PD, EVEX, Sched<[SchedRR]>;
1055   def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1056                       (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
1057                       !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1058                        "${dst} {${mask}} {z}, $src}"),
1059                        [(set MaskInfo.RC:$dst,
1060                          (vselect_mask MaskInfo.KRCWM:$mask,
1061                           (MaskInfo.VT
1062                            (bitconvert
1063                             (DestInfo.VT
1064                              (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1065                           MaskInfo.ImmAllZerosV))],
1066                        DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
1067   let Constraints = "$src0 = $dst" in
1068   def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1069                      (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1070                           SrcInfo.RC:$src),
1071                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1072                      "${dst} {${mask}}, $src}"),
1073                      [(set MaskInfo.RC:$dst,
1074                        (vselect_mask MaskInfo.KRCWM:$mask,
1075                         (MaskInfo.VT
1076                          (bitconvert
1077                           (DestInfo.VT
1078                            (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1079                         MaskInfo.RC:$src0))],
1080                       DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_K, Sched<[SchedRR]>;
1082   let hasSideEffects = 0, mayLoad = 1, isReMaterializable = 1, canFoldAsLoad = 1 in
1083   def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1084                     (ins SrcInfo.ScalarMemOp:$src),
1085                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1086                     [(set MaskInfo.RC:$dst,
1087                       (MaskInfo.VT
1088                        (bitconvert
1089                         (DestInfo.VT
1090                          (UnmaskedBcastOp addr:$src)))))],
1091                     DestInfo.ExeDomain>, T8, PD, EVEX,
1092                     EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1094   def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1095                       (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
1096                       !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1097                        "${dst} {${mask}} {z}, $src}"),
1098                        [(set MaskInfo.RC:$dst,
1099                          (vselect_mask MaskInfo.KRCWM:$mask,
1100                           (MaskInfo.VT
1101                            (bitconvert
1102                             (DestInfo.VT
1103                              (SrcInfo.BroadcastLdFrag addr:$src)))),
1104                           MaskInfo.ImmAllZerosV))],
1105                        DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_KZ,
1106                        EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1108   let Constraints = "$src0 = $dst",
1109       isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
1110   def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1111                      (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1112                           SrcInfo.ScalarMemOp:$src),
1113                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1114                      "${dst} {${mask}}, $src}"),
1115                      [(set MaskInfo.RC:$dst,
1116                        (vselect_mask MaskInfo.KRCWM:$mask,
1117                         (MaskInfo.VT
1118                          (bitconvert
1119                           (DestInfo.VT
1120                            (SrcInfo.BroadcastLdFrag addr:$src)))),
1121                         MaskInfo.RC:$src0))],
1122                       DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_K,
1123                       EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1126 // Helper class to force mask and broadcast result to same type.
// Forwards to avx512_broadcast_rm_split, passing DestInfo for both of the
// leading VT-info arguments and SrcInfo for the source. The two trailing
// pattern-operator arguments of the split multiclass (UnmaskedOp and
// UnmaskedBcastOp) are left at their defaults.
1127 multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
1128                                SchedWrite SchedRR, SchedWrite SchedRM,
1129                                X86VectorVTInfo DestInfo,
1130                                X86VectorVTInfo SrcInfo,
1131                                bit IsConvertibleToThreeAddress> :
1132   avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
1133                             DestInfo, DestInfo, SrcInfo,
1134                             IsConvertibleToThreeAddress>;
// Instantiates the 512-bit (Z) and 256-bit (Z256) forms of a floating-point
// scalar broadcast. Each form combines avx512_broadcast_rm with
// avx512_broadcast_scalar (defined outside this chunk). The source is always
// the 128-bit info (_.info128). No Z128 form is defined by this multiclass.
1136 multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1137                                   AVX512VLVectorVTInfo _> {
       // 512-bit form, gated on HasAVX512.
1138   let Predicates = [HasAVX512] in {
1139     defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1140                                   WriteFShuffle256Ld, _.info512, _.info128, 1>,
1141               avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1142               EVEX_V512;
1143   }
       // 256-bit form, gated on HasVLX.
1145   let Predicates = [HasVLX] in {
1146     defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1147                                      WriteFShuffle256Ld, _.info256, _.info128, 1>,
1148                  avx512_broadcast_scalar<NAME, _.info256, _.info128>,
1149                  EVEX_V256;
1150   }
// Same structure as avx512_fp_broadcast_sd, but additionally defines a
// 128-bit (Z128) form. All three forms take their source from _.info128 and
// pass 1 for IsConvertibleToThreeAddress.
1153 multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1154                                   AVX512VLVectorVTInfo _> {
       // 512-bit form, gated on HasAVX512.
1155   let Predicates = [HasAVX512] in {
1156     defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1157                                   WriteFShuffle256Ld, _.info512, _.info128, 1>,
1158               avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1159               EVEX_V512;
1160   }
       // 256-bit and 128-bit forms, gated on HasVLX.
       // NOTE(review): Z128 uses the same WriteFShuffle256/WriteFShuffle256Ld
       // scheduling classes as the wider forms -- confirm this is intended.
1162   let Predicates = [HasVLX] in {
1163     defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1164                                      WriteFShuffle256Ld, _.info256, _.info128, 1>,
1165                  avx512_broadcast_scalar<NAME, _.info256, _.info128>,
1166                  EVEX_V256;
1167     defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1168                                      WriteFShuffle256Ld, _.info128, _.info128, 1>,
1169                  avx512_broadcast_scalar<NAME, _.info128, _.info128>,
1170                  EVEX_V128;
1171   }
// Top-level instantiations of the FP scalar broadcasts.
// VBROADCASTSS: opcode 0x18 over the f32 VL info (Z, Z256 and Z128 forms).
// VBROADCASTSD: opcode 0x19 over the f64 VL info with REX_W (Z and Z256 only,
// because avx512_fp_broadcast_sd defines no Z128 form).
1173 defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1174                                        avx512vl_f32_info>;
1175 defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1176                                        avx512vl_f64_info>, REX_W;
// Broadcast from a general-purpose register (SrcRC) into a vector register.
// Defines `rr` through AVX512_maskable; the mnemonic is "vpbroadcast" followed
// by the VT info's Suffix, and the selection pattern is OpNode applied to the
// source register.
1178 multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1179                                     X86VectorVTInfo _, SDPatternOperator OpNode,
1180                                     RegisterClass SrcRC> {
1181   // Fold with a mask even if it has multiple uses since it is cheap.
       // `vselect` is passed as the final AVX512_maskable argument for that
       // reason; the commutable flags are all explicitly 0.
1182   let ExeDomain = _.ExeDomain in
1183   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1184                           (ins SrcRC:$src),
1185                           "vpbroadcast"#_.Suffix, "$src", "$src",
1186                           (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
1187                           /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
1188                           T8, PD, EVEX, Sched<[SchedRR]>;
// Byte/word broadcast from a general-purpose register.
// The instruction itself always takes a GR32 source and is defined through
// AVX512_maskable_custom with empty pattern lists. Selection is done by the
// three anonymous Pat records below, which widen the narrower SrcRC value to
// i32 by inserting it at Subreg into an IMPLICIT_DEF.
// Name is the full record-name prefix; it is used to look up the generated
// rr/rrk/rrkz instructions via !cast.
1191 multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1192                                     X86VectorVTInfo _, SDPatternOperator OpNode,
1193                                     RegisterClass SrcRC, SubRegIndex Subreg> {
1194   let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1195   defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
1196                          (outs _.RC:$dst), (ins GR32:$src),
1197                          !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1198                          !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1199                          "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
1200                          "$src0 = $dst">, T8, PD, EVEX, Sched<[SchedRR]>;
       // Unmasked form.
1202   def : Pat <(_.VT (OpNode SrcRC:$src)),
1203              (!cast<Instruction>(Name#rr)
1204               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1206   // Fold with a mask even if it has multiple uses since it is cheap.
       // Merge-masked form: lanes not selected by $mask come from $src0.
1207   def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1208              (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
1209               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
       // Zero-masked form: lanes not selected by $mask are zero.
1211   def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1212              (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
1213               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
// Vector-length wrapper around avx512_int_broadcastbw_reg.
// Defines Z under [prd] and Z256/Z128 under [prd, HasVLX]. The Name passed
// down is extended with the matching Z/Z256/Z128 suffix so the inner !cast
// lookups resolve to the records defined here. Z128 uses WriteShuffle; the
// wider forms use WriteShuffle256.
1216 multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1217                       AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1218                       RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1219   let Predicates = [prd] in
1220     defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1221               OpNode, SrcRC, Subreg>, EVEX_V512;
1222   let Predicates = [prd, HasVLX] in {
1223     defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1224               _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1225     defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1226               _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
1227   }
// Vector-length wrapper around avx512_int_broadcast_reg.
// Defines Z under [prd] and Z256/Z128 under [prd, HasVLX]. Z128 uses
// WriteShuffle; the wider forms use WriteShuffle256.
1230 multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1231                                        SDPatternOperator OpNode,
1232                                        RegisterClass SrcRC, Predicate prd> {
1233   let Predicates = [prd] in
1234     defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1235                                       SrcRC>, EVEX_V512;
1236   let Predicates = [prd, HasVLX] in {
1237     defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1238                                          SrcRC>, EVEX_V256;
1239     defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
1240                                          SrcRC>, EVEX_V128;
1241   }
// Integer broadcasts from general-purpose registers.
// Byte (0x7A, GR8) and word (0x7B, GR16) use the subregister-inserting
// multiclass and require HasBWI. The string argument repeats the defm name
// because it is used for !cast lookups inside the multiclass.
// Dword (GR32) and qword (GR64) share opcode 0x7C and require HasAVX512; the
// qword form adds REX_W.
1244 defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1245                        avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1246 defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1247                        avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1248                        HasBWI>;
1249 defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1250                                                  X86VBroadcast, GR32, HasAVX512>;
1251 defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1252                                                  X86VBroadcast, GR64, HasAVX512>, REX_W;
// Vector-length wrapper for integer broadcasts built on avx512_broadcast_rm.
// Defines Z under [prd] and Z256/Z128 under [prd, HasVLX]. The source info is
// _.info128 for every width. IsConvertibleToThreeAddress is forwarded
// unchanged to each instantiation.
1254 multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1255                                       AVX512VLVectorVTInfo _, Predicate prd,
1256                                       bit IsConvertibleToThreeAddress> {
1257   let Predicates = [prd] in {
1258     defm Z :   avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1259                                    WriteShuffle256Ld, _.info512, _.info128,
1260                                    IsConvertibleToThreeAddress>,
1261                                   EVEX_V512;
1262   }
1263   let Predicates = [prd, HasVLX] in {
1264     defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1265                                     WriteShuffle256Ld, _.info256, _.info128,
1266                                     IsConvertibleToThreeAddress>,
1267                                  EVEX_V256;
         // The 128-bit form uses the WriteShuffle/WriteShuffleXLd classes.
1268     defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
1269                                     WriteShuffleXLd, _.info128, _.info128,
1270                                     IsConvertibleToThreeAddress>,
1271                                  EVEX_V128;
1272   }
// Integer broadcasts instantiated through avx512_int_broadcast_rm_vl.
// Byte/word forms require HasBWI and pass 0 for IsConvertibleToThreeAddress.
// Dword/qword forms require HasAVX512 and pass 1; the qword form adds REX_W.
1275 defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1276                                            avx512vl_i8_info, HasBWI, 0>;
1277 defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1278                                            avx512vl_i16_info, HasBWI, 0>;
1279 defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1280                                            avx512vl_i32_info, HasAVX512, 1>;
1281 defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1282                                            avx512vl_i64_info, HasAVX512, 1>, REX_W;
// Subvector broadcast from memory. Defines a single `rm` via AVX512_maskable:
// the operand is _Src.MemOp and the pattern is OpNode applied to the address,
// producing _Dst.VT.
1284 multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1285                                       SDPatternOperator OpNode,
1286                                       X86VectorVTInfo _Dst,
1287                                       X86VectorVTInfo _Src> {
1288   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1289                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1290                            (_Dst.VT (OpNode addr:$src))>,
1291                            Sched<[SchedWriteShuffle.YMM.Folded]>,
1292                            AVX5128IBase, EVEX;
1295 // This should be used for the AVX512DQ broadcast instructions. It disables
1296 // the unmasked patterns so that we only use the DQ instructions when masking
1297 // is requested.
// Implemented with AVX512_maskable_split, passing null_frag as the first
// pattern and the real OpNode pattern as the second.
1298 multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1299                                          SDPatternOperator OpNode,
1300                                          X86VectorVTInfo _Dst,
1301                                          X86VectorVTInfo _Src> {
       // hasSideEffects/mayLoad are stated explicitly here.
       // NOTE(review): presumably because they cannot be inferred for the
       // pattern-less unmasked form -- confirm.
1302   let hasSideEffects = 0, mayLoad = 1 in
1303   defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1304                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1305                            (null_frag),
1306                            (_Dst.VT (OpNode addr:$src))>,
1307                            Sched<[SchedWriteShuffle.YMM.Folded]>,
1308                            AVX5128IBase, EVEX;
// 512-bit f16 broadcasts are selected to the VPBROADCASTW Z instructions.
1310 let Predicates = [HasBWI] in {
       // Broadcast of a 16-bit load.
1311   def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
1312             (VPBROADCASTWZrm addr:$src)>;
       // Broadcast from a v8f16 value held in VR128X.
1314   def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
1315             (VPBROADCASTWZrr VR128X:$src)>;
       // Broadcast from a scalar f16 in FR16X: copy to VR128X first.
1316   def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
1317             (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
// 128-bit and 256-bit f16 broadcasts are selected to the VPBROADCASTW
// Z128/Z256 instructions.
1319 let Predicates = [HasVLX, HasBWI] in {
       // Broadcast of a 16-bit load.
1320   def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
1321             (VPBROADCASTWZ128rm addr:$src)>;
1322   def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
1323             (VPBROADCASTWZ256rm addr:$src)>;
       // Broadcast from a v8f16 value held in VR128X.
1325   def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
1326             (VPBROADCASTWZ128rr VR128X:$src)>;
1327   def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
1328             (VPBROADCASTWZ256rr VR128X:$src)>;
       // Broadcast from a scalar f16 in FR16X: copy to VR128X first.
1330   def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
1331             (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1332   def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
1333             (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1336 //===----------------------------------------------------------------------===//
1337 // AVX-512 BROADCAST SUBVECTORS
1340 defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1341                        X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
1342                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
1343 defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1344                        X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
1345                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
1346 defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1347                        X86SubVBroadcastld256, v8i64_info, v4i64x_info>, REX_W,
1348                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
1349 defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1350                        X86SubVBroadcastld256, v8f64_info, v4f64x_info>, REX_W,
1351                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
// Extra selection patterns for the 512-bit subvector broadcasts defined above.
// They cover result types other than the one each instruction was declared
// with, and masked selects whose broadcast is seen through a bitcast.
1353 let Predicates = [HasAVX512] in {
     // Unmasked 256-bit subvector broadcast loads: FP result types use
     // VBROADCASTF64X4rm, integer result types use VBROADCASTI64X4rm.
1354 def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
1355           (VBROADCASTF64X4rm addr:$src)>;
1356 def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
1357           (VBROADCASTF64X4rm addr:$src)>;
1358 def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
1359           (VBROADCASTF64X4rm addr:$src)>;
1360 def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
1361           (VBROADCASTI64X4rm addr:$src)>;
1362 def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
1363           (VBROADCASTI64X4rm addr:$src)>;
1364 def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
1365           (VBROADCASTI64X4rm addr:$src)>;
1366 def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
1367           (VBROADCASTI64X4rm addr:$src)>;
     // Unmasked 128-bit subvector broadcast loads: FP result types use
     // VBROADCASTF32X4rm, integer result types use VBROADCASTI32X4rm.
1369 def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
1370           (VBROADCASTF32X4rm addr:$src)>;
1371 def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
1372           (VBROADCASTF32X4rm addr:$src)>;
1373 def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
1374           (VBROADCASTF32X4rm addr:$src)>;
1375 def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
1376           (VBROADCASTI32X4rm addr:$src)>;
1377 def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
1378           (VBROADCASTI32X4rm addr:$src)>;
1379 def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
1380           (VBROADCASTI32X4rm addr:$src)>;
1381 def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
1382           (VBROADCASTI32X4rm addr:$src)>;
1384 // Patterns for selects of bitcasted operations.
     // 16-lane masks: a 128-bit broadcast typed with 64-bit elements, bitcast
     // to 32-bit elements, selects the 32X4 masked forms (rmkz = zeroing,
     // rmk = merging with $src0).
1385 def : Pat<(vselect_mask VK16WM:$mask,
1386                         (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1387                         (v16f32 immAllZerosV)),
1388           (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1389 def : Pat<(vselect_mask VK16WM:$mask,
1390                         (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1391                         VR512:$src0),
1392           (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1393 def : Pat<(vselect_mask VK16WM:$mask,
1394                         (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1395                         (v16i32 immAllZerosV)),
1396           (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1397 def : Pat<(vselect_mask VK16WM:$mask,
1398                         (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1399                         VR512:$src0),
1400           (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
     // 8-lane masks: a 256-bit broadcast typed with 32-bit elements, bitcast
     // to 64-bit elements, selects the 64X4 masked forms.
1402 def : Pat<(vselect_mask VK8WM:$mask,
1403                         (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1404                         (v8f64 immAllZerosV)),
1405           (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1406 def : Pat<(vselect_mask VK8WM:$mask,
1407                         (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1408                         VR512:$src0),
1409           (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1410 def : Pat<(vselect_mask VK8WM:$mask,
1411                         (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1412                         (v8i64 immAllZerosV)),
1413           (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1414 def : Pat<(vselect_mask VK8WM:$mask,
1415                         (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1416                         VR512:$src0),
1417           (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
// 256-bit (EVEX_V256) versions of the 32X4 subvector broadcasts, together with
// the same kinds of extra patterns as the 512-bit group: other result types,
// and masked selects seen through a bitcast.
1420 let Predicates = [HasVLX] in {
1421 defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1422                            X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
1423                            EVEX_V256, EVEX_CD8<32, CD8VT4>;
1424 defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1425                            X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
1426                            EVEX_V256, EVEX_CD8<32, CD8VT4>;
     // Unmasked 128-bit subvector broadcast loads into 256-bit results: FP
     // types use the F32X4 instruction, integer types use the I32X4 one.
1428 def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
1429           (VBROADCASTF32X4Z256rm addr:$src)>;
1430 def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
1431           (VBROADCASTF32X4Z256rm addr:$src)>;
1432 def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
1433           (VBROADCASTF32X4Z256rm addr:$src)>;
1434 def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
1435           (VBROADCASTI32X4Z256rm addr:$src)>;
1436 def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
1437           (VBROADCASTI32X4Z256rm addr:$src)>;
1438 def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
1439           (VBROADCASTI32X4Z256rm addr:$src)>;
1440 def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
1441           (VBROADCASTI32X4Z256rm addr:$src)>;
1443 // Patterns for selects of bitcasted operations.
     // 8-lane masks over a broadcast typed with 64-bit elements and bitcast to
     // 32-bit elements (rmkz = zeroing, rmk = merging with $src0).
1444 def : Pat<(vselect_mask VK8WM:$mask,
1445                         (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1446                         (v8f32 immAllZerosV)),
1447           (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1448 def : Pat<(vselect_mask VK8WM:$mask,
1449                         (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1450                         VR256X:$src0),
1451           (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1452 def : Pat<(vselect_mask VK8WM:$mask,
1453                         (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1454                         (v8i32 immAllZerosV)),
1455           (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1456 def : Pat<(vselect_mask VK8WM:$mask,
1457                         (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1458                         VR256X:$src0),
1459           (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
// v32bf16 subvector broadcast loads reuse the FP subvector broadcast
// instructions: F64X4 for a 256-bit source, F32X4 for a 128-bit source.
1462 let Predicates = [HasBF16] in {
1463   def : Pat<(v32bf16 (X86SubVBroadcastld256 addr:$src)),
1464             (VBROADCASTF64X4rm addr:$src)>;
1465   def : Pat<(v32bf16 (X86SubVBroadcastld128 addr:$src)),
1466             (VBROADCASTF32X4rm addr:$src)>;
// v16bf16 128-bit subvector broadcast load reuses the 256-bit F32X4
// instruction.
1469 let Predicates = [HasBF16, HasVLX] in
1470   def : Pat<(v16bf16 (X86SubVBroadcastld128 addr:$src)),
1471             (VBROADCASTF32X4Z256rm addr:$src)>;
// 256-bit 64X2 subvector broadcasts. They are built on the _dq multiclass, so
// only the masked forms get selection patterns.
// NOTE(review): the record names end in Z128, yet the definitions carry
// EVEX_V256 and use 256-bit destination infos (v4i64x_info / v4f64x_info).
// The patterns below reference these exact names, so confirm before renaming.
1473 let Predicates = [HasVLX, HasDQI] in {
1474 defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1475                            X86SubVBroadcastld128, v4i64x_info, v2i64x_info>,
1476                            EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
1477 defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1478                            X86SubVBroadcastld128, v4f64x_info, v2f64x_info>,
1479                            EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
1481 // Patterns for selects of bitcasted operations.
     // 4-lane masks over a broadcast typed with 32-bit elements and bitcast to
     // 64-bit elements (rmkz = zeroing, rmk = merging with $src0).
1482 def : Pat<(vselect_mask VK4WM:$mask,
1483                         (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1484                         (v4f64 immAllZerosV)),
1485           (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1486 def : Pat<(vselect_mask VK4WM:$mask,
1487                         (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1488                         VR256X:$src0),
1489           (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1490 def : Pat<(vselect_mask VK4WM:$mask,
1491                         (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1492                         (v4i64 immAllZerosV)),
1493           (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1494 def : Pat<(vselect_mask VK4WM:$mask,
1495                         (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1496                         VR256X:$src0),
1497           (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
// 512-bit DQ subvector broadcasts (64X2 and 32X8). They are built on the _dq
// multiclass, so only the masked forms get selection patterns.
1500 let Predicates = [HasDQI] in {
1501 defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1502                        X86SubVBroadcastld128, v8i64_info, v2i64x_info>, REX_W,
1503                        EVEX_V512, EVEX_CD8<64, CD8VT2>;
1504 defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1505                        X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
1506                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
1507 defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1508                        X86SubVBroadcastld128, v8f64_info, v2f64x_info>, REX_W,
1509                        EVEX_V512, EVEX_CD8<64, CD8VT2>;
1510 defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1511                        X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
1512                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
1514 // Patterns for selects of bitcasted operations.
     // 16-lane masks: a 256-bit broadcast typed with 64-bit elements, bitcast
     // to 32-bit elements, selects the 32X8 masked forms.
1515 def : Pat<(vselect_mask VK16WM:$mask,
1516                         (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1517                         (v16f32 immAllZerosV)),
1518           (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1519 def : Pat<(vselect_mask VK16WM:$mask,
1520                         (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1521                         VR512:$src0),
1522           (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1523 def : Pat<(vselect_mask VK16WM:$mask,
1524                         (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1525                         (v16i32 immAllZerosV)),
1526           (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1527 def : Pat<(vselect_mask VK16WM:$mask,
1528                         (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1529                         VR512:$src0),
1530           (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
     // 8-lane masks: a 128-bit broadcast typed with 32-bit elements, bitcast
     // to 64-bit elements, selects the 64X2 masked forms.
1532 def : Pat<(vselect_mask VK8WM:$mask,
1533                         (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1534                         (v8f64 immAllZerosV)),
1535           (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1536 def : Pat<(vselect_mask VK8WM:$mask,
1537                         (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1538                         VR512:$src0),
1539           (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1540 def : Pat<(vselect_mask VK8WM:$mask,
1541                         (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1542                         (v8i64 immAllZerosV)),
1543           (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1544 def : Pat<(vselect_mask VK8WM:$mask,
1545                         (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1546                         VR512:$src0),
1547           (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
// 32x2 broadcasts: Z (HasDQI) and Z256 (HasDQI + HasVLX) forms built on
// avx512_broadcast_rm_split. Unlike avx512_broadcast_rm, the two leading
// VT-info arguments differ (_Dst.infoN vs _Src.infoN); the source is
// _Src.info128. null_frag is passed for both UnmaskedOp and UnmaskedBcastOp,
// so the unmasked rr/rm records carry no selection pattern, and
// IsConvertibleToThreeAddress is 0.
1550 multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1551                                         AVX512VLVectorVTInfo _Dst,
1552                                         AVX512VLVectorVTInfo _Src> {
1553   let Predicates = [HasDQI] in
1554     defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1555                                           WriteShuffle256Ld, _Dst.info512,
1556                                           _Src.info512, _Src.info128, 0, null_frag, null_frag>,
1557                                           EVEX_V512;
1558   let Predicates = [HasDQI, HasVLX] in
1559     defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1560                                           WriteShuffle256Ld, _Dst.info256,
1561                                           _Src.info256, _Src.info128, 0, null_frag, null_frag>,
1562                                           EVEX_V256;
// Integer 32x2 broadcast: inherits the Z and Z256 forms from
// avx512_common_broadcast_32x2 and adds a 128-bit (Z128) form with the same
// null_frag unmasked operators, using WriteShuffle/WriteShuffleXLd.
1565 multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1566                                          AVX512VLVectorVTInfo _Dst,
1567                                          AVX512VLVectorVTInfo _Src> :
1568   avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1570   let Predicates = [HasDQI, HasVLX] in
1571     defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
1572                                           WriteShuffleXLd, _Dst.info128,
1573                                           _Src.info128, _Src.info128, 0, null_frag, null_frag>,
1574                                           EVEX_V128;
// 32x2 broadcast instantiations. The integer form (0x59) goes through the
// i32x2 multiclass and therefore also gets a Z128 variant. The FP form (0x19)
// uses the common multiclass and only gets Z and Z256.
1577 defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1578                                           avx512vl_i32_info, avx512vl_i64_info>;
1579 defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1580                                           avx512vl_f32_info, avx512vl_f64_info>;
1582 //===----------------------------------------------------------------------===//
1583 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
1584 //---
1585 multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1586                                   X86VectorVTInfo _, RegisterClass KRC> {
1587   def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1588                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1589                   [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1590                   EVEX, Sched<[WriteShuffle]>;
// Vector-length wrapper for avx512_mask_broadcastm: Z under [HasCDI], and
// Z256/Z128 under [HasCDI, HasVLX]. The same KRC is used for every width.
1593 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1594                                  AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1595   let Predicates = [HasCDI] in
1596     defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1597   let Predicates = [HasCDI, HasVLX] in {
1598     defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1599     defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1600   }
// Mask-to-vector broadcast instantiations.
// VPBROADCASTMW2D: opcode 0x3A, VK16 source, i32 element vectors.
// VPBROADCASTMB2Q: opcode 0x2A, VK8 source, i64 element vectors, with REX_W.
1603 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1604                                                avx512vl_i32_info, VK16>;
1605 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1606                                                avx512vl_i64_info, VK8>, REX_W;
1608 //===----------------------------------------------------------------------===//
1609 // -- VPERMI2 - 3 source operands form --
1610 multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1611                          X86FoldableSchedWrite sched,
1612                          X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1613 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1614     hasSideEffects = 0 in {
1615   defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1616           (ins _.RC:$src2, _.RC:$src3),
1617           OpcodeStr, "$src3, $src2", "$src2, $src3",
1618           (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1619           EVEX, VVVV, AVX5128IBase, Sched<[sched]>;
1621   let mayLoad = 1 in
1622   defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1623             (ins _.RC:$src2, _.MemOp:$src3),
1624             OpcodeStr, "$src3, $src2", "$src2, $src3",
1625             (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1626                    (_.VT (_.LdFrag addr:$src3)))), 1>,
1627             EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1628   }
// Broadcast-memory (rmb) form of VPERMI2. $src3 is a scalar memory operand
// loaded with _.BroadcastLdFrag. The assembly operand string appends
// _.BroadcastStr to ${src3}, and the record is marked EVEX_B. As in
// avx512_perm_i, $src1 is tied to $dst and is the middle X86VPermt2 operand.
1631 multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1632                             X86FoldableSchedWrite sched,
1633                             X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1634   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1635       hasSideEffects = 0, mayLoad = 1 in
1636   defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1637               (ins _.RC:$src2, _.ScalarMemOp:$src3),
1638               OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1639               !strconcat("$src2, ${src3}", _.BroadcastStr ),
1640               (_.VT (X86VPermt2 _.RC:$src2,
1641                IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1642               AVX5128IBase, EVEX, VVVV, EVEX_B,
1643               Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates VPERMI2 at all three vector lengths, each combining
// avx512_perm_i (rr/rm) with avx512_perm_i_mb (rmb). The Z form has no
// Predicates `let` in this multiclass; Z128 and Z256 are gated on HasVLX.
// ShuffleMask supplies the index-vector info for each width.
1646 multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1647                                X86FoldableSchedWrite sched,
1648                                AVX512VLVectorVTInfo VTInfo,
1649                                AVX512VLVectorVTInfo ShuffleMask> {
1650   defm NAME#Z: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1651                              ShuffleMask.info512>,
1652                avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1653                                 ShuffleMask.info512>, EVEX_V512;
1654   let Predicates = [HasVLX] in {
1655   defm NAME#Z128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1656                                 ShuffleMask.info128>,
1657                   avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1658                                    ShuffleMask.info128>, EVEX_V128;
1659   defm NAME#Z256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1660                                 ShuffleMask.info256>,
1661                   avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1662                                    ShuffleMask.info256>, EVEX_V256;
1663   }
// Variant of avx512_perm_i_sizes that is gated on a caller-supplied predicate
// Prd (plus HasVLX for the 128/256-bit widths) and that does not instantiate
// the broadcast form avx512_perm_i_mb.  It is used below for the byte and word
// element instructions.
//   VTInfo - per-width type info of the data vectors.
//   Idx    - per-width type info of the index vector.
1666 multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1667                                   X86FoldableSchedWrite sched,
1668                                   AVX512VLVectorVTInfo VTInfo,
1669                                   AVX512VLVectorVTInfo Idx,
1670                                   Predicate Prd> {
1671   let Predicates = [Prd] in
1672   defm NAME#Z: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1673                              Idx.info512>, EVEX_V512;
1674   let Predicates = [Prd, HasVLX] in {
1675   defm NAME#Z128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1676                                 Idx.info128>, EVEX_V128;
1677   defm NAME#Z256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1678                                 Idx.info256>,  EVEX_V256;
1679   }
// VPERMI2 instantiations.
//   D/Q and PS/PD go through avx512_perm_i_sizes (register, memory and
//   broadcast forms); W/B go through avx512_perm_i_sizes_bw, gated on HasBWI
//   and HasVBMI respectively.
//   The integer opcodes are 0x76 (D/Q) and 0x75 (W/B); the fp opcode is 0x77.
//   Q, W and PD additionally carry REX_W.
//   The fp variants pair an fp data type with an integer index type of the
//   same element width (avx512vl_i32_info / avx512vl_i64_info).
1682 defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1683                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1684 defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1685                   avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1686 defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1687                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1688                   REX_W, EVEX_CD8<16, CD8VF>;
1689 defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1690                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1691                   EVEX_CD8<8, CD8VF>;
1692 defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1693                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1694 defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1695                   avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1697 // Extra patterns to deal with extra bitcasts due to passthru and index being
1698 // different types on the fp versions.
//
//   InstrStr - name prefix of the instruction to select; "rrk", "rmk" or
//              "rmbk" is appended and the result is cast to Instruction.
//   _        - type info of the data vectors and of the result.
//   IdxVT    - type info the index operand of X86VPermt2 is bitcast to.
//   CastVT   - type $src1 is viewed as before being bitcast.
//
// Each pattern matches a vselect_mask whose passthru is $src1 bitcast from
// CastVT.VT to _.VT and whose X86VPermt2 index is that same $src1 bitcast to
// IdxVT.VT.  The three patterns differ only in $src3: a register, a full
// vector load (_.LdFrag) or a broadcast load (_.BroadcastLdFrag).
1699 multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1700                                   X86VectorVTInfo IdxVT,
1701                                   X86VectorVTInfo CastVT> {
1702   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1703                                 (X86VPermt2 (_.VT _.RC:$src2),
1704                                             (IdxVT.VT (bitconvert
1705                                                        (CastVT.VT _.RC:$src1))),
1706                                             _.RC:$src3),
1707                                 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1708             (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1709                                                 _.RC:$src2, _.RC:$src3)>;
1710   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1711                                 (X86VPermt2 _.RC:$src2,
1712                                             (IdxVT.VT (bitconvert
1713                                                        (CastVT.VT _.RC:$src1))),
1714                                             (_.LdFrag addr:$src3)),
1715                                 (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1716             (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1717                                                 _.RC:$src2, addr:$src3)>;
1718   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1719                                  (X86VPermt2 _.RC:$src2,
1720                                              (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1721                                              (_.BroadcastLdFrag addr:$src3)),
1722                                  (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1723             (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1724                                                  _.RC:$src2, addr:$src3)>;
1727 // TODO: Should we add more casts? The vXi64 case is common due to ABI.
// Only the VPERMI2PS variants are instantiated here, one per vector width,
// each with the vXi64 type of that width as the cast type.
1728 defm : avx512_perm_i_lowering<"VPERMI2PSZ", v16f32_info, v16i32_info, v8i64_info>;
1729 defm : avx512_perm_i_lowering<"VPERMI2PSZ256", v8f32x_info, v8i32x_info, v4i64x_info>;
1730 defm : avx512_perm_i_lowering<"VPERMI2PSZ128", v4f32x_info, v4i32x_info, v2i64x_info>;
1732 // VPERMT2
// Register (rr) and full-vector memory (rm) forms of the VPERMT2 family.
//   _     - type info of the data vectors ($src1, $src3, $dst).
//   IdxVT - type info of the index vector, passed explicitly as $src2.
// Unlike the VPERMI2 multiclasses above, the operand tied to $dst ($src1) is a
// data vector rather than the index; both families select on X86VPermt2.
// The rm form uses the Folded / ReadAfterFold scheduling members.
1733 multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1734                          X86FoldableSchedWrite sched,
1735                          X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1736 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1737   defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1738           (ins IdxVT.RC:$src2, _.RC:$src3),
1739           OpcodeStr, "$src3, $src2", "$src2, $src3",
1740           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1741           EVEX, VVVV, AVX5128IBase, Sched<[sched]>;
1743   defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1744             (ins IdxVT.RC:$src2, _.MemOp:$src3),
1745             OpcodeStr, "$src3, $src2", "$src2, $src3",
1746             (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1747                    (_.LdFrag addr:$src3))), 1>,
1748             EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1749   }
// Broadcast-memory ("rmb") form of the VPERMT2 family.  $src3 is a scalar
// memory operand (_.ScalarMemOp) matched through _.BroadcastLdFrag; the asm
// string appends _.BroadcastStr to it and the encoding sets EVEX_B.
// As in avx512_perm_t, $src1 (tied to $dst) is a data vector and the index
// vector is IdxVT.RC:$src2.
1751 multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1752                             X86FoldableSchedWrite sched,
1753                             X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1754   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1755   defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1756               (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1757               OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1758               !strconcat("$src2, ${src3}", _.BroadcastStr ),
1759               (_.VT (X86VPermt2 _.RC:$src1,
1760                IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1761               AVX5128IBase, EVEX, VVVV, EVEX_B,
1762               Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_perm_t together with its broadcast form
// avx512_perm_t_mb at three vector lengths: NAME#Z (info512, EVEX_V512) with
// no predicate set here, and NAME#Z128 / NAME#Z256 under HasVLX.
//   VTInfo      - per-width type info of the data vectors.
//   ShuffleMask - per-width type info of the index vector.
1765 multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1766                                X86FoldableSchedWrite sched,
1767                                AVX512VLVectorVTInfo VTInfo,
1768                                AVX512VLVectorVTInfo ShuffleMask> {
1769   defm NAME#Z: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1770                              ShuffleMask.info512>,
1771                avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1772                                 ShuffleMask.info512>, EVEX_V512;
1773   let Predicates = [HasVLX] in {
1774   defm NAME#Z128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1775                                 ShuffleMask.info128>,
1776                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1777                                    ShuffleMask.info128>, EVEX_V128;
1778   defm NAME#Z256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1779                                 ShuffleMask.info256>,
1780                    avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1781                                     ShuffleMask.info256>, EVEX_V256;
1782   }
// Variant of avx512_perm_t_sizes that is gated on a caller-supplied predicate
// Prd (plus HasVLX for the 128/256-bit widths) and that does not instantiate
// the broadcast form avx512_perm_t_mb.  It is used below for the byte and word
// element instructions.
//   VTInfo - per-width type info of the data vectors.
//   Idx    - per-width type info of the index vector.
1785 multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1786                                   X86FoldableSchedWrite sched,
1787                                   AVX512VLVectorVTInfo VTInfo,
1788                                   AVX512VLVectorVTInfo Idx, Predicate Prd> {
1789   let Predicates = [Prd] in
1790   defm NAME#Z: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1791                              Idx.info512>, EVEX_V512;
1792   let Predicates = [Prd, HasVLX] in {
1793   defm NAME#Z128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1794                                 Idx.info128>, EVEX_V128;
1795   defm NAME#Z256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1796                                 Idx.info256>, EVEX_V256;
1797   }
// VPERMT2 instantiations, parallel to the VPERMI2 list above.
//   D/Q and PS/PD go through avx512_perm_t_sizes (register, memory and
//   broadcast forms); W/B go through avx512_perm_t_sizes_bw, gated on HasBWI
//   and HasVBMI respectively.
//   The integer opcodes are 0x7E (D/Q) and 0x7D (W/B); the fp opcode is 0x7F.
//   Q, W and PD additionally carry REX_W.
1800 defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1801                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1802 defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1803                   avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1804 defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1805                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1806                   REX_W, EVEX_CD8<16, CD8VF>;
1807 defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1808                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1809                   EVEX_CD8<8, CD8VF>;
1810 defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1811                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1812 defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1813                   avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1815 //===----------------------------------------------------------------------===//
1816 // AVX-512 - BLEND using mask
// Register and full-vector memory forms of the mask-blend instructions.
// Every def has an empty pattern list, so these records only describe
// assembly and encoding:
//   rr / rm     - no mask operand.
//   rrk / rmk   - mask operand _.KRCWM:$mask, EVEX_K.
//   rrkz / rmkz - mask operand with the "{z}" suffix in the asm string, EVEX_KZ.
// The memory forms are wrapped in mayLoad = 1 and carry
// EVEX_CD8<_.EltSize, CD8VF> and the Folded / ReadAfterFold scheduling members.
// NOTE(review): the multiclass name looks like an accidental rename; it is
// left unchanged because blendmask_dq / blendmask_bw (and possibly code
// outside this view) refer to it by name.
1819 multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1820                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1821   let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1822   def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1823              (ins _.RC:$src1, _.RC:$src2),
1824              !strconcat(OpcodeStr,
1825              "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1826              EVEX, VVVV, Sched<[sched]>;
1827   def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1828              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1829              !strconcat(OpcodeStr,
1830              "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1831              []>, EVEX, VVVV, EVEX_K, Sched<[sched]>;
1832   def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1833              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1834              !strconcat(OpcodeStr,
1835              "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1836              []>, EVEX, VVVV, EVEX_KZ, Sched<[sched]>;
1837   let mayLoad = 1 in {
1838   def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1839              (ins _.RC:$src1, _.MemOp:$src2),
1840              !strconcat(OpcodeStr,
1841              "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1842              []>, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
1843              Sched<[sched.Folded, sched.ReadAfterFold]>;
1844   def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1845              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1846              !strconcat(OpcodeStr,
1847              "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1848              []>, EVEX, VVVV, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1849              Sched<[sched.Folded, sched.ReadAfterFold]>;
1850   def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1851              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1852              !strconcat(OpcodeStr,
1853              "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1854              []>, EVEX, VVVV, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1855              Sched<[sched.Folded, sched.ReadAfterFold]>;
1856   }
1857   }
// Broadcast-memory forms of the mask-blend instructions: rmbk (EVEX_K),
// rmbkz (EVEX_KZ) and the unmasked rmb.  $src2 is a scalar memory operand
// (_.ScalarMemOp) printed with _.BroadcastStr appended; all three set EVEX_B
// and EVEX_CD8<_.EltSize, CD8VF>.  The pattern lists are empty, so these
// records only describe assembly and encoding.
1859 multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
1860                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1861   let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
1862   def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1863       (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1864        !strconcat(OpcodeStr,
1865             "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1866             "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1867       EVEX, VVVV, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1868       Sched<[sched.Folded, sched.ReadAfterFold]>;
1870   def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1871       (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1872        !strconcat(OpcodeStr,
1873             "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
1874             "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1875       EVEX, VVVV, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1876       Sched<[sched.Folded, sched.ReadAfterFold]>;
1878   def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1879       (ins _.RC:$src1, _.ScalarMemOp:$src2),
1880        !strconcat(OpcodeStr,
1881             "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
1882             "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1883       EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1884       Sched<[sched.Folded, sched.ReadAfterFold]>;
1885   }
// Instantiates the mask-blend definitions, including the broadcast forms from
// WriteFVarBlendask_rmb, at three vector lengths.  Z uses sched.ZMM and sets no
// predicate here; Z256 and Z128 use sched.YMM / sched.XMM and require HasVLX.
1888 multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
1889                         AVX512VLVectorVTInfo VTInfo> {
1890   defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1891            WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1892                                  EVEX_V512;
1894   let Predicates = [HasVLX] in {
1895     defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1896                 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1897                                       EVEX_V256;
1898     defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1899                 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1900                                       EVEX_V128;
1901   }
// Byte/word counterpart of blendmask_dq: the same three vector lengths, but
// gated on HasBWI (plus HasVLX for Z256 / Z128) and without the broadcast
// forms from WriteFVarBlendask_rmb.
1904 multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
1905                         AVX512VLVectorVTInfo VTInfo> {
1906   let Predicates = [HasBWI] in
1907     defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1908                                EVEX_V512;
1910   let Predicates = [HasBWI, HasVLX] in {
1911     defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1912                                   EVEX_V256;
1913     defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1914                                   EVEX_V128;
1915   }
// Mask-blend instantiations.  The fp forms (opcode 0x65) use
// SchedWriteFVarBlend; the integer forms (0x64 for D/Q, 0x66 for B/W) use
// SchedWriteVarBlend.  PD, Q and W additionally carry REX_W.
1918 defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
1919                               avx512vl_f32_info>;
1920 defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
1921                               avx512vl_f64_info>, REX_W;
1922 defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
1923                               avx512vl_i32_info>;
1924 defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
1925                               avx512vl_i64_info>, REX_W;
1926 defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
1927                               avx512vl_i8_info>;
1928 defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
1929                               avx512vl_i16_info>, REX_W;
1931 //===----------------------------------------------------------------------===//
1932 // Compare Instructions
1933 //===----------------------------------------------------------------------===//
1935 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
//
// Scalar fp compares (opcode 0xC2) that write a mask register (_.KRC:$dst).
//   OpNode / OpNodeSAE       - nodes matched by the plain and {sae} forms.
//   OpNode_su / OpNodeSAE_su - fragments passed to AVX512_maskable_cmp as the
//                              second pattern argument.
// Definitions emitted:
//   rri_Int, rmi_Int - operands are in the vector register class _.RC (memory
//                      form via _.ScalarIntMemFrags); marked SIMD_EXC.
//   rrib_Int         - "{sae}" form: EVEX_B, Uses = [MXCSR], no SIMD_EXC.
//   rri, rmi         - isCodeGenOnly forms on the scalar register class _.FRC;
//                      only rri is marked isCommutable.
// All forms are VEX_LIG; the memory forms use EVEX_CD8<_.EltSize, CD8VT1>.
1937 multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
1938                              PatFrag OpNode_su, PatFrag OpNodeSAE_su,
1939                              X86FoldableSchedWrite sched> {
1940   defm  rri_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1941                                        (outs _.KRC:$dst),
1942                                        (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1943                                        "vcmp"#_.Suffix,
1944                                        "$cc, $src2, $src1", "$src1, $src2, $cc",
1945                                        (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
1946                                        (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc)>,
1947                                        EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC;
1948   let mayLoad = 1 in
1949   defm  rmi_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
1950                                        (outs _.KRC:$dst),
1951                                        (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
1952                                        "vcmp"#_.Suffix,
1953                                        "$cc, $src2, $src1", "$src1, $src2, $cc",
1954                                        (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
1955                                            timm:$cc),
1956                                        (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
1957                                            timm:$cc)>, EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
1958                                        Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
1960   let Uses = [MXCSR] in
1961   defm  rrib_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1962                                         (outs _.KRC:$dst),
1963                                         (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1964                                         "vcmp"#_.Suffix,
1965                                         "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
1966                                         (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1967                                                    timm:$cc),
1968                                         (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1969                                                       timm:$cc)>,
1970                                         EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>;
1972   let isCodeGenOnly = 1 in {
1973     let isCommutable = 1 in
1974     def rri : AVX512Ii8<0xC2, MRMSrcReg,
1975                         (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
1976                         !strconcat("vcmp", _.Suffix,
1977                                    "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
1978                         [(set _.KRC:$dst, (OpNode _.FRC:$src1,
1979                                                   _.FRC:$src2,
1980                                                   timm:$cc))]>,
1981                         EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC;
1982     def rmi : AVX512Ii8<0xC2, MRMSrcMem,
1983                         (outs _.KRC:$dst),
1984                         (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
1985                         !strconcat("vcmp", _.Suffix,
1986                                    "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
1987                         [(set _.KRC:$dst, (OpNode _.FRC:$src1,
1988                                                   (_.ScalarLdFrag addr:$src2),
1989                                                   timm:$cc))]>,
1990                         EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
1991                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
1992   }
// Single- and double-precision scalar compares, both gated on HasAVX512.
// They share the same nodes and scheduling class and differ in type info,
// execution domain and prefix class (AVX512XSIi8Base vs. AVX512XDIi8Base plus
// REX_W).
1995 let Predicates = [HasAVX512] in {
1996   let ExeDomain = SSEPackedSingle in
1997   defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
1998                                    X86cmpms_su, X86cmpmsSAE_su,
1999                                    SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2000   let ExeDomain = SSEPackedDouble in
2001   defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
2002                                    X86cmpms_su, X86cmpmsSAE_su,
2003                                    SchedWriteFCmp.Scl>, AVX512XDIi8Base, REX_W;
// Half-precision scalar compare, gated on HasFP16.  It reuses the
// SSEPackedSingle execution domain and the AVX512XSIi8Base prefix class of
// VCMPSSZ and adds TA.
2005 let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
2006   defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
2007                                    X86cmpms_su, X86cmpmsSAE_su,
2008                                    SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA;
// Packed integer compares that write a mask register (_.KRC:$dst).  Every def
// has an empty pattern list, so these records only describe assembly and
// encoding.
//   rr / rm   - unmasked register and full-vector memory forms.
//   rrk / rmk - the same with a mask operand _.KRCWM:$mask (EVEX_K).
//   IsCommutable - applied to the register forms (rr, rrk) only; the memory
//                  forms set mayLoad instead.
2010 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
2011                               X86FoldableSchedWrite sched,
2012                               X86VectorVTInfo _, bit IsCommutable> {
2013   let isCommutable = IsCommutable, hasSideEffects = 0 in
2014   def rr : AVX512BI<opc, MRMSrcReg,
2015              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2016              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2017              []>, EVEX, VVVV, Sched<[sched]>;
2018   let mayLoad = 1, hasSideEffects = 0 in
2019   def rm : AVX512BI<opc, MRMSrcMem,
2020              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2021              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2022              []>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
2023   let isCommutable = IsCommutable, hasSideEffects = 0 in
2024   def rrk : AVX512BI<opc, MRMSrcReg,
2025               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2026               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2027                           "$dst {${mask}}, $src1, $src2}"),
2028               []>, EVEX, VVVV, EVEX_K, Sched<[sched]>;
2029   let mayLoad = 1, hasSideEffects = 0 in
2030   def rmk : AVX512BI<opc, MRMSrcMem,
2031               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2032               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2033                           "$dst {${mask}}, $src1, $src2}"),
2034               []>, EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Extends avx512_icmp_packed (inherited, so rr/rm/rrk/rmk are emitted too)
// with the broadcast-memory forms rmb and rmbk.  $src2 is a scalar memory
// operand (_.ScalarMemOp) printed with _.BroadcastStr appended, and both forms
// set EVEX_B.  The pattern lists are empty.
2037 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
2038                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
2039                                   bit IsCommutable> :
2040            avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
2041   let mayLoad = 1, hasSideEffects = 0 in {
2042   def rmb : AVX512BI<opc, MRMSrcMem,
2043               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2044               !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2045                                     "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2046               []>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2047   def rmbk : AVX512BI<opc, MRMSrcMem,
2048                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2049                                        _.ScalarMemOp:$src2),
2050                !strconcat(OpcodeStr,
2051                           "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2052                           "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2053                []>, EVEX, VVVV, EVEX_K, EVEX_B,
2054                Sched<[sched.Folded, sched.ReadAfterFold]>;
2055   }
// Instantiates avx512_icmp_packed (no broadcast forms) at three vector
// lengths.  Z requires the caller-supplied predicate prd; Z256 and Z128
// additionally require HasVLX.  IsCommutable defaults to 0 and is forwarded
// unchanged.
2058 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
2059                                  X86SchedWriteWidths sched,
2060                                  AVX512VLVectorVTInfo VTInfo, Predicate prd,
2061                                  bit IsCommutable = 0> {
2062   let Predicates = [prd] in
2063   defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
2064                               VTInfo.info512, IsCommutable>, EVEX_V512;
2066   let Predicates = [prd, HasVLX] in {
2067     defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
2068                                    VTInfo.info256, IsCommutable>, EVEX_V256;
2069     defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
2070                                    VTInfo.info128, IsCommutable>, EVEX_V128;
2071   }
// Same layout as avx512_icmp_packed_vl, but instantiates
// avx512_icmp_packed_rmb so the broadcast-memory forms are emitted as well.
2074 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2075                                      X86SchedWriteWidths sched,
2076                                      AVX512VLVectorVTInfo VTInfo,
2077                                      Predicate prd, bit IsCommutable = 0> {
2078   let Predicates = [prd] in
2079   defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
2080                                   VTInfo.info512, IsCommutable>, EVEX_V512;
2082   let Predicates = [prd, HasVLX] in {
2083     defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
2084                                        VTInfo.info256, IsCommutable>, EVEX_V256;
2085     defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
2086                                        VTInfo.info128, IsCommutable>, EVEX_V128;
2087   }
2090 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2091 // increase the pattern complexity the way an immediate would.
//
// Instantiations in this block:
//   - B/W use avx512_icmp_packed_vl (no broadcast forms), are gated on HasBWI
//     and are marked WIG.
//   - D/Q use avx512_icmp_packed_rmb_vl (with broadcast forms) and are gated
//     on HasAVX512; the Q forms additionally carry T8 and REX_W.
//   - The EQ compares pass IsCommutable = 1; the GT compares leave it at the
//     default of 0.
2092 let AddedComplexity = 2 in {
2093 // FIXME: Is there a better scheduler class for VPCMP?
2094 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
2095                       SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2096                 EVEX_CD8<8, CD8VF>, WIG;
2098 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
2099                       SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2100                 EVEX_CD8<16, CD8VF>, WIG;
2102 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
2103                       SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2104                 EVEX_CD8<32, CD8VF>;
2106 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
2107                       SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2108                 T8, REX_W, EVEX_CD8<64, CD8VF>;
2110 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
2111                       SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2112                 EVEX_CD8<8, CD8VF>, WIG;
2114 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
2115                       SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2116                 EVEX_CD8<16, CD8VF>, WIG;
2118 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
2119                       SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2120                 EVEX_CD8<32, CD8VF>;
2122 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
2123                       SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2124                 T8, REX_W, EVEX_CD8<64, CD8VF>;
// Packed integer compares with an immediate condition code ("vpcmp"#Suffix),
// writing a mask register (_.KRC:$dst).
//   Frag    - fragment matched by the unmasked forms (rri, rmi).
//   Frag_su - fragment matched by the masked forms (rrik, rmik), where the
//             compare result is and-ed with _.KRCWM:$mask.
//   Name    - instruction name prefix; combined with _.ZSuffix it is used by
//             the two trailing Pats to look up the rmi / rmik records.
// Only the register forms (rri, rrik) are marked isCommutable.
// The two trailing Pats match the compare with the load as the FIRST operand
// and select the same rmi / rmik instruction, passing the condition code
// through X86pcmpm_imm_commute (defined outside this view).
2127 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2128                           PatFrag Frag_su,
2129                           X86FoldableSchedWrite sched,
2130                           X86VectorVTInfo _, string Name> {
2131   let isCommutable = 1 in
2132   def rri : AVX512AIi8<opc, MRMSrcReg,
2133              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2134              !strconcat("vpcmp", Suffix,
2135                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2136              [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2137                                                 (_.VT _.RC:$src2),
2138                                                 cond)))]>,
2139              EVEX, VVVV, Sched<[sched]>;
2140   def rmi : AVX512AIi8<opc, MRMSrcMem,
2141              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2142              !strconcat("vpcmp", Suffix,
2143                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2144              [(set _.KRC:$dst, (_.KVT
2145                                 (Frag:$cc
2146                                  (_.VT _.RC:$src1),
2147                                  (_.VT (_.LdFrag addr:$src2)),
2148                                  cond)))]>,
2149              EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
2150   let isCommutable = 1 in
2151   def rrik : AVX512AIi8<opc, MRMSrcReg,
2152               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2153                                       u8imm:$cc),
2154               !strconcat("vpcmp", Suffix,
2155                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
2156                          "$dst {${mask}}, $src1, $src2, $cc}"),
2157               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2158                                      (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2159                                                          (_.VT _.RC:$src2),
2160                                                          cond))))]>,
2161               EVEX, VVVV, EVEX_K, Sched<[sched]>;
2162   def rmik : AVX512AIi8<opc, MRMSrcMem,
2163               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2164                                     u8imm:$cc),
2165               !strconcat("vpcmp", Suffix,
2166                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
2167                          "$dst {${mask}}, $src1, $src2, $cc}"),
2168               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2169                                      (_.KVT
2170                                       (Frag_su:$cc
2171                                        (_.VT _.RC:$src1),
2172                                        (_.VT (_.LdFrag addr:$src2)),
2173                                        cond))))]>,
2174               EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2176   def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2),
2177                              (_.VT _.RC:$src1), cond)),
2178             (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2179              _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2181   def : Pat<(and _.KRCWM:$mask,
2182                  (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2),
2183                                      (_.VT _.RC:$src1), cond))),
2184             (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2185              _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2186              (X86pcmpm_imm_commute $cc))>;
// Extends avx512_icmp_cc (inherited, so rri/rmi/rrik/rmik and their Pats are
// emitted too) with the broadcast-memory forms rmib and rmibk.  $src2 is a
// scalar memory operand (_.ScalarMemOp) matched through _.BroadcastLdFrag and
// printed with _.BroadcastStr appended; both forms set EVEX_B.
// The two trailing Pats mirror those of avx512_icmp_cc for the broadcast
// case: broadcast load as the first operand, condition code passed through
// X86pcmpm_imm_commute.
2189 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2190                               PatFrag Frag_su, X86FoldableSchedWrite sched,
2191                               X86VectorVTInfo _, string Name> :
2192            avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> {
2193   def rmib : AVX512AIi8<opc, MRMSrcMem,
2194              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2195                                      u8imm:$cc),
2196              !strconcat("vpcmp", Suffix,
2197                         "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2198                         "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2199              [(set _.KRC:$dst, (_.KVT (Frag:$cc
2200                                        (_.VT _.RC:$src1),
2201                                        (_.BroadcastLdFrag addr:$src2),
2202                                        cond)))]>,
2203              EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2204   def rmibk : AVX512AIi8<opc, MRMSrcMem,
2205               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2206                                        _.ScalarMemOp:$src2, u8imm:$cc),
2207               !strconcat("vpcmp", Suffix,
2208                   "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2209                   "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2210               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2211                                      (_.KVT (Frag_su:$cc
2212                                              (_.VT _.RC:$src1),
2213                                              (_.BroadcastLdFrag addr:$src2),
2214                                              cond))))]>,
2215               EVEX, VVVV, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2217   def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
2218                     (_.VT _.RC:$src1), cond)),
2219             (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2220              _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2222   def : Pat<(and _.KRCWM:$mask,
2223                  (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2),
2224                                      (_.VT _.RC:$src1), cond))),
2225             (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2226              _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2227              (X86pcmpm_imm_commute $cc))>;
// Instantiates avx512_icmp_cc for each vector width described by VTInfo:
//   Z     - info512, sched.ZMM, EVEX_V512, under [prd].
//   Z256  - info256, sched.YMM, EVEX_V256, under [prd, HasVLX].
//   Z128  - info128, sched.XMM, EVEX_V128, under [prd, HasVLX].
// NAME is forwarded so the inner patterns can !cast the generated records.
2230 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2231                              PatFrag Frag_su, X86SchedWriteWidths sched,
2232                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2233   let Predicates = [prd] in
2234   defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2235                           sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2237   let Predicates = [prd, HasVLX] in {
2238     defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2239                                sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2240     defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2241                                sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2242   }
// Same per-width fan-out as avx512_icmp_cc_vl, but instantiating
// avx512_icmp_cc_rmb so that each width also gets the broadcast-memory forms
// (rmib/rmibk). The 256- and 128-bit variants additionally require HasVLX.
2245 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2246                                  PatFrag Frag_su, X86SchedWriteWidths sched,
2247                                  AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2248   let Predicates = [prd] in
2249   defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2250                               sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2252   let Predicates = [prd, HasVLX] in {
2253     defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2254                                    sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2255     defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2256                                    sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2257   }
2260 // FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
// Integer compare-into-mask instantiations.
// The byte and word forms (B/UB/W/UW) use avx512_icmp_cc_vl under HasBWI and
// therefore have no broadcast variants. The dword and qword forms (D/UD/Q/UQ)
// use avx512_icmp_cc_rmb_vl under HasAVX512, which adds rmib/rmibk.
// The "u"-prefixed element suffixes pass X86pcmpum/X86pcmpum_su and the
// others pass X86pcmpm/X86pcmpm_su. The W and Q forms are tagged REX_W.
2261 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2262                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2263                                 EVEX_CD8<8, CD8VF>;
2264 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2265                                  SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2266                                  EVEX_CD8<8, CD8VF>;
2268 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2269                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2270                                 REX_W, EVEX_CD8<16, CD8VF>;
2271 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2272                                  SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2273                                  REX_W, EVEX_CD8<16, CD8VF>;
2275 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2276                                     SchedWriteVecALU, avx512vl_i32_info,
2277                                     HasAVX512>, EVEX_CD8<32, CD8VF>;
2278 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2279                                      SchedWriteVecALU, avx512vl_i32_info,
2280                                      HasAVX512>, EVEX_CD8<32, CD8VF>;
2282 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2283                                     SchedWriteVecALU, avx512vl_i64_info,
2284                                     HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
2285 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2286                                      SchedWriteVecALU, avx512vl_i64_info,
2287                                      HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
// Defines the "vcmp"#_.Suffix compare-into-mask forms shared by every vector
// width, all through AVX512_maskable_cmp with opcode 0xC2:
//   rri  - register/register.
//   rmi  - register/full-vector load (_.LdFrag on _.MemOp).
//   rmbi - register/broadcast load (_.BroadcastLdFrag on _.ScalarMemOp),
//          tagged EVEX_B.
// Each form gets an unmasked pattern on X86any_cmpm and a masked pattern on
// X86cmpm_su. The Pat records after the defs select these instructions, and
// the "k"-suffixed records referenced by name, for commuted operands and for
// the X86cmpmm mask nodes.
2289 multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2290                               string Name> {
// All three defs are marked as reading MXCSR and as able to raise FP exceptions.
2291 let Uses = [MXCSR], mayRaiseFPException = 1 in {
// NOTE(review): the trailing positional 1 on rri is presumably the
// IsCommutable argument of AVX512_maskable_cmp - confirm against its
// definition, which is outside this section.
2292   defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2293                    (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2294                    "vcmp"#_.Suffix,
2295                    "$cc, $src2, $src1", "$src1, $src2, $cc",
2296                    (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2297                    (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2298                    1>, Sched<[sched]>;
2300   defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2301                 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2302                 "vcmp"#_.Suffix,
2303                 "$cc, $src2, $src1", "$src1, $src2, $cc",
2304                 (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2305                              timm:$cc),
2306                 (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2307                             timm:$cc)>,
2308                 Sched<[sched.Folded, sched.ReadAfterFold]>;
2310   defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2311                 (outs _.KRC:$dst),
2312                 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2313                 "vcmp"#_.Suffix,
2314                 "$cc, ${src2}"#_.BroadcastStr#", $src1",
2315                 "$src1, ${src2}"#_.BroadcastStr#", $cc",
2316                 (X86any_cmpm (_.VT _.RC:$src1),
2317                              (_.VT (_.BroadcastLdFrag addr:$src2)),
2318                              timm:$cc),
2319                 (X86cmpm_su (_.VT _.RC:$src1),
2320                             (_.VT (_.BroadcastLdFrag addr:$src2)),
2321                             timm:$cc)>,
2322                 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2323   }
// In every pattern of this group the load is the first compare operand, so the
// instruction is emitted with register and memory swapped and the immediate is
// passed through X86cmpm_imm_commute.
2325   // Patterns for selecting with loads in other operand.
2326   def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2327                          timm:$cc),
2328             (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2329                                                       (X86cmpm_imm_commute timm:$cc))>;
2331   def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2332                                             (_.VT _.RC:$src1),
2333                                             timm:$cc)),
2334             (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2335                                                        _.RC:$src1, addr:$src2,
2336                                                        (X86cmpm_imm_commute timm:$cc))>;
2338   def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
2339                          (_.VT _.RC:$src1), timm:$cc),
2340             (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2341                                                        (X86cmpm_imm_commute timm:$cc))>;
2343   def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
2344                                             (_.VT _.RC:$src1),
2345                                             timm:$cc)),
2346             (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2347                                                         _.RC:$src1, addr:$src2,
2348                                                         (X86cmpm_imm_commute timm:$cc))>;
// X86cmpmm carries the mask as its fourth operand. An all-ones mask
// (immAllOnesV) selects the unmasked instruction; a $mask operand selects the
// "k" form. The immediate is used as-is because the operand order matches.
2350   // Patterns for mask intrinsics.
2351   def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
2352                       (_.KVT immAllOnesV)),
2353             (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;
2355   def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
2356             (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
2357                                                        _.RC:$src2, timm:$cc)>;
2359   def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2360                       (_.KVT immAllOnesV)),
2361             (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;
2363   def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2364                       _.KRCWM:$mask),
2365             (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
2366                                                        addr:$src2, timm:$cc)>;
2368   def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2369                       (_.KVT immAllOnesV)),
2370             (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;
2372   def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2373                       _.KRCWM:$mask),
2374             (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
2375                                                         addr:$src2, timm:$cc)>;
// Same X86cmpmm cases with the load as the first operand: operands are swapped
// and the immediate goes through X86cmpm_imm_commute.
2377   // Patterns for mask intrinsics with loads in other operand.
2378   def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2379                       (_.KVT immAllOnesV)),
2380             (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2381                                                       (X86cmpm_imm_commute timm:$cc))>;
2383   def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2384                       _.KRCWM:$mask),
2385             (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2386                                                        _.RC:$src1, addr:$src2,
2387                                                        (X86cmpm_imm_commute timm:$cc))>;
2389   def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2390                       (_.KVT immAllOnesV)),
2391             (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2392                                                        (X86cmpm_imm_commute timm:$cc))>;
2394   def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2395                       _.KRCWM:$mask),
2396             (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2397                                                         _.RC:$src1, addr:$src2,
2398                                                         (X86cmpm_imm_commute  timm:$cc))>;
// Defines rrib, the register/register "vcmp"#_.Suffix form whose assembly
// string carries "{sae}". It is built with AVX512_maskable_custom_cmp, so the
// unmasked and masked operand lists and patterns are spelled out explicitly:
// both match X86cmpmmSAE, the first with an all-ones mask (immAllOnesV) and
// the second with $mask. Unlike avx512_vcmp_common, only Uses = [MXCSR] is
// set here; mayRaiseFPException is not.
2401 multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2402   // comparison code form (VCMP[EQ/LT/LE/...])
2403   let Uses = [MXCSR] in
2404   defm  rrib  : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
2405                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2406                      (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
2407                      "vcmp"#_.Suffix,
2408                      "$cc, {sae}, $src2, $src1",
2409                      "$src1, $src2, {sae}, $cc",
2410                      [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2411                                         (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
2412                      [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2413                                         (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
2414                      EVEX_B, Sched<[sched]>;
// Instantiates the packed FP compares per vector width. Pred defaults to
// HasAVX512.
//   Z          - avx512_vcmp_common plus avx512_vcmp_sae on info512, so only
//                the 512-bit width gets the {sae} form.
//   Z128/Z256  - avx512_vcmp_common only, additionally requiring HasVLX.
2417 multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
2418                        Predicate Pred = HasAVX512> {
2419   let Predicates = [Pred] in {
2420     defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2421                 avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2423   }
2424   let Predicates = [Pred,HasVLX] in {
2425    defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2426    defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
2427   }
// Packed FP compare instantiations over avx512vl_f64_info (PD),
// avx512vl_f32_info (PS) and avx512vl_f16_info (PH). PD and PS rely on the
// default HasAVX512 predicate of avx512_vcmp; PH passes HasFP16 instead and
// is additionally tagged TA.
2430 defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2431                           AVX512PDIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
2432 defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2433                           AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
2434 defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
2435                           AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<16, CD8VF>, TA;
2437 // Patterns to select fp compares with load as first operand.
// In each pattern the scalar load is the first operand of X86cmpms, so the
// rmi instruction is selected with the register and memory operands swapped
// and the condition code passed through X86cmpm_imm_commute. The f64 and f32
// patterns require HasAVX512; the f16 pattern requires HasFP16.
2438 let Predicates = [HasAVX512] in {
2439   def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
2440             (VCMPSDZrmi FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2442   def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
2443             (VCMPSSZrmi FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2446 let Predicates = [HasFP16] in {
2447   def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
2448             (VCMPSHZrmi FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2451 // ----------------------------------------------------------------
2452 // FPClass
2454 //handle fpclass instruction  mask =  op(reg_scalar,imm)
2455 //                                    op(mem_scalar,imm)
// Scalar fpclass forms. Each writes a mask register (_.KRC) computed from one
// source operand and an immediate:
//   rr  - register source, matched by X86Vfpclasss.
//   rrk - register source, result ANDed with $mask (X86Vfpclasss_su), EVEX_K.
//   rm  - memory source through _.ScalarIntMemFrags on _.IntScalarMemOp.
//   rmk - memory source, result ANDed with $mask, EVEX_K.
// All four defs share Predicates = [prd], ExeDomain = _.ExeDomain and
// Uses = [MXCSR].
2456 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2457                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2458                                  Predicate prd> {
2459   let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2460       def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2461                       (ins _.RC:$src1, i32u8imm:$src2),
2462                       OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2463                       [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2464                               (i32 timm:$src2)))]>,
2465                       Sched<[sched]>;
2466       def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2467                       (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2468                       OpcodeStr#_.Suffix#
2469                       "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2470                       [(set _.KRC:$dst,(and _.KRCWM:$mask,
2471                                       (X86Vfpclasss_su (_.VT _.RC:$src1),
2472                                       (i32 timm:$src2))))]>,
2473                       EVEX_K, Sched<[sched]>;
2474     def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2475                     (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2476                     OpcodeStr#_.Suffix#
2477                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2478                     [(set _.KRC:$dst,
2479                           (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
2480                                         (i32 timm:$src2)))]>,
2481                     Sched<[sched.Folded, sched.ReadAfterFold]>;
2482     def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2483                     (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2484                     OpcodeStr#_.Suffix#
2485                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2486                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
2487                         (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
2488                             (i32 timm:$src2))))]>,
2489                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2490   }
2493 //handle fpclass instruction mask = fpclass(reg_vec, imm)
2494 //                                  fpclass(mem_vec, imm)
2495 //                                  fpclass(broadcast(eltVt), imm)
// Vector fpclass forms. Each writes a mask register (_.KRC) computed from ONE
// source vector and an immediate:
//   rr / rrk   - register source.
//   rm / rmk   - full-vector load (_.LdFrag on _.MemOp).
//   rmb / rmbk - broadcast load (_.BroadcastLdFrag on _.ScalarMemOp), EVEX_B.
// The "k" forms AND the result with $mask and are tagged EVEX_K.
// `mem` is the suffix letter that disambiguates the memory form. The rm/rmk
// assembly strings embed it as "{"#mem#"}" after the mnemonic; the InstAlias
// records at the end accept the mnemonic with `mem` appended for the register
// and broadcast forms as well.
2496 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2497                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2498                                  string mem>{
2499   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2500   def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2501                       (ins _.RC:$src1, i32u8imm:$src2),
2502                       OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2503                       [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2504                                        (i32 timm:$src2)))]>,
2505                       Sched<[sched]>;
2506   def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2507                       (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2508                       OpcodeStr#_.Suffix#
2509                       "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2510                       [(set _.KRC:$dst,(and _.KRCWM:$mask,
2511                                        (X86Vfpclass_su (_.VT _.RC:$src1),
2512                                        (i32 timm:$src2))))]>,
2513                       EVEX_K, Sched<[sched]>;
2514   def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2515                     (ins _.MemOp:$src1, i32u8imm:$src2),
2516                     OpcodeStr#_.Suffix#"{"#mem#"}"#
2517                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2518                     [(set _.KRC:$dst,(X86Vfpclass
2519                                      (_.VT (_.LdFrag addr:$src1)),
2520                                      (i32 timm:$src2)))]>,
2521                     Sched<[sched.Folded, sched.ReadAfterFold]>;
2522   def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2523                     (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2524                     OpcodeStr#_.Suffix#"{"#mem#"}"#
2525                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2526                     [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2527                                   (_.VT (_.LdFrag addr:$src1)),
2528                                   (i32 timm:$src2))))]>,
2529                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2530   def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2531                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2532                     OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2533                                       _.BroadcastStr#", $dst|$dst, ${src1}"
2534                                                   #_.BroadcastStr#", $src2}",
2535                     [(set _.KRC:$dst,(X86Vfpclass
2536                                      (_.VT (_.BroadcastLdFrag addr:$src1)),
2537                                      (i32 timm:$src2)))]>,
2538                     EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2539   def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2540                     (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2541                     OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2542                           _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
2543                                                    _.BroadcastStr#", $src2}",
2544                     [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2545                                      (_.VT (_.BroadcastLdFrag addr:$src1)),
2546                                      (i32 timm:$src2))))]>,
2547                     EVEX_B, EVEX_K,  Sched<[sched.Folded, sched.ReadAfterFold]>;
2548   }
// The four aliases map onto rr, rrk, rmb and rmbk respectively. Each is
// declared with emit flag 0 and restricted to the "att" assembler variant.
2550   // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
2551   // the memory form.
2552   def : InstAlias<OpcodeStr#_.Suffix#mem#
2553                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2554                   (!cast<Instruction>(NAME#"rr")
2555                    _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2556   def : InstAlias<OpcodeStr#_.Suffix#mem#
2557                   "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2558                   (!cast<Instruction>(NAME#"rrk")
2559                    _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2560   def : InstAlias<OpcodeStr#_.Suffix#mem#
2561                   "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2562                   _.BroadcastStr#", $src2}",
2563                   (!cast<Instruction>(NAME#"rmb")
2564                    _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2565   def : InstAlias<OpcodeStr#_.Suffix#mem#
2566                   "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2567                   "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2568                   (!cast<Instruction>(NAME#"rmbk")
2569                    _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
// Instantiates avx512_vector_fpclass for each vector width, passing the
// memory-form suffix letter that goes with it: "z" for info512, "x" for
// info128 and "y" for info256. The 128- and 256-bit variants additionally
// require HasVLX.
2572 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2573                                      bits<8> opc, X86SchedWriteWidths sched,
2574                                      Predicate prd>{
2575   let Predicates = [prd] in {
2576     defm Z    : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2577                                       _.info512, "z">, EVEX_V512;
2578   }
2579   let Predicates = [prd, HasVLX] in {
2580     defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2581                                       _.info128, "x">, EVEX_V128;
2582     defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2583                                       _.info256, "y">, EVEX_V256;
2584   }
// Instantiates every fpclass flavour for one mnemonic:
//   PH / PS / PD    - packed forms via avx512_vector_fpclass_all, opcode opcVec.
//   SHZ / SSZ / SDZ - scalar forms via avx512_scalar_fpclass, opcode opcScalar.
// The f16 forms are predicated on HasFP16 and tagged AVX512PSIi8Base, TA; the
// f32/f64 forms are predicated on HasDQI and tagged AVX512AIi8Base, with
// REX_W added for f64.
// NOTE(review): SSZ and SDZ are tagged VEX_LIG but SHZ is not - confirm this
// difference is intended.
2587 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2588                                  bits<8> opcScalar, X86SchedWriteWidths sched> {
2589   defm PH : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f16_info, opcVec,
2590                                       sched, HasFP16>,
2591                                       EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
2592   defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2593                                    sched.Scl, f16x_info, HasFP16>,
2594                                    EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;
2595   defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
2596                                       sched, HasDQI>,
2597                                       EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
2598   defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
2599                                       sched, HasDQI>,
2600                                       EVEX_CD8<64, CD8VF>, AVX512AIi8Base, REX_W;
2601   defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2602                                    sched.Scl, f32x_info, HasDQI>, VEX_LIG,
2603                                    EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
2604   defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2605                                    sched.Scl, f64x_info, HasDQI>, VEX_LIG,
2606                                    EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, REX_W;
// All "vfpclass" records: 0x66 is passed as opcVec (packed forms) and 0x67 as
// opcScalar (scalar forms); every record is tagged EVEX.
2609 defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;
2611 //-----------------------------------------------------------------
2612 // Mask register copy, including
2613 // - copy between mask registers
2614 // - load/store mask registers
2615 // - copy from GPR to mask register and vice versa
// Mask-register move forms for one mask width:
//   kk - mask to mask (no ISel pattern).
//   km - load a vvt value from x86memop into KRC.
//   mk - store KRC to x86memop.
// Suffix is appended to each record name; it defaults to the empty string.
2617 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2618                           string OpcodeStr, RegisterClass KRC, ValueType vvt,
2619                           X86MemOperand x86memop, string Suffix = ""> {
// This brace-less `let ... in` covers only the kk def that follows, not km or
// mk. explicitOpPrefix is ExplicitEVEX when a non-empty Suffix is given and
// NoExplicitOpPrefix otherwise.
2620   let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove],
2621       explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in
2622   def kk#Suffix : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2623                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2624                   Sched<[WriteMove]>;
2625   def km#Suffix : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2626                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2627                     [(set KRC:$dst, (vvt (load addr:$src)))]>,
2628                   Sched<[WriteLoad]>, NoCD8;
2629   def mk#Suffix : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2630                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2631                     [(store KRC:$src, addr:$dst)]>,
2632                   Sched<[WriteStore]>, NoCD8;
// Moves between a mask register class and a general-purpose register class:
//   kr - GRC source, KRC destination.
//   rk - KRC source, GRC destination.
// Neither def carries an ISel pattern; both are marked hasSideEffects = 0 and
// scheduled as WriteMove. Suffix is appended to each record name.
2635 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2636                                string OpcodeStr, RegisterClass KRC,
2637                                RegisterClass GRC, string Suffix = ""> {
2638   let hasSideEffects = 0 in {
2639     def kr#Suffix : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2640                       !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2641                     Sched<[WriteMove]>;
2642     def rk#Suffix : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2643                       !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2644                     Sched<[WriteMove]>;
2645   }
// Each KMOV width is instantiated twice with the same opcodes:
//   - a VEX-tagged set when NoEGPR holds, with unsuffixed record names;
//   - an EVEX-tagged set when HasEGPR and In64BitMode hold, whose record names
//     carry the "_EVEX" suffix.
// KMOVB requires HasDQI, KMOVW requires HasAVX512, and KMOVD/KMOVQ require
// HasBWI. For D and Q the mask/memory forms and the GPR forms are separate
// defm statements because their prefix tags differ.
2648 let Predicates = [HasDQI, NoEGPR] in
2649   defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2650                avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2651                VEX, TB, PD;
2652 let Predicates = [HasDQI, HasEGPR, In64BitMode] in
2653   defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem, "_EVEX">,
2654                avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32, "_EVEX">,
2655                EVEX, TB, PD;
2657 let Predicates = [HasAVX512, NoEGPR] in
2658   defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2659                avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2660                VEX, TB;
2661 let Predicates = [HasAVX512, HasEGPR, In64BitMode] in
2662   defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem, "_EVEX">,
2663                avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32, "_EVEX">,
2664                EVEX, TB;
2666 let Predicates = [HasBWI, NoEGPR] in {
2667   defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2668                VEX, TB, PD, REX_W;
2669   defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2670                VEX, TB, XD;
2671   defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2672                VEX, TB, REX_W;
2673   defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2674                VEX, TB, XD, REX_W;
2676 let Predicates = [HasBWI, HasEGPR, In64BitMode] in {
2677   defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem, "_EVEX">,
2678                EVEX, TB, PD, REX_W;
2679   defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32, "_EVEX">,
2680                EVEX, TB, XD;
2681   defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem, "_EVEX">,
2682                EVEX, TB, REX_W;
2683   defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64, "_EVEX">,
2684                EVEX, TB, XD, REX_W;
2687 // GR from/to mask register
// 16-bit: the GR16 value is first placed into a 32-bit register via
// INSERT_SUBREG on an IMPLICIT_DEF, then retagged as VK16. The reverse
// direction copies VK16 into GR32 and extracts sub_16bit (or sub_8bit when the
// i16 result is truncated to i8).
2688 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2689           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2690 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2691           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2692 def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
2693           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>;
// 8-bit: same scheme through GR32 using sub_8bit.
2695 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2696           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2697 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2698           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
// Extending a v16i1 mask: zext selects KMOVWrk (wrapped in SUBREG_TO_REG for
// i64), whereas anyext only copies the register into GR32 (wrapped in
// INSERT_SUBREG on an IMPLICIT_DEF for i64).
2700 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2701           (KMOVWrk VK16:$src)>;
2702 def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2703           (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
2704 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2705           (COPY_TO_REGCLASS VK16:$src, GR32)>;
2706 def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2707           (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
// Extending a v8i1 mask: the zext patterns select KMOVBrk and are the only
// ones here gated by Requires<[HasDQI]>; the anyext patterns carry no
// predicate.
2709 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2710           (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2711 def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2712           (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
2713 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2714           (COPY_TO_REGCLASS VK8:$src, GR32)>;
2715 def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2716           (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
// 32- and 64-bit bitcasts are plain register-class copies in both directions.
2718 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2719           (COPY_TO_REGCLASS GR32:$src, VK32)>;
2720 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2721           (COPY_TO_REGCLASS VK32:$src, GR32)>;
2722 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2723           (COPY_TO_REGCLASS GR64:$src, VK64)>;
2724 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2725           (COPY_TO_REGCLASS VK64:$src, GR64)>;
2727 // Load/store kreg
// With HasDQI, loads of v1i1, v2i1 and v4i1 all select KMOVBkm and the result
// is retagged to VK1, VK2 or VK4 respectively.
2728 let Predicates = [HasDQI] in {
2729   def : Pat<(v1i1 (load addr:$src)),
2730             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2731   def : Pat<(v2i1 (load addr:$src)),
2732             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2733   def : Pat<(v4i1 (load addr:$src)),
2734             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
// With only HasAVX512, a v8i1 bitcast of an i8 load selects MOVZX32rm8 and
// copies the result into VK8, while a v16i1 bitcast of an i16 load selects
// KMOVWkm directly.
2737 let Predicates = [HasAVX512] in {
2738   def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2739             (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2740   def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2741             (KMOVWkm addr:$src)>;
// ISD::EXTRACT_VECTOR_ELT restricted by its type profile to: one i8 result,
// a first operand that is a vector with i1 elements, and a second operand of
// pointer type (the element index).
2744 def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2745                          SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2746                                               SDTCVecEltisVT<1, i1>,
2747                                               SDTCisPtrTy<2>]>>;
// GPR <-> mask copy lowering, all under HasAVX512. The multiclass is
// instantiated once per mask register class / value type pair from VK1/v1i1
// up to VK64/v64i1 and provides:
//   - scalar_to_vector from GR32: plain copy into maskRC.
//   - scalar_to_vector from GR8: widen to 32 bits via INSERT_SUBREG on an
//     IMPLICIT_DEF, then copy into maskRC.
//   - X86kextract of element 0: copy maskRC into GR32 and take sub_8bit, or
//     keep the whole GR32 when the i8 result is any-extended to i32.
2749 let Predicates = [HasAVX512] in {
2750   multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2751     def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2752               (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2754     def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2755               (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2757     def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2758               (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2760     def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2761               (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2762   }
2764   defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
2765   defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
2766   defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
2767   defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
2768   defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
2769   defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
2770   defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;
// Inserting a v1i1 built from GR8 at index 0 of an all-zeros v16i1: the GR8
// value is widened to 32 bits, ANDed with 1 (AND32ri) and moved into a mask
// register with KMOVWkr.
2772   def : Pat<(insert_subvector (v16i1 immAllZerosV),
2773                               (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2774             (KMOVWkr (AND32ri
2775                       (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2776                       (i32 1)))>;
2779 // Mask unary operation
2780 // - KNOT
     // Defines a single register-register record `rr` for a one-operand mask op.
     //   opc       - opcode byte
     //   OpcodeStr - full mnemonic (width suffix already appended by the caller)
     //   KRC       - mask register class used for both $src and $dst
     //   OpNode    - DAG operator matched as (set $dst, (OpNode $src))
     //   sched     - scheduling class attached through Sched<[...]>
     //   prd       - predicate guarding the record
2781 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2782                             RegisterClass KRC, SDPatternOperator OpNode,
2783                             X86FoldableSchedWrite sched, Predicate prd> {
2784   let Predicates = [prd] in
2785     def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2786                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2787                [(set KRC:$dst, (OpNode KRC:$src))]>,
2788                Sched<[sched]>;
// Instantiates a unary mask operation for all four mask widths.
//   opc       - opcode byte shared by every width
//   OpcodeStr - mnemonic stem; "b"/"w"/"d"/"q" is pasted on per width
//   OpNode    - DAG operator forwarded to avx512_mask_unop
//   sched     - scheduling class forwarded to avx512_mask_unop
// Width -> register class, predicate, encoding classes:
//   B: VK8,  HasDQI,    VEX, TB, PD
//   W: VK16, HasAVX512, VEX, TB
//   D: VK32, HasBWI,    VEX, TB, PD, REX_W
//   Q: VK64, HasBWI,    VEX, TB, REX_W
2791 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2792                                 SDPatternOperator OpNode,
2793                                 X86FoldableSchedWrite sched> {
2794   defm B : avx512_mask_unop<opc, OpcodeStr#"b", VK8, OpNode,
2795                             sched, HasDQI>, VEX, TB, PD;
2796   defm W : avx512_mask_unop<opc, OpcodeStr#"w", VK16, OpNode,
2797                             sched, HasAVX512>, VEX, TB;
2798   defm D : avx512_mask_unop<opc, OpcodeStr#"d", VK32, OpNode,
2799                             sched, HasBWI>, VEX, TB, PD, REX_W;
2800   defm Q : avx512_mask_unop<opc, OpcodeStr#"q", VK64, OpNode,
2801                             sched, HasBWI>, VEX, TB, REX_W;
2804 // TODO - do we need a X86SchedWriteWidths::KMASK type?
     // KNOT{B,W,D,Q}rr: opcode 0x44, matched from `vnot`, scheduled with the XMM
     // entry of SchedWriteVecLogic.
2805 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
2807 // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
     // Without DQI, vnot on VK8 is done on the 16-bit form: copy to VK16, apply
     // KNOTWrr, copy back to VK8. The `let` below has no braces, so it is written
     // to cover only this first pattern.
2808 let Predicates = [HasAVX512, NoDQI] in
2809 def : Pat<(vnot VK8:$src),
2810           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
     // vnot on the 4-bit and 2-bit mask classes uses the same widen / KNOTWrr /
     // narrow sequence; these patterns carry no explicit Predicates here.
2812 def : Pat<(vnot VK4:$src),
2813           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2814 def : Pat<(vnot VK2:$src),
2815           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
// vnot on a 1-bit mask (VK1): widen to VK16, invert with KNOTWrr, and copy the
// result back into VK1 so the destination class matches the source class, as
// the VK8/VK4/VK2 vnot patterns and the VK1 binary-op patterns in this file do.
// NOTE(review): the original copied into VK2; presumably harmless if VK1 and VK2
// contain the same physical registers - confirm against X86RegisterInfo.td.
2816 def : Pat<(vnot VK1:$src),
2817           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;
2819 // Mask binary operation
2820 // - KAND, KANDN, KOR, KXNOR, KXOR
     // Defines a single register-register record `rr` for a two-operand mask op.
     //   opc          - opcode byte
     //   OpcodeStr    - full mnemonic (width suffix already appended)
     //   KRC          - mask register class of $src1, $src2 and $dst
     //   OpNode       - DAG operator matched as (set $dst, (OpNode $src1, $src2))
     //   sched        - scheduling class attached through Sched<[...]>
     //   prd          - predicate guarding the record
     //   IsCommutable - value assigned to the record's isCommutable field
2821 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2822                            RegisterClass KRC, SDPatternOperator OpNode,
2823                            X86FoldableSchedWrite sched, Predicate prd,
2824                            bit IsCommutable> {
2825   let Predicates = [prd], isCommutable = IsCommutable in
2826     def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2827                !strconcat(OpcodeStr,
2828                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2829                [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
2830                Sched<[sched]>;
// Instantiates a binary mask operation for all four mask widths.
//   opc          - opcode byte shared by every width
//   OpcodeStr    - mnemonic stem; "b"/"w"/"d"/"q" is pasted on per width
//   OpNode       - DAG operator forwarded to avx512_mask_binop
//   sched        - scheduling class forwarded to avx512_mask_binop
//   IsCommutable - forwarded unchanged to every width
//   prdW         - predicate for the W (16-bit) form only; defaults to HasAVX512
// B is guarded by HasDQI, D and Q by HasBWI. Every form is VEX, VVVV, VEX_L, TB;
// B and D add PD, D and Q add REX_W.
2833 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2834                                  SDPatternOperator OpNode,
2835                                  X86FoldableSchedWrite sched, bit IsCommutable,
2836                                  Predicate prdW = HasAVX512> {
2837   defm B : avx512_mask_binop<opc, OpcodeStr#"b", VK8, OpNode,
2838                              sched, HasDQI, IsCommutable>, VEX, VVVV, VEX_L, TB, PD;
2839   defm W : avx512_mask_binop<opc, OpcodeStr#"w", VK16, OpNode,
2840                              sched, prdW, IsCommutable>, VEX, VVVV, VEX_L, TB;
2841   defm D : avx512_mask_binop<opc, OpcodeStr#"d", VK32, OpNode,
2842                              sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB, PD;
2843   defm Q : avx512_mask_binop<opc, OpcodeStr#"q", VK64, OpNode,
2844                              sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB;
2847 // TODO - do we need a X86SchedWriteWidths::KMASK type?
     // Binary mask instructions. The fifth argument is IsCommutable: KANDN is the
     // only one passing 0. KADD additionally overrides prdW with HasDQI, so its
     // W form is guarded by HasDQI instead of the default HasAVX512.
2848 defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
2849 defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
2850 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
2851 defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
2852 defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
2853 defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
// Selection patterns for a binary mask operation on the narrow mask classes.
//   VOpNode - DAG operator to match
//   Inst    - instruction applied to the operands after widening to VK16
// Every pattern copies both operands to VK16, applies Inst, and copies the
// result back to the original register class.
2855 multiclass avx512_binop_pat<SDPatternOperator VOpNode,
2856                             Instruction Inst> {
2857   // With AVX512F, 8-bit mask is promoted to 16-bit mask,
2858   // for the DQI set, this type is legal and KxxxB instruction is used
       // The brace-less `let` is written to cover only the VK8 pattern.
2859   let Predicates = [NoDQI] in
2860   def : Pat<(VOpNode VK8:$src1, VK8:$src2),
2861             (COPY_TO_REGCLASS
2862               (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
2863                     (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
2865   // All types smaller than 8 bits require conversion anyway
2866   def : Pat<(VOpNode VK1:$src1, VK1:$src2),
2867         (COPY_TO_REGCLASS (Inst
2868                            (COPY_TO_REGCLASS VK1:$src1, VK16),
2869                            (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
2870   def : Pat<(VOpNode VK2:$src1, VK2:$src2),
2871         (COPY_TO_REGCLASS (Inst
2872                            (COPY_TO_REGCLASS VK2:$src1, VK16),
2873                            (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
2874   def : Pat<(VOpNode VK4:$src1, VK4:$src2),
2875         (COPY_TO_REGCLASS (Inst
2876                            (COPY_TO_REGCLASS VK4:$src1, VK16),
2877                            (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
// Narrow-mask patterns for each logical operator, all routed through the
// 16-bit (W) instruction form. No instantiation for X86kadd appears here.
2880 defm : avx512_binop_pat<and,   KANDWrr>;
2881 defm : avx512_binop_pat<vandn, KANDNWrr>;
2882 defm : avx512_binop_pat<or,    KORWrr>;
2883 defm : avx512_binop_pat<vxnor, KXNORWrr>;
2884 defm : avx512_binop_pat<xor,   KXORWrr>;
2886 // Mask unpacking
     // Defines `rr` (opcode 0x4b) combining two Src-width masks into one
     // Dst-width mask, plus the pattern that selects it for concat_vectors.
     //   Suffix - mnemonic suffix appended to "kunpck"
     //   Dst    - mask type info of the result
     //   Src    - mask type info of both inputs
     //   sched  - scheduling class
     //   prd    - predicate guarding both the instruction and the pattern
2887 multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
2888                              X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
2889                              Predicate prd> {
2890   let Predicates = [prd] in {
         // The instruction itself carries no pattern ([]).
2891     let hasSideEffects = 0 in
2892     def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
2893                (ins Src.KRC:$src1, Src.KRC:$src2),
2894                "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
2895                VEX, VVVV, VEX_L, Sched<[sched]>;
         // Note the operand swap: concat_vectors(src1, src2) is emitted as
         // rr(src2, src1).
2897     def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
2898               (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
2899   }
// Unpack instantiations: 8+8 -> 16 (HasAVX512), 16+16 -> 32 and 32+32 -> 64
// (both HasBWI). All use WriteShuffle for scheduling.
2902 defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, TB, PD;
2903 defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, TB;
2904 defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, TB, REX_W;
2906 // Mask bit testing
     // Defines `rr` for a two-operand mask test. The record has no register
     // outputs; it lists EFLAGS in Defs and its pattern sets EFLAGS from
     // (OpNode $src1, $src2).
     //   opc / OpcodeStr - opcode byte and full mnemonic
     //   KRC             - mask register class of both inputs
     //   OpNode          - DAG node producing EFLAGS
     //   sched / prd     - scheduling class and guarding predicate
2907 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2908                               SDNode OpNode, X86FoldableSchedWrite sched,
2909                               Predicate prd> {
2910   let Predicates = [prd], Defs = [EFLAGS] in
2911     def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
2912                !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
2913                [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
2914                Sched<[sched]>;
// Instantiates a mask test operation for all four widths.
//   prdW - predicate for the W (16-bit) form only; defaults to HasAVX512.
// B is guarded by HasDQI; Q and D by HasBWI. Every form is VEX, TB; B and D
// add PD, Q and D add REX_W.
2917 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
2918                                 X86FoldableSchedWrite sched,
2919                                 Predicate prdW = HasAVX512> {
2920   defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
2921                                                                 VEX, TB, PD;
2922   defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
2923                                                                 VEX, TB;
2924   defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
2925                                                                 VEX, TB, REX_W;
2926   defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
2927                                                                 VEX, TB, PD, REX_W;
2930 // TODO - do we need a X86SchedWriteWidths::KMASK type?
     // KORTEST (0x98) keeps the default W predicate (HasAVX512); KTEST (0x99)
     // overrides it with HasDQI.
2931 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
2932 defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
2934 // Mask shift
     // Defines `ri`: shift a mask register by an 8-bit immediate.
     //   opc / OpcodeStr - opcode byte and full mnemonic
     //   KRC             - mask register class of $src and $dst
     //   OpNode          - DAG node matched as (OpNode $src, (i8 timm:$imm))
     //   sched           - scheduling class
     // The record is written with Predicates = [HasAVX512]; callers in this file
     // wrap some instantiations in their own `let Predicates`.
2935 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2936                                SDNode OpNode, X86FoldableSchedWrite sched> {
2937   let Predicates = [HasAVX512] in
2938     def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
2939                  !strconcat(OpcodeStr,
2940                             "\t{$imm, $src, $dst|$dst, $src, $imm}"),
2941                             [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
2942                  Sched<[sched]>;
// Instantiates a mask shift for all four widths.
//   opc1      - opcode byte shared by the W and B forms
//   opc2      - opcode byte shared by the Q and D forms
//   OpcodeStr - mnemonic stem; "w"/"b"/"q"/"d" is pasted on per width
//   OpNode    - DAG node forwarded to avx512_mask_shiftop
//   sched     - scheduling class forwarded to avx512_mask_shiftop
// Every form is VEX, TA, PD; W and Q add REX_W. B is wrapped in HasDQI and
// Q/D in HasBWI; W has no wrapper here.
2945 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
2946                                  SDNode OpNode, X86FoldableSchedWrite sched> {
2947   defm W : avx512_mask_shiftop<opc1, OpcodeStr#"w", VK16, OpNode,
2948                                sched>, VEX, TA, PD, REX_W;
2949   let Predicates = [HasDQI] in
2950   defm B : avx512_mask_shiftop<opc1, OpcodeStr#"b", VK8, OpNode,
2951                                sched>, VEX, TA, PD;
2952   let Predicates = [HasBWI] in {
2953   defm Q : avx512_mask_shiftop<opc2, OpcodeStr#"q", VK64, OpNode,
2954                                sched>, VEX, TA, PD, REX_W;
2955   defm D : avx512_mask_shiftop<opc2, OpcodeStr#"d", VK32, OpNode,
2956                                sched>, VEX, TA, PD;
2957   }
// Left shifts use opcodes 0x32 (W/B) and 0x33 (Q/D); right shifts use 0x30
// (W/B) and 0x31 (Q/D). Both are scheduled as WriteShuffle.
2960 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
2961 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
2963 // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
     //   Frag    - compare fragment matched in the unmasked pattern
     //   Frag_su - fragment matched when the compare is ANDed with a mask
     //   InstStr - instruction name stem; "Zrri"/"Zrrik" is appended
     //   Narrow  - type info of the vectors being compared
     //   Wide    - type info of the instruction actually emitted
     // Both operands are inserted into an IMPLICIT_DEF Wide register at
     // Narrow.SubRegIdx (so the remaining lanes are undefined), the Wide compare
     // is emitted, and the resulting mask is copied to Narrow.KRC.
2964 multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
2965                                                  string InstStr,
2966                                                  X86VectorVTInfo Narrow,
2967                                                  X86VectorVTInfo Wide> {
     // Unmasked register-register compare.
2968 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
2969                                 (Narrow.VT Narrow.RC:$src2), cond)),
2970           (COPY_TO_REGCLASS
2971            (!cast<Instruction>(InstStr#"Zrri")
2972             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2973             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
2974             (X86pcmpm_imm $cc)), Narrow.KRC)>;
     // (and mask, compare): folded into the masked instruction form, with the
     // mask first copied to Wide.KRC.
2976 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
2977                            (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
2978                                                     (Narrow.VT Narrow.RC:$src2),
2979                                                     cond)))),
2980           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
2981            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
2982            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2983            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
2984            (X86pcmpm_imm $cc)), Narrow.KRC)>;
// Broadcast-load variants of the narrow integer compare lowering: one operand
// is Narrow.BroadcastLdFrag of addr:$src2 and the emitted instruction is the
// Wide "Zrmib"/"Zrmibk" form. The register operand is widened through
// INSERT_SUBREG into IMPLICIT_DEF, and the result mask is copied to Narrow.KRC.
// Parameters have the same roles as in axv512_icmp_packed_cc_no_vlx_lowering.
2987 multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
2988                                                      string InstStr,
2989                                                      X86VectorVTInfo Narrow,
2990                                                      X86VectorVTInfo Wide> {
2991 // Broadcast load.
2992 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
2993                                 (Narrow.BroadcastLdFrag addr:$src2), cond)),
2994           (COPY_TO_REGCLASS
2995            (!cast<Instruction>(InstStr#"Zrmib")
2996             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2997             addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
     // Same, with the compare ANDed with a mask.
2999 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3000                            (Narrow.KVT
3001                             (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3002                                          (Narrow.BroadcastLdFrag addr:$src2),
3003                                          cond)))),
3004           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3005            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3006            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3007            addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
3009 // Commuted with broadcast load.
     // The load is the first source operand, so the condition immediate goes
     // through X86pcmpm_imm_commute instead of X86pcmpm_imm.
3010 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
3011                                 (Narrow.VT Narrow.RC:$src1),
3012                                 cond)),
3013           (COPY_TO_REGCLASS
3014            (!cast<Instruction>(InstStr#"Zrmib")
3015             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3016             addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
     // Commuted and masked.
3018 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3019                            (Narrow.KVT
3020                             (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
3021                                          (Narrow.VT Narrow.RC:$src1),
3022                                          cond)))),
3023           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3024            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3025            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3026            addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3029 // Same as above, but for fp types which don't use PatFrags.
     //   InstStr - instruction name stem; "Zrri", "Zrrik", "Zrmbi" or "Zrmbik"
     //             is appended
     //   Narrow  - type info of the vectors being compared
     //   Wide    - type info of the instruction actually emitted
     // The compare node is X86cmpm (X86cmpm_su under a mask) and the condition
     // is the timm:$cc operand. Register operands are widened through
     // INSERT_SUBREG into IMPLICIT_DEF; the result mask is copied to Narrow.KRC.
3030 multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
3031                                                 X86VectorVTInfo Narrow,
3032                                                 X86VectorVTInfo Wide> {
     // Unmasked register-register compare.
3033 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3034                                (Narrow.VT Narrow.RC:$src2), timm:$cc)),
3035           (COPY_TO_REGCLASS
3036            (!cast<Instruction>(InstStr#"Zrri")
3037             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3038             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3039             timm:$cc), Narrow.KRC)>;
     // Masked register-register compare; the mask is copied to Wide.KRC.
3041 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3042                            (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3043                                        (Narrow.VT Narrow.RC:$src2), timm:$cc))),
3044           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3045            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3046            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3047            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3048            timm:$cc), Narrow.KRC)>;
3050 // Broadcast load.
3051 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3052                                (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
3053           (COPY_TO_REGCLASS
3054            (!cast<Instruction>(InstStr#"Zrmbi")
3055             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3056             addr:$src2, timm:$cc), Narrow.KRC)>;
     // Broadcast load, masked.
3058 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3059                            (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3060                                        (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
3061           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3062            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3063            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3064            addr:$src2, timm:$cc), Narrow.KRC)>;
3066 // Commuted with broadcast load.
     // The load is the first source operand, so the condition is rewritten with
     // X86cmpm_imm_commute.
3067 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3068                                (Narrow.VT Narrow.RC:$src1), timm:$cc)),
3069           (COPY_TO_REGCLASS
3070            (!cast<Instruction>(InstStr#"Zrmbi")
3071             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3072             addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
     // Commuted and masked.
3074 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3075                            (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3076                                        (Narrow.VT Narrow.RC:$src1), timm:$cc))),
3077           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3078            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3079            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3080            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
// Narrow-compare lowerings used when VLX is unavailable but AVX512 with
// EVEX512 is: 32-bit element compares go through the v16i32/v16f32 forms and
// 64-bit element compares through the v8i64/v8f64 forms. Signed (X86pcmpm) and
// unsigned (X86pcmpum) integer compares are instantiated separately, first
// for register operands and then for broadcast-load operands.
3083 let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
3084   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3085   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3087   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3088   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3090   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3091   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3093   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3094   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3096   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3097   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3099   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3100   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3102   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3103   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3105   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3106   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3108   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
3109   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
3110   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
3111   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
// Byte and word element compares for the no-VLX case, guarded by HasBWI:
// narrow i8 vectors go through the v64i8 form and narrow i16 vectors through
// the v32i16 form. Only the register-register lowering is instantiated here;
// no broadcast-load (rmb) instantiation appears in this group.
3114 let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
3115   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3116   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3118   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3119   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3121   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3122   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3124   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3125   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3128 // Mask setting all 0s or 1s
     // Defines one pseudo (format Pseudo, opcode 0, empty asm string) that
     // produces the constant `Val` of type VT in KRC. The record is marked
     // rematerializable and as cheap as a move, and scheduled as WriteZero.
     //   KRC - destination mask register class
     //   VT  - value type of the constant
     //   Val - constant operator to match
3129 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
3130   let Predicates = [HasAVX512] in
3131     let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3132         SchedRW = [WriteZero] in
3133       def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3134                      [(set KRC:$dst, (VT Val))]>;
// Instantiates the constant-mask pseudo for the 16-, 32- and 64-bit mask
// classes (W/D/Q). No 8-bit form is created here.
3137 multiclass avx512_mask_setop_w<SDPatternOperator Val> {
3138   defm W : avx512_mask_setop<VK16, v16i1, Val>;
3139   defm D : avx512_mask_setop<VK32,  v32i1, Val>;
3140   defm Q : avx512_mask_setop<VK64, v64i1, Val>;
// KSET0{W,D,Q}: all-zeros mask constants; KSET1{W,D,Q}: all-ones mask constants.
3143 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3144 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3146 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
     // All-zeros / all-ones constants of the 8-, 4-, 2- and 1-bit mask types are
     // materialized with the 16-bit pseudo (KSET0W / KSET1W) and then copied to
     // the narrow register class.
3147 let Predicates = [HasAVX512] in {
3148   def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3149   def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3150   def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3151   def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3152   def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
3153   def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
3154   def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
3155   def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
3158 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
     //   subRC / subVT - register class and type of the narrower mask
     //   RC / VT       - register class and type of the wider mask
     // Both directions are selected as plain register-class copies.
3159 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3160                                              RegisterClass RC, ValueType VT> {
       // Extract the low subVT from VT.
3161   def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3162             (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
       // Insert subVT at index 0 of an undef VT.
3164   def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3165             (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
// Every (narrower, wider) pair of mask types from v1i1 up to v64i1, grouped by
// the narrower type.
3167 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
3168 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
3169 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
3170 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
3171 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
3172 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;
3174 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
3175 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
3176 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
3177 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
3178 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;
3180 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
3181 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
3182 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
3183 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;
3185 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
3186 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
3187 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;
3189 defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3190 defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3192 defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3194 //===----------------------------------------------------------------------===//
3195 // AVX-512 - Aligned and unaligned load and store
// Load / register-move records for one vector width, plus the masked-load
// selection patterns.
//   opc         - opcode byte
//   OpcodeStr   - mnemonic
//   Name        - name stem used to look up the generated records
//                 (Name # _.ZSuffix # "rmk"/"rmkz") from the patterns
//   _           - vector type info (register class, mask class, mem operand...)
//   ld_frag     - load fragment matched by the rm/rmk/rmkz records
//   mload       - masked-load fragment matched by the trailing patterns
//   Sched       - scheduling info; .RR for register forms, .RM for loads
//   NoRMPattern - when set, the unmasked `rm` record gets no pattern
//   SelectOprr  - select operator used by the register forms rrk/rrkz
//                 (the memory forms always use vselect_mask)
3198 multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3199                        X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3200                        X86SchedWriteMoveLS Sched, bit NoRMPattern = 0,
3201                        SDPatternOperator SelectOprr = vselect> {
3202   let hasSideEffects = 0 in {
       // rr: unmasked register move; no pattern.
3203   let isMoveReg = 1 in
3204   def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3205                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3206                     _.ExeDomain>, EVEX, Sched<[Sched.RR]>;
       // rrkz: zero-masking register move, select(mask, src, zeros).
3207   def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3208                       (ins _.KRCWM:$mask,  _.RC:$src),
3209                       !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3210                        "${dst} {${mask}} {z}, $src}"),
3211                        [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3212                                            (_.VT _.RC:$src),
3213                                            _.ImmAllZerosV)))], _.ExeDomain>,
3214                        EVEX, EVEX_KZ, Sched<[Sched.RR]>;
       // rm: unmasked load; marked foldable as a load and rematerializable.
3216   let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3217   def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3218                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3219                     !if(NoRMPattern, [],
3220                         [(set _.RC:$dst,
3221                           (_.VT (ld_frag addr:$src)))]),
3222                     _.ExeDomain>, EVEX, Sched<[Sched.RM]>;
       // Merge-masking forms: $src0 supplies the pass-through value and is tied
       // to $dst.
3224   let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3225     def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3226                       (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3227                       !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3228                       "${dst} {${mask}}, $src1}"),
3229                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3230                                           (_.VT _.RC:$src1),
3231                                           (_.VT _.RC:$src0))))], _.ExeDomain>,
3232                        EVEX, EVEX_K, Sched<[Sched.RR]>;
3233     def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3234                      (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3235                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3236                       "${dst} {${mask}}, $src1}"),
3237                      [(set _.RC:$dst, (_.VT
3238                          (vselect_mask _.KRCWM:$mask,
3239                           (_.VT (ld_frag addr:$src1)),
3240                            (_.VT _.RC:$src0))))], _.ExeDomain>,
3241                      EVEX, EVEX_K, Sched<[Sched.RM]>;
3242   }
       // rmkz: zero-masking load.
3243   def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3244                   (ins _.KRCWM:$mask, _.MemOp:$src),
3245                   OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3246                                 "${dst} {${mask}} {z}, $src}",
3247                   [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
3248                     (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3249                   _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
3250   }
       // Masked-load node with an undef pass-through: use the zero-masking load.
3251   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3252             (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
       // Masked-load node with an all-zeros pass-through: zero-masking load.
3254   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3255             (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
       // Masked-load node with a register pass-through: merge-masking load.
3257   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3258             (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
3259              _.KRCWM:$mask, addr:$ptr)>;
// Aligned-load instantiation of avx512_load for the 512-, 256- and 128-bit
// widths. Each width uses its own AlignedLdFrag and the masked_load_aligned
// fragment. The 512-bit form is guarded by `prd`; the 256/128-bit forms
// additionally require HasVLX. SelectOprr is left at avx512_load's default.
//   _           - per-width type info bundle (info512/info256/info128)
//   Sched       - per-width scheduling info (ZMM/YMM/XMM)
//   NoRMPattern - forwarded to avx512_load
3262 multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3263                                  AVX512VLVectorVTInfo _, Predicate prd,
3264                                  X86SchedWriteMoveLSWidths Sched,
3265                                  bit NoRMPattern = 0> {
3266   let Predicates = [prd] in
3267   defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3268                        _.info512.AlignedLdFrag, masked_load_aligned,
3269                        Sched.ZMM, NoRMPattern>, EVEX_V512;
3271   let Predicates = [prd, HasVLX] in {
3272   defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3273                           _.info256.AlignedLdFrag, masked_load_aligned,
3274                           Sched.YMM, NoRMPattern>, EVEX_V256;
3275   defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3276                           _.info128.AlignedLdFrag, masked_load_aligned,
3277                           Sched.XMM, NoRMPattern>, EVEX_V128;
3278   }
// Load instantiation of avx512_load for the 512-, 256- and 128-bit widths
// using each width's LdFrag and the masked_load fragment. The 512-bit form is
// guarded by `prd`; the 256/128-bit forms additionally require HasVLX.
//   _           - per-width type info bundle (info512/info256/info128)
//   Sched       - per-width scheduling info (ZMM/YMM/XMM)
//   NoRMPattern - forwarded to avx512_load
//   SelectOprr  - forwarded to avx512_load; defaults to vselect
3281 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3282                           AVX512VLVectorVTInfo _, Predicate prd,
3283                           X86SchedWriteMoveLSWidths Sched,
3284                           bit NoRMPattern = 0,
3285                           SDPatternOperator SelectOprr = vselect> {
3286   let Predicates = [prd] in
3287   defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3288                        masked_load, Sched.ZMM, NoRMPattern, SelectOprr>, EVEX_V512;
3290   let Predicates = [prd, HasVLX] in {
3291   defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3292                          masked_load, Sched.YMM, NoRMPattern, SelectOprr>, EVEX_V256;
3293   defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3294                          masked_load, Sched.XMM, NoRMPattern, SelectOprr>, EVEX_V128;
3295   }
// Store records for one vector width, the reversed-encoding register forms,
// the masked-store selection pattern, and ".s" assembler aliases.
//   opc         - opcode byte
//   OpcodeStr   - mnemonic
//   BaseName    - name stem used to look up the generated records
//                 (BaseName # _.ZSuffix # suffix)
//   _           - vector type info
//   st_frag     - store fragment matched by the unmasked `mr` record
//   mstore      - masked-store fragment matched by the trailing pattern
//   Sched       - scheduling info; .RR for register forms, .MR for stores
//   NoMRPattern - when set, the unmasked `mr` record gets no pattern
3298 multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3299                         X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3300                         X86SchedWriteMoveLS Sched, bit NoMRPattern = 0> {
       // Register-destination (MRMDestReg) forms of the store opcode: unmasked,
       // merge-masked and zero-masked. They have no patterns and are marked
       // isCodeGenOnly with ForceDisassemble.
3301   let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3302   let isMoveReg = 1 in
3303   def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3304                          OpcodeStr # "\t{$src, $dst|$dst, $src}",
3305                          [], _.ExeDomain>, EVEX,
3306                          Sched<[Sched.RR]>;
3307   def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3308                          (ins _.KRCWM:$mask, _.RC:$src),
3309                          OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3310                          "${dst} {${mask}}, $src}",
3311                          [], _.ExeDomain>,  EVEX, EVEX_K,
3312                          Sched<[Sched.RR]>;
3313   def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3314                           (ins _.KRCWM:$mask, _.RC:$src),
3315                           OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3316                           "${dst} {${mask}} {z}, $src}",
3317                           [], _.ExeDomain>, EVEX, EVEX_KZ,
3318                           Sched<[Sched.RR]>;
3319   }
       // mr: unmasked store. The brace-less `let` is written to cover only this
       // record.
3321   let hasSideEffects = 0, mayStore = 1 in
3322   def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3323                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3324                     !if(NoMRPattern, [],
3325                         [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3326                     _.ExeDomain>, EVEX, Sched<[Sched.MR]>;
       // mrk: masked store; its pattern is supplied by the Pat that follows.
3327   def mrk : AVX512PI<opc, MRMDestMem, (outs),
3328                      (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3329               OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3330                [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>;
       // Masked-store node -> mrk.
3332   def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3333            (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3334                                                         _.KRCWM:$mask, _.RC:$src)>;
       // Assembler aliases: the mnemonic with a ".s" suffix maps to the _REV
       // records above.
       // NOTE(review): the trailing 0 presumably keeps the alias from being used
       // when printing - confirm against the InstAlias class definition.
3336   def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3337                   (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3338                    _.RC:$dst, _.RC:$src), 0>;
3339   def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3340                   (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3341                    _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3342   def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3343                   (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3344                    _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
// Instantiates avx512_store for the three widths in `_` (info512, info256,
// info128) using the `store` and `masked_store` fragments.
// The Z variant is guarded by `prd`; Z256 and Z128 also require HasVLX.
// NoMRPattern is forwarded unchanged to each instantiation.
3347 multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3348                             AVX512VLVectorVTInfo _, Predicate prd,
3349                             X86SchedWriteMoveLSWidths Sched,
3350                             bit NoMRPattern = 0> {
3351   let Predicates = [prd] in
3352   defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3353                         masked_store, Sched.ZMM, NoMRPattern>, EVEX_V512;
3354   let Predicates = [prd, HasVLX] in {
3355     defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3356                              masked_store, Sched.YMM, NoMRPattern>, EVEX_V256;
3357     defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3358                              masked_store, Sched.XMM, NoMRPattern>, EVEX_V128;
3359   }
// Same shape as avx512_store_vl, but the instantiations use the
// `alignedstore` and `masked_store_aligned` fragments.
// The Z variant is guarded by `prd`; Z256 and Z128 also require HasVLX.
3362 multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3363                                   AVX512VLVectorVTInfo _, Predicate prd,
3364                                   X86SchedWriteMoveLSWidths Sched,
3365                                   bit NoMRPattern = 0> {
3366   let Predicates = [prd] in
3367   defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3368                         masked_store_aligned, Sched.ZMM, NoMRPattern>, EVEX_V512;
3370   let Predicates = [prd, HasVLX] in {
3371     defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3372                              masked_store_aligned, Sched.YMM, NoMRPattern>, EVEX_V256;
3373     defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3374                              masked_store_aligned, Sched.XMM, NoMRPattern>, EVEX_V128;
3375   }
// Packed vector move instantiations. Each defm pairs a load multiclass with
// the matching store multiclass and then adds the opcode-map/prefix and
// EVEX_CD8 attributes shared by both halves.
// For the store multiclasses the optional trailing `1` is NoMRPattern.
// NOTE(review): the trailing arguments of the load multiclasses are declared
// outside this excerpt (presumably NoRMPattern and a select operator).

// vmovaps: load opcode 0x28, store opcode 0x29, f32 elements.
3378 defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3379                                      HasAVX512, SchedWriteFMoveLS>,
3380                avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3381                                       HasAVX512, SchedWriteFMoveLS>,
3382                TB, EVEX_CD8<32, CD8VF>;
// vmovapd: same opcodes as vmovaps, f64 elements, adds PD and REX_W.
3384 defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3385                                      HasAVX512, SchedWriteFMoveLS>,
3386                avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3387                                       HasAVX512, SchedWriteFMoveLS>,
3388                TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
// vmovups: load opcode 0x10, store opcode 0x11, f32 elements.
3390 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3391                               SchedWriteFMoveLS, 0, null_frag>,
3392                avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3393                                SchedWriteFMoveLS>,
3394                                TB, EVEX_CD8<32, CD8VF>;
// vmovupd: same opcodes as vmovups, f64 elements, adds PD and REX_W.
3396 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3397                               SchedWriteFMoveLS, 0, null_frag>,
3398                avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3399                                SchedWriteFMoveLS>,
3400                TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
// Integer moves: load opcode 0x6F, store opcode 0x7F for all six defms below.
// vmovdqa32: the store half is instantiated with NoMRPattern = 1.
3402 defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3403                                        HasAVX512, SchedWriteVecMoveLS, 1>,
3404                  avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3405                                         HasAVX512, SchedWriteVecMoveLS, 1>,
3406                  TB, PD, EVEX_CD8<32, CD8VF>;
// vmovdqa64: keeps the default store pattern (NoMRPattern left at 0).
3408 defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3409                                        HasAVX512, SchedWriteVecMoveLS>,
3410                  avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3411                                         HasAVX512, SchedWriteVecMoveLS>,
3412                  TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
// vmovdqu8 / vmovdqu16: predicated on HasBWI; store half has NoMRPattern = 1.
3414 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3415                                SchedWriteVecMoveLS, 1>,
3416                 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3417                                 SchedWriteVecMoveLS, 1>,
3418                 TB, XD, EVEX_CD8<8, CD8VF>;
3420 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3421                                 SchedWriteVecMoveLS, 1>,
3422                  avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3423                                  SchedWriteVecMoveLS, 1>,
3424                  TB, XD, REX_W, EVEX_CD8<16, CD8VF>;
// vmovdqu32: store half has NoMRPattern = 1.
3426 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3427                                 SchedWriteVecMoveLS, 1, null_frag>,
3428                  avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3429                                  SchedWriteVecMoveLS, 1>,
3430                  TB, XS, EVEX_CD8<32, CD8VF>;
// vmovdqu64: keeps the default store pattern.
3432 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3433                                 SchedWriteVecMoveLS, 0, null_frag>,
3434                  avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3435                                  SchedWriteVecMoveLS>,
3436                  TB, XS, REX_W, EVEX_CD8<64, CD8VF>;
3438 // Special instructions to help with spilling when we don't have VLX. We need
3439 // to load or store from a ZMM register instead. These are converted in
3440 // expandPostRAPseudos.
// Load pseudos: opcode 0, Pseudo format, empty asm string, no pattern.
// They are marked rematerializable and foldable as a load.
3441 let isReMaterializable = 1, canFoldAsLoad = 1,
3442     isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3443 def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3444                             "", []>, Sched<[WriteFLoadX]>;
3445 def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3446                             "", []>, Sched<[WriteFLoadY]>;
3447 def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3448                             "", []>, Sched<[WriteFLoadX]>;
3449 def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3450                             "", []>, Sched<[WriteFLoadY]>;
// Store pseudos: same shape as the loads, with (outs) empty and mayStore set.
3453 let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3454 def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3455                             "", []>, Sched<[WriteFStoreX]>;
3456 def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3457                             "", []>, Sched<[WriteFStoreY]>;
3458 def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3459                             "", []>, Sched<[WriteFStoreX]>;
3460 def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3461                             "", []>, Sched<[WriteFStoreY]>;
// A 512-bit select that yields zero where the mask is set and $src elsewhere
// is emitted as a zero-masked move (rrkz) under the inverted mask.
// v8i64: the VK8 mask is copied to VK16, inverted with KNOTWrr, then copied
// back to VK8 before feeding VMOVDQA64Zrrkz.
3464 def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3465                           (v8i64 VR512:$src))),
3466    (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3467                                               VK8), VR512:$src)>;
// v16i32: the mask is inverted directly with KNOTWrr.
3469 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3470                            (v16i32 VR512:$src))),
3471                   (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3473 // These patterns exist to prevent the above patterns from introducing a second
3474 // mask inversion when one already exists.
// When the select condition is already (vnot mask), the un-inverted mask is
// used as-is for the zero-masked move.
3475 def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
3476                           (v8i64 immAllZerosV),
3477                           (v8i64 VR512:$src))),
3478                  (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3479 def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
3480                            (v16i32 immAllZerosV),
3481                            (v16i32 VR512:$src))),
3482                   (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
// Lowers a masked select on a Narrow vector type through the Wide masked
// move named by InstrStr.
//   InstrStr - instruction name prefix; "rrk" / "rrkz" is appended and the
//              result is resolved with !cast<Instruction>.
//   Narrow   - type info of the select being matched.
//   Wide     - type info of the instruction actually emitted.
// Narrow operands are placed into IMPLICIT_DEF Wide registers with
// INSERT_SUBREG, the mask is copied to Wide.KRCWM, and the result is taken
// back out with EXTRACT_SUBREG at Narrow.SubRegIdx.
3484 multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3485                               X86VectorVTInfo Wide> {
      // select(mask, src1, src0) -> masked move (rrk), with src0 as the
      // first (pass-through) operand.
3486  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3487                                Narrow.RC:$src1, Narrow.RC:$src0)),
3488            (EXTRACT_SUBREG
3489             (Wide.VT
3490              (!cast<Instruction>(InstrStr#"rrk")
3491               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3492               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3493               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3494             Narrow.SubRegIdx)>;
      // select(mask, src1, zero) -> zero-masked move (rrkz); there is no
      // pass-through operand.
3496  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3497                                Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3498            (EXTRACT_SUBREG
3499             (Wide.VT
3500              (!cast<Instruction>(InstrStr#"rrkz")
3501               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3502               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3503             Narrow.SubRegIdx)>;
3506 // Patterns for handling masked selects of 128-bit and 256-bit vectors when
3507 // VLX isn't available. Use a 512-bit operation and extract.
// 32-bit and 64-bit element types, widened to the matching 512-bit type.
3508 let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
3509   defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3510   defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3511   defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3512   defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3514   defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3515   defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3516   defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3517   defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
// 8-bit and 16-bit element types (including f16 and bf16) need HasBWI and go
// through VMOVDQU8Z / VMOVDQU16Z.
3520 let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
3521   defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3522   defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3524   defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3525   defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
3527   defm : mask_move_lowering<"VMOVDQU16Z", v8f16x_info, v32f16_info>;
3528   defm : mask_move_lowering<"VMOVDQU16Z", v16f16x_info, v32f16_info>;
3530   defm : mask_move_lowering<"VMOVDQU16Z", v8bf16x_info, v32bf16_info>;
3531   defm : mask_move_lowering<"VMOVDQU16Z", v16bf16x_info, v32bf16_info>;
// Unmasked 512-bit load/store selection for element types handled here:
// i32/i16/i8 vectors use VMOVDQA64Z (aligned) or VMOVDQU64Z (unaligned);
// f16/bf16 vectors use VMOVAPSZ (aligned) or VMOVUPSZ (unaligned).
3534 let Predicates = [HasAVX512] in {
3535   // 512-bit load.
3536   def : Pat<(alignedloadv16i32 addr:$src),
3537             (VMOVDQA64Zrm addr:$src)>;
3538   def : Pat<(alignedloadv32i16 addr:$src),
3539             (VMOVDQA64Zrm addr:$src)>;
3540   def : Pat<(alignedloadv32f16 addr:$src),
3541             (VMOVAPSZrm addr:$src)>;
3542   def : Pat<(alignedloadv32bf16 addr:$src),
3543             (VMOVAPSZrm addr:$src)>;
3544   def : Pat<(alignedloadv64i8 addr:$src),
3545             (VMOVDQA64Zrm addr:$src)>;
3546   def : Pat<(loadv16i32 addr:$src),
3547             (VMOVDQU64Zrm addr:$src)>;
3548   def : Pat<(loadv32i16 addr:$src),
3549             (VMOVDQU64Zrm addr:$src)>;
3550   def : Pat<(loadv32f16 addr:$src),
3551             (VMOVUPSZrm addr:$src)>;
3552   def : Pat<(loadv32bf16 addr:$src),
3553             (VMOVUPSZrm addr:$src)>;
3554   def : Pat<(loadv64i8 addr:$src),
3555             (VMOVDQU64Zrm addr:$src)>;
3557   // 512-bit store.
3558   def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3559             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3560   def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3561             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3562   def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
3563             (VMOVAPSZmr addr:$dst, VR512:$src)>;
3564   def : Pat<(alignedstore (v32bf16 VR512:$src), addr:$dst),
3565             (VMOVAPSZmr addr:$dst, VR512:$src)>;
3566   def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3567             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3568   def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3569             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3570   def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3571             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3572   def : Pat<(store (v32f16 VR512:$src), addr:$dst),
3573             (VMOVUPSZmr addr:$dst, VR512:$src)>;
3574   def : Pat<(store (v32bf16 VR512:$src), addr:$dst),
3575             (VMOVUPSZmr addr:$dst, VR512:$src)>;
3576   def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3577             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
// 128-bit and 256-bit counterparts of the 512-bit load/store patterns,
// available under HasVLX. The instruction choice follows the same rule:
// i32/i16/i8 vectors use VMOVDQA64/VMOVDQU64 and f16/bf16 vectors use
// VMOVAPS/VMOVUPS, here in their Z128 and Z256 forms.
3580 let Predicates = [HasVLX] in {
3581   // 128-bit load.
3582   def : Pat<(alignedloadv4i32 addr:$src),
3583             (VMOVDQA64Z128rm addr:$src)>;
3584   def : Pat<(alignedloadv8i16 addr:$src),
3585             (VMOVDQA64Z128rm addr:$src)>;
3586   def : Pat<(alignedloadv8f16 addr:$src),
3587             (VMOVAPSZ128rm addr:$src)>;
3588   def : Pat<(alignedloadv8bf16 addr:$src),
3589             (VMOVAPSZ128rm addr:$src)>;
3590   def : Pat<(alignedloadv16i8 addr:$src),
3591             (VMOVDQA64Z128rm addr:$src)>;
3592   def : Pat<(loadv4i32 addr:$src),
3593             (VMOVDQU64Z128rm addr:$src)>;
3594   def : Pat<(loadv8i16 addr:$src),
3595             (VMOVDQU64Z128rm addr:$src)>;
3596   def : Pat<(loadv8f16 addr:$src),
3597             (VMOVUPSZ128rm addr:$src)>;
3598   def : Pat<(loadv8bf16 addr:$src),
3599             (VMOVUPSZ128rm addr:$src)>;
3600   def : Pat<(loadv16i8 addr:$src),
3601             (VMOVDQU64Z128rm addr:$src)>;
3603   // 128-bit store.
3604   def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3605             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3606   def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3607             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3608   def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
3609             (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3610   def : Pat<(alignedstore (v8bf16 VR128X:$src), addr:$dst),
3611             (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3612   def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3613             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3614   def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3615             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3616   def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3617             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3618   def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
3619             (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3620   def : Pat<(store (v8bf16 VR128X:$src), addr:$dst),
3621             (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3622   def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3623             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3625   // 256-bit load.
3626   def : Pat<(alignedloadv8i32 addr:$src),
3627             (VMOVDQA64Z256rm addr:$src)>;
3628   def : Pat<(alignedloadv16i16 addr:$src),
3629             (VMOVDQA64Z256rm addr:$src)>;
3630   def : Pat<(alignedloadv16f16 addr:$src),
3631             (VMOVAPSZ256rm addr:$src)>;
3632   def : Pat<(alignedloadv16bf16 addr:$src),
3633             (VMOVAPSZ256rm addr:$src)>;
3634   def : Pat<(alignedloadv32i8 addr:$src),
3635             (VMOVDQA64Z256rm addr:$src)>;
3636   def : Pat<(loadv8i32 addr:$src),
3637             (VMOVDQU64Z256rm addr:$src)>;
3638   def : Pat<(loadv16i16 addr:$src),
3639             (VMOVDQU64Z256rm addr:$src)>;
3640   def : Pat<(loadv16f16 addr:$src),
3641             (VMOVUPSZ256rm addr:$src)>;
3642   def : Pat<(loadv16bf16 addr:$src),
3643             (VMOVUPSZ256rm addr:$src)>;
3644   def : Pat<(loadv32i8 addr:$src),
3645             (VMOVDQU64Z256rm addr:$src)>;
3647   // 256-bit store.
3648   def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3649             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3650   def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3651             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3652   def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
3653             (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3654   def : Pat<(alignedstore (v16bf16 VR256X:$src), addr:$dst),
3655             (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3656   def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3657             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3658   def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3659             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3660   def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3661             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3662   def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
3663             (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3664   def : Pat<(store (v16bf16 VR256X:$src), addr:$dst),
3665             (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3666   def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3667             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
// Maps masked selects, masked loads and masked stores of the three vector
// types in `_` (info512, info256, info128) onto the VMOVDQU16 masked forms.
// Each width gets the same ten patterns:
//   - register select with pass-through (rrk) or zero (rrkz),
//   - select fed by an aligned or unaligned load, with pass-through (rmk) or
//     zero (rmkz),
//   - masked_load with a register pass-through (rmk), or with undef / zero
//     pass-through (rmkz),
//   - masked_store (mrk).
// The 512-bit patterns require HasBWI; the 256/128-bit ones add HasVLX.
3670 multiclass mask_move_lowering_f16_bf16<AVX512VLVectorVTInfo _> {
// 512-bit patterns: VK32WM mask, VR512 registers.
3671 let Predicates = [HasBWI] in {
3672   def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), (_.info512.VT VR512:$src0))),
3673             (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
3674   def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), _.info512.ImmAllZerosV)),
3675             (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
3676   def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3677                      (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), (_.info512.VT VR512:$src0))),
3678             (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3679   def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3680                      (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), _.info512.ImmAllZerosV)),
3681             (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3682   def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3683                      (_.info512.VT (_.info512.LdFrag addr:$src)), (_.info512.VT VR512:$src0))),
3684             (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3685   def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3686                      (_.info512.VT (_.info512.LdFrag addr:$src)), _.info512.ImmAllZerosV)),
3687             (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3688   def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, (_.info512.VT VR512:$src0))),
3689             (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3690   def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, undef)),
3691             (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3692   def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, _.info512.ImmAllZerosV)),
3693             (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3695   def : Pat<(masked_store (_.info512.VT VR512:$src), addr:$dst, VK32WM:$mask),
3696             (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
// 256-bit (VK16WM, VR256X) and 128-bit (VK8WM, VR128X) patterns.
3698 let Predicates = [HasBWI, HasVLX] in {
3699   def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src0))),
3700             (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
3701   def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), _.info256.ImmAllZerosV)),
3702             (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
3703   def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3704                      (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
3705             (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3706   def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3707                      (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), _.info256.ImmAllZerosV)),
3708             (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3709   def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3710                      (_.info256.VT (_.info256.LdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
3711             (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3712   def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3713                      (_.info256.VT (_.info256.LdFrag addr:$src)), _.info256.ImmAllZerosV)),
3714             (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3715   def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, (_.info256.VT VR256X:$src0))),
3716             (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3717   def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, undef)),
3718             (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3719   def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, _.info256.ImmAllZerosV)),
3720             (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3722   def : Pat<(masked_store (_.info256.VT VR256X:$src), addr:$dst, VK16WM:$mask),
3723             (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;
3725   def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), (_.info128.VT VR128X:$src0))),
3726             (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
3727   def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), _.info128.ImmAllZerosV)),
3728             (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
3729   def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3730                      (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
3731             (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3732   def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3733                      (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), _.info128.ImmAllZerosV)),
3734             (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3735   def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3736                      (_.info128.VT (_.info128.LdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
3737             (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3738   def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3739                      (_.info128.VT (_.info128.LdFrag addr:$src)), _.info128.ImmAllZerosV)),
3740             (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3741   def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, (_.info128.VT VR128X:$src0))),
3742             (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3743   def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, undef)),
3744             (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3745   def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, _.info128.ImmAllZerosV)),
3746             (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3748   def : Pat<(masked_store (_.info128.VT VR128X:$src), addr:$dst, VK8WM:$mask),
3749             (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
// Emit the VMOVDQU16-based masked move patterns for the f16 and bf16 vector
// type families.
3753 defm : mask_move_lowering_f16_bf16<avx512vl_f16_info>;
3754 defm : mask_move_lowering_f16_bf16<avx512vl_bf16_info>;
3756 // Move Int Doubleword to Packed Double Int
// Opcode 0x6E moves from a GPR or memory into a vector/FP register; the
// REX_W forms are the 64-bit "vmovq" variants.
3758 let ExeDomain = SSEPackedInt in {
// GR32 -> VR128X, selected for scalar_to_vector producing v4i32.
3759 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3760                       "vmovd\t{$src, $dst|$dst, $src}",
3761                       [(set VR128X:$dst,
3762                         (v4i32 (scalar_to_vector GR32:$src)))]>,
3763                         EVEX, Sched<[WriteVecMoveFromGpr]>;
// Same operation with the i32 source loaded from memory.
3764 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3765                       "vmovd\t{$src, $dst|$dst, $src}",
3766                       [(set VR128X:$dst,
3767                         (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3768                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
// GR64 -> VR128X, selected for scalar_to_vector producing v2i64.
3769 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3770                       "vmovq\t{$src, $dst|$dst, $src}",
3771                         [(set VR128X:$dst,
3772                           (v2i64 (scalar_to_vector GR64:$src)))]>,
3773                       EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
// Memory-source form of the 64-bit move; no selection pattern, codegen-only
// with ForceDisassemble set.
3774 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3775 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3776                       (ins i64mem:$src),
3777                       "vmovq\t{$src, $dst|$dst, $src}", []>,
3778                       EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
// Codegen-only bitconverts between GR64 and the scalar FR64X class.
3779 let isCodeGenOnly = 1 in {
3780 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3781                        "vmovq\t{$src, $dst|$dst, $src}",
3782                        [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3783                        EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
3784 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3785                          "vmovq\t{$src, $dst|$dst, $src}",
3786                          [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3787                          EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
3789 } // ExeDomain = SSEPackedInt
3791 // Move Int Doubleword to Single Scalar
// Codegen-only record selected for a bitconvert from GR32 to FR32X.
3793 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3794 def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3795                       "vmovd\t{$src, $dst|$dst, $src}",
3796                       [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3797                       EVEX, Sched<[WriteVecMoveFromGpr]>;
3798 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3800 // Move doubleword from xmm register to r/m32
// Both forms extract element 0 of a v4i32: rr writes it to a GR32, mr stores
// it to memory.
3802 let ExeDomain = SSEPackedInt in {
3803 def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3804                        "vmovd\t{$src, $dst|$dst, $src}",
3805                        [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3806                                         (iPTR 0)))]>,
3807                        EVEX, Sched<[WriteVecMoveToGpr]>;
3808 def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
3809                        (ins i32mem:$dst, VR128X:$src),
3810                        "vmovd\t{$src, $dst|$dst, $src}",
3811                        [(store (i32 (extractelt (v4i32 VR128X:$src),
3812                                      (iPTR 0))), addr:$dst)]>,
3813                        EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3814 } // ExeDomain = SSEPackedInt
3816 // Move quadword from xmm1 register to r/m64
3818 let ExeDomain = SSEPackedInt in {
// Element 0 of a v2i64 -> GR64 (opcode 0x7E).
3819 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3820                       "vmovq\t{$src, $dst|$dst, $src}",
3821                       [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3822                                                    (iPTR 0)))]>,
3823                       TB, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>,
3824                       Requires<[HasAVX512]>;
// Memory-destination form of opcode 0x7E: no pattern, codegen-only with
// ForceDisassemble set, and additionally requires In64BitMode.
3826 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3827 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3828                       "vmovq\t{$src, $dst|$dst, $src}", []>, TB, PD,
3829                       EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecStore]>,
3830                       Requires<[HasAVX512, In64BitMode]>;
// Opcode 0xD6 store of element 0 of a v2i64; this is the record that carries
// the store pattern.
3832 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3833                       (ins i64mem:$dst, VR128X:$src),
3834                       "vmovq\t{$src, $dst|$dst, $src}",
3835                       [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3836                               addr:$dst)]>,
3837                       EVEX, TB, PD, REX_W, EVEX_CD8<64, CD8VT1>,
3838                       Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
// Register-to-register form of opcode 0xD6; no pattern, codegen-only with
// ForceDisassemble set.
3840 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
3841 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3842                              (ins VR128X:$src),
3843                              "vmovq\t{$src, $dst|$dst, $src}", []>,
3844                              EVEX, REX_W, Sched<[SchedWriteVecLogic.XMM]>;
3845 } // ExeDomain = SSEPackedInt
// "vmovq.s" mnemonic alias that resolves to the 0xD6 register form.
3847 def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
3848                 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
// Select X86vextractstore64 of a v2i64 to the 0xD6 quadword store.
3850 let Predicates = [HasAVX512] in {
3851   def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
3852             (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
3855 // Move Scalar Single to Double Int
// Codegen-only record selected for a bitconvert from FR32X to GR32.
3857 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3858 def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3859                       (ins FR32X:$src),
3860                       "vmovd\t{$src, $dst|$dst, $src}",
3861                       [(set GR32:$dst, (bitconvert FR32X:$src))]>,
3862                       EVEX, Sched<[WriteVecMoveToGpr]>;
3863 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3865 // Move Quadword Int to Packed Quadword Int
// Loads an i64 from memory into a VR128X via scalar_to_vector (v2i64).
3867 let ExeDomain = SSEPackedInt in {
3868 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3869                       (ins i64mem:$src),
3870                       "vmovq\t{$src, $dst|$dst, $src}",
3871                       [(set VR128X:$dst,
3872                         (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3873                       EVEX, REX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3874 } // ExeDomain = SSEPackedInt
3876 // Allow "vmovd" but print "vmovq".
// Both aliases cover the GR64 <-> VR128X register forms, one per direction.
3877 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3878                 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
3879 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3880                 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3882 // Conversions between masks and scalar fp.
// Each conversion is a two-instruction sequence through a GPR: a KMOV between
// the mask register and the GPR, plus a vmovd/vmovq record between the GPR
// and the scalar FP register.
// 32-bit: v32i1 <-> f32.
3883 def : Pat<(v32i1 (bitconvert FR32X:$src)),
3884           (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
3885 def : Pat<(f32 (bitconvert VK32:$src)),
3886           (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
// 64-bit: v64i1 <-> f64.
3888 def : Pat<(v64i1 (bitconvert FR64X:$src)),
3889           (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
3890 def : Pat<(f64 (bitconvert VK64:$src)),
3891           (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
3893 //===----------------------------------------------------------------------===//
3894 // AVX-512  MOVSH, MOVSS, MOVSD
3895 //===----------------------------------------------------------------------===//
// avx512_move_scalar - Defines the EVEX scalar move family for one element
// type.
//
// Parameters:
//   asm         - mnemonic used in every assembly string.
//   OpNode      - node matched by the register-to-register forms.
//   vzload_frag - load fragment matched by the plain `rm` form.
//   _           - X86VectorVTInfo supplying RC, FRC, KRCWM, VT, ScalarMemOp,
//                 ScalarLdFrag, ImmAllZerosV and ExeDomain.
//   prd         - gating predicate (HasAVX512 by default).
//
// Records produced (opcode 0x10 unless noted):
//   rr     - reg-reg move. Predicated on [prd, OptForSize], except when prd
//            is HasFP16, in which case only [HasFP16] is required.
//   rrkz   - reg-reg move under $mask; the X86selects fallback value is
//            _.ImmAllZerosV ({z} form).
//   rrk    - reg-reg move under $mask; the X86selects fallback value is
//            $src0, which is tied to $dst.
//   rm     - load matching vzload_frag (canFoldAsLoad, isReMaterializable).
//   rm_alt - codegen-only load into _.FRC matching _.ScalarLdFrag
//            (canFoldAsLoad, isReMaterializable).
//   rmk    - masked load with $src0 tied to $dst; no ISel pattern here.
//   rmkz   - zero-masked load; no ISel pattern here.
//   mr     - store of _.FRC (opcode 0x11).
//   mrk    - masked store (opcode 0x11); no ISel pattern here.
3897 multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
3898                               X86VectorVTInfo _, Predicate prd = HasAVX512> {
3899   let Predicates = !if (!eq (prd, HasFP16), [HasFP16], [prd, OptForSize]) in
3900   def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3901              (ins _.RC:$src1, _.RC:$src2),
3902              !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3903              [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3904              _.ExeDomain>, EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>;
3905   let Predicates = [prd] in {
3906   def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3907               (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3908               !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3909               "$dst {${mask}} {z}, $src1, $src2}"),
3910               [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3911                                       (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3912                                       _.ImmAllZerosV)))],
3913               _.ExeDomain>, EVEX, VVVV, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
3914   let Constraints = "$src0 = $dst"  in
3915   def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3916              (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3917              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3918              "$dst {${mask}}, $src1, $src2}"),
3919              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3920                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3921                                      (_.VT _.RC:$src0))))],
3922              _.ExeDomain>, EVEX, VVVV, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
3923   let canFoldAsLoad = 1, isReMaterializable = 1 in {
3924   def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
3925              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3926              [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
3927              _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3928   // _alt version uses the scalar FP register class (_.FRC) instead of _.RC.
3929   let isCodeGenOnly = 1 in
3930   def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3931                  !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3932                  [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3933                  _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3934   }
3935   let mayLoad = 1, hasSideEffects = 0 in {
3936     let Constraints = "$src0 = $dst" in
3937     def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3938                (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3939                !strconcat(asm, "\t{$src, $dst {${mask}}|",
3940                "$dst {${mask}}, $src}"),
3941                [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
3942     def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3943                (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3944                !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3945                "$dst {${mask}} {z}, $src}"),
3946                [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
3947   }
3948   def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
3949              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3950              [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
3951              EVEX, Sched<[WriteFStore]>;
3952   let mayStore = 1, hasSideEffects = 0 in
3953   def mrk: AVX512PI<0x11, MRMDestMem, (outs),
3954               (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
3955               !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
3956               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>;
3957   }
// Instantiate the scalar move family for f32, f64 and f16. All three are
// VEX_LIG and differ in map/prefix and compressed-displacement element size:
//   VMOVSSZ - TB, XS, EVEX_CD8<32, CD8VT1>
//   VMOVSDZ - TB, XD, REX_W, EVEX_CD8<64, CD8VT1>
//   VMOVSHZ - T_MAP5, XS, EVEX_CD8<16, CD8VT1>, gated on HasFP16 instead of
//             the default HasAVX512.
3960 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
3961                                   VEX_LIG, TB, XS, EVEX_CD8<32, CD8VT1>;
3963 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
3964                                   VEX_LIG, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
3966 defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
3967                                   HasFP16>,
3968                                   VEX_LIG, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
// avx512_move_scalar_lowering - Folds a scalar X86selects that feeds the low
// element of OpNode into the masked register-move forms of InstrStr.
//
// Parameters:
//   InstrStr - instruction name prefix; "rrk"/"rrkz" are appended and cast.
//   OpNode   - the scalar move node being matched.
//   ZeroFP   - leaf matching the zero constant of the element type.
//   _        - vector type info for the 128-bit vector type.
//
// Pattern 1: select(mask, src1, src2) -> InstrStr#rrk, with $src2 (copied to
//            _.RC) as the tied pass-through operand and $src1 (copied to
//            _.RC) as the last operand.
// Pattern 2: select(mask, src1, ZeroFP) -> InstrStr#rrkz.
3970 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
3971                                        PatLeaf ZeroFP, X86VectorVTInfo _> {
3973 def : Pat<(_.VT (OpNode _.RC:$src0,
3974                         (_.VT (scalar_to_vector
3975                                   (_.EltVT (X86selects VK1WM:$mask,
3976                                                        (_.EltVT _.FRC:$src1),
3977                                                        (_.EltVT _.FRC:$src2))))))),
3978           (!cast<Instruction>(InstrStr#rrk)
3979                         (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
3980                         VK1WM:$mask,
3981                         (_.VT _.RC:$src0),
3982                         (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
3984 def : Pat<(_.VT (OpNode _.RC:$src0,
3985                         (_.VT (scalar_to_vector
3986                                   (_.EltVT (X86selects VK1WM:$mask,
3987                                                        (_.EltVT _.FRC:$src1),
3988                                                        (_.EltVT ZeroFP))))))),
3989           (!cast<Instruction>(InstrStr#rrkz)
3990                         VK1WM:$mask,
3991                         (_.VT _.RC:$src0),
3992                         (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
// avx512_store_scalar_lowering - Selects InstrStr#mrk for a 512-bit
// masked_store whose stored value is a 128-bit register inserted at index 0
// of an undef 512-bit vector.
//
// Parameters:
//   InstrStr - instruction name prefix; "mrk" is appended and cast.
//   _        - AVX512VLVectorVTInfo providing info128 and info512.
//   Mask     - dag describing the mask operand to match; it is expected to
//              bind MaskRC:$mask.
//   MaskRC   - register class of $mask, which is copied to VK1WM.
3995 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
3996                                         dag Mask, RegisterClass MaskRC> {
3998 def : Pat<(masked_store
3999              (_.info512.VT (insert_subvector undef,
4000                                (_.info128.VT _.info128.RC:$src),
4001                                (iPTR 0))), addr:$dst, Mask),
4002           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4003                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4004                       _.info128.RC:$src)>;
// avx512_store_scalar_lowering_subreg - Same source pattern as
// avx512_store_scalar_lowering (512-bit masked_store of a 128-bit value
// inserted at index 0 of undef), but $mask is first placed into an
// IMPLICIT_DEF i32 at sub-register index `subreg` via INSERT_SUBREG, and that
// i32 is what gets copied to VK1WM for InstrStr#mrk.
4008 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
4009                                                AVX512VLVectorVTInfo _,
4010                                                dag Mask, RegisterClass MaskRC,
4011                                                SubRegIndex subreg> {
4013 def : Pat<(masked_store
4014              (_.info512.VT (insert_subvector undef,
4015                                (_.info128.VT _.info128.RC:$src),
4016                                (iPTR 0))), addr:$dst, Mask),
4017           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4018                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4019                       _.info128.RC:$src)>;
4023 // This matches the more recent codegen from clang that avoids emitting a 512
4024 // bit masked store directly. Codegen will widen 128-bit masked store to 512
4025 // bits on AVX512F only targets.
//
// Two mask dags are supplied: Mask512 for the widened 512-bit store and
// Mask128 for the native 128-bit store. Both patterns select InstrStr#mrk,
// with $mask inserted into an IMPLICIT_DEF i32 at `subreg` and then copied to
// VK1WM.
4026 multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4027                                                AVX512VLVectorVTInfo _,
4028                                                dag Mask512, dag Mask128,
4029                                                RegisterClass MaskRC,
4030                                                SubRegIndex subreg> {
4032 // AVX512F pattern.
4033 def : Pat<(masked_store
4034              (_.info512.VT (insert_subvector undef,
4035                                (_.info128.VT _.info128.RC:$src),
4036                                (iPTR 0))), addr:$dst, Mask512),
4037           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4038                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4039                       _.info128.RC:$src)>;
4041 // AVX512VL pattern.
4042 def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4043           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4044                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4045                       _.info128.RC:$src)>;
// avx512_load_scalar_lowering - Selects the masked scalar load forms of
// InstrStr for the low 128 bits (extract_subvector at index 0) of a 512-bit
// masked_load.
//
// Pattern 1: pass-through is _.info512.ImmAllZerosV -> InstrStr#rmkz.
// Pattern 2: pass-through is X86vzmovl of a 128-bit $src inserted at index 0
//            of undef -> InstrStr#rmk with $src as the tied operand.
//
// In both cases MaskRC:$mask (bound inside the Mask dag) is copied to VK1WM.
4048 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4049                                        dag Mask, RegisterClass MaskRC> {
4051 def : Pat<(_.info128.VT (extract_subvector
4052                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
4053                                         _.info512.ImmAllZerosV)),
4054                            (iPTR 0))),
4055           (!cast<Instruction>(InstrStr#rmkz)
4056                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4057                       addr:$srcAddr)>;
4059 def : Pat<(_.info128.VT (extract_subvector
4060                 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4061                       (_.info512.VT (insert_subvector undef,
4062                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4063                             (iPTR 0))))),
4064                 (iPTR 0))),
4065           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4066                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4067                       addr:$srcAddr)>;
// avx512_load_scalar_lowering_subreg - Same two source patterns as
// avx512_load_scalar_lowering (zero pass-through -> rmkz, X86vzmovl
// pass-through -> rmk), but $mask is first inserted into an IMPLICIT_DEF i32
// at sub-register index `subreg` before being copied to VK1WM.
4071 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4072                                               AVX512VLVectorVTInfo _,
4073                                               dag Mask, RegisterClass MaskRC,
4074                                               SubRegIndex subreg> {
4076 def : Pat<(_.info128.VT (extract_subvector
4077                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
4078                                         _.info512.ImmAllZerosV)),
4079                            (iPTR 0))),
4080           (!cast<Instruction>(InstrStr#rmkz)
4081                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4082                       addr:$srcAddr)>;
4084 def : Pat<(_.info128.VT (extract_subvector
4085                 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4086                       (_.info512.VT (insert_subvector undef,
4087                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4088                             (iPTR 0))))),
4089                 (iPTR 0))),
4090           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4091                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4092                       addr:$srcAddr)>;
4096 // This matches the more recent codegen from clang that avoids emitting a 512
4097 // bit masked load directly. Codegen will widen 128-bit masked load to 512
4098 // bits on AVX512F only targets.
//
// Mask512 is matched against the widened 512-bit masked_load (result taken
// via extract_subvector at index 0); Mask128 is matched against the native
// 128-bit masked_load. For each width there is a zero pass-through pattern
// (-> InstrStr#rmkz) and an X86vzmovl pass-through pattern (-> InstrStr#rmk).
// $mask is inserted into an IMPLICIT_DEF i32 at `subreg` and copied to VK1WM.
4099 multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4100                                               AVX512VLVectorVTInfo _,
4101                                               dag Mask512, dag Mask128,
4102                                               RegisterClass MaskRC,
4103                                               SubRegIndex subreg> {
4104 // AVX512F patterns.
4105 def : Pat<(_.info128.VT (extract_subvector
4106                          (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4107                                         _.info512.ImmAllZerosV)),
4108                            (iPTR 0))),
4109           (!cast<Instruction>(InstrStr#rmkz)
4110                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4111                       addr:$srcAddr)>;
4113 def : Pat<(_.info128.VT (extract_subvector
4114                 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4115                       (_.info512.VT (insert_subvector undef,
4116                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4117                             (iPTR 0))))),
4118                 (iPTR 0))),
4119           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4120                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4121                       addr:$srcAddr)>;
4123 // AVX512VL patterns.
4124 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4125                          _.info128.ImmAllZerosV)),
4126           (!cast<Instruction>(InstrStr#rmkz)
4127                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4128                       addr:$srcAddr)>;
4130 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4131                          (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4132           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4133                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4134                       addr:$srcAddr)>;
// Scalar move and masked-store lowering for f32 (VMOVSSZ) and f64 (VMOVSDZ).
// The store instantiations differ only in how the single mask bit reaches
// the vXi1 mask:
//   VMOVSSZ / GR32 : (and $mask, 1), truncated to i16, bitconverted to v16i1
//   VMOVSSZ / GR16 : (and $mask, 1), bitconverted to v16i1 (sub_16bit)
//   VMOVSDZ / GR8  : (and $mask, 1), bitconverted to v8i1 (sub_8bit)
4137 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4138 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4140 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4141                    (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4142 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4143                    (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4144 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4145                    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
// FP16 (VMOVSHZ) counterparts of the scalar move/store/load lowering, all
// gated on HasFP16. The masks matched here are:
//   - GR32: (and $mask, 1) bitconverted to v32i1, used by both the plain and
//     the sub_32bit instantiations;
//   - GR8:  (and $mask, 1) bitconverted to v8i1, either inserted at index 0
//     of an all-zeros v32i1 (512-bit form) or used directly (128-bit form).
// The two trailing patterns lower a bare f16 X86selects to VMOVSHZrrk /
// VMOVSHZrrkz, copying the scalars to VR128X, passing IMPLICIT_DEF for the
// operand after the mask, and copying the result back to FR16X.
4147 let Predicates = [HasFP16] in {
4148 defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
4149 defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4150                    (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4151 defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4152                    (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4153 defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4154                    (v32i1 (insert_subvector
4155                            (v32i1 immAllZerosV),
4156                            (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4157                            (iPTR 0))),
4158                    (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4159                    GR8, sub_8bit>;
4161 defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4162                    (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4163 defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4164                    (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4165 defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4166                    (v32i1 (insert_subvector
4167                            (v32i1 immAllZerosV),
4168                            (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4169                            (iPTR 0))),
4170                    (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4171                    GR8, sub_8bit>;
4173 def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
4174           (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
4175            (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
4176            VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4177            (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4179 def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
4180           (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4181            (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
// Masked-store lowering for f32/f64 where the mask originates from a GR8
// (and $mask, 1) bitconverted to v8i1.
//   VMOVSSZ: the low v4i1 is extracted; the 512-bit form inserts it at index
//            0 of an all-zeros v16i1, the 128-bit form uses the v4i1 itself.
//   VMOVSDZ: the low v2i1 is extracted; the 512-bit form inserts it at index
//            0 of an all-zeros v16i1 and then extracts the low v8i1, the
//            128-bit form uses the v2i1 itself.
4184 defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4185                    (v16i1 (insert_subvector
4186                            (v16i1 immAllZerosV),
4187                            (v4i1 (extract_subvector
4188                                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4189                                   (iPTR 0))),
4190                            (iPTR 0))),
4191                    (v4i1 (extract_subvector
4192                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4193                           (iPTR 0))), GR8, sub_8bit>;
4194 defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4195                    (v8i1
4196                     (extract_subvector
4197                      (v16i1
4198                       (insert_subvector
4199                        (v16i1 immAllZerosV),
4200                        (v2i1 (extract_subvector
4201                               (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4202                               (iPTR 0))),
4203                        (iPTR 0))),
4204                      (iPTR 0))),
4205                    (v2i1 (extract_subvector
4206                           (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4207                           (iPTR 0))), GR8, sub_8bit>;
// Masked-load lowering for f32 (VMOVSSZ) and f64 (VMOVSDZ). The mask dags
// passed here mirror the ones used by the corresponding masked-store
// instantiations: GR32/GR16 sources for v16i1, a GR8 source for v8i1, and the
// GR8-based v4i1 / v2i1 forms for the subreg2 (512-bit + 128-bit) variants.
4209 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4210                    (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4211 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4212                    (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4213 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4214                    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4216 defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4217                    (v16i1 (insert_subvector
4218                            (v16i1 immAllZerosV),
4219                            (v4i1 (extract_subvector
4220                                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4221                                   (iPTR 0))),
4222                            (iPTR 0))),
4223                    (v4i1 (extract_subvector
4224                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4225                           (iPTR 0))), GR8, sub_8bit>;
4226 defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4227                    (v8i1
4228                     (extract_subvector
4229                      (v16i1
4230                       (insert_subvector
4231                        (v16i1 immAllZerosV),
4232                        (v2i1 (extract_subvector
4233                               (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4234                               (iPTR 0))),
4235                        (iPTR 0))),
4236                      (iPTR 0))),
4237                    (v2i1 (extract_subvector
4238                           (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4239                           (iPTR 0))), GR8, sub_8bit>;
// Lowering of X86selects on f32/f64 scalars and on v4f32/v2f64 vectors to
// the masked VMOVSS/VMOVSD forms.
//
// Scalar register forms: both scalars are copied to VR128X; the false value
// ($src2) becomes the tied pass-through of rrk, IMPLICIT_DEF is passed for
// the operand after the mask, and the result is copied back to FR32X/FR64X.
// A zero false value selects rrkz instead.
//
// Scalar load forms: a loaded true value selects rmk (pass-through $src0) or
// rmkz (zero false value).
//
// Vector forms: $src2 is the rrk pass-through and $src1 is used for both
// remaining register operands; a zero false value selects rrkz.
4241 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4242           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4243            (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4244            VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4245            (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4247 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4248           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4249            (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4251 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
4252           (COPY_TO_REGCLASS
4253            (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
4254                                                        VK1WM:$mask, addr:$src)),
4255            FR32X)>;
4256 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
4257           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
4259 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4260           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4261            (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4262            VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4263            (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4265 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
4266           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4267            (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4269 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
4270           (COPY_TO_REGCLASS
4271            (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
4272                                                        VK1WM:$mask, addr:$src)),
4273            FR64X)>;
4274 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
4275           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
4278 def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
4279           (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4280 def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
4281           (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4283 def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
4284           (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4285 def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
4286           (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
// "_REV" register-to-register forms of vmovsh/vmovss/vmovsd. They use opcode
// 0x11 with MRMDestReg (the forms defined by avx512_move_scalar use 0x10 with
// MRMSrcReg) and carry the same assembly strings. They have no ISel patterns,
// are isCodeGenOnly, and set ForceDisassemble so the disassembler tables
// still include them. Each mnemonic gets unmasked, merge-masked (rrk, with
// $src0 tied to $dst) and zero-masked (rrkz) variants; the vmovsh ones are
// additionally gated on HasFP16.
4288 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
4289   let Predicates = [HasFP16] in {
4290     def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4291         (ins VR128X:$src1, VR128X:$src2),
4292         "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4293         []>, T_MAP5, XS, EVEX, VVVV, VEX_LIG,
4294         Sched<[SchedWriteFShuffle.XMM]>;
4296     let Constraints = "$src0 = $dst" in
4297     def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4298         (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
4299          VR128X:$src1, VR128X:$src2),
4300         "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
4301           "$dst {${mask}}, $src1, $src2}",
4302         []>, T_MAP5, XS, EVEX_K, EVEX, VVVV, VEX_LIG,
4303         Sched<[SchedWriteFShuffle.XMM]>;
4305     def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4306         (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4307         "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
4308           "$dst {${mask}} {z}, $src1, $src2}",
4309         []>, EVEX_KZ, T_MAP5, XS, EVEX, VVVV, VEX_LIG,
4310         Sched<[SchedWriteFShuffle.XMM]>;
4311   }
4312   def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4313                            (ins VR128X:$src1, VR128X:$src2),
4314                            "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4315                            []>, TB, XS, EVEX, VVVV, VEX_LIG,
4316                            Sched<[SchedWriteFShuffle.XMM]>;
4318   let Constraints = "$src0 = $dst" in
4319   def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4320                              (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4321                                                    VR128X:$src1, VR128X:$src2),
4322                              "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4323                                         "$dst {${mask}}, $src1, $src2}",
4324                              []>, EVEX_K, TB, XS, EVEX, VVVV, VEX_LIG,
4325                              Sched<[SchedWriteFShuffle.XMM]>;
4327   def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4328                          (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4329                          "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4330                                     "$dst {${mask}} {z}, $src1, $src2}",
4331                          []>, EVEX_KZ, TB, XS, EVEX, VVVV, VEX_LIG,
4332                          Sched<[SchedWriteFShuffle.XMM]>;
4334   def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4335                            (ins VR128X:$src1, VR128X:$src2),
4336                            "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4337                            []>, TB, XD, EVEX, VVVV, VEX_LIG, REX_W,
4338                            Sched<[SchedWriteFShuffle.XMM]>;
4340   let Constraints = "$src0 = $dst" in
4341   def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4342                              (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4343                                                    VR128X:$src1, VR128X:$src2),
4344                              "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4345                                         "$dst {${mask}}, $src1, $src2}",
4346                              []>, EVEX_K, TB, XD, EVEX, VVVV, VEX_LIG,
4347                              REX_W, Sched<[SchedWriteFShuffle.XMM]>;
4349   def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4350                               (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4351                                                           VR128X:$src2),
4352                               "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4353                                          "$dst {${mask}} {z}, $src1, $src2}",
4354                               []>, EVEX_KZ, TB, XD, EVEX, VVVV, VEX_LIG,
4355                               REX_W, Sched<[SchedWriteFShuffle.XMM]>;
// ".s"-suffixed mnemonics (vmovsh.s / vmovss.s / vmovsd.s) that map to the
// corresponding *_REV register-to-register instructions, in unmasked,
// merge-masked and zero-masked forms. The trailing 0 is the InstAlias emit
// argument, so these spellings are accepted on input but not used when
// printing.
4358 def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4359                 (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4360 def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
4361                              "$dst {${mask}}, $src1, $src2}",
4362                 (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
4363                                 VR128X:$src1, VR128X:$src2), 0>;
4364 def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4365                              "$dst {${mask}} {z}, $src1, $src2}",
4366                 (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
4367                                  VR128X:$src1, VR128X:$src2), 0>;
4368 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4369                 (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4370 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4371                              "$dst {${mask}}, $src1, $src2}",
4372                 (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4373                                 VR128X:$src1, VR128X:$src2), 0>;
4374 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4375                              "$dst {${mask}} {z}, $src1, $src2}",
4376                 (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4377                                  VR128X:$src1, VR128X:$src2), 0>;
4378 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4379                 (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4380 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4381                              "$dst {${mask}}, $src1, $src2}",
4382                 (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4383                                 VR128X:$src1, VR128X:$src2), 0>;
4384 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4385                              "$dst {${mask}} {z}, $src1, $src2}",
4386                 (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4387                                  VR128X:$src1, VR128X:$src2), 0>;
// X86vzmovl lowering used when optimizing for size: VMOVSSZrr with an
// AVX512_128_SET0 zero vector as the first source and the input as the
// second. The 256-bit and 512-bit forms operate on the sub_xmm sub-register
// and wrap the 128-bit result with SUBREG_TO_REG.
4389 let Predicates = [HasAVX512, OptForSize] in {
4390   def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4391             (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4392   def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4393             (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4395   // Move low f32 and clear high bits.
4396   def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4397             (SUBREG_TO_REG (i32 0),
4398              (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4399               (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4400   def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4401             (SUBREG_TO_REG (i32 0),
4402              (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4403               (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4405   def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4406             (SUBREG_TO_REG (i32 0),
4407              (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4408               (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4409   def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4410             (SUBREG_TO_REG (i32 0),
4411              (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4412               (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4415 // Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4416 // VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
// Only the 512-bit X86vzmovl forms are handled here: the sub_xmm part of the
// input is blended against V_SET0 (VBLENDPSrri with immediate 1 for v16f32,
// VPBLENDWrri with immediate 3 for v16i32) and the result is wrapped with
// SUBREG_TO_REG.
4417 let Predicates = [HasAVX512, OptForSpeed] in {
4418   def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4419             (SUBREG_TO_REG (i32 0),
4420              (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4421                           (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4422                           (i8 1))), sub_xmm)>;
4423   def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4424             (SUBREG_TO_REG (i32 0),
4425              (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4426                           (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4427                           (i8 3))), sub_xmm)>;
// Scalar f32/f64 loads into vector registers. A 128-bit scalar_to_vector of
// a scalar load selects VMOVSSZrm / VMOVSDZrm directly; the 256-bit and
// 512-bit X86vzload32 / X86vzload64 forms use the same instruction wrapped in
// SUBREG_TO_REG at sub_xmm.
4430 let Predicates = [HasAVX512] in {
4431   def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4432             (VMOVSSZrm addr:$src)>;
4433   def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4434             (VMOVSDZrm addr:$src)>;
4436   // Represent the same patterns above but in the form they appear for
4437   // 256-bit types
4438   def : Pat<(v8f32 (X86vzload32 addr:$src)),
4439             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4440   def : Pat<(v4f64 (X86vzload64 addr:$src)),
4441             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4443   // Represent the same patterns above but in the form they appear for
4444   // 512-bit types
4445   def : Pat<(v16f32 (X86vzload32 addr:$src)),
4446             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4447   def : Pat<(v8f64 (X86vzload64 addr:$src)),
4448             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
// FP16 X86vzmovl and X86vzload16 lowering. X86vzmovl on 16-bit element
// vectors (f16 and i16) selects VMOVSHZrr with an AVX512_128_SET0 zero vector
// as the first source; the 256-bit and 512-bit forms work on sub_xmm and wrap
// the result with SUBREG_TO_REG. X86vzload16 selects VMOVSHZrm, likewise
// wrapped for the wider f16 vector types.
4450 let Predicates = [HasFP16] in {
4451   def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
4452             (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
4453   def : Pat<(v8i16 (X86vzmovl (v8i16 VR128X:$src))),
4454             (VMOVSHZrr (v8i16 (AVX512_128_SET0)), VR128X:$src)>;
4456   // FIXME we need better canonicalization in dag combine
4457   def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
4458             (SUBREG_TO_REG (i32 0),
4459              (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4460               (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4461   def : Pat<(v16i16 (X86vzmovl (v16i16 VR256X:$src))),
4462             (SUBREG_TO_REG (i32 0),
4463              (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
4464               (v8i16 (EXTRACT_SUBREG (v16i16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4466   // FIXME we need better canonicalization in dag combine
4467   def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
4468             (SUBREG_TO_REG (i32 0),
4469              (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4470               (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
4471   def : Pat<(v32i16 (X86vzmovl (v32i16 VR512:$src))),
4472             (SUBREG_TO_REG (i32 0),
4473              (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
4474               (v8i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_xmm)))), sub_xmm)>;
4476   def : Pat<(v8f16 (X86vzload16 addr:$src)),
4477             (VMOVSHZrm addr:$src)>;
4479   def : Pat<(v16f16 (X86vzload16 addr:$src)),
4480             (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4482   def : Pat<(v32f16 (X86vzload16 addr:$src)),
4483             (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
// Register-to-register "vmovq" (opcode 0x7E, MRMSrcReg, EVEX with REX_W)
// whose selection pattern is X86vzmovl on a v2i64 held in VR128X.
// Defined in the SSEPackedInt execution domain and scheduled with
// SchedWriteVecLogic.XMM.
4486 let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4487 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4488                                 (ins VR128X:$src),
4489                                 "vmovq\t{$src, $dst|$dst, $src}",
4490                                 [(set VR128X:$dst, (v2i64 (X86vzmovl
4491                                                    (v2i64 VR128X:$src))))]>,
4492                                 EVEX, REX_W;
// Selection patterns gated on HasAVX512 that map scalar-to-vector moves,
// zero-extending loads (X86vzload32/64) and X86vzmovl onto the EVEX
// movd/movq instructions (VMOVDI2PDIZ*, VMOV64toPQIZrr, VMOVQI2PQIZrm,
// VMOVZPQILo2PQIZrr).
4495 let Predicates = [HasAVX512] in {
  // An any-extended GR8 is first placed into an undefined i32 via
  // INSERT_SUBREG at sub_8bit, then moved with VMOVDI2PDIZrr.
4496   def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
4497             (VMOVDI2PDIZrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
4498                                               GR8:$src, sub_8bit)))>;
4499   def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4500             (VMOVDI2PDIZrr GR32:$src)>;
4502   def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4503             (VMOV64toPQIZrr GR64:$src)>;
4505   // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
4506   def : Pat<(v4i32 (X86vzload32 addr:$src)),
4507             (VMOVDI2PDIZrm addr:$src)>;
4508   def : Pat<(v8i32 (X86vzload32 addr:$src)),
4509             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4510   def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4511             (VMOVZPQILo2PQIZrr VR128X:$src)>;
4512   def : Pat<(v2i64 (X86vzload64 addr:$src)),
4513             (VMOVQI2PQIZrm addr:$src)>;
4514   def : Pat<(v4i64 (X86vzload64 addr:$src)),
4515             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4517   // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4518   def : Pat<(v16i32 (X86vzload32 addr:$src)),
4519             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4520   def : Pat<(v8i64 (X86vzload64 addr:$src)),
4521             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
  // X86vzmovl on 256-bit 64-bit-element vectors: extract sub_xmm, apply
  // VMOVZPQILo2PQIZrr, then re-wrap with SUBREG_TO_REG.
4523   def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4524             (SUBREG_TO_REG (i32 0),
4525              (v2f64 (VMOVZPQILo2PQIZrr
4526                      (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4527              sub_xmm)>;
4528   def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4529             (SUBREG_TO_REG (i32 0),
4530              (v2i64 (VMOVZPQILo2PQIZrr
4531                      (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4532              sub_xmm)>;
  // Same lowering for the 512-bit 64-bit-element vectors.
4534   def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4535             (SUBREG_TO_REG (i32 0),
4536              (v2f64 (VMOVZPQILo2PQIZrr
4537                      (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4538              sub_xmm)>;
4539   def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4540             (SUBREG_TO_REG (i32 0),
4541              (v2i64 (VMOVZPQILo2PQIZrr
4542                      (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4543              sub_xmm)>;
4546 //===----------------------------------------------------------------------===//
4547 // AVX-512 - Non-temporals
4548 //===----------------------------------------------------------------------===//
// 512-bit "vmovntdqa" load (opcode 0x2A, MRMSrcMem) from i512mem into VR512.
// The instruction's own pattern list is empty; it is encoded as
// EVEX, T8, PD, EVEX_V512 with EVEX_CD8<64, CD8VF> and scheduled as
// SchedWriteVecMoveLS.ZMM.RM.
4550 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4551                       (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4552                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4553                       EVEX, T8, PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
// 256-bit and 128-bit "vmovntdqa" loads, gated on HasVLX. They mirror the
// 512-bit definition (opcode 0x2A, MRMSrcMem, empty pattern list) but use
// the YMM/XMM register classes, memory operands, scheduling classes and
// EVEX_V256/EVEX_V128 respectively.
4555 let Predicates = [HasVLX] in {
4556   def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4557                        (ins i256mem:$src),
4558                        "vmovntdqa\t{$src, $dst|$dst, $src}",
4559                        [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4560                        EVEX, T8, PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4562   def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4563                       (ins i128mem:$src),
4564                       "vmovntdqa\t{$src, $dst|$dst, $src}",
4565                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4566                       EVEX, T8, PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
// Defines the store form ("mr", MRMDestMem) of a non-temporal move for one
// vector width.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic used in the assembly string.
//   _         - X86VectorVTInfo supplying MemOp, RC, VT, ExeDomain, EltSize.
//   Sched     - move load/store scheduling info; its MR member is used.
//   st_frag   - store fragment matched by the pattern; defaults to
//               alignednontemporalstore.
// The definition is given AddedComplexity = 400.
4569 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4570                         X86SchedWriteMoveLS Sched,
4571                         PatFrag st_frag = alignednontemporalstore> {
4572   let SchedRW = [Sched.MR], AddedComplexity = 400 in
4573   def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4574                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4575                     [(st_frag (_.VT _.RC:$src), addr:$dst)],
4576                     _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
// Instantiates avx512_movnt for all three vector lengths of VTInfo:
//   Z    (info512, Sched.ZMM, EVEX_V512) under HasAVX512;
//   Z256 (info256, Sched.YMM, EVEX_V256) and
//   Z128 (info128, Sched.XMM, EVEX_V128) under HasAVX512 + HasVLX.
4579 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4580                            AVX512VLVectorVTInfo VTInfo,
4581                            X86SchedWriteMoveLSWidths Sched> {
4582   let Predicates = [HasAVX512] in
4583     defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4585   let Predicates = [HasAVX512, HasVLX] in {
4586     defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4587     defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
4588   }
// Non-temporal store instructions built from avx512_movnt_vl:
//   VMOVNTDQ - opcode 0xE7, i64 vector info, TB + PD.
//   VMOVNTPD - opcode 0x2B, f64 vector info, TB + PD + REX_W.
//   VMOVNTPS - opcode 0x2B, f32 vector info, TB.
4591 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4592                                 SchedWriteVecMoveLSNT>, TB, PD;
4593 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4594                                 SchedWriteFMoveLSNT>, TB, PD, REX_W;
4595 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4596                                 SchedWriteFMoveLSNT>, TB;
// 512-bit non-temporal patterns (HasAVX512, AddedComplexity = 400).
// Aligned non-temporal stores of v16i32/v32i16/v64i8 select VMOVNTDQZmr;
// aligned non-temporal loads of every listed 512-bit type, floating-point
// types included, select VMOVNTDQAZrm.
4598 let Predicates = [HasAVX512], AddedComplexity = 400 in {
4599   def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4600             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4601   def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4602             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4603   def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4604             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4606   def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4607             (VMOVNTDQAZrm addr:$src)>;
4608   def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4609             (VMOVNTDQAZrm addr:$src)>;
4610   def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4611             (VMOVNTDQAZrm addr:$src)>;
4612   def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4613             (VMOVNTDQAZrm addr:$src)>;
4614   def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4615             (VMOVNTDQAZrm addr:$src)>;
4616   def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4617             (VMOVNTDQAZrm addr:$src)>;
// 256-bit and 128-bit non-temporal patterns (HasVLX, AddedComplexity = 400).
// Stores of the i32/i16/i8 element vectors select VMOVNTDQZ256mr /
// VMOVNTDQZ128mr; loads of every listed type select VMOVNTDQAZ256rm /
// VMOVNTDQAZ128rm.
4620 let Predicates = [HasVLX], AddedComplexity = 400 in {
  // 256-bit stores.
4621   def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4622             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4623   def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4624             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4625   def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4626             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  // 256-bit loads.
4628   def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4629             (VMOVNTDQAZ256rm addr:$src)>;
4630   def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4631             (VMOVNTDQAZ256rm addr:$src)>;
4632   def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4633             (VMOVNTDQAZ256rm addr:$src)>;
4634   def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4635             (VMOVNTDQAZ256rm addr:$src)>;
4636   def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4637             (VMOVNTDQAZ256rm addr:$src)>;
4638   def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4639             (VMOVNTDQAZ256rm addr:$src)>;
  // 128-bit stores.
4641   def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4642             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4643   def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4644             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4645   def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4646             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  // 128-bit loads.
4648   def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4649             (VMOVNTDQAZ128rm addr:$src)>;
4650   def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4651             (VMOVNTDQAZ128rm addr:$src)>;
4652   def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4653             (VMOVNTDQAZ128rm addr:$src)>;
4654   def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4655             (VMOVNTDQAZ128rm addr:$src)>;
4656   def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4657             (VMOVNTDQAZ128rm addr:$src)>;
4658   def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4659             (VMOVNTDQAZ128rm addr:$src)>;
4662 //===----------------------------------------------------------------------===//
4663 // AVX-512 - Integer arithmetic
// Register-register ("rr") and register-memory ("rm") maskable forms of a
// two-operand integer vector operation for a single vector type.
//   opc          - opcode byte.
//   OpcodeStr    - mnemonic.
//   OpNode       - SDNode matched by both patterns.
//   _            - X86VectorVTInfo supplying RC, MemOp, VT and LdFrag.
//   sched        - foldable scheduling write; rm uses its Folded and
//                  ReadAfterFold members.
//   IsCommutable - forwarded twice to the rr form only; the rm form does not
//                  receive it.
4665 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4666                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
4667                            bit IsCommutable = 0> {
4668   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4669                     (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4670                     "$src2, $src1", "$src1, $src2",
4671                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4672                     IsCommutable, IsCommutable>, AVX512BIBase, EVEX, VVVV,
4673                     Sched<[sched]>;
4675   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4676                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4677                   "$src2, $src1", "$src1, $src2",
4678                   (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4679                   AVX512BIBase, EVEX, VVVV,
4680                   Sched<[sched.Folded, sched.ReadAfterFold]>;
// Extends avx512_binop_rm (inherited with the same arguments) with an "rmb"
// form whose second source is a scalar memory operand. Its assembly operand
// is printed with _.BroadcastStr appended, its pattern reads the operand
// through _.BroadcastLdFrag, and the definition is tagged EVEX_B.
4683 multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4684                             X86VectorVTInfo _, X86FoldableSchedWrite sched,
4685                             bit IsCommutable = 0> :
4686            avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4687   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4688                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4689                   "${src2}"#_.BroadcastStr#", $src1",
4690                   "$src1, ${src2}"#_.BroadcastStr,
4691                   (_.VT (OpNode _.RC:$src1,
4692                                 (_.BroadcastLdFrag addr:$src2)))>,
4693                   AVX512BIBase, EVEX, VVVV, EVEX_B,
4694                   Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_binop_rm (rr + rm, no broadcast form) at all three
// vector lengths of VTInfo. The 512-bit variant requires `prd`; the 256-bit
// and 128-bit variants require `prd` and HasVLX.
4697 multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4698                               AVX512VLVectorVTInfo VTInfo,
4699                               X86SchedWriteWidths sched, Predicate prd,
4700                               bit IsCommutable = 0> {
4701   let Predicates = [prd] in
4702     defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4703                              IsCommutable>, EVEX_V512;
4705   let Predicates = [prd, HasVLX] in {
4706     defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4707                                 sched.YMM, IsCommutable>, EVEX_V256;
4708     defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4709                                 sched.XMM, IsCommutable>, EVEX_V128;
4710   }
// Same layout as avx512_binop_rm_vl, but built on avx512_binop_rmb so every
// vector length also gets the "rmb" broadcast-memory form. The 512-bit
// variant requires `prd`; the 256/128-bit variants require `prd` and HasVLX.
4713 multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4714                                AVX512VLVectorVTInfo VTInfo,
4715                                X86SchedWriteWidths sched, Predicate prd,
4716                                bit IsCommutable = 0> {
4717   let Predicates = [prd] in
4718     defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4719                              IsCommutable>, EVEX_V512;
4721   let Predicates = [prd, HasVLX] in {
4722     defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4723                                  sched.YMM, IsCommutable>, EVEX_V256;
4724     defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4725                                  sched.XMM, IsCommutable>, EVEX_V128;
4726   }
// 64-bit-element wrapper: avx512_binop_rmb_vl over avx512vl_i64_info, with
// REX_W and EVEX_CD8<64, CD8VF> applied to every generated record.
4729 multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4730                                 X86SchedWriteWidths sched, Predicate prd,
4731                                 bit IsCommutable = 0> {
4732   defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4733                                   sched, prd, IsCommutable>,
4734                                   REX_W, EVEX_CD8<64, CD8VF>;
// 32-bit-element wrapper: avx512_binop_rmb_vl over avx512vl_i32_info, with
// EVEX_CD8<32, CD8VF>. Unlike the _q wrapper, no REX_W is applied.
4737 multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4738                                 X86SchedWriteWidths sched, Predicate prd,
4739                                 bit IsCommutable = 0> {
4740   defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4741                                   sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
// 16-bit-element wrapper: avx512_binop_rm_vl (no broadcast form) over
// avx512vl_i16_info, with EVEX_CD8<16, CD8VF> and WIG.
4744 multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4745                                 X86SchedWriteWidths sched, Predicate prd,
4746                                 bit IsCommutable = 0> {
4747   defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4748                                  sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4749                                  WIG;
// 8-bit-element wrapper: avx512_binop_rm_vl (no broadcast form) over
// avx512vl_i8_info, with EVEX_CD8<8, CD8VF> and WIG.
4752 multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4753                                 X86SchedWriteWidths sched, Predicate prd,
4754                                 bit IsCommutable = 0> {
4755   defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4756                                  sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
4757                                  WIG;
// Emits both the quadword ("Q", opc_q, mnemonic suffix "q") and doubleword
// ("D", opc_d, mnemonic suffix "d") families of an operation, sharing the
// same OpNode, scheduling info, predicate and commutability flag.
4760 multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4761                                  SDNode OpNode, X86SchedWriteWidths sched,
4762                                  Predicate prd, bit IsCommutable = 0> {
4763   defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4764                                    IsCommutable>;
4766   defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
4767                                    IsCommutable>;
// Emits both the word ("W", opc_w, mnemonic suffix "w") and byte ("B",
// opc_b, mnemonic suffix "b") families of an operation, sharing the same
// OpNode, scheduling info, predicate and commutability flag.
4770 multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4771                                  SDNode OpNode, X86SchedWriteWidths sched,
4772                                  Predicate prd, bit IsCommutable = 0> {
4773   defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4774                                    IsCommutable>;
4776   defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
4777                                    IsCommutable>;
// Emits all four element-size families of an operation. The D/Q families
// are gated on HasAVX512 and the B/W families on HasBWI; opcodes are passed
// in b, w, d, q order.
4780 multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4781                                   bits<8> opc_d, bits<8> opc_q,
4782                                   string OpcodeStr, SDNode OpNode,
4783                                   X86SchedWriteWidths sched,
4784                                   bit IsCommutable = 0> {
4785   defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4786                                     sched, HasAVX512, IsCommutable>,
4787               avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4788                                     sched, HasBWI, IsCommutable>;
// Binary operation whose source, destination and broadcast element types
// may differ.
//   _Src   - type info for both source operands (RC, VT, MemOp, LdFrag).
//   _Dst   - type info for the result; also the type passed to
//            AVX512_maskable.
//   _Brdct - type info for the "rmb" broadcast operand (ScalarMemOp,
//            BroadcastStr, BroadcastLdFrag, VT).
// Generates rr, rm and rmb forms. IsCommutable is forwarded once, to the rr
// form only. In the rmb pattern the broadcast load is typed as _Brdct.VT and
// wrapped in a bitconvert before being passed to OpNode.
4791 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4792                             X86FoldableSchedWrite sched,
4793                             SDNode OpNode,X86VectorVTInfo _Src,
4794                             X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4795                             bit IsCommutable = 0> {
4796   defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4797                             (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4798                             "$src2, $src1","$src1, $src2",
4799                             (_Dst.VT (OpNode
4800                                          (_Src.VT _Src.RC:$src1),
4801                                          (_Src.VT _Src.RC:$src2))),
4802                             IsCommutable>,
4803                             AVX512BIBase, EVEX, VVVV, Sched<[sched]>;
4804   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4805                         (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4806                         "$src2, $src1", "$src1, $src2",
4807                         (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4808                                       (_Src.LdFrag addr:$src2)))>,
4809                         AVX512BIBase, EVEX, VVVV,
4810                         Sched<[sched.Folded, sched.ReadAfterFold]>;
4812   defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4813                     (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4814                     OpcodeStr,
4815                     "${src2}"#_Brdct.BroadcastStr#", $src1",
4816                      "$src1, ${src2}"#_Brdct.BroadcastStr,
4817                     (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4818                                  (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
4819                     AVX512BIBase, EVEX, VVVV, EVEX_B,
4820                     Sched<[sched.Folded, sched.ReadAfterFold]>;
// Integer arithmetic instruction families. The trailing 1/0 argument is the
// IsCommutable flag: the add, multiply and average families pass 1, the
// subtract families pass 0. Entries followed by ", T8" add the T8 class.
// Add/subtract, plain and saturating.
4823 defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4824                                     SchedWriteVecALU, 1>;
4825 defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4826                                     SchedWriteVecALU, 0>;
4827 defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
4828                                     SchedWriteVecALU, HasBWI, 1>;
4829 defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
4830                                     SchedWriteVecALU, HasBWI, 0>;
4831 defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
4832                                      SchedWriteVecALU, HasBWI, 1>;
4833 defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
4834                                      SchedWriteVecALU, HasBWI, 0>;
// Multiplies. VPMULLQ is the only entry gated on HasDQI.
4835 defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4836                                     SchedWritePMULLD, HasAVX512, 1>, T8;
4837 defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4838                                     SchedWriteVecIMul, HasBWI, 1>;
4839 defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4840                                     SchedWriteVecIMul, HasDQI, 1>, T8;
4841 defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
4842                                     HasBWI, 1>;
4843 defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
4844                                      HasBWI, 1>;
4845 defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
4846                                       SchedWriteVecIMul, HasBWI, 1>, T8;
// Unsigned rounding average (avgceilu) for bytes and words.
4847 defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu,
4848                                    SchedWriteVecALU, HasBWI, 1>;
// VPMULDQ/VPMULUDQ use the 64-bit-element (_q) wrapper with the
// X86pmuldq / X86pmuludq nodes.
4849 defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
4850                                     SchedWriteVecIMul, HasAVX512, 1>, T8;
4851 defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
4852                                      SchedWriteVecIMul, HasAVX512, 1>;
// Instantiates avx512_binop_rm2 at all three vector lengths with distinct
// source (_SrcVTInfo) and destination (_DstVTInfo) type families. The
// broadcast type is fixed to the 64-bit-element infos (v8i64_info,
// v4i64x_info, v2i64x_info), and every variant carries
// EVEX_CD8<64, CD8VF> and REX_W. The 512-bit variant requires `prd`; the
// 256/128-bit variants require HasVLX and `prd`.
4854 multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
4855                             X86SchedWriteWidths sched,
4856                             AVX512VLVectorVTInfo _SrcVTInfo,
4857                             AVX512VLVectorVTInfo _DstVTInfo,
4858                             SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
4859   let Predicates = [prd] in
4860     defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
4861                                  _SrcVTInfo.info512, _DstVTInfo.info512,
4862                                  v8i64_info, IsCommutable>,
4863                                   EVEX_V512, EVEX_CD8<64, CD8VF>, REX_W;
4864   let Predicates = [HasVLX, prd] in {
4865     defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
4866                                       _SrcVTInfo.info256, _DstVTInfo.info256,
4867                                       v4i64x_info, IsCommutable>,
4868                                       EVEX_V256, EVEX_CD8<64, CD8VF>, REX_W;
4869     defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
4870                                       _SrcVTInfo.info128, _DstVTInfo.info128,
4871                                       v2i64x_info, IsCommutable>,
4872                                      EVEX_V128, EVEX_CD8<64, CD8VF>, REX_W;
4873   }
// vpmultishiftqb: opcode 0x83 with T8, gated on HasVBMI, not commutable.
// Source and destination are both the i8 vector family; the broadcast form
// uses the 64-bit element type fixed inside avx512_binop_all.
4876 defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
4877                                 avx512vl_i8_info, avx512vl_i8_info,
4878                                 X86multishift, HasVBMI, 0>, T8;
// Broadcast-memory ("rmb") form for pack-style operations, where the result
// type (_Dst) differs from the source type (_Src). The scalar memory operand,
// broadcast string and broadcast load fragment all come from _Src; the
// broadcast load is typed as _Src.VT and wrapped in a bitconvert. Tagged
// EVEX_B with EVEX_CD8<_Src.EltSize, CD8VF>.
4880 multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4881                             X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4882                             X86FoldableSchedWrite sched> {
4883   defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4884                     (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4885                     OpcodeStr,
4886                     "${src2}"#_Src.BroadcastStr#", $src1",
4887                      "$src1, ${src2}"#_Src.BroadcastStr,
4888                     (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4889                                  (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
4890                     EVEX, VVVV, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4891                     Sched<[sched.Folded, sched.ReadAfterFold]>;
// Register-register ("rr") and register-memory ("rm") forms for operations
// whose result type (_Dst) differs from the source type (_Src). Both forms
// use EVEX_CD8<_Src.EltSize, CD8VF>. IsCommutable is forwarded twice to the
// rr form only.
4894 multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4895                             SDNode OpNode,X86VectorVTInfo _Src,
4896                             X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
4897                             bit IsCommutable = 0> {
4898   defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4899                             (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4900                             "$src2, $src1","$src1, $src2",
4901                             (_Dst.VT (OpNode
4902                                          (_Src.VT _Src.RC:$src1),
4903                                          (_Src.VT _Src.RC:$src2))),
4904                             IsCommutable, IsCommutable>,
4905                             EVEX_CD8<_Src.EltSize, CD8VF>, EVEX, VVVV, Sched<[sched]>;
4906   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4907                         (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4908                         "$src2, $src1", "$src1, $src2",
4909                         (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4910                                       (_Src.LdFrag addr:$src2)))>,
4911                          EVEX, VVVV, EVEX_CD8<_Src.EltSize, CD8VF>,
4912                          Sched<[sched.Folded, sched.ReadAfterFold]>;
// Pack from 32-bit to 16-bit elements at all three vector lengths. Each
// length combines avx512_packs_rm (rr/rm) with avx512_packs_rmb (broadcast
// form), scheduled as SchedWriteShuffle. The 512-bit variant requires
// HasBWI; the 256/128-bit variants require HasBWI and HasVLX.
4915 multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4916                                     SDNode OpNode> {
4917   let Predicates = [HasBWI] in
4918   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
4919                                  v32i16_info, SchedWriteShuffle.ZMM>,
4920                 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
4921                                  v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
4922   let Predicates = [HasBWI, HasVLX] in {
4923     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
4924                                      v16i16x_info, SchedWriteShuffle.YMM>,
4925                      avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
4926                                       v16i16x_info, SchedWriteShuffle.YMM>,
4927                                       EVEX_V256;
4928     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
4929                                      v8i16x_info, SchedWriteShuffle.XMM>,
4930                      avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
4931                                       v8i16x_info, SchedWriteShuffle.XMM>,
4932                                       EVEX_V128;
4933   }
// Pack from 16-bit to 8-bit elements at all three vector lengths. Only
// avx512_packs_rm is used, so no broadcast ("rmb") form is generated, and
// every variant is marked WIG. The 512-bit variant requires HasBWI; the
// 256/128-bit variants require HasBWI and HasVLX.
4935 multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4936                             SDNode OpNode> {
4937   let Predicates = [HasBWI] in
4938   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
4939                                 SchedWriteShuffle.ZMM>, EVEX_V512, WIG;
4940   let Predicates = [HasBWI, HasVLX] in {
4941     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
4942                                      v32i8x_info, SchedWriteShuffle.YMM>,
4943                                      EVEX_V256, WIG;
4944     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
4945                                      v16i8x_info, SchedWriteShuffle.XMM>,
4946                                      EVEX_V128, WIG;
4947   }
// Multiply-add style operations with differing source (_Src) and
// destination (_Dst) type families. Reuses avx512_packs_rm (rr/rm only) at
// all three vector lengths, scheduled as SchedWriteVecIMul. The 512-bit
// variant requires HasBWI; the 256/128-bit variants require HasBWI and
// HasVLX.
4950 multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4951                             SDNode OpNode, AVX512VLVectorVTInfo _Src,
4952                             AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
4953   let Predicates = [HasBWI] in
4954   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
4955                                 _Dst.info512, SchedWriteVecIMul.ZMM,
4956                                 IsCommutable>, EVEX_V512;
4957   let Predicates = [HasBWI, HasVLX] in {
4958     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
4959                                      _Dst.info256, SchedWriteVecIMul.YMM,
4960                                      IsCommutable>, EVEX_V256;
4961     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
4962                                      _Dst.info128, SchedWriteVecIMul.XMM,
4963                                      IsCommutable>, EVEX_V128;
4964   }
// Pack instructions. VPACKUSDW is the only one using AVX5128IBase; the
// other three use AVX512BIBase.
4967 defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4968 defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4969 defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4970 defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
// Multiply-add instructions. VPMADDUBSW (i8 -> i16) leaves IsCommutable at
// its default of 0; VPMADDWD (i16 -> i32) passes 1.
4972 defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4973                      avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8, WIG;
4974 defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4975                      avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, WIG;
// Integer min/max instruction families, all commutable (trailing 1).
// Byte and word variants are gated on HasBWI; doubleword and quadword
// variants on HasAVX512. Entries followed by ", T8" add the T8 class.
// Signed maximum.
4977 defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4978                                     SchedWriteVecALU, HasBWI, 1>, T8;
4979 defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4980                                     SchedWriteVecALU, HasBWI, 1>;
4981 defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
4982                                     SchedWriteVecALU, HasAVX512, 1>, T8;
4983 defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
4984                                     SchedWriteVecALU, HasAVX512, 1>, T8;
// Unsigned maximum.
4986 defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4987                                     SchedWriteVecALU, HasBWI, 1>;
4988 defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
4989                                     SchedWriteVecALU, HasBWI, 1>, T8;
4990 defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
4991                                     SchedWriteVecALU, HasAVX512, 1>, T8;
4992 defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
4993                                     SchedWriteVecALU, HasAVX512, 1>, T8;
// Signed minimum.
4995 defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
4996                                     SchedWriteVecALU, HasBWI, 1>, T8;
4997 defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
4998                                     SchedWriteVecALU, HasBWI, 1>;
4999 defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
5000                                     SchedWriteVecALU, HasAVX512, 1>, T8;
5001 defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
5002                                     SchedWriteVecALU, HasAVX512, 1>, T8;
// Unsigned minimum.
5004 defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
5005                                     SchedWriteVecALU, HasBWI, 1>;
5006 defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
5007                                     SchedWriteVecALU, HasBWI, 1>, T8;
5008 defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
5009                                     SchedWriteVecALU, HasAVX512, 1>, T8;
5010 defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
5011                                     SchedWriteVecALU, HasAVX512, 1>, T8;
5013 // PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX, HasEVEX512.
// Each narrow source is inserted into an IMPLICIT_DEF v8i64 at
// sub_ymm/sub_xmm, the 512-bit VPMULLQZrr (or VPMULLQZrmb for a broadcast
// 64-bit load) is applied, and the narrow result is taken back with
// EXTRACT_SUBREG at the same subregister index.
5014 let Predicates = [HasDQI, NoVLX, HasEVEX512] in {
  // 256-bit: register-register and register-broadcast forms.
5015   def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5016             (EXTRACT_SUBREG
5017                 (VPMULLQZrr
5018                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5019                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5020              sub_ymm)>;
5021   def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5022             (EXTRACT_SUBREG
5023                 (VPMULLQZrmb
5024                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5025                     addr:$src2),
5026              sub_ymm)>;
  // 128-bit: register-register and register-broadcast forms.
5028   def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5029             (EXTRACT_SUBREG
5030                 (VPMULLQZrr
5031                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5032                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5033              sub_xmm)>;
5034   def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5035             (EXTRACT_SUBREG
5036                 (VPMULLQZrmb
5037                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5038                     addr:$src2),
5039              sub_xmm)>;
// Patterns that implement a 256-bit or 128-bit i64 operation with a 512-bit
// instruction.
//   Instr  - name prefix of the 512-bit instruction; "rr" and "rmb" are
//            appended and resolved with !cast<Instruction>.
//   OpNode - SDNode to match.
// Sources are inserted into an IMPLICIT_DEF v8i64 at sub_ymm/sub_xmm and the
// result is taken back with EXTRACT_SUBREG at the same index. The "rmb"
// patterns match a second operand that is an X86VBroadcastld64.
5042 multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
5043   def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
5044             (EXTRACT_SUBREG
5045                 (!cast<Instruction>(Instr#"rr")
5046                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5047                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5048              sub_ymm)>;
5049   def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5050             (EXTRACT_SUBREG
5051                 (!cast<Instruction>(Instr#"rmb")
5052                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5053                     addr:$src2),
5054              sub_ymm)>;
5056   def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
5057             (EXTRACT_SUBREG
5058                 (!cast<Instruction>(Instr#"rr")
5059                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5060                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5061              sub_xmm)>;
5062   def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5063             (EXTRACT_SUBREG
5064                 (!cast<Instruction>(Instr#"rmb")
5065                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5066                     addr:$src2),
5067              sub_xmm)>;
// Instantiate avx512_min_max_lowering for umax/umin/smax/smin under the
// predicates [HasAVX512, NoVLX, HasEVEX512], pairing each node with the
// VPMAXUQZ / VPMINUQZ / VPMAXSQZ / VPMINSQZ name prefix respectively.
5070 let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
5071   defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
5072   defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
5073   defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
5074   defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
5077 //===----------------------------------------------------------------------===//
5078 // AVX-512  Logical Instructions
5079 //===----------------------------------------------------------------------===//
// Packed logic instruction families built from avx512_binop_rm_vl_dq. Each one
// passes the same opcode value for both opcode arguments and uses
// SchedWriteVecLogic with the HasAVX512 predicate.
// NOTE(review): VPAND/VPOR/VPXOR pass a trailing `1` that VPANDN omits --
// presumably the commutable flag of avx512_binop_rm_vl_dq; confirm against its
// definition, which is outside this excerpt.
5081 defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
5082                                    SchedWriteVecLogic, HasAVX512, 1>;
5083 defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
5084                                   SchedWriteVecLogic, HasAVX512, 1>;
5085 defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
5086                                    SchedWriteVecLogic, HasAVX512, 1>;
5087 defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5088                                     SchedWriteVecLogic, HasAVX512>;
// Under HasVLX: select and / or / xor / X86andnp on byte and word element
// vectors (v16i8, v8i16, v32i8, v16i16) to the VP{AND,OR,XOR,ANDN}Q
// instructions of matching width (Z128 / Z256). Both register-register (rr)
// and register-memory (rm) forms are covered; the rm patterns match a
// full-vector load fragment (loadv16i8, loadv8i16, loadv32i8, loadv16i16) as
// the second operand.
5090 let Predicates = [HasVLX] in {
       // 128-bit, register-register.
5091   def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
5092             (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5093   def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
5094             (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5096   def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
5097             (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5098   def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
5099             (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5101   def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
5102             (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5103   def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
5104             (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5106   def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
5107             (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5108   def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
5109             (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
       // 128-bit, register-memory.
5111   def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
5112             (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5113   def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
5114             (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5116   def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
5117             (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5118   def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
5119             (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5121   def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
5122             (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5123   def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
5124             (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5126   def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
5127             (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5128   def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
5129             (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
       // 256-bit, register-register.
5131   def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5132             (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5133   def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5134             (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5136   def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5137             (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5138   def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5139             (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5141   def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5142             (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5143   def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5144             (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5146   def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5147             (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5148   def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5149             (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
       // 256-bit, register-memory.
5151   def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5152             (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5153   def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5154             (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5156   def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5157             (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5158   def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5159             (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5161   def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5162             (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5163   def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5164             (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5166   def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5167             (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5168   def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5169             (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
// Under HasAVX512: select and / or / xor / X86andnp on 512-bit byte and word
// element vectors (v64i8, v32i16) to the VP{AND,OR,XOR,ANDN}QZ instructions,
// in register-register (rr) and register-memory (rm) forms. The rm patterns
// match loadv64i8 / loadv32i16 as the second operand.
5172 let Predicates = [HasAVX512] in {
       // Register-register.
5173   def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5174             (VPANDQZrr VR512:$src1, VR512:$src2)>;
5175   def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5176             (VPANDQZrr VR512:$src1, VR512:$src2)>;
5178   def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5179             (VPORQZrr VR512:$src1, VR512:$src2)>;
5180   def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5181             (VPORQZrr VR512:$src1, VR512:$src2)>;
5183   def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5184             (VPXORQZrr VR512:$src1, VR512:$src2)>;
5185   def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5186             (VPXORQZrr VR512:$src1, VR512:$src2)>;
5188   def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5189             (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5190   def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5191             (VPANDNQZrr VR512:$src1, VR512:$src2)>;
       // Register-memory.
5193   def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5194             (VPANDQZrm VR512:$src1, addr:$src2)>;
5195   def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5196             (VPANDQZrm VR512:$src1, addr:$src2)>;
5198   def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5199             (VPORQZrm VR512:$src1, addr:$src2)>;
5200   def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5201             (VPORQZrm VR512:$src1, addr:$src2)>;
5203   def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5204             (VPXORQZrm VR512:$src1, addr:$src2)>;
5205   def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5206             (VPXORQZrm VR512:$src1, addr:$src2)>;
5208   def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5209             (VPANDNQZrm VR512:$src1, addr:$src2)>;
5210   def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5211             (VPANDNQZrm VR512:$src1, addr:$src2)>;
5214 // Patterns to catch vselect with different type than logic op.
// Emits four patterns matching a vselect_mask of type _.VT whose selected
// value is a bitconvert of OpNode computed in IntInfo.VT, and maps them to the
// masked instruction forms InstrStr#rrk / rrkz / rmk / rmkz.
//   InstrStr - instruction name prefix; the form suffix is appended and the
//              result is !cast to an Instruction.
//   OpNode   - the logic SDNode being matched.
//   _        - vector info for the select (supplies VT, RC, KRCWM,
//              ImmAllZerosV).
//   IntInfo  - vector info supplying the VT in which the logic op is computed.
5215 multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5216                                     X86VectorVTInfo _,
5217                                     X86VectorVTInfo IntInfo> {
5218   // Masked register-register logical operations.
       // Other select operand is $src0 -> "rrk", which takes $src0 first.
5219   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5220                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5221                    _.RC:$src0)),
5222             (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5223              _.RC:$src1, _.RC:$src2)>;
       // Other select operand is all-zeros -> "rrkz" (no $src0 operand).
5225   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5226                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5227                    _.ImmAllZerosV)),
5228             (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5229              _.RC:$src2)>;
5231   // Masked register-memory logical operations.
       // Second logic operand is a generic `load`; selects "rmk" / "rmkz".
5232   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5233                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5234                                             (load addr:$src2)))),
5235                    _.RC:$src0)),
5236             (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5237              _.RC:$src1, addr:$src2)>;
5238   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5239                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5240                                             (load addr:$src2)))),
5241                    _.ImmAllZerosV)),
5242             (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5243              addr:$src2)>;
// Broadcast-operand counterpart of the masked logic lowering: matches a
// vselect_mask of type _.VT over a bitconvert of OpNode (computed in
// IntInfo.VT) whose second operand is IntInfo.BroadcastLdFrag, and selects
// InstrStr#rmbk (other select operand is $src0) or InstrStr#rmbkz (other
// select operand is all-zeros).
//   InstrStr - instruction name prefix.
//   OpNode   - the logic SDNode being matched.
//   _        - vector info for the select.
//   IntInfo  - vector info for the logic op and its broadcast load fragment.
5246 multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5247                                          X86VectorVTInfo _,
5248                                          X86VectorVTInfo IntInfo> {
5249   // Register-broadcast logical operations.
5250   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5251                    (bitconvert
5252                     (IntInfo.VT (OpNode _.RC:$src1,
5253                                  (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5254                    _.RC:$src0)),
5255             (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5256              _.RC:$src1, addr:$src2)>;
5257   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5258                    (bitconvert
5259                     (IntInfo.VT (OpNode _.RC:$src1,
5260                                  (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5261                    _.ImmAllZerosV)),
5262             (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
5263              _.RC:$src1, addr:$src2)>;
// Instantiates avx512_logical_lowering once per vector width: the "Z128" and
// "Z256" instruction names under HasVLX (info128 / info256) and the "Z" name
// under HasAVX512 (info512).
//   InstrStr   - instruction name prefix; the width suffix is appended here.
//   OpNode     - the logic SDNode being matched.
//   SelectInfo - per-width vector info for the select type.
//   IntInfo    - per-width vector info for the logic-op type.
5266 multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5267                                          AVX512VLVectorVTInfo SelectInfo,
5268                                          AVX512VLVectorVTInfo IntInfo> {
5269 let Predicates = [HasVLX] in {
5270   defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5271                                  IntInfo.info128>;
5272   defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5273                                  IntInfo.info256>;
5275 let Predicates = [HasAVX512] in {
5276   defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
5277                                  IntInfo.info512>;
// Instantiates avx512_logical_lowering_bcast once per vector width: the "Z128"
// and "Z256" instruction names under HasVLX (info128 / info256) and the "Z"
// name under HasAVX512 (info512).
//   InstrStr   - instruction name prefix; the width suffix is appended here.
//   OpNode     - the logic SDNode being matched.
//   SelectInfo - per-width vector info for the select type.
//   IntInfo    - per-width vector info for the logic-op type.
5281 multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5282                                                AVX512VLVectorVTInfo SelectInfo,
5283                                                AVX512VLVectorVTInfo IntInfo> {
5284 let Predicates = [HasVLX] in {
5285   defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5286                                        SelectInfo.info128, IntInfo.info128>;
5287   defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5288                                        SelectInfo.info256, IntInfo.info256>;
5290 let Predicates = [HasAVX512] in {
5291   defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5292                                        SelectInfo.info512, IntInfo.info512>;
// Instantiates the per-width masked logic lowerings for every listed pairing
// of select element type and logic-op element type. Selects on i64/f64 use the
// "Q" instruction name and selects on i32/f32 use the "D" name. The broadcast
// lowerings are instantiated only for f32-select/i32-op ("D") and
// f64-select/i64-op ("Q").
//   InstrStr - instruction name prefix without the D/Q element suffix.
//   OpNode   - the logic SDNode being matched.
5296 multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5297   // i64 vselect with i32/i16/i8 logic op
5298   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5299                                        avx512vl_i32_info>;
5300   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5301                                        avx512vl_i16_info>;
5302   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5303                                        avx512vl_i8_info>;
5305   // i32 vselect with i64/i16/i8 logic op
5306   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5307                                        avx512vl_i64_info>;
5308   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5309                                        avx512vl_i16_info>;
5310   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5311                                        avx512vl_i8_info>;
5313   // f32 vselect with i64/i32/i16/i8 logic op
5314   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5315                                        avx512vl_i64_info>;
5316   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5317                                        avx512vl_i32_info>;
5318   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5319                                        avx512vl_i16_info>;
5320   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5321                                        avx512vl_i8_info>;
5323   // f64 vselect with i64/i32/i16/i8 logic op
5324   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5325                                        avx512vl_i64_info>;
5326   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5327                                        avx512vl_i32_info>;
5328   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5329                                        avx512vl_i16_info>;
5330   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5331                                        avx512vl_i8_info>;
       // Broadcast-load forms: f32 select with i32 op, f64 select with i64 op.
5333   defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5334                                              avx512vl_f32_info,
5335                                              avx512vl_i32_info>;
5336   defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
5337                                              avx512vl_f64_info,
5338                                              avx512vl_i64_info>;
// Apply the mixed-type masked logic lowerings to the four logic operations,
// pairing each SDNode with its instruction name prefix.
5341 defm : avx512_logical_lowering_types<"VPAND", and>;
5342 defm : avx512_logical_lowering_types<"VPOR",  or>;
5343 defm : avx512_logical_lowering_types<"VPXOR", xor>;
5344 defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5346 //===----------------------------------------------------------------------===//
5347 // AVX-512  FP arithmetic
5348 //===----------------------------------------------------------------------===//
// Scalar FP binary operation. Defines four records, all with
// ExeDomain = _.ExeDomain, Uses = [MXCSR] and mayRaiseFPException = 1:
//   rr_Int / rm_Int - AVX512_maskable_scalar forms on the vector register
//                     class _.RC, with a pattern built from VecNode.
//   rr / rm         - isCodeGenOnly forms on the scalar register class _.FRC,
//                     with a pattern built from OpNode and Predicates =
//                     [HasAVX512].
// Parameters:
//   opc          - opcode byte.
//   OpcodeStr    - mnemonic used in the assembly strings.
//   _            - vector type info (RC, FRC, memory operands, load fragments).
//   OpNode       - node matched by the FRC forms.
//   VecNode      - node matched by the _Int forms.
//   sched        - scheduling class; memory forms use sched.Folded and
//                  sched.ReadAfterFold.
//   IsCommutable - copied to isCommutable on `rr` only.
5350 multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5351                             SDPatternOperator OpNode, SDNode VecNode,
5352                             X86FoldableSchedWrite sched, bit IsCommutable> {
5353   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5354   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5355                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5356                            "$src2, $src1", "$src1, $src2",
5357                            (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5358                            Sched<[sched]>;
5360   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5361                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5362                          "$src2, $src1", "$src1, $src2",
5363                          (_.VT (VecNode _.RC:$src1,
5364                                         (_.ScalarIntMemFrags addr:$src2)))>,
5365                          Sched<[sched.Folded, sched.ReadAfterFold]>;
       // Code-generation-only forms operating on _.FRC.
5366   let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5367   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5368                          (ins _.FRC:$src1, _.FRC:$src2),
5369                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5370                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5371                           Sched<[sched]> {
5372     let isCommutable = IsCommutable;
5373   }
5374   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5375                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5376                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5377                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5378                          (_.ScalarLdFrag addr:$src2)))]>,
5379                          Sched<[sched.Folded, sched.ReadAfterFold]>;
5380   }
5381   }
// Scalar FP operation with an explicit rounding-control operand. Defines
// rrb_Int: a register-register AVX512_maskable_scalar form that takes an extra
// AVX512RC:$rc operand, matches VecNode with (i32 timm:$rc), and is tagged
// EVEX_B and EVEX_RC. Only ExeDomain and Uses = [MXCSR] are set here.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic used in the assembly strings.
//   _         - vector type info.
//   VecNode   - node taking the two vector operands plus the rounding
//               immediate.
//   sched     - scheduling class.
5384 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5385                                   SDNode VecNode, X86FoldableSchedWrite sched> {
5386   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5387   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5388                           (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5389                           "$rc, $src2, $src1", "$src1, $src2, $rc",
5390                           (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5391                           (i32 timm:$rc))>,
5392                           EVEX_B, EVEX_RC, Sched<[sched]>;
// Scalar FP binary operation with an additional {sae} register form. Defines,
// all with ExeDomain = _.ExeDomain:
//   rr_Int / rm_Int - AVX512_maskable_scalar forms on _.RC matching VecNode,
//                     tagged SIMD_EXC.
//   rr / rm         - isCodeGenOnly forms on _.FRC matching OpNode, with
//                     Predicates = [HasAVX512], Uses = [MXCSR] and
//                     mayRaiseFPException = 1.
//   rrb_Int         - register form whose assembly operands include "{sae}",
//                     matching SaeNode, tagged EVEX_B, with Uses = [MXCSR].
// Parameters:
//   opc          - opcode byte.
//   OpcodeStr    - mnemonic used in the assembly strings.
//   _            - vector type info.
//   OpNode       - node matched by the FRC forms.
//   VecNode      - node matched by rr_Int / rm_Int.
//   SaeNode      - node matched by rrb_Int.
//   sched        - scheduling class; memory forms use sched.Folded and
//                  sched.ReadAfterFold.
//   IsCommutable - copied to isCommutable on `rr` only.
5394 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5395                                 SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5396                                 X86FoldableSchedWrite sched, bit IsCommutable> {
5397   let ExeDomain = _.ExeDomain in {
5398   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5399                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5400                            "$src2, $src1", "$src1, $src2",
5401                            (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5402                            Sched<[sched]>, SIMD_EXC;
5404   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5405                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5406                          "$src2, $src1", "$src1, $src2",
5407                          (_.VT (VecNode _.RC:$src1,
5408                                         (_.ScalarIntMemFrags addr:$src2)))>,
5409                          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
       // Code-generation-only forms operating on _.FRC.
5411   let isCodeGenOnly = 1, Predicates = [HasAVX512],
5412       Uses = [MXCSR], mayRaiseFPException = 1 in {
5413   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5414                          (ins _.FRC:$src1, _.FRC:$src2),
5415                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5416                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5417                           Sched<[sched]> {
5418     let isCommutable = IsCommutable;
5419   }
5420   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5421                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5422                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5423                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5424                          (_.ScalarLdFrag addr:$src2)))]>,
5425                          Sched<[sched.Folded, sched.ReadAfterFold]>;
5426   }
       // {sae} register form; mayRaiseFPException is not set on this one.
5428   let Uses = [MXCSR] in
5429   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5430                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5431                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5432                             (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5433                             EVEX_B, Sched<[sched]>;
5434   }
// Builds the scalar ss / sd / sh variants of a binary FP operation, each
// combining avx512_fp_scalar with avx512_fp_scalar_round:
//   SSZ - OpcodeStr#"ss" on f32x_info, sched.PS.Scl.
//   SDZ - OpcodeStr#"sd" on f64x_info, sched.PD.Scl (adds REX_W).
//   SHZ - OpcodeStr#"sh" on f16x_info, sched.PH.Scl, under Predicates =
//         [HasFP16], using T_MAP5 instead of TB.
// Parameters:
//   opc          - opcode byte shared by all variants.
//   OpcodeStr    - mnemonic stem; the ss/sd/sh suffix is appended here.
//   OpNode       - node for the FRC forms of avx512_fp_scalar.
//   VecNode      - node for the _Int forms of avx512_fp_scalar.
//   RndNode      - node for the rounding form of avx512_fp_scalar_round.
//   sched        - per-type scheduling classes.
//   IsCommutable - forwarded to avx512_fp_scalar.
5437 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5438                                 SDNode VecNode, SDNode RndNode,
5439                                 X86SchedWriteSizes sched, bit IsCommutable> {
5440   defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5441                               sched.PS.Scl, IsCommutable>,
5442              avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5443                               sched.PS.Scl>,
5444                               TB, XS, EVEX, VVVV, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5445   defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5446                               sched.PD.Scl, IsCommutable>,
5447              avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5448                               sched.PD.Scl>,
5449                               TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5450   let Predicates = [HasFP16] in
5451     defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
5452                                 VecNode, sched.PH.Scl, IsCommutable>,
5453                avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
5454                                 sched.PH.Scl>,
5455                                 T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>;
// Builds the scalar ss / sd / sh variants of a binary FP operation from
// avx512_fp_scalar_sae:
//   SSZ - OpcodeStr#"ss" on f32x_info, sched.PS.Scl.
//   SDZ - OpcodeStr#"sd" on f64x_info, sched.PD.Scl (adds REX_W).
//   SHZ - OpcodeStr#"sh" on f16x_info, sched.PH.Scl, under Predicates =
//         [HasFP16], using T_MAP5 instead of TB.
// Parameters:
//   opc          - opcode byte shared by all variants.
//   OpcodeStr    - mnemonic stem; the ss/sd/sh suffix is appended here.
//   OpNode / VecNode / SaeNode - forwarded to avx512_fp_scalar_sae.
//   sched        - per-type scheduling classes.
//   IsCommutable - forwarded to avx512_fp_scalar_sae.
5458 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5459                               SDNode VecNode, SDNode SaeNode,
5460                               X86SchedWriteSizes sched, bit IsCommutable> {
5461   defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5462                               VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
5463                               TB, XS, EVEX, VVVV, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5464   defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5465                               VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
5466                               TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5467   let Predicates = [HasFP16] in {
5468     defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
5469                                 VecNode, SaeNode, sched.PH.Scl, IsCommutable>,
5470                                 T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>;
5471   }
// Scalar FP arithmetic instruction families. VADD/VMUL/VSUB/VDIV use
// avx512_binop_s_round (rounding-control forms); VMIN/VMAX use
// avx512_binop_s_sae ({sae} forms). The final argument is IsCommutable: 1 for
// VADD and VMUL, 0 for the others.
5473 defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
5474                                  SchedWriteFAddSizes, 1>;
5475 defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
5476                                  SchedWriteFMulSizes, 1>;
5477 defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
5478                                  SchedWriteFAddSizes, 0>;
5479 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
5480                                  SchedWriteFDivSizes, 0>;
5481 defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5482                                SchedWriteFCmpSizes, 0>;
5483 defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5484                                SchedWriteFCmpSizes, 0>;
5486 // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5487 // X86fminc and X86fmaxc instead of X86fmin and X86fmax
// Code-generation-only scalar binary operation on _.FRC whose register form is
// always marked commutable. Defines `rr` (isCommutable = 1) and `rm` (second
// operand loaded through _.ScalarLdFrag), both with isCodeGenOnly = 1,
// Predicates = [HasAVX512] and ExeDomain = _.ExeDomain.
//   opc       - opcode byte.
//   OpcodeStr - full mnemonic used in the assembly strings.
//   _         - vector type info (FRC, ScalarMemOp, ScalarLdFrag, ExeDomain).
//   OpNode    - node matched by both forms.
//   sched     - scheduling class; `rm` uses sched.Folded and
//               sched.ReadAfterFold.
5488 multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5489                                     X86VectorVTInfo _, SDNode OpNode,
5490                                     X86FoldableSchedWrite sched> {
5491   let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5492   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5493                          (ins _.FRC:$src1, _.FRC:$src2),
5494                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5495                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5496                           Sched<[sched]> {
5497     let isCommutable = 1;
5498   }
5499   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5500                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5501                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5502                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5503                          (_.ScalarLdFrag addr:$src2)))]>,
5504                          Sched<[sched.Folded, sched.ReadAfterFold]>;
5505   }
// Commutable scalar min/max variants matching X86fminc / X86fmaxc. They reuse
// the opcodes of the regular scalar min/max (0x5D / 0x5F) and the ordinary
// vmin*/vmax* mnemonics, for f32 (ss), f64 (sd, with REX_W) and f16 (sh, with
// T_MAP5). All are tagged SIMD_EXC and use SchedWriteFCmp.Scl.
5507 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5508                                          SchedWriteFCmp.Scl>, TB, XS,
5509                                          EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5511 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5512                                          SchedWriteFCmp.Scl>, TB, XD,
5513                                          REX_W, EVEX, VVVV, VEX_LIG,
5514                                          EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5516 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5517                                          SchedWriteFCmp.Scl>, TB, XS,
5518                                          EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5520 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5521                                          SchedWriteFCmp.Scl>, TB, XD,
5522                                          REX_W, EVEX, VVVV, VEX_LIG,
5523                                          EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5525 defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
5526                                          SchedWriteFCmp.Scl>, T_MAP5, XS,
5527                                          EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC;
5529 defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
5530                                          SchedWriteFCmp.Scl>, T_MAP5, XS,
5531                                          EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC;
// Packed FP binary operation for one vector type. Defines three
// AVX512_maskable_split families, all with ExeDomain = _.ExeDomain,
// hasSideEffects = 0, Uses = [MXCSR] and mayRaiseFPException =
// MayRaiseFPException:
//   rr  - register-register.
//   rm  - second operand loaded through _.LdFrag (mayLoad = 1).
//   rmb - second operand is _.BroadcastLdFrag (mayLoad = 1, EVEX_B); the
//         assembly strings append _.BroadcastStr to $src2.
// Each family is given two pattern dags, one built from OpNode and one from
// MaskOpNode.
// NOTE(review): presumably the OpNode dag is for the unmasked form and the
// MaskOpNode dag for the masked forms -- confirm against
// AVX512_maskable_split, which is outside this excerpt.
// Parameters:
//   opc                 - opcode byte.
//   OpcodeStr           - mnemonic stem; `suffix` is appended.
//   OpNode / MaskOpNode - nodes used in the two pattern dags.
//   _                   - vector type info.
//   sched               - scheduling class; memory forms use sched.Folded and
//                         sched.ReadAfterFold.
//   IsCommutable        - forwarded to `rr` only.
//   IsKCommutable       - forwarded to `rr` only, in the two argument
//                         positions after IsCommutable; defaults to
//                         IsCommutable.
//   suffix              - mnemonic suffix; defaults to _.Suffix.
//   ClobberConstraint   - forwarded to all three families.
//   MayRaiseFPException - value assigned to mayRaiseFPException; defaults to 1.
5533 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5534                             SDPatternOperator MaskOpNode,
5535                             X86VectorVTInfo _, X86FoldableSchedWrite sched,
5536                             bit IsCommutable,
5537                             bit IsKCommutable = IsCommutable,
5538                             string suffix = _.Suffix,
5539                             string ClobberConstraint = "",
5540                             bit MayRaiseFPException = 1> {
5541   let ExeDomain = _.ExeDomain, hasSideEffects = 0,
5542       Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in {
5543   defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
5544                                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix,
5545                                  "$src2, $src1", "$src1, $src2",
5546                                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
5547                                  (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint,
5548                                  IsCommutable, IsKCommutable, IsKCommutable>, EVEX, VVVV, Sched<[sched]>;
5549   let mayLoad = 1 in {
5550     defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5551                                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix,
5552                                    "$src2, $src1", "$src1, $src2",
5553                                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5554                                    (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5555                                    ClobberConstraint>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
5556     defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5557                                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix,
5558                                     "${src2}"#_.BroadcastStr#", $src1",
5559                                     "$src1, ${src2}"#_.BroadcastStr,
5560                                     (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5561                                     (MaskOpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5562                                     ClobberConstraint>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5563     }
5564   }
// Packed FP operation with an explicit rounding-control operand. Defines rrb:
// a register-register AVX512_maskable family that takes an extra AVX512RC:$rc
// operand, matches OpNodeRnd with (i32 timm:$rc), and is tagged EVEX_B and
// EVEX_RC. Only ExeDomain and Uses = [MXCSR] are set here.
// NOTE(review): the `0, 0, 0, vselect_mask` arguments are positional
// parameters of AVX512_maskable (presumably the commutable flags and the
// select node) -- confirm against its definition, which is outside this
// excerpt.
//   opc               - opcode byte.
//   OpcodeStr         - mnemonic stem; `suffix` is appended.
//   OpNodeRnd         - node taking two vector operands plus the rounding
//                       immediate.
//   sched             - scheduling class.
//   _                 - vector type info.
//   suffix            - mnemonic suffix; defaults to _.Suffix.
//   ClobberConstraint - forwarded to AVX512_maskable.
5567 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5568                                   SDPatternOperator OpNodeRnd,
5569                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
5570                                   string suffix = _.Suffix,
5571                                   string ClobberConstraint = ""> {
5572   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5573   defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5574                   (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix,
5575                   "$rc, $src2, $src1", "$src1, $src2, $rc",
5576                   (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))),
5577                   0, 0, 0, vselect_mask, ClobberConstraint>,
5578                   EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
// Packed FP operation with a {sae} register form. Defines rrb: a
// register-register AVX512_maskable family whose assembly operands include
// "{sae}", matching OpNodeSAE on two _.RC operands and tagged EVEX_B. Only
// ExeDomain and Uses = [MXCSR] are set here.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic stem; _.Suffix is appended.
//   OpNodeSAE - node matched by the pattern.
//   sched     - scheduling class.
//   _         - vector type info.
5581 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5582                                 SDPatternOperator OpNodeSAE,
5583                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5584   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5585   defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5586                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5587                   "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5588                   (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5589                   EVEX, VVVV, EVEX_B, Sched<[sched]>;
// Instantiates avx512_fp_packed for packed single (PS) and packed double (PD)
// at all three widths:
//   PSZ / PDZ        - 512-bit (v16f32_info / v8f64_info), Predicates = [prd].
//   PSZ128 / PSZ256,
//   PDZ128 / PDZ256  - 128/256-bit, Predicates = [prd, HasVLX].
// The PD variants add PD and REX_W and use EVEX_CD8<64, CD8VF>; the PS
// variants use EVEX_CD8<32, CD8VF>.
// Parameters:
//   opc               - opcode byte.
//   OpcodeStr         - mnemonic stem.
//   OpNode/MaskOpNode - forwarded to avx512_fp_packed.
//   prd               - base predicate.
//   sched             - per-type, per-width scheduling classes.
//   IsCommutable      - forwarded as IsCommutable for every variant except
//                       PDZ128; defaults to 0.
//   IsPD128Commutable - used as IsCommutable for PDZ128 only, where
//                       IsCommutable is instead passed in the IsKCommutable
//                       position; defaults to IsCommutable.
5592 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5593                              SDPatternOperator MaskOpNode,
5594                              Predicate prd, X86SchedWriteSizes sched,
5595                              bit IsCommutable = 0,
5596                              bit IsPD128Commutable = IsCommutable> {
5597   let Predicates = [prd] in {
5598   defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
5599                               sched.PS.ZMM, IsCommutable>, EVEX_V512, TB,
5600                               EVEX_CD8<32, CD8VF>;
5601   defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
5602                               sched.PD.ZMM, IsCommutable>, EVEX_V512, TB, PD, REX_W,
5603                               EVEX_CD8<64, CD8VF>;
5604   }
5606     // Define only if AVX512VL feature is present.
5607   let Predicates = [prd, HasVLX] in {
5608     defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
5609                                    sched.PS.XMM, IsCommutable>, EVEX_V128, TB,
5610                                    EVEX_CD8<32, CD8VF>;
5611     defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
5612                                    sched.PS.YMM, IsCommutable>, EVEX_V256, TB,
5613                                    EVEX_CD8<32, CD8VF>;
5614     defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
5615                                    sched.PD.XMM, IsPD128Commutable,
5616                                    IsCommutable>, EVEX_V128, TB, PD, REX_W,
5617                                    EVEX_CD8<64, CD8VF>;
5618     defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
5619                                    sched.PD.YMM, IsCommutable>, EVEX_V256, TB, PD, REX_W,
5620                                    EVEX_CD8<64, CD8VF>;
5621   }
// Packed half-precision (PH) instantiations of avx512_fp_packed for one binary
// FP operation: the 512-bit form requires HasFP16, the 128/256-bit forms
// require HasVLX and HasFP16. Every variant uses T_MAP5 and
// EVEX_CD8<16, CD8VF>, and forwards IsCommutable unchanged.
5624 multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5625                               SDPatternOperator MaskOpNode,
5626                               X86SchedWriteSizes sched, bit IsCommutable = 0> {
5627   let Predicates = [HasFP16] in {
5628     defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
5629                                 sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5,
5630                                 EVEX_CD8<16, CD8VF>;
5631   }
5632   let Predicates = [HasVLX, HasFP16] in {
5633     defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
5634                                    sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5,
5635                                    EVEX_CD8<16, CD8VF>;
5636     defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
5637                                    sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5,
5638                                    EVEX_CD8<16, CD8VF>;
5639   }
// 512-bit-only instantiations of avx512_fp_round_packed (using OpNodeRnd) for
// the PH, PS and PD element types. Only the PH variant is wrapped in a HasFP16
// predicate inside this multiclass; PSZ/PDZ set no predicate here. The whole
// multiclass is declared under `let Uses = [MXCSR]`.
5642 let Uses = [MXCSR] in
5643 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5644                                    X86SchedWriteSizes sched> {
5645   let Predicates = [HasFP16] in {
5646     defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5647                                       v32f16_info>,
5648                                       EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
5649   }
5650   defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5651                                     v16f32_info>,
5652                                     EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
5653   defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5654                                     v8f64_info>,
5655                                     EVEX_V512, TB, PD, REX_W,EVEX_CD8<64, CD8VF>;
// 512-bit-only instantiations of avx512_fp_sae_packed (the "{sae}" assembly
// forms) for the PH, PS and PD element types. Only the PH variant is wrapped
// in a HasFP16 predicate inside this multiclass. The whole multiclass is
// declared under `let Uses = [MXCSR]`.
5658 let Uses = [MXCSR] in
5659 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5660                                  X86SchedWriteSizes sched> {
5661   let Predicates = [HasFP16] in {
5662     defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5663                                     v32f16_info>,
5664                                     EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
5665   }
5666   defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5667                                   v16f32_info>,
5668                                   EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
5669   defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5670                                   v8f64_info>,
5671                                   EVEX_V512, TB, PD, REX_W,EVEX_CD8<64, CD8VF>;
// Packed FP arithmetic instructions. Each defm combines three multiclasses:
// the PS/PD forms (avx512_fp_binop_p, predicated on HasAVX512), the FP16 forms
// (avx512_fp_binop_ph) and a 512-bit-only third set:
//   - VADD/VMUL/VSUB/VDIV use avx512_fp_binop_p_round with the X86f*Rnd nodes.
//   - VMIN/VMAX use avx512_fp_binop_p_sae with the X86f*SAE nodes.
// The trailing 1 on VADD and VMUL is IsCommutable; VSUB and VDIV leave it at
// its default of 0, and VMIN/VMAX pass 0 explicitly.
5674 defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
5675                               SchedWriteFAddSizes, 1>,
5676             avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
5677             avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5678 defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
5679                               SchedWriteFMulSizes, 1>,
5680             avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
5681             avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5682 defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
5683                               SchedWriteFAddSizes>,
5684             avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
5685             avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5686 defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
5687                               SchedWriteFDivSizes>,
5688             avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
5689             avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5690 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
5691                               SchedWriteFCmpSizes, 0>,
5692             avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin, SchedWriteFCmpSizes, 0>,
5693             avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5694 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
5695                               SchedWriteFCmpSizes, 0>,
5696             avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>,
5697             avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
// Variants of VMIN/VMAX built on X86fminc/X86fmaxc and instantiated with
// IsCommutable = 1. They reuse the "vmin"/"vmax" mnemonics and opcodes
// 0x5D/0x5F, and the group is marked isCodeGenOnly. Only the PS/PD and FP16
// packed forms are instantiated; no SAE forms are added here.
5698 let isCodeGenOnly = 1 in {
5699   defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
5700                                  SchedWriteFCmpSizes, 1>,
5701                avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
5702                                  SchedWriteFCmpSizes, 1>;
5703   defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
5704                                  SchedWriteFCmpSizes, 1>,
5705                avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
5706                                  SchedWriteFCmpSizes, 1>;
// Packed FP logic instructions (vand/vandn/vor/vxor), predicated on HasDQI.
// null_frag is passed for both OpNode and MaskOpNode. For this group Uses is
// overridden to an empty register list and mayRaiseFPException to 0.
// VANDN is the only one instantiated with IsCommutable = 0.
5708 let Uses = []<Register>, mayRaiseFPException = 0 in {
5709 defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
5710                                SchedWriteFLogicSizes, 1>;
5711 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
5712                                SchedWriteFLogicSizes, 0>;
5713 defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
5714                                SchedWriteFLogicSizes, 1>;
5715 defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
5716                                SchedWriteFLogicSizes, 1>;
// Packed forms of a two-source FP instruction for one vector type `_`, each
// generated through AVX512_maskable (so masked variants are produced too):
//   rr  - both sources in registers.
//   rm  - second source is a full-vector memory operand (_.MemOp, _.LdFrag).
//   rmb - second source is a broadcast scalar memory operand
//         (_.ScalarMemOp, _.BroadcastLdFrag); tagged EVEX_B and printed with
//         _.BroadcastStr appended to the memory operand.
// The mnemonic is OpcodeStr with _.Suffix appended. All forms are defined with
// Uses = [MXCSR] and mayRaiseFPException = 1.
5719 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5720                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5721   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5722   defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5723                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5724                   "$src2, $src1", "$src1, $src2",
5725                   (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5726                   EVEX, VVVV, Sched<[sched]>;
5727   defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5728                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5729                   "$src2, $src1", "$src1, $src2",
5730                   (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5731                   EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
5732   defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5733                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5734                    "${src2}"#_.BroadcastStr#", $src1",
5735                    "$src1, ${src2}"#_.BroadcastStr,
5736                    (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5737                    EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5738   }
// Scalar forms of a two-source FP instruction, generated through
// AVX512_maskable_scalar:
//   rr - both sources in registers.
//   rm - second source is a scalar memory operand (_.IntScalarMemOp matched
//        with _.ScalarIntMemFrags).
// No encoding classes (EVEX, VVVV, ...) are attached here; the instantiating
// defm is expected to supply them. The mnemonic is OpcodeStr#_.Suffix.
5741 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5742                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5743   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5744   defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5745                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5746                   "$src2, $src1", "$src1, $src2",
5747                   (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5748                   Sched<[sched]>;
5749   defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5750                   (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
5751                   "$src2, $src1", "$src1, $src2",
5752                   (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
5753                   Sched<[sched.Folded, sched.ReadAfterFold]>;
5754   }
// Instantiates every form of a scalef-style instruction.
//   opc       - opcode used by the packed forms.
//   opcScaler - opcode used by the scalar forms.
// Layout:
//   - 512-bit packed forms (PHZ/PSZ/PDZ) combine avx512_fp_scalef_p with the
//     rounding variants from avx512_fp_round_packed (X86scalefRnd).
//   - Scalar forms (SHZ/SSZ/SDZ) combine avx512_fp_scalef_scalar with
//     avx512_fp_scalar_round (X86scalefsRnd). The latter is passed the mnemonic
//     with an explicit "sh"/"ss"/"sd" suffix, whereas avx512_fp_scalef_scalar
//     appends _.Suffix itself.
//   - 128/256-bit packed forms have no rounding variants and require HasVLX
//     (plus HasFP16 for the PH types).
// FP16 variants use T_MAP6; all other variants use T8. SSZ and SDZ carry
// VEX_LIG; SHZ does not list it here.
5757 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5758                                 X86SchedWriteWidths sched> {
5759   let Predicates = [HasFP16] in {
5760     defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
5761                avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
5762                                 EVEX_V512, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
5763     defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
5764                avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
5765                              EVEX, VVVV, T_MAP6, PD, EVEX_CD8<16, CD8VT1>;
5766   }
5767   defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
5768              avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
5769                               EVEX_V512, EVEX_CD8<32, CD8VF>, T8, PD;
5770   defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
5771              avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
5772                               EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
5773   defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
5774              avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
5775                                     X86scalefsRnd, sched.Scl>,
5776                                     EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8, PD;
5777   defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
5778              avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
5779                                     X86scalefsRnd, sched.Scl>,
5780                                     EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>, REX_W, T8, PD;
5782   // Define only if AVX512VL feature is present.
5783   let Predicates = [HasVLX] in {
5784     defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
5785                                    EVEX_V128, EVEX_CD8<32, CD8VF>, T8, PD;
5786     defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
5787                                    EVEX_V256, EVEX_CD8<32, CD8VF>, T8, PD;
5788     defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
5789                                    EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
5790     defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
5791                                    EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
5792   }
5794   let Predicates = [HasFP16, HasVLX] in {
5795     defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
5796                                    EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6, PD;
5797     defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
5798                                    EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6, PD;
5799   }
// VSCALEF: packed forms use opcode 0x2C, scalar forms use opcode 0x2D; all
// forms are scheduled from SchedWriteFAdd.
5801 defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", SchedWriteFAdd>;
5803 //===----------------------------------------------------------------------===//
5804 // AVX-512  VPTESTM instructions
5805 //===----------------------------------------------------------------------===//
// Register/register (rr) and register/memory (rm) forms of a test instruction
// whose result is written to a mask register (_.KRC), generated through
// AVX512_maskable_cmp. Both forms pass null_frag for their patterns and set
// hasSideEffects = 0; rm is additionally marked mayLoad because no pattern is
// present to imply it.
// NOTE(review): the trailing `1` on the rr form is presumably IsCommutable;
// confirm against the AVX512_maskable_cmp parameter list.
5807 multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
5808                          X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5809   // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
5810   // There are just too many permutations due to commutability and bitcasts.
5811   let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
5812   defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5813                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5814                       "$src2, $src1", "$src1, $src2",
5815                    (null_frag), (null_frag), 1>,
5816                    EVEX, VVVV, Sched<[sched]>;
5817   let mayLoad = 1 in
5818   defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5819                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5820                        "$src2, $src1", "$src1, $src2",
5821                    (null_frag), (null_frag)>,
5822                    EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
5823                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5824   }
// Broadcast-memory (rmb) form of the mask-producing test instruction: the
// second source is a scalar memory operand (_.ScalarMemOp) printed with
// _.BroadcastStr and encoded with EVEX_B. Patterns are null_frag, so mayLoad
// and hasSideEffects are set explicitly.
5827 multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
5828                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5829   let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
5830   defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5831                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5832                     "${src2}"#_.BroadcastStr#", $src1",
5833                     "$src1, ${src2}"#_.BroadcastStr,
5834                     (null_frag), (null_frag)>,
5835                     EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
5836                     Sched<[sched.Folded, sched.ReadAfterFold]>;
// Expands the rr/rm forms (avx512_vptest) plus the broadcast form
// (avx512_vptest_mb) at all three vector widths of `_`: the 512-bit records
// require HasAVX512, the 256/128-bit records require HasAVX512 and HasVLX.
5839 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
5840                                   X86SchedWriteWidths sched,
5841                                   AVX512VLVectorVTInfo _> {
5842   let Predicates  = [HasAVX512] in
5843   defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>,
5844            avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
5846   let Predicates = [HasAVX512, HasVLX] in {
5847   defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>,
5848               avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
5849   defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>,
5850               avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
5851   }
// Dword ("d", i32) and qword ("q", i64) element variants of the test
// instruction. Both share `opc`; the qword variant adds REX_W.
5854 multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
5855                             X86SchedWriteWidths sched> {
5856   defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
5857                                  avx512vl_i32_info>;
5858   defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
5859                                  avx512vl_i64_info>, REX_W;
// Word ("w", i16) and byte ("b", i8) element variants of the test instruction.
// The 512-bit records require HasBWI; the 256/128-bit records require HasVLX
// and HasBWI. Only avx512_vptest is instantiated, so these element sizes get
// rr/rm forms but no broadcast (rmb) form. Both share `opc`; the word variants
// add REX_W.
5862 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5863                             X86SchedWriteWidths sched> {
5864   let Predicates = [HasBWI] in {
5865   defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
5866                             v32i16_info>, EVEX_V512, REX_W;
5867   defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
5868                             v64i8_info>, EVEX_V512;
5869   }
5871   let Predicates = [HasVLX, HasBWI] in {
5872   defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
5873                             v16i16x_info>, EVEX_V256, REX_W;
5874   defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
5875                             v8i16x_info>, EVEX_V128, REX_W;
5876   defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
5877                             v32i8x_info>, EVEX_V256;
5878   defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
5879                             v16i8x_info>, EVEX_V128;
5880   }
// Combines the byte/word variants (opcode opc_wb) and the dword/qword variants
// (opcode opc_dq) of a test instruction under one defm prefix.
5883 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5884                                    X86SchedWriteWidths sched> :
5885   avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
5886   avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
// VPTESTM and VPTESTNM share opcodes (0x26 for b/w, 0x27 for d/q) and the T8
// map; they are distinguished only by the prefix class (PD vs. XS).
5888 defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
5889                                          SchedWriteVecLogic>, T8, PD;
5890 defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
5891                                          SchedWriteVecLogic>, T8, XS;
5893 //===----------------------------------------------------------------------===//
5894 // AVX-512  Shift instructions
5895 //===----------------------------------------------------------------------===//
// Shift/rotate-by-immediate forms for one vector type `_`:
//   ri - register source, encoded with format ImmFormR.
//   mi - full-vector memory source (_.MemOp, _.LdFrag), encoded with ImmFormM.
// The count is a u8imm operand matched as (i8 timm). No EVEX/prefix classes are
// attached here; they are supplied where the multiclass is instantiated.
5897 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5898                             string OpcodeStr, SDNode OpNode,
5899                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5900   let ExeDomain = _.ExeDomain in {
5901   defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5902                    (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5903                       "$src2, $src1", "$src1, $src2",
5904                    (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
5905                    Sched<[sched]>;
5906   defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5907                    (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5908                        "$src2, $src1", "$src1, $src2",
5909                    (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
5910                           (i8 timm:$src2)))>,
5911                    Sched<[sched.Folded]>;
5912   }
// Broadcast-memory (mbi) form of shift/rotate-by-immediate: the source is a
// scalar memory operand (_.ScalarMemOp) loaded with _.BroadcastLdFrag, printed
// with _.BroadcastStr and encoded with EVEX_B; the count is a u8imm.
5915 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5916                              string OpcodeStr, SDNode OpNode,
5917                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5918   let ExeDomain = _.ExeDomain in
5919   defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5920                    (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5921       "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
5922      (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
5923      EVEX_B, Sched<[sched.Folded]>;
// Shift forms whose count comes from a second vector operand rather than an
// immediate. The value being shifted has type `_`; the count operand is always
// a VR128X register (rr) or an i128mem memory operand (rm) of type SrcVT,
// regardless of the width of `_`.
5926 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5927                             X86FoldableSchedWrite sched, ValueType SrcVT,
5928                             X86VectorVTInfo _> {
5929    // src2 is always 128-bit
5930   let ExeDomain = _.ExeDomain in {
5931   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5932                    (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5933                       "$src2, $src1", "$src1, $src2",
5934                    (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
5935                    AVX512BIBase, EVEX, VVVV, Sched<[sched]>;
5936   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5937                    (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5938                        "$src2, $src1", "$src1, $src2",
5939                    (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
5940                    AVX512BIBase,
5941                    EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
5942   }
// Expands avx512_shift_rrm at 512/256/128-bit widths. The 512-bit record
// requires `prd`; the 256/128-bit records require `prd` and HasVLX.
// The EVEX_CD8 tuple kind differs per width (CD8VQ / CD8VH / CD8VF).
// NOTE(review): presumably this is because the memory operand is i128mem at
// every width (quarter of 512, half of 256, full 128) - confirm against the
// CD8 tuple definitions.
5945 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5946                               X86SchedWriteWidths sched, ValueType SrcVT,
5947                               AVX512VLVectorVTInfo VTInfo,
5948                               Predicate prd> {
5949   let Predicates = [prd] in
5950   defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
5951                                VTInfo.info512>, EVEX_V512,
5952                                EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5953   let Predicates = [prd, HasVLX] in {
5954   defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
5955                                VTInfo.info256>, EVEX_V256,
5956                                EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5957   defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
5958                                VTInfo.info128>, EVEX_V128,
5959                                EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
5960   }
// Dword, qword and word element variants of the vector-count shifts, each with
// its own opcode (opcd/opcq/opcw) and a 128-bit count type (v4i32 / v2i64 /
// v8i16). D and Q are predicated on HasAVX512, W on HasBWI; Q adds REX_W.
5963 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5964                               string OpcodeStr, SDNode OpNode,
5965                               X86SchedWriteWidths sched> {
5966   defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
5967                               avx512vl_i32_info, HasAVX512>;
5968   defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
5969                               avx512vl_i64_info, HasAVX512>, REX_W;
5970   defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
5971                               avx512vl_i16_info, HasBWI>;
// Expands the immediate-count forms (ri/mi from avx512_shift_rmi, mbi from
// avx512_shift_rmbi) at 512/256/128-bit widths of VTInfo. The 512-bit records
// require HasAVX512; the 256/128-bit records require HasAVX512 and HasVLX.
5974 multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5975                                   string OpcodeStr, SDNode OpNode,
5976                                   X86SchedWriteWidths sched,
5977                                   AVX512VLVectorVTInfo VTInfo> {
5978   let Predicates = [HasAVX512] in
5979   defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5980                               sched.ZMM, VTInfo.info512>,
5981              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
5982                                VTInfo.info512>, EVEX_V512;
5983   let Predicates = [HasAVX512, HasVLX] in {
5984   defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5985                               sched.YMM, VTInfo.info256>,
5986              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
5987                                VTInfo.info256>, EVEX_V256;
5988   defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5989                               sched.XMM, VTInfo.info128>,
5990              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
5991                                VTInfo.info128>, EVEX_V128;
5992   }
// Word-element immediate-count shifts at 512/256/128-bit widths. The 512-bit
// record requires HasBWI; the 256/128-bit records require HasVLX and HasBWI.
// Only the ri/mi forms are instantiated (no broadcast mbi form), and every
// record is tagged WIG. OpcodeStr is used as given, so the caller passes the
// full mnemonic including the "w" suffix.
5995 multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
5996                               string OpcodeStr, SDNode OpNode,
5997                               X86SchedWriteWidths sched> {
5998   let Predicates = [HasBWI] in
5999   defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6000                                sched.ZMM, v32i16_info>, EVEX_V512, WIG;
6001   let Predicates = [HasVLX, HasBWI] in {
6002   defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6003                                sched.YMM, v16i16x_info>, EVEX_V256, WIG;
6004   defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6005                                sched.XMM, v8i16x_info>, EVEX_V128, WIG;
6006   }
// Dword ("d", opcode opcd) and qword ("q", opcode opcq) immediate-count
// variants. D uses EVEX_CD8<32, CD8VF>; Q uses EVEX_CD8<64, CD8VF> and adds
// REX_W, so the two remain distinct even when opcd and opcq are equal.
6009 multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
6010                                Format ImmFormR, Format ImmFormM,
6011                                string OpcodeStr, SDNode OpNode,
6012                                X86SchedWriteWidths sched> {
6013   defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
6014                                  sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
6015   defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
6016                                  sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;
// Shift/rotate by immediate. The operation is selected by the MRMn format
// (MRM2 = vpsrl, MRM6 = vpsll, MRM4 = vpsra, MRM0 = vpror, MRM1 = vprol) on top
// of shared opcodes: 0x71 for word, 0x72/0x73 for dword/qword. VPSRA, VPROR
// and VPROL pass 0x72 for both the dword and the qword variant; the qword
// records are told apart by the REX_W added in avx512_shift_rmi_dq.
// VPROR/VPROL have no word-element form.
6019 defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
6020                                  SchedWriteVecShiftImm>,
6021              avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
6022                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6024 defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
6025                                  SchedWriteVecShiftImm>,
6026              avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
6027                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6029 defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
6030                                  SchedWriteVecShiftImm>,
6031              avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
6032                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6034 defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
6035                                  SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6036 defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
6037                                  SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
// Shift by a count held in a 128-bit vector register or memory operand. These
// reuse the VPSLL/VPSRA/VPSRL defm prefixes; the generated records differ from
// the immediate forms above by their rr/rm suffixes (vs. ri/mi/mbi).
6039 defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
6040                                 SchedWriteVecShift>;
6041 defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
6042                                 SchedWriteVecShift>;
6043 defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
6044                                 SchedWriteVecShift>;
6046 // Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Each pattern places the narrow source into the low sub-register of an
// IMPLICIT_DEF v8i64 (sub_ymm for v4i64, sub_xmm for v2i64), applies the
// 512-bit VPSRAQZrr / VPSRAQZri, and extracts the same sub-register again.
// The count operand ($src2) is passed through unchanged: a VR128X register for
// X86vsra, a timm for X86vsrai.
6047 let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
6048   def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
6049             (EXTRACT_SUBREG (v8i64
6050               (VPSRAQZrr
6051                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6052                  VR128X:$src2)), sub_ymm)>;
6054   def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6055             (EXTRACT_SUBREG (v8i64
6056               (VPSRAQZrr
6057                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6058                  VR128X:$src2)), sub_xmm)>;
6060   def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
6061             (EXTRACT_SUBREG (v8i64
6062               (VPSRAQZri
6063                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6064                  timm:$src2)), sub_ymm)>;
6066   def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
6067             (EXTRACT_SUBREG (v8i64
6068               (VPSRAQZri
6069                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6070                  timm:$src2)), sub_xmm)>;
6073 //===-------------------------------------------------------------------===//
6074 // Variable Bit Shifts
6075 //===-------------------------------------------------------------------===//
// Variable shift/rotate forms where the count operand is a full vector of the
// same type `_` as the shifted value:
//   rr - count in a register.
//   rm - count loaded from a full-vector memory operand (_.MemOp, _.LdFrag).
// Both forms use AVX5128IBase with EVEX and VVVV.
6077 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
6078                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6079   let ExeDomain = _.ExeDomain in {
6080   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6081                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6082                       "$src2, $src1", "$src1, $src2",
6083                    (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
6084                    AVX5128IBase, EVEX, VVVV, Sched<[sched]>;
6085   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6086                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6087                        "$src2, $src1", "$src1, $src2",
6088                    (_.VT (OpNode _.RC:$src1,
6089                    (_.VT (_.LdFrag addr:$src2))))>,
6090                    AVX5128IBase, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
6091                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6092   }
// Broadcast-memory (rmb) form of the variable shift/rotate: the count operand
// is a scalar memory operand (_.ScalarMemOp) loaded with _.BroadcastLdFrag,
// printed with _.BroadcastStr and encoded with EVEX_B.
6095 multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
6096                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6097   let ExeDomain = _.ExeDomain in
6098   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6099                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6100                     "${src2}"#_.BroadcastStr#", $src1",
6101                     "$src1, ${src2}"#_.BroadcastStr,
6102                     (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
6103                     AVX5128IBase, EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
6104                     Sched<[sched.Folded, sched.ReadAfterFold]>;
// Expands the rr/rm forms (avx512_var_shift) plus the broadcast form
// (avx512_var_shift_mb) at 512/256/128-bit widths of `_`. The 512-bit records
// require HasAVX512; the 256/128-bit records require HasAVX512 and HasVLX.
6107 multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6108                                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
6109   let Predicates  = [HasAVX512] in
6110   defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
6111            avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
6113   let Predicates = [HasAVX512, HasVLX] in {
6114   defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
6115               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
6116   defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
6117               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
6118   }
// Dword ("d", i32) and qword ("q", i64) element variants of a variable
// shift/rotate. Both share `opc`; the qword variant adds REX_W.
6121 multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
6122                                   SDNode OpNode, X86SchedWriteWidths sched> {
6123   defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
6124                                  avx512vl_i32_info>;
6125   defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
6126                                  avx512vl_i64_info>, REX_W;
6129 // Use 512bit version to implement 128/256 bit in case NoVLX.
// Emits two patterns under predicate list `p`, one for _.info256.VT and one for
// _.info128.VT. Both operands are inserted into the low sub-register of an
// IMPLICIT_DEF 512-bit value, the instruction named OpcodeStr#"Zrr" is
// applied, and the matching low sub-register is extracted as the result.
// OpcodeStr here is the record-name prefix (e.g. "VPSRAVQ"), not an assembly
// mnemonic.
6130 multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6131                                      SDNode OpNode, list<Predicate> p> {
6132   let Predicates = p in {
6133   def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6134                                   (_.info256.VT _.info256.RC:$src2))),
6135             (EXTRACT_SUBREG
6136                 (!cast<Instruction>(OpcodeStr#"Zrr")
6137                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6138                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6139              sub_ymm)>;
6141   def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6142                                   (_.info128.VT _.info128.RC:$src2))),
6143             (EXTRACT_SUBREG
6144                 (!cast<Instruction>(OpcodeStr#"Zrr")
6145                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6146                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
6147              sub_xmm)>;
6148   }
// Word-element variable shifts at 512/256/128-bit widths. The 512-bit record
// requires HasBWI; the 256/128-bit records require HasVLX and HasBWI. Only the
// rr/rm forms are instantiated (no broadcast rmb form), and every record adds
// REX_W. OpcodeStr is used as given, so the caller passes the full mnemonic
// including the "w" suffix.
6150 multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6151                               SDNode OpNode, X86SchedWriteWidths sched> {
6152   let Predicates = [HasBWI] in
6153   defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6154               EVEX_V512, REX_W;
6155   let Predicates = [HasVLX, HasBWI] in {
6157   defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6158               EVEX_V256, REX_W;
6159   defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
6160               EVEX_V128, REX_W;
6161   }
// Per-element variable shifts: dword/qword forms share one opcode
// (0x47/0x46/0x45) and the word forms use a separate opcode (0x12/0x11/0x10).
6164 defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
6165               avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
6167 defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
6168               avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
6170 defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
6171               avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
// Per-element variable rotates, selected from the generic rotr/rotl nodes.
// Only dword/qword forms are defined; there is no word-element variant.
6173 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6174 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
// Without VLX, lower the 128/256-bit forms through the 512-bit instruction:
// qword arithmetic shift under HasAVX512, and the three word-element shifts
// under HasBWI. All four also require NoVLX and HasEVEX512.
6176 defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX, HasEVEX512]>;
6177 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX, HasEVEX512]>;
6178 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX, HasEVEX512]>;
6179 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX, HasEVEX512]>;
// NoVLX fallback for 128/256-bit rotate-left. Every pattern below has the same
// shape: each narrow vector operand is placed in the low part of an otherwise
// undefined (IMPLICIT_DEF) 512-bit value with INSERT_SUBREG, the 512-bit
// instruction is applied, and the low sub_xmm/sub_ymm part of the result is
// taken back out with EXTRACT_SUBREG.
//   rotl      (variable count)  -> VPROLVQZrr / VPROLVDZrr
//   X86vrotli (immediate count) -> VPROLQZri  / VPROLDZri
6182 // Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
6183 let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
  // Variable-count rotate, 64-bit elements (v2i64, v4i64).
6184   def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6185             (EXTRACT_SUBREG (v8i64
6186               (VPROLVQZrr
6187                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6188                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6189                        sub_xmm)>;
6190   def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6191             (EXTRACT_SUBREG (v8i64
6192               (VPROLVQZrr
6193                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6194                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6195                        sub_ymm)>;
  // Variable-count rotate, 32-bit elements (v4i32, v8i32).
6197   def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6198             (EXTRACT_SUBREG (v16i32
6199               (VPROLVDZrr
6200                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6201                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6202                         sub_xmm)>;
6203   def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6204             (EXTRACT_SUBREG (v16i32
6205               (VPROLVDZrr
6206                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6207                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6208                         sub_ymm)>;
  // Immediate-count rotate, 64-bit elements; the timm operand is passed through.
6210   def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
6211             (EXTRACT_SUBREG (v8i64
6212               (VPROLQZri
6213                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6214                         timm:$src2)), sub_xmm)>;
6215   def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
6216             (EXTRACT_SUBREG (v8i64
6217               (VPROLQZri
6218                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6219                        timm:$src2)), sub_ymm)>;
  // Immediate-count rotate, 32-bit elements.
6221   def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
6222             (EXTRACT_SUBREG (v16i32
6223               (VPROLDZri
6224                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6225                         timm:$src2)), sub_xmm)>;
6226   def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
6227             (EXTRACT_SUBREG (v16i32
6228               (VPROLDZri
6229                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6230                         timm:$src2)), sub_ymm)>;
// NoVLX fallback for 128/256-bit rotate-right. Every pattern below has the
// same shape: each narrow vector operand is placed in the low part of an
// otherwise undefined (IMPLICIT_DEF) 512-bit value with INSERT_SUBREG, the
// 512-bit instruction is applied, and the low sub_xmm/sub_ymm part of the
// result is taken back out with EXTRACT_SUBREG.
//   rotr      (variable count)  -> VPRORVQZrr / VPRORVDZrr
//   X86vrotri (immediate count) -> VPRORQZri  / VPRORDZri
6233 // Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
6234 let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
  // Variable-count rotate, 64-bit elements (v2i64, v4i64).
6235   def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6236             (EXTRACT_SUBREG (v8i64
6237               (VPRORVQZrr
6238                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6239                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6240                        sub_xmm)>;
6241   def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6242             (EXTRACT_SUBREG (v8i64
6243               (VPRORVQZrr
6244                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6245                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6246                        sub_ymm)>;
  // Variable-count rotate, 32-bit elements (v4i32, v8i32).
6248   def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6249             (EXTRACT_SUBREG (v16i32
6250               (VPRORVDZrr
6251                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6252                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6253                         sub_xmm)>;
6254   def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6255             (EXTRACT_SUBREG (v16i32
6256               (VPRORVDZrr
6257                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6258                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6259                         sub_ymm)>;
  // Immediate-count rotate, 64-bit elements; the timm operand is passed through.
6261   def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
6262             (EXTRACT_SUBREG (v8i64
6263               (VPRORQZri
6264                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6265                         timm:$src2)), sub_xmm)>;
6266   def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
6267             (EXTRACT_SUBREG (v8i64
6268               (VPRORQZri
6269                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6270                        timm:$src2)), sub_ymm)>;
  // Immediate-count rotate, 32-bit elements.
6272   def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
6273             (EXTRACT_SUBREG (v16i32
6274               (VPRORDZri
6275                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6276                         timm:$src2)), sub_xmm)>;
6277   def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
6278             (EXTRACT_SUBREG (v16i32
6279               (VPRORDZri
6280                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6281                         timm:$src2)), sub_ymm)>;
6284 //===-------------------------------------------------------------------===//
6285 // 1-src variable permutation VPERMW/D/Q
6286 //===-------------------------------------------------------------------===//
// avx512_vperm_dq_sizes: variable-permute instantiations for the 512-bit and
// 256-bit members of the vector-type family `_`. Each width combines
// avx512_var_shift with avx512_var_shift_mb using the same opcode, node and
// scheduling class.
//   Z    : _.info512, EVEX_V512 -- requires HasAVX512
//   Z256 : _.info256, EVEX_V256 -- requires HasAVX512 and HasVLX
// No 128-bit (_.info128) variant is defined by this multiclass.
6288 multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6289                                  X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6290   let Predicates  = [HasAVX512] in
6291   defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6292            avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6294   let Predicates = [HasAVX512, HasVLX] in
6295   defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6296               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
// avx512_vpermi_dq_sizes: permute instantiations built from avx512_shift_rmi
// plus avx512_shift_rmbi, for the 512-bit and 256-bit members of VTInfo.
// ImmFormR is forwarded only to avx512_shift_rmi; ImmFormM is forwarded to both.
//   Z    : VTInfo.info512, EVEX_V512 -- requires HasAVX512
//   Z256 : VTInfo.info256, EVEX_V256 -- requires HasAVX512 and HasVLX
// No 128-bit variant is defined by this multiclass.
6299 multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6300                                  string OpcodeStr, SDNode OpNode,
6301                                  X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6302   let Predicates = [HasAVX512] in
6303   defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6304                               sched, VTInfo.info512>,
6305              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6306                                sched, VTInfo.info512>, EVEX_V512;
6307   let Predicates = [HasAVX512, HasVLX] in
6308   defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6309                               sched, VTInfo.info256>,
6310              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6311                                sched, VTInfo.info256>, EVEX_V256;
// avx512_vperm_bw: variable-permute instantiations at all three vector widths,
// gated on the caller-supplied feature predicate `prd`. Only avx512_var_shift
// is used (no avx512_var_shift_mb half, unlike avx512_vperm_dq_sizes).
//   Z    : _.info512, EVEX_V512 -- requires prd
//   Z256 : _.info256, EVEX_V256 -- requires HasVLX and prd
//   Z128 : _.info128, EVEX_V128 -- requires HasVLX and prd
6314 multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6315                               Predicate prd, SDNode OpNode,
6316                               X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6317   let Predicates = [prd] in
6318   defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6319               EVEX_V512 ;
6320   let Predicates = [HasVLX, prd] in {
6321   defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6322               EVEX_V256 ;
6323   defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
6324               EVEX_V128 ;
6325   }
// Byte/word variable permutes. VPERMW and VPERMB use the same opcode (0x8D)
// and the same X86VPermv node; they differ in feature predicate (HasBWI vs
// HasVBMI), element type info, and the REX_W tag present only on VPERMW.
6328 defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6329                                WriteVarShuffle256, avx512vl_i16_info>, REX_W;
6330 defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6331                                WriteVarShuffle256, avx512vl_i8_info>;
// Dword/qword variable permutes (X86VPermv). The integer pair shares opcode
// 0x36 and the floating-point pair shares 0x16; within each pair the 64-bit
// element form is the one tagged REX_W.
6333 defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6334                                     WriteVarShuffle256, avx512vl_i32_info>;
6335 defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6336                                     WriteVarShuffle256, avx512vl_i64_info>, REX_W;
6337 defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6338                                      WriteFVarShuffle256, avx512vl_f32_info>;
6339 defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6340                                      WriteFVarShuffle256, avx512vl_f64_info>, REX_W;
// Second set of VPERMQ/VPERMPD definitions, matched on X86VPermi through
// avx512_vpermi_dq_sizes (opcodes 0x00 / 0x01, AVX512AIi8Base). These extend
// the VPERMQ/VPERMPD name prefixes already used above.
6342 defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6343                              X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6344                              EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
6345 defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6346                              X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6347                              EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
6349 //===----------------------------------------------------------------------===//
6350 // AVX-512 - VPERMIL
6351 //===----------------------------------------------------------------------===//
// avx512_permil_vec: maskable two-operand permute with a vector control
// operand, for one vector width.
//   OpcVar    - opcode byte shared by all three forms
//   OpcodeStr - assembly mnemonic
//   OpNode    - SDNode matched as (OpNode $src1, control)
//   sched     - scheduling class; memory forms use sched.Folded and
//               sched.ReadAfterFold
//   _         - type info of the data operand $src1 and of the result
//   Ctrl      - type info of the control operand $src2
// Forms defined:
//   rr  - control in a Ctrl.RC register
//   rm  - control loaded from memory with Ctrl.LdFrag
//   rmb - control loaded with Ctrl.BroadcastLdFrag (EVEX_B); the operand type
//         and the assembly broadcast suffix come from `_` (_.ScalarMemOp,
//         _.BroadcastStr) rather than from Ctrl
6353 multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6354                              X86FoldableSchedWrite sched, X86VectorVTInfo _,
6355                              X86VectorVTInfo Ctrl> {
6356   defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6357                   (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6358                   "$src2, $src1", "$src1, $src2",
6359                   (_.VT (OpNode _.RC:$src1,
6360                                (Ctrl.VT Ctrl.RC:$src2)))>,
6361                   T8, PD, EVEX, VVVV, Sched<[sched]>;
6362   defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6363                   (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6364                   "$src2, $src1", "$src1, $src2",
6365                   (_.VT (OpNode
6366                            _.RC:$src1,
6367                            (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6368                   T8, PD, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
6369                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6370   defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6371                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6372                    "${src2}"#_.BroadcastStr#", $src1",
6373                    "$src1, ${src2}"#_.BroadcastStr,
6374                    (_.VT (OpNode
6375                             _.RC:$src1,
6376                             (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
6377                    T8, PD, EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6378                    Sched<[sched.Folded, sched.ReadAfterFold]>;
// avx512_permil_vec_common: instantiates avx512_permil_vec with the
// X86VPermilpv node at each vector width, pairing the matching members of the
// data type family `_` and the control type family `Ctrl`.
//   Z    : info512, sched.ZMM, EVEX_V512 -- requires HasAVX512
//   Z128 : info128, sched.XMM, EVEX_V128 -- requires HasAVX512 and HasVLX
//   Z256 : info256, sched.YMM, EVEX_V256 -- requires HasAVX512 and HasVLX
6381 multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6382                                     X86SchedWriteWidths sched,
6383                                     AVX512VLVectorVTInfo _,
6384                                     AVX512VLVectorVTInfo Ctrl> {
6385   let Predicates = [HasAVX512] in {
6386     defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6387                                   _.info512, Ctrl.info512>, EVEX_V512;
6388   }
6389   let Predicates = [HasAVX512, HasVLX] in {
6390     defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6391                                   _.info128, Ctrl.info128>, EVEX_V128;
6392     defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6393                                   _.info256, Ctrl.info256>, EVEX_V256;
6394   }
// avx512_permil: bundles both flavours of a VPERMIL instruction under NAME.
//   OpcVar - opcode of the vector-control forms (avx512_permil_vec_common,
//            SchedWriteFVarShuffle)
//   OpcImm - opcode of the forms built by avx512_shift_rmi_sizes with the
//            X86VPermilpi node (SchedWriteFShuffle, AVX512AIi8Base)
//   _      - data vector type family; Ctrl - control vector type family
// The EVEX_CD8 element size of the OpcImm forms is taken from _.info128.
6397 multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6398                          AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6399   defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6400                                       _, Ctrl>;
6401   defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6402                                     X86VPermilpi, SchedWriteFShuffle, _>,
6403                     EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
// VPERMILPS / VPERMILPD. Arguments are (mnemonic, OpcImm, OpcVar, data type
// family, control type family): the control vectors are integer vectors of the
// same element width as the data (i32 for f32, i64 for f64). The PD form is
// additionally tagged REX_W. Each defm is placed in its own execution domain.
6406 let ExeDomain = SSEPackedSingle in
6407 defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6408                                avx512vl_i32_info>;
6409 let ExeDomain = SSEPackedDouble in
6410 defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6411                                avx512vl_i64_info>, REX_W;
6413 //===----------------------------------------------------------------------===//
6414 // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6415 //===----------------------------------------------------------------------===//
// All three shuffles use opcode 0x70 and SchedWriteShuffle; they are told
// apart by their instruction base class (AVX512BIi8Base / AVX512XSIi8Base /
// AVX512XDIi8Base). VPSHUFD goes through avx512_shift_rmi_sizes with an
// explicit i32 type family and EVEX_CD8<32, CD8VF>; the word shuffles go
// through avx512_shift_rmi_w.
// Note the defm prefixes are VPSHUFH / VPSHUFL while the mnemonics are
// "vpshufhw" / "vpshuflw".
6417 defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6418                              X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6419                              EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6420 defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6421                                   X86PShufhw, SchedWriteShuffle>,
6422                                   EVEX, AVX512XSIi8Base;
6423 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6424                                   X86PShuflw, SchedWriteShuffle>,
6425                                   EVEX, AVX512XDIi8Base;
6427 //===----------------------------------------------------------------------===//
6428 // AVX-512 - VPSHUFB
6429 //===----------------------------------------------------------------------===//
// avx512_pshufb_sizes: byte-element instantiations of avx512_var_shift at the
// three vector widths.
//   Z    : v64i8_info,  sched.ZMM, EVEX_V512 -- requires HasBWI
//   Z256 : v32i8x_info, sched.YMM, EVEX_V256 -- requires HasVLX and HasBWI
//   Z128 : v16i8x_info, sched.XMM, EVEX_V128 -- requires HasVLX and HasBWI
6431 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6432                                X86SchedWriteWidths sched> {
6433   let Predicates = [HasBWI] in
6434   defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6435                               EVEX_V512;
6437   let Predicates = [HasVLX, HasBWI] in {
6438   defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6439                               EVEX_V256;
6440   defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6441                               EVEX_V128;
6442   }
// VPSHUFB: opcode 0x00, matched on X86pshufb, scheduled as
// SchedWriteVarShuffle, and tagged WIG.
6445 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6446                                   SchedWriteVarShuffle>, WIG;
6448 //===----------------------------------------------------------------------===//
6449 // Move Low to High and High to Low packed FP Instructions
6450 //===----------------------------------------------------------------------===//
// Register-register VMOVLHPS (opcode 0x16, node X86Movlhps) and VMOVHLPS
// (opcode 0x12, node X86Movhlps) on VR128X operands, producing v4f32.
// Both print as "<mnemonic> $src2, $src1, $dst" in AT&T syntax and
// "<mnemonic> $dst, $src1, $src2" in Intel syntax.
6452 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6453           (ins VR128X:$src1, VR128X:$src2),
6454           "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6455           [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6456           Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV;
// Only VMOVHLPSZrr is marked commutable; the brace-less `let` covers just the
// single def that follows it.
6457 let isCommutable = 1 in
6458 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6459           (ins VR128X:$src1, VR128X:$src2),
6460           "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6461           [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6462           Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV;
6464 //===----------------------------------------------------------------------===//
6465 // VMOVHPS/PD VMOVLPS Instructions
6466 // All patterns were taken from the SSE implementation.
6467 //===----------------------------------------------------------------------===//
// avx512_mov_hilo_packed: defines the load form `rm` of a move that combines a
// register operand with a 64-bit memory operand.
//   opc       - opcode byte
//   OpcodeStr - assembly mnemonic
//   OpNode    - pattern operator combining $src1 with the loaded value
//               (callers may pass null_frag)
//   _         - vector type info supplying RC, VT and ExeDomain
// In the pattern the memory operand is loaded as an f64 (loadf64), turned
// into a v2f64 with scalar_to_vector, and bitconverted to _.VT before being
// passed to OpNode together with $src1.
6469 multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6470                                   SDPatternOperator OpNode,
6471                                   X86VectorVTInfo _> {
  // mayLoad / hasSideEffects are stated explicitly on the def.
6472   let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6473   def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6474                   (ins _.RC:$src1, f64mem:$src2),
6475                   !strconcat(OpcodeStr,
6476                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6477                   [(set _.RC:$dst,
6478                      (OpNode _.RC:$src1,
6479                        (_.VT (bitconvert
6480                          (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6481                   Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX, VVVV;
// 128-bit load forms of VMOVHPS/VMOVHPD (opcode 0x16) and VMOVLPS/VMOVLPD
// (opcode 0x12). The PS forms pass null_frag and use v4f32x_info with
// EVEX_CD8<32, CD8VT2>; the PD forms pass a real node (X86Unpckl for VMOVHPD,
// X86Movsd for VMOVLPD) and use v2f64x_info with EVEX_CD8<64, CD8VT1>, PD and
// REX_W.
6484 // No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
6485 // SSE1. And MOVLPS pattern is even more complex.
6486 defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6487                                   v4f32x_info>, EVEX_CD8<32, CD8VT2>, TB;
6488 defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6489                                   v2f64x_info>, EVEX_CD8<64, CD8VT1>, TB, PD, REX_W;
6490 defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6491                                   v4f32x_info>, EVEX_CD8<32, CD8VT2>, TB;
6492 defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6493                                   v2f64x_info>, EVEX_CD8<64, CD8VT1>, TB, PD, REX_W;
// Extra selection patterns (HasAVX512) for the case where the second operand
// is an X86vzload64 of the address: X86Unpckl selects VMOVHPDZ128rm and
// X86Movsd selects VMOVLPDZ128rm, with the address folded into the instruction.
6495 let Predicates = [HasAVX512] in {
6496   // VMOVHPD patterns
6497   def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
6498             (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6500   // VMOVLPD patterns
6501   def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
6502             (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
// Store forms (MRMDestMem, f64mem destination, VR128X source) of
// VMOVHPS/VMOVHPD (opcode 0x17) and VMOVLPS/VMOVLPD (opcode 0x13), all
// scheduled as WriteFStore.
// The PS forms have an empty pattern list, so mayStore = 1 and
// hasSideEffects = 0 are stated explicitly; each brace-less `let` covers only
// the single def that follows it. The PD forms carry a store pattern instead.
6505 let SchedRW = [WriteFStore] in {
6506 let mayStore = 1, hasSideEffects = 0 in
6507 def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6508                        (ins f64mem:$dst, VR128X:$src),
6509                        "vmovhps\t{$src, $dst|$dst, $src}",
6510                        []>, EVEX, EVEX_CD8<32, CD8VT2>;
// Pattern: store element 0 of (X86Unpckh $src, $src) as an f64.
6511 def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6512                        (ins f64mem:$dst, VR128X:$src),
6513                        "vmovhpd\t{$src, $dst|$dst, $src}",
6514                        [(store (f64 (extractelt
6515                                      (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6516                                      (iPTR 0))), addr:$dst)]>,
6517                        EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
6518 let mayStore = 1, hasSideEffects = 0 in
6519 def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6520                        (ins f64mem:$dst, VR128X:$src),
6521                        "vmovlps\t{$src, $dst|$dst, $src}",
6522                        []>, EVEX, EVEX_CD8<32, CD8VT2>;
// Pattern: store element 0 of $src (viewed as v2f64) as an f64.
6523 def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6524                        (ins f64mem:$dst, VR128X:$src),
6525                        "vmovlpd\t{$src, $dst|$dst, $src}",
6526                        [(store (f64 (extractelt (v2f64 VR128X:$src),
6527                                      (iPTR 0))), addr:$dst)]>,
6528                        EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
6529 } // SchedRW
// Additional VMOVHPD store pattern (HasAVX512): storing element 0 of
// (X86VPermilpi $src, 1) as an f64 also selects VMOVHPDZ128mr, with the
// original (unpermuted) $src as the register operand.
6531 let Predicates = [HasAVX512] in {
6532   // VMOVHPD patterns
6533   def : Pat<(store (f64 (extractelt
6534                            (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6535                            (iPTR 0))), addr:$dst),
6536            (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6538 //===----------------------------------------------------------------------===//
6539 // FMA - Fused Multiply Operations
// avx512_fma3p_213_rm: packed FMA, "213" operand form, for one vector width.
//   opc        - opcode byte
//   OpcodeStr  - assembly mnemonic
//   OpNode     - operator used in the first (unmasked) pattern
//   MaskOpNode - operator used in the second (masked) pattern
//   sched      - scheduling class; memory forms use sched.Folded followed by
//                two sched.ReadAfterFold entries
//   _          - vector type info
// $src1 is tied to $dst and therefore does not appear in the `ins` list.
// Every pattern passes the operands to the node as ($src2, $src1, $src3).
// All forms read MXCSR and are marked mayRaiseFPException.
// Forms defined:
//   r  - $src3 in a register
//   m  - $src3 loaded with _.LdFrag
//   mb - $src3 loaded with _.BroadcastLdFrag (EVEX_B)
6542 multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6543                                SDNode MaskOpNode, X86FoldableSchedWrite sched,
6544                                X86VectorVTInfo _> {
6545   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6546       Uses = [MXCSR], mayRaiseFPException = 1 in {
6547   defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6548           (ins _.RC:$src2, _.RC:$src3),
6549           OpcodeStr, "$src3, $src2", "$src2, $src3",
6550           (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
6551           (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6552           EVEX, VVVV, Sched<[sched]>;
6554   defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6555           (ins _.RC:$src2, _.MemOp:$src3),
6556           OpcodeStr, "$src3, $src2", "$src2, $src3",
6557           (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
6558           (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6559           EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
6560                           sched.ReadAfterFold]>;
6562   defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6563             (ins _.RC:$src2, _.ScalarMemOp:$src3),
6564             OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6565             !strconcat("$src2, ${src3}", _.BroadcastStr ),
6566             (OpNode _.RC:$src2,
6567              _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
6568             (MaskOpNode _.RC:$src2,
6569              _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
6570             EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
6571                                     sched.ReadAfterFold]>;
6572   }
// avx512_fma3_213_round: register form `rb` of the "213" FMA with an explicit
// rounding-control operand (AVX512RC:$rc), tagged EVEX_B and EVEX_RC.
// $src1 is tied to $dst. The same OpNode is used for both patterns and takes
// ($src2, $src1, $src3, $rc), with $rc matched as an i32 timm.
// Unlike avx512_fma3p_213_rm, the `let` here sets Uses = [MXCSR] only and does
// not set mayRaiseFPException.
6575 multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6576                                  X86FoldableSchedWrite sched,
6577                                  X86VectorVTInfo _> {
6578   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6579       Uses = [MXCSR] in
6580   defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6581           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6582           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6583           (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
6584           (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6585           EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
// avx512_fma3p_213_common: instantiates the "213" FMA at each vector width of
// the type family `_`, gated on `prd` (default HasAVX512).
//   Z    : avx512_fma3p_213_rm + avx512_fma3_213_round, sched.ZMM, EVEX_V512
//          -- requires prd
//   Z256 : avx512_fma3p_213_rm only, sched.YMM, EVEX_V256
//          -- requires HasVLX and prd
//   Z128 : avx512_fma3p_213_rm only, sched.XMM, EVEX_V128
//          -- requires HasVLX and prd
// Only the 512-bit variant receives the rounding-control form (OpNodeRnd).
// EVEX_CD8 uses the element size of the corresponding width's type info.
6588 multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6589                                    SDNode MaskOpNode, SDNode OpNodeRnd,
6590                                    X86SchedWriteWidths sched,
6591                                    AVX512VLVectorVTInfo _,
6592                                    Predicate prd = HasAVX512> {
6593   let Predicates = [prd] in {
6594     defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6595                                       sched.ZMM, _.info512>,
6596                   avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6597                                         _.info512>,
6598                               EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6599   }
6600   let Predicates = [HasVLX, prd] in {
6601     defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6602                                     sched.YMM, _.info256>,
6603                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6604     defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6605                                     sched.XMM, _.info128>,
6606                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6607   }
// avx512_fma3p_213_f: expands one "213" FMA opcode into its three element-type
// flavours by appending "ph"/"ps"/"pd" to OpcodeStr. All use SchedWriteFMA.
//   PH : avx512vl_f16_info, predicate HasFP16, T_MAP6, PD
//   PS : avx512vl_f32_info, default predicate,  T8, PD
//   PD : avx512vl_f64_info, default predicate,  T8, PD, REX_W
6610 multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6611                               SDNode MaskOpNode, SDNode OpNodeRnd> {
6612     defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6613                                       OpNodeRnd, SchedWriteFMA,
6614                                       avx512vl_f16_info, HasFP16>, T_MAP6, PD;
6615     defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6616                                       OpNodeRnd, SchedWriteFMA,
6617                                       avx512vl_f32_info>, T8, PD;
6618     defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6619                                       OpNodeRnd, SchedWriteFMA,
6620                                       avx512vl_f64_info>, T8, PD, REX_W;
// "213" FMA instruction families. Arguments after the mnemonic are, in order:
// OpNode (unmasked pattern), MaskOpNode (masked pattern), OpNodeRnd
// (rounding-control form). VFMADDSUB213 and VFMSUBADD213 pass the same node
// for OpNode and MaskOpNode; the others pass an any_*/X86any_* operator as
// OpNode.
6623 defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
6624                                        fma, X86FmaddRnd>;
6625 defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
6626                                        X86Fmsub, X86FmsubRnd>;
6627 defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
6628                                        X86Fmaddsub, X86FmaddsubRnd>;
6629 defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
6630                                        X86Fmsubadd, X86FmsubaddRnd>;
6631 defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
6632                                        X86Fnmadd, X86FnmaddRnd>;
6633 defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
6634                                        X86Fnmsub, X86FnmsubRnd>;
// avx512_fma3p_231_rm: packed FMA, "231" operand form, for one vector width.
// Parameters have the same roles as in avx512_fma3p_213_rm.
// $src1 is tied to $dst. Patterns pass the operands to the node as
// ($src2, $src3, $src1). All forms read MXCSR and are marked
// mayRaiseFPException.
// Forms defined:
//   r  - $src3 in a register; the unmasked pattern is null_frag, so only the
//        MaskOpNode pattern is supplied for this form
//   m  - $src3 loaded with _.LdFrag
//   mb - $src3 loaded with _.BroadcastLdFrag (EVEX_B)
6637 multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6638                                SDNode MaskOpNode, X86FoldableSchedWrite sched,
6639                                X86VectorVTInfo _> {
6640   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6641       Uses = [MXCSR], mayRaiseFPException = 1 in {
6642   defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6643           (ins _.RC:$src2, _.RC:$src3),
6644           OpcodeStr, "$src3, $src2", "$src2, $src3",
6645           (null_frag),
6646           (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6647           EVEX, VVVV, Sched<[sched]>;
6649   defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6650           (ins _.RC:$src2, _.MemOp:$src3),
6651           OpcodeStr, "$src3, $src2", "$src2, $src3",
6652           (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6653           (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6654           EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
6655                           sched.ReadAfterFold]>;
6657   defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6658          (ins _.RC:$src2, _.ScalarMemOp:$src3),
6659          OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6660          "$src2, ${src3}"#_.BroadcastStr,
6661          (_.VT (OpNode _.RC:$src2,
6662                       (_.VT (_.BroadcastLdFrag addr:$src3)),
6663                       _.RC:$src1)),
6664          (_.VT (MaskOpNode _.RC:$src2,
6665                            (_.VT (_.BroadcastLdFrag addr:$src3)),
6666                            _.RC:$src1)), 1, 0>, EVEX, VVVV, EVEX_B,
6667          Sched<[sched.Folded, sched.ReadAfterFold,
6668                 sched.ReadAfterFold]>;
6669   }
// avx512_fma3_231_round: register form `rb` of the "231" FMA with an explicit
// rounding-control operand (AVX512RC:$rc), tagged EVEX_B and EVEX_RC.
// $src1 is tied to $dst. The unmasked pattern is null_frag; the masked pattern
// passes ($src2, $src3, $src1, $rc) to OpNode, with $rc matched as an i32 timm.
// The `let` sets Uses = [MXCSR] only and does not set mayRaiseFPException.
6672 multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6673                                  X86FoldableSchedWrite sched,
6674                                  X86VectorVTInfo _> {
6675   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6676       Uses = [MXCSR] in
6677   defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6678           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6679           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6680           (null_frag),
6681           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6682           1, 1>, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
// avx512_fma3p_231_common: instantiates the "231" FMA at each vector width of
// the type family `_`, gated on `prd` (default HasAVX512).
//   Z    : avx512_fma3p_231_rm + avx512_fma3_231_round, sched.ZMM, EVEX_V512
//          -- requires prd
//   Z256 : avx512_fma3p_231_rm only, sched.YMM, EVEX_V256
//          -- requires HasVLX and prd
//   Z128 : avx512_fma3p_231_rm only, sched.XMM, EVEX_V128
//          -- requires HasVLX and prd
// Only the 512-bit variant receives the rounding-control form (OpNodeRnd).
6685 multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6686                                    SDNode MaskOpNode, SDNode OpNodeRnd,
6687                                    X86SchedWriteWidths sched,
6688                                    AVX512VLVectorVTInfo _,
6689                                    Predicate prd = HasAVX512> {
6690   let Predicates = [prd] in {
6691     defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6692                                       sched.ZMM, _.info512>,
6693                   avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6694                                         _.info512>,
6695                               EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6696   }
6697   let Predicates = [HasVLX, prd] in {
6698     defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6699                                     sched.YMM, _.info256>,
6700                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6701     defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6702                                     sched.XMM, _.info128>,
6703                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6704   }
// avx512_fma3p_231_f: expands one "231" FMA opcode into its three element-type
// flavours by appending "ph"/"ps"/"pd" to OpcodeStr. All use SchedWriteFMA.
//   PH : avx512vl_f16_info, predicate HasFP16, T_MAP6, PD
//   PS : avx512vl_f32_info, default predicate,  T8, PD
//   PD : avx512vl_f64_info, default predicate,  T8, PD, REX_W
6707 multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6708                               SDNode MaskOpNode, SDNode OpNodeRnd > {
6709     defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6710                                       OpNodeRnd, SchedWriteFMA,
6711                                       avx512vl_f16_info, HasFP16>, T_MAP6, PD;
6712     defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6713                                       OpNodeRnd, SchedWriteFMA,
6714                                       avx512vl_f32_info>, T8, PD;
6715     defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6716                                       OpNodeRnd, SchedWriteFMA,
6717                                       avx512vl_f64_info>, T8, PD, REX_W;
// "231" FMA instruction families. Arguments after the mnemonic are, in order:
// OpNode (unmasked pattern), MaskOpNode (masked pattern), OpNodeRnd
// (rounding-control form). Each opcode here is 0x10 above the opcode of the
// corresponding "213" family (e.g. 0xB8 vs 0xA8 for VFMADD).
6720 defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
6721                                        fma, X86FmaddRnd>;
6722 defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
6723                                        X86Fmsub, X86FmsubRnd>;
6724 defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
6725                                        X86Fmaddsub, X86FmaddsubRnd>;
6726 defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
6727                                        X86Fmsubadd, X86FmsubaddRnd>;
6728 defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
6729                                        X86Fnmadd, X86FnmaddRnd>;
6730 defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
6731                                        X86Fnmsub, X86FnmsubRnd>;
// avx512_fma3p_132_rm: packed FMA, "132" operand form, for one vector width.
// Parameters have the same roles as in avx512_fma3p_213_rm.
// $src1 is tied to $dst. All forms read MXCSR and are marked
// mayRaiseFPException.
// Forms defined:
//   r  - $src3 in a register; the unmasked pattern is null_frag and the masked
//        pattern passes ($src1, $src3, $src2) to MaskOpNode
//   m  - $src3 loaded with _.LdFrag; patterns pass (load, $src1, $src2)
//   mb - $src3 loaded with _.BroadcastLdFrag (EVEX_B); patterns pass
//        (broadcast load, $src1, $src2)
6733 multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6734                                SDNode MaskOpNode, X86FoldableSchedWrite sched,
6735                                X86VectorVTInfo _> {
6736   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6737       Uses = [MXCSR], mayRaiseFPException = 1 in {
6738   defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6739           (ins _.RC:$src2, _.RC:$src3),
6740           OpcodeStr, "$src3, $src2", "$src2, $src3",
6741           (null_frag),
6742           (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
6743           EVEX, VVVV, Sched<[sched]>;
6745   // Pattern is in 312 order so that the load is in a different place from the
6746   // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6747   defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6748           (ins _.RC:$src2, _.MemOp:$src3),
6749           OpcodeStr, "$src3, $src2", "$src2, $src3",
6750           (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
6751           (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
6752           EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
6753                           sched.ReadAfterFold]>;
6755   // Pattern is in 312 order so that the load is in a different place from the
6756   // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6757   defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6758          (ins _.RC:$src2, _.ScalarMemOp:$src3),
6759          OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6760          "$src2, ${src3}"#_.BroadcastStr,
6761          (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6762                        _.RC:$src1, _.RC:$src2)),
6763          (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6764                            _.RC:$src1, _.RC:$src2)), 1, 0>,
6765          EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
6766                                  sched.ReadAfterFold]>;
6767   }
// Packed FMA, 132 form, register variant that takes an explicit
// rounding-control operand ($rc).  The first pattern slot is null_frag; the
// second applies OpNode to ($src1, $src3, $src2) plus the rounding immediate.
// The instruction is encoded with EVEX_B and EVEX_RC.
6770 multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6771                                  X86FoldableSchedWrite sched,
6772                                  X86VectorVTInfo _> {
6773   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6774       Uses = [MXCSR] in
6775   defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6776           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6777           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6778           (null_frag),
6779           (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
6780           1, 1>, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
// Instantiates the packed 132-form FMA for every vector width of `_`:
//   Z    - 512-bit, guarded by [prd]; also gets the rounding-control variant.
//   Z256 - 256-bit, guarded by [HasVLX, prd].
//   Z128 - 128-bit, guarded by [HasVLX, prd].
// `prd` defaults to HasAVX512.
6783 multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6784                                    SDNode MaskOpNode, SDNode OpNodeRnd,
6785                                    X86SchedWriteWidths sched,
6786                                    AVX512VLVectorVTInfo _,
6787                                    Predicate prd = HasAVX512> {
6788   let Predicates = [prd] in {
6789     defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6790                                       sched.ZMM, _.info512>,
6791                   avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6792                                         _.info512>,
6793                               EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6794   }
6795   let Predicates = [HasVLX, prd] in {
6796     defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6797                                     sched.YMM, _.info256>,
6798                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6799     defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6800                                     sched.XMM, _.info128>,
6801                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6802   }
// Expands one packed 132-form FMA operation over the three element types by
// appending "ph", "ps" or "pd" to OpcodeStr.  The PH variant is guarded by
// HasFP16 and uses T_MAP6; PS and PD use T8, and PD additionally sets REX_W.
6805 multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6806                               SDNode MaskOpNode, SDNode OpNodeRnd > {
6807     defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6808                                       OpNodeRnd, SchedWriteFMA,
6809                                       avx512vl_f16_info, HasFP16>, T_MAP6, PD;
6810     defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6811                                       OpNodeRnd, SchedWriteFMA,
6812                                       avx512vl_f32_info>, T8, PD;
6813     defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6814                                       OpNodeRnd, SchedWriteFMA,
6815                                       avx512vl_f64_info>, T8, PD, REX_W;
// Packed FMA, 132 form: one defm per operation.  Arguments are the opcode
// byte, the mnemonic stem, then the unmasked, masked and rounding-mode
// selection nodes, matching the avx512_fma3p_132_f parameter list.
6818 defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
6819                                        fma, X86FmaddRnd>;
6820 defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
6821                                        X86Fmsub, X86FmsubRnd>;
6822 defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
6823                                        X86Fmaddsub, X86FmaddsubRnd>;
6824 defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
6825                                        X86Fmsubadd, X86FmsubaddRnd>;
6826 defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
6827                                        X86Fnmadd, X86FnmaddRnd>;
6828 defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
6829                                        X86Fnmsub, X86FnmsubRnd>;
6831 // Scalar FMA
// Scalar FMA instructions for one opcode form and one element type `_`.
// Two families are defined, all with $src1 tied to $dst:
//   r_Int / m_Int / rb_Int - operate on _.RC via AVX512_maskable_3src_scalar
//                            and carry null_frag patterns.
//   r / m / rb             - isCodeGenOnly, commutable forms on _.FRC that
//                            carry the supplied selection patterns.
// RHS_r, RHS_m and RHS_b are the patterns for r, m and rb respectively.  When
// MaskOnlyReg is set, r and rb are given an empty pattern list; m always
// uses RHS_m.
6832 multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6833                                dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
6834 let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
6835   defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6836           (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
6837           "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6838           EVEX, VVVV, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
6840   let mayLoad = 1 in
6841   defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6842           (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
6843           "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6844           EVEX, VVVV, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
6845                           SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
6847   let Uses = [MXCSR] in
6848   defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6849          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6850          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
6851          EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
6853   let isCodeGenOnly = 1, isCommutable = 1 in {
6854     def r     : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
6855                      (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6856                      !strconcat(OpcodeStr,
6857                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6858                      !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV, SIMD_EXC;
6859     def m     : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
6860                     (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6861                     !strconcat(OpcodeStr,
6862                                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6863                     [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
6864                                      SchedWriteFMA.Scl.ReadAfterFold]>, EVEX, VVVV, SIMD_EXC;
6866     let Uses = [MXCSR] in
6867     def rb    : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
6868                      (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
6869                      !strconcat(OpcodeStr,
6870                               "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
6871                      !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
6872                      Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV;
6873   }// isCodeGenOnly = 1
6874 }// Constraints = "$src1 = $dst"
// Emits the 213, 231 and 132 scalar FMA forms for one element type `_`.
// Each defm is named NAME#<form>#SUFF#Z and passes avx512_fma3s_common the
// register, memory and rounding-mode patterns for that form.  The final
// argument is MaskOnlyReg: 0 for the 213 form, 1 for the 231 and 132 forms.
6877 multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6878                             string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
6879                             X86VectorVTInfo _, string SUFF> {
6880   let ExeDomain = _.ExeDomain in {
6881   defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
6882                 // Operands for intrinsic are in 123 order to preserve passthru
6883                 // semantics.
6884                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6885                          _.FRC:$src3))),
6886                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6887                          (_.ScalarLdFrag addr:$src3)))),
6888                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
6889                          _.FRC:$src3, (i32 timm:$rc)))), 0>;
6891   defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
6892                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6893                                           _.FRC:$src1))),
6894                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
6895                             (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
6896                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
6897                          _.FRC:$src1, (i32 timm:$rc)))), 1>;
6899   // One pattern is 312 order so that the load is in a different place from the
6900   // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6901   defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
6902                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6903                          _.FRC:$src2))),
6904                 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6905                                  _.FRC:$src1, _.FRC:$src2))),
6906                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
6907                          _.FRC:$src2, (i32 timm:$rc)))), 1>;
6908   }
// Instantiates all three scalar FMA forms for each scalar element type:
// SS (f32x_info) and SD (f64x_info) under HasAVX512, SH (f16x_info) under
// HasFP16.  SS/SD use T8 (SD adds REX_W); SH uses T_MAP6.  All are VEX_LIG.
6911 multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6912                         string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
6913   let Predicates = [HasAVX512] in {
6914     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6915                                  OpNodeRnd, f32x_info, "SS">,
6916                                  EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD;
6917     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6918                                  OpNodeRnd, f64x_info, "SD">,
6919                                  EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD;
6920   }
6921   let Predicates = [HasFP16] in {
6922     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6923                                  OpNodeRnd, f16x_info, "SH">,
6924                                  EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6, PD;
6925   }
// Scalar FMA instruction families.  The three opcode bytes are, in order, the
// 213, 231 and 132 forms, followed by the mnemonic stem, the selection node
// and the rounding-mode node.
6928 defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
6929 defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
6930 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
6931 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
// Selection patterns that map a scalar FMA, whose result is inserted back
// into $src1 via Move + scalar_to_vector, onto the _Int instruction forms.
//   Op            - node matched by the unmasked patterns.
//   MaskedOp      - node matched under X86selects_mask (merge and zero masking).
//   RndOp         - node matched by the patterns carrying a rounding immediate.
//   Prefix/Suffix - spliced around "213"/"231"/"132" to name the instruction.
//   Move          - node that inserts the scalar result into $src1.
//   _             - vector type info; _.FRC/_.EltVT describe the scalar element.
//   ZeroFP        - zero constant used as the false value for zero masking.
//   prd           - predicate guarding every pattern (defaults to HasAVX512).
// The position of element 0 of $src1 among the FMA operands selects the form:
//   213 matches Op(src2, src1, src3)
//   231 matches Op(src2, src3, src1)
//   132 matches Op(src1, src3, src2)
// Merge-masked patterns select element 0 of $src1 when the mask is clear and
// use the "k" instruction; zero-masked patterns select ZeroFP and use "kz".
6933 multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
6934                                       SDNode RndOp, string Prefix,
6935                                       string Suffix, SDNode Move,
6936                                       X86VectorVTInfo _, PatLeaf ZeroFP,
6937                                       Predicate prd = HasAVX512> {
6938   let Predicates = [prd] in {
    // Unmasked register and memory forms.
6939     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6940                 (Op _.FRC:$src2,
6941                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6942                     _.FRC:$src3))))),
6943               (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
6944                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6945                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6947     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6948                 (Op _.FRC:$src2, _.FRC:$src3,
6949                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6950               (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
6951                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6952                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6954     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6955                 (Op _.FRC:$src2,
6956                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6957                     (_.ScalarLdFrag addr:$src3)))))),
6958               (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
6959                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6960                addr:$src3)>;
6962     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6963                 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6964                     (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
6965               (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
6966                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6967                addr:$src3)>;
6969     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6970                 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6971                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6972               (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
6973                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6974                addr:$src3)>;
    // Merge-masked forms.
6976     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6977                (X86selects_mask VK1WM:$mask,
6978                 (MaskedOp _.FRC:$src2,
6979                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6980                     _.FRC:$src3),
6981                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6982               (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
6983                VR128X:$src1, VK1WM:$mask,
6984                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6985                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6987     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6988                (X86selects_mask VK1WM:$mask,
6989                 (MaskedOp _.FRC:$src2,
6990                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6991                     (_.ScalarLdFrag addr:$src3)),
6992                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6993               (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
6994                VR128X:$src1, VK1WM:$mask,
6995                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6997     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6998                (X86selects_mask VK1WM:$mask,
6999                 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7000                           (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
7001                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7002               (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
7003                VR128X:$src1, VK1WM:$mask,
7004                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7006     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7007                (X86selects_mask VK1WM:$mask,
7008                 (MaskedOp _.FRC:$src2, _.FRC:$src3,
7009                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7010                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7011               (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
7012                VR128X:$src1, VK1WM:$mask,
7013                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7014                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7016     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7017                (X86selects_mask VK1WM:$mask,
7018                 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7019                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7020                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7021               (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
7022                VR128X:$src1, VK1WM:$mask,
7023                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
    // Zero-masked forms.
7025     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7026                (X86selects_mask VK1WM:$mask,
7027                 (MaskedOp _.FRC:$src2,
7028                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7029                           _.FRC:$src3),
7030                 (_.EltVT ZeroFP)))))),
7031               (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
7032                VR128X:$src1, VK1WM:$mask,
7033                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7034                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7036     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7037                (X86selects_mask VK1WM:$mask,
7038                 (MaskedOp _.FRC:$src2, _.FRC:$src3,
7039                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7040                 (_.EltVT ZeroFP)))))),
7041               (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
7042                VR128X:$src1, VK1WM:$mask,
7043                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7044                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7046     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7047                (X86selects_mask VK1WM:$mask,
7048                 (MaskedOp _.FRC:$src2,
7049                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7050                           (_.ScalarLdFrag addr:$src3)),
7051                 (_.EltVT ZeroFP)))))),
7052               (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
7053                VR128X:$src1, VK1WM:$mask,
7054                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
    // 132 zero-masked memory form.  The operand order must be
    // (src1, load, src2), as in the other 132 patterns above; the order
    // (src1, src2, load) is the commuted shape of the 213 memory pattern and
    // would select a 132 instruction for a 213 computation.
7056     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7057                (X86selects_mask VK1WM:$mask,
7058                 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7059                           (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
7060                 (_.EltVT ZeroFP)))))),
7061               (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
7062                VR128X:$src1, VK1WM:$mask,
7063                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7065     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7066                (X86selects_mask VK1WM:$mask,
7067                 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7068                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7069                 (_.EltVT ZeroFP)))))),
7070               (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
7071                VR128X:$src1, VK1WM:$mask,
7072                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7074     // Patterns with rounding mode.
7075     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7076                 (RndOp _.FRC:$src2,
7077                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7078                        _.FRC:$src3, (i32 timm:$rc)))))),
7079               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
7080                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7081                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7083     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7084                 (RndOp _.FRC:$src2, _.FRC:$src3,
7085                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7086                        (i32 timm:$rc)))))),
7087               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
7088                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7089                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7091     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7092                (X86selects_mask VK1WM:$mask,
7093                 (RndOp _.FRC:$src2,
7094                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7095                        _.FRC:$src3, (i32 timm:$rc)),
7096                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7097               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
7098                VR128X:$src1, VK1WM:$mask,
7099                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7100                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7102     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7103                (X86selects_mask VK1WM:$mask,
7104                 (RndOp _.FRC:$src2, _.FRC:$src3,
7105                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7106                        (i32 timm:$rc)),
7107                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7108               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
7109                VR128X:$src1, VK1WM:$mask,
7110                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7111                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7113     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7114                (X86selects_mask VK1WM:$mask,
7115                 (RndOp _.FRC:$src2,
7116                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7117                        _.FRC:$src3, (i32 timm:$rc)),
7118                 (_.EltVT ZeroFP)))))),
7119               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
7120                VR128X:$src1, VK1WM:$mask,
7121                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7122                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7124     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7125                (X86selects_mask VK1WM:$mask,
7126                 (RndOp _.FRC:$src2, _.FRC:$src3,
7127                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7128                        (i32 timm:$rc)),
7129                 (_.EltVT ZeroFP)))))),
7130               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
7131                VR128X:$src1, VK1WM:$mask,
7132                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7133                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7134   }
// Instantiate the scalar FMA selection patterns for each operation and
// element type.  The SH instantiations pass HasFP16 as the predicate; the SS
// and SD instantiations rely on the multiclass default.
7136 defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
7137                                   X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7138 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
7139                                   X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7140 defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
7141                                   X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7142 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
7143                                   X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7145 defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7146                                   "SS", X86Movss, v4f32x_info, fp32imm0>;
7147 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7148                                   "SS", X86Movss, v4f32x_info, fp32imm0>;
7149 defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7150                                   "SS", X86Movss, v4f32x_info, fp32imm0>;
7151 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7152                                   "SS", X86Movss, v4f32x_info, fp32imm0>;
7154 defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7155                                   "SD", X86Movsd, v2f64x_info, fp64imm0>;
7156 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7157                                   "SD", X86Movsd, v2f64x_info, fp64imm0>;
7158 defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7159                                   "SD", X86Movsd, v2f64x_info, fp64imm0>;
7160 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7161                                   "SD", X86Movsd, v2f64x_info, fp64imm0>;
7163 //===----------------------------------------------------------------------===//
7164 // AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
7165 //===----------------------------------------------------------------------===//
// IFMA multiply-add for one vector type `_`, with $src1 tied to $dst.
// Emits register (r), full-vector memory (m) and broadcast memory (mb)
// variants through AVX512_maskable_3src.  In every pattern $src1 is the last
// OpNode operand and $src2/$src3 are the first two.
7166 let Constraints = "$src1 = $dst" in {
7167 multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
7168                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
7169   // NOTE: The SDNode have the multiply operands first with the add last.
7170   // This enables commuted load patterns to be autogenerated by tablegen.
7171   let ExeDomain = _.ExeDomain in {
7172   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
7173           (ins _.RC:$src2, _.RC:$src3),
7174           OpcodeStr, "$src3, $src2", "$src2, $src3",
7175           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
7176           T8, PD, EVEX, VVVV, Sched<[sched]>;
7178   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7179           (ins _.RC:$src2, _.MemOp:$src3),
7180           OpcodeStr, "$src3, $src2", "$src2, $src3",
7181           (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
7182           T8, PD, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
7183                                 sched.ReadAfterFold]>;
7185   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7186             (ins _.RC:$src2, _.ScalarMemOp:$src3),
7187             OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
7188             !strconcat("$src2, ${src3}", _.BroadcastStr ),
7189             (OpNode _.RC:$src2,
7190                     (_.VT (_.BroadcastLdFrag addr:$src3)),
7191                     _.RC:$src1)>,
7192             T8, PD, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
7193                                           sched.ReadAfterFold]>;
7194   }
7196 } // Constraints = "$src1 = $dst"
// Instantiates avx512_pmadd52_rm for every vector width of `_`: the 512-bit
// Z form under [HasIFMA], and the 256-bit Z256 and 128-bit Z128 forms under
// [HasVLX, HasIFMA].
7198 multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7199                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
7200   let Predicates = [HasIFMA] in {
7201     defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
7202                       EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7203   }
7204   let Predicates = [HasVLX, HasIFMA] in {
7205     defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
7206                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7207     defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
7208                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
7209   }
// The two IFMA instruction families, both over avx512vl_i64_info with REX_W
// set and scheduled as SchedWriteVecIMul.
7212 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
7213                                          SchedWriteVecIMul, avx512vl_i64_info>,
7214                                          REX_W;
7215 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
7216                                          SchedWriteVecIMul, avx512vl_i64_info>,
7217                                          REX_W;
7219 //===----------------------------------------------------------------------===//
7220 // AVX-512  Scalar convert from signed integer to float/double
7221 //===----------------------------------------------------------------------===//
// Scalar integer-to-floating-point conversion instructions.
//   rr / rm         - isCodeGenOnly forms on DstVT.FRC with empty patterns.
//   rr_Int / rm_Int - forms on DstVT.RC selected through OpNode; rm_Int loads
//                     its integer source with ld_frag.
// The trailing InstAlias maps the "v"#asm#mem spelling onto rr_Int for the
// "att" assembler variant.
// _Uses and _mayRaiseFPException default to [MXCSR] and 1; an instantiation
// may override them.
7223 multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
7224                     RegisterClass SrcRC, X86VectorVTInfo DstVT,
7225                     X86MemOperand x86memop, PatFrag ld_frag, string asm,
7226                     string mem, list<Register> _Uses = [MXCSR],
7227                     bit _mayRaiseFPException = 1> {
7228 let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
7229     mayRaiseFPException = _mayRaiseFPException in {
7230   let hasSideEffects = 0, isCodeGenOnly = 1 in {
7231     def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7232               (ins DstVT.FRC:$src1, SrcRC:$src),
7233               !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7234               EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7235     let mayLoad = 1 in
7236       def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7237               (ins DstVT.FRC:$src1, x86memop:$src),
7238               asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
7239               EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
7240   } // hasSideEffects = 0
7241   def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7242                 (ins DstVT.RC:$src1, SrcRC:$src2),
7243                 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7244                 [(set DstVT.RC:$dst,
7245                       (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
7246                EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7248   def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7249                 (ins DstVT.RC:$src1, x86memop:$src2),
7250                 asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7251                 [(set DstVT.RC:$dst,
7252                       (OpNode (DstVT.VT DstVT.RC:$src1),
7253                                (ld_frag addr:$src2)))]>,
7254                 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
7256   def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7257                   (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
7258                   DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
// Rounding-control variant of the scalar integer-to-FP conversion.  Defines
// rrb_Int, which takes an AVX512RC:$rc operand and is selected through OpNode
// with the rounding immediate, plus an "att"-variant InstAlias that maps the
// "v"#asm#mem spelling onto it.
7261 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7262                                X86FoldableSchedWrite sched, RegisterClass SrcRC,
7263                                X86VectorVTInfo DstVT, string asm,
7264                                string mem> {
7265   let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
7266   def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7267               (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7268               !strconcat(asm,
7269                   "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7270               [(set DstVT.RC:$dst,
7271                     (OpNode (DstVT.VT DstVT.RC:$src1),
7272                              SrcRC:$src2,
7273                              (i32 timm:$rc)))]>,
7274               EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7275   def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
7276                   (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
7277                   DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
// Combines the rounding-control variant (avx512_vcvtsi_round, using
// OpNodeRnd) with the plain register/memory variants (avx512_vcvtsi, using
// OpNode) under a single NAME, and marks the result VEX_LIG.
7280 multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7281                                 X86FoldableSchedWrite sched,
7282                                 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7283                                 X86MemOperand x86memop, PatFrag ld_frag,
7284                                 string asm, string mem> {
7285   defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7286               avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7287                             ld_frag, asm, mem>, VEX_LIG;
// Scalar integer -> float/double conversions, all under HasAVX512.
7290 let Predicates = [HasAVX512] in {
// Signed sources, opcode 0x2A. The GR32 variants use suffix "l" with i32mem;
// the GR64 variants use suffix "q" with i64mem and add REX_W.
7291 defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7292                                  WriteCvtI2SS, GR32,
7293                                  v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7294                                  TB, XS, EVEX_CD8<32, CD8VT1>;
7295 defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7296                                  WriteCvtI2SS, GR64,
7297                                  v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7298                                  TB, XS, REX_W, EVEX_CD8<64, CD8VT1>;
// The GR32 -> f64 variant instantiates avx512_vcvtsi directly, with null_frag
// and without a rounding-control form.
// NOTE(review): the trailing `[], 0` arguments bind to avx512_vcvtsi parameters
// declared outside this view - presumably they drop the MXCSR use and the
// FP-exception flag; confirm against the multiclass header.
7299 defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7300                                  v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
7301                                  TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7302 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7303                                  WriteCvtI2SD, GR64,
7304                                  v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7305                                  TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
// Suffix-less memory spellings ("att" variant) resolve to the i32mem rm_Int
// records.
7307 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7308               (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7309 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7310               (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
// any_sint_to_fp of a loaded integer selects the rm record; the first source
// operand is filled with IMPLICIT_DEF.
7312 def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
7313           (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7314 def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
7315           (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7316 def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
7317           (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7318 def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
7319           (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
// Same selection for a general-purpose register source, using the rr record.
7321 def : Pat<(f32 (any_sint_to_fp GR32:$src)),
7322           (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7323 def : Pat<(f32 (any_sint_to_fp GR64:$src)),
7324           (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7325 def : Pat<(f64 (any_sint_to_fp GR32:$src)),
7326           (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7327 def : Pat<(f64 (any_sint_to_fp GR64:$src)),
7328           (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
// Unsigned sources, opcode 0x7B; the layout mirrors the signed group above.
7330 defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7331                                   WriteCvtI2SS, GR32,
7332                                   v4f32x_info, i32mem, loadi32,
7333                                   "cvtusi2ss", "l">, TB, XS, EVEX_CD8<32, CD8VT1>;
7334 defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7335                                   WriteCvtI2SS, GR64,
7336                                   v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7337                                   TB, XS, REX_W, EVEX_CD8<64, CD8VT1>;
// As with the signed GR32 -> f64 case, this one uses avx512_vcvtsi directly
// with null_frag and the extra `[], 0` arguments (see NOTE(review) above).
7338 defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7339                                   i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
7340                                   TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7341 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7342                                   WriteCvtI2SD, GR64,
7343                                   v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7344                                   TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
// Suffix-less memory spellings for the unsigned mnemonics.
7346 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7347               (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7348 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7349               (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
// any_uint_to_fp of a loaded integer selects the rm record.
7351 def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
7352           (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7353 def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
7354           (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7355 def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
7356           (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7357 def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
7358           (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
// any_uint_to_fp of a general-purpose register selects the rr record.
7360 def : Pat<(f32 (any_uint_to_fp GR32:$src)),
7361           (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7362 def : Pat<(f32 (any_uint_to_fp GR64:$src)),
7363           (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7364 def : Pat<(f64 (any_uint_to_fp GR32:$src)),
7365           (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7366 def : Pat<(f64 (any_uint_to_fp GR64:$src)),
7367           (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7370 //===----------------------------------------------------------------------===//
7371 // AVX-512  Scalar convert from float/double to integer
7372 //===----------------------------------------------------------------------===//
// Scalar float/double -> integer conversion operating on the vector register
// class of SrcVT.
//   opc       - opcode byte.
//   SrcVT     - source type info (register class, memory operand, load frags).
//   DstVT     - destination type info (supplies the $dst register class).
//   OpNode    - node matched by the rr_Int / rm_Int patterns.
//   OpNodeRnd - node matched by rrb_Int together with (i32 timm:$rc).
//   sched     - scheduling class.
//   asm       - base mnemonic passed to SI; the aliases below prepend "v".
//   aliasStr  - suffix appended to the mnemonic in the aliases.
//   prd       - predicate guarding the instruction records (default HasAVX512).
7374 multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7375                                   X86VectorVTInfo DstVT, SDNode OpNode,
7376                                   SDNode OpNodeRnd,
7377                                   X86FoldableSchedWrite sched, string asm,
7378                                   string aliasStr, Predicate prd = HasAVX512> {
7379   let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
         // rr_Int: register source, tagged SIMD_EXC.
7380     def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7381                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7382                 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7383                 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
         // rrb_Int: register source plus AVX512RC:$rc; tagged EVEX_B/EVEX_RC
         // and declared as using MXCSR (it does not carry SIMD_EXC).
7384     let Uses = [MXCSR] in
7385     def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7386                  !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7387                  [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7388                  EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7389                  Sched<[sched]>;
         // rm_Int: memory source (SrcVT.IntScalarMemOp) matched through
         // SrcVT.ScalarIntMemFrags; tagged SIMD_EXC.
7390     def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7391                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7392                 [(set DstVT.RC:$dst, (OpNode
7393                       (SrcVT.ScalarIntMemFrags addr:$src)))]>,
7394                 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7395   } // Predicates = [prd]
       // One "att" alias per record above, spelled "v" # asm # aliasStr. These
       // sit outside the Predicates scope.
7397   def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7398           (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7399   def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7400           (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7401   def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7402           (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7403                                           SrcVT.IntScalarMemOp:$src), 0, "att">;
7406 // Convert float/double to signed/unsigned int 32/64
// Each source type is instantiated four times: signed results use opcode 0x2D
// with X86cvts2si, unsigned results use opcode 0x79 with X86cvts2usi. The
// 32-bit destination (i32x_info) takes alias suffix "{l}"; the 64-bit
// destination (i64x_info) takes "{q}" and adds REX_W.
// f32 source (XS prefix, EVEX_CD8<32, CD8VT1>):
7407 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7408                                    X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7409                                    TB, XS, EVEX_CD8<32, CD8VT1>;
7410 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7411                                    X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7412                                    TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
7413 defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7414                                    X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7415                                    TB, XS, EVEX_CD8<32, CD8VT1>;
7416 defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7417                                    X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7418                                    TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
// f64 source (XD prefix, EVEX_CD8<64, CD8VT1>):
7419 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7420                                    X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7421                                    TB, XD, EVEX_CD8<64, CD8VT1>;
7422 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7423                                    X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7424                                    TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7425 defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7426                                    X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7427                                    TB, XD, EVEX_CD8<64, CD8VT1>;
7428 defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7429                                    X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7430                                    TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
// Codegen-only scalar float/double -> integer records that operate on the
// scalar register class (SrcVT.FRC) or a scalar memory operand.
//   opc    - opcode byte.
//   asm    - full mnemonic (used verbatim by the AVX512 class here).
//   SrcVT  - source type info; DstVT - destination type info.
//   OpNode - node matched directly on the scalar source.
//   sched  - scheduling class.
// Both records are marked isCodeGenOnly = 1 and carry SIMD_EXC.
7432 multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
7433                         X86VectorVTInfo DstVT, SDNode OpNode,
7434                         X86FoldableSchedWrite sched> {
7435   let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
7436     let isCodeGenOnly = 1 in {
         // rr: source in SrcVT.FRC.
7437     def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
7438                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7439                 [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
7440                 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
         // rm: source loaded through SrcVT.ScalarLdFrag.
7441     def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
7442                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7443                 [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
7444                 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7445     }
7446   } // Predicates = [HasAVX512]
// Codegen-only rr/rm records for lrint/llrint. The i32 destinations match
// lrint; the i64 destinations match llrint and add REX_W. These reuse the
// VCVTSS2SI*/VCVTSD2SI* names and opcode 0x2D of the *_Int records defined
// through avx512_cvt_s_int_round.
7449 defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
7450                        lrint, WriteCvtSS2I>, TB, XS, EVEX_CD8<32, CD8VT1>;
7451 defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
7452                        llrint, WriteCvtSS2I>, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>;
7453 defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
7454                        lrint, WriteCvtSD2I>, TB, XD, EVEX_CD8<64, CD8VT1>;
7455 defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
7456                        llrint, WriteCvtSD2I>, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>;
// The 64-bit records above are instantiated with llrint, so an lrint that
// produces i64 needs explicit patterns to select those same records.
7458 let Predicates = [HasAVX512] in {
7459   def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
7460   def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
7462   def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
7463   def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
7466 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
7467 // which produce unnecessary vmovs{s,d} instructions
// Every pattern below matches X86Movss/X86Movsd of $dst with a
// scalar_to_vector of an int -> fp conversion and selects the matching *_Int
// record, passing VR128X:$dst as its first source operand. For each
// conversion there is a register form (rr_Int) and a loaded form (rm_Int).
7468 let Predicates = [HasAVX512] in {
// Signed integer -> f32.
7469 def : Pat<(v4f32 (X86Movss
7470                    (v4f32 VR128X:$dst),
7471                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
7472           (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7474 def : Pat<(v4f32 (X86Movss
7475                    (v4f32 VR128X:$dst),
7476                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
7477           (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7479 def : Pat<(v4f32 (X86Movss
7480                    (v4f32 VR128X:$dst),
7481                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
7482           (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7484 def : Pat<(v4f32 (X86Movss
7485                    (v4f32 VR128X:$dst),
7486                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
7487           (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
// Signed integer -> f64.
7489 def : Pat<(v2f64 (X86Movsd
7490                    (v2f64 VR128X:$dst),
7491                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
7492           (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7494 def : Pat<(v2f64 (X86Movsd
7495                    (v2f64 VR128X:$dst),
7496                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
7497           (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7499 def : Pat<(v2f64 (X86Movsd
7500                    (v2f64 VR128X:$dst),
7501                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
7502           (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7504 def : Pat<(v2f64 (X86Movsd
7505                    (v2f64 VR128X:$dst),
7506                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
7507           (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
// Unsigned integer -> f32.
7509 def : Pat<(v4f32 (X86Movss
7510                    (v4f32 VR128X:$dst),
7511                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
7512           (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7514 def : Pat<(v4f32 (X86Movss
7515                    (v4f32 VR128X:$dst),
7516                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
7517           (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7519 def : Pat<(v4f32 (X86Movss
7520                    (v4f32 VR128X:$dst),
7521                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
7522           (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7524 def : Pat<(v4f32 (X86Movss
7525                    (v4f32 VR128X:$dst),
7526                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
7527           (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
// Unsigned integer -> f64.
7529 def : Pat<(v2f64 (X86Movsd
7530                    (v2f64 VR128X:$dst),
7531                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
7532           (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7534 def : Pat<(v2f64 (X86Movsd
7535                    (v2f64 VR128X:$dst),
7536                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
7537           (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7539 def : Pat<(v2f64 (X86Movsd
7540                    (v2f64 VR128X:$dst),
7541                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
7542           (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7544 def : Pat<(v2f64 (X86Movsd
7545                    (v2f64 VR128X:$dst),
7546                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
7547           (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7548 } // Predicates = [HasAVX512]
7550 // Convert float/double to signed/unsigned int 32/64 with truncation
//   opc       - opcode byte.
//   asm       - full mnemonic, used verbatim by the records and the aliases.
//   _SrcRC    - source type info; _DstRC - destination type info.
//   OpNode    - operator matched by the codegen-only scalar rr/rm records.
//   OpNodeInt - node matched by rr_Int / rm_Int on the vector register class.
//   OpNodeSAE - node matched by the {sae} record rrb_Int.
//   sched     - scheduling class.
//   aliasStr  - suffix appended to asm in the "att" aliases.
//   prd       - predicate guarding the records (default HasAVX512).
7551 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7552                             X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
7553                             SDNode OpNodeInt, SDNode OpNodeSAE,
7554                             X86FoldableSchedWrite sched, string aliasStr,
7555                             Predicate prd = HasAVX512> {
7556 let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
       // Codegen-only forms taking the scalar register class (FRC) or a
       // scalar load.
7557   let isCodeGenOnly = 1 in {
7558   def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7559               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7560               [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7561               EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7562   def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7563               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7564               [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7565               EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7566   }
       // rr_Int: register source in the vector register class (_SrcRC.RC).
7568   def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7569             !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7570            [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7571            EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
       // rrb_Int: {sae} spelling, tagged EVEX_B and declared as using MXCSR
       // (it does not carry SIMD_EXC).
7572   let Uses = [MXCSR] in
7573   def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7574             !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7575             [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7576                                   EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
       // rm_Int: memory source matched through _SrcRC.ScalarIntMemFrags.
7577   def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7578               (ins _SrcRC.IntScalarMemOp:$src),
7579               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7580               [(set _DstRC.RC:$dst,
7581                 (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
7582               EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7583 } // Predicates = [prd]
       // "att" aliases spelled asm # aliasStr, one per *_Int record; they sit
       // outside the Predicates scope.
7585   def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7586           (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7587   def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7588           (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7589   def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7590           (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7591                                           _SrcRC.IntScalarMemOp:$src), 0, "att">;
// Truncating conversions to signed integers: opcode 0x2C, any_fp_to_sint for
// the scalar records and X86cvtts2Int / X86cvtts2IntSAE for the *_Int records.
// 64-bit destinations use suffix "{q}" and add REX_W; 32-bit ones use "{l}".
7594 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7595                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7596                         "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>;
7597 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7598                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7599                         "{q}">, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>;
7600 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7601                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7602                         "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>;
7603 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7604                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7605                         "{q}">, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>;
// Truncating conversions to unsigned integers: opcode 0x78, any_fp_to_uint
// and X86cvtts2UInt / X86cvtts2UIntSAE.
7607 defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7608                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7609                         "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>;
7610 defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7611                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7612                         "{q}">, TB, XS,REX_W, EVEX_CD8<32, CD8VT1>;
7613 defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7614                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7615                         "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>;
7616 defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7617                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7618                         "{q}">, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7620 //===----------------------------------------------------------------------===//
7621 // AVX-512  Convert from float to double and back
7622 //===----------------------------------------------------------------------===//
// Scalar floating-point format conversion from _Src into _. Every record is
// declared as using MXCSR and as possibly raising FP exceptions.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   _         - destination type info; _Src - source type info.
//   OpNode    - node matched by the maskable rr_Int / rm_Int records.
//   sched     - scheduling class.
7624 let Uses = [MXCSR], mayRaiseFPException = 1 in
7625 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7626                                 X86VectorVTInfo _Src, SDNode OpNode,
7627                                 X86FoldableSchedWrite sched> {
       // rr_Int: both operands in vector register classes; built through
       // AVX512_maskable_scalar.
7628   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7629                          (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7630                          "$src2, $src1", "$src1, $src2",
7631                          (_.VT (OpNode (_.VT _.RC:$src1),
7632                                        (_Src.VT _Src.RC:$src2)))>,
7633                          EVEX, VVVV, VEX_LIG, Sched<[sched]>;
       // rm_Int: second operand comes from memory via _Src.ScalarIntMemFrags.
7634   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7635                          (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7636                          "$src2, $src1", "$src1, $src2",
7637                          (_.VT (OpNode (_.VT _.RC:$src1),
7638                                   (_Src.ScalarIntMemFrags addr:$src2)))>,
7639                          EVEX, VVVV, VEX_LIG,
7640                          Sched<[sched.Folded, sched.ReadAfterFold]>;
       // Codegen-only scalar-register (FRC) forms. Their pattern lists are
       // empty, so they are only selected by explicit Pat records elsewhere.
7642   let isCodeGenOnly = 1, hasSideEffects = 0 in {
7643     def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7644                (ins _.FRC:$src1, _Src.FRC:$src2),
7645                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7646                EVEX, VVVV, VEX_LIG, Sched<[sched]>;
         // mayLoad is set explicitly because the empty pattern cannot convey it.
7647     let mayLoad = 1 in
7648     def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7649                (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7650                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7651                EVEX, VVVV, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7652   }
7655 // Scalar Conversion with SAE - suppress all exceptions
// Defines a single register-only record, rrb_Int, whose assembly operands
// include the literal {sae} token. It is matched with OpNodeSAE, tagged
// EVEX_B, and declared as using MXCSR.
7656 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7657                                     X86VectorVTInfo _Src, SDNode OpNodeSAE,
7658                                     X86FoldableSchedWrite sched> {
7659   let Uses = [MXCSR] in
7660   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7661                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7662                         "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7663                         (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7664                                          (_Src.VT _Src.RC:$src2)))>,
7665                         EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>;
7668 // Scalar Conversion with rounding control (RC)
// Defines a single register-only record, rrb_Int, that takes an extra
// AVX512RC:$rc operand. It is matched with OpNodeRnd plus (i32 timm:$rc),
// tagged EVEX_B and EVEX_RC, and declared as using MXCSR.
7669 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7670                                    X86VectorVTInfo _Src, SDNode OpNodeRnd,
7671                                    X86FoldableSchedWrite sched> {
7672   let Uses = [MXCSR] in
7673   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7674                         (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7675                         "$rc, $src2, $src1", "$src1, $src2, $rc",
7676                         (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7677                                          (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
7678                         EVEX, VVVV, VEX_LIG, Sched<[sched]>,
7679                         EVEX_B, EVEX_RC;
// Wrapper used by the defms that pass X86frounds / X86froundsRnd: under the
// "Z" suffix it combines the plain records (avx512_cvt_fp_scalar) with the
// rounding-control record (avx512_cvt_fp_rc_scalar).
// Note the argument order: callers pass (_src, _dst) while the inner
// multiclasses take (destination, source). EVEX_CD8 is sized from
// _src.EltSize. prd defaults to HasAVX512.
7681 multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
7682                                       SDNode OpNode, SDNode OpNodeRnd,
7683                                       X86FoldableSchedWrite sched,
7684                                       X86VectorVTInfo _src, X86VectorVTInfo _dst,
7685                                       Predicate prd = HasAVX512> {
7686   let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7687     defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7688              avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7689                                OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
7690   }
// Wrapper used by the defms that pass X86fpexts / X86fpextsSAE: under the "Z"
// suffix it combines the plain records (avx512_cvt_fp_scalar) with the {sae}
// record (avx512_cvt_fp_sae_scalar) instead of a rounding-control record.
// Callers pass (_src, _dst); the inner multiclasses take (destination,
// source). EVEX_CD8 is sized from _src.EltSize. prd defaults to HasAVX512.
7693 multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
7694                                        SDNode OpNode, SDNode OpNodeSAE,
7695                                        X86FoldableSchedWrite sched,
7696                                        X86VectorVTInfo _src, X86VectorVTInfo _dst,
7697                                        Predicate prd = HasAVX512> {
7698   let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7699     defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7700              avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7701              EVEX_CD8<_src.EltSize, CD8VT1>;
7702   }
// Scalar format conversions between f16, f32 and f64. The defms built on
// avx512_cvt_fp_scalar_trunc use X86frounds / X86froundsRnd; the ones built on
// avx512_cvt_fp_scalar_extend use X86fpexts / X86fpextsSAE. The last two type
// arguments are (source info, destination info).
// f64 <-> f32, guarded by the default predicate (HasAVX512):
7704 defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
7705                                          X86froundsRnd, WriteCvtSD2SS, f64x_info,
7706                                          f32x_info>, TB, XD, REX_W;
7707 defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
7708                                           X86fpextsSAE, WriteCvtSS2SD, f32x_info,
7709                                           f64x_info>, TB, XS;
// Conversions involving f16, guarded by HasFP16 (T_MAP5 / T_MAP6):
7710 defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
7711                                           X86froundsRnd, WriteCvtSD2SS, f64x_info,
7712                                           f16x_info, HasFP16>, T_MAP5, XD, REX_W;
7713 defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
7714                                           X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7715                                           f64x_info, HasFP16>, T_MAP5, XS;
7716 defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
7717                                           X86froundsRnd, WriteCvtSD2SS, f32x_info,
7718                                           f16x_info, HasFP16>, T_MAP5;
7719 defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
7720                                           X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7721                                           f32x_info, HasFP16>, T_MAP6;
// Selection patterns for scalar any_fpextend / any_fpround. The codegen-only
// rr/rm records have empty pattern lists, so they are selected here; the
// first source operand is filled with IMPLICIT_DEF. The load-folding (rm)
// patterns additionally require OptForSize.
// f32 -> f64:
7723 def : Pat<(f64 (any_fpextend FR32X:$src)),
7724           (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7725           Requires<[HasAVX512]>;
7726 def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
7727           (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7728           Requires<[HasAVX512, OptForSize]>;
// f64 -> f32 (register form only):
7730 def : Pat<(f32 (any_fpround FR64X:$src)),
7731           (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7732            Requires<[HasAVX512]>;
// f16 -> f32:
7734 def : Pat<(f32 (any_fpextend FR16X:$src)),
7735           (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
7736           Requires<[HasFP16]>;
7737 def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
7738           (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
7739           Requires<[HasFP16, OptForSize]>;
// f16 -> f64:
7741 def : Pat<(f64 (any_fpextend FR16X:$src)),
7742           (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
7743           Requires<[HasFP16]>;
7744 def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
7745           (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7746           Requires<[HasFP16, OptForSize]>;
// f32 / f64 -> f16 (register forms only):
7748 def : Pat<(f16 (any_fpround FR32X:$src)),
7749           (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
7750            Requires<[HasFP16]>;
7751 def : Pat<(f16 (any_fpround FR64X:$src)),
7752           (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
7753            Requires<[HasFP16]>;
// Vector-level forms: an X86Movss/X86Movsd of $dst with a scalar_to_vector of
// a converted element 0 of $src selects the rr_Int record, with $dst passed
// as its first source operand.
7755 def : Pat<(v4f32 (X86Movss
7756                    (v4f32 VR128X:$dst),
7757                    (v4f32 (scalar_to_vector
7758                      (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7759           (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7760           Requires<[HasAVX512]>;
7762 def : Pat<(v2f64 (X86Movsd
7763                    (v2f64 VR128X:$dst),
7764                    (v2f64 (scalar_to_vector
7765                      (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7766           (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7767           Requires<[HasAVX512]>;
7769 //===----------------------------------------------------------------------===//
7770 // AVX-512  Vector convert from signed/unsigned integer to float/double
7771 //          and from float/double to signed/unsigned integer
7772 //===----------------------------------------------------------------------===//
// Packed conversion from _Src to _ with unmasked, merge-masked and zero-masked
// patterns, in register (rr), full-memory (rm) and broadcast-memory (rmb)
// forms.
//   opc        - opcode byte.
//   OpcodeStr  - mnemonic.
//   _          - destination type info; _Src - source type info.
//   OpNode     - operator used in the unmasked patterns.
//   MaskOpNode - operator used inside the vselect_mask patterns.
//   sched      - scheduling class.
//   Broadcast  - broadcast suffix appended to ${src} in rmb
//                (default _.BroadcastStr).
//   Alias      - suffix appended to OpcodeStr for the rm form only
//                (default "").
//   MemOp      - memory operand for rm (default _Src.MemOp).
//   MaskRC     - mask register class (default _.KRCWM).
//   LdDAG / MaskLdDAG - unmasked / masked load patterns for rm; the defaults
//                apply OpNode / MaskOpNode to _Src.LdFrag.
7774 multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7775                           X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode,
7776                           X86FoldableSchedWrite sched,
7777                           string Broadcast = _.BroadcastStr,
7778                           string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7779                           RegisterClass MaskRC = _.KRCWM,
7780                           dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
7781                           dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
7782 let Uses = [MXCSR], mayRaiseFPException = 1 in {
       // rr: register source. The three trailing dags are the unmasked
       // pattern, the merge-masking pattern (falls back to $src0) and the
       // zero-masking pattern (falls back to _.ImmAllZerosV).
7783   defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
7784                          (ins _Src.RC:$src),
7785                          (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
7786                          (ins MaskRC:$mask, _Src.RC:$src),
7787                           OpcodeStr, "$src", "$src",
7788                          (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7789                          (vselect_mask MaskRC:$mask,
7790                                        (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7791                                        _.RC:$src0),
7792                          (vselect_mask MaskRC:$mask,
7793                                        (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7794                                        _.ImmAllZerosV)>,
7795                          EVEX, Sched<[sched]>;
       // rm: full-width memory source; the mnemonic is OpcodeStr#Alias and
       // the patterns come from LdDAG / MaskLdDAG.
7797   defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7798                          (ins MemOp:$src),
7799                          (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
7800                          (ins MaskRC:$mask, MemOp:$src),
7801                          OpcodeStr#Alias, "$src", "$src",
7802                          LdDAG,
7803                          (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
7804                          (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
7805                          EVEX, Sched<[sched.Folded]>;
       // rmb: scalar memory operand matched through _Src.BroadcastLdFrag;
       // printed as ${src} followed by the Broadcast suffix and tagged EVEX_B.
7807   defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7808                          (ins _Src.ScalarMemOp:$src),
7809                          (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
7810                          (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
7811                          OpcodeStr,
7812                          "${src}"#Broadcast, "${src}"#Broadcast,
7813                          (_.VT (OpNode (_Src.VT
7814                                   (_Src.BroadcastLdFrag addr:$src))
7815                             )),
7816                          (vselect_mask MaskRC:$mask,
7817                                        (_.VT
7818                                         (MaskOpNode
7819                                          (_Src.VT
7820                                           (_Src.BroadcastLdFrag addr:$src)))),
7821                                        _.RC:$src0),
7822                          (vselect_mask MaskRC:$mask,
7823                                        (_.VT
7824                                         (MaskOpNode
7825                                          (_Src.VT
7826                                           (_Src.BroadcastLdFrag addr:$src)))),
7827                                        _.ImmAllZerosV)>,
7828                          EVEX, EVEX_B, Sched<[sched.Folded]>;
7829   }
7831 // Conversion with SAE - suppress all exceptions
     // Defines a single register-source form, `rrb`; no memory form is defined
     // here. Both assembly operand strings carry a literal `{sae}` marker and the
     // selection pattern applies OpNodeSAE to the source register.
     //   opc / OpcodeStr - opcode byte and mnemonic.
     //   _ / _Src        - destination and source vector type descriptions.
     //   sched           - scheduling info; only the register form is used.
7832 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7833                               X86VectorVTInfo _Src, SDNode OpNodeSAE,
7834                               X86FoldableSchedWrite sched> {
       // The form is declared as reading MXCSR and is tagged EVEX + EVEX_B.
7835   let Uses = [MXCSR] in
7836   defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7837                         (ins _Src.RC:$src), OpcodeStr,
7838                         "{sae}, $src", "$src, {sae}",
7839                         (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
7840                         EVEX, EVEX_B, Sched<[sched]>;
7843 // Conversion with rounding control (RC)
     // Defines a single register-source form, `rrb`, that takes an extra
     // AVX512RC:$rc operand. The operand is printed next to $src in both assembly
     // operand strings and is matched as `(i32 timm:$rc)` in the pattern handed
     // to OpNodeRnd.
     //   opc / OpcodeStr - opcode byte and mnemonic.
     //   _ / _Src        - destination and source vector type descriptions.
     //   sched           - scheduling info; only the register form is used.
7844 multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7845                          X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
7846                          X86FoldableSchedWrite sched> {
       // The form is declared as reading MXCSR and is tagged EVEX, EVEX_B and
       // EVEX_RC.
7847   let Uses = [MXCSR] in
7848   defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7849                         (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
7850                         "$rc, $src", "$src, $rc",
7851                         (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
7852                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
7855 // Similar to avx512_vcvt_fp, but uses an extload for the memory form.
     // This multiclass has no body of its own: it forwards every parameter to
     // avx512_vcvt_fp and supplies the same extending-load DAG twice as the two
     // trailing arguments. The PatFrag is looked up by name as
     // "extload" # _Src.VTName and its result is typed as _.VT.
     // NOTE(review): the two trailing arguments are presumably the unmasked and
     // masked load DAGs of avx512_vcvt_fp - confirm against its signature.
7856 multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7857                                 X86VectorVTInfo _Src, SDPatternOperator OpNode,
7858                                 SDNode MaskOpNode,
7859                                 X86FoldableSchedWrite sched,
7860                                 string Broadcast = _.BroadcastStr,
7861                                 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7862                                 RegisterClass MaskRC = _.KRCWM>
7863   : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
7864                    Alias, MemOp, MaskRC,
7865                    (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
7866                    (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
7868 // Extend [Float to Double, Half to Float]
     // Instantiates the widening conversion at three vector lengths (Z, Z128,
     // Z256). The source vector info is always narrower than or equal to the
     // destination one: 512 <- 256, 128 <- 128 and 256 <- 128.
     //   _dst / _src - per-vector-length type info for destination and source.
     //   sched       - per-width scheduling classes (XMM / YMM / ZMM).
     //   prd         - base feature predicate; HasVLX is added for Z128/Z256.
7869 multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
7870                              AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
7871                              X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
       // 512-bit form: the generic fpextend nodes plus an SAE register variant.
7872   let Predicates = [prd] in {
7873     defm Z : avx512_vcvt_fpextend<opc, OpcodeStr,  _dst.info512, _src.info256,
7874                             any_fpextend, fpextend, sched.ZMM>,
7875              avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
7876                                 X86vfpextSAE, sched.ZMM>, EVEX_V512;
7877   }
       // 128/256-bit forms. Z128 uses the X86-specific X86any_vfpext/X86vfpext
       // nodes, takes its broadcast string from the destination info and uses an
       // f64mem memory operand; Z256 keeps the defaults.
7878   let Predicates = [prd, HasVLX] in {
7879     defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
7880                                X86any_vfpext, X86vfpext, sched.XMM,
7881                                _dst.info128.BroadcastStr,
7882                                "", f64mem>, EVEX_V128;
7883     defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
7884                                any_fpextend, fpextend, sched.YMM>, EVEX_V256;
7885   }
7888 // Truncate [Double to Float, Float to Half]
     // Instantiates the narrowing conversion at three vector lengths, adds
     // explicit selection patterns for the 128-bit form, and defines "x"/"y"
     // suffixed assembler aliases for the 128- and 256-bit forms.
     //   _dst / _src - per-vector-length type info for destination and source.
     //   sched       - per-width scheduling classes (XMM / YMM / ZMM).
     //   prd         - base feature predicate; HasVLX is added for Z128/Z256.
     //   bcast128 / loadVT128 / maskRC128 - broadcast-load fragment, full-load
     //                 fragment and write-mask register class used by the
     //                 128-bit patterns; they default to the _src.info128 values.
7889 multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
7890                             AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
7891                             X86SchedWriteWidths sched, Predicate prd = HasAVX512,
7892                             PatFrag bcast128 = _src.info128.BroadcastLdFrag,
7893                             PatFrag loadVT128 = _src.info128.LdFrag,
7894                             RegisterClass maskRC128 = _src.info128.KRCWM> {
       // 512-bit source: regular forms plus a rounding-control register variant.
       // The destination uses the 256-bit info.
7895   let Predicates = [prd] in {
7896     defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
7897                             X86any_vfpround, X86vfpround, sched.ZMM>,
7898              avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
7899                                X86vfproundRnd, sched.ZMM>, EVEX_V512;
7900   }
       // 128/256-bit sources both produce a 128-bit destination. Z128 passes
       // null_frag for both operators, so its selection patterns are the explicit
       // Pat records below. The "{x}" / "{y}" strings are passed as the Alias
       // argument.
7901   let Predicates = [prd, HasVLX] in {
7902     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
7903                                null_frag, null_frag, sched.XMM,
7904                                _src.info128.BroadcastStr, "{x}",
7905                                f128mem, maskRC128>, EVEX_V128;
7906     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
7907                                X86any_vfpround, X86vfpround,
7908                                sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;
7910     // Special patterns to allow use of X86vmfpround for masking. Instruction
7911     // patterns have been disabled with null_frag.
         // Register source: unmasked, merge-masked (rrk) and zero-masked (rrkz).
7912     def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
7913               (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
7914     def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
7915                             maskRC128:$mask),
7916               (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
7917     def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
7918                             maskRC128:$mask),
7919               (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;
         // Full-vector load source (loadVT128): rm / rmk / rmkz.
7921     def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
7922               (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
7923     def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
7924                             maskRC128:$mask),
7925               (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
7926     def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
7927                             maskRC128:$mask),
7928               (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;
         // Broadcast-load source (bcast128): rmb / rmbk / rmbkz.
7930     def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
7931               (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
7932     def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
7933                             (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
7934               (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
7935     def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
7936                             _dst.info128.ImmAllZerosV, maskRC128:$mask),
7937               (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
7938   }
       // "x"-suffixed aliases for the 128-bit register and broadcast forms.
       // NOTE(review): the trailing `0, "att"` arguments presumably disable alias
       // printing and restrict the alias to the AT&T variant - confirm against
       // the InstAlias class.
       // NOTE(review): these aliases hard-code VK2WM/VK4WM, f64mem and the
       // {1to2}/{1to4} broadcast strings instead of using maskRC128 or the _src
       // info, although this multiclass is also instantiated with f32 sources
       // (VCVTPS2PHX) - confirm that this is intended.
7940   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
7941                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7942   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7943                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7944                   VK2WM:$mask, VR128X:$src), 0, "att">;
7945   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
7946                   "$dst {${mask}} {z}, $src}",
7947                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7948                   VK2WM:$mask, VR128X:$src), 0, "att">;
7949   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7950                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7951   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
7952                   "$dst {${mask}}, ${src}{1to2}}",
7953                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7954                   VK2WM:$mask, f64mem:$src), 0, "att">;
7955   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7956                   "$dst {${mask}} {z}, ${src}{1to2}}",
7957                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7958                   VK2WM:$mask, f64mem:$src), 0, "att">;
       // "y"-suffixed aliases for the 256-bit register and broadcast forms.
7960   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
7961                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7962   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7963                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7964                   VK4WM:$mask, VR256X:$src), 0, "att">;
7965   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
7966                   "$dst {${mask}} {z}, $src}",
7967                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7968                   VK4WM:$mask, VR256X:$src), 0, "att">;
7969   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7970                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7971   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
7972                   "$dst {${mask}}, ${src}{1to4}}",
7973                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7974                   VK4WM:$mask, f64mem:$src), 0, "att">;
7975   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7976                   "$dst {${mask}} {z}, ${src}{1to4}}",
7977                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7978                   VK4WM:$mask, f64mem:$src), 0, "att">;
     // Packed double <-> single conversions. Both use opcode 0x5A and the TB
     // map; the truncating direction additionally carries REX_W and PD.
7981 defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
7982                                   avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
7983                                   REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
7984 defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
7985                                    avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
7986                                    TB, EVEX_CD8<32, CD8VH>;
7988 // Extend Half to Double
     // All three vector lengths use v8f16x_info as the source type info; only
     // the destination info (v8f64 / v2f64x / v4f64x) changes.
     //   sched - per-width scheduling classes (XMM / YMM / ZMM).
7989 multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
7990                             X86SchedWriteWidths sched> {
       // 512-bit form plus an SAE register variant, and an extra pattern that
       // selects the Zrm form for a v8f16 -> v8f64 extending load.
7991   let Predicates = [HasFP16] in {
7992     defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
7993                                   any_fpextend, fpextend, sched.ZMM>,
7994              avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
7995                                 X86vfpextSAE, sched.ZMM>, EVEX_V512;
7996     def : Pat<(v8f64 (extloadv8f16 addr:$src)),
7997                 (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
7998   }
       // 128/256-bit forms spell out the broadcast string and the memory operand
       // explicitly (f32mem for Z128, f64mem for Z256).
7999   let Predicates = [HasFP16, HasVLX] in {
8000     defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
8001                                      X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
8002                                      f32mem>, EVEX_V128;
8003     defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
8004                                      X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
8005                                      f64mem>, EVEX_V256;
8006   }
8009 // Truncate Double to Half
     // Every vector length writes a v8f16x_info destination. The "{z}", "{x}"
     // and "{y}" strings are passed as the Alias argument of avx512_vcvt_fp, and
     // matching "x"/"y"/"z"-suffixed assembler aliases are defined below.
     //   sched - per-width scheduling classes (XMM / YMM / ZMM).
8010 multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
       // 512-bit source: regular forms plus a rounding-control register variant.
8011   let Predicates = [HasFP16] in {
8012     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
8013                             X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
8014              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
8015                                X86vfproundRnd, sched.ZMM>, EVEX_V512;
8016   }
       // 128/256-bit sources pass null_frag for both operators, so these
       // multiclass instantiations carry no selection patterns of their own.
8017   let Predicates = [HasFP16, HasVLX] in {
8018     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
8019                                null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8020                                VK2WM>, EVEX_V128;
8021     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
8022                                null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
8023                                VK4WM>, EVEX_V256;
8024   }
       // "x"-suffixed aliases for the 128-bit register and broadcast forms.
       // NOTE(review): the trailing `0, "att"` arguments presumably disable alias
       // printing and restrict the alias to the AT&T variant - confirm against
       // the InstAlias class.
8025   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8026                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8027                   VR128X:$src), 0, "att">;
8028   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8029                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8030                   VK2WM:$mask, VR128X:$src), 0, "att">;
8031   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8032                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8033                   VK2WM:$mask, VR128X:$src), 0, "att">;
8034   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8035                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8036                   i64mem:$src), 0, "att">;
8037   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8038                   "$dst {${mask}}, ${src}{1to2}}",
8039                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8040                   VK2WM:$mask, i64mem:$src), 0, "att">;
8041   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8042                   "$dst {${mask}} {z}, ${src}{1to2}}",
8043                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8044                   VK2WM:$mask, i64mem:$src), 0, "att">;
       // "y"-suffixed aliases for the 256-bit register and broadcast forms.
8046   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8047                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8048                   VR256X:$src), 0, "att">;
8049   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8050                   "$dst {${mask}}, $src}",
8051                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8052                   VK4WM:$mask, VR256X:$src), 0, "att">;
8053   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8054                   "$dst {${mask}} {z}, $src}",
8055                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8056                   VK4WM:$mask, VR256X:$src), 0, "att">;
8057   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8058                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8059                   i64mem:$src), 0, "att">;
8060   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8061                   "$dst {${mask}}, ${src}{1to4}}",
8062                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8063                   VK4WM:$mask, i64mem:$src), 0, "att">;
8064   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8065                   "$dst {${mask}} {z}, ${src}{1to4}}",
8066                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8067                   VK4WM:$mask, i64mem:$src), 0, "att">;
       // "z"-suffixed aliases for the 512-bit register and broadcast forms.
8069   def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
8070                   (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
8071                   VR512:$src), 0, "att">;
8072   def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
8073                   "$dst {${mask}}, $src}",
8074                   (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
8075                   VK8WM:$mask, VR512:$src), 0, "att">;
8076   def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
8077                   "$dst {${mask}} {z}, $src}",
8078                   (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
8079                   VK8WM:$mask, VR512:$src), 0, "att">;
8080   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
8081                   (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
8082                   i64mem:$src), 0, "att">;
8083   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
8084                   "$dst {${mask}}, ${src}{1to8}}",
8085                   (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
8086                   VK8WM:$mask, i64mem:$src), 0, "att">;
8087   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
8088                   "$dst {${mask}} {z}, ${src}{1to8}}",
8089                   (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
8090                   VK8WM:$mask, i64mem:$src), 0, "att">;
     // FP16 conversions between half and single/double precision. The two
     // single-precision records reuse avx512_cvt_trunc / avx512_cvt_extend with
     // HasFP16 as the predicate; the two double-precision records use the
     // dedicated avx512_cvtpd2ph / avx512_cvtph2pd multiclasses.
8093 defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info,
8094                                    avx512vl_f32_info, SchedWriteCvtPD2PS,
8095                                    HasFP16>, T_MAP5, PD, EVEX_CD8<32, CD8VF>;
8096 defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
8097                                     avx512vl_f16_info, SchedWriteCvtPS2PD,
8098                                     HasFP16>, T_MAP6, PD, EVEX_CD8<16, CD8VH>;
8099 defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
8100                                  REX_W, T_MAP5, PD, EVEX_CD8<64, CD8VF>;
8101 defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
8102                                  T_MAP5, EVEX_CD8<16, CD8VQ>;
     // Selection patterns for the 256- and 128-bit VCVTPD2PH forms. Each source
     // kind (register, full load, 64-bit broadcast load) gets an unmasked
     // pattern on X86any_vfpround plus merge-masked and zero-masked patterns on
     // X86vmfpround.
8104 let Predicates = [HasFP16, HasVLX] in {
8105   // Special patterns to allow use of X86vmfpround for masking. Instruction
8106   // patterns have been disabled with null_frag.
       // v4f64 source, register forms.
8107   def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
8108             (VCVTPD2PHZ256rr VR256X:$src)>;
8109   def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
8110                           VK4WM:$mask)),
8111             (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
8112   def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
8113                           VK4WM:$mask),
8114             (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
       // v4f64 source, full-vector load forms.
8116   def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
8117             (VCVTPD2PHZ256rm addr:$src)>;
8118   def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
8119                           VK4WM:$mask),
8120             (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8121   def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
8122                           VK4WM:$mask),
8123             (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;
       // v4f64 source, broadcast-load forms.
8125   def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
8126             (VCVTPD2PHZ256rmb addr:$src)>;
8127   def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8128                           (v8f16 VR128X:$src0), VK4WM:$mask),
8129             (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8130   def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8131                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
8132             (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
       // v2f64 source, register forms.
8134   def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
8135             (VCVTPD2PHZ128rr VR128X:$src)>;
8136   def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
8137                           VK2WM:$mask),
8138             (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8139   def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
8140                           VK2WM:$mask),
8141             (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
       // v2f64 source, full-vector load forms.
8143   def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
8144             (VCVTPD2PHZ128rm addr:$src)>;
8145   def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
8146                           VK2WM:$mask),
8147             (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8148   def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
8149                           VK2WM:$mask),
8150             (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;
       // v2f64 source, broadcast-load forms.
8152   def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
8153             (VCVTPD2PHZ128rmb addr:$src)>;
8154   def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8155                           (v8f16 VR128X:$src0), VK2WM:$mask),
8156             (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8157   def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8158                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
8159             (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
8162 // Convert Signed/Unsigned Doubleword to Double
     // The enclosing `let` sets an empty Uses list and mayRaiseFPException = 0
     // for the whole multiclass.
     //   OpNode / MaskOpNode       - operators for the 512- and 256-bit forms.
     //   OpNode128 / MaskOpNode128 - operators for the 128-bit form.
     //   sched                     - per-width scheduling classes.
8163 let Uses = []<Register>, mayRaiseFPException = 0 in
8164 multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8165                            SDNode MaskOpNode, SDPatternOperator OpNode128,
8166                            SDNode MaskOpNode128,
8167                            X86SchedWriteWidths sched> {
8168   // No rounding in this op
8169   let Predicates = [HasAVX512] in
8170     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
8171                             MaskOpNode, sched.ZMM>, EVEX_V512;
       // The 128-bit form uses an i64mem operand and supplies its own load DAGs:
       // a loadi64 wrapped in scalar_to_vector (v2i64) and bitcast with bc_v4i32
       // before the conversion operator is applied.
8173   let Predicates = [HasVLX] in {
8174     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
8175                                OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
8176                                "", i64mem, VK2WM,
8177                                (v2f64 (OpNode128 (bc_v4i32
8178                                 (v2i64
8179                                  (scalar_to_vector (loadi64 addr:$src)))))),
8180                                (v2f64 (MaskOpNode128 (bc_v4i32
8181                                 (v2i64
8182                                  (scalar_to_vector (loadi64 addr:$src))))))>,
8183                                EVEX_V128;
8184     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
8185                                MaskOpNode, sched.YMM>, EVEX_V256;
8186   }
8189 // Convert Signed/Unsigned Doubleword to Float
     // Source and destination have the same element count at every vector
     // length (v16i32 -> v16f32, v4i32 -> v4f32, v8i32 -> v8f32).
     //   OpNode / MaskOpNode - operators for the regular forms.
     //   OpNodeRnd           - operator for the rounding-control variant.
     //   sched               - per-width scheduling classes.
8190 multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8191                            SDNode MaskOpNode, SDNode OpNodeRnd,
8192                            X86SchedWriteWidths sched> {
       // 512-bit form plus a rounding-control register variant.
8193   let Predicates = [HasAVX512] in
8194     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
8195                             MaskOpNode, sched.ZMM>,
8196              avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
8197                                OpNodeRnd, sched.ZMM>, EVEX_V512;
       // 128/256-bit forms require HasVLX only and have no rounding variant.
8199   let Predicates = [HasVLX] in {
8200     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
8201                                MaskOpNode, sched.XMM>, EVEX_V128;
8202     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
8203                                MaskOpNode, sched.YMM>, EVEX_V256;
8204   }
8207 // Convert Float to Signed/Unsigned Doubleword with truncation
     // Source and destination have the same element count at every vector
     // length (v16f32 -> v16i32, v4f32 -> v4i32, v8f32 -> v8i32).
     //   OpNode / MaskOpNode - operators for the regular forms.
     //   OpNodeSAE           - operator for the SAE register variant.
     //   sched               - per-width scheduling classes.
8208 multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8209                             SDNode MaskOpNode,
8210                             SDNode OpNodeSAE, X86SchedWriteWidths sched> {
       // 512-bit form plus an SAE register variant.
8211   let Predicates = [HasAVX512] in {
8212     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8213                             MaskOpNode, sched.ZMM>,
8214              avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
8215                                 OpNodeSAE, sched.ZMM>, EVEX_V512;
8216   }
       // 128/256-bit forms require HasVLX only and have no SAE variant.
8217   let Predicates = [HasVLX] in {
8218     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8219                                MaskOpNode, sched.XMM>, EVEX_V128;
8220     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8221                                MaskOpNode, sched.YMM>, EVEX_V256;
8222   }
8225 // Convert Float to Signed/Unsigned Doubleword
     // Same shape as the truncating variant, except that the 512-bit form is
     // paired with a rounding-control variant (OpNodeRnd) instead of an SAE one.
     //   OpNode / MaskOpNode - operators for the regular forms.
     //   OpNodeRnd           - operator for the rounding-control variant.
     //   sched               - per-width scheduling classes.
8226 multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8227                            SDNode MaskOpNode, SDNode OpNodeRnd,
8228                            X86SchedWriteWidths sched> {
       // 512-bit form plus a rounding-control register variant.
8229   let Predicates = [HasAVX512] in {
8230     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8231                             MaskOpNode, sched.ZMM>,
8232              avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
8233                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
8234   }
       // 128/256-bit forms require HasVLX only and have no rounding variant.
8235   let Predicates = [HasVLX] in {
8236     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8237                                MaskOpNode, sched.XMM>, EVEX_V128;
8238     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8239                                MaskOpNode, sched.YMM>, EVEX_V256;
8240   }
8243 // Convert Double to Signed/Unsigned Doubleword with truncation
     // The 128- and 256-bit source forms both write a v4i32x_info destination,
     // so they are told apart by "{x}"/"{y}" alias strings and by the
     // "x"/"y"-suffixed assembler aliases defined below.
     //   OpNode / MaskOpNode - operators for the Z and Z256 forms.
     //   OpNodeSAE           - operator for the SAE register variant.
     //   sched               - per-width scheduling classes.
8244 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8245                             SDNode MaskOpNode, SDNode OpNodeSAE,
8246                             X86SchedWriteWidths sched> {
       // 512-bit source (v8f64 -> v8i32x) plus an SAE register variant.
8247   let Predicates = [HasAVX512] in {
8248     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8249                             MaskOpNode, sched.ZMM>,
8250              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
8251                                 OpNodeSAE, sched.ZMM>, EVEX_V512;
8252   }
8253   let Predicates = [HasVLX] in {
8254     // We need "x"/"y" suffixes in order to distinguish between 128 and 256
8255     // memory forms of these instructions in Asm Parser. They have the same
8256     // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8257     // due to the same reason.
         // Z128 passes null_frag for both operators, so this instantiation
         // carries no selection patterns of its own.
8258     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8259                                null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8260                                VK2WM>, EVEX_V128;
8261     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8262                                MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8263   }
       // "x"-suffixed aliases for the 128-bit register and broadcast forms.
       // NOTE(review): the trailing `0, "att"` arguments presumably disable alias
       // printing and restrict the alias to the AT&T variant - confirm against
       // the InstAlias class.
8265   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8266                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8267                   VR128X:$src), 0, "att">;
8268   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8269                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8270                   VK2WM:$mask, VR128X:$src), 0, "att">;
8271   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8272                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8273                   VK2WM:$mask, VR128X:$src), 0, "att">;
8274   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8275                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8276                   f64mem:$src), 0, "att">;
8277   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8278                   "$dst {${mask}}, ${src}{1to2}}",
8279                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8280                   VK2WM:$mask, f64mem:$src), 0, "att">;
8281   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8282                   "$dst {${mask}} {z}, ${src}{1to2}}",
8283                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8284                   VK2WM:$mask, f64mem:$src), 0, "att">;
       // "y"-suffixed aliases for the 256-bit register and broadcast forms.
8286   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8287                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8288                   VR256X:$src), 0, "att">;
8289   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8290                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8291                   VK4WM:$mask, VR256X:$src), 0, "att">;
8292   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8293                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8294                   VK4WM:$mask, VR256X:$src), 0, "att">;
8295   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8296                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8297                   f64mem:$src), 0, "att">;
8298   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8299                   "$dst {${mask}}, ${src}{1to4}}",
8300                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8301                   VK4WM:$mask, f64mem:$src), 0, "att">;
8302   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8303                   "$dst {${mask}} {z}, ${src}{1to4}}",
8304                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8305                   VK4WM:$mask, f64mem:$src), 0, "att">;
8308 // Convert Double to Signed/Unsigned Doubleword
     // Same shape as the truncating variant, except that the 512-bit form is
     // paired with a rounding-control variant (OpNodeRnd) instead of an SAE one.
     // The 128- and 256-bit source forms both write a v4i32x_info destination,
     // so "x"/"y"-suffixed assembler aliases are defined below.
     //   OpNode / MaskOpNode - operators for the Z and Z256 forms.
     //   OpNodeRnd           - operator for the rounding-control variant.
     //   sched               - per-width scheduling classes.
8309 multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8310                            SDNode MaskOpNode, SDNode OpNodeRnd,
8311                            X86SchedWriteWidths sched> {
       // 512-bit source (v8f64 -> v8i32x) plus a rounding-control variant.
8312   let Predicates = [HasAVX512] in {
8313     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8314                             MaskOpNode, sched.ZMM>,
8315              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
8316                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8317   }
8318   let Predicates = [HasVLX] in {
8319     // We need "x"/"y" suffixes in order to distinguish between 128 and 256
8320     // memory forms of these instructions in Asm Parser. They have the same
8321     // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8322     // due to the same reason.
         // Z128 passes null_frag for both operators, so this instantiation
         // carries no selection patterns of its own.
8323     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8324                                null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8325                                VK2WM>, EVEX_V128;
8326     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8327                                MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8328   }
       // "x"-suffixed aliases for the 128-bit register and broadcast forms.
       // NOTE(review): the trailing `0, "att"` arguments presumably disable alias
       // printing and restrict the alias to the AT&T variant - confirm against
       // the InstAlias class.
8330   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8331                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
8332   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8333                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8334                   VK2WM:$mask, VR128X:$src), 0, "att">;
8335   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8336                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8337                   VK2WM:$mask, VR128X:$src), 0, "att">;
8338   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8339                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8340                   f64mem:$src), 0, "att">;
8341   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8342                   "$dst {${mask}}, ${src}{1to2}}",
8343                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8344                   VK2WM:$mask, f64mem:$src), 0, "att">;
8345   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8346                   "$dst {${mask}} {z}, ${src}{1to2}}",
8347                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8348                   VK2WM:$mask, f64mem:$src), 0, "att">;
       // "y"-suffixed aliases for the 256-bit register and broadcast forms.
8350   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8351                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
8352   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8353                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8354                   VK4WM:$mask, VR256X:$src), 0, "att">;
8355   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8356                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8357                   VK4WM:$mask, VR256X:$src), 0, "att">;
8358   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8359                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8360                   f64mem:$src), 0, "att">;
8361   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8362                   "$dst {${mask}}, ${src}{1to4}}",
8363                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8364                   VK4WM:$mask, f64mem:$src), 0, "att">;
8365   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8366                   "$dst {${mask}} {z}, ${src}{1to4}}",
8367                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8368                   VK4WM:$mask, f64mem:$src), 0, "att">;
8371 // Convert Double to Signed/Unsigned Quadword
     // The 512-bit Z form requires HasDQI and also inherits avx512_vcvt_fp_rc
     // (instantiated with OpNodeRnd); Z128/Z256 additionally require HasVLX.
8372 multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8373                            SDNode MaskOpNode, SDNode OpNodeRnd,
8374                            X86SchedWriteWidths sched> {
8375   let Predicates = [HasDQI] in {
8376     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8377                             MaskOpNode, sched.ZMM>,
8378              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
8379                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8380   }
8381   let Predicates = [HasDQI, HasVLX] in {
8382     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8383                                MaskOpNode, sched.XMM>, EVEX_V128;
8384     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8385                                MaskOpNode, sched.YMM>, EVEX_V256;
8386   }
8389 // Convert Double to Signed/Unsigned Quadword with truncation
     // The 512-bit Z form requires HasDQI and also inherits avx512_vcvt_fp_sae
     // (instantiated with OpNodeRnd); Z128/Z256 additionally require HasVLX.
8390 multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8391                             SDNode MaskOpNode, SDNode OpNodeRnd,
8392                             X86SchedWriteWidths sched> {
8393   let Predicates = [HasDQI] in {
8394     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8395                             MaskOpNode, sched.ZMM>,
8396              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
8397                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
8398   }
8399   let Predicates = [HasDQI, HasVLX] in {
8400     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8401                                MaskOpNode, sched.XMM>, EVEX_V128;
8402     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8403                                MaskOpNode, sched.YMM>, EVEX_V256;
8404   }
8407 // Convert Signed/Unsigned Quadword to Double
     // The 512-bit Z form requires HasDQI and also inherits avx512_vcvt_fp_rc
     // (instantiated with OpNodeRnd); Z128/Z256 additionally require HasVLX.
8408 multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8409                            SDNode MaskOpNode, SDNode OpNodeRnd,
8410                            X86SchedWriteWidths sched> {
8411   let Predicates = [HasDQI] in {
8412     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
8413                             MaskOpNode, sched.ZMM>,
8414              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
8415                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8416   }
8417   let Predicates = [HasDQI, HasVLX] in {
8418     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
8419                                MaskOpNode, sched.XMM>, EVEX_V128;
8420     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
8421                                MaskOpNode, sched.YMM>, EVEX_V256;
8422   }
8425 // Convert Float to Signed/Unsigned Quadword
     // The 512-bit Z form requires HasDQI and also inherits avx512_vcvt_fp_rc
     // (instantiated with OpNodeRnd); Z128/Z256 additionally require HasVLX.
8426 multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8427                            SDNode MaskOpNode, SDNode OpNodeRnd,
8428                            X86SchedWriteWidths sched> {
8429   let Predicates = [HasDQI] in {
8430     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8431                             MaskOpNode, sched.ZMM>,
8432              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
8433                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8434   }
8435   let Predicates = [HasDQI, HasVLX] in {
8436     // Explicitly specified broadcast string, since we take only 2 elements
8437     // from v4f32x_info source
         // The Z128 form also passes f64mem, VK2WM and explicit unmasked/masked
         // source dags built from a loadf64 (scalar_to_vector, bitcast to v4f32).
8438     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8439                                MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8440                                (v2i64 (OpNode (bc_v4f32
8441                                 (v2f64
8442                                  (scalar_to_vector (loadf64 addr:$src)))))),
8443                                (v2i64 (MaskOpNode (bc_v4f32
8444                                 (v2f64
8445                                  (scalar_to_vector (loadf64 addr:$src))))))>,
8446                                EVEX_V128;
8447     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8448                                MaskOpNode, sched.YMM>, EVEX_V256;
8449   }
8452 // Convert Float to Signed/Unsigned Quadword with truncation
     // The 512-bit Z form requires HasDQI and also inherits avx512_vcvt_fp_sae
     // (instantiated with OpNodeRnd); Z128/Z256 additionally require HasVLX.
8453 multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8454                             SDNode MaskOpNode, SDNode OpNodeRnd,
8455                             X86SchedWriteWidths sched> {
8456   let Predicates = [HasDQI] in {
8457     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8458                             MaskOpNode, sched.ZMM>,
8459              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
8460                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
8461   }
8462   let Predicates = [HasDQI, HasVLX] in {
8463     // Explicitly specified broadcast string, since we take only 2 elements
8464     // from v4f32x_info source
         // The Z128 form also passes f64mem, VK2WM and explicit unmasked/masked
         // source dags built from a loadf64 (scalar_to_vector, bitcast to v4f32).
8465     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8466                                MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8467                                (v2i64 (OpNode (bc_v4f32
8468                                 (v2f64
8469                                  (scalar_to_vector (loadf64 addr:$src)))))),
8470                                (v2i64 (MaskOpNode (bc_v4f32
8471                                 (v2f64
8472                                  (scalar_to_vector (loadf64 addr:$src))))))>,
8473                                EVEX_V128;
8474     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8475                                MaskOpNode, sched.YMM>, EVEX_V256;
8476   }
8479 // Convert Signed/Unsigned Quadword to Float
8480 // Also Convert Signed/Unsigned Doubleword to Half
     // The source/destination vector types come from _src/_dst, and the base
     // predicate from prd (default HasDQI). The Z128 instruction patterns are
     // disabled with null_frag and provided by explicit Pat records instead.
     // NOTE(review): the AT&T aliases at the end hard-code VK2WM/VK4WM,
     // {1to2}/{1to4} and i64mem, and the Z128 broadcast patterns use
     // X86VBroadcastld64, while VCVTDQ2PH/VCVTUDQ2PH instantiate this multiclass
     // with avx512vl_i32_info as the source - confirm this is intended.
8481 multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8482                                  SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
8483                                  SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
8484                                  AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8485                                  X86SchedWriteWidths sched, Predicate prd = HasDQI> {
8486   let Predicates = [prd] in {
8487     defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
8488                             MaskOpNode, sched.ZMM>,
8489              avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
8490                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8491   }
8492   let Predicates = [prd, HasVLX] in {
8493     // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8494     // memory forms of these instructions in Asm Parser. They have the same
8495     // dest type - '_dst.info128'. We also specify the broadcast string explicitly
8496     // due to the same reason.
8497     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
8498                                null_frag, sched.XMM, _src.info128.BroadcastStr,
8499                                "{x}", i128mem, _src.info128.KRCWM>,
8500                                EVEX_V128;
8501     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
8502                                MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
8503                                "{y}">, EVEX_V256;
8505     // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
8506     // patterns have been disabled with null_frag.
         // Register source: unmasked, merge-masked (rrk), zero-masked (rrkz).
8507     def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
8508               (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
8509     def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
8510                              _src.info128.KRCWM:$mask),
8511               (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
8512     def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
8513                              _src.info128.KRCWM:$mask),
8514               (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;
         // Full-vector load source (rm/rmk/rmkz).
8516     def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
8517               (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
8518     def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
8519                              _src.info128.KRCWM:$mask),
8520               (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8521     def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
8522                              _src.info128.KRCWM:$mask),
8523               (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;
         // Broadcast-load source (rmb/rmbk/rmbkz).
8525     def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
8526               (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
8527     def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8528                              (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
8529               (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8530     def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8531                              _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
8532               (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
8533   }
       // AT&T-only aliases accepting the "x" suffix for the Z128 register and
       // broadcast forms.
8535   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8536                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8537                   VR128X:$src), 0, "att">;
8538   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8539                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8540                   VK2WM:$mask, VR128X:$src), 0, "att">;
8541   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8542                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8543                   VK2WM:$mask, VR128X:$src), 0, "att">;
8544   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8545                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8546                   i64mem:$src), 0, "att">;
8547   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8548                   "$dst {${mask}}, ${src}{1to2}}",
8549                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8550                   VK2WM:$mask, i64mem:$src), 0, "att">;
8551   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8552                   "$dst {${mask}} {z}, ${src}{1to2}}",
8553                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8554                   VK2WM:$mask, i64mem:$src), 0, "att">;
       // AT&T-only aliases accepting the "y" suffix for the Z256 register and
       // broadcast forms.
8556   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8557                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8558                   VR256X:$src), 0, "att">;
8559   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8560                   "$dst {${mask}}, $src}",
8561                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8562                   VK4WM:$mask, VR256X:$src), 0, "att">;
8563   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8564                   "$dst {${mask}} {z}, $src}",
8565                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8566                   VK4WM:$mask, VR256X:$src), 0, "att">;
8567   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8568                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8569                   i64mem:$src), 0, "att">;
8570   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8571                   "$dst {${mask}}, ${src}{1to4}}",
8572                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8573                   VK4WM:$mask, i64mem:$src), 0, "att">;
8574   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8575                   "$dst {${mask}} {z}, ${src}{1to4}}",
8576                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8577                   VK4WM:$mask, i64mem:$src), 0, "att">;
// Instantiations of the packed integer<->floating-point conversion
// multiclasses. Each defm passes an opcode byte, the mnemonic string, the
// selection nodes and a scheduling record, followed by the encoding modifiers
// (opcode map/prefix, REX_W, EVEX_CD8) listed after the closing '>'.
8580 defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
8581                                  X86any_VSintToFP, X86VSintToFP,
8582                                  SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>;
8584 defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
8585                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8586                                 TB, EVEX_CD8<32, CD8VF>;
8588 defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
8589                                  X86cvttp2si, X86cvttp2siSAE,
8590                                  SchedWriteCvtPS2DQ>, TB, XS, EVEX_CD8<32, CD8VF>;
8592 defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
8593                                  X86cvttp2si, X86cvttp2siSAE,
8594                                  SchedWriteCvtPD2DQ>,
8595                                  TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
8597 defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
8598                                  X86cvttp2ui, X86cvttp2uiSAE,
8599                                  SchedWriteCvtPS2DQ>, TB, EVEX_CD8<32, CD8VF>;
8601 defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
8602                                  X86cvttp2ui, X86cvttp2uiSAE,
8603                                  SchedWriteCvtPD2DQ>,
8604                                  TB, REX_W, EVEX_CD8<64, CD8VF>;
8606 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
8607                                   uint_to_fp, X86any_VUintToFP, X86VUintToFP,
8608                                   SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>;
8610 defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
8611                                  uint_to_fp, X86VUintToFpRnd,
8612                                  SchedWriteCvtDQ2PS>, TB, XD, EVEX_CD8<32, CD8VF>;
8614 defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
8615                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD,
8616                                  EVEX_CD8<32, CD8VF>;
8618 defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
8619                                  X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, TB, XD,
8620                                  REX_W, EVEX_CD8<64, CD8VF>;
8622 defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
8623                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8624                                  TB, EVEX_CD8<32, CD8VF>;
8626 defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
8627                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W,
8628                                  TB, EVEX_CD8<64, CD8VF>;
8630 defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
8631                                  X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, REX_W,
8632                                  TB, PD, EVEX_CD8<64, CD8VF>;
8634 defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
8635                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD,
8636                                  EVEX_CD8<32, CD8VH>;
8638 defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
8639                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W,
8640                                  TB, PD, EVEX_CD8<64, CD8VF>;
8642 defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
8643                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, TB, PD,
8644                                  EVEX_CD8<32, CD8VH>;
8646 defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
8647                                  X86cvttp2si, X86cvttp2siSAE,
8648                                  SchedWriteCvtPD2DQ>, REX_W,
8649                                  TB, PD, EVEX_CD8<64, CD8VF>;
8651 defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
8652                                  X86cvttp2si, X86cvttp2siSAE,
8653                                  SchedWriteCvtPS2DQ>, TB, PD,
8654                                  EVEX_CD8<32, CD8VH>;
8656 defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
8657                                  X86cvttp2ui, X86cvttp2uiSAE,
8658                                  SchedWriteCvtPD2DQ>, REX_W,
8659                                  TB, PD, EVEX_CD8<64, CD8VF>;
8661 defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8662                                  X86cvttp2ui, X86cvttp2uiSAE,
8663                                  SchedWriteCvtPS2DQ>, TB, PD,
8664                                  EVEX_CD8<32, CD8VH>;
8666 defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
8667                             sint_to_fp, X86VSintToFpRnd,
8668                             SchedWriteCvtDQ2PD>, REX_W, TB, XS, EVEX_CD8<64, CD8VF>;
8670 defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
8671                             uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
8672                             REX_W, TB, XS, EVEX_CD8<64, CD8VF>;
     // The two *2PH instantiations override the default HasDQI predicate of
     // avx512_cvtqq2ps_dq2ph with HasFP16.
8674 defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp,
8675                             X86any_VSintToFP, X86VMSintToFP,
8676                             X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8677                             SchedWriteCvtDQ2PS, HasFP16>,
8678                             T_MAP5, EVEX_CD8<32, CD8VF>;
8680 defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp,
8681                             X86any_VUintToFP, X86VMUintToFP,
8682                             X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8683                             SchedWriteCvtDQ2PS, HasFP16>, T_MAP5, XD,
8684                             EVEX_CD8<32, CD8VF>;
8686 defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp,
8687                             X86any_VSintToFP, X86VMSintToFP,
8688                             X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8689                             SchedWriteCvtDQ2PS>, REX_W, TB,
8690                             EVEX_CD8<64, CD8VF>;
8692 defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp,
8693                             X86any_VUintToFP, X86VMUintToFP,
8694                             X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8695                             SchedWriteCvtDQ2PS>, REX_W, TB, XD,
8696                             EVEX_CD8<64, CD8VF>;
// Selection patterns for the 128-bit PD -> DQ/UDQ conversions (whose
// instruction patterns are disabled with null_frag) in register, load and
// broadcast-load forms, each unmasked, merge-masked and zero-masked; followed
// by lrint selections for 128/256-bit sources. All require HasVLX.
8698 let Predicates = [HasVLX] in {
8699   // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8700   // patterns have been disabled with null_frag.
8701   def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8702             (VCVTPD2DQZ128rr VR128X:$src)>;
8703   def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8704                           VK2WM:$mask),
8705             (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8706   def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8707                           VK2WM:$mask),
8708             (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8710   def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8711             (VCVTPD2DQZ128rm addr:$src)>;
8712   def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8713                           VK2WM:$mask),
8714             (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8715   def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8716                           VK2WM:$mask),
8717             (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8719   def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
8720             (VCVTPD2DQZ128rmb addr:$src)>;
8721   def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8722                           (v4i32 VR128X:$src0), VK2WM:$mask),
8723             (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8724   def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8725                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8726             (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8728   // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8729   // patterns have been disabled with null_frag.
8730   def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
8731             (VCVTTPD2DQZ128rr VR128X:$src)>;
8732   def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8733                           VK2WM:$mask),
8734             (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8735   def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8736                           VK2WM:$mask),
8737             (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8739   def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
8740             (VCVTTPD2DQZ128rm addr:$src)>;
8741   def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8742                           VK2WM:$mask),
8743             (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8744   def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8745                           VK2WM:$mask),
8746             (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8748   def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
8749             (VCVTTPD2DQZ128rmb addr:$src)>;
8750   def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8751                           (v4i32 VR128X:$src0), VK2WM:$mask),
8752             (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8753   def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8754                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8755             (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8757   // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8758   // patterns have been disabled with null_frag.
8759   def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8760             (VCVTPD2UDQZ128rr VR128X:$src)>;
8761   def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8762                            VK2WM:$mask),
8763             (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8764   def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8765                            VK2WM:$mask),
8766             (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8768   def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8769             (VCVTPD2UDQZ128rm addr:$src)>;
8770   def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8771                            VK2WM:$mask),
8772             (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8773   def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8774                            VK2WM:$mask),
8775             (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8777   def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
8778             (VCVTPD2UDQZ128rmb addr:$src)>;
8779   def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8780                            (v4i32 VR128X:$src0), VK2WM:$mask),
8781             (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8782   def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8783                            v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8784             (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8786   // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
8787   // patterns have been disabled with null_frag.
8788   def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
8789             (VCVTTPD2UDQZ128rr VR128X:$src)>;
8790   def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8791                           VK2WM:$mask),
8792             (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8793   def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8794                           VK2WM:$mask),
8795             (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8797   def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
8798             (VCVTTPD2UDQZ128rm addr:$src)>;
8799   def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8800                           VK2WM:$mask),
8801             (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8802   def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8803                           VK2WM:$mask),
8804             (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8806   def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
8807             (VCVTTPD2UDQZ128rmb addr:$src)>;
8808   def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8809                           (v4i32 VR128X:$src0), VK2WM:$mask),
8810             (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8811   def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8812                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8813             (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
       // lrint on 128/256-bit sources selects the VCVTPS2DQ/VCVTPD2DQ forms
       // (register and load variants).
8815   def : Pat<(v4i32 (lrint VR128X:$src)), (VCVTPS2DQZ128rr VR128X:$src)>;
8816   def : Pat<(v4i32 (lrint (loadv4f32 addr:$src))), (VCVTPS2DQZ128rm addr:$src)>;
8817   def : Pat<(v8i32 (lrint VR256X:$src)), (VCVTPS2DQZ256rr VR256X:$src)>;
8818   def : Pat<(v8i32 (lrint (loadv8f32 addr:$src))), (VCVTPS2DQZ256rm addr:$src)>;
8819   def : Pat<(v4i32 (lrint VR256X:$src)), (VCVTPD2DQZ256rr VR256X:$src)>;
8820   def : Pat<(v4i32 (lrint (loadv4f64 addr:$src))), (VCVTPD2DQZ256rm addr:$src)>;
// lrint on 512-bit sources selects the 512-bit VCVTPS2DQ/VCVTPD2DQ forms
// (register and load variants).
8822 def : Pat<(v16i32 (lrint VR512:$src)), (VCVTPS2DQZrr VR512:$src)>;
8823 def : Pat<(v16i32 (lrint (loadv16f32 addr:$src))), (VCVTPS2DQZrm addr:$src)>;
8824 def : Pat<(v8i32 (lrint VR512:$src)), (VCVTPD2DQZrr VR512:$src)>;
8825 def : Pat<(v8i32 (lrint (loadv8f64 addr:$src))), (VCVTPD2DQZrm addr:$src)>;
// Patterns that fold an X86vzload64 source (bitcast to v4f32) into the 128-bit
// PS -> QQ/UQQ conversions, in unmasked, merge-masked and zero-masked forms,
// followed by lrint/llrint selections to the 128/256-bit PS2QQ/PD2QQ forms.
// All require HasDQI and HasVLX.
8827 let Predicates = [HasDQI, HasVLX] in {
8828   def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8829             (VCVTPS2QQZ128rm addr:$src)>;
8830   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8831                                  (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8832                                  VR128X:$src0)),
8833             (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8834   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8835                                  (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8836                                  v2i64x_info.ImmAllZerosV)),
8837             (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8839   def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8840             (VCVTPS2UQQZ128rm addr:$src)>;
8841   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8842                                  (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8843                                  VR128X:$src0)),
8844             (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8845   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8846                                  (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8847                                  v2i64x_info.ImmAllZerosV)),
8848             (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8850   def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8851             (VCVTTPS2QQZ128rm addr:$src)>;
8852   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8853                                  (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8854                                  VR128X:$src0)),
8855             (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8856   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8857                                  (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8858                                  v2i64x_info.ImmAllZerosV)),
8859             (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8861   def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8862             (VCVTTPS2UQQZ128rm addr:$src)>;
8863   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8864                                  (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8865                                  VR128X:$src0)),
8866             (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8867   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8868                                  (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8869                                  v2i64x_info.ImmAllZerosV)),
8870             (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
       // lrint and llrint with 64-bit element results map to the same
       // PS2QQ/PD2QQ instructions.
8872   def : Pat<(v4i64 (lrint VR128X:$src)), (VCVTPS2QQZ256rr VR128X:$src)>;
8873   def : Pat<(v4i64 (lrint (loadv4f32 addr:$src))), (VCVTPS2QQZ256rm addr:$src)>;
8874   def : Pat<(v4i64 (llrint VR128X:$src)), (VCVTPS2QQZ256rr VR128X:$src)>;
8875   def : Pat<(v4i64 (llrint (loadv4f32 addr:$src))), (VCVTPS2QQZ256rm addr:$src)>;
8876   def : Pat<(v2i64 (lrint VR128X:$src)), (VCVTPD2QQZ128rr VR128X:$src)>;
8877   def : Pat<(v2i64 (lrint (loadv2f64 addr:$src))), (VCVTPD2QQZ128rm addr:$src)>;
8878   def : Pat<(v4i64 (lrint VR256X:$src)), (VCVTPD2QQZ256rr VR256X:$src)>;
8879   def : Pat<(v4i64 (lrint (loadv4f64 addr:$src))), (VCVTPD2QQZ256rm addr:$src)>;
8880   def : Pat<(v2i64 (llrint VR128X:$src)), (VCVTPD2QQZ128rr VR128X:$src)>;
8881   def : Pat<(v2i64 (llrint (loadv2f64 addr:$src))), (VCVTPD2QQZ128rm addr:$src)>;
8882   def : Pat<(v4i64 (llrint VR256X:$src)), (VCVTPD2QQZ256rr VR256X:$src)>;
8883   def : Pat<(v4i64 (llrint (loadv4f64 addr:$src))), (VCVTPD2QQZ256rm addr:$src)>;
// lrint and llrint producing v8i64, guarded by HasDQI only.  A VR256X /
// loadv8f32 source selects VCVTPS2QQZ (rr / rm) and a VR512 / loadv8f64
// source selects VCVTPD2QQZ (rr / rm); lrint and llrint map to the same
// instructions.
8886 let Predicates = [HasDQI] in {
8887   def : Pat<(v8i64 (lrint VR256X:$src)), (VCVTPS2QQZrr VR256X:$src)>;
8888   def : Pat<(v8i64 (lrint (loadv8f32 addr:$src))), (VCVTPS2QQZrm addr:$src)>;
8889   def : Pat<(v8i64 (llrint VR256X:$src)), (VCVTPS2QQZrr VR256X:$src)>;
8890   def : Pat<(v8i64 (llrint (loadv8f32 addr:$src))), (VCVTPS2QQZrm addr:$src)>;
8891   def : Pat<(v8i64 (lrint VR512:$src)), (VCVTPD2QQZrr VR512:$src)>;
8892   def : Pat<(v8i64 (lrint (loadv8f64 addr:$src))), (VCVTPD2QQZrm addr:$src)>;
8893   def : Pat<(v8i64 (llrint VR512:$src)), (VCVTPD2QQZrr VR512:$src)>;
8894   def : Pat<(v8i64 (llrint (loadv8f64 addr:$src))), (VCVTPD2QQZrm addr:$src)>;
// HasVLX patterns folding a (bc_v4i32 (v2i64 (X86vzload64 addr:$src))) source
// into the 128-bit integer-to-v2f64 conversions.  The signed node
// (X86any_VSintToFP unmasked, X86VSintToFP under vselect_mask) selects
// VCVTDQ2PDZ128rm/rmk/rmkz; the unsigned node (X86any_VUintToFP /
// X86VUintToFP) selects VCVTUDQ2PDZ128rm/rmk/rmkz.  The rmk form takes the
// pass-through register $src0; the rmkz form matches a select against
// v2f64x_info.ImmAllZerosV.
8897 let Predicates = [HasVLX] in {
8898   def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8899             (VCVTDQ2PDZ128rm addr:$src)>;
8900   def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8901                                  (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8902                                  VR128X:$src0)),
8903             (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8904   def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8905                                  (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8906                                  v2f64x_info.ImmAllZerosV)),
8907             (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8909   def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8910             (VCVTUDQ2PDZ128rm addr:$src)>;
8911   def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8912                                  (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8913                                  VR128X:$src0)),
8914             (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8915   def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8916                                  (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8917                                  v2f64x_info.ImmAllZerosV)),
8918             (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8921 //===----------------------------------------------------------------------===//
8922 // Half precision conversion instructions
8923 //===----------------------------------------------------------------------===//
// avx512_cvtph2ps - register (rr) and memory (rm) forms of vcvtph2ps,
// opcode 0x13, built through AVX512_maskable_split.
//   _dest    - vector info of the result; supplies the output register class.
//   _src     - vector info of the source; supplies the input register class
//              and the value type used in the patterns.
//   x86memop - memory operand class for the rm form.
//   ld_dag   - dag supplied by the instantiation and used as the loaded
//              source value in the rm patterns.
//   sched    - scheduling info; rr uses sched, rm uses sched.Folded.
// Each form passes two pattern dags: one on X86any_cvtph2ps and one on
// X86cvtph2ps.  NOTE(review): presumably the first is the unmasked pattern
// and the second the masked one - confirm against AVX512_maskable_split.
// The enclosing let marks every definition as using MXCSR and as possibly
// raising an FP exception.
8925 let Uses = [MXCSR], mayRaiseFPException = 1 in
8926 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8927                            X86MemOperand x86memop, dag ld_dag,
8928                            X86FoldableSchedWrite sched> {
8929   defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
8930                             (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
8931                             (X86any_cvtph2ps (_src.VT _src.RC:$src)),
8932                             (X86cvtph2ps (_src.VT _src.RC:$src))>,
8933                             T8, PD, Sched<[sched]>;
8934   defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
8935                             (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
8936                             (X86any_cvtph2ps (_src.VT ld_dag)),
8937                             (X86cvtph2ps (_src.VT ld_dag))>,
8938                             T8, PD, Sched<[sched.Folded]>;
// avx512_cvtph2ps_sae - register-only {sae} form (rrb) of vcvtph2ps,
// opcode 0x13.  It matches X86cvtph2psSAE, prints "{sae}" in the assembly
// operands and carries EVEX_B.  Only Uses = [MXCSR] is set here;
// mayRaiseFPException is not set by this multiclass.
8941 multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8942                                X86FoldableSchedWrite sched> {
8943   let Uses = [MXCSR] in
8944   defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
8945                              (ins _src.RC:$src), "vcvtph2ps",
8946                              "{sae}, $src", "$src, {sae}",
8947                              (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
8948                              T8, PD, EVEX_B, Sched<[sched]>;
// 512-bit vcvtph2ps (HasAVX512): v16i16 source to v16f32 result, f256mem
// memory operand, plain (load addr:$src) as the memory source.  Combines the
// regular forms with the {sae} form under the same VCVTPH2PSZ prefix.
8951 let Predicates = [HasAVX512] in
8952   defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
8953                                     (load addr:$src), WriteCvtPH2PSZ>,
8954                     avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
8955                     EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
// 256-bit and 128-bit vcvtph2ps (HasVLX); no {sae} form is instantiated at
// these widths.  The 256-bit form reads f128mem through a plain load.  The
// 128-bit form reads f64mem and uses
// (bitconvert (v2i64 (X86vzload64 addr:$src))) as its memory source dag.
8957 let Predicates = [HasVLX] in {
8958   defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
8959                        (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
8960                        EVEX_CD8<32, CD8VH>;
8961   defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
8962                        (bitconvert (v2i64 (X86vzload64 addr:$src))),
8963                        WriteCvtPH2PS>, EVEX, EVEX_V128,
8964                        EVEX_CD8<32, CD8VH>;
8966   // Pattern match vcvtph2ps of a scalar i64 load.
8967   def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
8968               (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
8969             (VCVTPH2PSZ128rm addr:$src)>;
// avx512_cvtps2ph - vcvtps2ph (opcode 0x1D) with an 8-bit immediate operand
// ($src2).  All definitions share ExeDomain = GenericDomain, Uses = [MXCSR]
// and mayRaiseFPException = 1.
//   _dest    - vector info of the result.
//   _src     - vector info of the source.
//   x86memop - memory operand class of the store forms.
//   RR       - scheduling class of the register-destination forms.
//   MR       - scheduling class of the memory-destination forms.
// Definitions:
//   rr   - unmasked register form, matches X86any_cvtps2ph.
//   rrk  - merge-masked register form; $src0 is tied to $dst and is passed
//          as the pass-through operand of X86mcvtps2ph.
//   rrkz - zero-masked register form; X86mcvtps2ph with _dest.ImmAllZerosV.
//   mr   - unmasked store form; no pattern attached here.
//   mrk  - masked store form; no pattern attached here.
8972 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8973                            X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
8974 let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8975   def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8976              (ins _src.RC:$src1, i32u8imm:$src2),
8977              "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8978              [(set _dest.RC:$dst,
8979                    (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
8980              Sched<[RR]>;
8981   let Constraints = "$src0 = $dst" in
8982   def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8983              (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8984              "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
8985              [(set _dest.RC:$dst,
8986                    (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8987                                  _dest.RC:$src0, _src.KRCWM:$mask))]>,
8988              Sched<[RR]>, EVEX_K;
8989   def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8990              (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8991              "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
8992              [(set _dest.RC:$dst,
8993                    (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8994                                  _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
8995              Sched<[RR]>, EVEX_KZ;
  // The store forms carry empty pattern lists, so mayStore and
  // hasSideEffects are stated explicitly.
8996   let hasSideEffects = 0, mayStore = 1 in {
8997     def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
8998                (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
8999                "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9000                Sched<[MR]>;
    // NOTE(review): the mask operand class here is _dest.KRCWM, whereas the
    // register forms above use _src.KRCWM - confirm this is intentional.
9001     def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
9002                (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9003                "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
9004                 EVEX_K, Sched<[MR]>;
9005   }
// avx512_cvtps2ph_sae - register-only {sae} forms of vcvtps2ph (opcode 0x1D),
// all carrying EVEX_B, with hasSideEffects = 0 and Uses = [MXCSR].
//   rrb   - unmasked, matches X86cvtps2phSAE.
//   rrbk  - merge-masked ($src0 tied to $dst), matches X86mcvtps2phSAE with
//           $src0 as pass-through.
//   rrbkz - zero-masked, matches X86mcvtps2phSAE with _dest.ImmAllZerosV.
// The scheduling parameter is itself named Sched, so Sched<[Sched]> below
// applies the Sched<> mixin to that parameter.
9009 multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9010                                SchedWrite Sched> {
9011   let hasSideEffects = 0, Uses = [MXCSR] in {
9012     def rrb : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9013               (ins _src.RC:$src1, i32u8imm:$src2),
9014               "vcvtps2ph\t{$src2, {sae}, $src1, $dst|$dst, $src1, {sae}, $src2}",
9015               [(set _dest.RC:$dst,
9016                     (X86cvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
9017               EVEX_B, Sched<[Sched]>;
9018     let Constraints = "$src0 = $dst" in
9019     def rrbk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9020               (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9021               "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}}|$dst {${mask}}, $src1, {sae}, $src2}",
9022               [(set _dest.RC:$dst,
9023                     (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
9024                                   _dest.RC:$src0, _src.KRCWM:$mask))]>,
9025               EVEX_B, Sched<[Sched]>, EVEX_K;
9026     def rrbkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9027               (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9028               "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, {sae}, $src2}",
9029               [(set _dest.RC:$dst,
9030                     (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
9031                                   _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
9032               EVEX_B, Sched<[Sched]>, EVEX_KZ;
// 512-bit vcvtps2ph (HasAVX512): v16f32 source to v16i16 result with an
// f256mem store operand, combining the regular and {sae} forms.  The mr
// definition has no pattern of its own, so the trailing Pat maps a store of
// the X86any_cvtps2ph result onto VCVTPS2PHZmr.
9036 let Predicates = [HasAVX512] in {
9037   defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
9038                                     WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
9039                     avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
9040                                         EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
9042   def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
9043             (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
// 256-bit and 128-bit vcvtps2ph (HasVLX); no {sae} forms at these widths.
// The 256-bit form stores through f128mem, the 128-bit form through f64mem.
// Store-folding patterns:
//   - element 0 of the 128-bit result, extracted as f64 (via bc_v2f64) or as
//     i64 (via bc_v2i64) and stored, selects VCVTPS2PHZ128mr;
//   - a store of the whole v8i16 result of a VR256X source selects
//     VCVTPS2PHZ256mr.
9046 let Predicates = [HasVLX] in {
9047   defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
9048                                        WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
9049                                        EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
9050   defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
9051                                        WriteCvtPS2PH, WriteCvtPS2PHSt>,
9052                                        EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
9054   def : Pat<(store (f64 (extractelt
9055                          (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9056                          (iPTR 0))), addr:$dst),
9057             (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9058   def : Pat<(store (i64 (extractelt
9059                          (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9060                          (iPTR 0))), addr:$dst),
9061             (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9062   def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
9063             (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
// avx512_ord_cmp_sae - defines a single rrb record: a two-register-source
// form with no outputs, "{sae}" in its assembly string and an empty pattern
// list (hasSideEffects = 0 is therefore stated explicitly).
//   opc       - opcode byte.
//   _         - vector info supplying the register class of both sources.
//   OpcodeStr - mnemonic.
//   d         - execution domain.
//   sched     - scheduling info, WriteFComX by default.
9066 //  Unordered/Ordered scalar fp compare with Sae and set EFLAGS
9067 multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
9068                               string OpcodeStr, Domain d,
9069                               X86FoldableSchedWrite sched = WriteFComX> {
9070   let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in
9071   def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
9072                   !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
9073                   EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
// {sae} forms of the scalar single/double compares (HasAVX512), all defining
// EFLAGS.  The vucomis* forms use opcode 0x2E and the vcomis* forms 0x2F;
// the double-precision variants add REX_W and use a 64-bit EVEX_CD8 element
// size.
9076 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9077   defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
9078                                    AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9079   defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
9080                                    AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
9081   defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
9082                                    AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9083   defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
9084                                    AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
// EVEX-encoded scalar single/double compares (HasAVX512), all defining
// EFLAGS, instantiated from sse12_ord_cmp / sse12_ord_cmp_int (defined
// outside this section).  The FR32X / FR64X variants match X86any_fcmp for
// the ucomis* forms (0x2E) and X86strict_fcmps for the comis* forms (0x2F).
// The isCodeGenOnly variants operate on VR128X with ssmem / sdmem operands
// and match X86ucomi / X86comi.
9087 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9088   defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
9089                                  "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9090                                  EVEX_CD8<32, CD8VT1>;
9091   defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
9092                                   "ucomisd", SSEPackedDouble>, TB, PD, EVEX,
9093                                   VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9094   defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
9095                                  "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9096                                  EVEX_CD8<32, CD8VT1>;
9097   defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
9098                                  "comisd", SSEPackedDouble>, TB, PD, EVEX,
9099                                   VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9100   let isCodeGenOnly = 1 in {
9101     defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
9102                           sse_load_f32, "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9103                           EVEX_CD8<32, CD8VT1>;
9104     defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
9105                           sse_load_f64, "ucomisd", SSEPackedDouble>, TB, PD, EVEX,
9106                           VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9108     defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
9109                           sse_load_f32, "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9110                           EVEX_CD8<32, CD8VT1>;
9111     defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
9112                           sse_load_f64, "comisd", SSEPackedDouble>, TB, PD, EVEX,
9113                           VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9114   }
// Half-precision scalar compares (HasFP16), all defining EFLAGS and encoded
// in T_MAP5 with a 16-bit EVEX_CD8 element size.  Three groups share the
// VUCOMISHZ (0x2E) / VCOMISHZ (0x2F) prefixes: the {sae} forms, the FR16X
// forms (X86any_fcmp / X86strict_fcmps), and the isCodeGenOnly VR128X forms
// (X86ucomi / X86comi with shmem operands).
9117 let Defs = [EFLAGS], Predicates = [HasFP16] in {
9118   defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
9119                                 SSEPackedSingle>, AVX512PSIi8Base, T_MAP5,
9120                                 EVEX_CD8<16, CD8VT1>;
9121   defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
9122                                 SSEPackedSingle>, AVX512PSIi8Base, T_MAP5,
9123                                 EVEX_CD8<16, CD8VT1>;
9124   defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
9125                                 "ucomish", SSEPackedSingle>, T_MAP5, EVEX,
9126                                 VEX_LIG, EVEX_CD8<16, CD8VT1>;
9127   defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
9128                                 "comish", SSEPackedSingle>, T_MAP5, EVEX,
9129                                 VEX_LIG, EVEX_CD8<16, CD8VT1>;
9130   let isCodeGenOnly = 1 in {
9131     defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
9132                                 sse_load_f16, "ucomish", SSEPackedSingle>,
9133                                 T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9135     defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
9136                                 sse_load_f16, "comish", SSEPackedSingle>,
9137                                 T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9138   }
// Two-source scalar operation with register (rr) and memory (rm) forms, built
// through AVX512_maskable_scalar.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   OpNode    - node matched by both forms; it takes $src1 and $src2.
//   sched     - scheduling info; rm uses sched.Folded and sched.ReadAfterFold.
//   _         - vector info (register class, value type, scalar memory
//               operand and its load fragments, execution domain).
//   prd       - predicate guarding both forms, HasAVX512 by default.
9141 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
9142 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9143                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
9144                          Predicate prd = HasAVX512> {
9145   let Predicates = [prd], ExeDomain = _.ExeDomain in {
9146   defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9147                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9148                            "$src2, $src1", "$src1, $src2",
9149                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9150                            EVEX, VVVV, VEX_LIG, Sched<[sched]>;
9151   defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9152                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9153                          "$src2, $src1", "$src1, $src2",
9154                          (OpNode (_.VT _.RC:$src1),
9155                           (_.ScalarIntMemFrags addr:$src2))>, EVEX, VVVV, VEX_LIG,
9156                           Sched<[sched.Folded, sched.ReadAfterFold]>;
// Half-precision scalar vrcpsh (0x4D) and vrsqrtsh (0x4F): f16x_info, guarded
// by HasFP16 and encoded in T_MAP6.  They reuse the X86rcp14s / X86rsqrt14s
// nodes.  These two instantiations are not wrapped in a Uses = [MXCSR] let.
9160 defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
9161                                f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
9162                                T_MAP6, PD;
9163 defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
9164                                  SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
9165                                  EVEX_CD8<16, CD8VT1>, T_MAP6, PD;
// Scalar vrcp14ss/sd (0x4D) and vrsqrt14ss/sd (0x4F), all marked as using
// MXCSR.  They take the default predicate of avx512_fp14_s (HasAVX512); the
// double-precision variants add REX_W and a 64-bit EVEX_CD8 element size.
9166 let Uses = [MXCSR] in {
9167 defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
9168                                f32x_info>, EVEX_CD8<32, CD8VT1>,
9169                                T8, PD;
9170 defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
9171                                f64x_info>, REX_W, EVEX_CD8<64, CD8VT1>,
9172                                T8, PD;
9173 defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
9174                                  SchedWriteFRsqrt.Scl, f32x_info>,
9175                                  EVEX_CD8<32, CD8VT1>, T8, PD;
9176 defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
9177                                  SchedWriteFRsqrt.Scl, f64x_info>, REX_W,
9178                                  EVEX_CD8<64, CD8VT1>, T8, PD;
// Single-source packed operation with three forms built through
// AVX512_maskable:
//   r  - register source.
//   m  - full-vector memory source (_.MemOp, loaded with _.LdFrag).
//   mb - broadcast memory source (_.ScalarMemOp, loaded with
//        _.BroadcastLdFrag); prints _.BroadcastStr after the operand and
//        carries EVEX_B.
// OpNode is the node matched by all three forms; the memory forms use
// sched.Folded and sched.ReadAfterFold.
9181 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
9182 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
9183                          X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9184   let ExeDomain = _.ExeDomain in {
9185   defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9186                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
9187                          (_.VT (OpNode _.RC:$src))>, EVEX, T8, PD,
9188                          Sched<[sched]>;
9189   defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9190                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9191                          (OpNode (_.VT
9192                            (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8, PD,
9193                          Sched<[sched.Folded, sched.ReadAfterFold]>;
9194   defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9195                           (ins _.ScalarMemOp:$src), OpcodeStr,
9196                           "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9197                           (OpNode (_.VT
9198                             (_.BroadcastLdFrag addr:$src)))>,
9199                           EVEX, T8, PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9200   }
// avx512_fp14_p_vl_all - instantiates avx512_fp14_p for every vector width
// and element type.  The mnemonic is OpcodeStr with "14ps", "14pd" or "ph"
// appended, and the matching suffix (14PSZ, 14PDZ, PHZ, plus 128/256) is
// appended to the defm name given by the caller.
//   - 512-bit ps/pd: no Predicates let here; Uses = [MXCSR].
//   - 512-bit ph: HasFP16, T_MAP6; not wrapped in the Uses = [MXCSR] let.
//   - 128/256-bit ps/pd: HasVLX; Uses = [MXCSR].
//   - 128/256-bit ph: HasFP16 and HasVLX, T_MAP6; not wrapped in the
//     Uses = [MXCSR] let.
9203 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
9204                                 X86SchedWriteWidths sched> {
9205   let Uses = [MXCSR] in {
9206   defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
9207                              v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
9208   defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
9209                              v8f64_info>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
9210   }
9211   let Predicates = [HasFP16] in
9212   defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
9213                            v32f16_info>, EVEX_V512, T_MAP6, EVEX_CD8<16, CD8VF>;
9215   // Define only if AVX512VL feature is present.
9216   let Predicates = [HasVLX], Uses = [MXCSR] in {
9217     defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9218                                   OpNode, sched.XMM, v4f32x_info>,
9219                                   EVEX_V128, EVEX_CD8<32, CD8VF>;
9220     defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9221                                   OpNode, sched.YMM, v8f32x_info>,
9222                                   EVEX_V256, EVEX_CD8<32, CD8VF>;
9223     defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9224                                   OpNode, sched.XMM, v2f64x_info>,
9225                                   EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>;
9226     defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9227                                   OpNode, sched.YMM, v4f64x_info>,
9228                                   EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>;
9229   }
9230   let Predicates = [HasFP16, HasVLX] in {
9231     defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9232                                 OpNode, sched.XMM, v8f16x_info>,
9233                                 EVEX_V128, T_MAP6, EVEX_CD8<16, CD8VF>;
9234     defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9235                                 OpNode, sched.YMM, v16f16x_info>,
9236                                 EVEX_V256, T_MAP6, EVEX_CD8<16, CD8VF>;
9237   }
// Packed vrsqrt14ps/pd + vrsqrtph (opcode 0x4E, X86rsqrt14) and
// vrcp14ps/pd + vrcpph (opcode 0x4C, X86rcp14) at all vector widths.
9240 defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
9241 defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;
// Two-source scalar operation with three forms built through
// AVX512_maskable_scalar, all using MXCSR:
//   r  - register form matching OpNode; carries SIMD_EXC.
//   rb - register {sae} form matching OpNodeSAE; carries EVEX_B and no
//        SIMD_EXC.
//   m  - memory form matching OpNode with a scalar memory source; carries
//        SIMD_EXC.
// No EVEX/VVVV/map mixins are attached here; the instantiating defm adds them.
9243 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
9244 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
9245                          SDNode OpNode, SDNode OpNodeSAE,
9246                          X86FoldableSchedWrite sched> {
9247   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
9248   defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9249                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9250                            "$src2, $src1", "$src1, $src2",
9251                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9252                            Sched<[sched]>, SIMD_EXC;
9254   defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9255                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9256                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
9257                             (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9258                             EVEX_B, Sched<[sched]>;
9260   defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9261                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9262                          "$src2, $src1", "$src1, $src2",
9263                          (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
9264                          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9265   }
// avx512_fp28_s_ass - same r / rb / m scalar forms as avx512_fp28_s, but
// every form passes null_frag in place of a pattern.  hasSideEffects = 0 is
// set on all three and mayLoad = 1 on m explicitly.
// NOTE(review): null_frag presumably means no selection pattern is produced
// for these records (assembler/disassembler-only) - confirm.
9268 multiclass avx512_fp28_s_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9269                              X86FoldableSchedWrite sched> {
9270   let ExeDomain = _.ExeDomain, Uses = [MXCSR], hasSideEffects = 0 in {
9271   defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9272                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9273                            "$src2, $src1", "$src1, $src2",
9274                            (null_frag)>, Sched<[sched]>, SIMD_EXC;
9275   defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9276                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9277                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
9278                             (null_frag)>, EVEX_B, Sched<[sched]>;
9279   let mayLoad = 1 in
9280   defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9281                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9282                          "$src2, $src1", "$src1, $src2",
9283                          (null_frag)>,
9284                          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9285   }
// avx512_eri_s_ass - instantiates the pattern-less scalar forms for single
// precision (SSZ, mnemonic suffix "ss") and double precision (SDZ, suffix
// "sd", with REX_W and a 64-bit EVEX_CD8 element size).
9288 multiclass avx512_eri_s_ass<bits<8> opc, string OpcodeStr,
9289                             X86FoldableSchedWrite sched> {
9290   defm SSZ : avx512_fp28_s_ass<opc, OpcodeStr#"ss", f32x_info, sched>,
9291              EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD, EVEX, VVVV;
9292   defm SDZ : avx512_fp28_s_ass<opc, OpcodeStr#"sd", f64x_info, sched>,
9293              EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD, EVEX, VVVV;
// Scalar vrcp28ss/sd (opcode 0xCB) and vrsqrt28ss/sd (opcode 0xCD), defined
// without selection patterns.
9296 defm VRCP28   : avx512_eri_s_ass<0xCB, "vrcp28", SchedWriteFRcp.Scl>;
9297 defm VRSQRT28 : avx512_eri_s_ass<0xCD, "vrsqrt28", SchedWriteFRsqrt.Scl>;
// avx512_eri_s - instantiates avx512_fp28_s (with OpNode / OpNodeSAE
// patterns) for single precision (SSZ, suffix "ss") and double precision
// (SDZ, suffix "sd", with REX_W and a 64-bit EVEX_CD8 element size).
9299 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9300                         SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9301   defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
9302                            sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD, EVEX, VVVV;
9303   defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
9304                            sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD, EVEX, VVVV;
// avx512_vgetexpsh - half-precision scalar instantiation of avx512_fp28_s
// (SHZ, mnemonic suffix "sh"), guarded by HasFP16 and encoded in T_MAP6 with
// a 16-bit EVEX_CD8 element size.
9307 multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
9308                         SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9309   let Predicates = [HasFP16] in
9310   defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode,  OpNodeSAE, sched>,
9311                EVEX_CD8<16, CD8VT1>, T_MAP6, PD, EVEX, VVVV;
// Scalar vgetexpss/sd/sh, opcode 0x43, matching X86fgetexps and its SAE
// variant X86fgetexpSAEs.
9314 defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9315                               SchedWriteFRnd.Scl>,
9316                  avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9317                                   SchedWriteFRnd.Scl>;
// Single-source packed operation with register (r), full-vector memory (m)
// and broadcast memory (mb, EVEX_B) forms built through AVX512_maskable, all
// matching OpNode.  Every form uses MXCSR and may raise an FP exception.
// No EVEX / opcode-map mixins are attached here; the instantiating defm adds
// them.
9318 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
9320 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9321                          SDNode OpNode, X86FoldableSchedWrite sched> {
9322   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9323   defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9324                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
9325                          (OpNode (_.VT _.RC:$src))>,
9326                          Sched<[sched]>;
9328   defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9329                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9330                          (OpNode (_.VT
9331                              (bitconvert (_.LdFrag addr:$src))))>,
9332                           Sched<[sched.Folded, sched.ReadAfterFold]>;
9334   defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9335                          (ins _.ScalarMemOp:$src), OpcodeStr,
9336                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9337                          (OpNode (_.VT
9338                                   (_.BroadcastLdFrag addr:$src)))>,
9339                          EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9340   }
// avx512_fp28_p_sae - register-only {sae} form (rb) of a single-source
// packed operation.  It matches OpNode (the caller passes the SAE node),
// carries EVEX_B and uses MXCSR; mayRaiseFPException is not set here.
9342 multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9343                          SDNode OpNode, X86FoldableSchedWrite sched> {
9344   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
9345   defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9346                         (ins _.RC:$src), OpcodeStr,
9347                         "{sae}, $src", "$src, {sae}",
9348                         (OpNode (_.VT _.RC:$src))>,
9349                         EVEX_B, Sched<[sched]>;
// avx512_fp28_p_ass - same r / m / mb packed forms as avx512_fp28_p, but
// every form passes null_frag in place of a pattern.  hasSideEffects = 0 is
// set on all three and mayLoad = 1 on the two memory forms explicitly.
// NOTE(review): null_frag presumably means no selection pattern is produced
// for these records - confirm.
9352 multiclass avx512_fp28_p_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9353                              X86FoldableSchedWrite sched> {
9354   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1,
9355     hasSideEffects = 0 in {
9356   defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9357                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
9358                          (null_frag)>, Sched<[sched]>;
9359   let mayLoad = 1 in
9360   defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9361                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9362                          (null_frag)>,
9363                          Sched<[sched.Folded, sched.ReadAfterFold]>;
9364   let mayLoad = 1 in
9365   defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9366                          (ins _.ScalarMemOp:$src), OpcodeStr,
9367                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9368                          (null_frag)>,
9369                          EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9370   }
// avx512_fp28_p_sae_ass - pattern-less (null_frag) counterpart of
// avx512_fp28_p_sae: a single register {sae} form (rb) with EVEX_B,
// Uses = [MXCSR] and hasSideEffects = 0.
9372 multiclass avx512_fp28_p_sae_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9373                                 X86FoldableSchedWrite sched> {
9374   let ExeDomain = _.ExeDomain, Uses = [MXCSR], hasSideEffects = 0 in
9375   defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9376                         (ins _.RC:$src), OpcodeStr,
9377                         "{sae}, $src", "$src, {sae}",
9378                         (null_frag)>, Sched<[sched]>, EVEX_B;
// avx512_eri_ass - 512-bit-only, pattern-less packed instantiations: PSZ
// (v16f32, suffix "ps") and PDZ (v8f64, suffix "pd", with REX_W), each
// combining the regular and {sae} forms.  EVEX itself is not attached here;
// the instantiating defm supplies it.
9381 multiclass  avx512_eri_ass<bits<8> opc, string OpcodeStr,
9382                            X86SchedWriteWidths sched> {
9383    defm PSZ : avx512_fp28_p_ass<opc, OpcodeStr#"ps", v16f32_info, sched.ZMM>,
9384               avx512_fp28_p_sae_ass<opc, OpcodeStr#"ps", v16f32_info, sched.ZMM>,
9385               T8, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
9386    defm PDZ : avx512_fp28_p_ass<opc, OpcodeStr#"pd", v8f64_info, sched.ZMM>,
9387               avx512_fp28_p_sae_ass<opc, OpcodeStr#"pd", v8f64_info, sched.ZMM>,
9388               T8, PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
// Packed 512-bit vrsqrt28ps/pd (opcode 0xCC), vrcp28ps/pd (0xCA) and
// vexp2ps/pd (0xC8), defined without selection patterns.  vexp2 is scheduled
// with SchedWriteFAdd.
9391 defm VRSQRT28 : avx512_eri_ass<0xCC, "vrsqrt28", SchedWriteFRsqrt>, EVEX;
9392 defm VRCP28   : avx512_eri_ass<0xCA, "vrcp28", SchedWriteFRcp>, EVEX;
9393 defm VEXP2    : avx512_eri_ass<0xC8, "vexp2", SchedWriteFAdd>, EVEX;
// avx512_eri - 512-bit packed instantiations with selection patterns: PSZ
// (v16f32, suffix "ps") and PDZ (v8f64, suffix "pd", with REX_W).  Each
// combines avx512_fp28_p (matching OpNode) with avx512_fp28_p_sae (matching
// OpNodeSAE).
9395 multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
9396                        SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9397    defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
9398               avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
9399               T8, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
9400    defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
9401               avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
9402               T8, PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
// avx512_fp_unaryop_packed - 128-bit and 256-bit ps/pd instantiations of
// avx512_fp28_p, guarded by HasVLX.  No {sae} form is instantiated at these
// widths; the pd variants add REX_W and a 64-bit EVEX_CD8 element size.
9405 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
9406                                   SDNode OpNode, X86SchedWriteWidths sched> {
9407   // Define only if AVX512VL feature is present.
9408   let Predicates = [HasVLX] in {
9409     defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
9410                                 sched.XMM>,
9411                                 EVEX_V128, T8, PD, EVEX_CD8<32, CD8VF>;
9412     defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
9413                                 sched.YMM>,
9414                                 EVEX_V256, T8, PD, EVEX_CD8<32, CD8VF>;
9415     defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
9416                                 sched.XMM>,
9417                                 EVEX_V128, REX_W, T8, PD, EVEX_CD8<64, CD8VF>;
9418     defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
9419                                 sched.YMM>,
9420                                 EVEX_V256, REX_W, T8, PD, EVEX_CD8<64, CD8VF>;
9421   }
// Half-precision ("ph") packed forms, all placed in opcode map T_MAP6.
//   opc       - opcode byte shared by every generated record.
//   OpcodeStr - mnemonic stem; "ph" is appended here.
//   OpNode    - node handed to avx512_fp28_p.
//   OpNodeSAE - node handed to avx512_fp28_p_sae (512-bit form only).
//   sched     - per-width scheduling classes.
9424 multiclass  avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
9425                        SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  // 512-bit form: needs only HasFP16 and also gets the SAE variant.
9426   let Predicates = [HasFP16] in
9427   defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
9428               avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
9429               T_MAP6, PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
  // 128/256-bit forms: additionally require HasVLX; no SAE variant.
9430   let Predicates = [HasFP16, HasVLX] in {
9431     defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
9432                                      EVEX_V128, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
9433     defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
9434                                      EVEX_V256, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
9435   }
// VGETEXP (opcode 0x42): combines the 512-bit ps/pd forms (avx512_eri), the
// ph forms (avx512_vgetexp_fp16) and the 128/256-bit ps/pd forms
// (avx512_fp_unaryop_packed), all using X86fgetexp / X86fgetexpSAE and the
// SchedWriteFRnd scheduling classes.
9437 defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9438                             SchedWriteFRnd>,
9439                  avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9440                                      SchedWriteFRnd>,
9441                  avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
9442                                           SchedWriteFRnd>, EVEX;
// Register-to-register packed square root with an explicit rounding-control
// operand.
//   opc       - opcode byte.
//   OpcodeStr - full mnemonic (suffix already appended by the caller).
//   sched     - scheduling class for the single "rb" form.
//   _         - vector type info (register class, VT, execution domain).
// The pattern matches X86fsqrtRnd with the rounding mode as a target
// immediate ($rc); the record is tagged EVEX_B and EVEX_RC.
9444 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
9445                                     X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9446   let ExeDomain = _.ExeDomain in
9447   defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9448                          (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
9449                          (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
9450                          EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
// Packed square root in three operand forms: register (r), full-vector memory
// load (m) and broadcast memory load (mb).
//   opc       - opcode byte.
//   OpcodeStr - full mnemonic (suffix already appended by the caller).
//   sched     - scheduling class; the memory forms use sched.Folded and
//               sched.ReadAfterFold.
//   _         - vector type info.
// Every form reads MXCSR and is marked as possibly raising an FP exception.
// Each AVX512_maskable_split receives two patterns, one on any_fsqrt and one
// on fsqrt.
// NOTE(review): presumably the first pattern is for the unmasked record and
// the second for the masked records - confirm against AVX512_maskable_split.
9453 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
9454                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9455   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9456   defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
9457                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
9458                          (_.VT (any_fsqrt _.RC:$src)),
9459                          (_.VT (fsqrt _.RC:$src))>, EVEX,
9460                          Sched<[sched]>;
9461   defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9462                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9463                          (any_fsqrt (_.VT (_.LdFrag addr:$src))),
9464                          (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
9465                          Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Broadcast form: scalar memory operand, asm operand suffixed with
  // _.BroadcastStr, record tagged EVEX_B.
9466   defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9467                           (ins _.ScalarMemOp:$src), OpcodeStr,
9468                           "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9469                           (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
9470                           (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
9471                           EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9472   }
// Instantiates avx512_sqrt_packed for every element type and vector width:
//   ph : 512-bit under HasFP16, 128/256-bit under HasFP16 + HasVLX (T_MAP5).
//   ps : 512-bit with no extra predicate here, 128/256-bit under HasVLX (TB).
//   pd : same widths as ps, additionally tagged REX_W and PD.
//   opc       - opcode byte shared by all forms.
//   OpcodeStr - mnemonic stem; "ph"/"ps"/"pd" is appended via !strconcat.
//   sched     - scheduling classes indexed by element type (PH/PS/PD) and
//               width (XMM/YMM/ZMM).
9475 let Uses = [MXCSR], mayRaiseFPException = 1 in
9476 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
9477                                   X86SchedWriteSizes sched> {
9478   let Predicates = [HasFP16] in
9479   defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9480                                 sched.PH.ZMM, v32f16_info>,
9481                                 EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
9482   let Predicates = [HasFP16, HasVLX] in {
9483     defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9484                                      sched.PH.XMM, v8f16x_info>,
9485                                      EVEX_V128, T_MAP5, EVEX_CD8<16, CD8VF>;
9486     defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9487                                      sched.PH.YMM, v16f16x_info>,
9488                                      EVEX_V256, T_MAP5, EVEX_CD8<16, CD8VF>;
9489   }
9490   defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9491                                 sched.PS.ZMM, v16f32_info>,
9492                                 EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
9493   defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9494                                 sched.PD.ZMM, v8f64_info>,
9495                                 EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
9496   // Define only if AVX512VL feature is present.
9497   let Predicates = [HasVLX] in {
9498     defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9499                                      sched.PS.XMM, v4f32x_info>,
9500                                      EVEX_V128, TB, EVEX_CD8<32, CD8VF>;
9501     defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9502                                      sched.PS.YMM, v8f32x_info>,
9503                                      EVEX_V256, TB, EVEX_CD8<32, CD8VF>;
9504     defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9505                                      sched.PD.XMM, v2f64x_info>,
9506                                      EVEX_V128, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
9507     defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9508                                      sched.PD.YMM, v4f64x_info>,
9509                                      EVEX_V256, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
9510   }
// Instantiates the explicit-rounding register form (avx512_sqrt_packed_round)
// for the 512-bit ph, ps and pd types only; no 128/256-bit variants are
// defined here. The ph form is guarded by HasFP16.
//   opc       - opcode byte shared by all forms.
//   OpcodeStr - mnemonic stem; "ph"/"ps"/"pd" is appended via !strconcat.
//   sched     - scheduling classes; only the ZMM entries are used.
9513 let Uses = [MXCSR] in
9514 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
9515                                         X86SchedWriteSizes sched> {
9516   let Predicates = [HasFP16] in
9517   defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
9518                                       sched.PH.ZMM, v32f16_info>,
9519                                       EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
9520   defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
9521                                       sched.PS.ZMM, v16f32_info>,
9522                                       EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
9523   defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
9524                                       sched.PD.ZMM, v8f64_info>,
9525                                       EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
// Scalar square root: the vector-register "_Int" forms, the FRC-register
// codegen-only forms, and the selection patterns that use the latter.
//   opc       - opcode byte.
//   OpcodeStr - full mnemonic (suffix already appended by the caller).
//   sched     - scheduling class; memory forms use sched.Folded and
//               sched.ReadAfterFold.
//   _         - vector type info for the scalar element type.
//   Name      - record-name prefix; the patterns below look up Name#Zr and
//               Name#Zm.
//   prd       - predicate guarding everything (defaults to HasAVX512).
9528 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9529                               X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
9530   let ExeDomain = _.ExeDomain, Predicates = [prd] in {
    // Maskable forms operating on whole vector registers, matching X86fsqrts.
9531     defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9532                          (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9533                          "$src2, $src1", "$src1, $src2",
9534                          (X86fsqrts (_.VT _.RC:$src1),
9535                                     (_.VT _.RC:$src2))>,
9536                          Sched<[sched]>, SIMD_EXC;
9537     defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9538                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9539                          "$src2, $src1", "$src1, $src2",
9540                          (X86fsqrts (_.VT _.RC:$src1),
9541                                     (_.ScalarIntMemFrags addr:$src2))>,
9542                          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    // Explicit rounding-control form: extra $rc operand, matches X86fsqrtRnds,
    // tagged EVEX_B and EVEX_RC. Uses MXCSR but is not given SIMD_EXC.
9543     let Uses = [MXCSR] in
9544     defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9545                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
9546                          "$rc, $src2, $src1", "$src1, $src2, $rc",
9547                          (X86fsqrtRnds (_.VT _.RC:$src1),
9548                                      (_.VT _.RC:$src2),
9549                                      (i32 timm:$rc))>,
9550                          EVEX_B, EVEX_RC, Sched<[sched]>;
    // Codegen-only forms on the scalar FP register class (_.FRC). They carry
    // no patterns of their own; the Pat records below select them.
9552     let isCodeGenOnly = 1, hasSideEffects = 0 in {
9553       def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9554                 (ins _.FRC:$src1, _.FRC:$src2),
9555                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9556                 Sched<[sched]>, SIMD_EXC;
9557       let mayLoad = 1 in
9558         def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9559                   (ins _.FRC:$src1, _.ScalarMemOp:$src2),
9560                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9561                   Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9562     }
9563   }
  // Scalar any_fsqrt on a register: the first source operand is IMPLICIT_DEF.
9565   let Predicates = [prd] in {
9566     def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
9567               (!cast<Instruction>(Name#Zr)
9568                   (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
9569   }
  // Folding the load into the instruction is only done under OptForSize.
9571   let Predicates = [prd, OptForSize] in {
9572     def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
9573               (!cast<Instruction>(Name#Zm)
9574                   (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
9575   }
// Instantiates avx512_sqrt_scalar for the "sh" (f16x_info, guarded by
// HasFP16), "ss" (f32x_info) and "sd" (f64x_info) scalar types. The Name
// argument passed down is NAME with "SH"/"SS"/"SD" appended.
//   opc       - opcode byte shared by all three.
//   OpcodeStr - mnemonic stem; "sh"/"ss"/"sd" is appended here.
//   sched     - scheduling classes; the .Scl entry of PH/PS/PD is used.
9578 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
9579                                   X86SchedWriteSizes sched> {
9580   defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>,
9581                         EVEX_CD8<16, CD8VT1>, EVEX, VVVV, T_MAP5, XS;
9582   defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
9583                         EVEX_CD8<32, CD8VT1>, EVEX, VVVV, TB, XS;
9584   defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
9585                         EVEX_CD8<64, CD8VT1>, EVEX, VVVV, TB, XD, REX_W;
// VSQRT (opcode 0x51): packed forms (regular plus explicit-rounding), then
// the scalar forms. The scalar instantiation is additionally tagged VEX_LIG.
9588 defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
9589              avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
9591 defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
// Scalar round-scale with an 8-bit immediate ($src3): the vector-register
// "_Int" forms, FRC-register codegen-only forms, and the selection patterns
// that use the latter.
//   opc       - opcode byte.
//   OpcodeStr - full mnemonic.
//   sched     - scheduling class; memory forms use sched.Folded and
//               sched.ReadAfterFold.
//   _         - vector type info for the scalar element type.
9593 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
9594                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9595   let ExeDomain = _.ExeDomain in {
9596   defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9597                            (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9598                            "$src3, $src2, $src1", "$src1, $src2, $src3",
9599                            (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9600                            (i32 timm:$src3)))>,
9601                            Sched<[sched]>, SIMD_EXC;
  // {sae} form: matches X86RndScalesSAE, tagged EVEX_B. Uses MXCSR but is not
  // given SIMD_EXC.
9603   let Uses = [MXCSR] in
9604   defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9605                          (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9606                          "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
9607                          (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9608                          (i32 timm:$src3)))>, EVEX_B,
9609                          Sched<[sched]>;
9611   defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9612                          (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
9613                          OpcodeStr,
9614                          "$src3, $src2, $src1", "$src1, $src2, $src3",
9615                          (_.VT (X86RndScales _.RC:$src1,
9616                                 (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
9617                          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  // Codegen-only forms on the scalar FP register class (_.FRC), without
  // patterns of their own; selected by the Pat records below.
9619   let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
9620     def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9621                (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
9622                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9623                []>, Sched<[sched]>, SIMD_EXC;
9625     let mayLoad = 1 in
9626       def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9627                  (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9628                  OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9629                  []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9630   }
9631   }
  // Scalar X86any_VRndScale on a register: the first source operand of the
  // selected NAME#r record is IMPLICIT_DEF.
9633   let Predicates = [HasAVX512] in {
9634     def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
9635               (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
9636                _.FRC:$src1, timm:$src2))>;
9637   }
  // Folding the scalar load into NAME#m is only done under OptForSize.
9639   let Predicates = [HasAVX512, OptForSize] in {
9640     def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
9641               (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
9642                addr:$src1, timm:$src2))>;
9643   }
// Scalar round-scale instantiations:
//   VRNDSCALESHZ - opcode 0x0A, f16x_info, guarded by HasFP16.
//   VRNDSCALESSZ - opcode 0x0A, f32x_info.
//   VRNDSCALESDZ - opcode 0x0B, f64x_info, with REX_W.
// All three use the scalar SchedWriteFRnd class.
9646 let Predicates = [HasFP16] in
9647 defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
9648                                            SchedWriteFRnd.Scl, f16x_info>,
9649                                            AVX512PSIi8Base, TA, EVEX, VVVV,
9650                                            EVEX_CD8<16, CD8VT1>;
9652 defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
9653                                            SchedWriteFRnd.Scl, f32x_info>,
9654                                            AVX512AIi8Base, EVEX, VVVV, VEX_LIG,
9655                                            EVEX_CD8<32, CD8VT1>;
9657 defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
9658                                            SchedWriteFRnd.Scl, f64x_info>,
9659                                            REX_W, AVX512AIi8Base, EVEX, VVVV, VEX_LIG,
9660                                            EVEX_CD8<64, CD8VT1>;
// Selection patterns that fold a masked scalar operation, expressed as
// Move(src1, scalar_to_vector(X86selects_mask(Mask, OpNode(elt0 of src2), X))),
// into the masked "_Int" register instruction.
//   OpNode        - scalar operation applied to element 0 of $src2.
//   OpcPrefix     - instruction name without the leading "V"; the patterns
//                   select "V"#OpcPrefix#r_Intk and "V"#OpcPrefix#r_Intkz.
//   Move          - node that merges the scalar result into $src1.
//   Mask          - dag that produces the select mask in the source pattern.
//   _             - vector type info.
//   ZeroFP        - leaf matching the zero value used for the zero-masking case.
//   OutMask       - dag that produces the mask operand of the instruction.
//   BasePredicate - predicate guarding both patterns.
9662 multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
9663                                 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
9664                                 dag OutMask, Predicate BasePredicate> {
9665   let Predicates = [BasePredicate] in {
    // Merge-masking: the false value is element 0 of $dst.
9666     def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9667                (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9668                (extractelt _.VT:$dst, (iPTR 0))))),
9669               (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9670                _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
    // Zero-masking: the false value is ZeroFP.
9672     def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9673                (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9674                ZeroFP))),
9675               (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9676                OutMask, _.VT:$src2, _.VT:$src1)>;
9677   }
// Masked scalar fsqrt patterns for SQRTSHZ (HasFP16), SQRTSSZ and SQRTSDZ
// (HasAVX512). In each, the source mask is the truncated low byte of a GR32
// turned into a v1i1, and the instruction mask operand is $mask copied into
// the VK1WM register class.
9680 defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
9681                             (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info,
9682                             fp16imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasFP16>;
9683 defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9684                             (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9685                             fp32imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9686 defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9687                             (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9688                             fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9691 //-------------------------------------------------
9692 // Integer truncate and extend operations
9693 //-------------------------------------------------
// Instruction records for one vector width of an integer truncate.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   OpNode    - node matched by the unmasked register form (rr).
//   MaskNode  - operator matched by the masked register forms (rrk, rrkz),
//               taking (source, passthru, mask).
//   sched     - scheduling class; store forms use sched.Folded.
//   SrcInfo   - type info of the wide source vector (also supplies the mask
//               register class KRCWM).
//   DestInfo  - type info of the narrow result vector.
//   x86memop  - memory operand type for the store forms.
9695 multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9696                               SDPatternOperator MaskNode,
9697                               X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9698                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9699   let ExeDomain = DestInfo.ExeDomain in {
9700   def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9701              (ins SrcInfo.RC:$src),
9702              OpcodeStr # "\t{$src, $dst|$dst, $src}",
9703              [(set DestInfo.RC:$dst,
9704                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9705              EVEX, Sched<[sched]>;
  // Merge-masked form: $src0 is the passthru value and is tied to $dst.
9706   let Constraints = "$src0 = $dst" in
9707   def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9708              (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9709              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9710              [(set DestInfo.RC:$dst,
9711                    (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9712                              (DestInfo.VT DestInfo.RC:$src0),
9713                              SrcInfo.KRCWM:$mask))]>,
9714              EVEX, EVEX_K, Sched<[sched]>;
  // Zero-masked form: the passthru is DestInfo.ImmAllZerosV.
9715   def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9716              (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9717              OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9718              [(set DestInfo.RC:$dst,
9719                    (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9720                              DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9721              EVEX, EVEX_KZ, Sched<[sched]>;
9722   }
  // Store forms (plain and masked). They have empty pattern lists, so
  // mayStore is set explicitly.
9724   let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9725     def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9726                (ins x86memop:$dst, SrcInfo.RC:$src),
9727                OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9728                EVEX, Sched<[sched.Folded]>;
9730     def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9731                (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9732                OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9733                EVEX, EVEX_K, Sched<[sched.Folded]>;
9734   }//mayStore = 1, hasSideEffects = 0
// Selection patterns mapping truncating stores onto the "mr" / "mrk" records.
//   SrcInfo    - type info of the source vector; SrcInfo.ZSuffix is appended
//                to Name to form the instruction record name.
//   truncFrag  - fragment matching an unmasked truncating store.
//   mtruncFrag - fragment matching a masked truncating store.
//   Name       - instruction name prefix.
9737 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9738                                     PatFrag truncFrag, PatFrag mtruncFrag,
9739                                     string Name> {
9741   def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9742             (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
9743                                     addr:$dst, SrcInfo.RC:$src)>;
9745   def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9746                         SrcInfo.KRCWM:$mask),
9747             (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
9748                             addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
// Instantiates avx512_trunc_common and avx512_trunc_mr_lowering for the
// 128-, 256- and 512-bit source widths (Z128 / Z256 / Z).
//   opc, OpcodeStr           - opcode byte and mnemonic.
//   OpNode128/256/512        - unmasked node per source width.
//   MaskNode128/256/512      - masked operator per source width.
//   sched                    - per-width scheduling classes.
//   VTSrcInfo                - source type info for all three widths.
//   DestInfoZ128/Z256/Z      - destination type info per source width.
//   x86memopZ128/Z256/Z      - store memory operand per source width.
//   truncFrag, mtruncFrag    - truncating-store fragments (plain / masked).
//   prd                      - base predicate (defaults to HasAVX512); the
//                              128/256-bit forms additionally need HasVLX.
9751 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9752                         SDNode OpNode256, SDNode OpNode512,
9753                         SDPatternOperator MaskNode128,
9754                         SDPatternOperator MaskNode256,
9755                         SDPatternOperator MaskNode512,
9756                         X86SchedWriteWidths sched,
9757                         AVX512VLVectorVTInfo VTSrcInfo,
9758                         X86VectorVTInfo DestInfoZ128,
9759                         X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9760                         X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9761                         X86MemOperand x86memopZ, PatFrag truncFrag,
9762                         PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9764   let Predicates = [HasVLX, prd] in {
9765     defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched.XMM,
9766                              VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9767                 avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag,
9768                                          mtruncFrag, NAME>, EVEX_V128;
9770     defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched.YMM,
9771                              VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9772                 avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag,
9773                                          mtruncFrag, NAME>, EVEX_V256;
9774   }
9775   let Predicates = [prd] in
9776     defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched.ZMM,
9777                              VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9778                 avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag,
9779                                          mtruncFrag, NAME>, EVEX_V512;
// i64-element source truncated to i8 elements. All three source widths use
// InVecNode / InVecMaskNode and a v16i8x_info destination; the store operands
// are i16mem, i32mem and i64mem for the 128/256/512-bit sources.
//   StoreNode / MaskedStoreNode - truncating-store fragments (plain / masked).
9782 multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr,
9783                            X86SchedWriteWidths sched, PatFrag StoreNode,
9784                            PatFrag MaskedStoreNode, SDNode InVecNode,
9785                            SDPatternOperator InVecMaskNode> {
9786   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9787                           InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9788                           avx512vl_i64_info, v16i8x_info, v16i8x_info,
9789                           v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9790                           MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
// i64-element source truncated to i16 elements. The 128- and 256-bit sources
// use InVecNode / InVecMaskNode; the 512-bit source uses OpNode / MaskNode.
// Every width has a v8i16x_info destination; the store operands are i32mem,
// i64mem and i128mem.
//   StoreNode / MaskedStoreNode - truncating-store fragments (plain / masked).
9793 multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9794                            SDPatternOperator MaskNode,
9795                            X86SchedWriteWidths sched, PatFrag StoreNode,
9796                            PatFrag MaskedStoreNode, SDNode InVecNode,
9797                            SDPatternOperator InVecMaskNode> {
9798   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9799                           InVecMaskNode, InVecMaskNode, MaskNode, sched,
9800                           avx512vl_i64_info, v8i16x_info, v8i16x_info,
9801                           v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9802                           MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
// i64-element source truncated to i32 elements. Only the 128-bit source uses
// InVecNode / InVecMaskNode; the 256- and 512-bit sources use OpNode /
// MaskNode. Destinations are v4i32x_info, v4i32x_info and v8i32x_info; the
// store operands are i64mem, i128mem and i256mem.
//   StoreNode / MaskedStoreNode - truncating-store fragments (plain / masked).
9805 multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9806                            SDPatternOperator MaskNode,
9807                            X86SchedWriteWidths sched, PatFrag StoreNode,
9808                            PatFrag MaskedStoreNode, SDNode InVecNode,
9809                            SDPatternOperator InVecMaskNode> {
9810   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9811                           InVecMaskNode, MaskNode, MaskNode, sched,
9812                           avx512vl_i64_info, v4i32x_info, v4i32x_info,
9813                           v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9814                           MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
// i32-element source truncated to i8 elements. The 128- and 256-bit sources
// use InVecNode / InVecMaskNode; the 512-bit source uses OpNode / MaskNode.
// Every width has a v16i8x_info destination; the store operands are i32mem,
// i64mem and i128mem.
//   StoreNode / MaskedStoreNode - truncating-store fragments (plain / masked).
9817 multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
9818                            SDPatternOperator MaskNode,
9819                            X86SchedWriteWidths sched, PatFrag StoreNode,
9820                            PatFrag MaskedStoreNode, SDNode InVecNode,
9821                            SDPatternOperator InVecMaskNode> {
9822   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9823                           InVecMaskNode, InVecMaskNode, MaskNode, sched,
9824                           avx512vl_i32_info, v16i8x_info, v16i8x_info,
9825                           v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
9826                           MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
// i32-element source truncated to i16 elements. Only the 128-bit source uses
// InVecNode / InVecMaskNode; the 256- and 512-bit sources use OpNode /
// MaskNode. Destinations are v8i16x_info, v8i16x_info and v16i16x_info; the
// store operands are i64mem, i128mem and i256mem.
//   StoreNode / MaskedStoreNode - truncating-store fragments (plain / masked).
9829 multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9830                            SDPatternOperator MaskNode,
9831                            X86SchedWriteWidths sched, PatFrag StoreNode,
9832                            PatFrag MaskedStoreNode, SDNode InVecNode,
9833                            SDPatternOperator InVecMaskNode> {
9834   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9835                           InVecMaskNode, MaskNode, MaskNode, sched,
9836                           avx512vl_i32_info, v8i16x_info, v8i16x_info,
9837                           v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
9838                           MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
// i16-element source truncated to i8 elements. Only the 128-bit source uses
// InVecNode / InVecMaskNode; the 256- and 512-bit sources use OpNode /
// MaskNode. Destinations are v16i8x_info, v16i8x_info and v32i8x_info; the
// store operands are i64mem, i128mem and i256mem. Unlike the other wrappers,
// the base predicate passed to avx512_trunc is HasBWI.
//   StoreNode / MaskedStoreNode - truncating-store fragments (plain / masked).
9841 multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9842                            SDPatternOperator MaskNode,
9843                            X86SchedWriteWidths sched, PatFrag StoreNode,
9844                            PatFrag MaskedStoreNode, SDNode InVecNode,
9845                            SDPatternOperator InVecMaskNode> {
9846   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9847                           InVecMaskNode, MaskNode, MaskNode, sched,
9848                           avx512vl_i16_info, v16i8x_info, v16i8x_info,
9849                           v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
9850                           MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
// i64 -> i8 truncates: plain (0x32), signed-saturating (0x22) and
// unsigned-saturating (0x12), each paired with the matching truncating-store
// fragments and X86vtrunc* / X86vmtrunc* nodes.
9853 defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",
9854                                   SchedWriteVecTruncate, truncstorevi8,
9855                                   masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9856 defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",
9857                                   SchedWriteVecTruncate, truncstore_s_vi8,
9858                                   masked_truncstore_s_vi8, X86vtruncs,
9859                                   X86vmtruncs>;
9860 defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb",
9861                                   SchedWriteVecTruncate, truncstore_us_vi8,
9862                                   masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>;
// i64 -> i16 truncates: plain (0x34), signed-saturating (0x24) and
// unsigned-saturating (0x14). The first node pair is passed as OpNode /
// MaskNode, the trailing X86vtrunc* / X86vmtrunc* pair as InVecNode /
// InVecMaskNode.
9864 defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
9865                                   SchedWriteVecTruncate, truncstorevi16,
9866                                   masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9867 defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, select_truncs,
9868                                   SchedWriteVecTruncate, truncstore_s_vi16,
9869                                   masked_truncstore_s_vi16, X86vtruncs,
9870                                   X86vmtruncs>;
9871 defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
9872                                   select_truncus, SchedWriteVecTruncate,
9873                                   truncstore_us_vi16, masked_truncstore_us_vi16,
9874                                   X86vtruncus, X86vmtruncus>;
// i64 -> i32 truncates: plain (0x35), signed-saturating (0x25) and
// unsigned-saturating (0x15). The first node pair is passed as OpNode /
// MaskNode, the trailing X86vtrunc* / X86vmtrunc* pair as InVecNode /
// InVecMaskNode.
9876 defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
9877                                   SchedWriteVecTruncate, truncstorevi32,
9878                                   masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
9879 defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, select_truncs,
9880                                   SchedWriteVecTruncate, truncstore_s_vi32,
9881                                   masked_truncstore_s_vi32, X86vtruncs,
9882                                   X86vmtruncs>;
9883 defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
9884                                   select_truncus, SchedWriteVecTruncate,
9885                                   truncstore_us_vi32, masked_truncstore_us_vi32,
9886                                   X86vtruncus, X86vmtruncus>;
// i32 -> i8 truncates: plain (0x31), signed-saturating (0x21) and
// unsigned-saturating (0x11). The first node pair is passed as OpNode /
// MaskNode, the trailing X86vtrunc* / X86vmtrunc* pair as InVecNode /
// InVecMaskNode.
9888 defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
9889                                   SchedWriteVecTruncate, truncstorevi8,
9890                                   masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9891 defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
9892                                   SchedWriteVecTruncate, truncstore_s_vi8,
9893                                   masked_truncstore_s_vi8, X86vtruncs,
9894                                   X86vmtruncs>;
9895 defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus,
9896                                   select_truncus, SchedWriteVecTruncate,
9897                                   truncstore_us_vi8, masked_truncstore_us_vi8,
9898                                   X86vtruncus, X86vmtruncus>;
// i32 -> i16 truncates: plain (0x33), signed-saturating (0x23) and
// unsigned-saturating (0x13). The first node pair is passed as OpNode /
// MaskNode, the trailing X86vtrunc* / X86vmtrunc* pair as InVecNode /
// InVecMaskNode.
9900 defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
9901                                   SchedWriteVecTruncate, truncstorevi16,
9902                                   masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9903 defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
9904                                   SchedWriteVecTruncate, truncstore_s_vi16,
9905                                   masked_truncstore_s_vi16, X86vtruncs,
9906                                   X86vmtruncs>;
9907 defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
9908                                   select_truncus, SchedWriteVecTruncate,
9909                                   truncstore_us_vi16, masked_truncstore_us_vi16,
9910                                   X86vtruncus, X86vmtruncus>;
// i16 -> i8 truncates: plain (0x30), signed-saturating (0x20) and
// unsigned-saturating (0x10). The first node pair is passed as OpNode /
// MaskNode, the trailing X86vtrunc* / X86vmtrunc* pair as InVecNode /
// InVecMaskNode.
9912 defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
9913                                   SchedWriteVecTruncate, truncstorevi8,
9914                                   masked_truncstorevi8, X86vtrunc,
9915                                   X86vmtrunc>;
9916 defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
9917                                   SchedWriteVecTruncate, truncstore_s_vi8,
9918                                   masked_truncstore_s_vi8, X86vtruncs,
9919                                   X86vmtruncs>;
9920 defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
9921                                   select_truncus, SchedWriteVecTruncate,
9922                                   truncstore_us_vi8, masked_truncstore_us_vi8,
9923                                   X86vtruncus, X86vmtruncus>;
// Without VLX (but with 512-bit EVEX available), a 256-bit trunc is done with
// the 512-bit instruction: the source is inserted into the low half (sub_ymm)
// of an IMPLICIT_DEF 512-bit value, truncated with VPMOVDWZrr / VPMOVQDZrr,
// and the low 128 bits (sub_xmm) of the result are extracted.
9925 let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
9926 def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
9927          (v8i16 (EXTRACT_SUBREG
9928                  (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
9929                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
9930 def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
9931          (v4i32 (EXTRACT_SUBREG
9932                  (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
9933                                            VR256X:$src, sub_ymm)))), sub_xmm))>;
// Same widening trick for v16i16 -> v16i8 when BWI is present without VLX:
// insert into an IMPLICIT_DEF v32i16 (sub_ymm), truncate with VPMOVWBZrr, and
// extract the low 128 bits (sub_xmm).
9936 let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
9937 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9938          (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
9939                                             VR256X:$src, sub_ymm))), sub_xmm))>;
9942 // Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
// Maps a masked-truncate node directly onto the masked register records.
//   InstrName - instruction record prefix; "rrk" / "rrkz" is appended.
//   OpNode    - masked-truncate node taking (source, passthru, mask).
//   DestInfo  - type info of the narrow result.
//   SrcInfo   - type info of the wide source (also supplies the KRCWM mask
//               register class).
9943 multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
9944                            X86VectorVTInfo DestInfo,
9945                            X86VectorVTInfo SrcInfo> {
  // Register passthru -> merge-masked "rrk" record.
9946   def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9947                                  DestInfo.RC:$src0,
9948                                  SrcInfo.KRCWM:$mask)),
9949             (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
9950                                                  SrcInfo.KRCWM:$mask,
9951                                                  SrcInfo.RC:$src)>;
  // All-zeros passthru -> zero-masked "rrkz" record.
9953   def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9954                                  DestInfo.ImmAllZerosV,
9955                                  SrcInfo.KRCWM:$mask)),
9956             (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
9957                                                   SrcInfo.RC:$src)>;
// Masked-truncate lowering for the 256-bit i32 -> i16 forms (plain, signed-
// and unsigned-saturating), available with VLX.
9960 let Predicates = [HasVLX] in {
9961 defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
9962 defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
9963 defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
// 512-bit source masked truncates, in X86vmtrunc / X86vmtruncs / X86vmtruncus
// flavours: v16i32 -> v16i16 (DW), v16i32 -> v16i8 (DB) and v8i64 -> v8i16 (QW).
9966 let Predicates = [HasAVX512] in {
9967 defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
9968 defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
9969 defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
9971 defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
9972 defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
9973 defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
9975 defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
9976 defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
9977 defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
// Shared body for the vector extend (VPMOVSX*/VPMOVZX*) instructions.
//   opc / OpcodeStr - opcode byte and mnemonic.
//   sched           - foldable scheduling class; the memory form uses
//                     sched.Folded.
//   DestInfo        - result vector info (also supplies the ExeDomain).
//   SrcInfo         - source vector info for the register form.
//   x86memop        - memory operand type of the load form.
//   LdFrag          - load fragment matched by the memory form.
//   OpNode          - extend node matched by the register form.
// Emits "rr" (register source) and "rm" (memory source) through
// AVX512_maskable, which is defined elsewhere in this file and presumably also
// provides the masked variants -- TODO confirm.
9980 multiclass avx512_pmovx_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9981               X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
9982               X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
9983   let ExeDomain = DestInfo.ExeDomain in {
9984   defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
9985                     (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
9986                     (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
9987                   EVEX, Sched<[sched]>;
9989   defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
9990                   (ins x86memop:$src), OpcodeStr ,"$src", "$src",
9991                   (DestInfo.VT (LdFrag addr:$src))>,
9992                 EVEX, Sched<[sched.Folded]>;
9993   }
// Byte -> word extends at 128/256/512 bits.
//   OpNode    - extend node used by the Z256 and Z forms.
//   InVecNode - in-vector extend node used by the Z128 form.
//   ExtTy     - string prefix used to build the default load fragment
//               ExtTy#"extloadvi8".
// Memory operands are i64mem / i128mem / i256mem for Z128 / Z256 / Z.
// Z128 and Z256 require [HasVLX, HasBWI]; Z requires [HasBWI].
9996 multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
9997           SDNode OpNode, SDNode InVecNode, string ExtTy,
9998           X86SchedWriteWidths sched,
9999           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10000   let Predicates = [HasVLX, HasBWI] in {
10001     defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v8i16x_info,
10002                     v16i8x_info, i64mem, LdFrag, InVecNode>,
10003                      EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V128, WIG;
10005     defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v16i16x_info,
10006                     v16i8x_info, i128mem, LdFrag, OpNode>,
10007                      EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V256, WIG;
10008   }
10009   let Predicates = [HasBWI] in {
10010     defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v32i16_info,
10011                     v32i8x_info, i256mem, LdFrag, OpNode>,
10012                      EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V512, WIG;
10013   }
// Byte -> dword extends at 128/256/512 bits.
//   OpNode    - extend node used by the Z form only.
//   InVecNode - in-vector extend node used by the Z128 and Z256 forms.
//   ExtTy     - string prefix used to build the default load fragment
//               ExtTy#"extloadvi8".
// Memory operands are i32mem / i64mem / i128mem for Z128 / Z256 / Z.
// Z128 and Z256 require [HasVLX, HasAVX512]; Z requires [HasAVX512].
10016 multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
10017           SDNode OpNode, SDNode InVecNode, string ExtTy,
10018           X86SchedWriteWidths sched,
10019           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10020   let Predicates = [HasVLX, HasAVX512] in {
10021     defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
10022                    v16i8x_info, i32mem, LdFrag, InVecNode>,
10023                          EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V128, WIG;
10025     defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
10026                    v16i8x_info, i64mem, LdFrag, InVecNode>,
10027                          EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V256, WIG;
10028   }
10029   let Predicates = [HasAVX512] in {
10030     defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
10031                    v16i8x_info, i128mem, LdFrag, OpNode>,
10032                          EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V512, WIG;
10033   }
// Byte -> qword extends at 128/256/512 bits. Every width reads from a
// v16i8x_info source and uses the in-vector node, so this multiclass takes no
// OpNode parameter.
//   InVecNode - in-vector extend node used by all three forms.
//   ExtTy     - string prefix used to build the default load fragment
//               ExtTy#"extloadvi8".
// Memory operands are i16mem / i32mem / i64mem for Z128 / Z256 / Z.
// Z128 and Z256 require [HasVLX, HasAVX512]; Z requires [HasAVX512].
10036 multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
10037                               SDNode InVecNode, string ExtTy,
10038                               X86SchedWriteWidths sched,
10039                               PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10040   let Predicates = [HasVLX, HasAVX512] in {
10041     defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
10042                    v16i8x_info, i16mem, LdFrag, InVecNode>,
10043                      EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V128, WIG;
10045     defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
10046                    v16i8x_info, i32mem, LdFrag, InVecNode>,
10047                      EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V256, WIG;
10048   }
10049   let Predicates = [HasAVX512] in {
10050     defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
10051                    v16i8x_info, i64mem, LdFrag, InVecNode>,
10052                      EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V512, WIG;
10053   }
// Word -> dword extends at 128/256/512 bits.
//   OpNode    - extend node used by the Z256 and Z forms.
//   InVecNode - in-vector extend node used by the Z128 form.
//   ExtTy     - string prefix used to build the default load fragment
//               ExtTy#"extloadvi16".
// Memory operands are i64mem / i128mem / i256mem for Z128 / Z256 / Z.
// Z128 and Z256 require [HasVLX, HasAVX512]; Z requires [HasAVX512].
10056 multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
10057          SDNode OpNode, SDNode InVecNode, string ExtTy,
10058          X86SchedWriteWidths sched,
10059          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
10060   let Predicates = [HasVLX, HasAVX512] in {
10061     defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
10062                    v8i16x_info, i64mem, LdFrag, InVecNode>,
10063                      EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V128, WIG;
10065     defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
10066                    v8i16x_info, i128mem, LdFrag, OpNode>,
10067                      EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V256, WIG;
10068   }
10069   let Predicates = [HasAVX512] in {
10070     defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
10071                    v16i16x_info, i256mem, LdFrag, OpNode>,
10072                      EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V512, WIG;
10073   }
// Word -> qword extends at 128/256/512 bits.
//   OpNode    - extend node used by the Z form only.
//   InVecNode - in-vector extend node used by the Z128 and Z256 forms.
//   ExtTy     - string prefix used to build the default load fragment
//               ExtTy#"extloadvi16".
// Memory operands are i32mem / i64mem / i128mem for Z128 / Z256 / Z.
// Z128 and Z256 require [HasVLX, HasAVX512]; Z requires [HasAVX512].
10076 multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
10077          SDNode OpNode, SDNode InVecNode, string ExtTy,
10078          X86SchedWriteWidths sched,
10079          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
10080   let Predicates = [HasVLX, HasAVX512] in {
10081     defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
10082                    v8i16x_info, i32mem, LdFrag, InVecNode>,
10083                      EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V128, WIG;
10085     defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
10086                    v8i16x_info, i64mem, LdFrag, InVecNode>,
10087                      EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V256, WIG;
10088   }
10089   let Predicates = [HasAVX512] in {
10090     defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
10091                    v8i16x_info, i128mem, LdFrag, OpNode>,
10092                      EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V512, WIG;
10093   }
// Dword -> qword extends at 128/256/512 bits.
//   OpNode    - extend node used by the Z256 and Z forms.
//   InVecNode - in-vector extend node used by the Z128 form.
//   ExtTy     - string prefix used to build the default load fragment
//               ExtTy#"extloadvi32".
// Memory operands are i64mem / i128mem / i256mem for Z128 / Z256 / Z.
// Z128 and Z256 require [HasVLX, HasAVX512]; Z requires [HasAVX512].
// Unlike the byte- and word-source multiclasses of this group, no WIG class is
// attached to these definitions.
10096 multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
10097          SDNode OpNode, SDNode InVecNode, string ExtTy,
10098          X86SchedWriteWidths sched,
10099          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
10101   let Predicates = [HasVLX, HasAVX512] in {
10102     defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
10103                    v4i32x_info, i64mem, LdFrag, InVecNode>,
10104                      EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V128;
10106     defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
10107                    v4i32x_info, i128mem, LdFrag, OpNode>,
10108                      EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V256;
10109   }
10110   let Predicates = [HasAVX512] in {
10111     defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
10112                    v8i32x_info, i256mem, LdFrag, OpNode>,
10113                      EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V512;
10114   }
// Zero-extend family: opcodes 0x30-0x35, zext / zext_invec nodes, and the "z"
// prefix for the extending-load fragment name. The BQ variants pass only the
// in-vector node because avx512_pmovx_bq takes no OpNode.
10117 defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z", SchedWriteVecExtend>;
10118 defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z", SchedWriteVecExtend>;
10119 defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq",       zext_invec, "z", SchedWriteVecExtend>;
10120 defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z", SchedWriteVecExtend>;
10121 defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z", SchedWriteVecExtend>;
10122 defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z", SchedWriteVecExtend>;
// Sign-extend family: opcodes 0x20-0x25, sext / sext_invec nodes, and the "s"
// prefix for the extending-load fragment name.
10124 defm VPMOVSXBW: avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s", SchedWriteVecExtend>;
10125 defm VPMOVSXBD: avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s", SchedWriteVecExtend>;
10126 defm VPMOVSXBQ: avx512_pmovx_bq<0x22, "vpmovsxbq",       sext_invec, "s", SchedWriteVecExtend>;
10127 defm VPMOVSXWD: avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s", SchedWriteVecExtend>;
10128 defm VPMOVSXWQ: avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s", SchedWriteVecExtend>;
10129 defm VPMOVSXDQ: avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s", SchedWriteVecExtend>;
10132 // Patterns that we also need any extend versions of. aext_vector_inreg
10133 // is currently legalized to zext_vector_inreg.
// Each pattern folds a full-vector load that feeds ExtOp into the matching
// "rm" instruction, whose record name is built from OpcPrefix plus a
// size/width suffix (e.g. OpcPrefix#BWZ256rm).
//   OpcPrefix - instruction name prefix.
//   ExtOp     - extend node applied to the loaded vector.
10134 multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
10135   // 256-bit patterns
10136   let Predicates = [HasVLX, HasBWI] in {
10137     def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
10138               (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
10139   }
10141   let Predicates = [HasVLX] in {
10142     def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
10143               (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
10145     def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
10146               (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
10147   }
10149   // 512-bit patterns
10150   let Predicates = [HasBWI] in {
10151     def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
10152               (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
10153   }
10154   let Predicates = [HasAVX512] in {
10155     def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
10156               (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
10157     def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
10158               (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
10160     def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
10161               (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
10163     def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
10164               (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
10165   }
// Load-folding patterns for the in-vector extends, on top of the full-vector
// patterns inherited from AVX512_pmovx_patterns_base.
//   OpcPrefix - instruction name prefix ("VPMOVSX" / "VPMOVZX" at the uses
//               below); a size/width suffix plus "rm" is appended.
//   ExtOp     - extend node forwarded to AVX512_pmovx_patterns_base.
//   InVecOp   - in-vector extend node matched by the patterns in this body.
// Each group covers the ways a narrow scalar load can reach the extend:
// scalar_to_vector of an integer load, scalar_to_vector of an f64 load
// (typed v2f64), and the X86vzload32/64 nodes.
10168 multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
10169                                  SDNode InVecOp> :
10170     AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
10171   // 128-bit patterns
10172   let Predicates = [HasVLX, HasBWI] in {
10173   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10174             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10175   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10176             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10177   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10178             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10179   }
10180   let Predicates = [HasVLX] in {
10181   def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10182             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10183   def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10184             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10186   def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
10187             (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
10189   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10190             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10191   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10192             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10193   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10194             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10196   def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10197             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10198   def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
10199             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10201   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10202             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10203   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10204             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10205   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
10206             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10207   }
  // 256-bit patterns
10208   let Predicates = [HasVLX] in {
10209   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10210             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  // An f64 load builds a v2f64 vector (as in the 128-bit and 512-bit groups);
  // typing it v2i64 would make this pattern unmatchable.
10211   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10212             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10213   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10214             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10216   def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10217             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10218   def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10219             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10221   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10222             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  // Same v2f64 typing for the f64-load form of the word -> qword extend.
10223   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10224             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10225   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10226             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10227   }
10228   // 512-bit patterns
10229   let Predicates = [HasAVX512] in {
10230   def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10231             (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10232   def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10233             (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10234   def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10235             (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10236   }
// Instantiate the load-folding patterns for the sign- and zero-extend families.
10239 defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
10240 defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
10242 // Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
10243 // ext+trunc aggressively making it impossible to legalize the DAG to this
10244 // pattern directly.
// The selected sequence zero-extends the words to v16i32 (VPMOVZXWDZrr, or
// VPMOVZXWDZrm when the source is a load) and then truncates dwords to bytes
// with VPMOVDBZrr.
10245 let Predicates = [HasAVX512, NoBWI] in {
10246 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
10247          (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
10248 def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
10249          (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
10252 //===----------------------------------------------------------------------===//
10253 // GATHER - SCATTER Operations
10255 // FIXME: Improve scheduling of gather/scatter instructions.
// One gather instruction ("rm") for a given vector type and index memory
// operand.
//   opc / OpcodeStr - opcode byte and mnemonic stem (_.Suffix is appended).
//   _               - vector type info of the gathered data.
//   memop           - vector-index memory operand.
//   MaskRC          - mask register class, defaulting to _.KRCWM.
// The instruction produces both $dst and an updated mask ($mask_wb); the
// constraints tie $src1 to $dst and $mask to $mask_wb and mark $dst as
// early-clobber. No selection pattern is attached here (empty list).
10256 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10257                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10258   let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
10259       ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
10260   def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
10261             (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
10262             !strconcat(OpcodeStr#_.Suffix,
10263             "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
10264             []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10265             Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
// Gathers for the types passed with a "Q"/"PD" SUFF at the uses below; all
// forms carry REX_W.
//   dopc / qopc - opcodes for the "d"-index and "q"-index forms.
//   _           - 128/256/512-bit vector type info bundle.
//   OpcodeStr   - mnemonic stem; "d" or "q" is appended for the index size.
//   SUFF        - suffix inserted into the record name (NAME#D/Q#SUFF#Z...).
// The 512-bit forms are defined without an extra predicate here; the 256- and
// 128-bit forms require HasVLX.
10268 multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
10269                         AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10270   defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
10271                                       vy512xmem>, EVEX_V512, REX_W;
10272   defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
10273                                       vz512mem>, EVEX_V512, REX_W;
10274 let Predicates = [HasVLX] in {
10275   defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10276                               vx256xmem>, EVEX_V256, REX_W;
10277   defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
10278                               vy256xmem>, EVEX_V256, REX_W;
10279   defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10280                               vx128xmem>, EVEX_V128, REX_W;
10281   defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10282                               vx128xmem>, EVEX_V128, REX_W;
// Gathers for the types passed with a "D"/"PS" SUFF at the uses below (no
// REX_W).
//   dopc / qopc - opcodes for the "d"-index and "q"-index forms.
//   _           - 128/256/512-bit vector type info bundle.
//   OpcodeStr   - mnemonic stem; "d" or "q" is appended for the index size.
//   SUFF        - suffix inserted into the record name.
// The "q"-index forms use a narrower data vector info than their EVEX vector
// length: info256 for Z, info128 for Z256 and Z128. The Z128 "q" form also
// overrides the mask register class with VK2WM.
// The 256- and 128-bit forms require HasVLX.
10286 multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
10287                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10288   defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
10289                                        EVEX_V512;
10290   defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
10291                                        EVEX_V512;
10292 let Predicates = [HasVLX] in {
10293   defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10294                                           vy256xmem>, EVEX_V256;
10295   defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10296                                           vy128xmem>, EVEX_V256;
10297   defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10298                                           vx128xmem>, EVEX_V128;
10299   defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10300                                           vx64xmem, VK2WM>, EVEX_V128;
// Floating-point gathers: opcode 0x92 for "d"-index and 0x93 for "q"-index
// forms, instantiated for f64 (PD) and f32 (PS) data.
10305 defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
10306                avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
// Integer gathers: opcode 0x90 for "d"-index and 0x91 for "q"-index forms,
// instantiated for i64 (Q) and i32 (D) data.
10308 defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
10309                 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
// One scatter instruction ("mr") for a given vector type and index memory
// operand.
//   opc / OpcodeStr - opcode byte and mnemonic stem (_.Suffix is appended).
//   _               - vector type info of the stored data.
//   memop           - vector-index memory operand (the destination).
//   MaskRC          - mask register class, defaulting to _.KRCWM.
// The only output is the updated mask ($mask_wb), tied to the $mask input.
// No selection pattern is attached here (empty list).
10311 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10312                           X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10314 let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
10315     hasSideEffects = 0 in
10317   def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
10318             (ins memop:$dst, MaskRC:$mask, _.RC:$src),
10319             !strconcat(OpcodeStr#_.Suffix,
10320             "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
10321             []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10322             Sched<[WriteStore]>;
// Scatters for the types passed with a "Q"/"PD" SUFF at the uses below; all
// forms carry REX_W. Parameters and index memory operands mirror
// avx512_gather_q_pd.
//   dopc / qopc - opcodes for the "d"-index and "q"-index forms.
//   _           - 128/256/512-bit vector type info bundle.
//   OpcodeStr   - mnemonic stem; "d" or "q" is appended for the index size.
//   SUFF        - suffix inserted into the record name.
// The 256- and 128-bit forms require HasVLX.
10325 multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
10326                         AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10327   defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
10328                                       vy512xmem>, EVEX_V512, REX_W;
10329   defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
10330                                       vz512mem>, EVEX_V512, REX_W;
10331 let Predicates = [HasVLX] in {
10332   defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10333                               vx256xmem>, EVEX_V256, REX_W;
10334   defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
10335                               vy256xmem>, EVEX_V256, REX_W;
10336   defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10337                               vx128xmem>, EVEX_V128, REX_W;
10338   defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10339                               vx128xmem>, EVEX_V128, REX_W;
// Scatters for the types passed with a "D"/"PS" SUFF at the uses below (no
// REX_W). Parameters and index memory operands mirror avx512_gather_d_ps.
//   dopc / qopc - opcodes for the "d"-index and "q"-index forms.
//   _           - 128/256/512-bit vector type info bundle.
//   OpcodeStr   - mnemonic stem; "d" or "q" is appended for the index size.
//   SUFF        - suffix inserted into the record name.
// The "q"-index forms use a narrower data vector info than their EVEX vector
// length: info256 for Z, info128 for Z256 and Z128. The Z128 "q" form also
// overrides the mask register class with VK2WM.
// The 256- and 128-bit forms require HasVLX.
10343 multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
10344                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10345   defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
10346                                        EVEX_V512;
10347   defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
10348                                        EVEX_V512;
10349 let Predicates = [HasVLX] in {
10350   defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10351                                           vy256xmem>, EVEX_V256;
10352   defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10353                                           vy128xmem>, EVEX_V256;
10354   defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10355                                           vx128xmem>, EVEX_V128;
10356   defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10357                                           vx64xmem, VK2WM>, EVEX_V128;
// Floating-point scatters: opcode 0xA2 for "d"-index and 0xA3 for "q"-index
// forms, instantiated for f64 (PD) and f32 (PS) data.
10361 defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
10362                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
// Integer scatters: opcode 0xA0 for "d"-index and 0xA1 for "q"-index forms,
// instantiated for i64 (Q) and i32 (D) data.
10364 defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
10365                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
10367 // prefetch
// One gather/scatter prefetch instruction ("m").
//   opc       - opcode byte.
//   F         - instruction format (an MRMNm form at the uses below).
//   OpcodeStr - full mnemonic.
//   KRC       - mask register class of the $mask operand.
//   memop     - vector-index memory operand.
// The instruction has no outputs and no selection pattern, and is marked as
// both mayLoad and mayStore.
10368 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
10369                        RegisterClass KRC, X86MemOperand memop> {
10370   let mayLoad = 1, mayStore = 1 in
10371   def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
10372             !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
10373             EVEX, EVEX_K, Sched<[WriteLoad]>;
// Gather/scatter prefetch instantiations. In every group opcode 0xC6 is used
// for the "D"-index forms and 0xC7 for the "Q"-index forms; the format
// distinguishes the operation: MRM1m = gather pf0, MRM2m = gather pf1,
// MRM5m = scatter pf0, MRM6m = scatter pf1. The PD forms add REX_W. Only the
// DPS forms use a VK16WM mask; the others use VK8WM.
10376 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
10377                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10379 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
10380                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10382 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
10383                      VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10385 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
10386                      VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10388 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
10389                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10391 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
10392                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10394 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
10395                      VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10397 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
10398                      VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10400 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
10401                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10403 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
10404                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10406 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
10407                      VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10409 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
10410                      VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10412 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
10413                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10415 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
10416                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10418 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
10419                      VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10421 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
10422                      VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
// Mask-register -> vector conversion ("rr") for one vector width. The
// instruction pattern is a sign-extend of the Vec.KRC mask into Vec.VT.
//   opc       - opcode byte.
//   Vec       - vector type info; supplies the register classes and the
//               mnemonic suffix (Vec.Suffix).
//   OpcodeStr - mnemonic stem.
//   Sched     - scheduling class.
10424 multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr, SchedWrite Sched> {
10425 def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
10426                   !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
10427                   [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
10428                   EVEX, Sched<[Sched]>;
// Instantiates cvt_by_vec_width at 512/256/128 bits for one element width.
//   VTInfo - 128/256/512-bit vector type info bundle.
//   prd    - feature predicate required by all forms; the 256- and 128-bit
//            forms additionally require HasVLX.
10431 multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
10432                                  string OpcodeStr, Predicate prd> {
10433 let Predicates = [prd] in
10434   defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteVecMoveZ>, EVEX_V512;
10436   let Predicates = [prd, HasVLX] in {
10437     defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr, WriteVecMoveY>, EVEX_V256;
10438     defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr, WriteVecMoveX>, EVEX_V128;
10439   }
// Mask -> vector moves. Byte/word forms use opcode 0x28 and require BWI;
// dword/qword forms use opcode 0x38 and require DQI. The W and Q forms add
// REX_W. The shared "vpmovm2" stem gets its element suffix from the vector
// type info inside cvt_by_vec_width.
10442 defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
10443 defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , REX_W;
10444 defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
10445 defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , REX_W;
// Vector -> mask-register conversion ("rr") for one vector width. The
// instruction pattern is X86pcmpgtm with an all-zeros first operand and the
// source vector as the second operand.
//   opc       - opcode byte.
//   _         - vector type info supplying the vector and mask register classes.
//   OpcodeStr - full mnemonic.
10447 multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
10448     def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
10449                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
10450                         [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
10451                         EVEX, Sched<[WriteMove]>;
10454 // Use 512bit version to implement 128/256 bit in case NoVLX.
//   ExtendInfo - vector type info of the wider (512-bit at the uses below)
//                register that hosts the source.
//   _          - vector type info of the narrow source.
//   Name       - instruction name prefix; "Zrr" is appended.
// The narrow source is inserted at _.SubRegIdx of an IMPLICIT_DEF ExtendInfo
// vector, the Name#"Zrr" instruction is applied, and its result is copied to
// the narrow type's mask register class (_.KRC).
10455 multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
10456                                            X86VectorVTInfo _,
10457                                            string Name> {
10459   def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
10460             (_.KVT (COPY_TO_REGCLASS
10461                      (!cast<Instruction>(Name#"Zrr")
10462                        (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
10463                                       _.RC:$src, _.SubRegIdx)),
10464                    _.KRC))>;
// Vector -> mask conversions at all three widths for one element type.
//   VTInfo - 128/256/512-bit vector type info bundle.
//   prd    - feature predicate required by every form.
// Z is defined under [prd]; Z256 and Z128 under [prd, HasVLX]. When VLX is
// absent but 512-bit EVEX is available ([prd, NoVLX, HasEVEX512]), the
// Z256_Alt / Z128_Alt patterns select the 512-bit instruction instead.
10467 multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
10468                                    AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10469   let Predicates = [prd] in
10470     defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
10471                                             EVEX_V512;
10473   let Predicates = [prd, HasVLX] in {
10474     defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
10475                                               EVEX_V256;
10476     defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
10477                                                EVEX_V128;
10478   }
10479   let Predicates = [prd, NoVLX, HasEVEX512] in {
10480     defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
10481     defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
10482   }
// Vector -> mask moves. Byte/word forms use opcode 0x29 and require BWI;
// dword/qword forms use opcode 0x39 and require DQI. The W and Q forms add
// REX_W.
10485 defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
10486                                               avx512vl_i8_info, HasBWI>;
10487 defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
10488                                               avx512vl_i16_info, HasBWI>, REX_W;
10489 defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
10490                                               avx512vl_i32_info, HasDQI>;
10491 defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
10492                                               avx512vl_i64_info, HasDQI>, REX_W;
10494 // Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
10495 // is available, but BWI is not. We can't handle this in lowering because
10496 // a target independent DAG combine likes to combine sext and trunc.
// Both patterns first materialize the mask as a v16i32 with VPMOVM2DZrr and
// then truncate it: VPMOVDBZrr for v16i8, VPMOVDWZrr for v16i16.
10497 let Predicates = [HasDQI, NoBWI] in {
10498   def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
10499             (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
10500   def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
10501             (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
// With VLX also available, v8i1 -> v8i16 sign extension goes through a 256-bit
// v8i32 (VPMOVM2DZ256rr) followed by a dword -> word truncate (VPMOVDWZ256rr).
10504 let Predicates = [HasDQI, NoBWI, HasVLX] in {
10505   def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
10506             (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
10509 //===----------------------------------------------------------------------===//
10510 // AVX-512 - COMPRESS and EXPAND
// Compress instructions for one vector width.
//   opc / OpcodeStr - opcode byte and mnemonic.
//   _               - vector type info.
//   sched           - foldable scheduling class; the store forms use
//                     sched.Folded.
// Defines:
//   rr  - register form built through AVX512_maskable with null_frag (no
//         pattern attached here).
//   mr  - unmasked store form, no pattern.
//   mrk - masked store form, no pattern.
// Note: the brace-less `let mayStore = 1, hasSideEffects = 0 in` binds only to
// the single definition that follows it (mr), not to mrk.
10513 multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
10514                                  string OpcodeStr, X86FoldableSchedWrite sched> {
10515   defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
10516               (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10517               (null_frag)>, AVX5128IBase,
10518               Sched<[sched]>;
10520   let mayStore = 1, hasSideEffects = 0 in
10521   def mr : AVX5128I<opc, MRMDestMem, (outs),
10522               (ins _.MemOp:$dst, _.RC:$src),
10523               OpcodeStr # "\t{$src, $dst|$dst, $src}",
10524               []>, EVEX_CD8<_.EltSize, CD8VT1>,
10525               Sched<[sched.Folded]>;
10527   def mrk : AVX5128I<opc, MRMDestMem, (outs),
10528               (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
10529               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
10530               []>,
10531               EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10532               Sched<[sched.Folded]>;
// Selection patterns for the compress instructions of one vector width.
//   _    - vector type info; _.ZSuffix picks the width-specific record name.
//   Name - instruction name prefix.
// X86mCompressingStore selects the masked store form (mrk). X86compress
// selects rrk when given an arbitrary $src0 operand and rrkz when that operand
// is the all-zeros vector.
10535 multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10536   def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
10537             (!cast<Instruction>(Name#_.ZSuffix#mrk)
10538                             addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
10540   def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10541             (!cast<Instruction>(Name#_.ZSuffix#rrk)
10542                             _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10543   def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10544             (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10545                             _.KRCWM:$mask, _.RC:$src)>;
// Compress instructions plus their selection patterns at 512/256/128 bits for
// one element type.
//   sched  - scheduling class shared by all three widths.
//   VTInfo - 128/256/512-bit vector type info bundle.
//   Pred   - feature predicate (default HasAVX512); the 256- and 128-bit forms
//            additionally require HasVLX.
10548 multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
10549                                  X86FoldableSchedWrite sched,
10550                                  AVX512VLVectorVTInfo VTInfo,
10551                                  Predicate Pred = HasAVX512> {
10552   let Predicates = [Pred] in
10553   defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
10554            compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10556   let Predicates = [Pred, HasVLX] in {
10557     defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
10558                 compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10559     defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
10560                 compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10561   }
// Top-level compress instantiations for i32, i64, f32 and f64 element types.
// The two integer variants share opcode 0x8B and the two FP variants share
// opcode 0x8A; the 64-bit element variants add REX_W.
10564 // FIXME: Is there a better scheduler class for VPCOMPRESS?
10565 defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
10566                                           avx512vl_i32_info>, EVEX;
10567 defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
10568                                           avx512vl_i64_info>, EVEX, REX_W;
10569 defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
10570                                           avx512vl_f32_info>, EVEX;
10571 defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
10572                                           avx512vl_f64_info>, EVEX, REX_W;
10574 // expand
// Instruction forms for the expand operation at a single vector width:
// "rr" takes a register source and "rm" takes a memory source. Both pass
// null_frag as the pattern operand; the selection patterns that target these
// records live in expand_by_vec_width_lowering.
10575 multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
10576                                  string OpcodeStr, X86FoldableSchedWrite sched> {
        // Register-source form.
10577   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10578               (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10579               (null_frag)>, AVX5128IBase,
10580               Sched<[sched]>;
        // Memory-source form; uses the folded-load scheduling classes.
10582   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10583               (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
10584               (null_frag)>,
10585             AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
10586             Sched<[sched.Folded, sched.ReadAfterFold]>;
// Selection patterns for the expand operation at a single vector width.
// Each pattern picks its instruction record by name: Name # _.ZSuffix # form.
10589 multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
        // Expanding load with an undef third operand -> "rmkz" form.
10591   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
10592             (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10593                                         _.KRCWM:$mask, addr:$src)>;
        // Expanding load with an all-zeros third operand -> also "rmkz".
10595   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
10596             (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10597                                         _.KRCWM:$mask, addr:$src)>;
        // Expanding load with a register third operand ($src0) -> "rmk" form.
10599   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
10600                                                (_.VT _.RC:$src0))),
10601             (!cast<Instruction>(Name#_.ZSuffix#rmk)
10602                             _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
        // Register expand with $src0 as the second node operand -> "rrk" form.
10604   def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10605             (!cast<Instruction>(Name#_.ZSuffix#rrk)
10606                             _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
        // Register expand with an all-zeros second operand -> "rrkz" form.
10607   def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10608             (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10609                             _.KRCWM:$mask, _.RC:$src)>;
// Instantiates the expand instructions (expand_by_vec_width) and their
// selection patterns (expand_by_vec_width_lowering) for one element type at
// three vector widths.
//   opc       - opcode byte forwarded to every width.
//   OpcodeStr - mnemonic forwarded to every width.
//   sched     - scheduling class forwarded to every width.
//   VTInfo    - supplies info512 / info256 / info128 for the three widths.
//   Pred      - base predicate (defaults to HasAVX512).
10612 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
10613                                X86FoldableSchedWrite sched,
10614                                AVX512VLVectorVTInfo VTInfo,
10615                                Predicate Pred = HasAVX512> {
        // 512-bit variant: guarded by Pred only.
10616   let Predicates = [Pred] in
10617   defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
10618            expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
        // 256-bit and 128-bit variants: additionally require HasVLX.
10620   let Predicates = [Pred, HasVLX] in {
10621     defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
10622                 expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10623     defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
10624                 expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10625   }
// Top-level expand instantiations for i32, i64, f32 and f64 element types.
// The two integer variants share opcode 0x89 and the two FP variants share
// opcode 0x88; the 64-bit element variants add REX_W.
10628 // FIXME: Is there a better scheduler class for VPEXPAND?
10629 defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
10630                                       avx512vl_i32_info>, EVEX;
10631 defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
10632                                       avx512vl_i64_info>, EVEX, REX_W;
10633 defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
10634                                       avx512vl_f32_info>, EVEX;
10635 defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
10636                                       avx512vl_f64_info>, EVEX, REX_W;
10638 //handle instruction  reg_vec1 = op(reg_vec,imm)
10639 //                               op(mem_vec,imm)
10640 //                               op(broadcast(eltVt),imm)
10641 // All instructions are created with FROUND_CURRENT.
// Defines three forms of a unary packed FP operation with an immediate:
// register source (rri), full-vector memory source (rmi) and broadcast scalar
// memory source (rmbi). Each form passes two pattern dags to
// AVX512_maskable_split: one built from OpNode and one built from MaskOpNode.
// The mnemonic is OpcodeStr with _.Suffix appended.
10642 multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
10643                                       SDPatternOperator OpNode,
10644                                       SDPatternOperator MaskOpNode,
10645                                       X86FoldableSchedWrite sched,
10646                                       X86VectorVTInfo _> {
        // All three forms read MXCSR and are marked as possibly raising FP
        // exceptions.
10647   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10648   defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
10649                       (ins _.RC:$src1, i32u8imm:$src2),
10650                       OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10651                       (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
10652                       (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
10653                       Sched<[sched]>;
10654   defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10655                     (ins _.MemOp:$src1, i32u8imm:$src2),
10656                     OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10657                     (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10658                             (i32 timm:$src2)),
10659                     (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10660                                 (i32 timm:$src2))>,
10661                     Sched<[sched.Folded, sched.ReadAfterFold]>;
        // Broadcast form: scalar memory operand, printed with _.BroadcastStr,
        // and tagged EVEX_B.
10662   defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10663                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10664                     OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
10665                     "${src1}"#_.BroadcastStr#", $src2",
10666                     (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10667                             (i32 timm:$src2)),
10668                     (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10669                                 (i32 timm:$src2))>, EVEX_B,
10670                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10671   }
10674 //handle instruction  reg_vec1 = op(reg_vec2,imm),{sae}
// Register-only {sae} form (rrib) of a unary packed FP operation with an
// immediate: one register source plus the immediate. Tagged EVEX_B. Unlike
// avx512_unary_fp_packed_imm, this block sets Uses = [MXCSR] but does not set
// mayRaiseFPException.
10675 multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10676                                           SDNode OpNode, X86FoldableSchedWrite sched,
10677                                           X86VectorVTInfo _> {
10678   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10679   defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10680                       (ins _.RC:$src1, i32u8imm:$src2),
10681                       OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
10682                       "$src1, {sae}, $src2",
10683                       (OpNode (_.VT _.RC:$src1),
10684                               (i32 timm:$src2))>,
10685                       EVEX_B, Sched<[sched]>;
// Instantiates a unary packed FP-with-immediate operation at three vector
// widths. Only the 512-bit variant (Z) also gets the {sae} form built from
// OpNodeSAE; the 128-bit and 256-bit variants require HasVLX and get the
// non-SAE forms only.
10688 multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10689             AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
10690             SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
10691             Predicate prd>{
10692   let Predicates = [prd] in {
10693     defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10694                                            sched.ZMM, _.info512>,
10695                 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
10696                                                sched.ZMM, _.info512>, EVEX_V512;
10697   }
10698   let Predicates = [prd, HasVLX] in {
10699     defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10700                                            sched.XMM, _.info128>, EVEX_V128;
10701     defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10702                                            sched.YMM, _.info256>, EVEX_V256;
10703   }
10706 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10707 //                               op(reg_vec2,mem_vec,imm)
10708 //                               op(reg_vec2,broadcast(eltVt),imm)
10709 // All instructions are created with FROUND_CURRENT.
// Defines three forms of a two-source packed FP operation with an immediate:
// register second source (rri), full-vector memory second source (rmi) and
// broadcast scalar memory second source (rmbi). The immediate is an
// i32u8imm operand matched as (i32 timm).
10710 multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10711                                 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
        // All three forms read MXCSR and are marked as possibly raising FP
        // exceptions.
10712   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10713   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10714                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10715                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10716                       (OpNode (_.VT _.RC:$src1),
10717                               (_.VT _.RC:$src2),
10718                               (i32 timm:$src3))>,
10719                       Sched<[sched]>;
10720   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10721                     (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10722                     OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10723                     (OpNode (_.VT _.RC:$src1),
10724                             (_.VT (bitconvert (_.LdFrag addr:$src2))),
10725                             (i32 timm:$src3))>,
10726                     Sched<[sched.Folded, sched.ReadAfterFold]>;
        // Broadcast form: scalar memory operand, printed with _.BroadcastStr,
        // and tagged EVEX_B.
10727   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10728                     (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10729                     OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10730                     "$src1, ${src2}"#_.BroadcastStr#", $src3",
10731                     (OpNode (_.VT _.RC:$src1),
10732                             (_.VT (_.BroadcastLdFrag addr:$src2)),
10733                             (i32 timm:$src3))>, EVEX_B,
10734                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10735   }
10738 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10739 //                               op(reg_vec2,mem_vec,imm)
// Defines the register (rri) and full-vector memory (rmi) forms of a
// two-source operation with an 8-bit immediate. Source and destination may
// use different vector type infos: operands are typed by SrcInfo and the
// result by DestInfo. No broadcast form is defined here.
10740 multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10741                               X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10742                               X86VectorVTInfo SrcInfo>{
10743   let ExeDomain = DestInfo.ExeDomain, ImmT = Imm8 in {
10744   defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10745                   (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10746                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10747                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10748                                (SrcInfo.VT SrcInfo.RC:$src2),
10749                                (i8 timm:$src3)))>,
10750                   Sched<[sched]>;
10751   defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10752                 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10753                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10754                 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10755                              (SrcInfo.VT (bitconvert
10756                                                 (SrcInfo.LdFrag addr:$src2))),
10757                              (i8 timm:$src3)))>,
10758                 Sched<[sched.Folded, sched.ReadAfterFold]>;
10759   }
10762 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10763 //                               op(reg_vec2,mem_vec,imm)
10764 //                               op(reg_vec2,broadcast(eltVt),imm)
// Inherits the rri and rmi forms from avx512_3Op_rm_imm8, using the same type
// info "_" for both source and destination, and adds the broadcast scalar
// memory form (rmbi).
10765 multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10766                            X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10767   avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
        // Broadcast form: scalar memory operand, printed with _.BroadcastStr,
        // and tagged EVEX_B.
10769   let ExeDomain = _.ExeDomain, ImmT = Imm8 in
10770   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10771                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10772                     OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10773                     "$src1, ${src2}"#_.BroadcastStr#", $src3",
10774                     (OpNode (_.VT _.RC:$src1),
10775                             (_.VT (_.BroadcastLdFrag addr:$src2)),
10776                             (i8 timm:$src3))>, EVEX_B,
10777                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10780 //handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10781 //                                      op(reg_vec2,mem_scalar,imm)
// Defines the register (rri) and scalar-memory (rmi) forms of a two-source
// scalar FP operation with an immediate, built on AVX512_maskable_scalar.
// The memory form uses _.IntScalarMemOp as the operand and
// _.ScalarIntMemFrags in the pattern.
10782 multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10783                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
        // Both forms read MXCSR and are marked as possibly raising FP
        // exceptions.
10784   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10785   defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10786                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10787                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10788                       (OpNode (_.VT _.RC:$src1),
10789                               (_.VT _.RC:$src2),
10790                               (i32 timm:$src3))>,
10791                       Sched<[sched]>;
10792   defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10793                     (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
10794                     OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10795                     (OpNode (_.VT _.RC:$src1),
10796                             (_.ScalarIntMemFrags addr:$src2),
10797                             (i32 timm:$src3))>,
10798                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10799   }
10802 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Register-only {sae} form (rrib) of a two-source packed FP operation with an
// immediate. Tagged EVEX_B. This block sets Uses = [MXCSR] but, unlike
// avx512_fp_packed_imm, does not set mayRaiseFPException.
10803 multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10804                                     SDNode OpNode, X86FoldableSchedWrite sched,
10805                                     X86VectorVTInfo _> {
10806   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10807   defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10808                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10809                       OpcodeStr, "$src3, {sae}, $src2, $src1",
10810                       "$src1, $src2, {sae}, $src3",
10811                       (OpNode (_.VT _.RC:$src1),
10812                               (_.VT _.RC:$src2),
10813                               (i32 timm:$src3))>,
10814                       EVEX_B, Sched<[sched]>;
10817 //handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Register-only {sae} form of a two-source scalar FP operation with an
// immediate, built on AVX512_maskable_scalar and tagged EVEX_B. The record
// name is spelled NAME#rrib here, whereas the packed sibling
// avx512_fp_sae_packed_imm writes plain rrib.
10818 multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10819                                     X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10820   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10821   defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10822                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10823                       OpcodeStr, "$src3, {sae}, $src2, $src1",
10824                       "$src1, $src2, {sae}, $src3",
10825                       (OpNode (_.VT _.RC:$src1),
10826                               (_.VT _.RC:$src2),
10827                               (i32 timm:$src3))>,
10828                       EVEX_B, Sched<[sched]>;
// Instantiates a two-source packed FP-with-immediate operation at three
// vector widths. Only the 512-bit variant (Z) also gets the {sae} form built
// from OpNodeSAE; the 128-bit and 256-bit variants require HasVLX and get the
// non-SAE forms only.
10831 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
10832             AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10833             SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10834   let Predicates = [prd] in {
10835     defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10836                 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
10837                                   EVEX_V512;
10839   }
10840   let Predicates = [prd, HasVLX] in {
10841     defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10842                                   EVEX_V128;
10843     defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10844                                   EVEX_V256;
10845   }
// Instantiates avx512_3Op_rm_imm8 (rri and rmi forms, no broadcast) at three
// vector widths with separate destination and source type infos. Pred
// defaults to HasBWI; the 128-bit and 256-bit variants additionally require
// HasVLX. Note the parameter order: OpNode comes before OpStr here.
10848 multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
10849                    X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
10850                    AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
10851   let Predicates = [Pred] in {
10852     defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
10853                            SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX, VVVV;
10854   }
10855   let Predicates = [Pred, HasVLX] in {
10856     defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
10857                            SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX, VVVV;
10858     defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
10859                            SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX, VVVV;
10860   }
// Instantiates avx512_3Op_imm8 (rri, rmi and rmbi forms) at three vector
// widths. Pred defaults to HasAVX512; the 128-bit and 256-bit variants
// additionally require HasVLX. Only the EVEX_V* length class is attached
// here; any other encoding classes must come from the instantiating defm.
10863 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
10864                                   bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
10865                                   Predicate Pred = HasAVX512> {
10866   let Predicates = [Pred] in {
10867     defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10868                                 EVEX_V512;
10869   }
10870   let Predicates = [Pred, HasVLX] in {
10871     defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10872                                 EVEX_V128;
10873     defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10874                                 EVEX_V256;
10875   }
// Instantiates a scalar FP-with-immediate operation as a single "Z" variant
// that combines the regular forms (avx512_fp_scalar_imm, using OpNode) with
// the {sae} form (avx512_fp_sae_scalar_imm, using OpNodeSAE). Both use the
// XMM scheduling class and are guarded by prd.
10878 multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
10879                   X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
10880                   SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
10881   let Predicates = [prd] in {
10882      defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
10883               avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
10884   }
// Instantiates a unary packed FP-with-immediate operation for three element
// types: f16 (PH), f32 (PS) and f64 (PD).
//   - PH reuses opcPs, is always guarded by HasFP16 (not prd), and uses
//     AVX512PSIi8Base with TA.
//   - PS uses opcPs and prd with AVX512AIi8Base.
//   - PD uses opcPd and prd with AVX512AIi8Base and adds REX_W.
10887 multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
10888                     bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
10889                     SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
10890                     X86SchedWriteWidths sched, Predicate prd>{
10891   defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
10892                             opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
10893                             AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
10894   defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
10895                             opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10896                             AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
10897   defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
10898                             opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10899                             AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, REX_W;
// Packed unary FP-with-immediate instructions. The arguments after the
// mnemonic are: PS opcode, PD opcode, OpNode, MaskOpNode, SAE node,
// scheduling class, predicate. VREDUCE requires HasDQI; VRNDSCALE and
// VGETMANT require HasAVX512. VRNDSCALE is the only one with distinct PS/PD
// opcodes (0x08 / 0x09) and with different OpNode and MaskOpNode values.
10902 defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
10903                               X86VReduce, X86VReduce, X86VReduceSAE,
10904                               SchedWriteFRnd, HasDQI>;
10905 defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
10906                               X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
10907                               SchedWriteFRnd, HasAVX512>;
10908 defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
10909                               X86VGetMant, X86VGetMant, X86VGetMantSAE,
10910                               SchedWriteFRnd, HasAVX512>;
// VRANGE: packed forms (opcode 0x50) followed by scalar forms (opcode 0x51).
// All four require HasDQI; the f64 variants add REX_W. The scalar forms use
// VEX_LIG and the CD8VT1 tuple type.
10912 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
10913                                                 0x50, X86VRange, X86VRangeSAE,
10914                                                 SchedWriteFAdd, HasDQI>,
10915       AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
10916 defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
10917                                                 0x50, X86VRange, X86VRangeSAE,
10918                                                 SchedWriteFAdd, HasDQI>,
10919       AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
10921 defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
10922       f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10923       AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
10924 defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
10925       0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10926       AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
// Scalar VREDUCE for f64, f32 and f16, all with opcode 0x57. The f64 and f32
// variants require HasDQI and use AVX512AIi8Base; the f16 variant requires
// HasFP16 and uses AVX512PSIi8Base with TA. The f64 variant adds REX_W.
10928 defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
10929       0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10930       AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
10931 defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
10932       0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10933       AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
10934 defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
10935       0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
10936       AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>;
// Scalar VGETMANT for f64, f32 and f16, all with opcode 0x27. The f64 and f32
// variants require HasAVX512 and use AVX512AIi8Base; the f16 variant requires
// HasFP16 and uses AVX512PSIi8Base with TA. The f64 variant adds REX_W.
10938 defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
10939       0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10940       AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
10941 defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
10942       0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10943       AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
10944 defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
10945       0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
10946       AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>;
// Defines the register (rri), full-vector memory (rmi) and broadcast (rmbi)
// forms of a shuffle matched via X86Shuf128 with an 8-bit immediate.
// The X86Shuf128 node is typed as CastInfo.VT and the result is bitconverted
// to _.VT, so the instruction's own type "_" may differ from the type the
// shuffle node is matched in.
10948 multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
10949                                           X86FoldableSchedWrite sched,
10950                                           X86VectorVTInfo _,
10951                                           X86VectorVTInfo CastInfo> {
10952   let ExeDomain = _.ExeDomain in {
10953   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10954                   (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10955                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10956                   (_.VT (bitconvert
10957                          (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
10958                                                   (i8 timm:$src3)))))>,
10959                   Sched<[sched]>;
        // Full-vector load: uses CastInfo.LdFrag (the cast type's load).
10960   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10961                 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10962                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10963                 (_.VT
10964                  (bitconvert
10965                   (CastInfo.VT (X86Shuf128 _.RC:$src1,
10966                                            (CastInfo.LdFrag addr:$src2),
10967                                            (i8 timm:$src3)))))>,
10968                 Sched<[sched.Folded, sched.ReadAfterFold]>;
        // Broadcast load: uses _.BroadcastLdFrag (the instruction type's
        // broadcast), printed with _.BroadcastStr, and tagged EVEX_B.
10969   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10970                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10971                     OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10972                     "$src1, ${src2}"#_.BroadcastStr#", $src3",
10973                     (_.VT
10974                      (bitconvert
10975                       (CastInfo.VT
10976                        (X86Shuf128 _.RC:$src1,
10977                                    (_.BroadcastLdFrag addr:$src2),
10978                                    (i8 timm:$src3)))))>, EVEX_B,
10979                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10980   }
// Instantiates avx512_shuff_packed_128_common at 512 bits (Z, HasAVX512) and
// 256 bits (Z256, HasAVX512 + HasVLX). No 128-bit variant is instantiated
// here. The same scheduling class "sched" is used for both widths.
10983 multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
10984                                    AVX512VLVectorVTInfo _,
10985                                    AVX512VLVectorVTInfo CastInfo, bits<8> opc>{
10986   let Predicates = [HasAVX512] in
10987   defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10988                                           _.info512, CastInfo.info512>, EVEX_V512;
10990   let Predicates = [HasAVX512, HasVLX] in
10991   defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10992                                              _.info256, CastInfo.info256>, EVEX_V256;
// 128-bit-lane shuffles. The FP variants use opcode 0x23 and the integer
// variants use opcode 0x43. The 32-bit-element variants pass a 64-bit-element
// type info as CastInfo (f64 / i64); the 64-bit-element variants use the same
// info for both parameters and add REX_W.
10995 defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
10996       avx512vl_f32_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
10997 defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
10998       avx512vl_f64_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
10999 defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
11000       avx512vl_i32_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
11001 defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
11002       avx512vl_i64_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
// Defines the register (rri), full-vector memory (rmi) and broadcast (rmbi)
// forms of an align operation matched via X86VAlign with an 8-bit immediate.
11004 multiclass avx512_valign<bits<8> opc, string OpcodeStr,
11005                          X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11006   let ExeDomain = _.ExeDomain in {
11007   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11008                   (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
11009                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11010                   (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
11011                   Sched<[sched]>;
11012   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11013                 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
11014                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11015                 (_.VT (X86VAlign _.RC:$src1,
11016                                  (bitconvert (_.LdFrag addr:$src2)),
11017                                  (i8 timm:$src3)))>,
11018                 Sched<[sched.Folded, sched.ReadAfterFold]>;
        // Broadcast form: scalar memory operand, printed with _.BroadcastStr,
        // and tagged EVEX_B. Unlike rri/rmi, the pattern has no outer (_.VT ...)
        // wrapper; only the broadcast operand is explicitly typed.
11020   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11021                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11022                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
11023                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
11024                    (X86VAlign _.RC:$src1,
11025                               (_.VT (_.BroadcastLdFrag addr:$src2)),
11026                               (i8 timm:$src3))>, EVEX_B,
11027                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11028   }
// Instantiates avx512_valign at three vector widths with the fixed opcode
// 0x03. The 512-bit variant requires HasAVX512; the 128-bit and 256-bit
// variants additionally require HasVLX.
11031 multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
11032                                 AVX512VLVectorVTInfo _> {
11033   let Predicates = [HasAVX512] in {
11034     defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
11035                                 AVX512AIi8Base, EVEX, VVVV, EVEX_V512;
11036   }
11037   let Predicates = [HasAVX512, HasVLX] in {
11038     defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
11039                                 AVX512AIi8Base, EVEX, VVVV, EVEX_V128;
11040     // We can't really override the 256-bit version so change it back to unset.
          // NOTE(review): no override or "unset" is visible on the Z256 defm
          // below; this comment may be stale - confirm against file history.
11041     defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
11042                                 AVX512AIi8Base, EVEX, VVVV, EVEX_V256;
11043   }
// Element-wise align instructions for 32-bit (VALIGND) and 64-bit (VALIGNQ)
// integer elements; VALIGNQ adds REX_W.
11046 defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
11047                                    avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11048 defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
11049                                    avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
11050                                    REX_W;
// Byte-wise VPALIGNR (opcode 0x0F, matched via X86PAlignr). It uses the i8
// type info for both destination and source and relies on the default
// predicate of avx512_common_3Op_rm_imm8 (HasBWI).
11052 defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
11053                                          SchedWriteShuffle, avx512vl_i8_info,
11054                                          avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
11056 // Fragments to help convert valignq into masked valignd, or valignq/valignd
11057 // into vpalignr.
// Each transform scales the immediate and re-emits it via getI8Imm. Going by
// the names: ValignqImm32XForm rescales a valignq immediate by 2,
// ValignqImm8XForm rescales a valignq immediate by 8, and ValigndImm8XForm
// rescales a valignd immediate by 4.
11058 def ValignqImm32XForm : SDNodeXForm<timm, [{
11059   return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
11060 }]>;
11061 def ValignqImm8XForm : SDNodeXForm<timm, [{
11062   return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
11063 }]>;
11064 def ValigndImm8XForm : SDNodeXForm<timm, [{
11065   return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
11066 }]>;
// Selection patterns that match a masked select (vselect_mask) over an
// OpNode result computed in type From.VT and bitconverted to To.VT, and
// select the masked To-typed instruction instead. Here OpcodeStr is the
// instruction record name prefix (it is concatenated with "rrik", "rrikz",
// "rmik" and "rmikz"). The immediate is rewritten through ImmXForm.
11068 multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
11069                                         X86VectorVTInfo From, X86VectorVTInfo To,
11070                                         SDNodeXForm ImmXForm> {
        // Register sources, select against $src0 -> "rrik".
11071   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11072                                  (bitconvert
11073                                   (From.VT (OpNode From.RC:$src1, From.RC:$src2,
11074                                                    timm:$src3))),
11075                                  To.RC:$src0)),
11076             (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
11077                                                   To.RC:$src1, To.RC:$src2,
11078                                                   (ImmXForm timm:$src3))>;
        // Register sources, select against all zeros -> "rrikz".
11080   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11081                                  (bitconvert
11082                                   (From.VT (OpNode From.RC:$src1, From.RC:$src2,
11083                                                    timm:$src3))),
11084                                  To.ImmAllZerosV)),
11085             (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
11086                                                    To.RC:$src1, To.RC:$src2,
11087                                                    (ImmXForm timm:$src3))>;
        // Second source loaded via From.LdFrag, select against $src0 -> "rmik".
11089   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11090                                  (bitconvert
11091                                   (From.VT (OpNode From.RC:$src1,
11092                                                    (From.LdFrag addr:$src2),
11093                                            timm:$src3))),
11094                                  To.RC:$src0)),
11095             (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
11096                                                   To.RC:$src1, addr:$src2,
11097                                                   (ImmXForm timm:$src3))>;
        // Second source loaded via From.LdFrag, select against all zeros ->
        // "rmikz".
11099   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11100                                  (bitconvert
11101                                   (From.VT (OpNode From.RC:$src1,
11102                                                    (From.LdFrag addr:$src2),
11103                                            timm:$src3))),
11104                                  To.ImmAllZerosV)),
11105             (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
11106                                                    To.RC:$src1, addr:$src2,
11107                                                    (ImmXForm timm:$src3))>;
// Extends avx512_vpalign_mask_lowering with patterns whose second source is a
// To.BroadcastLdFrag load bitconverted to From's type: an unmasked form
// ("rmbi"), a pass-through masked form ("rmbik") and an all-zeros masked form
// ("rmbikz"). The immediate is rewritten by ImmXForm in every case.
11110 multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
11111                                            X86VectorVTInfo From,
11112                                            X86VectorVTInfo To,
11113                                            SDNodeXForm ImmXForm> :
11114       avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
11115   def : Pat<(From.VT (OpNode From.RC:$src1,
11116                              (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
11117                              timm:$src3)),
11118             (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
11119                                                   (ImmXForm timm:$src3))>;
11121   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11122                                  (bitconvert
11123                                   (From.VT (OpNode From.RC:$src1,
11124                                            (bitconvert
11125                                             (To.VT (To.BroadcastLdFrag addr:$src2))),
11126                                            timm:$src3))),
11127                                  To.RC:$src0)),
11128             (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
11129                                                    To.RC:$src1, addr:$src2,
11130                                                    (ImmXForm timm:$src3))>;
11132   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11133                                  (bitconvert
11134                                   (From.VT (OpNode From.RC:$src1,
11135                                            (bitconvert
11136                                             (To.VT (To.BroadcastLdFrag addr:$src2))),
11137                                            timm:$src3))),
11138                                  To.ImmAllZerosV)),
11139             (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
11140                                                     To.RC:$src1, addr:$src2,
11141                                                     (ImmXForm timm:$src3))>;
11144 let Predicates = [HasAVX512] in {
11145   // For 512-bit we lower to the widest element type we can. So we only need
11146   // to handle converting valignq to valignd.
11147   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
11148                                          v16i32_info, ValignqImm32XForm>;
11151 let Predicates = [HasVLX] in {
11152   // For 128-bit we lower to the widest element type we can. So we only need
11153   // to handle converting valignq to valignd.
11154   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
11155                                          v4i32x_info, ValignqImm32XForm>;
11156   // For 256-bit we lower to the widest element type we can. So we only need
11157   // to handle converting valignq to valignd.
11158   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
11159                                          v8i32x_info, ValignqImm32XForm>;
11162 let Predicates = [HasVLX, HasBWI] in {
11163   // We can turn 128-bit VALIGND/VALIGNQ into VPALIGNR; only VPALIGNRZ128 is
  // instantiated here (from the v2i64 and v4i32 forms, with a byte mask).
11164   defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
11165                                       v16i8x_info, ValignqImm8XForm>;
11166   defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
11167                                       v16i8x_info, ValigndImm8XForm>;
// VDBPSADBW (opcode 0x42), instantiated through avx512_common_3Op_rm_imm8 with
// the i16 and i8 vector-length type-info sets.
11170 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
11171                 SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
11172                 EVEX_CD8<8, CD8VF>;
// Unary vector operation with a register-source form (rr) and a memory-source
// form (rm, loading through _.LdFrag). Both are expanded via AVX512_maskable
// and share the execution domain of the type info `_`.
11174 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
11175                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11176   let ExeDomain = _.ExeDomain in {
11177   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11178                     (ins _.RC:$src1), OpcodeStr,
11179                     "$src1", "$src1",
11180                     (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
11181                     Sched<[sched]>;
11183   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11184                   (ins _.MemOp:$src1), OpcodeStr,
11185                   "$src1", "$src1",
11186                   (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
11187             EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
11188             Sched<[sched.Folded]>;
11189   }
// avx512_unary_rm plus a broadcast-from-memory form (rmb): the source is a
// scalar memory operand loaded through _.BroadcastLdFrag, printed with
// _.BroadcastStr appended, and encoded with EVEX_B.
11192 multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
11193                             X86FoldableSchedWrite sched, X86VectorVTInfo _> :
11194            avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
11195   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11196                   (ins _.ScalarMemOp:$src1), OpcodeStr,
11197                   "${src1}"#_.BroadcastStr,
11198                   "${src1}"#_.BroadcastStr,
11199                   (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
11200              EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
11201              Sched<[sched.Folded]>;
// Instantiates avx512_unary_rm for all three vector lengths: Z (512-bit) under
// [prd], and Z256 / Z128 under [prd, HasVLX].
11204 multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11205                               X86SchedWriteWidths sched,
11206                               AVX512VLVectorVTInfo VTInfo, Predicate prd> {
11207   let Predicates = [prd] in
11208     defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11209                              EVEX_V512;
11211   let Predicates = [prd, HasVLX] in {
11212     defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11213                               EVEX_V256;
11214     defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
11215                               EVEX_V128;
11216   }
// Same vector-length expansion as avx512_unary_rm_vl, but built on
// avx512_unary_rmb so each length also gets the broadcast-load (rmb) form.
11219 multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11220                                X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
11221                                Predicate prd> {
11222   let Predicates = [prd] in
11223     defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11224                               EVEX_V512;
11226   let Predicates = [prd, HasVLX] in {
11227     defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11228                                  EVEX_V256;
11229     defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
11230                                  EVEX_V128;
11231   }
// Dword/qword element variants of a unary op. Q appends "q" to the mnemonic,
// uses the i64 type infos and adds REX_W; D appends "d" and uses the i32 type
// infos. Both include the broadcast form (avx512_unary_rmb_vl).
11234 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
11235                                  SDNode OpNode, X86SchedWriteWidths sched,
11236                                  Predicate prd> {
11237   defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
11238                                avx512vl_i64_info, prd>, REX_W;
11239   defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
11240                                avx512vl_i32_info, prd>;
// Byte/word element variants of a unary op. These use avx512_unary_rm_vl, so
// no broadcast (rmb) form is generated, and both are marked WIG.
11243 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
11244                                  SDNode OpNode, X86SchedWriteWidths sched,
11245                                  Predicate prd> {
11246   defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
11247                               avx512vl_i16_info, prd>, WIG;
11248   defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
11249                               avx512vl_i8_info, prd>, WIG;
// All four element sizes of a unary op: D/Q variants are predicated on
// HasAVX512, B/W variants on HasBWI.
11252 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
11253                                   bits<8> opc_d, bits<8> opc_q,
11254                                   string OpcodeStr, SDNode OpNode,
11255                                   X86SchedWriteWidths sched> {
11256   defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
11257                                     HasAVX512>,
11258               avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
11259                                     HasBWI>;
// VPABSB/W/D/Q: opcodes 0x1C (b), 0x1D (w), 0x1E (d), 0x1F (q), selected for abs.
11262 defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
11263                                     SchedWriteVecALU>;
11265 // VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// Only the i64 cases (v4i64, v2i64) are handled here: the source is inserted
// into an IMPLICIT_DEF v8i64, VPABSQZrr is applied, and the same subregister
// (sub_ymm / sub_xmm) is extracted from the result.
11266 let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
11267   def : Pat<(v4i64 (abs VR256X:$src)),
11268             (EXTRACT_SUBREG
11269                 (VPABSQZrr
11270                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
11271              sub_ymm)>;
11272   def : Pat<(v2i64 (abs VR128X:$src)),
11273             (EXTRACT_SUBREG
11274                 (VPABSQZrr
11275                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
11276              sub_xmm)>;
11279 // Use 512bit version to implement 128/256 bit.
// Under [prd, NoVLX, HasEVEX512], a 256-bit or 128-bit OpNode is selected as
// the 512-bit register instruction InstrStr#"Zrr": the source is inserted into
// an IMPLICIT_DEF 512-bit value at the narrow type's SubRegIdx and the result
// is extracted from the same SubRegIdx.
11280 multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
11281                                  AVX512VLVectorVTInfo _, Predicate prd> {
11282   let Predicates = [prd, NoVLX, HasEVEX512] in {
11283     def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
11284               (EXTRACT_SUBREG
11285                 (!cast<Instruction>(InstrStr # "Zrr")
11286                   (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11287                                  _.info256.RC:$src1,
11288                                  _.info256.SubRegIdx)),
11289               _.info256.SubRegIdx)>;
11291     def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
11292               (EXTRACT_SUBREG
11293                 (!cast<Instruction>(InstrStr # "Zrr")
11294                   (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11295                                  _.info128.RC:$src1,
11296                                  _.info128.SubRegIdx)),
11297               _.info128.SubRegIdx)>;
11298   }
// VPLZCNTD/Q (ctlz) and VPCONFLICTD/Q (X86Conflict), both predicated on HasCDI.
// The D and Q variants of each are given the same opcode (0x44 and 0xC4).
11301 defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
11302                                         SchedWriteVecIMul, HasCDI>;
11304 // FIXME: Is there a better scheduler class for VPCONFLICT?
11305 defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
11306                                         SchedWriteVecALU, HasCDI>;
11308 // VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
11309 defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
11310 defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
11312 //===---------------------------------------------------------------------===//
11313 // Counts number of ones - VPOPCNTD and VPOPCNTQ
11314 //===---------------------------------------------------------------------===//
11316 // FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
11317 defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
11318                                      SchedWriteVecALU, HasVPOPCNTDQ>;
// VPOPCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
11320 defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
11321 defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
11323 //===---------------------------------------------------------------------===//
11324 // Replicate Single FP - MOVSHDUP and MOVSLDUP
11325 //===---------------------------------------------------------------------===//
// Thin wrapper over avx512_unary_rm_vl fixed to the f32 type infos, the
// HasAVX512 predicate and the TB, XS encoding attributes. No broadcast (rmb)
// form is produced because avx512_unary_rm_vl is used.
11327 multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
11328                             X86SchedWriteWidths sched> {
11329   defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
11330                                       avx512vl_f32_info, HasAVX512>, TB, XS;
11333 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
11334                                   SchedWriteFShuffle>;
11335 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
11336                                   SchedWriteFShuffle>;
11338 //===----------------------------------------------------------------------===//
11339 // AVX-512 - MOVDDUP
11340 //===----------------------------------------------------------------------===//
// 128-bit MOVDDUP forms, expressed as broadcasts rather than X86Movddup: rr
// matches X86VBroadcast of a vector register, and rm matches
// _.BroadcastLdFrag on a scalar memory operand (disp8 compression CD8VH).
11342 multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
11343                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11344   let ExeDomain = _.ExeDomain in {
11345   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11346                    (ins _.RC:$src), OpcodeStr, "$src", "$src",
11347                    (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
11348                    Sched<[sched]>;
11349   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11350                  (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
11351                  (_.VT (_.BroadcastLdFrag addr:$src))>,
11352                  EVEX, EVEX_CD8<_.EltSize, CD8VH>,
11353                  Sched<[sched.Folded]>;
11354   }
// MOVDDUP at all vector lengths. Z and Z256 reuse avx512_unary_rm with
// X86Movddup; Z128 uses the broadcast-based avx512_movddup_128 instead.
// Only the Z256/Z128 definitions are wrapped in an explicit Predicates list.
11357 multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
11358                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
11359   defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
11360                            VTInfo.info512>, EVEX_V512;
11362   let Predicates = [HasAVX512, HasVLX] in {
11363     defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
11364                                 VTInfo.info256>, EVEX_V256;
11365     defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
11366                                    VTInfo.info128>, EVEX_V128;
11367   }
// Binds avx512_movddup_common to the f64 type infos and the TB, XD, REX_W
// encoding attributes; VMOVDDUP is its only instantiation here (opcode 0x12).
11370 multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
11371                           X86SchedWriteWidths sched> {
11372   defm NAME:      avx512_movddup_common<opc, OpcodeStr, sched,
11373                                         avx512vl_f64_info>, TB, XD, REX_W;
11376 defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;
// Select a v2f64 X86VBroadcast of a scalar f64 (unmasked, pass-through masked
// and zero-masked) as the register form of VMOVDDUPZ128, after copying the
// FR64X source into VR128X.
11378 let Predicates = [HasVLX] in {
11379 def : Pat<(v2f64 (X86VBroadcast f64:$src)),
11380           (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11382 def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11383                         (v2f64 VR128X:$src0)),
11384           (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
11385                            (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11386 def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11387                         immAllZerosV),
11388           (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11391 //===----------------------------------------------------------------------===//
11392 // AVX-512 - Unpack Instructions
11393 //===----------------------------------------------------------------------===//
// FP unpacks are built with avx512_fp_binop_p; for them Uses is set to the
// empty register list and mayRaiseFPException to 0.
11395 let Uses = []<Register>, mayRaiseFPException = 0 in {
11396 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
11397                                  SchedWriteFShuffleSizes, 0, 1>;
11398 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
11399                                  SchedWriteFShuffleSizes>;
// Integer unpacks: byte and word element forms require HasBWI, dword and
// qword element forms require HasAVX512.
11402 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
11403                                        SchedWriteShuffle, HasBWI>;
11404 defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
11405                                        SchedWriteShuffle, HasBWI>;
11406 defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
11407                                        SchedWriteShuffle, HasBWI>;
11408 defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
11409                                        SchedWriteShuffle, HasBWI>;
11411 defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
11412                                        SchedWriteShuffle, HasAVX512>;
11413 defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
11414                                        SchedWriteShuffle, HasAVX512>;
11415 defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
11416                                         SchedWriteShuffle, HasAVX512>;
11417 defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
11418                                         SchedWriteShuffle, HasAVX512>;
11420 //===----------------------------------------------------------------------===//
11421 // AVX-512 - Extract & Insert Integer Instructions
11422 //===----------------------------------------------------------------------===//
// Memory-destination element extract (mr): stores the OpNode result, truncated
// to the element type, to $dst. Shared by the byte and word extract
// multiclasses.
11424 multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11425                                                             X86VectorVTInfo _> {
11426   def mr : AVX512Ii8<opc, MRMDestMem, (outs),
11427               (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11428               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11429               [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
11430                        addr:$dst)]>,
11431               EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
// Byte extract under HasBWI: a register-destination form (rr, opcode 0x14,
// result in GR32orGR64 via X86pextrb) plus the memory-destination form from
// avx512_extract_elt_bw_m.
11434 multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
11435   let Predicates = [HasBWI] in {
11436     def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
11437                   (ins _.RC:$src1, u8imm:$src2),
11438                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11439                   [(set GR32orGR64:$dst,
11440                         (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
11441                   EVEX, TA, PD, Sched<[WriteVecExtract]>;
11443     defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TA, PD;
11444   }
// Word extract under HasBWI. Three definitions:
//  - rr:     opcode 0xC5, MRMSrcReg, TB PD; carries the X86pextrw pattern.
//  - rr_REV: opcode 0x15, MRMDestReg, TA PD; pattern-less, codegen-only and
//            marked ForceDisassemble.
//  - mr:     opcode 0x15 memory-destination form from avx512_extract_elt_bw_m.
11447 multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
11448   let Predicates = [HasBWI] in {
11449     def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
11450                   (ins _.RC:$src1, u8imm:$src2),
11451                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11452                   [(set GR32orGR64:$dst,
11453                         (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
11454                   EVEX, TB, PD, Sched<[WriteVecExtract]>;
11456     let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
11457     def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
11458                    (ins _.RC:$src1, u8imm:$src2),
11459                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
11460                    EVEX, TA, PD, Sched<[WriteVecExtract]>;
11462     defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TA, PD;
11463   }
// Dword/qword extract under HasDQI (opcode 0x16): rr writes the element to a
// GRC register, mr stores it to memory. Both match the generic extractelt node
// with an `imm` index operand.
11466 multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
11467                                                             RegisterClass GRC> {
11468   let Predicates = [HasDQI] in {
11469     def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
11470                   (ins _.RC:$src1, u8imm:$src2),
11471                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11472                   [(set GRC:$dst,
11473                       (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
11474                   EVEX, TA, PD, Sched<[WriteVecExtract]>;
11476     def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
11477                 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11478                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11479                 [(store (extractelt (_.VT _.RC:$src1),
11480                                     imm:$src2),addr:$dst)]>,
11481                 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TA, PD,
11482                 Sched<[WriteVecExtractSt]>;
11483   }
// EVEX element extracts from 128-bit vectors. Byte and word forms are WIG; the
// qword form adds REX_W and uses GR64, the dword form uses GR32.
11486 defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, WIG;
11487 defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, WIG;
11488 defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
11489 defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, REX_W;
// Memory-source element insert (rm): inserts a scalar loaded through LdFrag
// into $src1 at the position given by $src3. `immoperator` selects which
// immediate operator the pattern uses for the index (the callers pass timm or
// imm).
11491 multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11492                                             X86VectorVTInfo _, PatFrag LdFrag,
11493                                             SDPatternOperator immoperator> {
11494   def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
11495       (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
11496       OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11497       [(set _.RC:$dst,
11498           (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
11499       EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
// Byte/word insert under HasBWI: rr takes the scalar from GR32orGR64, and the
// rm form comes from avx512_insert_elt_m with a timm index. The opcode-map and
// prefix attributes are not set here; they are appended where this multiclass
// is instantiated.
11502 multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
11503                                             X86VectorVTInfo _, PatFrag LdFrag> {
11504   let Predicates = [HasBWI] in {
11505     def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11506         (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
11507         OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11508         [(set _.RC:$dst,
11509             (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX, VVVV,
11510         Sched<[WriteVecInsert]>;
11512     defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
11513   }
// Dword/qword insert under HasDQI, matching the generic insertelt node with an
// `imm` index: rr takes the scalar from GRC, rm loads it through
// _.ScalarLdFrag. Both forms are TA, PD.
11516 multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
11517                                          X86VectorVTInfo _, RegisterClass GRC> {
11518   let Predicates = [HasDQI] in {
11519     def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11520         (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
11521         OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11522         [(set _.RC:$dst,
11523             (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
11524         EVEX, VVVV, TA, PD, Sched<[WriteVecInsert]>;
11526     defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
11527                                     _.ScalarLdFrag, imm>, TA, PD;
11528   }
// EVEX element inserts into 128-bit vectors. The byte and word forms pass
// their load fragment (extloadi8 / extloadi16) and encoding attributes
// explicitly; the dword and qword forms share opcode 0x22 and differ by REX_W.
11531 defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
11532                                      extloadi8>, TA, PD, WIG;
11533 defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
11534                                      extloadi16>, TB, PD, WIG;
11535 defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
11536 defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, REX_W;
// With AVX512 but without BWI, a byte insert whose scalar is an any-extended
// bitcast of a v8i1 mask is selected as VPINSRBrr (not the Z-suffixed
// instruction), copying the VK8 register to GR32.
11538 let Predicates = [HasAVX512, NoBWI] in {
11539   def : Pat<(X86pinsrb VR128:$src1,
11540                        (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
11541                        timm:$src3),
11542             (VPINSRBrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
11543                        timm:$src3)>;
// With BWI, select VPINSRBZrr for byte inserts whose scalar is an any-extended
// i8: a GR8 source is placed into an IMPLICIT_DEF i32 at sub_8bit, and a v8i1
// mask source is copied from VK8 to GR32.
11546 let Predicates = [HasBWI] in {
11547   def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
11548             (VPINSRBZrr VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
11549                         GR8:$src2, sub_8bit), timm:$src3)>;
11550   def : Pat<(X86pinsrb VR128:$src1,
11551                        (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
11552                        timm:$src3),
11553             (VPINSRBZrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
11554                         timm:$src3)>;
11557 // Always select FP16 instructions if available.
// The patterns below implement f16 load, store and i16<->f16 bitcasts with
// VPINSRWZ / VPEXTRWZ at element index 0. AddedComplexity = -10 lowers their
// matching priority relative to other patterns for the same nodes.
// NOTE(review): the store pattern uses FR16/VR128 where the other three use
// FR16X/VR128X - confirm this is intentional.
11558 let Predicates = [HasBWI], AddedComplexity = -10 in {
11559   def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (VPINSRWZrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16X)>;
11560   def : Pat<(store f16:$src, addr:$dst), (VPEXTRWZmr addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>;
11561   def : Pat<(i16 (bitconvert f16:$src)), (EXTRACT_SUBREG (VPEXTRWZrr (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0), sub_16bit)>;
11562   def : Pat<(f16 (bitconvert i16:$src)), (COPY_TO_REGCLASS (VPINSRWZrr (v8i16 (IMPLICIT_DEF)), (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit), 0), FR16X)>;
11565 //===----------------------------------------------------------------------===//
11566 // VSHUFPS - VSHUFPD Operations
11567 //===----------------------------------------------------------------------===//
// VSHUFPS/VSHUFPD: avx512_common_3Op_imm8 with opcode 0xC6 and X86Shufp. The
// disp8 compression element size comes from the 512-bit type info; the PD
// instantiation additionally carries PD and REX_W.
11569 multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
11570   defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
11571                                     SchedWriteFShuffle>,
11572                                     EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
11573                                     TA, EVEX, VVVV;
11576 defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, TB;
11577 defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, TB, PD, REX_W;
11579 //===----------------------------------------------------------------------===//
11580 // AVX-512 - Byte shift Left/Right
11581 //===----------------------------------------------------------------------===//
// Shift-by-immediate with a register source (ri) and a memory source (mi).
// The ModRM formats are passed in (MRMr / MRMm) because the callers select the
// operation through the format rather than the opcode. These are plain defs
// (not AVX512_maskable), so no masked variants are generated.
11583 multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
11584                                Format MRMm, string OpcodeStr,
11585                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11586   def ri : AVX512<opc, MRMr,
11587              (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
11588              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11589              [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
11590              Sched<[sched]>;
11591   def mi : AVX512<opc, MRMm,
11592            (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
11593            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11594            [(set _.RC:$dst,(_.VT (OpNode
11595                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
11596                                  (i8 timm:$src2))))]>,
11597            Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_shift_packed on the byte-vector type infos for 512 bits
// under [prd] and for 256/128 bits under [prd, HasVLX].
11600 multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
11601                                    Format MRMm, string OpcodeStr,
11602                                    X86SchedWriteWidths sched, Predicate prd>{
11603   let Predicates = [prd] in
11604     defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11605                                  sched.ZMM, v64i8_info>, EVEX_V512;
11606   let Predicates = [prd, HasVLX] in {
11607     defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11608                                     sched.YMM, v32i8x_info>, EVEX_V256;
11609     defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11610                                     sched.XMM, v16i8x_info>, EVEX_V128;
11611   }
// VPSLLDQ and VPSRLDQ share opcode 0x73 and are distinguished by their ModRM
// format: MRM7r/MRM7m for the left shift, MRM3r/MRM3m for the right shift.
11613 defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
11614                                        SchedWriteShuffle, HasBWI>,
11615                                        AVX512PDIi8Base, EVEX, VVVV, WIG;
11616 defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
11617                                        SchedWriteShuffle, HasBWI>,
11618                                        AVX512PDIi8Base, EVEX, VVVV, WIG;
// Two-source operation whose result type (_dst) differs from its source type
// (_src). rr is marked commutable; rm loads the second source through
// _src.LdFrag. These are plain defs, so no masked variants are generated.
11620 multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
11621                                 string OpcodeStr, X86FoldableSchedWrite sched,
11622                                 X86VectorVTInfo _dst, X86VectorVTInfo _src> {
11623   let isCommutable = 1 in
11624   def rr : AVX512BI<opc, MRMSrcReg,
11625              (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
11626              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11627              [(set _dst.RC:$dst,(_dst.VT
11628                                 (OpNode (_src.VT _src.RC:$src1),
11629                                         (_src.VT _src.RC:$src2))))]>,
11630              Sched<[sched]>;
11631   def rm : AVX512BI<opc, MRMSrcMem,
11632            (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
11633            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11634            [(set _dst.RC:$dst,(_dst.VT
11635                               (OpNode (_src.VT _src.RC:$src1),
11636                               (_src.VT (bitconvert
11637                                         (_src.LdFrag addr:$src2))))))]>,
11638            Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_psadbw_packed with i64 result vectors and i8 source
// vectors of matching width: 512 bits under [prd], 256/128 bits under
// [prd, HasVLX].
11641 multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11642                                     string OpcodeStr, X86SchedWriteWidths sched,
11643                                     Predicate prd> {
11644   let Predicates = [prd] in
11645     defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11646                                   v8i64_info, v64i8_info>, EVEX_V512;
11647   let Predicates = [prd, HasVLX] in {
11648     defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11649                                      v4i64x_info, v32i8x_info>, EVEX_V256;
11650     defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11651                                      v2i64x_info, v16i8x_info>, EVEX_V128;
11652   }
// VPSADBW (opcode 0xf6), available with HasBWI; selected for X86psadbw.
11655 defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11656                                         SchedWritePSADBW, HasBWI>, EVEX, VVVV, WIG;
11658 // Transforms to swizzle an immediate to enable better matching when
11659 // memory operand isn't in the right place.
11660 def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
11661   // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
11662   uint8_t Imm = N->getZExtValue();
11663   // Swap bits 1/4 and 3/6. Bits 0, 2, 5 and 7 (mask 0xa5) keep their place.
11664   uint8_t NewImm = Imm & 0xa5;
11665   if (Imm & 0x02) NewImm |= 0x10;
11666   if (Imm & 0x10) NewImm |= 0x02;
11667   if (Imm & 0x08) NewImm |= 0x40;
11668   if (Imm & 0x40) NewImm |= 0x08;
11669   return getI8Imm(NewImm, SDLoc(N));
11670 }]>;
11671 def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
11672   // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1
  // (0-based operand numbering, as in VPTERNLOG321_imm8).
11673   uint8_t Imm = N->getZExtValue();
11674   // Swap bits 2/4 and 3/5. Bits 0, 1, 6 and 7 (mask 0xc3) keep their place.
11675   uint8_t NewImm = Imm & 0xc3;
11676   if (Imm & 0x04) NewImm |= 0x10;
11677   if (Imm & 0x10) NewImm |= 0x04;
11678   if (Imm & 0x08) NewImm |= 0x20;
11679   if (Imm & 0x20) NewImm |= 0x08;
11680   return getI8Imm(NewImm, SDLoc(N));
11681 }]>;
11682 def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
11683   // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11684   uint8_t Imm = N->getZExtValue();
11685   // Swap bits 1/2 and 5/6. Bits 0, 3, 4 and 7 (mask 0x99) keep their place.
11686   uint8_t NewImm = Imm & 0x99;
11687   if (Imm & 0x02) NewImm |= 0x04;
11688   if (Imm & 0x04) NewImm |= 0x02;
11689   if (Imm & 0x20) NewImm |= 0x40;
11690   if (Imm & 0x40) NewImm |= 0x20;
11691   return getI8Imm(NewImm, SDLoc(N));
11692 }]>;
11693 def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
11694   // Convert a VPTERNLOG immediate by moving operand 0 to the end
  // (0-based operand numbering, as in VPTERNLOG321_imm8).
11695   uint8_t Imm = N->getZExtValue();
11696   // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  // Bits 0 and 7 (mask 0x81) keep their place.
11697   uint8_t NewImm = Imm & 0x81;
11698   if (Imm & 0x02) NewImm |= 0x04;
11699   if (Imm & 0x04) NewImm |= 0x10;
11700   if (Imm & 0x08) NewImm |= 0x40;
11701   if (Imm & 0x10) NewImm |= 0x02;
11702   if (Imm & 0x20) NewImm |= 0x08;
11703   if (Imm & 0x40) NewImm |= 0x20;
11704   return getI8Imm(NewImm, SDLoc(N));
11705 }]>;
11706 def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
11707   // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
  // Operands are counted from 0. Index bits (a, b, c) move to (c, a, b), so
  // the new immediate applied to operands (C, A, B) gives the same result as
  // the old immediate applied to (A, B, C).
11708   uint8_t Imm = N->getZExtValue();
11709   // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  // 0x81 keeps bits 0 and 7; every other bit is relocated below.
11710   uint8_t NewImm = Imm & 0x81;
11711   if (Imm & 0x02) NewImm |= 0x10;
11712   if (Imm & 0x04) NewImm |= 0x02;
11713   if (Imm & 0x08) NewImm |= 0x20;
11714   if (Imm & 0x10) NewImm |= 0x04;
11715   if (Imm & 0x20) NewImm |= 0x40;
11716   if (Imm & 0x40) NewImm |= 0x08;
11717   return getI8Imm(NewImm, SDLoc(N));
11718 }]>;
// Defines the register (rri), full-vector load (rmi) and broadcast load (rmbi)
// forms of a VPTERNLOG instruction for one vector type, plus extra selection
// patterns for masked operations whose passthru or memory operand is not in
// the position the instruction expects.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - SDNode matched by the instruction patterns.
//   sched     - scheduling class; .Folded/.ReadAfterFold are used for loads.
//   _         - vector type info (register class, memory operands, mask class).
//   Name      - base instruction name; Name#_.ZSuffix#<form> is looked up with
//               !cast to reference the masked variants.
// In every extra pattern the instruction operands are ($src1, $src2, $src3),
// with $src1 tied to the destination, and the immediate is rewritten with one
// of the VPTERNLOG*_imm8 transforms to compensate for the node's operand order.
11720 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11721                           X86FoldableSchedWrite sched, X86VectorVTInfo _,
11722                           string Name>{
11723   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11724   defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11725                       (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11726                       OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11727                       (OpNode (_.VT _.RC:$src1),
11728                               (_.VT _.RC:$src2),
11729                               (_.VT _.RC:$src3),
11730                               (i8 timm:$src4)), 1, 1>,
11731                       AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>;
11732   defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11733                     (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11734                     OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11735                     (OpNode (_.VT _.RC:$src1),
11736                             (_.VT _.RC:$src2),
11737                             (_.VT (bitconvert (_.LdFrag addr:$src3))),
11738                             (i8 timm:$src4)), 1, 0>,
11739                     AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
11740                     Sched<[sched.Folded, sched.ReadAfterFold]>;
11741   defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11742                     (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11743                     OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
11744                     "$src2, ${src3}"#_.BroadcastStr#", $src4",
11745                     (OpNode (_.VT _.RC:$src1),
11746                             (_.VT _.RC:$src2),
11747                             (_.VT (_.BroadcastLdFrag addr:$src3)),
11748                             (i8 timm:$src4)), 1, 0>, EVEX_B,
11749                     AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
11750                     Sched<[sched.Folded, sched.ReadAfterFold]>;
11751   }// Constraints = "$src1 = $dst"
11753   // Additional patterns for matching passthru operand in other positions.
11754   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11755                    (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11756                    _.RC:$src1)),
11757             (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11758              _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11759   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11760                    (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
11761                    _.RC:$src1)),
11762             (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11763              _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11765   // Additional patterns for matching zero masking with loads in other
11766   // positions.
11767   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11768                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
11769                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11770                    _.ImmAllZerosV)),
11771             (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11772              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11773   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11774                    (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11775                     _.RC:$src2, (i8 timm:$src4)),
11776                    _.ImmAllZerosV)),
11777             (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11778              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11780   // Additional patterns for matching masked loads with different
11781   // operand orders.
  // For the two rotated orders the transform is named after the instruction
  // operand order expressed in node operands, not the other way round:
  //   node ($src2, mem, $src1): the instruction operands ($src1, $src2, mem)
  //     are node operands (C, A, B), which is what VPTERNLOG312_imm8 encodes.
  //   node (mem, $src1, $src2): the instruction operands are node operands
  //     (B, C, A), which is what VPTERNLOG231_imm8 encodes.
  // Example: imm 0xF0 selects node operand A. For node ($src2, mem, $src1)
  // the instruction must select $src2, i.e. use 0xCC, which is what the 312
  // transform yields; the 231 transform would yield 0xAA (the memory operand).
11782   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11783                    (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11784                     _.RC:$src2, (i8 timm:$src4)),
11785                    _.RC:$src1)),
11786             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11787              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11788   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11789                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
11790                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11791                    _.RC:$src1)),
11792             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11793              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11794   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11795                    (OpNode _.RC:$src2, _.RC:$src1,
11796                     (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
11797                    _.RC:$src1)),
11798             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11799              _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11800   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11801                    (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11802                     _.RC:$src1, (i8 timm:$src4)),
11803                    _.RC:$src1)),
11804             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11805              _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11806   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11807                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
11808                     _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11809                    _.RC:$src1)),
11810             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11811              _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11813   // Additional patterns for matching zero masking with broadcasts in other
11814   // positions.
11815   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11816                    (OpNode (_.BroadcastLdFrag addr:$src3),
11817                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11818                    _.ImmAllZerosV)),
11819             (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11820              _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11821              (VPTERNLOG321_imm8 timm:$src4))>;
11822   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11823                    (OpNode _.RC:$src1,
11824                     (_.BroadcastLdFrag addr:$src3),
11825                     _.RC:$src2, (i8 timm:$src4)),
11826                    _.ImmAllZerosV)),
11827             (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11828              _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11829              (VPTERNLOG132_imm8 timm:$src4))>;
11831   // Additional patterns for matching masked broadcasts with different
11832   // operand orders.
  // The rotated orders use the same transform choice as the masked load
  // patterns: ($src2, bcst, $src1) -> 312, (bcst, $src1, $src2) -> 231.
11833   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11834                    (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
11835                     _.RC:$src2, (i8 timm:$src4)),
11836                    _.RC:$src1)),
11837             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11838              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11839   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11840                    (OpNode (_.BroadcastLdFrag addr:$src3),
11841                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11842                    _.RC:$src1)),
11843             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11844              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11845   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11846                    (OpNode _.RC:$src2, _.RC:$src1,
11847                     (_.BroadcastLdFrag addr:$src3),
11848                     (i8 timm:$src4)), _.RC:$src1)),
11849             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11850              _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11851   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11852                    (OpNode _.RC:$src2,
11853                     (_.BroadcastLdFrag addr:$src3),
11854                     _.RC:$src1, (i8 timm:$src4)),
11855                    _.RC:$src1)),
11856             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11857              _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11858   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11859                    (OpNode (_.BroadcastLdFrag addr:$src3),
11860                     _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11861                    _.RC:$src1)),
11862             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11863              _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
// Instantiates avx512_ternlog (opcode 0x25, X86vpternlog node) for the three
// vector widths in _: Z (512-bit) under HasAVX512, and Z128/Z256 under
// HasAVX512 + HasVLX. NAME is forwarded so that the extra patterns inside
// avx512_ternlog can look up the generated instructions by name.
11866 multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
11867                                  AVX512VLVectorVTInfo _> {
11868   let Predicates = [HasAVX512] in
11869     defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
11870                                _.info512, NAME>, EVEX_V512;
11871   let Predicates = [HasAVX512, HasVLX] in {
11872     defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
11873                                _.info128, NAME>, EVEX_V128;
11874     defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
11875                                _.info256, NAME>, EVEX_V256;
11876   }
// Dword (i32) and qword (i64) element flavours of VPTERNLOG; the qword form
// additionally sets REX_W.
11879 defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
11880                                         avx512vl_i32_info>;
11881 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
11882                                         avx512vl_i64_info>, REX_W;
11884 // Patterns to implement vnot using vpternlog instead of creating all ones
11885 // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11886 // so that the result is only dependent on src0. But we use the same source
11887 // for all operands to prevent a false dependency.
// (15 = 0b00001111: the result bit is set exactly for the four truth-table
// entries whose most significant index bit is clear.)
11888 // TODO: We should maybe have a more generalized algorithm for folding to
11889 // vpternlog.
// 512-bit vectors: every integer element type is mapped to the qword
// instruction VPTERNLOGQZrri.
11890 let Predicates = [HasAVX512] in {
11891   def : Pat<(v64i8 (vnot VR512:$src)),
11892             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11893   def : Pat<(v32i16 (vnot VR512:$src)),
11894             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11895   def : Pat<(v16i32 (vnot VR512:$src)),
11896             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11897   def : Pat<(v8i64 (vnot VR512:$src)),
11898             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
// vnot for 128-bit and 256-bit vectors when VLX is not available (NoVLX) but
// 512-bit EVEX is (HasEVEX512). The source is inserted into an otherwise
// undefined (IMPLICIT_DEF) 512-bit register, the 512-bit VPTERNLOGQZrri is
// applied with immediate 15, and the low xmm/ymm subregister is extracted.
11901 let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
11902   def : Pat<(v16i8 (vnot VR128X:$src)),
11903             (EXTRACT_SUBREG
11904              (VPTERNLOGQZrri
11905               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11906               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11907               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11908               (i8 15)), sub_xmm)>;
11909   def : Pat<(v8i16 (vnot VR128X:$src)),
11910             (EXTRACT_SUBREG
11911              (VPTERNLOGQZrri
11912               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11913               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11914               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11915               (i8 15)), sub_xmm)>;
11916   def : Pat<(v4i32 (vnot VR128X:$src)),
11917             (EXTRACT_SUBREG
11918              (VPTERNLOGQZrri
11919               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11920               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11921               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11922               (i8 15)), sub_xmm)>;
11923   def : Pat<(v2i64 (vnot VR128X:$src)),
11924             (EXTRACT_SUBREG
11925              (VPTERNLOGQZrri
11926               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11927               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11928               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11929               (i8 15)), sub_xmm)>;
11931   def : Pat<(v32i8 (vnot VR256X:$src)),
11932             (EXTRACT_SUBREG
11933              (VPTERNLOGQZrri
11934               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11935               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11936               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11937               (i8 15)), sub_ymm)>;
11938   def : Pat<(v16i16 (vnot VR256X:$src)),
11939             (EXTRACT_SUBREG
11940              (VPTERNLOGQZrri
11941               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11942               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11943               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11944               (i8 15)), sub_ymm)>;
11945   def : Pat<(v8i32 (vnot VR256X:$src)),
11946             (EXTRACT_SUBREG
11947              (VPTERNLOGQZrri
11948               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11949               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11950               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11951               (i8 15)), sub_ymm)>;
11952   def : Pat<(v4i64 (vnot VR256X:$src)),
11953             (EXTRACT_SUBREG
11954              (VPTERNLOGQZrri
11955               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11956               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11957               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11958               (i8 15)), sub_ymm)>;
// vnot for 128-bit and 256-bit vectors when VLX is available: the native
// width VPTERNLOGQZ128rri / VPTERNLOGQZ256rri is used directly, again with
// the source in all three operand slots and immediate 15.
11961 let Predicates = [HasVLX] in {
11962   def : Pat<(v16i8 (vnot VR128X:$src)),
11963             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11964   def : Pat<(v8i16 (vnot VR128X:$src)),
11965             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11966   def : Pat<(v4i32 (vnot VR128X:$src)),
11967             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11968   def : Pat<(v2i64 (vnot VR128X:$src)),
11969             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11971   def : Pat<(v32i8 (vnot VR256X:$src)),
11972             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11973   def : Pat<(v16i16 (vnot VR256X:$src)),
11974             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11975   def : Pat<(v8i32 (vnot VR256X:$src)),
11976             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11977   def : Pat<(v4i64 (vnot VR256X:$src)),
11978             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11981 //===----------------------------------------------------------------------===//
11982 // AVX-512 - FixupImm
11983 //===----------------------------------------------------------------------===//
// Packed VFIXUPIMM forms for one vector type: register (rri), full-vector
// load (rmi) and broadcast load (rmbi, EVEX_B).
//   opc       - opcode byte.
//   OpcodeStr - mnemonic stem; _.Suffix is appended.
//   sched     - scheduling class; .Folded/.ReadAfterFold are used for loads.
//   _         - vector type info for $src1/$src2 and the result.
//   TblVT     - vector type info used to type the third operand ($src3).
// All forms tie $src1 to $dst, read MXCSR and may raise FP exceptions.
11985 multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
11986                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
11987                                   X86VectorVTInfo TblVT>{
11988   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
11989       Uses = [MXCSR], mayRaiseFPException = 1 in {
11990     defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11991                         (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11992                          OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11993                         (X86VFixupimm (_.VT _.RC:$src1),
11994                                       (_.VT _.RC:$src2),
11995                                       (TblVT.VT _.RC:$src3),
11996                                       (i32 timm:$src4))>, Sched<[sched]>;
11997     defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11998                       (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
11999                       OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12000                       (X86VFixupimm (_.VT _.RC:$src1),
12001                                     (_.VT _.RC:$src2),
12002                                     (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
12003                                     (i32 timm:$src4))>,
12004                       Sched<[sched.Folded, sched.ReadAfterFold]>;
12005     defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12006                       (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
12007                     OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
12008                     "$src2, ${src3}"#_.BroadcastStr#", $src4",
12009                       (X86VFixupimm (_.VT _.RC:$src1),
12010                                     (_.VT _.RC:$src2),
12011                                     (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
12012                                     (i32 timm:$src4))>,
12013                     EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
12014   } // Constraints = "$src1 = $dst"
// Extends avx512_fixupimm_packed with the register-only {sae} form (rrib),
// which is selected from X86VFixupimmSAE and encoded with EVEX_B. Unlike the
// inherited forms, this let-block sets Uses = [MXCSR] but does not set
// mayRaiseFPException.
12017 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
12018                                       X86FoldableSchedWrite sched,
12019                                       X86VectorVTInfo _, X86VectorVTInfo TblVT>
12020   : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
12021 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
12022   defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12023                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12024                       OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
12025                       "$src2, $src3, {sae}, $src4",
12026                       (X86VFixupimmSAE (_.VT _.RC:$src1),
12027                                        (_.VT _.RC:$src2),
12028                                        (TblVT.VT _.RC:$src3),
12029                                        (i32 timm:$src4))>,
12030                       EVEX_B, Sched<[sched]>;
12031   }
// Scalar VFIXUPIMM forms: register (rri), register with {sae} (rrib) and
// scalar load (rmi).
//   opc       - opcode byte.
//   OpcodeStr - mnemonic stem; _.Suffix is appended.
//   sched     - scheduling class; .Folded/.ReadAfterFold are used for the
//               load form only.
//   _         - vector type info for $src1/$src2 and the result.
//   _src3VT   - vector type info used to type the third operand ($src3).
// All forms tie $src1 to $dst and are predicated on HasAVX512.
12034 multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
12035                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
12036                                   X86VectorVTInfo _src3VT> {
12037   let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
12038       ExeDomain = _.ExeDomain in {
12039     defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
12040                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12041                       OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12042                       (X86VFixupimms (_.VT _.RC:$src1),
12043                                      (_.VT _.RC:$src2),
12044                                      (_src3VT.VT _src3VT.RC:$src3),
12045                                      (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
    // {sae} form: all operands are registers (MRMSrcReg), so it uses the
    // plain register scheduling class rather than the load-folded one.
12046     let Uses = [MXCSR] in
12047     defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
12048                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12049                       OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
12050                       "$src2, $src3, {sae}, $src4",
12051                       (X86VFixupimmSAEs (_.VT _.RC:$src1),
12052                                         (_.VT _.RC:$src2),
12053                                         (_src3VT.VT _src3VT.RC:$src3),
12054                                         (i32 timm:$src4))>,
12055                       EVEX_B, Sched<[sched]>;
12056     defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
12057                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
12058                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12059                      (X86VFixupimms (_.VT _.RC:$src1),
12060                                     (_.VT _.RC:$src2),
12061                                     (_src3VT.VT (scalar_to_vector
12062                                               (_src3VT.ScalarLdFrag addr:$src3))),
12063                                     (i32 timm:$src4))>,
12064                      Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
12065   }
// Instantiates the packed VFIXUPIMM forms (opcode 0x54) for all three vector
// widths. Only the 512-bit instantiation uses avx512_fixupimm_packed_sae and
// so gets the {sae} form; the 128/256-bit ones require HasVLX and use
// avx512_fixupimm_packed.
//   _Vec - vector type infos for the data operands.
//   _Tbl - vector type infos passed as TblVT (the third operand's type).
12068 multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
12069                                       AVX512VLVectorVTInfo _Vec,
12070                                       AVX512VLVectorVTInfo _Tbl> {
12071   let Predicates = [HasAVX512] in
12072     defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
12073                                 _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
12074                                 EVEX, VVVV, EVEX_V512;
12075   let Predicates = [HasAVX512, HasVLX] in {
12076     defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
12077                             _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
12078                             EVEX, VVVV, EVEX_V128;
12079     defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
12080                             _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
12081                             EVEX, VVVV, EVEX_V256;
12082   }
// Scalar VFIXUPIMM (opcode 0x55): single- and double-precision variants, each
// pairing an FP vector info with an integer vector info of the same element
// width for the third operand. The double-precision variant sets REX_W.
12085 defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
12086                                            SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
12087                           AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
12088 defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
12089                                            SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
12090                           AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
// Packed VFIXUPIMM: PS and PD variants over all vector widths.
12091 defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
12092                          avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12093 defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
12094                          avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;
12096 // Patterns used to select SSE scalar fp arithmetic instructions from
12097 // either:
12099 // (1) a scalar fp operation followed by a blend
12101 // The effect is that the backend no longer emits unnecessary vector
12102 // insert instructions immediately after SSE scalar fp instructions
12103 // like addss or mulss.
12105 // For example, given the following code:
12106 //   __m128 foo(__m128 A, __m128 B) {
12107 //     A[0] += B[0];
12108 //     return A;
12109 //   }
12111 // Previously we generated:
12112 //   addss %xmm0, %xmm1
12113 //   movss %xmm1, %xmm0
12115 // We now generate:
12116 //   addss %xmm1, %xmm0
12118 // (2) a vector packed single/double fp operation followed by a vector insert
12120 // The effect is that the backend converts the packed fp instruction
12121 // followed by a vector insert into a single SSE scalar fp instruction.
12123 // For example, given the following code:
12124 //   __m128 foo(__m128 A, __m128 B) {
12125 //     __m128 C = A + B;
12126 //     return (__m128) {C[0], A[1], A[2], A[3]};
12127 //   }
12129 // Previously we generated:
12130 //   addps %xmm0, %xmm1
12131 //   movss %xmm1, %xmm0
12133 // We now generate:
12134 //   addss %xmm1, %xmm0
12136 // TODO: Some canonicalization in lowering would simplify the number of
12137 // patterns we have to try to match.
// Selection patterns that fold "scalar FP op on element 0 + MoveNode insert"
// into the corresponding EVEX *_Int scalar instruction.
//   Op        - operator matched by the unmasked patterns.
//   MaskedOp  - operator matched inside X86selects_mask by the masked patterns.
//   OpcPrefix - instruction stem, e.g. "ADDSS"; the instruction names are
//               "V" # OpcPrefix # "Zrr_Int" / "Zrm_Int" plus a k / kz suffix.
//   MoveNode  - the low-element move node (X86Movss / X86Movsd / X86Movsh).
//   _         - vector type info for the 128-bit vector.
//   ZeroFP    - zero leaf matched as the false value for zero-masking.
12138 multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
12139                                           string OpcPrefix, SDNode MoveNode,
12140                                           X86VectorVTInfo _, PatLeaf ZeroFP> {
12141   let Predicates = [HasAVX512] in {
12142     // extracted scalar math op with insert via movss
12143     def : Pat<(MoveNode
12144                (_.VT VR128X:$dst),
12145                (_.VT (scalar_to_vector
12146                       (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12147                           _.FRC:$src)))),
12148               (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
12149                (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
12150     def : Pat<(MoveNode
12151                (_.VT VR128X:$dst),
12152                (_.VT (scalar_to_vector
12153                       (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12154                           (_.ScalarLdFrag addr:$src))))),
12155               (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;
12157     // extracted masked scalar math op with insert via movss
    // (merge-masking: $src0 is the value kept when the mask bit is clear)
12158     def : Pat<(MoveNode (_.VT VR128X:$src1),
12159                (scalar_to_vector
12160                 (X86selects_mask VK1WM:$mask,
12161                             (MaskedOp (_.EltVT
12162                                        (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12163                                       _.FRC:$src2),
12164                             _.FRC:$src0))),
12165               (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
12166                (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12167                VK1WM:$mask, _.VT:$src1,
12168                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12169     def : Pat<(MoveNode (_.VT VR128X:$src1),
12170                (scalar_to_vector
12171                 (X86selects_mask VK1WM:$mask,
12172                             (MaskedOp (_.EltVT
12173                                        (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12174                                       (_.ScalarLdFrag addr:$src2)),
12175                             _.FRC:$src0))),
12176               (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
12177                (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12178                VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12180     // extracted zero-masked scalar math op with insert via movss
    // (the select's false value is ZeroFP, so the kz instruction is used)
12181     def : Pat<(MoveNode (_.VT VR128X:$src1),
12182                (scalar_to_vector
12183                 (X86selects_mask VK1WM:$mask,
12184                             (MaskedOp (_.EltVT
12185                                        (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12186                                       _.FRC:$src2), (_.EltVT ZeroFP)))),
12187       (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intkz")
12188           VK1WM:$mask, _.VT:$src1,
12189           (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12190     def : Pat<(MoveNode (_.VT VR128X:$src1),
12191                (scalar_to_vector
12192                 (X86selects_mask VK1WM:$mask,
12193                             (MaskedOp (_.EltVT
12194                                        (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12195                                       (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
12196       (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12197   }
// Instantiate the scalar math patterns for add/sub/mul/div. The unmasked
// patterns match the any_* operator, the masked ones the plain operator.
// Single precision (v4f32, X86Movss):
12200 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
12201 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
12202 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
12203 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
// Double precision (v2f64, X86Movsd):
12205 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
12206 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
12207 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
12208 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
// Half precision (v8f16, X86Movsh):
12210 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>;
12211 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>;
12212 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>;
12213 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>;
// Selection pattern that folds "unary scalar op on element 0 of $src, inserted
// into $dst with Move" into the "V" # OpcPrefix # "Zr_Int" instruction.
//   OpNode    - the unary operator to match.
//   OpcPrefix - instruction stem, e.g. "SQRTSS".
//   Move      - the low-element move node.
//   _         - vector type info for the vector.
12215 multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix,
12216                                              SDNode Move, X86VectorVTInfo _> {
12217   let Predicates = [HasAVX512] in {
12218     def : Pat<(_.VT (Move _.VT:$dst,
12219                      (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
12220               (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
12221   }
// Scalar square root in single, double and half precision.
12224 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
12225 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
12226 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>;
12228 //===----------------------------------------------------------------------===//
12229 // AES instructions
12230 //===----------------------------------------------------------------------===//
// EVEX-encoded AES round instructions at 128, 256 and 512 bits, built from
// AESI_binop_rm_int.
//   Op        - opcode byte.
//   OpStr     - assembly mnemonic.
//   IntPrefix - name of the 128-bit intrinsic; "_256" / "_512" is appended
//               for the wider forms.
// Z128/Z256 require HasVLX + HasVAES; Z requires HasAVX512 + HasVAES.
12232 multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
12233   let Predicates = [HasVLX, HasVAES] in {
12234     defm Z128 : AESI_binop_rm_int<Op, OpStr,
12235                                   !cast<Intrinsic>(IntPrefix),
12236                                   loadv2i64, 0, VR128X, i128mem>,
12237                   EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V128, WIG;
12238     defm Z256 : AESI_binop_rm_int<Op, OpStr,
12239                                   !cast<Intrinsic>(IntPrefix#"_256"),
12240                                   loadv4i64, 0, VR256X, i256mem>,
12241                   EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V256, WIG;
12242     }
12243     let Predicates = [HasAVX512, HasVAES] in
12244     defm Z    : AESI_binop_rm_int<Op, OpStr,
12245                                   !cast<Intrinsic>(IntPrefix#"_512"),
12246                                   loadv8i64, 0, VR512, i512mem>,
12247                   EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V512, WIG;
// The four AES round instructions (opcodes 0xDC-0xDF), each bound to its
// int_x86_aesni_* intrinsic family.
12250 defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
12251 defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
12252 defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
12253 defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
12255 //===----------------------------------------------------------------------===//
12256 // PCLMUL instructions - Carry less multiplication
12257 //===----------------------------------------------------------------------===//
// EVEX-encoded VPCLMULQDQ. The 512-bit form requires HasAVX512 together with
// HasVPCLMULQDQ; the 128-bit and 256-bit forms require HasVLX together with
// HasVPCLMULQDQ. Each width passes its own register class, memory operand,
// load fragment and intrinsic to the vpclmulqdq multiclass.
12259 let Predicates = [HasAVX512, HasVPCLMULQDQ] in
12260 defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
12261                               EVEX, VVVV, EVEX_V512, EVEX_CD8<64, CD8VF>, WIG;
12263 let Predicates = [HasVLX, HasVPCLMULQDQ] in {
12264 defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
12265                               EVEX, VVVV, EVEX_V128, EVEX_CD8<64, CD8VF>, WIG;
12267 defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
12268                                 int_x86_pclmulqdq_256>, EVEX, VVVV, EVEX_V256,
12269                                 EVEX_CD8<64, CD8VF>, WIG;
// One vpclmulqdq_aliases instantiation per width defined above; the string
// argument is the base name of the instruction records the aliases refer to.
12272 // Aliases
12273 defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
12274 defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
12275 defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
12277 //===----------------------------------------------------------------------===//
12278 // VBMI2
12279 //===----------------------------------------------------------------------===//
// Register ("r") and full-vector memory ("m") forms of a VBMI2 three-source
// shift. Both are built with AVX512_maskable_3src, and $src1 is tied to $dst,
// so the first source operand is also the destination.
//   Op     - opcode value shared by both forms.
//   OpStr  - mnemonic string.
//   OpNode - SDNode matched by the selection pattern.
//   sched  - scheduling info; the memory form uses sched.Folded and
//            sched.ReadAfterFold.
//   VTI    - vector type info supplying RC, MemOp, LdFrag, VT and ExeDomain.
12281 multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
12282                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12283   let Constraints = "$src1 = $dst",
12284       ExeDomain   = VTI.ExeDomain in {
12285     defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12286                 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12287                 "$src3, $src2", "$src2, $src3",
12288                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
12289                 T8, PD, EVEX, VVVV, Sched<[sched]>;
    // Same operation with $src3 loaded from memory through VTI.LdFrag.
12290     defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12291                 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12292                 "$src3, $src2", "$src2, $src3",
12293                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12294                         (VTI.VT (VTI.LdFrag addr:$src3))))>,
12295                 T8, PD, EVEX, VVVV,
12296                 Sched<[sched.Folded, sched.ReadAfterFold]>;
12297   }
// Extends VBMI2_shift_var_rm (inherited, so "r" and "m" are still produced)
// with a broadcast-memory form "mb". The "mb" form takes a scalar memory
// operand (VTI.ScalarMemOp), appends VTI.BroadcastStr to that operand in both
// assembly syntaxes, matches the load through VTI.BroadcastLdFrag and sets
// EVEX_B. Parameters have the same meaning as in VBMI2_shift_var_rm.
12300 multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12301                                X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
12302          : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
12303   let Constraints = "$src1 = $dst",
12304       ExeDomain   = VTI.ExeDomain in
12305   defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12306               (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
12307               "${src3}"#VTI.BroadcastStr#", $src2",
12308               "$src2, ${src3}"#VTI.BroadcastStr,
12309               (OpNode VTI.RC:$src1, VTI.RC:$src2,
12310                (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12311               T8, PD, EVEX, VVVV, EVEX_B,
12312               Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates VBMI2_shift_var_rm (no broadcast form) at all three vector
// widths. Z uses sched.ZMM / VTI.info512 and requires HasVBMI2; Z256 and Z128
// use sched.YMM / VTI.info256 and sched.XMM / VTI.info128 and additionally
// require HasVLX.
12315 multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
12316                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12317   let Predicates = [HasVBMI2] in
12318   defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12319                                    EVEX_V512;
12320   let Predicates = [HasVBMI2, HasVLX] in {
12321     defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12322                                    EVEX_V256;
12323     defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12324                                    EVEX_V128;
12325   }
// Same width fan-out as VBMI2_shift_var_rm_common, but based on
// VBMI2_shift_var_rmb so every width also gets the broadcast-memory form.
// Z requires HasVBMI2; Z256 and Z128 require HasVBMI2 and HasVLX.
12328 multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
12329                                       X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12330   let Predicates = [HasVBMI2] in
12331   defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12332                                     EVEX_V512;
12333   let Predicates = [HasVBMI2, HasVLX] in {
12334     defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12335                                     EVEX_V256;
12336     defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12337                                     EVEX_V128;
12338   }
// Word/dword/qword variants of a VBMI2 shift whose third source is a vector.
//   wOp    - opcode used for the "w" (i16) variant.
//   dqOp   - opcode shared by the "d" (i32) and "q" (i64) variants.
//   Prefix - mnemonic stem; "w", "d" or "q" is appended.
// The W variant uses the non-broadcast multiclass; D and Q use the one that
// also defines the broadcast-memory form. W and Q add REX_W; each variant sets
// EVEX_CD8 with its own element width.
12340 multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
12341                            SDNode OpNode, X86SchedWriteWidths sched> {
12342   defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
12343              avx512vl_i16_info>, REX_W, EVEX_CD8<16, CD8VF>;
12344   defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
12345              avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12346   defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
12347              avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
// Word/dword/qword variants of a VBMI2 shift built on the *_imm8 helper
// multiclasses. Parameters mirror VBMI2_shift_var (wOp for "w", dqOp for
// "d"/"q", Prefix is the mnemonic stem).
// The W variant goes through avx512_common_3Op_rm_imm8, while D and Q go
// through avx512_common_3Op_imm8 and attach AVX512AIi8Base, EVEX and VVVV
// explicitly at this level.
// NOTE(review): the W variant does not list AVX512AIi8Base/EVEX/VVVV here;
// presumably avx512_common_3Op_rm_imm8 supplies them - confirm against that
// multiclass.
12350 multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
12351                            SDNode OpNode, X86SchedWriteWidths sched> {
12352   defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
12353              avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
12354              REX_W, EVEX_CD8<16, CD8VF>;
12355   defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
12356              OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
12357   defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
12358              sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
// VPSHLDV/VPSHRDV use VBMI2_shift_var (third source is a vector operand);
// VPSHLD/VPSHRD use VBMI2_shift_imm (built on the *_imm8 multiclasses).
// In each line the first opcode is for the "w" variant and the second for the
// "d"/"q" variants.
12361 // Concat & Shift
12362 defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
12363 defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
12364 defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
12365 defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
// Byte and word compress/expand share one opcode per operation (0x63 for
// compress, 0x62 for expand); the word forms are distinguished by REX_W.
12367 // Compress
12368 defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
12369                                          avx512vl_i8_info, HasVBMI2>, EVEX;
12370 defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
12371                                           avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
12372 // Expand
12373 defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
12374                                       avx512vl_i8_info, HasVBMI2>, EVEX;
12375 defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
12376                                       avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
12378 //===----------------------------------------------------------------------===//
12379 // VNNI
12380 //===----------------------------------------------------------------------===//
// Register ("r"), full-vector memory ("m") and broadcast-memory ("mb") forms
// of one VNNI instruction, all built with AVX512_maskable_3src and all with
// $src1 tied to $dst.
//   Op/OpStr     - opcode value and mnemonic.
//   OpNode       - SDNode matched by the selection patterns.
//   sched        - scheduling info; both memory forms list sched.Folded
//                  followed by sched.ReadAfterFold twice.
//   VTI          - vector type info (RC, MemOp, ScalarMemOp, LdFrag,
//                  BroadcastLdFrag, BroadcastStr, VT, ExeDomain).
//   IsCommutable - forwarded as the two trailing arguments of
//                  AVX512_maskable_3src for the register form only; the two
//                  memory forms leave those arguments at their defaults.
// Both memory forms set EVEX_CD8<32, CD8VF>; the broadcast form also sets
// EVEX_B.
12382 let Constraints = "$src1 = $dst" in
12383 multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12384                     X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12385                     bit IsCommutable> {
12386   let ExeDomain = VTI.ExeDomain in {
12387   defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12388                                    (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12389                                    "$src3, $src2", "$src2, $src3",
12390                                    (VTI.VT (OpNode VTI.RC:$src1,
12391                                             VTI.RC:$src2, VTI.RC:$src3)),
12392                                    IsCommutable, IsCommutable>,
12393                                    EVEX, VVVV, T8, PD, Sched<[sched]>;
12394   defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12395                                    (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12396                                    "$src3, $src2", "$src2, $src3",
12397                                    (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12398                                             (VTI.VT (VTI.LdFrag addr:$src3))))>,
12399                                    EVEX, VVVV, EVEX_CD8<32, CD8VF>, T8, PD,
12400                                    Sched<[sched.Folded, sched.ReadAfterFold,
12401                                           sched.ReadAfterFold]>;
12402   defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12403                                    (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
12404                                    OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
12405                                    "$src2, ${src3}"#VTI.BroadcastStr,
12406                                    (OpNode VTI.RC:$src1, VTI.RC:$src2,
12407                                     (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12408                                    EVEX, VVVV, EVEX_CD8<32, CD8VF>, EVEX_B,
12409                                    T8, PD, Sched<[sched.Folded, sched.ReadAfterFold,
12410                                                 sched.ReadAfterFold]>;
12411   }
// Instantiates VNNI_rmb at the three vector widths, always with 32-bit
// integer element type info (v16i32_info, v8i32x_info, v4i32x_info).
// Z requires HasVNNI; Z256 and Z128 require HasVNNI and HasVLX.
// IsCommutable is passed through unchanged to VNNI_rmb.
12414 multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
12415                        X86SchedWriteWidths sched, bit IsCommutable> {
12416   let Predicates = [HasVNNI] in
12417   defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
12418                            IsCommutable>, EVEX_V512;
12419   let Predicates = [HasVNNI, HasVLX] in {
12420     defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
12421                            IsCommutable>, EVEX_V256;
12422     defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
12423                            IsCommutable>, EVEX_V128;
12424   }
// The four VNNI dot-product instructions, opcodes 0x50-0x53. The last template
// argument is IsCommutable: 0 for the VPDPBUSD* pair, 1 for the VPDPWSSD* pair.
12427 // FIXME: Is there a better scheduler class for VPDP?
12428 defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
12429 defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
12430 defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
12431 defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
12433 // Patterns to match VPDPWSSD from existing instructions/intrinsics.
// Each pattern selects add(acc, X86vpmaddwd_su(a, b)) to a single VPDPWSSD
// whose first operand is the accumulator. For every width there is a
// register form and a form where the second multiplicand is a load.
// The 512-bit patterns need only HasVNNI.
12434 let Predicates = [HasVNNI] in {
12435   def : Pat<(v16i32 (add VR512:$src1,
12436                          (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
12437             (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
12438   def : Pat<(v16i32 (add VR512:$src1,
12439                          (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
12440             (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
// The 256-bit and 128-bit patterns additionally need HasVLX.
12442 let Predicates = [HasVNNI,HasVLX] in {
12443   def : Pat<(v8i32 (add VR256X:$src1,
12444                         (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
12445             (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
12446   def : Pat<(v8i32 (add VR256X:$src1,
12447                         (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
12448             (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
12449   def : Pat<(v4i32 (add VR128X:$src1,
12450                         (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
12451             (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
12452   def : Pat<(v4i32 (add VR128X:$src1,
12453                         (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
12454             (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
12457 //===----------------------------------------------------------------------===//
12458 // Bit Algorithms
12459 //===----------------------------------------------------------------------===//
// Byte and word population count. Both match the ctpop node, share opcode
// 0x54 and are predicated on HasBITALG; the word form adds REX_W.
12461 // FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
12462 defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
12463                                    avx512vl_i8_info, HasBITALG>;
12464 defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
12465                                    avx512vl_i16_info, HasBITALG>, REX_W;
// avx512_unary_lowering is instantiated for the same two instruction names
// and type infos as the definitions above.
12467 defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
12468 defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
// Register-register ("rr") and register-memory ("rm") forms of vpshufbitqmb,
// opcode 0x8F, built with AVX512_maskable_cmp. The result is written to the
// mask register class VTI.KRC rather than to a vector register.
// Each form passes two patterns: one using X86Vpshufbitqmb and one using
// X86Vpshufbitqmb_su.
// NOTE(review): presumably the second (_su) pattern is the one used for the
// masked variant - confirm against AVX512_maskable_cmp.
//   sched - scheduling info; the memory form uses sched.Folded and
//           sched.ReadAfterFold.
//   VTI   - vector type info supplying RC, KRC, MemOp, LdFrag and VT.
12470 multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12471   defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
12472                                 (ins VTI.RC:$src1, VTI.RC:$src2),
12473                                 "vpshufbitqmb",
12474                                 "$src2, $src1", "$src1, $src2",
12475                                 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12476                                 (VTI.VT VTI.RC:$src2)),
12477                                 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12478                                 (VTI.VT VTI.RC:$src2))>, EVEX, VVVV, T8, PD,
12479                                 Sched<[sched]>;
12480   defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
12481                                 (ins VTI.RC:$src1, VTI.MemOp:$src2),
12482                                 "vpshufbitqmb",
12483                                 "$src2, $src1", "$src1, $src2",
12484                                 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12485                                 (VTI.VT (VTI.LdFrag addr:$src2))),
12486                                 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12487                                 (VTI.VT (VTI.LdFrag addr:$src2)))>,
12488                                 EVEX, VVVV, EVEX_CD8<8, CD8VF>, T8, PD,
12489                                 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates VPSHUFBITQMB_rm at the three vector widths. Z requires
// HasBITALG; Z256 and Z128 require HasBITALG and HasVLX.
12492 multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12493   let Predicates = [HasBITALG] in
12494   defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
12495   let Predicates = [HasBITALG, HasVLX] in {
12496     defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
12497     defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
12498   }
// The only instantiation: byte-element type info with the SchedWriteVecIMul
// scheduling class.
12501 // FIXME: Is there a better scheduler class for VPSHUFBITQMB?
12502 defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
12504 //===----------------------------------------------------------------------===//
12505 // GFNI
12506 //===----------------------------------------------------------------------===//
// Instantiates avx512_binop_rm for a GFNI byte operation at the three vector
// widths, using byte vector type infos (v64i8_info, v32i8x_info,
// v16i8x_info). Z requires HasGFNI and HasAVX512; Z256 and Z128 require
// HasGFNI and HasVLX.
// NOTE(review): the trailing "1" is the last argument of avx512_binop_rm,
// presumably its commutable flag - confirm against that multiclass.
12508 multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12509                                    X86SchedWriteWidths sched> {
12510   let Predicates = [HasGFNI, HasAVX512] in
12511   defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
12512                                 EVEX_V512;
12513   let Predicates = [HasGFNI, HasVLX] in {
12514     defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
12515                                 EVEX_V256;
12516     defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
12517                                 EVEX_V128;
12518   }
// vgf2p8mulb: opcode 0xCF with T8 and an 8-bit-element EVEX_CD8 setting.
12521 defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
12522                                           SchedWriteVecALU>,
12523                                           EVEX_CD8<8, CD8VF>, T8;
// Inherits the forms produced by avx512_3Op_rm_imm8<..., VTI, VTI> and adds a
// broadcast-memory form "rmbi" that also takes an 8-bit immediate ($src3).
//   VTI     - type info of the vector operands and result.
//   BcstVTI - type info describing the broadcast operand: it supplies the
//             scalar memory operand class and the broadcast suffix string.
// The broadcast pattern loads through X86VBroadcastld64 as BcstVTI.VT and
// bitconverts the value before handing it to OpNode, so the element type of
// the broadcast differs from the element type the operation works on.
12525 multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
12526                                       X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12527                                       X86VectorVTInfo BcstVTI>
12528            : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
12529   let ExeDomain = VTI.ExeDomain in
12530   defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12531                 (ins VTI.RC:$src1, BcstVTI.ScalarMemOp:$src2, u8imm:$src3),
12532                 OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
12533                 "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
12534                 (OpNode (VTI.VT VTI.RC:$src1),
12535                  (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
12536                  (i8 timm:$src3))>, EVEX_B,
12537                  Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates GF2P8AFFINE_avx512_rmb_imm at the three vector widths. Each
// width pairs a byte vector type info (operands/result) with the 64-bit
// integer type info of the same total width (broadcast operand).
// Z requires HasGFNI and HasAVX512; Z256 and Z128 require HasGFNI and HasVLX.
12540 multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12541                                      X86SchedWriteWidths sched> {
12542   let Predicates = [HasGFNI, HasAVX512] in
12543   defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
12544                                            v64i8_info, v8i64_info>, EVEX_V512;
12545   let Predicates = [HasGFNI, HasVLX] in {
12546     defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
12547                                            v32i8x_info, v4i64x_info>, EVEX_V256;
12548     defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
12549                                            v16i8x_info, v2i64x_info>, EVEX_V128;
12550   }
// The two affine instructions use opcodes 0xCF and 0xCE with AVX512AIi8Base
// and REX_W. Opcode 0xCF is also used by VGF2P8MULB, which is declared with
// T8 instead of AVX512AIi8Base.
12553 defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12554                          X86GF2P8affineinvqb, SchedWriteVecIMul>,
12555                          EVEX, VVVV, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
12556 defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12557                          X86GF2P8affineqb, SchedWriteVecIMul>,
12558                          EVEX, VVVV, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
12561 //===----------------------------------------------------------------------===//
12562 // AVX5124FMAPS
12563 //===----------------------------------------------------------------------===//
// The four AVX5124FMAPS instructions. All are memory-source only (MRMSrcMem
// with an f128mem operand), have $src1 tied to $dst, and are defined with
// AVX512_maskable_3src_in_asm and an empty pattern list, so no selection
// pattern is attached here. The enclosing let marks them as loads without
// other side effects that use MXCSR and may raise FP exceptions.
// The packed (PS) forms are 512-bit (EVEX_V512, v16f32_info, CD8VQ); the
// scalar (SS) forms use VR128X with f32x_info, VEX_LIG and CD8VF.
12565 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12566     Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
12567 defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12568                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12569                     "v4fmaddps", "$src3, $src2", "$src2, $src3",
12570                     []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12571                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12573 defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12574                      (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12575                      "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12576                      []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12577                      Sched<[SchedWriteFMA.ZMM.Folded]>;
12579 defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12580                     (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
12581                     "v4fmaddss", "$src3, $src2", "$src2, $src3",
12582                     []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>,
12583                     Sched<[SchedWriteFMA.Scl.Folded]>;
12585 defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12586                      (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12587                      "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12588                      []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>,
12589                      Sched<[SchedWriteFMA.Scl.Folded]>;
12592 //===----------------------------------------------------------------------===//
12593 // AVX5124VNNIW
12594 //===----------------------------------------------------------------------===//
// The two AVX5124VNNIW instructions. Like the AVX5124FMAPS definitions they
// are memory-source only (f128mem operand), 512-bit, have $src1 tied to $dst
// and carry an empty pattern list. They are in the SSEPackedInt domain and,
// unlike the FP forms, the enclosing let sets neither Uses = [MXCSR] nor
// mayRaiseFPException.
// The scheduling class is SchedWriteFMA.ZMM.Folded, the same one the
// AVX5124FMAPS packed forms use.
12596 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12597     Constraints = "$src1 = $dst" in {
12598 defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12599                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12600                      "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12601                     []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12602                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12604 defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12605                      (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12606                      "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12607                      []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12608                      Sched<[SchedWriteFMA.ZMM.Folded]>;
// Pseudo instructions that store and load a VK16PAIR value to/from memory.
// Neither has a selection pattern (empty list). The store is marked mayStore
// and scheduled as WriteFStoreX; the load is marked mayLoad and scheduled as
// WriteFLoadX.
12611 let hasSideEffects = 0 in {
12612   let mayStore = 1, SchedRW = [WriteFStoreX] in
12613   def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12614   let mayLoad = 1, SchedRW = [WriteFLoadX] in
12615   def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12618 //===----------------------------------------------------------------------===//
12619 // VP2INTERSECT
12620 //===----------------------------------------------------------------------===//
// Register ("rr"), full-vector memory ("rm") and broadcast-memory ("rmb")
// forms of vp2intersect, opcode 0x68. The result is written to the register
// class _.KRPC; the mnemonic is "vp2intersect" followed by _.Suffix.
//   sched - scheduling info; both memory forms use sched.Folded and
//           sched.ReadAfterFold.
//   _     - vector type info supplying RC, KRPC, MemOp, ScalarMemOp, LdFrag,
//           BroadcastLdFrag, BroadcastStr, Suffix, EltSize and VT.
12622 multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
12623   def rr : I<0x68, MRMSrcReg,
12624                   (outs _.KRPC:$dst),
12625                   (ins _.RC:$src1, _.RC:$src2),
12626                   !strconcat("vp2intersect", _.Suffix,
12627                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12628                   [(set _.KRPC:$dst, (X86vp2intersect
12629                             _.RC:$src1, (_.VT _.RC:$src2)))]>,
12630                   EVEX, VVVV, T8, XD, Sched<[sched]>;
  // Memory form: the loaded value is bitconverted to _.VT in the pattern.
12632   def rm : I<0x68, MRMSrcMem,
12633                   (outs _.KRPC:$dst),
12634                   (ins  _.RC:$src1, _.MemOp:$src2),
12635                   !strconcat("vp2intersect", _.Suffix,
12636                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12637                   [(set _.KRPC:$dst, (X86vp2intersect
12638                             _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12639                   EVEX, VVVV, T8, XD, EVEX_CD8<_.EltSize, CD8VF>,
12640                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Broadcast form: _.BroadcastStr is appended to the memory operand in both
  // assembly syntaxes and EVEX_B is set.
12642   def rmb : I<0x68, MRMSrcMem,
12643                   (outs _.KRPC:$dst),
12644                   (ins _.RC:$src1, _.ScalarMemOp:$src2),
12645                   !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12646                              ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12647                   [(set _.KRPC:$dst, (X86vp2intersect
12648                              _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
12649                   EVEX, VVVV, T8, XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
12650                   Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_vp2intersect_modes at the three vector widths.
// Z requires HasAVX512 and HasVP2INTERSECT; Z256 and Z128 additionally
// require HasVLX.
12653 multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
12654   let Predicates  = [HasAVX512, HasVP2INTERSECT] in
12655     defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
12657   let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12658     defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
12659     defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
12660   }
// Dword and qword instantiations in the SSEPackedInt domain; the qword one
// adds REX_W.
12663 let ExeDomain = SSEPackedInt in {
12664 defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
12665 defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, REX_W;
// Instantiates avx512_binop_rm2 at the three vector widths for a binary
// operation whose source and destination vector types differ.
//   opc/OpcodeStr - opcode value and mnemonic.
//   sched         - per-width scheduling info (ZMM/YMM/XMM members are used).
//   _SrcVTInfo    - source type info; passed to avx512_binop_rm2 both as its
//                   first and as its third type-info argument.
//   _DstVTInfo    - destination type info (second type-info argument).
//   OpNode        - SDNode forwarded to avx512_binop_rm2.
//   prd           - feature predicate; the 256/128-bit forms add HasVLX.
//   IsCommutable  - forwarded unchanged (defaults to 0).
// The records are named NAME#Z, NAME#Z256 and NAME#Z128 explicitly, and all
// widths use EVEX_CD8<32, CD8VF>.
// NOTE(review): the role of the third type-info argument is defined by
// avx512_binop_rm2, which is outside this view - presumably it describes the
// broadcast operand; confirm there.
12668 multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
12669                              X86SchedWriteWidths sched,
12670                              AVX512VLVectorVTInfo _SrcVTInfo,
12671                              AVX512VLVectorVTInfo _DstVTInfo,
12672                              SDNode OpNode, Predicate prd,
12673                              bit IsCommutable = 0> {
12674   let Predicates = [prd] in
12675     defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
12676                                    _SrcVTInfo.info512, _DstVTInfo.info512,
12677                                    _SrcVTInfo.info512, IsCommutable>,
12678                                    EVEX_V512, EVEX_CD8<32, CD8VF>;
12679   let Predicates = [HasVLX, prd] in {
12680     defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
12681                                       _SrcVTInfo.info256, _DstVTInfo.info256,
12682                                       _SrcVTInfo.info256, IsCommutable>,
12683                                      EVEX_V256, EVEX_CD8<32, CD8VF>;
12684     defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
12685                                       _SrcVTInfo.info128, _DstVTInfo.info128,
12686                                       _SrcVTInfo.info128, IsCommutable>,
12687                                       EVEX_V128, EVEX_CD8<32, CD8VF>;
12688   }
// vcvtne2ps2bf16: opcode 0x72 with T8 and XD, f32 sources and bf16
// destination, predicated on HasBF16 and declared non-commutable (last
// template argument is 0). The scheduling class is a stand-in, as the FIXME
// on the argument notes.
12691 let ExeDomain = SSEPackedSingle in
12692 defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12693                                         SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
12694                                         avx512vl_f32_info, avx512vl_bf16_info,
12695                                         X86cvtne2ps2bf16, HasBF16, 0>, T8, XD;
12697 // Truncate Float to BFloat16
// Defines the f32 -> bf16 conversion at the three source widths via
// avx512_vcvt_fp, plus "x"/"y"-suffixed assembler aliases.
//   opc/OpcodeStr - opcode value and mnemonic.
//   sched         - per-width scheduling info.
// All three forms override Uses to an empty register list and set
// mayRaiseFPException = 0. Z requires HasBF16; Z128 and Z256 additionally
// require HasVLX.
12698 multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12699                              X86SchedWriteWidths sched> {
12700   let ExeDomain = SSEPackedSingle in {
12701   let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
12702     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16bf16x_info, v16f32_info,
12703                             X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12704   }
12705   let Predicates = [HasBF16, HasVLX] in {
12706     let Uses = []<Register>, mayRaiseFPException = 0 in {
    // The 128-bit form passes null_frag for both pattern operators, so it
    // gets no built-in selection pattern; it also passes an explicit memory
    // operand (f128mem) and mask class (VK4WM).
12707     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v4f32x_info,
12708                                null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12709                                VK4WM>, EVEX_V128;
12710     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v8f32x_info,
12711                                X86cvtneps2bf16, X86cvtneps2bf16,
12712                                sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12713     }
12714   } // Predicates = [HasBF16, HasVLX]
12715   } // ExeDomain = SSEPackedSingle
  // Aliases spelling the source width in the mnemonic: OpcodeStr#"x" maps to
  // the Z128 records and OpcodeStr#"y" to the Z256 records. The memory-source
  // aliases are restricted to the "intel" assembler variant.
12717   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12718                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12719                   VR128X:$src), 0>;
12720   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12721                   (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12722                   f128mem:$src), 0, "intel">;
12723   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12724                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12725                   VR256X:$src), 0>;
12726   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12727                   (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12728                   f256mem:$src), 0, "intel">;
// vcvtneps2bf16: opcode 0x72 with T8 and XS. vcvtne2ps2bf16 uses the same
// opcode value but is declared with XD instead of XS.
12731 defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
12732                                        SchedWriteCvtPD2PS>, T8, XS,
12733                                        EVEX_CD8<32, CD8VF>;
// Selection patterns that need both HasBF16 and HasVLX: the hand-written
// 128-bit VCVTNEPS2BF16 patterns, the 128/256-bit intrinsic patterns, bf16
// broadcasts lowered to VPBROADCASTW, and X86vfpround from v8f32.
12735 let Predicates = [HasBF16, HasVLX] in {
12736   // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
12737   // patterns have been disabled with null_frag.
  // Register source: unmasked, merge-masked (rrk) and zero-masked (rrkz).
12738   def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
12739             (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12740   def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8bf16 VR128X:$src0),
12741                               VK4WM:$mask),
12742             (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
12743   def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8bf16x_info.ImmAllZerosV,
12744                               VK4WM:$mask),
12745             (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
  // Full-vector load source: unmasked, merge-masked and zero-masked.
12747   def : Pat<(v8bf16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
12748             (VCVTNEPS2BF16Z128rm addr:$src)>;
12749   def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8bf16 VR128X:$src0),
12750                               VK4WM:$mask),
12751             (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12752   def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8bf16x_info.ImmAllZerosV,
12753                               VK4WM:$mask),
12754             (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
  // 32-bit broadcast-load source: unmasked, merge-masked and zero-masked.
12756   def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32
12757                                      (X86VBroadcastld32 addr:$src)))),
12758             (VCVTNEPS2BF16Z128rmb addr:$src)>;
12759   def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12760                               (v8bf16 VR128X:$src0), VK4WM:$mask),
12761             (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12762   def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12763                               v8bf16x_info.ImmAllZerosV, VK4WM:$mask),
12764             (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
  // Direct selection of the 128-bit and 256-bit conversion intrinsics.
12766   def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (v4f32 VR128X:$src))),
12767             (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12768   def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src))),
12769             (VCVTNEPS2BF16Z128rm addr:$src)>;
12771   def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (v8f32 VR256X:$src))),
12772             (VCVTNEPS2BF16Z256rr VR256X:$src)>;
12773   def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src))),
12774             (VCVTNEPS2BF16Z256rm addr:$src)>;
  // bf16 vector broadcasts are selected to the 16-bit integer broadcast
  // instruction VPBROADCASTW, from memory or from a v8bf16 register.
12776   def : Pat<(v8bf16 (X86VBroadcastld16 addr:$src)),
12777             (VPBROADCASTWZ128rm addr:$src)>;
12778   def : Pat<(v16bf16 (X86VBroadcastld16 addr:$src)),
12779             (VPBROADCASTWZ256rm addr:$src)>;
12781   def : Pat<(v8bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12782             (VPBROADCASTWZ128rr VR128X:$src)>;
12783   def : Pat<(v16bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12784             (VPBROADCASTWZ256rr VR128X:$src)>;
  // X86vfpround from v8f32 to v8bf16 is selected to the 256-bit conversion.
12786   def : Pat<(v8bf16 (X86vfpround (v8f32 VR256X:$src))),
12787             (VCVTNEPS2BF16Z256rr VR256X:$src)>;
12788   def : Pat<(v8bf16 (X86vfpround (loadv8f32 addr:$src))),
12789             (VCVTNEPS2BF16Z256rm addr:$src)>;
12791   // TODO: No scalar broadcast patterns yet, because scalar bf16 is not a legal type so far.
// 512-bit patterns that need only HasBF16: v32bf16 broadcasts selected to
// VPBROADCASTW (memory and register source), and X86vfpround from v16f32 to
// v16bf16 selected to the 512-bit VCVTNEPS2BF16 (register and load source).
12794 let Predicates = [HasBF16] in {
12795   def : Pat<(v32bf16 (X86VBroadcastld16 addr:$src)),
12796             (VPBROADCASTWZrm addr:$src)>;
12798   def : Pat<(v32bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12799             (VPBROADCASTWZrr VR128X:$src)>;
12801   def : Pat<(v16bf16 (X86vfpround (v16f32 VR512:$src))),
12802             (VCVTNEPS2BF16Zrr VR512:$src)>;
12803   def : Pat<(v16bf16 (X86vfpround (loadv16f32 addr:$src))),
12804             (VCVTNEPS2BF16Zrm addr:$src)>;
12805   // TODO: No scalar broadcast patterns yet, because scalar bf16 is not a legal type so far.
// Register ("r"), full-vector memory ("m") and broadcast-memory ("mb") forms
// of a bf16 dot-product style instruction, all built with
// AVX512_maskable_3src and all with $src1 tied to $dst.
//   opc/OpcodeStr - opcode value and mnemonic.
//   OpNode        - SDNode matched by the "r" and "m" patterns.
//   sched         - scheduling info.
//   _             - type info of the accumulator/result ($src1/$dst).
//   src_v         - type info of the two multiplicand operands ($src2/$src3).
12808 let Constraints = "$src1 = $dst" in {
12809 multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
12810                               X86FoldableSchedWrite sched,
12811                               X86VectorVTInfo _, X86VectorVTInfo src_v> {
12812   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12813                            (ins src_v.RC:$src2, src_v.RC:$src3),
12814                            OpcodeStr, "$src3, $src2", "$src2, $src3",
12815                            (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
12816                            EVEX, VVVV, Sched<[sched]>;
12818   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12819                                (ins src_v.RC:$src2, src_v.MemOp:$src3),
12820                                OpcodeStr, "$src3, $src2", "$src2, $src3",
12821                                (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12822                                (src_v.LdFrag addr:$src3)))>, EVEX, VVVV,
12823                                Sched<[sched.Folded, sched.ReadAfterFold]>;
  // The broadcast form has no selection pattern (null_frag), so mayLoad and
  // hasSideEffects are stated explicitly. Its memory operand is f32mem and
  // the broadcast suffix comes from the result type info (_.BroadcastStr).
12825   let mayLoad = 1, hasSideEffects = 0 in
12826   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12827                   (ins src_v.RC:$src2, f32mem:$src3),
12828                   OpcodeStr,
12829                   !strconcat("${src3}", _.BroadcastStr,", $src2"),
12830                   !strconcat("$src2, ${src3}", _.BroadcastStr),
12831                   (null_frag)>,
12832                   EVEX_B, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
12835 } // Constraints = "$src1 = $dst"
// Instantiates avx512_dpbf16ps_rm for each vector width:
//   Z    - 512-bit, guarded by `prd`.
//   Z256 - 256-bit, guarded by HasVLX and `prd`.
//   Z128 - 128-bit, guarded by HasVLX and `prd`.
// `_` supplies the destination type infos, `src_v` the source type infos, and
// `sched` the per-width scheduling classes (ZMM / YMM / XMM).
12837 multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
12838                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
12839                                  AVX512VLVectorVTInfo src_v, Predicate prd> {
12840   let Predicates = [prd] in {
12841     defm Z    : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
12842                                    src_v.info512>, EVEX_V512;
12843   }
12844   let Predicates = [HasVLX, prd] in {
12845     defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
12846                                    src_v.info256>, EVEX_V256;
12847     defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
12848                                    src_v.info128>, EVEX_V128;
12849   }
// vdpbf16ps: opcode 0x52, matched from X86dpbf16ps. The destination uses the
// f32 vector infos and the sources use the bf16 vector infos; all widths are
// guarded by HasBF16 and scheduled with SchedWriteFMA.
12852 let ExeDomain = SSEPackedSingle in
12853 defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
12854                                        avx512vl_f32_info, avx512vl_bf16_info,
12855                                        HasBF16>, T8, XS, EVEX_CD8<32, CD8VF>;
12857 //===----------------------------------------------------------------------===//
12858 // AVX512FP16
12859 //===----------------------------------------------------------------------===//
// Everything from here on in this region is guarded by HasFP16.
12861 let Predicates = [HasFP16] in {
12862 // Move word (r/m16) to packed word.
// VMOVW2SHrr takes a GR32 source and carries no pattern of its own (empty
// pattern list); it is selected through separate Pat<> records.
12863 def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
12864                       "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
// VMOVWrm loads an i16 from memory and matches scalar_to_vector into v8i16.
12865 def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
12866                       "vmovw\t{$src, $dst|$dst, $src}",
12867                       [(set VR128X:$dst,
12868                         (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
12869                       T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;
// GPR -> vector patterns for VMOVW2SHrr.
// GR16 sources are first placed into the sub_16bit lane of an IMPLICIT_DEF
// 32-bit value with INSERT_SUBREG, because the instruction is given a GR32
// operand; the remaining bits of that GR32 are therefore undefined.
12871 def : Pat<(f16 (bitconvert GR16:$src)),
12872           (f16 (COPY_TO_REGCLASS
12873                 (VMOVW2SHrr
12874                  (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
12875                 FR16X))>;
12876 def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))),
12877           (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
// A GR32 masked with 0xffff and moved into a zero-extended vector (X86vzmovl)
// is emitted as a plain VMOVW2SHrr with no separate AND.
// NOTE(review): presumably vmovw reads only the low 16 bits and zeroes the
// rest of the destination - confirm against the ISA reference.
12878 def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))),
12879           (VMOVW2SHrr GR32:$src)>;
12880 // FIXME: We should really find a way to improve these patterns.
// 256-bit and 512-bit results wrap the 128-bit VMOVW2SHrr result in
// SUBREG_TO_REG at sub_xmm.
12881 def : Pat<(v8i32 (X86vzmovl
12882                   (insert_subvector undef,
12883                                     (v4i32 (scalar_to_vector
12884                                             (and GR32:$src, 0xffff))),
12885                                     (iPTR 0)))),
12886           (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
12887 def : Pat<(v16i32 (X86vzmovl
12888                    (insert_subvector undef,
12889                                      (v4i32 (scalar_to_vector
12890                                              (and GR32:$src, 0xffff))),
12891                                      (iPTR 0)))),
12892           (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
12894 def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))),
12895           (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
12897 // The AVX 128-bit movw instruction writes zeros in the high 128-bit part.
// Memory -> vector patterns for VMOVWrm. Results wider than 128 bits wrap the
// 128-bit VMOVWrm result in SUBREG_TO_REG at sub_xmm.
12898 def : Pat<(v8i16 (X86vzload16 addr:$src)),
12899           (VMOVWrm addr:$src)>;
12900 def : Pat<(v16i16 (X86vzload16 addr:$src)),
12901           (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
12903 // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
12904 def : Pat<(v32i16 (X86vzload16 addr:$src)),
12905           (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
// 16-bit extending loads placed into i32 vector elements also use VMOVWrm:
// extloadi16 for a plain scalar_to_vector, zextloadi16 under X86vzmovl.
12907 def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))),
12908           (VMOVWrm addr:$src)>;
12909 def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))),
12910           (VMOVWrm addr:$src)>;
12911 def : Pat<(v8i32 (X86vzmovl
12912                   (insert_subvector undef,
12913                                     (v4i32 (scalar_to_vector
12914                                             (i32 (zextloadi16 addr:$src)))),
12915                                     (iPTR 0)))),
12916           (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
12917 def : Pat<(v16i32 (X86vzmovl
12918                    (insert_subvector undef,
12919                                      (v4i32 (scalar_to_vector
12920                                              (i32 (zextloadi16 addr:$src)))),
12921                                      (iPTR 0)))),
12922           (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
12924 // Move word from xmm register to r/m16
// VMOVSH2Wrr writes a GR32 and has no pattern of its own (empty pattern list);
// the Pat<> records below extract the sub_16bit part of its result.
12925 def VMOVSH2Wrr  : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
12926                        "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveToGpr]>;
// VMOVWmr stores element 0 of a v8i16 register to an i16 memory location.
12927 def VMOVWmr  : AVX512<0x7E, MRMDestMem, (outs),
12928                        (ins i16mem:$dst, VR128X:$src),
12929                        "vmovw\t{$src, $dst|$dst, $src}",
12930                        [(store (i16 (extractelt (v8i16 VR128X:$src),
12931                                      (iPTR 0))), addr:$dst)]>,
12932                        T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;
// f16 -> i16 bitconvert: copy FR16X into VR128X, move to a GR32, then take the
// low 16-bit subregister.
12934 def : Pat<(i16 (bitconvert FR16X:$src)),
12935           (i16 (EXTRACT_SUBREG
12936                 (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)),
12937                 sub_16bit))>;
// Extracting element 0 of a v8i16 uses the same move followed by a
// subregister extract.
12938 def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
12939           (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>;
12941 // Allow "vmovw" to use GR64
// Same opcodes as the GR32 forms (0x6E / 0x7E) with REX_W added. Neither
// record has a selection pattern, so hasSideEffects is set explicitly.
12942 let hasSideEffects = 0 in {
12943   def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
12944                      "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
12945   def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
12946                      "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>;
12950 // Convert 16-bit float to i16/u16
// Per-width instantiation of a packed conversion between _Src and _Dst.
//   Z    - 512-bit, HasFP16: avx512_vcvt_fp plus the avx512_vcvt_fp_rc form
//          driven by OpNodeRnd (presumably the embedded-rounding variant).
//   Z128 / Z256 - HasFP16 + HasVLX: avx512_vcvt_fp only.
// Despite the name, this multiclass is also instantiated for the integer ->
// f16 direction (VCVTUW2PH / VCVTW2PH) by passing the f16 infos as _Dst and
// the i16 infos as _Src.
12951 multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12952                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12953                           AVX512VLVectorVTInfo _Dst,
12954                           AVX512VLVectorVTInfo _Src,
12955                           X86SchedWriteWidths sched> {
12956   let Predicates = [HasFP16] in {
12957     defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
12958                             OpNode, MaskOpNode, sched.ZMM>,
12959              avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
12960                                OpNodeRnd, sched.ZMM>, EVEX_V512;
12961   }
12962   let Predicates = [HasFP16, HasVLX] in {
12963     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
12964                                OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
12965     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
12966                                OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
12967   }
12970 // Convert 16-bit float to i16/u16 truncate
// Same layout as avx512_cvtph2w, except that the 512-bit form pairs
// avx512_vcvt_fp with avx512_vcvt_fp_sae (driven by OpNodeRnd) instead of
// avx512_vcvt_fp_rc. The 128/256-bit forms require HasVLX and use
// avx512_vcvt_fp only.
12971 multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12972                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12973                            AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src,
12974                            X86SchedWriteWidths sched> {
12975   let Predicates = [HasFP16] in {
12976     defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
12977                             OpNode, MaskOpNode, sched.ZMM>,
12978              avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
12979                                OpNodeRnd, sched.ZMM>, EVEX_V512;
12980   }
12981   let Predicates = [HasFP16, HasVLX] in {
12982     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
12983                                OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
12984     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
12985                                OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
12986   }
// Packed f16 <-> i16/u16 conversions, all in T_MAP5 with EVEX_CD8<16, CD8VF>.
// The ph2w / ph2uw forms convert f16 -> i16; the w2ph / uw2ph forms reuse
// avx512_cvtph2w with the f16 infos as destination and the i16 infos as
// source. Instructions that share an opcode byte (0x7D or 0x7C) differ in the
// prefix mixin given here (none, PD, XS or XD).
12989 defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
12990                                 X86cvtp2UIntRnd, avx512vl_i16_info,
12991                                 avx512vl_f16_info, SchedWriteCvtPD2DQ>,
12992                                 T_MAP5, EVEX_CD8<16, CD8VF>;
12993 defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
12994                                 X86VUintToFpRnd, avx512vl_f16_info,
12995                                 avx512vl_i16_info, SchedWriteCvtPD2DQ>,
12996                                 T_MAP5, XD, EVEX_CD8<16, CD8VF>;
12997 defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si,
12998                                 X86cvttp2si, X86cvttp2siSAE,
12999                                 avx512vl_i16_info, avx512vl_f16_info,
13000                                 SchedWriteCvtPD2DQ>, T_MAP5, PD, EVEX_CD8<16, CD8VF>;
13001 defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui,
13002                                 X86cvttp2ui, X86cvttp2uiSAE,
13003                                 avx512vl_i16_info, avx512vl_f16_info,
13004                                 SchedWriteCvtPD2DQ>, T_MAP5, EVEX_CD8<16, CD8VF>;
13005 defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
13006                                 X86cvtp2IntRnd, avx512vl_i16_info,
13007                                 avx512vl_f16_info, SchedWriteCvtPD2DQ>,
13008                                 T_MAP5, PD, EVEX_CD8<16, CD8VF>;
13009 defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
13010                                 X86VSintToFpRnd, avx512vl_f16_info,
13011                                 avx512vl_i16_info, SchedWriteCvtPD2DQ>,
13012                                 T_MAP5, XS, EVEX_CD8<16, CD8VF>;
13014 // Convert Half to Signed/Unsigned Doubleword
// Destination / source type infos per width:
//   Z    - v16i32 <- v16f16x, with an additional avx512_vcvt_fp_rc form.
//   Z256 - v8i32x <- v8f16x.
//   Z128 - v4i32x <- v8f16x; the broadcast string "{1to4}" and the f64mem
//          operand are passed explicitly (presumably because only four of the
//          eight f16 source elements, i.e. 64 bits, are consumed).
13015 multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13016                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13017                            X86SchedWriteWidths sched> {
13018   let Predicates = [HasFP16] in {
13019     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
13020                             MaskOpNode, sched.ZMM>,
13021              avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
13022                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
13023   }
13024   let Predicates = [HasFP16, HasVLX] in {
13025     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
13026                                MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
13027     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
13028                                MaskOpNode, sched.YMM>, EVEX_V256;
13029   }
13032 // Convert Half to Signed/Unsigned Doubleword with truncation
// Same type layout as avx512_cvtph2dq; the 512-bit form pairs avx512_vcvt_fp
// with avx512_vcvt_fp_sae instead of avx512_vcvt_fp_rc. The Z128 form again
// passes "{1to4}" and f64mem explicitly for its v8f16x source.
13033 multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13034                             SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13035                             X86SchedWriteWidths sched> {
13036   let Predicates = [HasFP16] in {
13037     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
13038                             MaskOpNode, sched.ZMM>,
13039              avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
13040                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
13041   }
13042   let Predicates = [HasFP16, HasVLX] in {
13043     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
13044                                MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
13045     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
13046                                MaskOpNode, sched.YMM>, EVEX_V256;
13047   }
// Packed f16 -> signed/unsigned doubleword conversions (plain and truncating),
// all in T_MAP5 with EVEX_CD8<16, CD8VH>. The signed forms share opcode 0x5B
// and differ by prefix (PD vs. XS); the unsigned forms use 0x79 / 0x78 with no
// prefix mixin.
13051 defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
13052                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
13053                                  EVEX_CD8<16, CD8VH>;
13054 defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
13055                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5,
13056                                  EVEX_CD8<16, CD8VH>;
13058 defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
13059                                 X86cvttp2si, X86cvttp2siSAE,
13060                                 SchedWriteCvtPS2DQ>, T_MAP5, XS,
13061                                 EVEX_CD8<16, CD8VH>;
13063 defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
13064                                  X86cvttp2ui, X86cvttp2uiSAE,
13065                                  SchedWriteCvtPS2DQ>, T_MAP5,
13066                                  EVEX_CD8<16, CD8VH>;
13068 // Convert Half to Signed/Unsigned Quadword
// Every width reads from v8f16x_info; the destination is v8i64 (Z),
// v4i64x (Z256) or v2i64x (Z128). The 512-bit form also gets an
// avx512_vcvt_fp_rc variant driven by OpNodeRnd.
13069 multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13070                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13071                            X86SchedWriteWidths sched> {
13072   let Predicates = [HasFP16] in {
13073     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
13074                             MaskOpNode, sched.ZMM>,
13075              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
13076                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13077   }
13078   let Predicates = [HasFP16, HasVLX] in {
13079     // Explicitly specified broadcast string, since we take only 2 elements
13080     // from v8f16x_info source
13081     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
13082                                MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
13083                                EVEX_V128;
13084     // Explicitly specified broadcast string, since we take only 4 elements
13085     // from v8f16x_info source
13086     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
13087                                MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
13088                                EVEX_V256;
13089   }
13092 // Convert Half to Signed/Unsigned Quadword with truncation
// Same type layout as avx512_cvtph2qq; the 512-bit form pairs avx512_vcvt_fp
// with avx512_vcvt_fp_sae instead of avx512_vcvt_fp_rc.
13093 multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13094                             SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13095                             X86SchedWriteWidths sched> {
13096   let Predicates = [HasFP16] in {
13097     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
13098                             MaskOpNode, sched.ZMM>,
13099              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
13100                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
13101   }
13102   let Predicates = [HasFP16, HasVLX] in {
13103     // Explicitly specified broadcast string, since we take only 2 elements
13104     // from v8f16x_info source
13105     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
13106                                MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
13107     // Explicitly specified broadcast string, since we take only 4 elements
13108     // from v8f16x_info source
13109     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
13110                                MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
13111   }
// Packed f16 -> signed/unsigned quadword conversions (plain and truncating).
// All four use T_MAP5, PD and EVEX_CD8<16, CD8VQ>, and are distinguished by
// opcode byte (0x7B, 0x79, 0x7A, 0x78).
13114 defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
13115                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
13116                                  EVEX_CD8<16, CD8VQ>;
13118 defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
13119                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
13120                                  EVEX_CD8<16, CD8VQ>;
13122 defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
13123                                  X86cvttp2si, X86cvttp2siSAE,
13124                                  SchedWriteCvtPS2DQ>, T_MAP5, PD,
13125                                  EVEX_CD8<16, CD8VQ>;
13127 defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
13128                                  X86cvttp2ui, X86cvttp2uiSAE,
13129                                  SchedWriteCvtPS2DQ>, T_MAP5, PD,
13130                                  EVEX_CD8<16, CD8VQ>;
13132 // Convert Signed/Unsigned Quadword to Half
// Every width writes v8f16x_info; the source is v8i64 (Z), v4i64x (Z256) or
// v2i64x (Z128). The 128/256-bit instantiations pass null_frag for both
// OpNode and MaskOpNode, so they carry no selection patterns of their own.
13133 multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13134                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13135                            X86SchedWriteWidths sched> {
13136   // we need "x"/"y"/"z" suffixes in order to distinguish between 128, 256 and
13137   // 512 memory forms of these instructions in Asm Parser. They have the same
13138   // dest type - 'v8f16x_info'. We also specify the broadcast string explicitly
13139   // due to the same reason.
13140   let Predicates = [HasFP16] in {
13141     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
13142                             MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
13143              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
13144                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13145   }
13146   let Predicates = [HasFP16, HasVLX] in {
13147     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
13148                                null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
13149                                i128mem, VK2WM>, EVEX_V128;
13150     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
13151                                null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
13152                                i256mem, VK4WM>, EVEX_V256;
13153   }
        // Explicitly suffixed mnemonics for the register and broadcast forms.
        // Each alias is declared with Emit = 0 and the "att" variant name
        // (see the InstAlias class in Target.td for the exact meaning of
        // those two trailing arguments).
        // "x" suffix -> Z128 forms (rr / rrk / rrkz / rmb / rmbk / rmbkz).
13155   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
13156                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
13157                   VR128X:$src), 0, "att">;
13158   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
13159                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
13160                   VK2WM:$mask, VR128X:$src), 0, "att">;
13161   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
13162                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
13163                   VK2WM:$mask, VR128X:$src), 0, "att">;
13164   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
13165                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
13166                   i64mem:$src), 0, "att">;
13167   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
13168                   "$dst {${mask}}, ${src}{1to2}}",
13169                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
13170                   VK2WM:$mask, i64mem:$src), 0, "att">;
13171   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
13172                   "$dst {${mask}} {z}, ${src}{1to2}}",
13173                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
13174                   VK2WM:$mask, i64mem:$src), 0, "att">;
        // "y" suffix -> Z256 forms.
13176   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
13177                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
13178                   VR256X:$src), 0, "att">;
13179   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
13180                   "$dst {${mask}}, $src}",
13181                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
13182                   VK4WM:$mask, VR256X:$src), 0, "att">;
13183   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
13184                   "$dst {${mask}} {z}, $src}",
13185                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
13186                   VK4WM:$mask, VR256X:$src), 0, "att">;
13187   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
13188                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
13189                   i64mem:$src), 0, "att">;
13190   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
13191                   "$dst {${mask}}, ${src}{1to4}}",
13192                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
13193                   VK4WM:$mask, i64mem:$src), 0, "att">;
13194   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
13195                   "$dst {${mask}} {z}, ${src}{1to4}}",
13196                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
13197                   VK4WM:$mask, i64mem:$src), 0, "att">;
        // "z" suffix -> 512-bit (Z) forms.
13199   def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
13200                   (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
13201                   VR512:$src), 0, "att">;
13202   def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
13203                   "$dst {${mask}}, $src}",
13204                   (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
13205                   VK8WM:$mask, VR512:$src), 0, "att">;
13206   def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
13207                   "$dst {${mask}} {z}, $src}",
13208                   (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
13209                   VK8WM:$mask, VR512:$src), 0, "att">;
13210   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
13211                   (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
13212                   i64mem:$src), 0, "att">;
13213   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
13214                   "$dst {${mask}}, ${src}{1to8}}",
13215                   (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
13216                   VK8WM:$mask, i64mem:$src), 0, "att">;
13217   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
13218                   "$dst {${mask}} {z}, ${src}{1to8}}",
13219                   (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
13220                   VK8WM:$mask, i64mem:$src), 0, "att">;
// Packed signed / unsigned quadword -> f16. Both use REX_W, T_MAP5 and
// EVEX_CD8<64, CD8VF>; the unsigned form additionally carries the XD prefix
// and uses opcode 0x7A instead of 0x5B.
13223 defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
13224                             X86VSintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5,
13225                             EVEX_CD8<64, CD8VF>;
13227 defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
13228                             X86VUintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5, XD,
13229                             EVEX_CD8<64, CD8VF>;
13231 // Convert half to signed/unsigned int 32/64
// Scalar f16 -> i32/i64 conversions, all guarded by HasFP16 and encoded in
// T_MAP5 with XS and EVEX_CD8<16, CD8VT1>. Each 64-bit destination variant
// adds REX_W and passes "{q}" where the 32-bit variant passes "{l}".
// The first group goes through avx512_cvt_s_int_round (opcodes 0x2D / 0x79).
13232 defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
13233                                    X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
13234                                    T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13235 defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
13236                                    X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
13237                                    T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
13238 defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
13239                                    X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
13240                                    T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13241 defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
13242                                    X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
13243                                    T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
// Truncating variants go through avx512_cvt_s_all (opcodes 0x2C / 0x78) and
// are matched from any_fp_to_sint / any_fp_to_uint.
13245 defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
13246                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13247                         "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13248 defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
13249                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13250                         "{q}", HasFP16>, REX_W, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13251 defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
13252                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13253                         "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13254 defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
13255                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13256                         "{q}", HasFP16>, T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
// Scalar integer -> f16 conversions (signed and unsigned, 32- and 64-bit
// sources), their unsuffixed AT&T aliases, and the selection patterns that
// use them. Everything in this region is guarded by HasFP16.
13258 let Predicates = [HasFP16] in {
13259   defm VCVTSI2SHZ  : avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
13260                                    v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
13261                                    T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
13262   defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
13263                                    v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
13264                                    T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>;
13265   defm VCVTUSI2SHZ   : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
13266                                     v8f16x_info, i32mem, loadi32,
13267                                     "cvtusi2sh","l">, T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
13268   defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
13269                                     v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
13270                                     T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>;
        // Unsuffixed mnemonics with a memory operand map to the 32-bit
        // (i32mem) _Int memory forms.
13271   def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13272               (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13274   def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13275               (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
        // Plain scalar conversions: the first (pass-through) operand of the
        // instruction is an IMPLICIT_DEF f16, i.e. left undefined.
13278   def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
13279             (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13280   def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
13281             (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13283   def : Pat<(f16 (any_sint_to_fp GR32:$src)),
13284             (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13285   def : Pat<(f16 (any_sint_to_fp GR64:$src)),
13286             (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13288   def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
13289             (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13290   def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
13291             (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13293   def : Pat<(f16 (any_uint_to_fp GR32:$src)),
13294             (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13295   def : Pat<(f16 (any_uint_to_fp GR64:$src)),
13296             (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13298   // Patterns used for matching vcvtsi2sh intrinsic sequences from clang
13299   // which produce unnecessary vmovsh instructions
        // Each pattern folds X86Movsh($dst, scalar_to_vector(convert(src)))
        // into the matching _Int form, which takes $dst as its vector operand.
13300   def : Pat<(v8f16 (X86Movsh
13301                      (v8f16 VR128X:$dst),
13302                      (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
13303             (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13305   def : Pat<(v8f16 (X86Movsh
13306                      (v8f16 VR128X:$dst),
13307                      (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
13308             (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13310   def : Pat<(v8f16 (X86Movsh
13311                      (v8f16 VR128X:$dst),
13312                      (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
13313             (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13315   def : Pat<(v8f16 (X86Movsh
13316                      (v8f16 VR128X:$dst),
13317                      (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
13318             (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13320   def : Pat<(v8f16 (X86Movsh
13321                      (v8f16 VR128X:$dst),
13322                      (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
13323             (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13325   def : Pat<(v8f16 (X86Movsh
13326                      (v8f16 VR128X:$dst),
13327                      (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
13328             (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13330   def : Pat<(v8f16 (X86Movsh
13331                      (v8f16 VR128X:$dst),
13332                      (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
13333             (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13335   def : Pat<(v8f16 (X86Movsh
13336                      (v8f16 VR128X:$dst),
13337                      (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
13338             (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13339 } // Predicates = [HasFP16]
// Patterns below are only active when both HasFP16 and HasVLX hold.
//
// VCVTQQ2PHZ256: v4i64 source producing a v8f16 result, masked by VK4WM.
// For each source kind (register, full vector load, 64-bit broadcast load)
// three patterns are provided:
//   - X86any_VSintToFP with no mask           -> base instruction
//   - X86VMSintToFP with a v8f16 $src0 operand -> "k" form ($src0, mask, src)
//   - X86VMSintToFP with ImmAllZerosV          -> "kz" form (mask, src)
13341 let Predicates = [HasFP16, HasVLX] in {
13342   // Special patterns to allow use of X86VMSintToFP for masking. Instruction
13343   // patterns have been disabled with null_frag.
13344   def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
13345             (VCVTQQ2PHZ256rr VR256X:$src)>;
13346   def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13347                            VK4WM:$mask),
13348             (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13349   def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13350                            VK4WM:$mask),
13351             (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
        // Source is a full v4i64 load (loadv4i64): rm / rmk / rmkz.
13353   def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
13354             (VCVTQQ2PHZ256rm addr:$src)>;
13355   def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13356                            VK4WM:$mask),
13357             (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13358   def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13359                            VK4WM:$mask),
13360             (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
        // Source is an X86VBroadcastld64 of a single address: rmb / rmbk / rmbkz.
13362   def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13363             (VCVTQQ2PHZ256rmb addr:$src)>;
13364   def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13365                            (v8f16 VR128X:$src0), VK4WM:$mask),
13366             (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13367   def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13368                            v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13369             (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
// VCVTQQ2PHZ128: v2i64 source producing a v8f16 result, masked by VK2WM.
// Register source: unmasked X86any_VSintToFP -> rr; X86VMSintToFP with a
// v8f16 $src0 -> rrk; X86VMSintToFP with ImmAllZerosV -> rrkz.
13371   def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
13372             (VCVTQQ2PHZ128rr VR128X:$src)>;
13373   def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13374                            VK2WM:$mask),
13375             (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13376   def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13377                            VK2WM:$mask),
13378             (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
        // Source is a full v2i64 load (loadv2i64): rm / rmk / rmkz.
13380   def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
13381             (VCVTQQ2PHZ128rm addr:$src)>;
13382   def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13383                            VK2WM:$mask),
13384             (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13385   def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13386                            VK2WM:$mask),
13387             (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
        // Source is an X86VBroadcastld64 of a single address: rmb / rmbk / rmbkz.
13389   def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13390             (VCVTQQ2PHZ128rmb addr:$src)>;
13391   def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13392                            (v8f16 VR128X:$src0), VK2WM:$mask),
13393             (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13394   def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13395                            v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13396             (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
// VCVTUQQ2PHZ256: the X86any_VUintToFP / X86VMUintToFP patterns for a v4i64
// source producing v8f16, masked by VK4WM. For each source kind the unmasked
// node selects the base instruction, the masked node with a v8f16 $src0
// selects the "k" form, and the masked node with ImmAllZerosV selects "kz".
13398   // Special patterns to allow use of X86VMUintToFP for masking. Instruction
13399   // patterns have been disabled with null_frag.
13400   def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
13401             (VCVTUQQ2PHZ256rr VR256X:$src)>;
13402   def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13403                            VK4WM:$mask),
13404             (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13405   def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13406                            VK4WM:$mask),
13407             (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
        // Source is a full v4i64 load (loadv4i64): rm / rmk / rmkz.
13409   def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
13410             (VCVTUQQ2PHZ256rm addr:$src)>;
13411   def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13412                            VK4WM:$mask),
13413             (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13414   def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13415                            VK4WM:$mask),
13416             (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
        // Source is an X86VBroadcastld64 of a single address: rmb / rmbk / rmbkz.
13418   def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13419             (VCVTUQQ2PHZ256rmb addr:$src)>;
13420   def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13421                            (v8f16 VR128X:$src0), VK4WM:$mask),
13422             (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13423   def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13424                            v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13425             (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
// VCVTUQQ2PHZ128: v2i64 source producing a v8f16 result, masked by VK2WM.
// Register source: unmasked X86any_VUintToFP -> rr; X86VMUintToFP with a
// v8f16 $src0 -> rrk; X86VMUintToFP with ImmAllZerosV -> rrkz.
13427   def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
13428             (VCVTUQQ2PHZ128rr VR128X:$src)>;
13429   def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13430                            VK2WM:$mask),
13431             (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13432   def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13433                            VK2WM:$mask),
13434             (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
        // Source is a full v2i64 load (loadv2i64): rm / rmk / rmkz.
13436   def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
13437             (VCVTUQQ2PHZ128rm addr:$src)>;
13438   def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13439                            VK2WM:$mask),
13440             (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13441   def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13442                            VK2WM:$mask),
13443             (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
        // Source is an X86VBroadcastld64 of a single address: rmb / rmbk / rmbkz.
13445   def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13446             (VCVTUQQ2PHZ128rmb addr:$src)>;
13447   def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13448                            (v8f16 VR128X:$src0), VK2WM:$mask),
13449             (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13450   def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13451                            v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13452             (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
// avx512_cfmaop_rm: register, memory and broadcast-memory forms of a
// three-source operation, each built with AVX512_maskable_3src.
//   opc          - opcode byte forwarded to every form
//   OpcodeStr    - assembly mnemonic
//   OpNode       - selection node, always applied as (src2, src3, src1)
//   _            - X86VectorVTInfo supplying RC, VT, MemOp, LdFrag, etc.
//   IsCommutable - forwarded to the register form only
// All three forms are EVEX, VVVV; the enclosing let ties $src1 to $dst and
// marks $dst @earlyclobber.
13455 let Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13456   multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> {
          // r: both explicit sources are registers.
13457     defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13458             (ins _.RC:$src2, _.RC:$src3),
13459             OpcodeStr, "$src3, $src2", "$src2, $src3",
13460             (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX, VVVV;
          // m: $src3 is a full-width memory operand read through _.LdFrag.
13462     defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13463             (ins _.RC:$src2, _.MemOp:$src3),
13464             OpcodeStr, "$src3, $src2", "$src2, $src3",
13465             (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX, VVVV;
          // mb: $src3 is a scalar memory operand read through _.BroadcastLdFrag;
          // the asm strings append _.BroadcastStr and the encoding adds EVEX_B.
13467     defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13468             (ins _.RC:$src2, _.ScalarMemOp:$src3),
13469             OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr),
13470             (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX, VVVV;
13471   }
13472 } // Constraints = "@earlyclobber $dst, $src1 = $dst"
// avx512_cfmaop_round: register-only "rb" form that carries an extra
// AVX512RC:$rc operand. OpNode is applied as (src2, src3, src1, (i32 timm:$rc)).
// $src1 is tied to $dst and $dst is @earlyclobber; the encoding is
// EVEX, VVVV plus EVEX_B and EVEX_RC.
13474 multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
13475                                  X86VectorVTInfo _> {
13476   let Constraints = "@earlyclobber $dst, $src1 = $dst" in
13477   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13478           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
13479           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
13480           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>,
13481           EVEX, VVVV, EVEX_B, EVEX_RC;
// avx512_cfmaop_common: instantiates the three-source forms at three widths.
//   Z    (HasFP16)         - v16f32_info, avx512_cfmaop_rm plus
//                            avx512_cfmaop_round (using OpNodeRnd), EVEX_V512,
//                            scheduled as WriteFMAZ
//   Z256 (HasVLX, HasFP16) - v8f32x_info, avx512_cfmaop_rm only, EVEX_V256,
//                            scheduled as WriteFMAY
//   Z128 (HasVLX, HasFP16) - v4f32x_info, avx512_cfmaop_rm only, EVEX_V128,
//                            scheduled as WriteFMAX
// Only the 512-bit variant gets the rounding-control form.
13485 multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> {
13486   let Predicates = [HasFP16] in {
13487     defm Z    : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>,
13488                 avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
13489                       EVEX_V512, Sched<[WriteFMAZ]>;
13490   }
13491   let Predicates = [HasVLX, HasFP16] in {
13492     defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>;
13493     defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>;
13494   }
// avx512_cfmulop_common: instantiates a two-source operation at three widths
// by reusing avx512_fp_packed (and, for the 512-bit variant only,
// avx512_fp_round_packed with OpNodeRnd).
//   Z    (HasFP16)         - v16f32_info, WriteFMAZ, EVEX_V512
//   Z256 (HasVLX, HasFP16) - v8f32x_info, WriteFMAY, EVEX_V256
//   Z128 (HasVLX, HasFP16) - v4f32x_info, WriteFMAX, EVEX_V128
// IsCommutable is passed twice to avx512_fp_packed, and every instantiation
// passes the string "@earlyclobber $dst".
// NOTE(review): the meaning of each positional argument after the schedule
// class (the empty string and the trailing 0 included) is set by
// avx512_fp_packed / avx512_fp_round_packed, defined outside this chunk -
// confirm there before changing them.
13497 multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13498                                  SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> {
13499   let Predicates = [HasFP16] in {
13500     defm Z    : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
13501                                  WriteFMAZ, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>,
13502                 avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info,
13503                                        "", "@earlyclobber $dst">, EVEX_V512;
13504   }
13505   let Predicates = [HasVLX, HasFP16] in {
13506     defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
13507                                  WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256;
13508     defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
13509                                  WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128;
13510   }
// Packed instantiations; all are declared to use MXCSR and share
// T_MAP6 and EVEX_CD8<32, CD8VF>.
//   VFMADDCPH  - opcode 0x56, XS prefix, IsCommutable = 1
//   VFCMADDCPH - opcode 0x56, XD prefix, IsCommutable = 0
//   VFMULCPH   - opcode 0xD6, XS prefix, IsCommutable = 1
//   VFCMULCPH  - opcode 0xD6, XD prefix, IsCommutable = 0
// Each pair shares an opcode and differs only in the XS/XD prefix. The two
// multiply defs pass the same node for both OpNode and MaskOpNode.
13514 let Uses = [MXCSR] in {
13515   defm VFMADDCPH  : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>,
13516                                     T_MAP6, XS, EVEX_CD8<32, CD8VF>;
13517   defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>,
13518                                     T_MAP6, XD, EVEX_CD8<32, CD8VF>;
13520   defm VFMULCPH  : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc,
13521                                          x86vfmulcRnd, 1>, T_MAP6, XS, EVEX_CD8<32, CD8VF>;
13522   defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc,
13523                                          x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6, XD, EVEX_CD8<32, CD8VF>;
// avx512_cfmaop_sh_common: three-source forms on VR128X / v4f32x_info, all
// under HasFP16 with $src1 tied to $dst and $dst @earlyclobber.
//   r  - register sources; OpNode(src2, src3, src1); IsCommutable forwarded;
//        Sched WriteFMAX
//   m  - $src3 is an ssmem operand read through sse_load_f32;
//        Sched WriteFMAX.Folded + WriteFMAX.ReadAfterFold
//   rb - register sources plus AVX512RC:$rc, selected from OpNodeRnd;
//        EVEX_B, EVEX_RC; Sched WriteFMAX
13527 multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
13528                                    bit IsCommutable> {
13529   let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13530     defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13531                         (ins VR128X:$src2, VR128X:$src3), OpcodeStr,
13532                         "$src3, $src2", "$src2, $src3",
13533                         (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
13534                         Sched<[WriteFMAX]>;
13535     defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
13536                         (ins VR128X:$src2, ssmem:$src3), OpcodeStr,
13537                         "$src3, $src2", "$src2, $src3",
13538                         (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
13539                         Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
13540     defm rb : AVX512_maskable_3src<opc,  MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13541                         (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
13542                         "$rc, $src3, $src2", "$src2, $src3, $rc",
13543                         (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
13544                         EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
13545   }
// avx512_cfmbinop_sh_common: two-source forms on VR128X built with
// AVX512_maskable over f32x_info, all under HasFP16. Every form passes
// X86selects and the constraint string "@earlyclobber $dst".
//   rr  - register sources; IsCommutable is forwarded to all three
//         commutability arguments; Sched WriteFMAX
//   rm  - $src2 is an ssmem operand read through sse_load_f32; the three
//         commutability arguments are 0;
//         Sched WriteFMAX.Folded + WriteFMAX.ReadAfterFold
//   rrb - register sources plus AVX512RC:$rc, selected from OpNodeRnd; the
//         three commutability arguments are 0; EVEX_B, EVEX_RC; Sched WriteFMAX
// NOTE(review): the VT info is the scalar-named f32x_info while the patterns
// are typed v4f32 - presumably intentional for masking; confirm against
// AVX512_maskable and f32x_info, which are defined outside this chunk.
13548 multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13549                                      SDNode OpNodeRnd, bit IsCommutable> {
13550   let Predicates = [HasFP16] in {
13551     defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13552                         (ins VR128X:$src1, VR128X:$src2), OpcodeStr,
13553                         "$src2, $src1", "$src1, $src2",
13554                         (v4f32 (OpNode VR128X:$src1, VR128X:$src2)),
13555                         IsCommutable, IsCommutable, IsCommutable,
13556                         X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>;
13557     defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
13558                         (ins VR128X:$src1, ssmem:$src2), OpcodeStr,
13559                         "$src2, $src1", "$src1, $src2",
13560                         (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))),
13561                         0, 0, 0, X86selects, "@earlyclobber $dst">,
13562                         Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
13563     defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13564                         (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr,
13565                         "$rc, $src2, $src1", "$src1, $src2, $rc",
13566                         (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)),
13567                         0, 0, 0, X86selects, "@earlyclobber $dst">,
13568                         EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
13569   }
// Scalar ("sh") instantiations; all are declared to use MXCSR and share
// T_MAP6, EVEX_CD8<32, CD8VT1>, EVEX_V128 and EVEX, VVVV.
//   VFMADDCSHZ  - opcode 0x57, XS prefix, IsCommutable = 1
//   VFCMADDCSHZ - opcode 0x57, XD prefix, IsCommutable = 0
//   VFMULCSHZ   - opcode 0xD7, XS prefix, IsCommutable = 1, also VEX_LIG
//   VFCMULCSHZ  - opcode 0xD7, XD prefix, IsCommutable = 0, also VEX_LIG
// NOTE(review): only the two multiply defs carry VEX_LIG; the two
// multiply-add defs do not - confirm whether that difference is intended.
13572 let Uses = [MXCSR] in {
13573   defm VFMADDCSHZ  : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>,
13574                                     T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV;
13575   defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>,
13576                                     T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV;
13578   defm VFMULCSHZ  : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>,
13579                                     T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV;
13580   defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>,
13581                                     T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV;