[llvm-shlib] Fix the version naming style of libLLVM for Windows (#85710)
[llvm-project.git] / llvm / lib / Target / X86 / X86InstrAVX512.td
blobbb5e22c714279395adfd1a0aefd111ac030b19c0
1 //===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file describes the X86 AVX512 instruction set, defining the
10 // instructions, and properties of the instructions which are needed for code
11 // generation, machine code emission, and analysis.
13 //===----------------------------------------------------------------------===//
15 // This multiclass generates the masking variants from the non-masking
16 // variant.  It only provides the assembly pieces for the masking variants.
17 // It assumes custom ISel patterns for masking which can be provided as
18 // template arguments.
//
// Three records are emitted per instantiation:
//   NAME    - unmasked form, built from Ins / Pattern.
//   NAME#k  - masked form (EVEX_K), built from MaskingIns / MaskingPattern.
//   NAME#kz - zero-masked form (EVEX_KZ), built from ZeroMaskingIns /
//             ZeroMaskingPattern.
// IsCommutable, IsKCommutable and IsKZCommutable set isCommutable on the
// unmasked, masked and zero-masked record respectively (IsKZCommutable
// defaults to IsCommutable).  ClobberConstraint is applied to all three
// records; MaskingConstraint only contributes to NAME#k.
19 multiclass AVX512_maskable_custom<bits<8> O, Format F,
20                                   dag Outs,
21                                   dag Ins, dag MaskingIns, dag ZeroMaskingIns,
22                                   string OpcodeStr,
23                                   string AttSrcAsm, string IntelSrcAsm,
24                                   list<dag> Pattern,
25                                   list<dag> MaskingPattern,
26                                   list<dag> ZeroMaskingPattern,
27                                   string MaskingConstraint = "",
28                                   bit IsCommutable = 0,
29                                   bit IsKCommutable = 0,
30                                   bit IsKZCommutable = IsCommutable,
31                                   string ClobberConstraint = ""> {
  // Unmasked record.  The asm string carries both syntaxes:
  // "{AT&T operands, $dst|$dst, Intel operands}".
32   let isCommutable = IsCommutable, Constraints = ClobberConstraint in
33     def NAME: AVX512<O, F, Outs, Ins,
34                        OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
35                                      "$dst, "#IntelSrcAsm#"}",
36                        Pattern>;
38   // Prefer over VMOV*rrk Pat<>
  // Masked record: same asm string with "{${mask}}" appended to $dst.
39   let isCommutable = IsKCommutable in
40     def NAME#k: AVX512<O, F, Outs, MaskingIns,
41                        OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
42                                      "$dst {${mask}}, "#IntelSrcAsm#"}",
43                        MaskingPattern>,
44               EVEX_K {
45       // In case of the 3src subclass this is overridden with a let.
      // Use whichever of ClobberConstraint / MaskingConstraint is non-empty,
      // or join both with ", " when both are present.
46       string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
47                                !if(!eq(MaskingConstraint, ""), ClobberConstraint,
48                                    !strconcat(ClobberConstraint, ", ", MaskingConstraint)));
49     }
51   // Zero mask does not add any restrictions to commute operands transformation.
52   // So, it is Ok to use IsCommutable instead of IsKCommutable.
  // Zero-masked record: asm string appends "{${mask}} {z}" to $dst.
53   let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
54       Constraints = ClobberConstraint in
55     def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
56                        OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
57                                      "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
58                        ZeroMaskingPattern>,
59               EVEX_KZ;
63 // Common base class of AVX512_maskable and AVX512_maskable_3src.
// Wraps AVX512_maskable_custom and builds the three pattern lists from dags:
//   unmasked     : (set _.RC:$dst, RHS)
//   masked       : (set _.RC:$dst, MaskingRHS)
//   zero-masked  : (set _.RC:$dst, (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))
// Select is the node used for the zero-masking blend (vselect_mask by default).
// The remaining arguments are forwarded unchanged to AVX512_maskable_custom.
64 multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
65                                   dag Outs,
66                                   dag Ins, dag MaskingIns, dag ZeroMaskingIns,
67                                   string OpcodeStr,
68                                   string AttSrcAsm, string IntelSrcAsm,
69                                   dag RHS, dag MaskingRHS,
70                                   SDPatternOperator Select = vselect_mask,
71                                   string MaskingConstraint = "",
72                                   bit IsCommutable = 0,
73                                   bit IsKCommutable = 0,
74                                   bit IsKZCommutable = IsCommutable,
75                                   string ClobberConstraint = ""> :
76   AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
77                          AttSrcAsm, IntelSrcAsm,
78                          [(set _.RC:$dst, RHS)],
79                          [(set _.RC:$dst, MaskingRHS)],
80                          [(set _.RC:$dst,
81                                (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
82                          MaskingConstraint, IsCommutable,
83                          IsKCommutable, IsKZCommutable, ClobberConstraint>;
85 // This multiclass generates the unconditional/non-masking, the masking and
86 // the zero-masking variant of the vector instruction.  In the masking case, the
87 // preserved vector elements come from a new dummy input operand tied to $dst.
88 // This version uses a separate dag for non-masking and masking.
//
// RHS is used for the unmasked pattern only; MaskRHS is used inside the
// vselect_mask of both the masked and the zero-masked pattern.  The masked
// operand list is (ins _.RC:$src0, _.KRCWM:$mask) followed by Ins, the
// zero-masked one is (ins _.KRCWM:$mask) followed by Ins, and "$src0 = $dst"
// is passed as the masking constraint.
89 multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
90                            dag Outs, dag Ins, string OpcodeStr,
91                            string AttSrcAsm, string IntelSrcAsm,
92                            dag RHS, dag MaskRHS,
93                            string ClobberConstraint = "",
94                            bit IsCommutable = 0, bit IsKCommutable = 0,
95                            bit IsKZCommutable = IsCommutable> :
96    AVX512_maskable_custom<O, F, Outs, Ins,
97                           !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
98                           !con((ins _.KRCWM:$mask), Ins),
99                           OpcodeStr, AttSrcAsm, IntelSrcAsm,
100                           [(set _.RC:$dst, RHS)],
101                           [(set _.RC:$dst,
102                               (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
103                           [(set _.RC:$dst,
104                               (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
105                           "$src0 = $dst", IsCommutable, IsKCommutable,
106                           IsKZCommutable, ClobberConstraint>;
108 // This multiclass generates the unconditional/non-masking, the masking and
109 // the zero-masking variant of the vector instruction.  In the masking case, the
110 // preserved vector elements come from a new dummy input operand tied to $dst.
//
// A single RHS dag is shared by all three patterns.  The masked pattern is
// (Select _.KRCWM:$mask, RHS, _.RC:$src0); the zero-masked pattern is built by
// AVX512_maskable_common with the same Select node.  $src0 and $mask are
// prepended to Ins for the masked form, $mask alone for the zero-masked form.
111 multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
112                            dag Outs, dag Ins, string OpcodeStr,
113                            string AttSrcAsm, string IntelSrcAsm,
114                            dag RHS,
115                            bit IsCommutable = 0, bit IsKCommutable = 0,
116                            bit IsKZCommutable = IsCommutable,
117                            SDPatternOperator Select = vselect_mask,
118                            string ClobberConstraint = ""> :
119    AVX512_maskable_common<O, F, _, Outs, Ins,
120                           !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
121                           !con((ins _.KRCWM:$mask), Ins),
122                           OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
123                           (Select _.KRCWM:$mask, RHS, _.RC:$src0),
124                           Select, "$src0 = $dst", IsCommutable, IsKCommutable,
125                           IsKZCommutable, ClobberConstraint>;
127 // This multiclass generates the unconditional/non-masking, the masking and
128 // the zero-masking variant of the scalar instruction.
// Forwards to AVX512_maskable with all three commutable flags set to 0 and
// X86selects_mask as the Select node.
129 multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
130                            dag Outs, dag Ins, string OpcodeStr,
131                            string AttSrcAsm, string IntelSrcAsm,
132                            dag RHS> :
133    AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
134                    RHS, 0, 0, 0, X86selects_mask>;
136 // Similar to AVX512_maskable but in this case one of the source operands
137 // ($src1) is already tied to $dst so we just use that for the preserved
138 // vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
139 // $src1.
//
// $src1 is prepended to NonTiedIns for every form; the masked and zero-masked
// forms share the same operand list ($src1, $mask, NonTiedIns).  When MaskOnly
// is set the unmasked pattern is replaced by null_frag.  An empty masking
// constraint is passed down, so no "$src0 = $dst" tie is added here.
140 multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
141                                 dag Outs, dag NonTiedIns, string OpcodeStr,
142                                 string AttSrcAsm, string IntelSrcAsm,
143                                 dag RHS,
144                                 bit IsCommutable = 0,
145                                 bit IsKCommutable = 0,
146                                 SDPatternOperator Select = vselect_mask,
147                                 bit MaskOnly = 0> :
148    AVX512_maskable_common<O, F, _, Outs,
149                           !con((ins _.RC:$src1), NonTiedIns),
150                           !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
151                           !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
152                           OpcodeStr, AttSrcAsm, IntelSrcAsm,
153                           !if(MaskOnly, (null_frag), RHS),
154                           (Select _.KRCWM:$mask, RHS, _.RC:$src1),
155                           Select, "", IsCommutable, IsKCommutable>;
157 // Similar to AVX512_maskable_3src but in this case the input VT for the tied
158 // operand differs from the output VT. This requires a bitconvert on
159 // the preserved vector going into the vselect.
160 // NOTE: The unmasked pattern is disabled.
//
// $src1 and $mask use InVT's register classes, while the result patterns are
// built against OutVT.  The unmasked RHS is null_frag; the masked pattern
// blends RHS with (bitconvert InVT.RC:$src1) through vselect_mask.
161 multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
162                                      X86VectorVTInfo InVT,
163                                      dag Outs, dag NonTiedIns, string OpcodeStr,
164                                      string AttSrcAsm, string IntelSrcAsm,
165                                      dag RHS, bit IsCommutable = 0> :
166    AVX512_maskable_common<O, F, OutVT, Outs,
167                           !con((ins InVT.RC:$src1), NonTiedIns),
168                           !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
169                           !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
170                           OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
171                           (vselect_mask InVT.KRCWM:$mask, RHS,
172                            (bitconvert InVT.RC:$src1)),
173                            vselect_mask, "", IsCommutable>;
// Scalar flavour of AVX512_maskable_3src: forwards every argument unchanged
// and passes X86selects_mask as the Select node.
175 multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
176                                      dag Outs, dag NonTiedIns, string OpcodeStr,
177                                      string AttSrcAsm, string IntelSrcAsm,
178                                      dag RHS,
179                                      bit IsCommutable = 0,
180                                      bit IsKCommutable = 0,
181                                      bit MaskOnly = 0> :
182    AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
183                         IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
184                         X86selects_mask, MaskOnly>;
// Generates the unmasked, masked and zero-masked records with a selection
// pattern on the unmasked form only: the masked and zero-masked pattern lists
// are empty.  $src0/$mask are prepended to Ins for the masked form, $mask for
// the zero-masked form, and "$src0 = $dst" is the masking constraint.
186 multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
187                                   dag Outs, dag Ins,
188                                   string OpcodeStr,
189                                   string AttSrcAsm, string IntelSrcAsm,
190                                   list<dag> Pattern> :
191    AVX512_maskable_custom<O, F, Outs, Ins,
192                           !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
193                           !con((ins _.KRCWM:$mask), Ins),
194                           OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
195                           "$src0 = $dst">;
// Three-source counterpart of AVX512_maskable_in_asm: $src1 is prepended to
// NonTiedIns for every form, the masked and zero-masked pattern lists are
// empty, and an empty masking constraint is passed.
197 multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
198                                        dag Outs, dag NonTiedIns,
199                                        string OpcodeStr,
200                                        string AttSrcAsm, string IntelSrcAsm,
201                                        list<dag> Pattern> :
202    AVX512_maskable_custom<O, F, Outs,
203                           !con((ins _.RC:$src1), NonTiedIns),
204                           !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
205                           !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
206                           OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
207                           "">;
209 // Instruction with mask that puts result in mask register,
210 // like "compare" and "vptest"
//
// Emits two records, both with isCommutable = IsCommutable:
//   NAME   - unmasked form (Ins / Pattern).
//   NAME#k - masked form (MaskingIns / MaskingPattern, EVEX_K).
// No zero-masked (kz) record is generated by this multiclass.
211 multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
212                                   dag Outs,
213                                   dag Ins, dag MaskingIns,
214                                   string OpcodeStr,
215                                   string AttSrcAsm, string IntelSrcAsm,
216                                   list<dag> Pattern,
217                                   list<dag> MaskingPattern,
218                                   bit IsCommutable = 0> {
219     let isCommutable = IsCommutable in {
220     def NAME: AVX512<O, F, Outs, Ins,
221                        OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
222                                      "$dst, "#IntelSrcAsm#"}",
223                        Pattern>;
225     def NAME#k: AVX512<O, F, Outs, MaskingIns,
226                        OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
227                                      "$dst {${mask}}, "#IntelSrcAsm#"}",
228                        MaskingPattern>, EVEX_K;
229     }
// Wraps AVX512_maskable_custom_cmp, turning the RHS / MaskingRHS dags into
// patterns that set the mask-register result _.KRC:$dst.
232 multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
233                                   dag Outs,
234                                   dag Ins, dag MaskingIns,
235                                   string OpcodeStr,
236                                   string AttSrcAsm, string IntelSrcAsm,
237                                   dag RHS, dag MaskingRHS,
238                                   bit IsCommutable = 0> :
239   AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
240                          AttSrcAsm, IntelSrcAsm,
241                          [(set _.KRC:$dst, RHS)],
242                          [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
// Mask-producing instruction with an unmasked and a masked form.  The masked
// operand list is $mask followed by Ins, and the masked pattern is
// (and _.KRCWM:$mask, RHS_su).  RHS is used for the unmasked pattern only.
244 multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
245                            dag Outs, dag Ins, string OpcodeStr,
246                            string AttSrcAsm, string IntelSrcAsm,
247                            dag RHS, dag RHS_su, bit IsCommutable = 0> :
248    AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
249                           !con((ins _.KRCWM:$mask), Ins),
250                           OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
251                           (and _.KRCWM:$mask, RHS_su), IsCommutable>;
253 // Used by conversion instructions.
// The caller supplies all three operand lists and all three right-hand sides
// (RHS, MaskingRHS, ZeroMaskingRHS); each is wrapped in (set _.RC:$dst, ...).
// "$src0 = $dst" is passed as the masking constraint.
254 multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
255                                   dag Outs,
256                                   dag Ins, dag MaskingIns, dag ZeroMaskingIns,
257                                   string OpcodeStr,
258                                   string AttSrcAsm, string IntelSrcAsm,
259                                   dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
260   AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
261                          AttSrcAsm, IntelSrcAsm,
262                          [(set _.RC:$dst, RHS)],
263                          [(set _.RC:$dst, MaskingRHS)],
264                          [(set _.RC:$dst, ZeroMaskingRHS)],
265                          "$src0 = $dst">;
// Three-source masking helper that takes separate dags for the unmasked (RHS)
// and masked (MaskingRHS) computations.  $src1 is prepended to NonTiedIns and
// is the preserved vector in the masked pattern; the zero-masked pattern
// blends MaskingRHS with _.ImmAllZerosV.  IsCommutable and IsKCommutable have
// no defaults here, and an empty masking constraint is passed.
267 multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
268                                dag Outs, dag NonTiedIns, string OpcodeStr,
269                                string AttSrcAsm, string IntelSrcAsm,
270                                dag RHS, dag MaskingRHS, bit IsCommutable,
271                                bit IsKCommutable> :
272    AVX512_maskable_custom<O, F, Outs,
273                           !con((ins _.RC:$src1), NonTiedIns),
274                           !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
275                           !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
276                           OpcodeStr, AttSrcAsm, IntelSrcAsm,
277                           [(set _.RC:$dst, RHS)],
278                           [(set _.RC:$dst,
279                             (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
280                           [(set _.RC:$dst,
281                             (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
282                           "", IsCommutable, IsKCommutable>;
284 // Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
285 // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
286 // swizzled by ExecutionDomainFix to pxor.
287 // We set canFoldAsLoad because this can be converted to a constant-pool
288 // load of an all-zeros value if folding it would be beneficial.
// Both pseudos take no inputs and write a VR512 register; their own patterns
// cover the v16i32 all-zeros and all-ones constants respectively.
289 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
290     isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
291 def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
292                [(set VR512:$dst, (v16i32 immAllZerosV))]>;
293 def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
294                [(set VR512:$dst, (v16i32 immAllOnesV))]>;
// Select AVX512_512_SET0 for the all-zeros constant of the remaining 512-bit
// vector types (v16i32 is handled by the pattern on the pseudo itself).
297 let Predicates = [HasAVX512] in {
298 def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
299 def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
300 def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
301 def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
302 def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
303 def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
306 // Alias instructions that allow VPTERNLOG to be used with a mask to create
307 // a mix of all ones and all zeros elements. This is done this way to force
308 // the same register to be used as input for all three sources.
// _32 takes a VK16WM mask and selects per v16i32 element between all-ones and
// all-zeros; _64 does the same for v8i64 elements with a VK8WM mask.
309 let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
310 def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
311                                 (ins VK16WM:$mask), "",
312                            [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
313                                                       (v16i32 immAllOnesV),
314                                                       (v16i32 immAllZerosV)))]>;
315 def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
316                                 (ins VK8WM:$mask), "",
317                 [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
318                                            (v8i64 immAllOnesV),
319                                            (v8i64 immAllZerosV)))]>;
// 128-bit and 256-bit all-zeros pseudos, defined with the same flags as
// AVX512_512_SET0.  They write VR128X / VR256X and carry the v4i32 / v8i32
// all-zeros patterns.
322 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
323     isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
324 def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
325                [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
326 def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
327                [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
// Select AVX512_128_SET0 / AVX512_256_SET0 for the all-zeros constant of the
// remaining 128-bit and 256-bit vector types (v4i32 and v8i32 are handled by
// the patterns on the pseudos themselves).
330 let Predicates = [HasAVX512] in {
331 def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
332 def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
333 def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
334 def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
335 def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
336 def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
337 def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
338 def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
339 def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
340 def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
341 def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
342 def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
345 // Alias instructions that map fld0 to xorps for sse or vxorps for avx.
346 // This is expanded by ExpandPostRAPseudos.
// One pseudo per scalar FP zero constant: fp16imm0 into FR16X, fp32imm0 into
// FR32X, fp64imm0 into FR64X and fp128imm0 into VR128X.
347 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
348     isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
349   def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
350                           [(set FR16X:$dst, fp16imm0)]>;
351   def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
352                           [(set FR32X:$dst, fp32imm0)]>;
353   def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
354                           [(set FR64X:$dst, fp64imm0)]>;
355   def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
356                             [(set VR128X:$dst, fp128imm0)]>;
359 //===----------------------------------------------------------------------===//
360 // AVX-512 - VECTOR INSERT
363 // Supports two different pattern operators for mask and unmasked ops. Allows
364 // null_frag to be passed for one.
//
// Defines a register form (rr) and a memory form (rm) of a subvector insert of
// a From-typed value into a To-typed vector, each expanded through
// AVX512_maskable_split into unmasked, masked and zero-masked records.
// vinsert_insert is used for the unmasked pattern and vinsert_for_mask for the
// masked patterns.  The mnemonic is "vinsert" # From.EltTypeName # "x" #
// From.NumElts, and $src3 is the u8imm position operand.
365 multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
366                                   X86VectorVTInfo To,
367                                   SDPatternOperator vinsert_insert,
368                                   SDPatternOperator vinsert_for_mask,
369                                   X86FoldableSchedWrite sched> {
370   let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register source: $src2 is a From.RC register.
371     defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
372                    (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
373                    "vinsert" # From.EltTypeName # "x" # From.NumElts,
374                    "$src3, $src2, $src1", "$src1, $src2, $src3",
375                    (vinsert_insert:$src3 (To.VT To.RC:$src1),
376                                          (From.VT From.RC:$src2),
377                                          (iPTR imm)),
378                    (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
379                                            (From.VT From.RC:$src2),
380                                            (iPTR imm))>,
381                    AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>;
    // Memory source: $src2 is loaded through From.LdFrag and the folded
    // scheduling classes are used.
382     let mayLoad = 1 in
383     defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
384                    (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
385                    "vinsert" # From.EltTypeName # "x" # From.NumElts,
386                    "$src3, $src2, $src1", "$src1, $src2, $src3",
387                    (vinsert_insert:$src3 (To.VT To.RC:$src1),
388                                (From.VT (From.LdFrag addr:$src2)),
389                                (iPTR imm)),
390                    (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
391                                (From.VT (From.LdFrag addr:$src2)),
392                                (iPTR imm))>, AVX512AIi8Base, EVEX, VVVV,
393                    EVEX_CD8<From.EltSize, From.CD8TupleForm>,
394                    Sched<[sched.Folded, sched.ReadAfterFold]>;
395   }
398 // Passes the same pattern operator for masked and unmasked ops.
// Thin wrapper over vinsert_for_size_split that supplies vinsert_insert for
// both operator parameters.
399 multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
400                             X86VectorVTInfo To,
401                             SDPatternOperator vinsert_insert,
402                             X86FoldableSchedWrite sched> :
403   vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;
// Adds unmasked selection patterns that map a vinsert_insert of a From-typed
// subvector into a To-typed vector onto the existing instructions
// InstrStr#"rr" (register source) and InstrStr#"rm" (loaded source).  The
// instruction's immediate is produced by applying INSERT_get_vinsert_imm to
// the matched insert node ($ins).  Patterns are guarded by predicate list p.
405 multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
406                        X86VectorVTInfo To, PatFrag vinsert_insert,
407                        SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
408   let Predicates = p in {
    // Register source.
409     def : Pat<(vinsert_insert:$ins
410                      (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
411               (To.VT (!cast<Instruction>(InstrStr#"rr")
412                      To.RC:$src1, From.RC:$src2,
413                      (INSERT_get_vinsert_imm To.RC:$ins)))>;
    // Memory source, folded into the instruction.
415     def : Pat<(vinsert_insert:$ins
416                   (To.VT To.RC:$src1),
417                   (From.VT (From.LdFrag addr:$src2)),
418                   (iPTR imm)),
419               (To.VT (!cast<Instruction>(InstrStr#"rm")
420                   To.RC:$src1, addr:$src2,
421                   (INSERT_get_vinsert_imm To.RC:$ins)))>;
422   }
// Instantiates the full family of subvector-insert instructions for one
// element kind.  EltVT32 / EltVT64 are the 32-bit and 64-bit element types;
// Opcode128 / Opcode256 are the opcodes used for 128-bit and 256-bit subvector
// inserts.  The NAME suffix encodes <element bits>x<element count> of the
// inserted subvector, followed by Z (512-bit destination) or Z256 (256-bit
// destination).
425 multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
426                             ValueType EltVT64, int Opcode256,
427                             X86FoldableSchedWrite sched> {
  // 128-bit insert into a 256-bit vector, 32-bit elements.
429   let Predicates = [HasVLX] in
430     defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
431                                  X86VectorVTInfo< 4, EltVT32, VR128X>,
432                                  X86VectorVTInfo< 8, EltVT32, VR256X>,
433                                  vinsert128_insert, sched>, EVEX_V256;
  // 128-bit insert into a 512-bit vector, 32-bit elements.
435   defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
436                                  X86VectorVTInfo< 4, EltVT32, VR128X>,
437                                  X86VectorVTInfo<16, EltVT32, VR512>,
438                                  vinsert128_insert, sched>, EVEX_V512;
  // 256-bit insert into a 512-bit vector, 64-bit elements.
440   defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
441                                  X86VectorVTInfo< 4, EltVT64, VR256X>,
442                                  X86VectorVTInfo< 8, EltVT64, VR512>,
443                                  vinsert256_insert, sched>, REX_W, EVEX_V512;
445   // Even with DQI we'd like to only use these instructions for masking.
  // (null_frag is passed as the unmasked operator, so only the masked
  // patterns are generated for the DQI-only forms below.)
446   let Predicates = [HasVLX, HasDQI] in
447     defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
448                                    X86VectorVTInfo< 2, EltVT64, VR128X>,
449                                    X86VectorVTInfo< 4, EltVT64, VR256X>,
450                                    null_frag, vinsert128_insert, sched>,
451                                    EVEX_V256, REX_W;
453   // Even with DQI we'd like to only use these instructions for masking.
454   let Predicates = [HasDQI] in {
455     defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
456                                  X86VectorVTInfo< 2, EltVT64, VR128X>,
457                                  X86VectorVTInfo< 8, EltVT64, VR512>,
458                                  null_frag, vinsert128_insert, sched>,
459                                  REX_W, EVEX_V512;
461     defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
462                                    X86VectorVTInfo< 8, EltVT32, VR256X>,
463                                    X86VectorVTInfo<16, EltVT32, VR512>,
464                                    null_frag, vinsert256_insert, sched>,
465                                    EVEX_V512;
466   }
469 // FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
// Floating-point (f32/f64) and integer (i32/i64) instantiations.  Each pair
// uses a distinct opcode for 128-bit and for 256-bit subvector inserts.
470 defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
471 defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
473 // Codegen pattern with the alternative types,
474 // Even with AVX512DQ we'll still use these for unmasked operations.
// 64-bit element types are lowered to the 32x4 instructions and 32-bit element
// types to the 64x4 instructions; these are unmasked patterns only.
475 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
476               vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
477 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
478               vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
480 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
481               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
482 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
483               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
485 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
486               vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
487 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
488               vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
490 // Codegen pattern with the alternative types insert VEC128 into VEC256
// 8-bit and 16-bit integer types use the VINSERTI forms; f16 types use the
// VINSERTF forms.
491 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
492               vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
493 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
494               vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
495 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
496               vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
497 // Codegen pattern with the alternative types insert VEC128 into VEC512
498 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
499               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
500 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
501                vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
502 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
503               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
504 // Codegen pattern with the alternative types insert VEC256 into VEC512
505 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
506               vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
507 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
508               vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
509 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
510               vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
// Adds masked selection patterns for a subvector insert whose result is
// bitconverted to a different vector type (Cast) before the mask is applied.
// From/To describe the insert as it appears in the DAG; Cast supplies the
// result type, the mask register class and the passthrough register class.
// Four patterns are produced, selecting InstrStr#"rrk", "rmk", "rrkz" and
// "rmkz": register or loaded source, blended with either $src0 or all-zeros.
513 multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
514                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
515                                  PatFrag vinsert_insert,
516                                  SDNodeXForm INSERT_get_vinsert_imm,
517                                  list<Predicate> p> {
518 let Predicates = p in {
  // Masked with passthrough $src0, register source.
519   def : Pat<(Cast.VT
520              (vselect_mask Cast.KRCWM:$mask,
521                            (bitconvert
522                             (vinsert_insert:$ins (To.VT To.RC:$src1),
523                                                  (From.VT From.RC:$src2),
524                                                  (iPTR imm))),
525                            Cast.RC:$src0)),
526             (!cast<Instruction>(InstrStr#"rrk")
527              Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
528              (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Masked with passthrough $src0, loaded source.
  // NOTE(review): the load is wrapped in an extra bitconvert here, unlike the
  // rmkz pattern below -- confirm whether it is still needed.
529   def : Pat<(Cast.VT
530              (vselect_mask Cast.KRCWM:$mask,
531                            (bitconvert
532                             (vinsert_insert:$ins (To.VT To.RC:$src1),
533                                                  (From.VT
534                                                   (bitconvert
535                                                    (From.LdFrag addr:$src2))),
536                                                  (iPTR imm))),
537                            Cast.RC:$src0)),
538             (!cast<Instruction>(InstrStr#"rmk")
539              Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
540              (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Zero-masked, register source.
542   def : Pat<(Cast.VT
543              (vselect_mask Cast.KRCWM:$mask,
544                            (bitconvert
545                             (vinsert_insert:$ins (To.VT To.RC:$src1),
546                                                  (From.VT From.RC:$src2),
547                                                  (iPTR imm))),
548                            Cast.ImmAllZerosV)),
549             (!cast<Instruction>(InstrStr#"rrkz")
550              Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
551              (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Zero-masked, loaded source.
552   def : Pat<(Cast.VT
553              (vselect_mask Cast.KRCWM:$mask,
554                            (bitconvert
555                             (vinsert_insert:$ins (To.VT To.RC:$src1),
556                                                  (From.VT (From.LdFrag addr:$src2)),
557                                                  (iPTR imm))),
558                            Cast.ImmAllZerosV)),
559             (!cast<Instruction>(InstrStr#"rmkz")
560              Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
561              (INSERT_get_vinsert_imm To.RC:$ins))>;
// Masked-with-bitcast insert patterns selecting the "Z256" instruction
// variants. Template arguments are <InstrStr, From, To, Cast, insert fragment,
// immediate transform, predicates>.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
// The integer patterns cast to v4i64x_info select the integer instruction
// VINSERTI64x2Z256, mirroring the VINSERTI64x2Z patterns used for the
// corresponding From/Cast combinations on 512-bit vectors.
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
// Masked-with-bitcast patterns for vinsert128_insert on the "Z" instruction
// variants. Template arguments are <InstrStr, From, To, Cast, insert fragment,
// immediate transform, predicates>.

// VINSERTF32x4Z / VINSERTF64x2Z.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z",
                             v2f64x_info, v8f64_info, v16f32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z",
                             v4f32x_info, v16f32_info, v8f64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;

// VINSERTI32x4Z: result masked as v16i32_info.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v2i64x_info, v8i64_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v8i16x_info, v32i16_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v16i8x_info, v64i8_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;

// VINSERTI64x2Z: result masked as v8i64_info.
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v4i32x_info, v16i32_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v8i16x_info, v32i16_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v16i8x_info, v64i8_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;
// Masked-with-bitcast patterns for vinsert256_insert on the "Z" instruction
// variants. Template arguments are <InstrStr, From, To, Cast, insert fragment,
// immediate transform, predicates>.

// VINSERTF32x8Z / VINSERTF64x4Z.
defm : vinsert_for_mask_cast<"VINSERTF32x8Z",
                             v4f64x_info, v8f64_info, v16f32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z",
                             v8f32x_info, v16f32_info, v8f64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;

// VINSERTI32x8Z: result masked as v16i32_info.
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v4i64x_info, v8i64_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v16i16x_info, v32i16_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v32i8x_info, v64i8_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;

// VINSERTI64x4Z: result masked as v8i64_info.
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v8i32x_info, v16i32_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v16i16x_info, v32i16_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v32i8x_info, v64i8_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
// vinsertps - insert f32 to XMM

// Register form: both sources are VR128X registers; marked commutable.
let ExeDomain = SSEPackedSingle, isCommutable = 1 in
def VINSERTPSZrr
    : AVX512AIi8<0x21, MRMSrcReg,
                 (outs VR128X:$dst),
                 (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
                 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                 [(set VR128X:$dst,
                       (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
      EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>;

// Memory form: the second source is an f32 load wrapped in scalar_to_vector.
let ExeDomain = SSEPackedSingle in
def VINSERTPSZrm
    : AVX512AIi8<0x21, MRMSrcMem,
                 (outs VR128X:$dst),
                 (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
                 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                 [(set VR128X:$dst,
                       (X86insertps
                           VR128X:$src1,
                           (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                           timm:$src3))]>,
      EVEX, VVVV, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded,
             SchedWriteFShuffle.XMM.ReadAfterFold]>;
661 //===----------------------------------------------------------------------===//
662 // AVX-512 VECTOR EXTRACT
663 //---
// Defines the register ("rr"), store ("mr") and masked store ("mrk") forms of
// one vextract instruction extracting a To-typed subvector from a From-typed
// source.
// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {
  // Mnemonic shared by all three forms, derived from the destination info.
  defvar ExtractMnemonic = "vextract" # To.EltTypeName # "x" # To.NumElts;

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register destination; vextract_extract is the unmasked operator and
    // vextract_for_mask the operator handed over for the masked forms.
    defm rr : AVX512_maskable_split<
                  Opcode, MRMDestReg, To,
                  (outs To.RC:$dst),
                  (ins From.RC:$src1, u8imm:$idx),
                  ExtractMnemonic,
                  "$idx, $src1", "$src1, $idx",
                  (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                  (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
              AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    // Unmasked store of the extracted subvector.
    def mr : AVX512AIi8<
                 Opcode, MRMDestMem,
                 (outs),
                 (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                 ExtractMnemonic # "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                 [(store (To.VT (vextract_extract:$idx (From.VT From.RC:$src1),
                                                       (iPTR imm))),
                         addr:$dst)]>,
             EVEX, Sched<[SchedMR]>;

    // Masked store: no selection pattern, so mayStore is spelled out.
    let mayStore = 1 in
    def mrk : AVX512AIi8<
                  Opcode, MRMDestMem,
                  (outs),
                  (ins To.MemOp:$dst, To.KRCWM:$mask, From.RC:$src1, u8imm:$idx),
                  ExtractMnemonic #
                      "\t{$idx, $src1, $dst {${mask}}|"
                      "$dst {${mask}}, $src1, $idx}",
                  []>,
              EVEX_K, EVEX, Sched<[SchedMR]>;
  }
}
// Passes the same pattern operator for masked and unmasked ops.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR>
    : vextract_for_size_split<Opcode, From, To,
                              /*vextract_extract=*/vextract_extract,
                              /*vextract_for_mask=*/vextract_extract,
                              SchedRR, SchedMR>;
// Codegen pattern for the alternative types
//
// Maps an extract of a To-typed subvector from a From-typed register onto the
// "rr" (register result) and "mr" (stored result) variants of InstrStr. The
// immediate is produced by EXTRACT_get_vextract_imm from the matched node.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                      X86VectorVTInfo To,
                                      PatFrag vextract_extract,
                                      SDNodeXForm EXTRACT_get_vextract_imm,
                                      list<Predicate> p> {
  let Predicates = p in {
    // Extract into a register.
    def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr # "rr")
                         From.RC:$src1,
                         (EXTRACT_get_vextract_imm To.RC:$ext)))>;

    // Extract feeding a store.
    def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                                                   (iPTR imm))),
                     addr:$dst),
              (!cast<Instruction>(InstrStr # "mr")
                  addr:$dst, From.RC:$src1,
                  (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}
// Instantiates the vextract instruction family for one element-type pair.
// Opcode128 is used with vextract128_extract and Opcode256 with
// vextract256_extract; each record name is NAME followed by the suffix given
// on its defm.
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z"
        : vextract_for_size<Opcode128,
                            X86VectorVTInfo<16, EltVT32, VR512>,
                            X86VectorVTInfo< 4, EltVT32, VR128X>,
                            vextract128_extract, SchedRR, SchedMR>,
          EVEX_V512, EVEX_CD8<32, CD8VT4>;

    defm NAME # "64x4Z"
        : vextract_for_size<Opcode256,
                            X86VectorVTInfo< 8, EltVT64, VR512>,
                            X86VectorVTInfo< 4, EltVT64, VR256X>,
                            vextract256_extract, SchedRR, SchedMR>,
          REX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }

  let Predicates = [HasVLX] in {
    defm NAME # "32x4Z256"
        : vextract_for_size<Opcode128,
                            X86VectorVTInfo< 8, EltVT32, VR256X>,
                            X86VectorVTInfo< 4, EltVT32, VR128X>,
                            vextract128_extract, SchedRR, SchedMR>,
          EVEX_V256, EVEX_CD8<32, CD8VT4>;
  }

  // Even with DQI we'd like to only use these instructions for masking.
  // null_frag is therefore passed as the unmasked operator below.
  let Predicates = [HasVLX, HasDQI] in {
    defm NAME # "64x2Z256"
        : vextract_for_size_split<Opcode128,
                                  X86VectorVTInfo< 4, EltVT64, VR256X>,
                                  X86VectorVTInfo< 2, EltVT64, VR128X>,
                                  null_frag, vextract128_extract,
                                  SchedRR, SchedMR>,
          EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
  }

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z"
        : vextract_for_size_split<Opcode128,
                                  X86VectorVTInfo< 8, EltVT64, VR512>,
                                  X86VectorVTInfo< 2, EltVT64, VR128X>,
                                  null_frag, vextract128_extract,
                                  SchedRR, SchedMR>,
          REX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;

    defm NAME # "32x8Z"
        : vextract_for_size_split<Opcode256,
                                  X86VectorVTInfo<16, EltVT32, VR512>,
                                  X86VectorVTInfo< 8, EltVT32, VR256X>,
                                  null_frag, vextract256_extract,
                                  SchedRR, SchedMR>,
          EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}
// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
// Arguments: <EltVT32, Opcode128, EltVT64, Opcode256, SchedRR, SchedMR>.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b,
                                   WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b,
                                   WriteShuffle256, WriteVecStore>;
// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
// Template arguments are <InstrStr, From, To, extract fragment, immediate
// transform, predicates>.

// vextract128_extract on the "Z" instructions.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z",
                                  v8f64_info, v2f64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v8i64_info, v2i64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;

// vextract256_extract on the "Z" instructions.
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z",
                                  v16f32_info, v8f32x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v16i32_info, v8i32x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;

// vextract128_extract on the "Z256" instructions.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256",
                                  v4f64x_info, v2f64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v4i64x_info, v2i64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;
// Codegen pattern with the alternative types extract VEC128 from VEC256
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v16i16x_info, v8i16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v32i8x_info, v16i8x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256",
                                  v16f16x_info, v8f16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v32i16_info, v8i16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v64i8_info, v16i8x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z",
                                  v32f16_info, v8f16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;

// Codegen pattern with the alternative types extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v32i16_info, v16i16x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v64i8_info, v32i8x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z",
                                  v32f16_info, v16f16x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// Without VLX: take the sub_ymm subregister of the 512-bit source and apply
// VEXTRACTI128rr / VEXTRACTF128rr with index 1 to it.
let Predicates = [NoVLX] in {
  // 64-bit elements.
  def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
            (v2i64 (VEXTRACTI128rr
                       (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
            (v2f64 (VEXTRACTF128rr
                       (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;

  // 32-bit elements.
  def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
            (v4i32 (VEXTRACTI128rr
                       (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
            (v4f32 (VEXTRACTF128rr
                       (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;

  // 16-bit elements.
  def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
            (v8i16 (VEXTRACTI128rr
                       (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
            (v8f16 (VEXTRACTF128rr
                       (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;

  // 8-bit elements.
  def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
            (v16i8 (VEXTRACTI128rr
                       (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
}
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// With VLX: take the sub_ymm subregister of the 512-bit source and apply
// VEXTRACTI32x4Z256rr / VEXTRACTF32x4Z256rr with index 1 to it.
let Predicates = [HasVLX] in {
  // 64-bit elements.
  def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
            (v2i64 (VEXTRACTI32x4Z256rr
                       (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
            (v2f64 (VEXTRACTF32x4Z256rr
                       (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;

  // 32-bit elements.
  def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
            (v4i32 (VEXTRACTI32x4Z256rr
                       (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
            (v4f32 (VEXTRACTF32x4Z256rr
                       (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;

  // 16-bit elements.
  def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
            (v8i16 (VEXTRACTI32x4Z256rr
                       (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
            (v8f16 (VEXTRACTF32x4Z256rr
                       (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;

  // 8-bit elements.
  def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
            (v16i8 (VEXTRACTI32x4Z256rr
                       (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
}
// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
//
//   InstrStr                 - instruction name prefix; the "rrk" and "rrkz"
//                              variants are selected.
//   From                     - info for the vector extracted from ($src).
//   To                       - info for the extracted subvector.
//   Cast                     - info for the type the extract is bitcast to; it
//                              provides the mask class, the pass-through
//                              operand and the result type.
//   vextract_extract         - fragment matching the extract node.
//   EXTRACT_get_vextract_imm - transform applied to the matched extract ($ext)
//                              to produce the instruction immediate.
//   p                        - predicates guarding both patterns.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  // Merge-masking: the pass-through $src0 is an operand of the Cast.VT select,
  // so it is named with Cast.RC on both sides of the pattern.
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // Zero-masking: the pass-through is Cast.ImmAllZerosV.
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}
// Masked-with-bitcast extract patterns selecting the "Z256" instruction
// variants. Template arguments are <InstrStr, From, To, Cast, extract
// fragment, immediate transform, predicates>.

// VEXTRACTF32x4Z256 / VEXTRACTF64x2Z256.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256",
                              v4f64x_info, v2f64x_info, v4f32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256",
                              v8f32x_info, v4f32x_info, v2f64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;

// VEXTRACTI32x4Z256: result masked as v4i32x_info.
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
                              v4i64x_info, v2i64x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
                              v16i16x_info, v8i16x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
                              v32i8x_info, v16i8x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasVLX]>;

// VEXTRACTI64x2Z256: result masked as v2i64x_info.
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
                              v8i32x_info, v4i32x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
                              v16i16x_info, v8i16x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
                              v32i8x_info, v16i8x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;
// Masked-with-bitcast patterns for vextract128_extract on the "Z" instruction
// variants. Template arguments are <InstrStr, From, To, Cast, extract
// fragment, immediate transform, predicates>.

// VEXTRACTF32x4Z / VEXTRACTF64x2Z.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z",
                              v8f64_info, v2f64x_info, v4f32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z",
                              v16f32_info, v4f32x_info, v2f64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI]>;

// VEXTRACTI32x4Z: result masked as v4i32x_info.
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
                              v8i64_info, v2i64x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
                              v32i16_info, v8i16x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
                              v64i8_info, v16i8x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;

// VEXTRACTI64x2Z: result masked as v2i64x_info.
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
                              v16i32_info, v4i32x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
                              v32i16_info, v8i16x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
                              v64i8_info, v16i8x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI]>;
// Masked-with-bitcast patterns for vextract256_extract on the "Z" instruction
// variants. Template arguments are <InstrStr, From, To, Cast, extract
// fragment, immediate transform, predicates>.

// VEXTRACTF32x8Z / VEXTRACTF64x4Z.
defm : vextract_for_mask_cast<"VEXTRACTF32x8Z",
                              v8f64_info, v4f64x_info, v8f32x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z",
                              v16f32_info, v8f32x_info, v4f64x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasAVX512]>;

// VEXTRACTI32x8Z: result masked as v8i32x_info.
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z",
                              v8i64_info, v4i64x_info, v8i32x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z",
                              v32i16_info, v16i16x_info, v8i32x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z",
                              v64i8_info, v32i8x_info, v8i32x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasDQI]>;

// VEXTRACTI64x4Z: result masked as v4i64x_info.
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z",
                              v16i32_info, v8i32x_info, v4i64x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z",
                              v32i16_info, v16i16x_info, v4i64x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z",
                              v64i8_info, v32i8x_info, v4i64x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasAVX512]>;
// vextractps - extract 32 bits from XMM

// Register destination: the selected element of the source, viewed as v4i32,
// is written to a GR32orGR64 register.
def VEXTRACTPSZrr
    : AVX512AIi8<0x17, MRMDestReg,
                 (outs GR32orGR64:$dst),
                 (ins VR128X:$src1, u8imm:$src2),
                 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 [(set GR32orGR64:$dst,
                       (extractelt (bc_v4i32 (v4f32 VR128X:$src1)),
                                   imm:$src2))]>,
      EVEX, WIG, Sched<[WriteVecExtract]>;

// Memory destination: the selected element is stored to $dst.
def VEXTRACTPSZmr
    : AVX512AIi8<0x17, MRMDestMem,
                 (outs),
                 (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
                 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)),
                                     imm:$src2),
                         addr:$dst)]>,
      EVEX, WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1002 //===---------------------------------------------------------------------===//
1003 // AVX-512 BROADCAST
1004 //---
// broadcast with a scalar argument.
//
// Selects the "rr", "rrk" and "rrkz" variants of the instruction named
// Name # DestInfo.ZSuffix for an X86VBroadcast of a value in SrcInfo.FRC. The
// scalar is first copied into SrcInfo.RC with COPY_TO_REGCLASS.
multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
                                   X86VectorVTInfo SrcInfo> {
  // Unmasked.
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name # DestInfo.ZSuffix # "rr")
                (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;

  // Merge-masking: $src0 is the pass-through.
  def : Pat<(DestInfo.VT
             (vselect_mask DestInfo.KRCWM:$mask,
                           (X86VBroadcast SrcInfo.FRC:$src),
                           DestInfo.RC:$src0)),
            (!cast<Instruction>(Name # DestInfo.ZSuffix # "rrk")
                DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
                (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;

  // Zero-masking: the pass-through is DestInfo.ImmAllZerosV.
  def : Pat<(DestInfo.VT
             (vselect_mask DestInfo.KRCWM:$mask,
                           (X86VBroadcast SrcInfo.FRC:$src),
                           DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name # DestInfo.ZSuffix # "rrkz")
                DestInfo.KRCWM:$mask,
                (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}
// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
//
// Defines six forms: rr / rrkz / rrk read the source from SrcInfo.RC, and
// rm / rmkz / rmk read it from SrcInfo.ScalarMemOp. In every pattern the
// broadcast is produced as DestInfo.VT and bitconverted to MaskInfo.VT, the
// type the destination register and the mask use. UnmaskedOp and
// UnmaskedBcastOp replace the broadcast operator in the unmasked rr and rm
// patterns only.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     bit IsConvertibleToThreeAddress,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
  // Assembly strings shared by the register and memory forms.
  defvar AsmUnmasked = OpcodeStr # "\t{$src, $dst|$dst, $src}";
  defvar AsmZeroMasked = OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                                     "${dst} {${mask}} {z}, $src}";
  defvar AsmMergeMasked = OpcodeStr # "\t{$src, ${dst} {${mask}}|" #
                                      "${dst} {${mask}}, $src}";

  // Register source, unmasked.
  let hasSideEffects = 0 in
  def rr : AVX512PI<opc, MRMSrcReg,
                    (outs MaskInfo.RC:$dst),
                    (ins SrcInfo.RC:$src),
                    AsmUnmasked,
                    [(set MaskInfo.RC:$dst,
                          (MaskInfo.VT
                           (bitconvert
                            (DestInfo.VT
                             (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
                    DestInfo.ExeDomain>,
           T8, PD, EVEX, Sched<[SchedRR]>;

  // Register source, zero-masking.
  def rrkz : AVX512PI<opc, MRMSrcReg,
                      (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
                      AsmZeroMasked,
                      [(set MaskInfo.RC:$dst,
                            (vselect_mask
                                MaskInfo.KRCWM:$mask,
                                (MaskInfo.VT
                                 (bitconvert
                                  (DestInfo.VT
                                   (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                                MaskInfo.ImmAllZerosV))],
                      DestInfo.ExeDomain>,
             T8, PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;

  // Register source, merge-masking; $src0 is tied to the destination.
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<opc, MRMSrcReg,
                     (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.RC:$src),
                     AsmMergeMasked,
                     [(set MaskInfo.RC:$dst,
                           (vselect_mask
                               MaskInfo.KRCWM:$mask,
                               (MaskInfo.VT
                                (bitconvert
                                 (DestInfo.VT
                                  (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                               MaskInfo.RC:$src0))],
                     DestInfo.ExeDomain>,
            T8, PD, EVEX, EVEX_K, Sched<[SchedRR]>;

  // Memory source, unmasked.
  let hasSideEffects = 0, mayLoad = 1 in
  def rm : AVX512PI<opc, MRMSrcMem,
                    (outs MaskInfo.RC:$dst),
                    (ins SrcInfo.ScalarMemOp:$src),
                    AsmUnmasked,
                    [(set MaskInfo.RC:$dst,
                          (MaskInfo.VT
                           (bitconvert
                            (DestInfo.VT
                             (UnmaskedBcastOp addr:$src)))))],
                    DestInfo.ExeDomain>,
           T8, PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  // Memory source, zero-masking.
  def rmkz : AVX512PI<opc, MRMSrcMem,
                      (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
                      AsmZeroMasked,
                      [(set MaskInfo.RC:$dst,
                            (vselect_mask
                                MaskInfo.KRCWM:$mask,
                                (MaskInfo.VT
                                 (bitconvert
                                  (DestInfo.VT
                                   (SrcInfo.BroadcastLdFrag addr:$src)))),
                                MaskInfo.ImmAllZerosV))],
                      DestInfo.ExeDomain>,
             T8, PD, EVEX, EVEX_KZ,
             EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  // Memory source, merge-masking; $src0 is tied to the destination. This is
  // the only form that receives IsConvertibleToThreeAddress.
  let Constraints = "$src0 = $dst",
      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
  def rmk : AVX512PI<opc, MRMSrcMem,
                     (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.ScalarMemOp:$src),
                     AsmMergeMasked,
                     [(set MaskInfo.RC:$dst,
                           (vselect_mask
                               MaskInfo.KRCWM:$mask,
                               (MaskInfo.VT
                                (bitconvert
                                 (DestInfo.VT
                                  (SrcInfo.BroadcastLdFrag addr:$src)))),
                               MaskInfo.RC:$src0))],
                     DestInfo.ExeDomain>,
            T8, PD, EVEX, EVEX_K,
            EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
}
// avx512_broadcast_rm is a pure forwarding multiclass: it inherits from
// avx512_broadcast_rm_split and passes DestInfo twice (presumably for that
// multiclass's MaskInfo and DestInfo parameters - its signature is defined
// earlier in the file), followed by SrcInfo. opc, OpcodeStr, the two
// SchedWrites and IsConvertibleToThreeAddress are forwarded unchanged.
// Only eight arguments are supplied here; other users of the split multiclass
// in this file pass two further trailing arguments (null_frag, null_frag).
1114 // Helper class to force mask and broadcast result to same type.
1115 multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
1116                                SchedWrite SchedRR, SchedWrite SchedRM,
1117                                X86VectorVTInfo DestInfo,
1118                                X86VectorVTInfo SrcInfo,
1119                                bit IsConvertibleToThreeAddress> :
1120   avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
1121                             DestInfo, DestInfo, SrcInfo,
1122                             IsConvertibleToThreeAddress>;
// Per-width instantiation used for VBROADCASTSD (see the defm further down).
//   opc       - opcode byte forwarded to avx512_broadcast_rm.
//   OpcodeStr - assembly mnemonic forwarded to avx512_broadcast_rm.
//   _         - AVX512VLVectorVTInfo supplying the 512/256/128-bit VT infos.
// Two widths are produced: Z (512-bit, guarded by HasAVX512) and Z256
// (256-bit, guarded by HasVLX). No Z128 variant is defined in this
// multiclass. Each width combines avx512_broadcast_rm with
// avx512_broadcast_scalar; the source info is always _.info128 and the
// IsConvertibleToThreeAddress argument is 1.
1124 multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1125                                   AVX512VLVectorVTInfo _> {
1126   let Predicates = [HasAVX512] in {
1127     defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1128                                   WriteFShuffle256Ld, _.info512, _.info128, 1>,
1129               avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1130               EVEX_V512;
1131   }
1133   let Predicates = [HasVLX] in {
1134     defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1135                                      WriteFShuffle256Ld, _.info256, _.info128, 1>,
1136                  avx512_broadcast_scalar<NAME, _.info256, _.info128>,
1137                  EVEX_V256;
1138   }
// Per-width instantiation used for VBROADCASTSS (see the defm further down).
// Same shape as avx512_fp_broadcast_sd, but additionally defines a Z128
// variant: Z is guarded by HasAVX512, Z256 and Z128 by HasVLX. Every width
// takes its source from _.info128 and passes 1 for
// IsConvertibleToThreeAddress.
// NOTE(review): Z128 uses WriteFShuffle256 / WriteFShuffle256Ld like the wider
// forms, whereas the integer Z128 broadcasts in this file use WriteShuffle /
// WriteShuffleXLd - confirm the 256-bit scheduling classes are intended here.
1141 multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1142                                   AVX512VLVectorVTInfo _> {
1143   let Predicates = [HasAVX512] in {
1144     defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1145                                   WriteFShuffle256Ld, _.info512, _.info128, 1>,
1146               avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1147               EVEX_V512;
1148   }
1150   let Predicates = [HasVLX] in {
1151     defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1152                                      WriteFShuffle256Ld, _.info256, _.info128, 1>,
1153                  avx512_broadcast_scalar<NAME, _.info256, _.info128>,
1154                  EVEX_V256;
1155     defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1156                                      WriteFShuffle256Ld, _.info128, _.info128, 1>,
1157                  avx512_broadcast_scalar<NAME, _.info128, _.info128>,
1158                  EVEX_V128;
1159   }
// Top-level FP scalar broadcast instantiations:
//   VBROADCASTSS - opcode 0x18, f32 vector infos.
//   VBROADCASTSD - opcode 0x19, f64 vector infos, with REX_W added.
1161 defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1162                                        avx512vl_f32_info>;
1163 defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1164                                        avx512vl_f64_info>, REX_W;
// Register-source integer broadcast for a single vector width.
//   opc     - opcode byte.
//   SchedRR - scheduling class attached to the instruction.
//   _       - vector VT info (supplies RC, VT, Suffix and ExeDomain).
//   OpNode  - DAG operator matched on the source register.
//   SrcRC   - register class of the $src operand.
// Defines "rr" through AVX512_maskable with the mnemonic
// "vpbroadcast" # _.Suffix and the pattern (_.VT (OpNode SrcRC:$src)).
// The three commutable flags are passed as 0 and the select operator argument
// is plain vselect; the in-body comment states the reason for that choice.
1166 multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1167                                     X86VectorVTInfo _, SDPatternOperator OpNode,
1168                                     RegisterClass SrcRC> {
1169   // Fold with a mask even if it has multiple uses since it is cheap.
1170   let ExeDomain = _.ExeDomain in
1171   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1172                           (ins SrcRC:$src),
1173                           "vpbroadcast"#_.Suffix, "$src", "$src",
1174                           (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
1175                           /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
1176                           T8, PD, EVEX, Sched<[SchedRR]>;
// Register-source byte/word broadcast for a single vector width.
//   opc     - opcode byte.
//   Name    - full record-name prefix; concatenated with rr / rrk / rrkz and
//             !cast to Instruction in the patterns below.
//   SchedRR - scheduling class attached to the instructions.
//   _       - vector VT info.
//   OpNode  - DAG operator matched on the source register.
//   SrcRC   - register class of the value being broadcast in the patterns.
//   Subreg  - sub-register index used to place SrcRC inside a 32-bit register.
// The instruction itself always takes a GR32 operand and is created through
// AVX512_maskable_custom with three empty pattern lists, so selection is
// driven only by the explicit Pats that follow. Each Pat widens SrcRC:$src to
// i32 with INSERT_SUBREG into an IMPLICIT_DEF before feeding the instruction.
1179 multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1180                                     X86VectorVTInfo _, SDPatternOperator OpNode,
1181                                     RegisterClass SrcRC, SubRegIndex Subreg> {
1182   let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1183   defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
1184                          (outs _.RC:$dst), (ins GR32:$src),
1185                          !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1186                          !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1187                          "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
1188                          "$src0 = $dst">, T8, PD, EVEX, Sched<[SchedRR]>;
// Unmasked form.
1190   def : Pat <(_.VT (OpNode SrcRC:$src)),
1191              (!cast<Instruction>(Name#rr)
1192               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
// Masked form with $src0 as the value for unselected elements (rrk).
1194   // Fold with a mask even if it has multiple uses since it is cheap.
1195   def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1196              (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
1197               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
// Masked form with an all-zeros vector for unselected elements (rrkz).
1199   def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1200              (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
1201               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
// Expands avx512_int_broadcastbw_reg over the three vector widths.
//   Z    - _.info512, guarded by [prd], WriteShuffle256, EVEX_V512.
//   Z256 - _.info256, guarded by [prd, HasVLX], WriteShuffle256, EVEX_V256.
//   Z128 - _.info128, guarded by [prd, HasVLX], WriteShuffle, EVEX_V128.
// Name is extended with the width suffix (Name#Z, Name#Z256, Name#Z128)
// before being handed down, where it is used to look up instruction records.
1204 multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1205                       AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1206                       RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1207   let Predicates = [prd] in
1208     defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1209               OpNode, SrcRC, Subreg>, EVEX_V512;
1210   let Predicates = [prd, HasVLX] in {
1211     defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1212               _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1213     defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1214               _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
1215   }
// Expands avx512_int_broadcast_reg over the three vector widths.
//   Z    - _.info512, guarded by [prd], WriteShuffle256, EVEX_V512.
//   Z256 - _.info256, guarded by [prd, HasVLX], WriteShuffle256, EVEX_V256.
//   Z128 - _.info128, guarded by [prd, HasVLX], WriteShuffle, EVEX_V128.
// opc, OpNode and SrcRC are forwarded unchanged to every width.
1218 multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1219                                        SDPatternOperator OpNode,
1220                                        RegisterClass SrcRC, Predicate prd> {
1221   let Predicates = [prd] in
1222     defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1223                                       SrcRC>, EVEX_V512;
1224   let Predicates = [prd, HasVLX] in {
1225     defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1226                                          SrcRC>, EVEX_V256;
1227     defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
1228                                          SrcRC>, EVEX_V128;
1229   }
// General-purpose-register-source integer broadcasts, all matching
// X86VBroadcast:
//   VPBROADCASTBr - opcode 0x7A, GR8 via sub_8bit, requires HasBWI.
//   VPBROADCASTWr - opcode 0x7B, GR16 via sub_16bit, requires HasBWI.
//   VPBROADCASTDr - opcode 0x7C, GR32, requires HasAVX512.
//   VPBROADCASTQr - opcode 0x7C, GR64, requires HasAVX512, with REX_W added.
// For the byte/word forms the string argument repeats the defm name because
// the callee uses it to !cast the generated instruction records.
1232 defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1233                        avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1234 defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1235                        avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1236                        HasBWI>;
1237 defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1238                                                  X86VBroadcast, GR32, HasAVX512>;
1239 defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1240                                                  X86VBroadcast, GR64, HasAVX512>, REX_W;
// Expands avx512_broadcast_rm over the three vector widths for the integer
// broadcasts.
//   Z    - _.info512, [prd], WriteShuffle256 / WriteShuffle256Ld, EVEX_V512.
//   Z256 - _.info256, [prd, HasVLX], WriteShuffle256 / WriteShuffle256Ld,
//          EVEX_V256.
//   Z128 - _.info128, [prd, HasVLX], WriteShuffle / WriteShuffleXLd,
//          EVEX_V128.
// The source info is _.info128 for every width, and
// IsConvertibleToThreeAddress is forwarded unchanged.
1242 multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1243                                       AVX512VLVectorVTInfo _, Predicate prd,
1244                                       bit IsConvertibleToThreeAddress> {
1245   let Predicates = [prd] in {
1246     defm Z :   avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1247                                    WriteShuffle256Ld, _.info512, _.info128,
1248                                    IsConvertibleToThreeAddress>,
1249                                   EVEX_V512;
1250   }
1251   let Predicates = [prd, HasVLX] in {
1252     defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1253                                     WriteShuffle256Ld, _.info256, _.info128,
1254                                     IsConvertibleToThreeAddress>,
1255                                  EVEX_V256;
1256     defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
1257                                     WriteShuffleXLd, _.info128, _.info128,
1258                                     IsConvertibleToThreeAddress>,
1259                                  EVEX_V128;
1260   }
// Integer broadcast instantiations built on avx512_int_broadcast_rm_vl:
//   VPBROADCASTB - opcode 0x78, i8 infos,  HasBWI,    three-address flag 0.
//   VPBROADCASTW - opcode 0x79, i16 infos, HasBWI,    three-address flag 0.
//   VPBROADCASTD - opcode 0x58, i32 infos, HasAVX512, three-address flag 1.
//   VPBROADCASTQ - opcode 0x59, i64 infos, HasAVX512, three-address flag 1,
//                  with REX_W added.
1263 defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1264                                            avx512vl_i8_info, HasBWI, 0>;
1265 defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1266                                            avx512vl_i16_info, HasBWI, 0>;
1267 defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1268                                            avx512vl_i32_info, HasAVX512, 1>;
1269 defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1270                                            avx512vl_i64_info, HasAVX512, 1>, REX_W;
// Memory-source subvector broadcast.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - DAG operator applied to addr:$src in the pattern.
//   _Dst      - VT info of the result (register class, VT, masking).
//   _Src      - VT info whose MemOp gives the memory operand type.
// Defines "rm" through AVX512_maskable with the pattern
// (_Dst.VT (OpNode addr:$src)), scheduled as SchedWriteShuffle.YMM.Folded.
1272 multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1273                                       SDPatternOperator OpNode,
1274                                       X86VectorVTInfo _Dst,
1275                                       X86VectorVTInfo _Src> {
1276   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1277                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1278                            (_Dst.VT (OpNode addr:$src))>,
1279                            Sched<[SchedWriteShuffle.YMM.Folded]>,
1280                            AVX5128IBase, EVEX;
// Same parameters as avx512_subvec_broadcast_rm, but built on
// AVX512_maskable_split: null_frag is passed in the first pattern slot and
// the real pattern (_Dst.VT (OpNode addr:$src)) in the second. Because no
// unmasked pattern is attached, hasSideEffects = 0 and mayLoad = 1 are set
// explicitly on the definitions.
1283 // This should be used for the AVX512DQ broadcast instructions. It disables
1284 // the unmasked patterns so that we only use the DQ instructions when masking
1285 //  is requested.
1286 multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1287                                          SDPatternOperator OpNode,
1288                                          X86VectorVTInfo _Dst,
1289                                          X86VectorVTInfo _Src> {
1290   let hasSideEffects = 0, mayLoad = 1 in
1291   defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1292                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1293                            (null_frag),
1294                            (_Dst.VT (OpNode addr:$src))>,
1295                            Sched<[SchedWriteShuffle.YMM.Folded]>,
1296                            AVX5128IBase, EVEX;
// 512-bit f16 broadcasts (v32f16), guarded by HasBWI. All three patterns
// select the integer word broadcast VPBROADCASTWZ:
//   - X86VBroadcastld16 from memory        -> VPBROADCASTWZrm
//   - X86VBroadcast of a v8f16 in VR128X   -> VPBROADCASTWZrr
//   - X86VBroadcast of a scalar f16 (FR16X) -> VPBROADCASTWZrr after
//     COPY_TO_REGCLASS into VR128X.
1298 let Predicates = [HasBWI] in {
1299   def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
1300             (VPBROADCASTWZrm addr:$src)>;
1302   def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
1303             (VPBROADCASTWZrr VR128X:$src)>;
1304   def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
1305             (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
// 128-bit (v8f16) and 256-bit (v16f16) f16 broadcasts, guarded by
// HasVLX and HasBWI. They mirror the 512-bit patterns but select the
// VPBROADCASTWZ128 / VPBROADCASTWZ256 instructions:
//   - X86VBroadcastld16 from memory         -> ...rm
//   - X86VBroadcast of a v8f16 in VR128X    -> ...rr
//   - X86VBroadcast of a scalar f16 (FR16X) -> ...rr after COPY_TO_REGCLASS
//     into VR128X.
1307 let Predicates = [HasVLX, HasBWI] in {
1308   def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
1309             (VPBROADCASTWZ128rm addr:$src)>;
1310   def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
1311             (VPBROADCASTWZ256rm addr:$src)>;
1313   def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
1314             (VPBROADCASTWZ128rr VR128X:$src)>;
1315   def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
1316             (VPBROADCASTWZ256rr VR128X:$src)>;
1318   def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
1319             (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1320   def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
1321             (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1324 //===----------------------------------------------------------------------===//
1325 // AVX-512 BROADCAST SUBVECTORS
// 512-bit subvector broadcasts from memory (all EVEX_V512):
//   VBROADCASTI32X4 - opcode 0x5a, X86SubVBroadcastld128, v16i32 <- v4i32,
//                     EVEX_CD8<32, CD8VT4>.
//   VBROADCASTF32X4 - opcode 0x1a, X86SubVBroadcastld128, v16f32 <- v4f32,
//                     EVEX_CD8<32, CD8VT4>.
//   VBROADCASTI64X4 - opcode 0x5b, X86SubVBroadcastld256, v8i64 <- v4i64,
//                     REX_W, EVEX_CD8<64, CD8VT4>.
//   VBROADCASTF64X4 - opcode 0x1b, X86SubVBroadcastld256, v8f64 <- v4f64,
//                     REX_W, EVEX_CD8<64, CD8VT4>.
1328 defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1329                        X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
1330                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
1331 defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1332                        X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
1333                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
1334 defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1335                        X86SubVBroadcastld256, v8i64_info, v4i64x_info>, REX_W,
1336                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
1337 defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1338                        X86SubVBroadcastld256, v8f64_info, v4f64x_info>, REX_W,
1339                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
// Extra selection patterns for the 512-bit subvector broadcasts, guarded by
// HasAVX512. The unmasked patterns let every 512-bit element type reuse the
// four instructions defined above; the masked patterns handle a select whose
// broadcast operand is bitcast from the other element width.
1341 let Predicates = [HasAVX512] in {
// Unmasked 256-bit subvector loads: FP types -> VBROADCASTF64X4rm,
// integer types -> VBROADCASTI64X4rm.
1342 def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
1343           (VBROADCASTF64X4rm addr:$src)>;
1344 def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
1345           (VBROADCASTF64X4rm addr:$src)>;
1346 def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
1347           (VBROADCASTF64X4rm addr:$src)>;
1348 def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
1349           (VBROADCASTI64X4rm addr:$src)>;
1350 def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
1351           (VBROADCASTI64X4rm addr:$src)>;
1352 def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
1353           (VBROADCASTI64X4rm addr:$src)>;
1354 def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
1355           (VBROADCASTI64X4rm addr:$src)>;
// Unmasked 128-bit subvector loads: FP types -> VBROADCASTF32X4rm,
// integer types -> VBROADCASTI32X4rm.
1357 def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
1358           (VBROADCASTF32X4rm addr:$src)>;
1359 def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
1360           (VBROADCASTF32X4rm addr:$src)>;
1361 def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
1362           (VBROADCASTF32X4rm addr:$src)>;
1363 def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
1364           (VBROADCASTI32X4rm addr:$src)>;
1365 def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
1366           (VBROADCASTI32X4rm addr:$src)>;
1367 def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
1368           (VBROADCASTI32X4rm addr:$src)>;
1369 def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
1370           (VBROADCASTI32X4rm addr:$src)>;
// 16-element masks (VK16WM) over a 128-bit broadcast produced as a 64-bit
// element vector and bitcast to 32-bit elements -> 32X4 rmkz / rmk forms.
1372 // Patterns for selects of bitcasted operations.
1373 def : Pat<(vselect_mask VK16WM:$mask,
1374                         (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1375                         (v16f32 immAllZerosV)),
1376           (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1377 def : Pat<(vselect_mask VK16WM:$mask,
1378                         (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1379                         VR512:$src0),
1380           (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1381 def : Pat<(vselect_mask VK16WM:$mask,
1382                         (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1383                         (v16i32 immAllZerosV)),
1384           (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1385 def : Pat<(vselect_mask VK16WM:$mask,
1386                         (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1387                         VR512:$src0),
1388           (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
// 8-element masks (VK8WM) over a 256-bit broadcast produced as a 32-bit
// element vector and bitcast to 64-bit elements -> 64X4 rmkz / rmk forms.
1390 def : Pat<(vselect_mask VK8WM:$mask,
1391                         (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1392                         (v8f64 immAllZerosV)),
1393           (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1394 def : Pat<(vselect_mask VK8WM:$mask,
1395                         (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1396                         VR512:$src0),
1397           (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1398 def : Pat<(vselect_mask VK8WM:$mask,
1399                         (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1400                         (v8i64 immAllZerosV)),
1401           (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1402 def : Pat<(vselect_mask VK8WM:$mask,
1403                         (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1404                         VR512:$src0),
1405           (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
// 256-bit forms of the 32x4 subvector broadcasts, guarded by HasVLX:
// the two defms reuse opcodes 0x5a / 0x1a with EVEX_V256 and 8-element
// destination infos, followed by patterns that route the remaining 256-bit
// element types and the bitcast-under-select cases to those instructions.
1408 let Predicates = [HasVLX] in {
1409 defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1410                            X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
1411                            EVEX_V256, EVEX_CD8<32, CD8VT4>;
1412 defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1413                            X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
1414                            EVEX_V256, EVEX_CD8<32, CD8VT4>;
// Unmasked: FP types -> VBROADCASTF32X4Z256rm, integer types ->
// VBROADCASTI32X4Z256rm.
1416 def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
1417           (VBROADCASTF32X4Z256rm addr:$src)>;
1418 def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
1419           (VBROADCASTF32X4Z256rm addr:$src)>;
1420 def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
1421           (VBROADCASTF32X4Z256rm addr:$src)>;
1422 def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
1423           (VBROADCASTI32X4Z256rm addr:$src)>;
1424 def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
1425           (VBROADCASTI32X4Z256rm addr:$src)>;
1426 def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
1427           (VBROADCASTI32X4Z256rm addr:$src)>;
1428 def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
1429           (VBROADCASTI32X4Z256rm addr:$src)>;
// Masked (VK8WM): the broadcast is produced with 64-bit elements and bitcast
// to 32-bit elements before the select.
1431 // Patterns for selects of bitcasted operations.
1432 def : Pat<(vselect_mask VK8WM:$mask,
1433                         (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1434                         (v8f32 immAllZerosV)),
1435           (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1436 def : Pat<(vselect_mask VK8WM:$mask,
1437                         (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1438                         VR256X:$src0),
1439           (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1440 def : Pat<(vselect_mask VK8WM:$mask,
1441                         (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1442                         (v8i32 immAllZerosV)),
1443           (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1444 def : Pat<(vselect_mask VK8WM:$mask,
1445                         (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1446                         VR256X:$src0),
1447           (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
// bf16 subvector broadcasts reuse the FP subvector broadcast instructions:
//   v32bf16 from a 256-bit load -> VBROADCASTF64X4rm     (HasBF16)
//   v32bf16 from a 128-bit load -> VBROADCASTF32X4rm     (HasBF16)
//   v16bf16 from a 128-bit load -> VBROADCASTF32X4Z256rm (HasBF16, HasVLX)
1450 let Predicates = [HasBF16] in {
1451   def : Pat<(v32bf16 (X86SubVBroadcastld256 addr:$src)),
1452             (VBROADCASTF64X4rm addr:$src)>;
1453   def : Pat<(v32bf16 (X86SubVBroadcastld128 addr:$src)),
1454             (VBROADCASTF32X4rm addr:$src)>;
1457 let Predicates = [HasBF16, HasVLX] in
1458   def : Pat<(v16bf16 (X86SubVBroadcastld128 addr:$src)),
1459             (VBROADCASTF32X4Z256rm addr:$src)>;
// 256-bit 64x2 subvector broadcasts, guarded by HasVLX and HasDQI. They are
// built on the _dq multiclass, so only masked selection patterns exist.
// NOTE(review): the record names end in "Z128" although the definitions carry
// EVEX_V256 and 256-bit destination infos (v4i64x_info / v4f64x_info). The
// patterns below reference these exact names, so they are left untouched.
1461 let Predicates = [HasVLX, HasDQI] in {
1462 defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1463                            X86SubVBroadcastld128, v4i64x_info, v2i64x_info>,
1464                            EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
1465 defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1466                            X86SubVBroadcastld128, v4f64x_info, v2f64x_info>,
1467                            EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
// Masked (VK4WM): the broadcast is produced with 32-bit elements and bitcast
// to 64-bit elements before the select.
1469 // Patterns for selects of bitcasted operations.
1470 def : Pat<(vselect_mask VK4WM:$mask,
1471                         (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1472                         (v4f64 immAllZerosV)),
1473           (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1474 def : Pat<(vselect_mask VK4WM:$mask,
1475                         (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1476                         VR256X:$src0),
1477           (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1478 def : Pat<(vselect_mask VK4WM:$mask,
1479                         (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1480                         (v4i64 immAllZerosV)),
1481           (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1482 def : Pat<(vselect_mask VK4WM:$mask,
1483                         (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1484                         VR256X:$src0),
1485           (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
// 512-bit subvector broadcasts guarded by HasDQI, all built on the _dq
// multiclass (masked selection only) and all EVEX_V512:
//   VBROADCASTI64X2 - opcode 0x5a, 128-bit load, v8i64,  REX_W, CD8VT2.
//   VBROADCASTI32X8 - opcode 0x5b, 256-bit load, v16i32,        CD8VT8.
//   VBROADCASTF64X2 - opcode 0x1a, 128-bit load, v8f64,  REX_W, CD8VT2.
//   VBROADCASTF32X8 - opcode 0x1b, 256-bit load, v16f32,        CD8VT8.
1488 let Predicates = [HasDQI] in {
1489 defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1490                        X86SubVBroadcastld128, v8i64_info, v2i64x_info>, REX_W,
1491                        EVEX_V512, EVEX_CD8<64, CD8VT2>;
1492 defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1493                        X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
1494                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
1495 defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1496                        X86SubVBroadcastld128, v8f64_info, v2f64x_info>, REX_W,
1497                        EVEX_V512, EVEX_CD8<64, CD8VT2>;
1498 defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1499                        X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
1500                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
// 16-element masks (VK16WM) over a 256-bit broadcast bitcast from 64-bit to
// 32-bit elements -> 32X8 rmkz / rmk forms.
1502 // Patterns for selects of bitcasted operations.
1503 def : Pat<(vselect_mask VK16WM:$mask,
1504                         (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1505                         (v16f32 immAllZerosV)),
1506           (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1507 def : Pat<(vselect_mask VK16WM:$mask,
1508                         (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1509                         VR512:$src0),
1510           (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1511 def : Pat<(vselect_mask VK16WM:$mask,
1512                         (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1513                         (v16i32 immAllZerosV)),
1514           (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1515 def : Pat<(vselect_mask VK16WM:$mask,
1516                         (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1517                         VR512:$src0),
1518           (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
// 8-element masks (VK8WM) over a 128-bit broadcast bitcast from 32-bit to
// 64-bit elements -> 64X2 rmkz / rmk forms.
1520 def : Pat<(vselect_mask VK8WM:$mask,
1521                         (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1522                         (v8f64 immAllZerosV)),
1523           (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1524 def : Pat<(vselect_mask VK8WM:$mask,
1525                         (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1526                         VR512:$src0),
1527           (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1528 def : Pat<(vselect_mask VK8WM:$mask,
1529                         (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1530                         (v8i64 immAllZerosV)),
1531           (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1532 def : Pat<(vselect_mask VK8WM:$mask,
1533                         (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1534                         VR512:$src0),
1535           (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
// 512-bit and 256-bit forms of the 32x2 broadcasts, built directly on
// avx512_broadcast_rm_split.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   _Dst      - VT infos passed in the first info slot of the split
//               multiclass.
//   _Src      - VT infos passed in the second slot (at the matching width)
//               and in the third slot (always info128).
// Z is guarded by HasDQI; Z256 by HasDQI and HasVLX. Both pass 0 for
// IsConvertibleToThreeAddress and null_frag for the two trailing operator
// arguments.
1538 multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1539                                         AVX512VLVectorVTInfo _Dst,
1540                                         AVX512VLVectorVTInfo _Src> {
1541   let Predicates = [HasDQI] in
1542     defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1543                                           WriteShuffle256Ld, _Dst.info512,
1544                                           _Src.info512, _Src.info128, 0, null_frag, null_frag>,
1545                                           EVEX_V512;
1546   let Predicates = [HasDQI, HasVLX] in
1547     defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1548                                           WriteShuffle256Ld, _Dst.info256,
1549                                           _Src.info256, _Src.info128, 0, null_frag, null_frag>,
1550                                           EVEX_V256;
// Extends avx512_common_broadcast_32x2 (inherited, so Z and Z256 come from
// there) with a 128-bit Z128 form. Z128 is guarded by HasDQI and HasVLX, uses
// WriteShuffle / WriteShuffleXLd and, like the wider forms, passes 0 and two
// null_frag arguments to avx512_broadcast_rm_split.
1553 multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1554                                          AVX512VLVectorVTInfo _Dst,
1555                                          AVX512VLVectorVTInfo _Src> :
1556   avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1558   let Predicates = [HasDQI, HasVLX] in
1559     defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
1560                                           WriteShuffleXLd, _Dst.info128,
1561                                           _Src.info128, _Src.info128, 0, null_frag, null_frag>,
1562                                           EVEX_V128;
// 32x2 broadcast instantiations:
//   VBROADCASTI32X2 - opcode 0x59, uses the i32x2 multiclass and therefore
//                     gets Z, Z256 and Z128 forms.
//   VBROADCASTF32X2 - opcode 0x19, uses the base 32x2 multiclass and
//                     therefore gets only Z and Z256 forms.
1565 defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1566                                           avx512vl_i32_info, avx512vl_i64_info>;
1567 defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1568                                           avx512vl_f32_info, avx512vl_f64_info>;
1570 //===----------------------------------------------------------------------===//
1571 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
1572 //---
// Single-width mask-register-to-vector broadcast.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   _         - vector VT info of the destination.
//   KRC       - mask register class of the $src operand.
// Defines "rr" with the pattern (_.VT (X86VBroadcastm KRC:$src)), scheduled
// as WriteShuffle. No masked variants are defined here.
1573 multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1574                                   X86VectorVTInfo _, RegisterClass KRC> {
1575   def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1576                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1577                   [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1578                   EVEX, Sched<[WriteShuffle]>;
// Expands avx512_mask_broadcastm over the three vector widths:
//   Z    - VTInfo.info512, guarded by HasCDI, EVEX_V512.
//   Z256 - VTInfo.info256, guarded by HasCDI and HasVLX, EVEX_V256.
//   Z128 - VTInfo.info128, guarded by HasCDI and HasVLX, EVEX_V128.
// The same KRC is used at every width.
1581 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1582                                  AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1583   let Predicates = [HasCDI] in
1584     defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1585   let Predicates = [HasCDI, HasVLX] in {
1586     defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1587     defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1588   }
// Mask-to-vector broadcast instantiations:
//   VPBROADCASTMW2D - opcode 0x3A, i32 vector infos, VK16 source.
//   VPBROADCASTMB2Q - opcode 0x2A, i64 vector infos, VK8 source, REX_W added.
1591 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1592                                                avx512vl_i32_info, VK16>;
1593 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1594                                                avx512vl_i64_info, VK8>, REX_W;
1596 //===----------------------------------------------------------------------===//
1597 // -- VPERMI2 - 3 source operands form --
// Register (rr) and full-vector-memory (rm) forms of a VPERMI2-style
// instruction for one vector width.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   sched     - foldable scheduling class; rr uses sched, rm uses
//               sched.Folded and sched.ReadAfterFold.
//   _         - VT info of the data operands ($src2, $src3) and the result.
//   IdxVT     - VT info of $src1, which the pattern reads as IdxVT.RC.
// $src1 is tied to $dst by the constraint; it does not appear in the explicit
// ins lists given here, only in the patterns. Both forms match X86VPermt2
// with operand order ($src2, $src1, $src3) and pass 1 as the argument that
// follows the pattern in AVX512_maskable_3src_cast.
1598 multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1599                          X86FoldableSchedWrite sched,
1600                          X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1601 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1602     hasSideEffects = 0 in {
1603   defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1604           (ins _.RC:$src2, _.RC:$src3),
1605           OpcodeStr, "$src3, $src2", "$src2, $src3",
1606           (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1607           EVEX, VVVV, AVX5128IBase, Sched<[sched]>;
// Memory form: $src3 is loaded with _.LdFrag.
1609   let mayLoad = 1 in
1610   defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1611             (ins _.RC:$src2, _.MemOp:$src3),
1612             OpcodeStr, "$src3, $src2", "$src2, $src3",
1613             (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1614                    (_.VT (_.LdFrag addr:$src3)))), 1>,
1615             EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1616   }
// Broadcast-memory (rmb) form of a VPERMI2-style instruction for one vector
// width. Parameters match avx512_perm_i. $src3 is a scalar memory operand
// (_.ScalarMemOp) loaded through _.BroadcastLdFrag, the assembly strings
// append _.BroadcastStr to $src3, and EVEX_B is added to the encoding.
// As in avx512_perm_i, $src1 is tied to $dst and read as IdxVT.RC.
1619 multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1620                             X86FoldableSchedWrite sched,
1621                             X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1622   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1623       hasSideEffects = 0, mayLoad = 1 in
1624   defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1625               (ins _.RC:$src2, _.ScalarMemOp:$src3),
1626               OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1627               !strconcat("$src2, ${src3}", _.BroadcastStr ),
1628               (_.VT (X86VPermt2 _.RC:$src2,
1629                IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1630               AVX5128IBase, EVEX, VVVV, EVEX_B,
1631               Sched<[sched.Folded, sched.ReadAfterFold]>;
// Expands avx512_perm_i plus avx512_perm_i_mb (so rr, rm and rmb) over the
// three vector widths.
//   VTInfo      - data/result VT infos.
//   ShuffleMask - VT infos passed as IdxVT at the matching width.
// The Z (512-bit) form has no Predicates set inside this multiclass; the
// Z128 and Z256 forms are guarded by HasVLX.
1634 multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1635                                X86FoldableSchedWrite sched,
1636                                AVX512VLVectorVTInfo VTInfo,
1637                                AVX512VLVectorVTInfo ShuffleMask> {
1638   defm NAME#Z: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1639                              ShuffleMask.info512>,
1640                avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1641                                 ShuffleMask.info512>, EVEX_V512;
1642   let Predicates = [HasVLX] in {
1643   defm NAME#Z128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1644                                 ShuffleMask.info128>,
1645                   avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1646                                    ShuffleMask.info128>, EVEX_V128;
1647   defm NAME#Z256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1648                                 ShuffleMask.info256>,
1649                   avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1650                                    ShuffleMask.info256>, EVEX_V256;
1651   }
// Variant of avx512_perm_i_sizes that takes an explicit feature predicate and
// instantiates only avx512_perm_i (rr and rm); avx512_perm_i_mb is not used,
// so no rmb form is produced.
//   Idx - VT infos passed as IdxVT at the matching width.
//   Prd - predicate required by every width; Z128 and Z256 additionally
//         require HasVLX.
1654 multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1655                                   X86FoldableSchedWrite sched,
1656                                   AVX512VLVectorVTInfo VTInfo,
1657                                   AVX512VLVectorVTInfo Idx,
1658                                   Predicate Prd> {
1659   let Predicates = [Prd] in
1660   defm NAME#Z: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1661                              Idx.info512>, EVEX_V512;
1662   let Predicates = [Prd, HasVLX] in {
1663   defm NAME#Z128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1664                                 Idx.info128>, EVEX_V128;
1665   defm NAME#Z256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1666                                 Idx.info256>,  EVEX_V256;
1667   }
// VPERMI2 instruction families.
//  - D/Q and PS/PD use avx512_perm_i_sizes (includes the broadcast-memory
//    form); W and B use avx512_perm_i_sizes_bw, gated on HasBWI and HasVBMI
//    respectively.
//  - Opcode bytes: 0x76 for D/Q, 0x75 for W/B, 0x77 for PS/PD; the Q, W and PD
//    variants additionally carry REX_W.
//  - The PS/PD variants pass an f32/f64 info as the data type and an i32/i64
//    info as the second (index) type.
1670 defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1671                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1672 defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1673                   avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1674 defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1675                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1676                   REX_W, EVEX_CD8<16, CD8VF>;
1677 defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1678                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1679                   EVEX_CD8<8, CD8VF>;
1680 defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1681                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1682 defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1683                   avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1685 // Extra patterns to deal with extra bitcasts due to passthru and index being
1686 // different types on the fp versions.
// Emits three selection patterns that map a vselect_mask of an X86VPermt2
// node onto the merge-masked instruction whose name is InstrStr plus a suffix:
//   "rrk"  - $src3 is a register
//   "rmk"  - $src3 is a full-vector load (_.LdFrag)
//   "rmbk" - $src3 is a broadcast load (_.BroadcastLdFrag)
// In every pattern $src1 appears twice on the DAG side: bitcast from CastVT to
// IdxVT.VT as the middle X86VPermt2 operand, and bitcast from CastVT to _.VT
// as the vselect_mask passthru. The output instruction takes $src1 once,
// followed by the mask, $src2 and $src3.
1687 multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1688                                   X86VectorVTInfo IdxVT,
1689                                   X86VectorVTInfo CastVT> {
1690   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1691                                 (X86VPermt2 (_.VT _.RC:$src2),
1692                                             (IdxVT.VT (bitconvert
1693                                                        (CastVT.VT _.RC:$src1))),
1694                                             _.RC:$src3),
1695                                 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1696             (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1697                                                 _.RC:$src2, _.RC:$src3)>;
1698   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1699                                 (X86VPermt2 _.RC:$src2,
1700                                             (IdxVT.VT (bitconvert
1701                                                        (CastVT.VT _.RC:$src1))),
1702                                             (_.LdFrag addr:$src3)),
1703                                 (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1704             (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1705                                                 _.RC:$src2, addr:$src3)>;
1706   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1707                                  (X86VPermt2 _.RC:$src2,
1708                                              (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1709                                              (_.BroadcastLdFrag addr:$src3)),
1710                                  (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1711             (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1712                                                  _.RC:$src2, addr:$src3)>;
1715 // TODO: Should we add more casts? The vXi64 case is common due to ABI.
// Instantiate the bitcast-tolerant patterns for the three VPERMI2PS widths.
// Arguments are (data info, index info, cast info): f32 data, i32 index, and
// an i64-element CastVT with the same total vector width.
1716 defm : avx512_perm_i_lowering<"VPERMI2PSZ", v16f32_info, v16i32_info, v8i64_info>;
1717 defm : avx512_perm_i_lowering<"VPERMI2PSZ256", v8f32x_info, v8i32x_info, v4i64x_info>;
1718 defm : avx512_perm_i_lowering<"VPERMI2PSZ128", v4f32x_info, v4i32x_info, v2i64x_info>;
1720 // VPERMT2
// Defines the "rr" (register $src3) and "rm" (full-vector load $src3) forms
// through AVX512_maskable_3src. $src1 is tied to $dst by the Constraints
// string and is the first X86VPermt2 operand; IdxVT.RC:$src2 is the middle
// operand. Only $src2 and $src3 are listed in the explicit ins dag; $src1 is
// presumably added by AVX512_maskable_3src (defined outside this view).
// NOTE(review): the trailing "1" template argument is presumably the
// IsCommutable flag of AVX512_maskable_3src - confirm against its definition.
1721 multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1722                          X86FoldableSchedWrite sched,
1723                          X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1724 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1725   defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1726           (ins IdxVT.RC:$src2, _.RC:$src3),
1727           OpcodeStr, "$src3, $src2", "$src2, $src3",
1728           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1729           EVEX, VVVV, AVX5128IBase, Sched<[sched]>;
1731   defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1732             (ins IdxVT.RC:$src2, _.MemOp:$src3),
1733             OpcodeStr, "$src3, $src2", "$src2, $src3",
1734             (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1735                    (_.LdFrag addr:$src3))), 1>,
1736             EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1737   }
// Broadcast-memory companion of avx512_perm_t: defines the "rmb" form, where
// $src3 is a scalar memory operand matched through _.BroadcastLdFrag and the
// instruction is tagged EVEX_B. The assembly operand string appends
// _.BroadcastStr to ${src3} in both the AT&T and Intel variants.
// As in avx512_perm_t, $src1 is tied to $dst.
1739 multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1740                             X86FoldableSchedWrite sched,
1741                             X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1742   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1743   defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1744               (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1745               OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1746               !strconcat("$src2, ${src3}", _.BroadcastStr ),
1747               (_.VT (X86VPermt2 _.RC:$src1,
1748                IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1749               AVX5128IBase, EVEX, VVVV, EVEX_B,
1750               Sched<[sched.Folded, sched.ReadAfterFold]>;
// Per-width wrapper combining avx512_perm_t (rr/rm) with avx512_perm_t_mb
// (rmb) under a single NAME:
//   NAME#Z    - info512 entries, tagged EVEX_V512 (no extra predicate here)
//   NAME#Z128 - info128 entries, tagged EVEX_V128, only under HasVLX
//   NAME#Z256 - info256 entries, tagged EVEX_V256, only under HasVLX
// VTInfo supplies the "_" data info and ShuffleMask the IdxVT info of the
// underlying multiclasses.
1753 multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1754                                X86FoldableSchedWrite sched,
1755                                AVX512VLVectorVTInfo VTInfo,
1756                                AVX512VLVectorVTInfo ShuffleMask> {
1757   defm NAME#Z: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1758                              ShuffleMask.info512>,
1759                avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1760                                 ShuffleMask.info512>, EVEX_V512;
1761   let Predicates = [HasVLX] in {
1762   defm NAME#Z128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1763                                 ShuffleMask.info128>,
1764                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1765                                    ShuffleMask.info128>, EVEX_V128;
1766   defm NAME#Z256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1767                                 ShuffleMask.info256>,
1768                    avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1769                                     ShuffleMask.info256>, EVEX_V256;
1770   }
// Per-width wrapper that instantiates only avx512_perm_t (rr/rm); the
// broadcast-memory multiclass avx512_perm_t_mb is not used here.
// The caller supplies the feature predicate:
//   NAME#Z               - requires Prd
//   NAME#Z128, NAME#Z256 - require Prd and HasVLX
1773 multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1774                                   X86FoldableSchedWrite sched,
1775                                   AVX512VLVectorVTInfo VTInfo,
1776                                   AVX512VLVectorVTInfo Idx, Predicate Prd> {
1777   let Predicates = [Prd] in
1778   defm NAME#Z: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1779                              Idx.info512>, EVEX_V512;
1780   let Predicates = [Prd, HasVLX] in {
1781   defm NAME#Z128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1782                                 Idx.info128>, EVEX_V128;
1783   defm NAME#Z256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1784                                 Idx.info256>, EVEX_V256;
1785   }
// VPERMT2 instruction families.
//  - D/Q and PS/PD use avx512_perm_t_sizes (includes the broadcast-memory
//    form); W and B use avx512_perm_t_sizes_bw, gated on HasBWI and HasVBMI
//    respectively.
//  - Opcode bytes: 0x7E for D/Q, 0x7D for W/B, 0x7F for PS/PD; the Q, W and PD
//    variants additionally carry REX_W.
//  - The PS/PD variants pass an f32/f64 info as the data type and an i32/i64
//    info as the index type.
1788 defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1789                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1790 defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1791                   avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1792 defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1793                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1794                   REX_W, EVEX_CD8<16, CD8VF>;
1795 defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1796                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1797                   EVEX_CD8<8, CD8VF>;
1798 defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1799                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1800 defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1801                   avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1803 //===----------------------------------------------------------------------===//
1804 // AVX-512 - BLEND using mask
// Defines the register and full-vector-memory forms of a mask blend
// instruction. All six records have an empty pattern list ([]), so they carry
// only encoding, operands, assembly strings and scheduling info here:
//   rr  / rrk  / rrkz - both sources in registers; unmasked, EVEX_K, EVEX_KZ
//   rm  / rmk  / rmkz - $src2 from _.MemOp (mayLoad = 1), with
//                       EVEX_CD8<_.EltSize, CD8VF>; unmasked, EVEX_K, EVEX_KZ
// Everything is marked hasSideEffects = 0 and uses _.ExeDomain.
// NOTE(review): despite the "Write..." name, this multiclass defines
// instructions, not a scheduling class.
1807 multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1808                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1809   let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1810   def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1811              (ins _.RC:$src1, _.RC:$src2),
1812              !strconcat(OpcodeStr,
1813              "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1814              EVEX, VVVV, Sched<[sched]>;
1815   def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1816              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1817              !strconcat(OpcodeStr,
1818              "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1819              []>, EVEX, VVVV, EVEX_K, Sched<[sched]>;
1820   def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1821              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1822              !strconcat(OpcodeStr,
1823              "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1824              []>, EVEX, VVVV, EVEX_KZ, Sched<[sched]>;
1825   let mayLoad = 1 in {
1826   def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1827              (ins _.RC:$src1, _.MemOp:$src2),
1828              !strconcat(OpcodeStr,
1829              "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1830              []>, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
1831              Sched<[sched.Folded, sched.ReadAfterFold]>;
1832   def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1833              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1834              !strconcat(OpcodeStr,
1835              "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1836              []>, EVEX, VVVV, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1837              Sched<[sched.Folded, sched.ReadAfterFold]>;
1838   def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1839              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1840              !strconcat(OpcodeStr,
1841              "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1842              []>, EVEX, VVVV, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1843              Sched<[sched.Folded, sched.ReadAfterFold]>;
1844   }
1845   }
// Broadcast-memory forms of the mask blend: $src2 is a _.ScalarMemOp and every
// record is tagged EVEX_B with EVEX_CD8<_.EltSize, CD8VF>. Definition order
// in this multiclass is rmbk (EVEX_K), rmbkz (EVEX_KZ), then unmasked rmb.
// The assembly strings append _.BroadcastStr to ${src2} in both syntaxes.
// All records have an empty pattern list and are marked mayLoad = 1,
// hasSideEffects = 0.
1847 multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
1848                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1849   let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
1850   def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1851       (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1852        !strconcat(OpcodeStr,
1853             "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1854             "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1855       EVEX, VVVV, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1856       Sched<[sched.Folded, sched.ReadAfterFold]>;
1858   def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1859       (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1860        !strconcat(OpcodeStr,
1861             "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
1862             "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1863       EVEX, VVVV, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1864       Sched<[sched.Folded, sched.ReadAfterFold]>;
1866   def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1867       (ins _.RC:$src1, _.ScalarMemOp:$src2),
1868        !strconcat(OpcodeStr,
1869             "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
1870             "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1871       EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1872       Sched<[sched.Folded, sched.ReadAfterFold]>;
1873   }
// Per-width wrapper for blends that have a broadcast-memory form: each width
// combines WriteFVarBlendask with WriteFVarBlendask_rmb.
//   Z    - sched.ZMM, info512, EVEX_V512 (no extra predicate here)
//   Z256 - sched.YMM, info256, EVEX_V256, only under HasVLX
//   Z128 - sched.XMM, info128, EVEX_V128, only under HasVLX
1876 multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
1877                         AVX512VLVectorVTInfo VTInfo> {
1878   defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1879            WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1880                                  EVEX_V512;
1882   let Predicates = [HasVLX] in {
1883     defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1884                 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1885                                       EVEX_V256;
1886     defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1887                 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1888                                       EVEX_V128;
1889   }
// Per-width wrapper that instantiates only WriteFVarBlendask (no
// WriteFVarBlendask_rmb, so no broadcast-memory records).
//   Z          - requires HasBWI
//   Z256, Z128 - require HasBWI and HasVLX
1892 multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
1893                         AVX512VLVectorVTInfo VTInfo> {
1894   let Predicates = [HasBWI] in
1895     defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1896                                EVEX_V512;
1898   let Predicates = [HasBWI, HasVLX] in {
1899     defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1900                                   EVEX_V256;
1901     defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1902                                   EVEX_V128;
1903   }
// Mask-blend instruction families.
//  - PS/PD (opcode 0x65, SchedWriteFVarBlend) and D/Q (opcode 0x64,
//    SchedWriteVarBlend) use blendmask_dq.
//  - B/W (opcode 0x66, SchedWriteVarBlend) use blendmask_bw.
//  - The PD, Q and W variants additionally carry REX_W.
1906 defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
1907                               avx512vl_f32_info>;
1908 defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
1909                               avx512vl_f64_info>, REX_W;
1910 defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
1911                               avx512vl_i32_info>;
1912 defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
1913                               avx512vl_i64_info>, REX_W;
1914 defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
1915                               avx512vl_i8_info>;
1916 defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
1917                               avx512vl_i16_info>, REX_W;
1919 //===----------------------------------------------------------------------===//
1920 // Compare Instructions
1921 //===----------------------------------------------------------------------===//
1923 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
// Scalar floating-point compare into a mask register (opcode 0xC2, mnemonic
// "vcmp" # _.Suffix, condition code in the u8imm $cc operand).
//
// Records defined:
//   rr_Int  - vector-register operands (_.RC), pattern uses OpNode
//   rm_Int  - $src2 from _.IntScalarMemOp via _.ScalarIntMemFrags
//   rrb_Int - "{sae}" form: uses OpNodeSAE, tagged EVEX_B, declares
//             Uses = [MXCSR]; unlike the other records it does not list
//             SIMD_EXC
//   rr, rm  - isCodeGenOnly forms on the scalar register class _.FRC; rr is
//             marked isCommutable, rm loads through _.ScalarLdFrag
//
// The *_Int records go through AVX512_maskable_cmp and pass both the plain
// node and its "_su" PatFrag (presumably used for the masked variant -
// confirm against AVX512_maskable_cmp).
1925 multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
1926                              PatFrag OpNode_su, PatFrag OpNodeSAE_su,
1927                              X86FoldableSchedWrite sched> {
1928   defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1929                       (outs _.KRC:$dst),
1930                       (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1931                       "vcmp"#_.Suffix,
1932                       "$cc, $src2, $src1", "$src1, $src2, $cc",
1933                       (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
1934                       (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1935                                  timm:$cc)>, EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC;
1936   let mayLoad = 1 in
1937   defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
1938                     (outs _.KRC:$dst),
1939                     (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
1940                     "vcmp"#_.Suffix,
1941                     "$cc, $src2, $src1", "$src1, $src2, $cc",
1942                     (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
1943                         timm:$cc),
1944                     (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
1945                         timm:$cc)>, EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
1946                     Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
1948   let Uses = [MXCSR] in
1949   defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1950                      (outs _.KRC:$dst),
1951                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1952                      "vcmp"#_.Suffix,
1953                      "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
1954                      (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1955                                 timm:$cc),
1956                      (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1957                                    timm:$cc)>,
1958                      EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>;
1960   let isCodeGenOnly = 1 in {
1961     let isCommutable = 1 in
1962     def rr : AVX512Ii8<0xC2, MRMSrcReg,
1963                 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
1964                 !strconcat("vcmp", _.Suffix,
1965                            "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
1966                 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
1967                                           _.FRC:$src2,
1968                                           timm:$cc))]>,
1969                 EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC;
1970     def rm : AVX512Ii8<0xC2, MRMSrcMem,
1971               (outs _.KRC:$dst),
1972               (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
1973               !strconcat("vcmp", _.Suffix,
1974                          "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
1975               [(set _.KRC:$dst, (OpNode _.FRC:$src1,
1976                                         (_.ScalarLdFrag addr:$src2),
1977                                         timm:$cc))]>,
1978               EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
1979               Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
1980   }
// Scalar compare instantiations of avx512_cmp_scalar. All three use the same
// nodes (X86cmpms / X86cmpmsSAE and their _su fragments) and
// SchedWriteFCmp.Scl; they differ in value-type info, prefix class, domain
// and predicate:
//   VCMPSSZ - f32x_info, AVX512XSIi8Base, SSEPackedSingle, HasAVX512
//   VCMPSDZ - f64x_info, AVX512XDIi8Base + REX_W, SSEPackedDouble, HasAVX512
//   VCMPSHZ - f16x_info, AVX512XSIi8Base + TA, SSEPackedSingle, HasFP16
1983 let Predicates = [HasAVX512] in {
1984   let ExeDomain = SSEPackedSingle in
1985   defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
1986                                    X86cmpms_su, X86cmpmsSAE_su,
1987                                    SchedWriteFCmp.Scl>, AVX512XSIi8Base;
1988   let ExeDomain = SSEPackedDouble in
1989   defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
1990                                    X86cmpms_su, X86cmpmsSAE_su,
1991                                    SchedWriteFCmp.Scl>, AVX512XDIi8Base, REX_W;
1993 let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
1994   defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
1995                                    X86cmpms_su, X86cmpmsSAE_su,
1996                                    SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA;
// Packed integer compare writing a mask register (_.KRC), without an
// immediate condition-code operand. All four records have an empty pattern
// list and hasSideEffects = 0:
//   rr  - register/register, isCommutable = IsCommutable
//   rm  - $src2 from _.MemOp, mayLoad = 1
//   rrk - as rr, with a _.KRCWM:$mask input and EVEX_K
//   rmk - as rm, with a _.KRCWM:$mask input and EVEX_K
// Only the register forms take the IsCommutable flag.
1998 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
1999                               X86FoldableSchedWrite sched,
2000                               X86VectorVTInfo _, bit IsCommutable> {
2001   let isCommutable = IsCommutable, hasSideEffects = 0 in
2002   def rr : AVX512BI<opc, MRMSrcReg,
2003              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2004              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2005              []>, EVEX, VVVV, Sched<[sched]>;
2006   let mayLoad = 1, hasSideEffects = 0 in
2007   def rm : AVX512BI<opc, MRMSrcMem,
2008              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2009              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2010              []>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
2011   let isCommutable = IsCommutable, hasSideEffects = 0 in
2012   def rrk : AVX512BI<opc, MRMSrcReg,
2013               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2014               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2015                           "$dst {${mask}}, $src1, $src2}"),
2016               []>, EVEX, VVVV, EVEX_K, Sched<[sched]>;
2017   let mayLoad = 1, hasSideEffects = 0 in
2018   def rmk : AVX512BI<opc, MRMSrcMem,
2019               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2020               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2021                           "$dst {${mask}}, $src1, $src2}"),
2022               []>, EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Extends avx512_icmp_packed (inherited, so rr/rm/rrk/rmk are also produced)
// with two broadcast-memory records, both tagged EVEX_B, with an empty
// pattern list, mayLoad = 1 and hasSideEffects = 0:
//   rmb  - $src2 is a _.ScalarMemOp, printed with _.BroadcastStr
//   rmbk - as rmb, with a _.KRCWM:$mask input and EVEX_K
2025 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
2026                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
2027                                   bit IsCommutable> :
2028            avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
2029   let mayLoad = 1, hasSideEffects = 0 in {
2030   def rmb : AVX512BI<opc, MRMSrcMem,
2031               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2032               !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2033                                     "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2034               []>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2035   def rmbk : AVX512BI<opc, MRMSrcMem,
2036                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2037                                        _.ScalarMemOp:$src2),
2038                !strconcat(OpcodeStr,
2039                           "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2040                           "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2041                []>, EVEX, VVVV, EVEX_K, EVEX_B,
2042                Sched<[sched.Folded, sched.ReadAfterFold]>;
2043   }
// Per-width wrapper around avx512_icmp_packed (no broadcast-memory records).
//   Z          - sched.ZMM, info512, EVEX_V512, requires prd
//   Z256, Z128 - sched.YMM / sched.XMM, info256 / info128, require prd and
//                HasVLX
// IsCommutable defaults to 0 and is forwarded unchanged to every width.
2046 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
2047                                  X86SchedWriteWidths sched,
2048                                  AVX512VLVectorVTInfo VTInfo, Predicate prd,
2049                                  bit IsCommutable = 0> {
2050   let Predicates = [prd] in
2051   defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
2052                               VTInfo.info512, IsCommutable>, EVEX_V512;
2054   let Predicates = [prd, HasVLX] in {
2055     defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
2056                                    VTInfo.info256, IsCommutable>, EVEX_V256;
2057     defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
2058                                    VTInfo.info128, IsCommutable>, EVEX_V128;
2059   }
// Per-width wrapper around avx512_icmp_packed_rmb, i.e. the same layout as
// avx512_icmp_packed_vl but each width also gets the rmb/rmbk broadcast
// records.
//   Z          - requires prd
//   Z256, Z128 - require prd and HasVLX
// IsCommutable defaults to 0 and is forwarded unchanged to every width.
2062 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2063                                      X86SchedWriteWidths sched,
2064                                      AVX512VLVectorVTInfo VTInfo,
2065                                      Predicate prd, bit IsCommutable = 0> {
2066   let Predicates = [prd] in
2067   defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
2068                                   VTInfo.info512, IsCommutable>, EVEX_V512;
2070   let Predicates = [prd, HasVLX] in {
2071     defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
2072                                        VTInfo.info256, IsCommutable>, EVEX_V256;
2073     defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
2074                                        VTInfo.info128, IsCommutable>, EVEX_V128;
2075   }
2078 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2079 // increase the pattern complexity the way an immediate would.
// Fixed-condition packed integer compares, all under AddedComplexity = 2.
//  - B/W variants use avx512_icmp_packed_vl with HasBWI and are marked WIG.
//  - D/Q variants use avx512_icmp_packed_rmb_vl (adds broadcast forms) with
//    HasAVX512; the Q variants additionally carry T8 and REX_W.
//  - The four VPCMPEQ* families pass IsCommutable = 1; the VPCMPGT* families
//    leave it at its default of 0.
2080 let AddedComplexity = 2 in {
2081 // FIXME: Is there a better scheduler class for VPCMP?
2082 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
2083                       SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2084                 EVEX_CD8<8, CD8VF>, WIG;
2086 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
2087                       SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2088                 EVEX_CD8<16, CD8VF>, WIG;
2090 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
2091                       SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2092                 EVEX_CD8<32, CD8VF>;
2094 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
2095                       SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2096                 T8, REX_W, EVEX_CD8<64, CD8VF>;
2098 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
2099                       SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2100                 EVEX_CD8<8, CD8VF>, WIG;
2102 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
2103                       SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2104                 EVEX_CD8<16, CD8VF>, WIG;
2106 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
2107                       SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2108                 EVEX_CD8<32, CD8VF>;
2110 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
2111                       SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2112                 T8, REX_W, EVEX_CD8<64, CD8VF>;
// Packed integer compare with an immediate condition code (mnemonic
// "vpcmp" # Suffix, u8imm $cc operand), writing a mask register.
//
// Records defined:
//   rri  - register/register, isCommutable = 1, pattern on Frag
//   rmi  - $src2 loaded through _.LdFrag, pattern on Frag
//   rrik - as rri but the result is (and $mask, Frag_su ...), EVEX_K
//   rmik - as rmi but the result is (and $mask, Frag_su ...), EVEX_K
//
// The two trailing anonymous patterns cover the case where the load is the
// FIRST compare operand: they select rmi / rmik with the register operand as
// $src1 and rewrite the condition code through X86pcmpm_imm_commute.
// Name and _.ZSuffix are concatenated to find the instruction record, so Name
// must be the defm name used at the outermost instantiation.
2115 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2116                           PatFrag Frag_su,
2117                           X86FoldableSchedWrite sched,
2118                           X86VectorVTInfo _, string Name> {
2119   let isCommutable = 1 in
2120   def rri : AVX512AIi8<opc, MRMSrcReg,
2121              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2122              !strconcat("vpcmp", Suffix,
2123                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2124              [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2125                                                 (_.VT _.RC:$src2),
2126                                                 cond)))]>,
2127              EVEX, VVVV, Sched<[sched]>;
2128   def rmi : AVX512AIi8<opc, MRMSrcMem,
2129              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2130              !strconcat("vpcmp", Suffix,
2131                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2132              [(set _.KRC:$dst, (_.KVT
2133                                 (Frag:$cc
2134                                  (_.VT _.RC:$src1),
2135                                  (_.VT (_.LdFrag addr:$src2)),
2136                                  cond)))]>,
2137              EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
2138   let isCommutable = 1 in
2139   def rrik : AVX512AIi8<opc, MRMSrcReg,
2140               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2141                                       u8imm:$cc),
2142               !strconcat("vpcmp", Suffix,
2143                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
2144                          "$dst {${mask}}, $src1, $src2, $cc}"),
2145               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2146                                      (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2147                                                          (_.VT _.RC:$src2),
2148                                                          cond))))]>,
2149               EVEX, VVVV, EVEX_K, Sched<[sched]>;
2150   def rmik : AVX512AIi8<opc, MRMSrcMem,
2151               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2152                                     u8imm:$cc),
2153               !strconcat("vpcmp", Suffix,
2154                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
2155                          "$dst {${mask}}, $src1, $src2, $cc}"),
2156               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2157                                      (_.KVT
2158                                       (Frag_su:$cc
2159                                        (_.VT _.RC:$src1),
2160                                        (_.VT (_.LdFrag addr:$src2)),
2161                                        cond))))]>,
2162               EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2164   def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2),
2165                              (_.VT _.RC:$src1), cond)),
2166             (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2167              _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2169   def : Pat<(and _.KRCWM:$mask,
2170                  (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2),
2171                                      (_.VT _.RC:$src1), cond))),
2172             (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2173              _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2174              (X86pcmpm_imm_commute $cc))>;
// Extends avx512_icmp_cc (inherited, so rri/rmi/rrik/rmik and their commuted
// load patterns are also produced) with broadcast-memory forms tagged EVEX_B:
//   rmib  - $src2 is a _.ScalarMemOp matched via _.BroadcastLdFrag
//   rmibk - as rmib but the result is (and $mask, Frag_su ...), EVEX_K
// The two trailing anonymous patterns handle a broadcast load appearing as
// the FIRST compare operand: they select rmib / rmibk with the register as
// $src1 and rewrite the condition code through X86pcmpm_imm_commute.
2177 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2178                               PatFrag Frag_su, X86FoldableSchedWrite sched,
2179                               X86VectorVTInfo _, string Name> :
2180            avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> {
2181   def rmib : AVX512AIi8<opc, MRMSrcMem,
2182              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2183                                      u8imm:$cc),
2184              !strconcat("vpcmp", Suffix,
2185                         "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2186                         "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2187              [(set _.KRC:$dst, (_.KVT (Frag:$cc
2188                                        (_.VT _.RC:$src1),
2189                                        (_.BroadcastLdFrag addr:$src2),
2190                                        cond)))]>,
2191              EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2192   def rmibk : AVX512AIi8<opc, MRMSrcMem,
2193               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2194                                        _.ScalarMemOp:$src2, u8imm:$cc),
2195               !strconcat("vpcmp", Suffix,
2196                   "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2197                   "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2198               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2199                                      (_.KVT (Frag_su:$cc
2200                                              (_.VT _.RC:$src1),
2201                                              (_.BroadcastLdFrag addr:$src2),
2202                                              cond))))]>,
2203               EVEX, VVVV, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2205   def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
2206                     (_.VT _.RC:$src1), cond)),
2207             (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2208              _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2210   def : Pat<(and _.KRCWM:$mask,
2211                  (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2),
2212                                      (_.VT _.RC:$src1), cond))),
2213             (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2214              _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2215              (X86pcmpm_imm_commute $cc))>;
// Per-width wrapper around avx512_icmp_cc (no broadcast-memory records).
//   Z          - sched.ZMM, info512, EVEX_V512, requires prd
//   Z256, Z128 - sched.YMM / sched.XMM, info256 / info128, require prd and
//                HasVLX
// NAME is forwarded as the Name string so the inner patterns can look up the
// generated instruction records via !cast.
2218 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2219                              PatFrag Frag_su, X86SchedWriteWidths sched,
2220                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2221   let Predicates = [prd] in
2222   defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2223                           sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2225   let Predicates = [prd, HasVLX] in {
2226     defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2227                                sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2228     defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2229                                sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2230   }
// Width fan-out for avx512_icmp_cc_rmb, structured exactly like
// avx512_icmp_cc_vl: Z is guarded by `prd`, Z256/Z128 by `prd` plus HasVLX,
// each with the scheduling class and type info of its own vector width.
2233 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2234                                  PatFrag Frag_su, X86SchedWriteWidths sched,
2235                                  AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2236   let Predicates = [prd] in
2237   defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2238                               sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2240   let Predicates = [prd, HasVLX] in {
2241     defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2242                                    sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2243     defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2244                                    sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2245   }
// Integer compare-with-condition-code instantiations.
//  - Signed forms use X86pcmpm/X86pcmpm_su, unsigned ("u") forms use
//    X86pcmpum/X86pcmpum_su.
//  - Byte/word forms use avx512_icmp_cc_vl and are predicated on HasBWI;
//    dword/qword forms use avx512_icmp_cc_rmb_vl and are predicated on
//    HasAVX512.
//  - The word and qword forms add REX_W; byte/word share opcodes 0x3F/0x3E
//    and dword/qword share 0x1F/0x1E (signed/unsigned respectively).
2248 // FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
2249 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2250                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2251                                 EVEX_CD8<8, CD8VF>;
2252 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2253                                  SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2254                                  EVEX_CD8<8, CD8VF>;
2256 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2257                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2258                                 REX_W, EVEX_CD8<16, CD8VF>;
2259 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2260                                  SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2261                                  REX_W, EVEX_CD8<16, CD8VF>;
2263 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2264                                     SchedWriteVecALU, avx512vl_i32_info,
2265                                     HasAVX512>, EVEX_CD8<32, CD8VF>;
2266 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2267                                      SchedWriteVecALU, avx512vl_i32_info,
2268                                      HasAVX512>, EVEX_CD8<32, CD8VF>;
2270 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2271                                     SchedWriteVecALU, avx512vl_i64_info,
2272                                     HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
2273 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2274                                      SchedWriteVecALU, avx512vl_i64_info,
2275                                      HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
// Packed FP compare-into-mask instructions (opcode 0xC2) for one vector type.
// Defines the register (rri), full-vector memory (rmi) and broadcast memory
// (rmbi) forms through AVX512_maskable_cmp, then adds selection patterns that
// target those records and their "k"-suffixed masked counterparts, looked up
// by name as Name#_.ZSuffix#"<form>".
//   sched - scheduling class; memory forms use sched.Folded/ReadAfterFold.
//   _     - vector type info (register class, mask class, load fragments).
//   Name  - base record name used for the !cast lookups in the patterns.
2277 multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2278                               string Name> {
     // All three instruction forms read MXCSR and may raise FP exceptions.
2279 let Uses = [MXCSR], mayRaiseFPException = 1 in {
     // NOTE(review): the trailing `1` is presumably the IsCommutable argument
     // of AVX512_maskable_cmp (not visible here) - confirm.
2280   defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2281                    (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2282                    "vcmp"#_.Suffix,
2283                    "$cc, $src2, $src1", "$src1, $src2, $cc",
2284                    (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2285                    (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2286                    1>, Sched<[sched]>;
2288   defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2289                 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2290                 "vcmp"#_.Suffix,
2291                 "$cc, $src2, $src1", "$src1, $src2, $cc",
2292                 (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2293                              timm:$cc),
2294                 (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2295                             timm:$cc)>,
2296                 Sched<[sched.Folded, sched.ReadAfterFold]>;
     // Broadcast form: $src2 is a scalar memory operand printed with
     // _.BroadcastStr and encoded with EVEX_B.
2298   defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2299                 (outs _.KRC:$dst),
2300                 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2301                 "vcmp"#_.Suffix,
2302                 "$cc, ${src2}"#_.BroadcastStr#", $src1",
2303                 "$src1, ${src2}"#_.BroadcastStr#", $cc",
2304                 (X86any_cmpm (_.VT _.RC:$src1),
2305                              (_.VT (_.BroadcastLdFrag addr:$src2)),
2306                              timm:$cc),
2307                 (X86cmpm_su (_.VT _.RC:$src1),
2308                             (_.VT (_.BroadcastLdFrag addr:$src2)),
2309                             timm:$cc)>,
2310                 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2311   }
     // The instructions only fold a load as the second source, so when the load
     // is the first compare operand the operands are swapped and the condition
     // code is rewritten with X86cmpm_imm_commute.
2313   // Patterns for selecting with loads in other operand.
2314   def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2315                          timm:$cc),
2316             (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2317                                                       (X86cmpm_imm_commute timm:$cc))>;
2319   def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2320                                             (_.VT _.RC:$src1),
2321                                             timm:$cc)),
2322             (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2323                                                        _.RC:$src1, addr:$src2,
2324                                                        (X86cmpm_imm_commute timm:$cc))>;
2326   def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
2327                          (_.VT _.RC:$src1), timm:$cc),
2328             (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2329                                                        (X86cmpm_imm_commute timm:$cc))>;
2331   def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
2332                                             (_.VT _.RC:$src1),
2333                                             timm:$cc)),
2334             (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2335                                                         _.RC:$src1, addr:$src2,
2336                                                         (X86cmpm_imm_commute timm:$cc))>;
     // X86cmpmm carries the write-mask as its fourth operand: an all-ones mask
     // selects the unmasked instruction, any other mask selects the "k" form.
2338   // Patterns for mask intrinsics.
2339   def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
2340                       (_.KVT immAllOnesV)),
2341             (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;
2343   def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
2344             (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
2345                                                        _.RC:$src2, timm:$cc)>;
2347   def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2348                       (_.KVT immAllOnesV)),
2349             (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;
2351   def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2352                       _.KRCWM:$mask),
2353             (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
2354                                                        addr:$src2, timm:$cc)>;
2356   def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2357                       (_.KVT immAllOnesV)),
2358             (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;
2360   def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2361                       _.KRCWM:$mask),
2362             (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
2363                                                         addr:$src2, timm:$cc)>;
     // Same as above, but with the load as the first compare operand: swap the
     // operands and commute the condition code.
2365   // Patterns for mask intrinsics with loads in other operand.
2366   def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2367                       (_.KVT immAllOnesV)),
2368             (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2369                                                       (X86cmpm_imm_commute timm:$cc))>;
2371   def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2372                       _.KRCWM:$mask),
2373             (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2374                                                        _.RC:$src1, addr:$src2,
2375                                                        (X86cmpm_imm_commute timm:$cc))>;
2377   def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2378                       (_.KVT immAllOnesV)),
2379             (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2380                                                        (X86cmpm_imm_commute timm:$cc))>;
2382   def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2383                       _.KRCWM:$mask),
2384             (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2385                                                         _.RC:$src1, addr:$src2,
2386                                                         (X86cmpm_imm_commute  timm:$cc))>;
// Register-register packed FP compare with the {sae} operand in its assembly
// string (rrib), encoded with EVEX_B. Built on AVX512_maskable_custom_cmp with
// explicit unmasked and masked patterns over X86cmpmmSAE: the unmasked pattern
// matches an all-ones mask operand, the masked one takes _.KRCWM:$mask.
// Unlike avx512_vcmp_common, only Uses = [MXCSR] is set here;
// mayRaiseFPException is not.
2389 multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2390   // comparison code form (VCMP[EQ/LT/LE/...])
2391   let Uses = [MXCSR] in
2392   defm  rrib  : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
2393                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2394                      (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
2395                      "vcmp"#_.Suffix,
2396                      "$cc, {sae}, $src2, $src1",
2397                      "$src1, $src2, {sae}, $cc",
2398                      [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2399                                         (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
2400                      [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2401                                         (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
2402                      EVEX_B, Sched<[sched]>;
// Width fan-out for the packed FP compares. The 512-bit instantiation (Z)
// combines avx512_vcmp_common with avx512_vcmp_sae and is guarded by `Pred`
// (default HasAVX512); the 128/256-bit instantiations get only the common
// forms and additionally require HasVLX.
2405 multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
2406                        Predicate Pred = HasAVX512> {
2407   let Predicates = [Pred] in {
2408     defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2409                 avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2411   }
2412   let Predicates = [Pred,HasVLX] in {
2413    defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2414    defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
2415   }
// Packed FP compare instantiations for f64, f32 and f16 element types.
// PD and PS rely on avx512_vcmp's default predicate (HasAVX512); PH passes
// HasFP16 explicitly. PD adds REX_W, and PH adds TA on top of the PS base.
2418 defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2419                           AVX512PDIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
2420 defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2421                           AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
2422 defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
2423                           AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<16, CD8VF>, TA;
// Scalar compares whose first operand is a load: the register operand becomes
// the instruction's first source, the load is folded as the memory source of
// the corresponding rm instruction, and the condition code is rewritten with
// X86cmpm_imm_commute to account for the swap.
2425 // Patterns to select fp compares with load as first operand.
2426 let Predicates = [HasAVX512] in {
2427   def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
2428             (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2430   def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
2431             (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
     // The f16 variant is only available with HasFP16.
2434 let Predicates = [HasFP16] in {
2435   def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
2436             (VCMPSHZrm FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2439 // ----------------------------------------------------------------
2440 // FPClass
2442 //handle fpclass instruction  mask =  op(reg_scalar,imm)
2443 //                                    op(mem_scalar,imm)
// Defines four records, all writing a mask register (_.KRC):
//   rr  - register source, unmasked
//   rrk - register source, result ANDed with write-mask $mask (EVEX_K)
//   rm  - scalar memory source, unmasked
//   rmk - scalar memory source, result ANDed with write-mask $mask (EVEX_K)
// The masked forms match X86Vfpclasss_su instead of X86Vfpclasss. All records
// are guarded by `prd`, use _.ExeDomain and are marked as reading MXCSR.
2444 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2445                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2446                                  Predicate prd> {
2447   let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2448       def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2449                       (ins _.RC:$src1, i32u8imm:$src2),
2450                       OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2451                       [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2452                               (i32 timm:$src2)))]>,
2453                       Sched<[sched]>;
2454       def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2455                       (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2456                       OpcodeStr#_.Suffix#
2457                       "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2458                       [(set _.KRC:$dst,(and _.KRCWM:$mask,
2459                                       (X86Vfpclasss_su (_.VT _.RC:$src1),
2460                                       (i32 timm:$src2))))]>,
2461                       EVEX_K, Sched<[sched]>;
2462     def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2463                     (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2464                     OpcodeStr#_.Suffix#
2465                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2466                     [(set _.KRC:$dst,
2467                           (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
2468                                         (i32 timm:$src2)))]>,
2469                     Sched<[sched.Folded, sched.ReadAfterFold]>;
2470     def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2471                     (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2472                     OpcodeStr#_.Suffix#
2473                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2474                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
2475                         (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
2476                             (i32 timm:$src2))))]>,
2477                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2478   }
2481 //handle fpclass instruction mask = fpclass(reg_vec, imm)
2482 //                                  fpclass(mem_vec, imm)
2483 //                                  fpclass(broadcast(eltVt), imm)
// Each source kind comes in an unmasked form (rr/rm/rmb) and a masked form
// (rrk/rmk/rmbk, EVEX_K) whose result is ANDed with $mask and which matches
// X86Vfpclass_su instead of X86Vfpclass. `mem` is the width suffix string
// ("x"/"y"/"z" at the visible call sites): it is emitted inside braces in the
// full-vector memory mnemonics and as a plain suffix in the aliases below.
2484 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2485                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2486                                  string mem>{
2487   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2488   def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2489                       (ins _.RC:$src1, i32u8imm:$src2),
2490                       OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2491                       [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2492                                        (i32 timm:$src2)))]>,
2493                       Sched<[sched]>;
2494   def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2495                       (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2496                       OpcodeStr#_.Suffix#
2497                       "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2498                       [(set _.KRC:$dst,(and _.KRCWM:$mask,
2499                                        (X86Vfpclass_su (_.VT _.RC:$src1),
2500                                        (i32 timm:$src2))))]>,
2501                       EVEX_K, Sched<[sched]>;
     // Full-vector memory forms: the mnemonic carries "{"#mem#"}" because the
     // memory operand alone does not identify the vector width.
2502   def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2503                     (ins _.MemOp:$src1, i32u8imm:$src2),
2504                     OpcodeStr#_.Suffix#"{"#mem#"}"#
2505                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2506                     [(set _.KRC:$dst,(X86Vfpclass
2507                                      (_.VT (_.LdFrag addr:$src1)),
2508                                      (i32 timm:$src2)))]>,
2509                     Sched<[sched.Folded, sched.ReadAfterFold]>;
2510   def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2511                     (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2512                     OpcodeStr#_.Suffix#"{"#mem#"}"#
2513                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2514                     [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2515                                   (_.VT (_.LdFrag addr:$src1)),
2516                                   (i32 timm:$src2))))]>,
2517                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
     // Broadcast forms: scalar memory operand printed with _.BroadcastStr and
     // encoded with EVEX_B.
2518   def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2519                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2520                     OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2521                                       _.BroadcastStr#", $dst|$dst, ${src1}"
2522                                                   #_.BroadcastStr#", $src2}",
2523                     [(set _.KRC:$dst,(X86Vfpclass
2524                                      (_.VT (_.BroadcastLdFrag addr:$src1)),
2525                                      (i32 timm:$src2)))]>,
2526                     EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2527   def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2528                     (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2529                     OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2530                           _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
2531                                                    _.BroadcastStr#", $src2}",
2532                     [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2533                                      (_.VT (_.BroadcastLdFrag addr:$src1)),
2534                                      (i32 timm:$src2))))]>,
2535                     EVEX_B, EVEX_K,  Sched<[sched.Folded, sched.ReadAfterFold]>;
2536   }
     // The aliases below map the explicitly suffixed mnemonic onto the rr, rrk,
     // rmb and rmbk records. They are restricted to the "att" variant and the
     // `0` argument is the alias's emit flag.
2538   // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
2539   // the memory form.
2540   def : InstAlias<OpcodeStr#_.Suffix#mem#
2541                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2542                   (!cast<Instruction>(NAME#"rr")
2543                    _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2544   def : InstAlias<OpcodeStr#_.Suffix#mem#
2545                   "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2546                   (!cast<Instruction>(NAME#"rrk")
2547                    _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2548   def : InstAlias<OpcodeStr#_.Suffix#mem#
2549                   "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2550                   _.BroadcastStr#", $src2}",
2551                   (!cast<Instruction>(NAME#"rmb")
2552                    _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2553   def : InstAlias<OpcodeStr#_.Suffix#mem#
2554                   "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2555                   "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2556                   (!cast<Instruction>(NAME#"rmbk")
2557                    _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
// Width fan-out for avx512_vector_fpclass. Z (512-bit, suffix "z") is guarded
// by `prd`; Z128 (suffix "x") and Z256 (suffix "y") additionally require
// HasVLX. The suffix string is passed as the `mem` argument.
2560 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2561                                      bits<8> opc, X86SchedWriteWidths sched,
2562                                      Predicate prd>{
2563   let Predicates = [prd] in {
2564     defm Z    : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2565                                       _.info512, "z">, EVEX_V512;
2566   }
2567   let Predicates = [prd, HasVLX] in {
2568     defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2569                                       _.info128, "x">, EVEX_V128;
2570     defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2571                                       _.info256, "y">, EVEX_V256;
2572   }
// Instantiates every fpclass flavour: packed (PH/PS/PD, opcode opcVec) and
// scalar (SHZ/SSZ/SDZ, opcode opcScalar, scheduled with sched.Scl).
//  - f16 forms are predicated on HasFP16 and use AVX512PSIi8Base plus TA.
//  - f32/f64 forms are predicated on HasDQI and use AVX512AIi8Base; the f64
//    forms add REX_W.
//  - SSZ and SDZ are marked VEX_LIG; SHZ is not.
2575 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2576                                  bits<8> opcScalar, X86SchedWriteWidths sched> {
2577   defm PH : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f16_info, opcVec,
2578                                       sched, HasFP16>,
2579                                       EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
2580   defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2581                                    sched.Scl, f16x_info, HasFP16>,
2582                                    EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;
2583   defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
2584                                       sched, HasDQI>,
2585                                       EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
2586   defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
2587                                       sched, HasDQI>,
2588                                       EVEX_CD8<64, CD8VF>, AVX512AIi8Base, REX_W;
2589   defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2590                                    sched.Scl, f32x_info, HasDQI>, VEX_LIG,
2591                                    EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
2592   defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2593                                    sched.Scl, f64x_info, HasDQI>, VEX_LIG,
2594                                    EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, REX_W;
// VFPCLASS family: packed forms use opcode 0x66, scalar forms use 0x67.
2597 defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;
2599 //-----------------------------------------------------------------
2600 // Mask register copy, including
2601 // - copy between mask registers
2602 // - load/store mask registers
2603 // - copy from GPR to mask register and vice versa
// Defines three mask-move records whose names get `Suffix` appended:
//   kk - mask register to mask register (no selection pattern)
//   km - load a mask register from memory
//   mk - store a mask register to memory
// NOTE(review): the `let ... in` below has no braces, so it binds only to the
// `kk` record. `km` and `mk` therefore do not receive explicitOpPrefix (nor
// isMoveReg/hasSideEffects) from it - confirm this is intended for the
// "_EVEX" instantiations. `kk` also sets its scheduling twice, via SchedRW in
// the `let` and via Sched<[WriteMove]>.
2605 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2606                           string OpcodeStr, RegisterClass KRC, ValueType vvt,
2607                           X86MemOperand x86memop, string Suffix = ""> {
2608   let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove],
2609       explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in
2610   def kk#Suffix : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2611                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2612                   Sched<[WriteMove]>;
2613   def km#Suffix : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2614                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2615                     [(set KRC:$dst, (vvt (load addr:$src)))]>,
2616                   Sched<[WriteLoad]>;
2617   def mk#Suffix : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2618                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2619                     [(store KRC:$src, addr:$dst)]>,
2620                   Sched<[WriteStore]>;
// Moves between a general-purpose register class (GRC) and a mask register
// class (KRC): kr copies GPR -> mask, rk copies mask -> GPR. Neither record
// carries a selection pattern, and both are marked hasSideEffects = 0. Record
// names get `Suffix` appended.
2623 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2624                                string OpcodeStr, RegisterClass KRC,
2625                                RegisterClass GRC, string Suffix = ""> {
2626   let hasSideEffects = 0 in {
2627     def kr#Suffix : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2628                       !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2629                     Sched<[WriteMove]>;
2630     def rk#Suffix : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2631                       !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2632                     Sched<[WriteMove]>;
2633   }
// KMOVB/W/D/Q instantiations. Each width is defined twice with identical
// opcodes: a VEX-encoded set under NoEGPR, and an EVEX-encoded set with the
// "_EVEX" record-name suffix under HasEGPR + In64BitMode.
// Feature predicates: KMOVB needs HasDQI, KMOVW HasAVX512, KMOVD/KMOVQ HasBWI.
// For the D/Q widths the mask<->mask/memory forms and the mask<->GPR forms
// use different prefix combinations, so they are separate defm statements.
2636 let Predicates = [HasDQI, NoEGPR] in
2637   defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2638                avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2639                VEX, TB, PD;
2640 let Predicates = [HasDQI, HasEGPR, In64BitMode] in
2641   defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem, "_EVEX">,
2642                avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32, "_EVEX">,
2643                EVEX, TB, PD;
2645 let Predicates = [HasAVX512, NoEGPR] in
2646   defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2647                avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2648                VEX, TB;
2649 let Predicates = [HasAVX512, HasEGPR, In64BitMode] in
2650   defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem, "_EVEX">,
2651                avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32, "_EVEX">,
2652                EVEX, TB;
2654 let Predicates = [HasBWI, NoEGPR] in {
2655   defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2656                VEX, TB, PD, REX_W;
2657   defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2658                VEX, TB, XD;
2659   defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2660                VEX, TB, REX_W;
2661   defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2662                VEX, TB, XD, REX_W;
2664 let Predicates = [HasBWI, HasEGPR, In64BitMode] in {
2665   defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem, "_EVEX">,
2666                EVEX, TB, PD, REX_W;
2667   defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32, "_EVEX">,
2668                EVEX, TB, XD;
2669   defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem, "_EVEX">,
2670                EVEX, TB, REX_W;
2671   defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64, "_EVEX">,
2672                EVEX, TB, XD, REX_W;
2675 // GR from/to mask register
// 16-bit: the GR16 value is placed in the low 16 bits of an otherwise
// undefined 32-bit register, which is then copied into VK16; the reverse
// direction copies the mask to GR32 and extracts the 16-bit (or, for the
// truncating pattern, 8-bit) sub-register.
2676 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2677           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2678 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2679           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2680 def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
2681           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>;
// 8-bit: same scheme through the 8-bit sub-register and VK8.
2683 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2684           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2685 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2686           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
// Zero-extension uses an explicit KMOVWrk (wrapped in SUBREG_TO_REG for the
// i64 result); any-extension only needs a register-class copy because the
// upper bits are unspecified.
2688 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2689           (KMOVWrk VK16:$src)>;
2690 def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2691           (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
2692 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2693           (COPY_TO_REGCLASS VK16:$src, GR32)>;
2694 def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2695           (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
// The v8i1 zero-extension patterns use KMOVBrk and therefore require HasDQI;
// the any-extension patterns carry no predicate.
2697 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2698           (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2699 def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2700           (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
2701 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2702           (COPY_TO_REGCLASS VK8:$src, GR32)>;
2703 def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2704           (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
// 32/64-bit masks are the same width as their GPR, so a plain register-class
// copy suffices in both directions.
2706 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2707           (COPY_TO_REGCLASS GR32:$src, VK32)>;
2708 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2709           (COPY_TO_REGCLASS VK32:$src, GR32)>;
2710 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2711           (COPY_TO_REGCLASS GR64:$src, VK64)>;
2712 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2713           (COPY_TO_REGCLASS VK64:$src, GR64)>;
2715 // Load/store kreg
// With DQI, v1i1/v2i1/v4i1 loads are selected as a byte mask load (KMOVBkm)
// followed by a copy into the narrower mask register class.
2716 let Predicates = [HasDQI] in {
2717   def : Pat<(v1i1 (load addr:$src)),
2718             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2719   def : Pat<(v2i1 (load addr:$src)),
2720             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2721   def : Pat<(v4i1 (load addr:$src)),
2722             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
// With only AVX512, an i8 load bitcast to v8i1 goes through a zero-extending
// GPR load (MOVZX32rm8) and a copy into VK8; an i16 load bitcast to v16i1
// uses KMOVWkm directly.
2725 let Predicates = [HasAVX512] in {
2726   def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2727             (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2728   def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2729             (KMOVWkm addr:$src)>;
// ISD::EXTRACT_VECTOR_ELT constrained to mask vectors: one i8 result, a
// vector operand whose element type is i1, and a pointer-typed index.
2732 def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2733                          SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2734                                               SDTCVecEltisVT<1, i1>,
2735                                               SDTCisPtrTy<2>]>>;
// GPR <-> mask copies for scalar_to_vector and element-0 extraction,
// instantiated for every mask register class from VK1 to VK64.
2737 let Predicates = [HasAVX512] in {
2738   multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
       // scalar_to_vector from a 32-bit GPR is a plain register-class copy.
2739     def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2740               (COPY_TO_REGCLASS GR32:$src, maskRC)>;
       // From an 8-bit GPR, first widen into an undefined 32-bit register.
2742     def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2743               (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
       // Extracting element 0 copies the mask to GR32 and takes the low byte;
       // when the result is any-extended to i32 the sub-register step is
       // dropped.
2745     def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2746               (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2748     def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2749               (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2750   }
2752   defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
2753   defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
2754   defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
2755   defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
2756   defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
2757   defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
2758   defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;
     // Inserting a v1i1 built from a GR8 into an all-zero v16i1 at index 0:
     // AND the widened GPR with 1 so only bit 0 survives, then move it into a
     // mask register with KMOVWkr.
2760   def : Pat<(insert_subvector (v16i1 immAllZerosV),
2761                               (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2762             (KMOVWkr (AND32ri
2763                       (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2764                       (i32 1)))>;
2767 // Mask unary operation
2768 // - KNOT
// Defines a single register-to-register record `rr` on mask register class
// KRC, selected for (OpNode KRC:$src) and guarded by predicate `prd`.
2769 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2770                             RegisterClass KRC, SDPatternOperator OpNode,
2771                             X86FoldableSchedWrite sched, Predicate prd> {
2772   let Predicates = [prd] in
2773     def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2774                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2775                [(set KRC:$dst, (OpNode KRC:$src))]>,
2776                Sched<[sched]>;
// Instantiates avx512_mask_unop for the four mask widths, appending
// "b"/"w"/"d"/"q" to the mnemonic:
//   B - VK8,  HasDQI,    prefix PD
//   W - VK16, HasAVX512, no extra prefix
//   D - VK32, HasBWI,    prefix PD + REX_W
//   Q - VK64, HasBWI,    REX_W
2779 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2780                                 SDPatternOperator OpNode,
2781                                 X86FoldableSchedWrite sched> {
2782   defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2783                             sched, HasDQI>, VEX, TB, PD;
2784   defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2785                             sched, HasAVX512>, VEX, TB;
2786   defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2787                             sched, HasBWI>, VEX, TB, PD, REX_W;
2788   defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2789                             sched, HasBWI>, VEX, TB, REX_W;
2792 // TODO - do we need a X86SchedWriteWidths::KMASK type?
2793 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;  // Opcode 0x44, selected for vnot; produces KNOT{B,W,D,Q}rr.
2795 // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
2796 let Predicates = [HasAVX512, NoDQI] in  // Brace-less `let`: it scopes only the VK8 pattern that follows.
2797 def : Pat<(vnot VK8:$src),
2798           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
2800 def : Pat<(vnot VK4:$src),  // No KNOT form exists for VK4/VK2/VK1: widen to VK16, use KNOTW, copy back to the source class.
2801           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2802 def : Pat<(vnot VK2:$src),
2803           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2804 def : Pat<(vnot VK1:$src),
2805           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;  // Result returns to VK1, the class of the source.
2807 // Mask binary operation: defines one two-source register instruction (rr) on mask class KRC.
2808 // - KAND, KANDN, KOR, KXNOR, KXOR
2809 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2810                            RegisterClass KRC, SDPatternOperator OpNode,
2811                            X86FoldableSchedWrite sched, Predicate prd,
2812                            bit IsCommutable> {
2813   let Predicates = [prd], isCommutable = IsCommutable in  // Guard and commutability both come from the caller.
2814     def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2815                !strconcat(OpcodeStr,
2816                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2817                [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,  // Selected for (OpNode $src1, $src2).
2818                Sched<[sched]>;
2821 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,  // Expands avx512_mask_binop into B/W/D/Q variants on VK8/VK16/VK32/VK64.
2822                                  SDPatternOperator OpNode,
2823                                  X86FoldableSchedWrite sched, bit IsCommutable,
2824                                  Predicate prdW = HasAVX512> {  // prdW guards only the W variant; callers may override it.
2825   defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,  // 8-bit form requires HasDQI.
2826                              sched, HasDQI, IsCommutable>, VEX, VVVV, VEX_L, TB, PD;
2827   defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,  // 16-bit form requires prdW.
2828                              sched, prdW, IsCommutable>, VEX, VVVV, VEX_L, TB;
2829   defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,  // 32-bit form requires HasBWI.
2830                              sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB, PD;
2831   defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,  // 64-bit form requires HasBWI.
2832                              sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB;
2835 // TODO - do we need a X86SchedWriteWidths::KMASK type?
2836 defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;  // Last listed bit is IsCommutable.
2837 defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
2838 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
2839 defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
2840 defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;  // The only non-commutable entry in this list.
2841 defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;  // Passes HasDQI as prdW, so the W form also needs DQI.
2843 multiclass avx512_binop_pat<SDPatternOperator VOpNode,  // Selects VOpNode on narrow masks by widening both operands to VK16 and using Inst.
2844                             Instruction Inst> {  // Inst receives VK16 operands, so it is expected to be a 16-bit (W) instruction.
2845   // With AVX512F, 8-bit mask is promoted to 16-bit mask,
2846   // for the DQI set, this type is legal and KxxxB instruction is used
2847   let Predicates = [NoDQI] in  // Brace-less `let`: it scopes only the VK8 pattern that follows.
2848   def : Pat<(VOpNode VK8:$src1, VK8:$src2),
2849             (COPY_TO_REGCLASS
2850               (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
2851                     (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
2853   // All types smaller than 8 bits require conversion anyway
2854   def : Pat<(VOpNode VK1:$src1, VK1:$src2),  // Each narrow pattern copies the result back to its own source class.
2855         (COPY_TO_REGCLASS (Inst
2856                            (COPY_TO_REGCLASS VK1:$src1, VK16),
2857                            (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
2858   def : Pat<(VOpNode VK2:$src1, VK2:$src2),
2859         (COPY_TO_REGCLASS (Inst
2860                            (COPY_TO_REGCLASS VK2:$src1, VK16),
2861                            (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
2862   def : Pat<(VOpNode VK4:$src1, VK4:$src2),
2863         (COPY_TO_REGCLASS (Inst
2864                            (COPY_TO_REGCLASS VK4:$src1, VK16),
2865                            (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
2868 defm : avx512_binop_pat<and,   KANDWrr>;  // Each logic node is paired with its 16-bit (W) register instruction.
2869 defm : avx512_binop_pat<vandn, KANDNWrr>;
2870 defm : avx512_binop_pat<or,    KORWrr>;
2871 defm : avx512_binop_pat<vxnor, KXNORWrr>;
2872 defm : avx512_binop_pat<xor,   KXORWrr>;
2874 // Mask unpacking: combines two Src-width masks into one Dst-width mask (opcode 0x4b).
2875 multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
2876                              X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
2877                              Predicate prd> {
2878   let Predicates = [prd] in {
2879     let hasSideEffects = 0 in  // The instruction has no pattern of its own; selection goes through the Pat below.
2880     def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
2881                (ins Src.KRC:$src1, Src.KRC:$src2),
2882                "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
2883                VEX, VVVV, VEX_L, Sched<[sched]>;
2885     def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
2886               (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;  // Operands are swapped; presumably the instruction puts its second source in the low half - confirm against the ISA reference.
2887   }
2890 defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, TB, PD;  // 2 x v8i1 -> v16i1
2891 defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, TB;  // 2 x v16i1 -> v32i1
2892 defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, TB, REX_W;  // 2 x v32i1 -> v64i1
2894 // Mask bit testing: two-source register instruction with no register result; it writes EFLAGS only.
2895 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2896                               SDNode OpNode, X86FoldableSchedWrite sched,
2897                               Predicate prd> {
2898   let Predicates = [prd], Defs = [EFLAGS] in  // EFLAGS is declared as an implicit def.
2899     def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
2900                !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
2901                [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,  // Selected for (OpNode $src1, $src2) producing EFLAGS.
2902                Sched<[sched]>;
2905 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,  // Expands avx512_mask_testop into B/W/Q/D variants.
2906                                 X86FoldableSchedWrite sched,
2907                                 Predicate prdW = HasAVX512> {  // prdW guards only the W variant; callers may override it.
2908   defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,  // 8-bit form requires HasDQI.
2909                                                                 VEX, TB, PD;
2910   defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,  // 16-bit form requires prdW.
2911                                                                 VEX, TB;
2912   defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,  // 64-bit form requires HasBWI.
2913                                                                 VEX, TB, REX_W;
2914   defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,  // 32-bit form requires HasBWI.
2915                                                                 VEX, TB, PD, REX_W;
2918 // TODO - do we need a X86SchedWriteWidths::KMASK type?
2919 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;  // W form uses the default prdW (HasAVX512).
2920 defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;  // Passes HasDQI as prdW, so the W form also needs DQI.
2922 // Mask shift: register source plus 8-bit immediate shift count (ri form).
2923 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2924                                SDNode OpNode, X86FoldableSchedWrite sched> {
2925   let Predicates = [HasAVX512] in  // Fixed predicate here; the wrapper multiclass adds its own `let` for some variants.
2926     def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
2927                  !strconcat(OpcodeStr,
2928                             "\t{$imm, $src, $dst|$dst, $src, $imm}"),
2929                             [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,  // The count is matched as a target immediate (timm).
2930                  Sched<[sched]>;
2933 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,  // opc1 encodes the W and B forms, opc2 the Q and D forms.
2934                                  SDNode OpNode, X86FoldableSchedWrite sched> {
2935   defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,  // W and B share opc1; W additionally carries REX_W.
2936                                sched>, VEX, TA, PD, REX_W;
2937   let Predicates = [HasDQI] in  // NOTE(review): avx512_mask_shiftop also sets Predicates = [HasAVX512] on its def - confirm this outer `let` is the one that takes effect.
2938   defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2939                                sched>, VEX, TA, PD;
2940   let Predicates = [HasBWI] in {  // Same review note applies to the Q and D forms.
2941   defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,  // Q and D share opc2; Q additionally carries REX_W.
2942                                sched>, VEX, TA, PD, REX_W;
2943   defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2944                                sched>, VEX, TA, PD;
2945   }
2948 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;  // 0x32 for B/W, 0x33 for D/Q.
2949 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;  // 0x30 for B/W, 0x31 for D/Q.
2951 // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
2952 multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,  // Frag matches the plain compare, Frag_su the compare under an `and` with a mask.
2953                                                  string InstStr,
2954                                                  X86VectorVTInfo Narrow,
2955                                                  X86VectorVTInfo Wide> {
2956 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),  // Unmasked register-register compare.
2957                                 (Narrow.VT Narrow.RC:$src2), cond)),
2958           (COPY_TO_REGCLASS
2959            (!cast<Instruction>(InstStr#"Zrri")
2960             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),  // Place each narrow operand in a Wide register on top of IMPLICIT_DEF.
2961             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
2962             (X86pcmpm_imm $cc)), Narrow.KRC)>;  // The Wide result mask is copied back to the Narrow mask class.
2964 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,  // Masked form: the `and` with $mask becomes the instruction's mask operand.
2965                            (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
2966                                                     (Narrow.VT Narrow.RC:$src2),
2967                                                     cond)))),
2968           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
2969            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),  // The mask is re-classed to the Wide mask class first.
2970            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2971            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
2972            (X86pcmpm_imm $cc)), Narrow.KRC)>;
2975 multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,  // Like the register-register lowering, but one operand is a broadcast load.
2976                                                      string InstStr,
2977                                                      X86VectorVTInfo Narrow,
2978                                                      X86VectorVTInfo Wide> {
2979 // Broadcast load.
2980 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),  // Unmasked; the broadcast is the second operand.
2981                                 (Narrow.BroadcastLdFrag addr:$src2), cond)),
2982           (COPY_TO_REGCLASS
2983            (!cast<Instruction>(InstStr#"Zrmib")
2984             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),  // Only the register operand needs widening.
2985             addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
2987 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,  // Masked; the broadcast is the second operand.
2988                            (Narrow.KVT
2989                             (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
2990                                          (Narrow.BroadcastLdFrag addr:$src2),
2991                                          cond)))),
2992           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
2993            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
2994            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2995            addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
2997 // Commuted with broadcast load.
2998 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),  // Unmasked; the broadcast is the first operand.
2999                                 (Narrow.VT Narrow.RC:$src1),
3000                                 cond)),
3001           (COPY_TO_REGCLASS
3002            (!cast<Instruction>(InstStr#"Zrmib")
3003             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3004             addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;  // The condition goes through the _commute transform because the operands are swapped.
3006 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,  // Masked; the broadcast is the first operand.
3007                            (Narrow.KVT
3008                             (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
3009                                          (Narrow.VT Narrow.RC:$src1),
3010                                          cond)))),
3011           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3012            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3013            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3014            addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3017 // Same as above, but for fp types which don't use PatFrags.
3018 multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,  // Matches X86cmpm / X86cmpm_su directly, with the condition as timm:$cc.
3019                                                 X86VectorVTInfo Narrow,
3020                                                 X86VectorVTInfo Wide> {
3021 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),  // Unmasked register-register compare.
3022                                (Narrow.VT Narrow.RC:$src2), timm:$cc)),
3023           (COPY_TO_REGCLASS
3024            (!cast<Instruction>(InstStr#"Zrri")
3025             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),  // Place each narrow operand in a Wide register on top of IMPLICIT_DEF.
3026             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3027             timm:$cc), Narrow.KRC)>;
3029 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,  // Masked register-register compare.
3030                            (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3031                                        (Narrow.VT Narrow.RC:$src2), timm:$cc))),
3032           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3033            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3034            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3035            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3036            timm:$cc), Narrow.KRC)>;
3038 // Broadcast load.
3039 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),  // Unmasked; the broadcast is the second operand.
3040                                (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
3041           (COPY_TO_REGCLASS
3042            (!cast<Instruction>(InstStr#"Zrmbi")
3043             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3044             addr:$src2, timm:$cc), Narrow.KRC)>;
3046 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,  // Masked; the broadcast is the second operand.
3047                            (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3048                                        (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
3049           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3050            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3051            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3052            addr:$src2, timm:$cc), Narrow.KRC)>;
3054 // Commuted with broadcast load.
3055 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),  // Unmasked; the broadcast is the first operand.
3056                                (Narrow.VT Narrow.RC:$src1), timm:$cc)),
3057           (COPY_TO_REGCLASS
3058            (!cast<Instruction>(InstStr#"Zrmbi")
3059             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3060             addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;  // The condition goes through the _commute transform because the operands are swapped.
3062 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,  // Masked; the broadcast is the first operand.
3063                            (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3064                                        (Narrow.VT Narrow.RC:$src1), timm:$cc))),
3065           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3066            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3067            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3068            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3071 let Predicates = [HasAVX512, NoVLX] in {  // Without VLX, 128/256-bit dword/qword and fp compares are widened to the 512-bit forms.
3072   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;  // Each type gets a X86pcmpm entry and a X86pcmpum (VPCMPU*) entry.
3073   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3075   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3076   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3078   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3079   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3081   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3082   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3084   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;  // Broadcast-load variants for the same dword/qword types.
3085   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3087   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3088   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3090   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3091   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3093   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3094   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3096   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;  // Floating-point compares (register and broadcast-load forms in one multiclass).
3097   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
3098   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
3099   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
3102 let Predicates = [HasBWI, NoVLX] in {  // Byte/word compares: only the register-register lowering is instantiated here.
3103   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3104   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3106   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3107   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3109   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3110   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3112   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3113   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3116 // Mask setting all 0s or 1s: a pseudo instruction with no inputs that produces the constant Val in KRC.
3117 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
3118   let Predicates = [HasAVX512] in
3119     let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,  // Flagged as a rematerializable, move-cheap pseudo.
3120         SchedRW = [WriteZero] in
3121       def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",  // The record takes the defm prefix as its name; the asm string is empty.
3122                      [(set KRC:$dst, (VT Val))]>;
3125 multiclass avx512_mask_setop_w<SDPatternOperator Val> {  // W/D/Q pseudos for 16/32/64-bit masks; no 8-bit or narrower variant is defined here.
3126   defm W : avx512_mask_setop<VK16, v16i1, Val>;
3127   defm D : avx512_mask_setop<VK32,  v32i1, Val>;
3128   defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3131 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;  // KSET0W/D/Q: all-zeros mask.
3132 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;  // KSET1W/D/Q: all-ones mask.
3134 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
3135 let Predicates = [HasAVX512] in {  // Constants of 8 bits or fewer reuse the 16-bit pseudo and re-class the result.
3136   def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3137   def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3138   def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3139   def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3140   def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
3141   def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
3142   def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
3143   def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
3146 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
3147 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,  // (subRC, subVT) is the narrower mask, (RC, VT) the wider one.
3148                                              RegisterClass RC, ValueType VT> {
3149   def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),  // Extract at index 0 is just a register-class copy.
3150             (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3152   def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),  // Insert into undef at index 0 is also just a register-class copy.
3153             (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
3155 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;  // Every (narrower, wider) pair of mask widths from 1 to 64 bits is instantiated below.
3156 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
3157 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
3158 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
3159 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
3160 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;
3162 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
3163 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
3164 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
3165 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
3166 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;
3168 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
3169 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
3170 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
3171 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;
3173 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
3174 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
3175 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;
3177 defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3178 defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3180 defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3182 //===----------------------------------------------------------------------===//
3183 // AVX-512 - Aligned and unaligned load and store
3186 multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,  // Load-side move forms (rr, rrkz, rm, rrk, rmk, rmkz) plus masked-load patterns for one vector type.
3187                        X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,  // ld_frag: plain load fragment; mload: masked-load fragment.
3188                        X86SchedWriteMoveLS Sched, bit NoRMPattern = 0,
3189                        SDPatternOperator SelectOprr = vselect> {  // SelectOprr is the select node used by the register-register masked forms.
3190   let hasSideEffects = 0 in {
3191   let isMoveReg = 1 in  // Brace-less `let`: it scopes only the rr def that follows.
3192   def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),  // Plain register move; no selection pattern.
3193                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3194                     _.ExeDomain>, EVEX, Sched<[Sched.RR]>;
3195   def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),  // Zero-masked register move: select(mask, src, zeros).
3196                       (ins _.KRCWM:$mask,  _.RC:$src),
3197                       !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3198                        "${dst} {${mask}} {z}, $src}"),
3199                        [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3200                                            (_.VT _.RC:$src),
3201                                            _.ImmAllZerosV)))], _.ExeDomain>,
3202                        EVEX, EVEX_KZ, Sched<[Sched.RR]>;
3204   let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3205   def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),  // Unmasked load.
3206                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3207                     !if(NoRMPattern, [],  // NoRMPattern = 1 leaves this form without a selection pattern.
3208                         [(set _.RC:$dst,
3209                           (_.VT (ld_frag addr:$src)))]),
3210                     _.ExeDomain>, EVEX, Sched<[Sched.RM]>;
3212   let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {  // Merge-masked forms: pass-through $src0 is tied to $dst.
3213     def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),  // Merge-masked register move: select(mask, src1, src0).
3214                       (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3215                       !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3216                       "${dst} {${mask}}, $src1}"),
3217                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3218                                           (_.VT _.RC:$src1),
3219                                           (_.VT _.RC:$src0))))], _.ExeDomain>,
3220                        EVEX, EVEX_K, Sched<[Sched.RR]>;
3221     def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),  // Merge-masked load; always uses vselect_mask rather than SelectOprr.
3222                      (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3223                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3224                       "${dst} {${mask}}, $src1}"),
3225                      [(set _.RC:$dst, (_.VT
3226                          (vselect_mask _.KRCWM:$mask,
3227                           (_.VT (ld_frag addr:$src1)),
3228                            (_.VT _.RC:$src0))))], _.ExeDomain>,
3229                      EVEX, EVEX_K, Sched<[Sched.RM]>;
3230   }
3231   def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),  // Zero-masked load: vselect_mask(mask, load, zeros).
3232                   (ins _.KRCWM:$mask, _.MemOp:$src),
3233                   OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3234                                 "${dst} {${mask}} {z}, $src}",
3235                   [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
3236                     (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3237                   _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
3238   }
3239   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),  // Masked load with undef pass-through -> zero-masking form.
3240             (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3242   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),  // Masked load with zero pass-through -> zero-masking form.
3243             (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3245   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),  // Masked load with register pass-through -> merge-masking form.
3246             (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
3247              _.KRCWM:$mask, addr:$ptr)>;
3250 multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,  // Aligned loads at 512/256/128 bits, using AlignedLdFrag and masked_load_aligned.
3251                                  AVX512VLVectorVTInfo _, Predicate prd,
3252                                  X86SchedWriteMoveLSWidths Sched,
3253                                  bit NoRMPattern = 0> {
3254   let Predicates = [prd] in  // The 512-bit form needs only prd.
3255   defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3256                        _.info512.AlignedLdFrag, masked_load_aligned,
3257                        Sched.ZMM, NoRMPattern>, EVEX_V512;
3259   let Predicates = [prd, HasVLX] in {  // The 256/128-bit forms additionally need HasVLX.
3260   defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3261                           _.info256.AlignedLdFrag, masked_load_aligned,
3262                           Sched.YMM, NoRMPattern>, EVEX_V256;
3263   defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3264                           _.info128.AlignedLdFrag, masked_load_aligned,
3265                           Sched.XMM, NoRMPattern>, EVEX_V128;
3266   }
3269 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,  // Loads at 512/256/128 bits using LdFrag and masked_load (no alignment-specific fragments).
3270                           AVX512VLVectorVTInfo _, Predicate prd,
3271                           X86SchedWriteMoveLSWidths Sched,
3272                           bit NoRMPattern = 0,
3273                           SDPatternOperator SelectOprr = vselect> {  // Forwarded unchanged to avx512_load.
3274   let Predicates = [prd] in  // The 512-bit form needs only prd.
3275   defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3276                        masked_load, Sched.ZMM, NoRMPattern, SelectOprr>, EVEX_V512;
3278   let Predicates = [prd, HasVLX] in {  // The 256/128-bit forms additionally need HasVLX.
3279   defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3280                          masked_load, Sched.YMM, NoRMPattern, SelectOprr>, EVEX_V256;
3281   defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3282                          masked_load, Sched.XMM, NoRMPattern, SelectOprr>, EVEX_V128;
3283   }
3286 multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,  // Store-side move forms: reversed-encoding register moves, mr/mrk stores, and ".s" aliases.
3287                         X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,  // st_frag: plain store fragment; mstore: masked-store fragment.
3288                         X86SchedWriteMoveLS Sched, bit NoMRPattern = 0> {
3289   let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {  // _REV defs: MRMDestReg register moves without selection patterns.
3290   let isMoveReg = 1 in  // Brace-less `let`: it scopes only the rr_REV def that follows.
3291   def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3292                          OpcodeStr # "\t{$src, $dst|$dst, $src}",
3293                          [], _.ExeDomain>, EVEX,
3294                          Sched<[Sched.RR]>;
3295   def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),  // Merge-masked reversed form.
3296                          (ins _.KRCWM:$mask, _.RC:$src),
3297                          OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3298                          "${dst} {${mask}}, $src}",
3299                          [], _.ExeDomain>,  EVEX, EVEX_K,
3300                          Sched<[Sched.RR]>;
3301   def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),  // Zero-masked reversed form.
3302                           (ins _.KRCWM:$mask, _.RC:$src),
3303                           OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3304                           "${dst} {${mask}} {z}, $src}",
3305                           [], _.ExeDomain>, EVEX, EVEX_KZ,
3306                           Sched<[Sched.RR]>;
3307   }
3309   let hasSideEffects = 0, mayStore = 1 in  // Brace-less `let`: it scopes only the mr def that follows.
3310   def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),  // Unmasked store.
3311                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3312                     !if(NoMRPattern, [],  // NoMRPattern = 1 leaves this form without a selection pattern.
3313                         [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3314                     _.ExeDomain>, EVEX, Sched<[Sched.MR]>;
3315   def mrk : AVX512PI<opc, MRMDestMem, (outs),  // Masked store; selected through the mstore Pat below.
3316                      (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3317               OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3318                [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>;
3320   def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),  // Masked store node -> mrk.
3321            (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3322                                                         _.KRCWM:$mask, _.RC:$src)>;
3324   def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",  // The ".s" mnemonic suffix maps to the corresponding _REV def.
3325                   (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3326                    _.RC:$dst, _.RC:$src), 0>;
3327   def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3328                   (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3329                    _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3330   def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3331                   (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3332                    _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3335 multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,  // Stores at 512/256/128 bits using the `store` and `masked_store` fragments.
3336                             AVX512VLVectorVTInfo _, Predicate prd,
3337                             X86SchedWriteMoveLSWidths Sched,
3338                             bit NoMRPattern = 0> {
3339   let Predicates = [prd] in  // The 512-bit form needs only prd.
3340   defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3341                         masked_store, Sched.ZMM, NoMRPattern>, EVEX_V512;
3342   let Predicates = [prd, HasVLX] in {  // The 256/128-bit forms additionally need HasVLX.
3343     defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3344                              masked_store, Sched.YMM, NoMRPattern>, EVEX_V256;
3345     defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3346                              masked_store, Sched.XMM, NoMRPattern>, EVEX_V128;
3347   }
// Aligned counterpart of avx512_store_vl: same parameters and the same
// Z / Z256 / Z128 layout, but the store fragments passed to avx512_store are
// `alignedstore` and `masked_store_aligned`.
3350 multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3351                                   AVX512VLVectorVTInfo _, Predicate prd,
3352                                   X86SchedWriteMoveLSWidths Sched,
3353                                   bit NoMRPattern = 0> {
  // The 512-bit form only needs `prd`.
3354   let Predicates = [prd] in
3355   defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3356                         masked_store_aligned, Sched.ZMM, NoMRPattern>, EVEX_V512;
  // The 256-bit and 128-bit forms additionally require HasVLX.
3358   let Predicates = [prd, HasVLX] in {
3359     defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3360                              masked_store_aligned, Sched.YMM, NoMRPattern>, EVEX_V256;
3361     defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3362                              masked_store_aligned, Sched.XMM, NoMRPattern>, EVEX_V128;
3363   }
// vmovaps: aligned load (opcode 0x28) and aligned store (opcode 0x29) over
// avx512vl_f32_info, gated on HasAVX512. NoMRPattern is left at its default
// of 0 on the store side.
3366 defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3367                                      HasAVX512, SchedWriteFMoveLS>,
3368                avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3369                                       HasAVX512, SchedWriteFMoveLS>,
3370                TB, EVEX_CD8<32, CD8VF>;
// vmovapd: same opcodes as vmovaps (0x28 load / 0x29 store) but over
// avx512vl_f64_info, with the PD prefix and REX_W added.
3372 defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3373                                      HasAVX512, SchedWriteFMoveLS>,
3374                avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3375                                       HasAVX512, SchedWriteFMoveLS>,
3376                TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
// vmovups: unaligned load (opcode 0x10) and unaligned store (opcode 0x11)
// over avx512vl_f32_info, gated on HasAVX512.
// NOTE(review): the trailing `0, null_frag` load arguments bind to
// avx512_load_vl parameters declared outside this excerpt - confirm their
// meaning against that multiclass.
3378 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3379                               SchedWriteFMoveLS, 0, null_frag>,
3380                avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3381                                SchedWriteFMoveLS>,
3382                                TB, EVEX_CD8<32, CD8VF>;
// vmovupd: same opcodes as vmovups (0x10 load / 0x11 store) but over
// avx512vl_f64_info, with the PD prefix and REX_W added.
// NOTE(review): the trailing `0, null_frag` load arguments bind to
// avx512_load_vl parameters declared outside this excerpt - confirm.
3384 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3385                               SchedWriteFMoveLS, 0, null_frag>,
3386                avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3387                                SchedWriteFMoveLS>,
3388                TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
// vmovdqa32: aligned integer load (opcode 0x6F) and store (opcode 0x7F) over
// avx512vl_i32_info, gated on HasAVX512.
// The store is instantiated with NoMRPattern = 1, so its unmasked `mr` form
// carries no selection pattern; the explicit aligned-store patterns later in
// this file map v16i32 / v8i32 / v4i32 stores to the VMOVDQA64 forms instead.
// NOTE(review): the trailing 1 on the load side binds to an
// avx512_alignedload_vl parameter declared outside this excerpt - presumably
// the load-side equivalent of NoMRPattern; confirm.
3390 defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3391                                        HasAVX512, SchedWriteVecMoveLS, 1>,
3392                  avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3393                                         HasAVX512, SchedWriteVecMoveLS, 1>,
3394                  TB, PD, EVEX_CD8<32, CD8VF>;
// vmovdqa64: aligned integer load (opcode 0x6F) and store (opcode 0x7F) over
// avx512vl_i64_info with REX_W. NoMRPattern stays at its default of 0, and
// the explicit patterns later in this file also route aligned loads/stores of
// the other integer vector types to these instructions.
3396 defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3397                                        HasAVX512, SchedWriteVecMoveLS>,
3398                  avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3399                                         HasAVX512, SchedWriteVecMoveLS>,
3400                  TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
// vmovdqu8: unaligned integer load (opcode 0x6F) and store (opcode 0x7F) over
// avx512vl_i8_info, gated on HasBWI rather than HasAVX512.
// The store passes NoMRPattern = 1, so its unmasked `mr` form has no pattern.
// NOTE(review): the trailing 1 on the load side binds to an avx512_load_vl
// parameter declared outside this excerpt - confirm its meaning.
3402 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3403                                SchedWriteVecMoveLS, 1>,
3404                 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3405                                 SchedWriteVecMoveLS, 1>,
3406                 TB, XD, EVEX_CD8<8, CD8VF>;
// vmovdqu16: like vmovdqu8 (opcodes 0x6F / 0x7F, HasBWI, NoMRPattern = 1 on
// the store) but over avx512vl_i16_info and with REX_W. The f16/bf16 masked
// move patterns later in this file also select these instructions.
// NOTE(review): the trailing 1 on the load side binds to an avx512_load_vl
// parameter declared outside this excerpt - confirm its meaning.
3408 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3409                                 SchedWriteVecMoveLS, 1>,
3410                  avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3411                                  SchedWriteVecMoveLS, 1>,
3412                  TB, XD, REX_W, EVEX_CD8<16, CD8VF>;
// vmovdqu32: unaligned integer load (opcode 0x6F) and store (opcode 0x7F) over
// avx512vl_i32_info, gated on HasAVX512, XS prefix.
// The store passes NoMRPattern = 1; unmasked i32-vector stores are mapped to
// the VMOVDQU64 forms by the explicit patterns later in this file.
// NOTE(review): the trailing `1, null_frag` load arguments bind to
// avx512_load_vl parameters declared outside this excerpt - confirm.
3414 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3415                                 SchedWriteVecMoveLS, 1, null_frag>,
3416                  avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3417                                  SchedWriteVecMoveLS, 1>,
3418                  TB, XS, EVEX_CD8<32, CD8VF>;
// vmovdqu64: unaligned integer load (opcode 0x6F) and store (opcode 0x7F) over
// avx512vl_i64_info with REX_W. NoMRPattern stays at its default of 0, and
// the explicit patterns later in this file also route unaligned loads/stores
// of the other integer vector types to these instructions.
// NOTE(review): the trailing `0, null_frag` load arguments bind to
// avx512_load_vl parameters declared outside this excerpt - confirm.
3420 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3421                                 SchedWriteVecMoveLS, 0, null_frag>,
3422                  avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3423                                  SchedWriteVecMoveLS>,
3424                  TB, XS, REX_W, EVEX_CD8<64, CD8VF>;
3426 // Special instructions to help with spilling when we don't have VLX. We need
3427 // to load or store from a ZMM register instead. These are converted in
3428 // expandPostRAPseudos.
// Load side: four pseudos (aligned/unaligned x 128/256-bit) with opcode 0, an
// empty asm string and no selection pattern. They are marked as loads that can
// be rematerialized and folded.
3429 let isReMaterializable = 1, canFoldAsLoad = 1,
3430     isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3431 def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3432                             "", []>, Sched<[WriteFLoadX]>;
3433 def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3434                             "", []>, Sched<[WriteFLoadY]>;
3435 def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3436                             "", []>, Sched<[WriteFLoadX]>;
3437 def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3438                             "", []>, Sched<[WriteFLoadY]>;
// Store-side counterparts of the *_NOVLX load pseudos: four pseudos
// (aligned/unaligned x 128/256-bit) with opcode 0, an empty asm string and no
// selection pattern, marked mayStore with no other side effects.
3441 let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3442 def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3443                             "", []>, Sched<[WriteFStoreX]>;
3444 def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3445                             "", []>, Sched<[WriteFStoreY]>;
3446 def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3447                             "", []>, Sched<[WriteFStoreX]>;
3448 def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3449                             "", []>, Sched<[WriteFStoreY]>;
// v8i64 select whose "true" operand is all zeros: the result keeps $src only
// where the mask bit is clear. Lowered to a zero-masking VMOVDQA64Zrrkz with
// the mask inverted; the VK8 mask is copied to VK16 for KNOTWrr and the result
// is copied back to VK8.
3452 def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3453                           (v8i64 VR512:$src))),
3454    (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3455                                               VK8), VR512:$src)>;
// v16i32 select whose "true" operand is all zeros: lowered to a zero-masking
// VMOVDQA32Zrrkz under the inverted mask. The mask is already 16 bits wide, so
// KNOTWrr is applied to it directly with no register-class copies.
3457 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3458                            (v16i32 VR512:$src))),
3459                   (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3461 // These patterns exist to prevent the above patterns from introducing a second
3462 // mask inversion when one already exists.
// When the select condition is itself (vnot $mask), the un-inverted $mask is
// used directly as the zero-masking predicate, so no KNOT is emitted.
3463 def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
3464                           (v8i64 immAllZerosV),
3465                           (v8i64 VR512:$src))),
3466                  (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3467 def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
3468                            (v16i32 immAllZerosV),
3469                            (v16i32 VR512:$src))),
3470                   (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
// Lowers a masked select on a Narrow vector type by performing the masked move
// on the Wide type and extracting the narrow result.
//   InstrStr - base name of the wide move instruction; "rrk" / "rrkz" is
//              appended to pick the merge-masking / zero-masking form.
//   Narrow   - type info of the vector actually being selected.
//   Wide     - type info of the wider vector the instruction operates on.
3472 multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3473                               X86VectorVTInfo Wide> {
 // Merge-masking select: both narrow operands are inserted into otherwise
 // undefined wide registers, the mask is copied to the wide mask class, and
 // the narrow subregister of the wide result is extracted.
3474  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3475                                Narrow.RC:$src1, Narrow.RC:$src0)),
3476            (EXTRACT_SUBREG
3477             (Wide.VT
3478              (!cast<Instruction>(InstrStr#"rrk")
3479               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3480               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3481               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3482             Narrow.SubRegIdx)>;
 // Zero-masking select (false operand is all zeros): only $src1 needs to be
 // widened; the "rrkz" form supplies the zeros.
3484  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3485                                Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3486            (EXTRACT_SUBREG
3487             (Wide.VT
3488              (!cast<Instruction>(InstrStr#"rrkz")
3489               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3490               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3491             Narrow.SubRegIdx)>;
3494 // Patterns for handling masked selects of 128-bit and 256-bit vectors with
3495 // 32/64-bit elements when VLX isn't available. Use a 512-bit operation and extract.
// Each narrow type is paired with the 512-bit type of the same element type,
// and the move instruction named is the 512-bit ("Z") form.
3496 let Predicates = [HasAVX512, NoVLX] in {
3497   defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3498   defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3499   defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3500   defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3502   defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3503   defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3504   defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3505   defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
// Same widening trick for vectors with 8-bit and 16-bit elements, which needs
// HasBWI. The f16 and bf16 vector types reuse VMOVDQU16Z, the same instruction
// as the i16 types.
3508 let Predicates = [HasBWI, NoVLX] in {
3509   defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3510   defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3512   defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3513   defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
3515   defm : mask_move_lowering<"VMOVDQU16Z", v8f16x_info, v32f16_info>;
3516   defm : mask_move_lowering<"VMOVDQU16Z", v16f16x_info, v32f16_info>;
3518   defm : mask_move_lowering<"VMOVDQU16Z", v8bf16x_info, v32bf16_info>;
3519   defm : mask_move_lowering<"VMOVDQU16Z", v16bf16x_info, v32bf16_info>;
// Unmasked 512-bit loads and stores for v16i32, v32i16, v64i8, v32f16 and
// v32bf16:
//   integer types -> VMOVDQA64Z (aligned) / VMOVDQU64Z (unaligned)
//   f16 / bf16    -> VMOVAPSZ   (aligned) / VMOVUPSZ   (unaligned)
3522 let Predicates = [HasAVX512] in {
3523   // 512-bit load.
3524   def : Pat<(alignedloadv16i32 addr:$src),
3525             (VMOVDQA64Zrm addr:$src)>;
3526   def : Pat<(alignedloadv32i16 addr:$src),
3527             (VMOVDQA64Zrm addr:$src)>;
3528   def : Pat<(alignedloadv32f16 addr:$src),
3529             (VMOVAPSZrm addr:$src)>;
3530   def : Pat<(alignedloadv32bf16 addr:$src),
3531             (VMOVAPSZrm addr:$src)>;
3532   def : Pat<(alignedloadv64i8 addr:$src),
3533             (VMOVDQA64Zrm addr:$src)>;
3534   def : Pat<(loadv16i32 addr:$src),
3535             (VMOVDQU64Zrm addr:$src)>;
3536   def : Pat<(loadv32i16 addr:$src),
3537             (VMOVDQU64Zrm addr:$src)>;
3538   def : Pat<(loadv32f16 addr:$src),
3539             (VMOVUPSZrm addr:$src)>;
3540   def : Pat<(loadv32bf16 addr:$src),
3541             (VMOVUPSZrm addr:$src)>;
3542   def : Pat<(loadv64i8 addr:$src),
3543             (VMOVDQU64Zrm addr:$src)>;
3545   // 512-bit store.
3546   def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3547             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3548   def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3549             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3550   def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
3551             (VMOVAPSZmr addr:$dst, VR512:$src)>;
3552   def : Pat<(alignedstore (v32bf16 VR512:$src), addr:$dst),
3553             (VMOVAPSZmr addr:$dst, VR512:$src)>;
3554   def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3555             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3556   def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3557             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3558   def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3559             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3560   def : Pat<(store (v32f16 VR512:$src), addr:$dst),
3561             (VMOVUPSZmr addr:$dst, VR512:$src)>;
3562   def : Pat<(store (v32bf16 VR512:$src), addr:$dst),
3563             (VMOVUPSZmr addr:$dst, VR512:$src)>;
3564   def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3565             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
// 128-bit and 256-bit versions of the unmasked load/store patterns, available
// only with HasVLX. The mapping mirrors the 512-bit one:
//   integer types -> VMOVDQA64Z{128,256} (aligned) / VMOVDQU64Z{128,256}
//   f16 / bf16    -> VMOVAPSZ{128,256}   (aligned) / VMOVUPSZ{128,256}
3568 let Predicates = [HasVLX] in {
3569   // 128-bit load.
3570   def : Pat<(alignedloadv4i32 addr:$src),
3571             (VMOVDQA64Z128rm addr:$src)>;
3572   def : Pat<(alignedloadv8i16 addr:$src),
3573             (VMOVDQA64Z128rm addr:$src)>;
3574   def : Pat<(alignedloadv8f16 addr:$src),
3575             (VMOVAPSZ128rm addr:$src)>;
3576   def : Pat<(alignedloadv8bf16 addr:$src),
3577             (VMOVAPSZ128rm addr:$src)>;
3578   def : Pat<(alignedloadv16i8 addr:$src),
3579             (VMOVDQA64Z128rm addr:$src)>;
3580   def : Pat<(loadv4i32 addr:$src),
3581             (VMOVDQU64Z128rm addr:$src)>;
3582   def : Pat<(loadv8i16 addr:$src),
3583             (VMOVDQU64Z128rm addr:$src)>;
3584   def : Pat<(loadv8f16 addr:$src),
3585             (VMOVUPSZ128rm addr:$src)>;
3586   def : Pat<(loadv8bf16 addr:$src),
3587             (VMOVUPSZ128rm addr:$src)>;
3588   def : Pat<(loadv16i8 addr:$src),
3589             (VMOVDQU64Z128rm addr:$src)>;
3591   // 128-bit store.
3592   def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3593             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3594   def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3595             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3596   def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
3597             (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3598   def : Pat<(alignedstore (v8bf16 VR128X:$src), addr:$dst),
3599             (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3600   def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3601             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3602   def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3603             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3604   def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3605             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3606   def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
3607             (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3608   def : Pat<(store (v8bf16 VR128X:$src), addr:$dst),
3609             (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3610   def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3611             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3613   // 256-bit load.
3614   def : Pat<(alignedloadv8i32 addr:$src),
3615             (VMOVDQA64Z256rm addr:$src)>;
3616   def : Pat<(alignedloadv16i16 addr:$src),
3617             (VMOVDQA64Z256rm addr:$src)>;
3618   def : Pat<(alignedloadv16f16 addr:$src),
3619             (VMOVAPSZ256rm addr:$src)>;
3620   def : Pat<(alignedloadv16bf16 addr:$src),
3621             (VMOVAPSZ256rm addr:$src)>;
3622   def : Pat<(alignedloadv32i8 addr:$src),
3623             (VMOVDQA64Z256rm addr:$src)>;
3624   def : Pat<(loadv8i32 addr:$src),
3625             (VMOVDQU64Z256rm addr:$src)>;
3626   def : Pat<(loadv16i16 addr:$src),
3627             (VMOVDQU64Z256rm addr:$src)>;
3628   def : Pat<(loadv16f16 addr:$src),
3629             (VMOVUPSZ256rm addr:$src)>;
3630   def : Pat<(loadv16bf16 addr:$src),
3631             (VMOVUPSZ256rm addr:$src)>;
3632   def : Pat<(loadv32i8 addr:$src),
3633             (VMOVDQU64Z256rm addr:$src)>;
3635   // 256-bit store.
3636   def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3637             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3638   def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3639             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3640   def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
3641             (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3642   def : Pat<(alignedstore (v16bf16 VR256X:$src), addr:$dst),
3643             (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3644   def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3645             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3646   def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3647             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3648   def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3649             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3650   def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
3651             (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3652   def : Pat<(store (v16bf16 VR256X:$src), addr:$dst),
3653             (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3654   def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3655             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
// Masked select / masked load / masked store patterns for a 16-bit-element
// vector type family `_`, all selected to the VMOVDQU16 instructions.
// The same ten patterns are repeated per width:
//   - register select, merge-masking (rrk) and zero-masking (rrkz);
//   - select of an aligned load and of an unaligned load, each with a
//     register or all-zeros false operand (rmk / rmkz);
//   - masked_load with a register, undef, or all-zeros pass-through; the
//     undef and all-zeros cases both use the zero-masking rmkz form;
//   - masked_store (mrk).
// Aligned-load selects also map to VMOVDQU16, like the unaligned ones.
3658 multiclass mask_move_lowering_f16_bf16<AVX512VLVectorVTInfo _> {
// 512-bit forms: 32-bit mask (VK32WM), BWI only.
3659 let Predicates = [HasBWI] in {
3660   def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), (_.info512.VT VR512:$src0))),
3661             (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
3662   def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), _.info512.ImmAllZerosV)),
3663             (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
3664   def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3665                      (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), (_.info512.VT VR512:$src0))),
3666             (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3667   def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3668                      (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), _.info512.ImmAllZerosV)),
3669             (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3670   def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3671                      (_.info512.VT (_.info512.LdFrag addr:$src)), (_.info512.VT VR512:$src0))),
3672             (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3673   def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3674                      (_.info512.VT (_.info512.LdFrag addr:$src)), _.info512.ImmAllZerosV)),
3675             (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3676   def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, (_.info512.VT VR512:$src0))),
3677             (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3678   def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, undef)),
3679             (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3680   def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, _.info512.ImmAllZerosV)),
3681             (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3683   def : Pat<(masked_store (_.info512.VT VR512:$src), addr:$dst, VK32WM:$mask),
3684             (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
// 256-bit (VK16WM mask) and 128-bit (VK8WM mask) forms: BWI plus VLX.
3686 let Predicates = [HasBWI, HasVLX] in {
3687   def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src0))),
3688             (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
3689   def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), _.info256.ImmAllZerosV)),
3690             (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
3691   def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3692                      (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
3693             (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3694   def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3695                      (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), _.info256.ImmAllZerosV)),
3696             (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3697   def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3698                      (_.info256.VT (_.info256.LdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
3699             (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3700   def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3701                      (_.info256.VT (_.info256.LdFrag addr:$src)), _.info256.ImmAllZerosV)),
3702             (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3703   def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, (_.info256.VT VR256X:$src0))),
3704             (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3705   def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, undef)),
3706             (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3707   def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, _.info256.ImmAllZerosV)),
3708             (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3710   def : Pat<(masked_store (_.info256.VT VR256X:$src), addr:$dst, VK16WM:$mask),
3711             (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;
3713   def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), (_.info128.VT VR128X:$src0))),
3714             (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
3715   def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), _.info128.ImmAllZerosV)),
3716             (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
3717   def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3718                      (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
3719             (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3720   def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3721                      (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), _.info128.ImmAllZerosV)),
3722             (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3723   def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3724                      (_.info128.VT (_.info128.LdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
3725             (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3726   def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3727                      (_.info128.VT (_.info128.LdFrag addr:$src)), _.info128.ImmAllZerosV)),
3728             (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3729   def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, (_.info128.VT VR128X:$src0))),
3730             (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3731   def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, undef)),
3732             (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3733   def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, _.info128.ImmAllZerosV)),
3734             (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3736   def : Pat<(masked_store (_.info128.VT VR128X:$src), addr:$dst, VK8WM:$mask),
3737             (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
// Emit the VMOVDQU16-based masked move patterns for the f16 and the bf16
// vector type families.
3741 defm : mask_move_lowering_f16_bf16<avx512vl_f16_info>;
3742 defm : mask_move_lowering_f16_bf16<avx512vl_bf16_info>;
3744 // Move Int Doubleword to Packed Double Int
// Also holds the 64-bit (vmovq) forms and the codegen-only GR64 <-> FR64X
// bitcast moves. Everything here is in the SSEPackedInt execution domain.
3746 let ExeDomain = SSEPackedInt in {
// vmovd GR32 -> XMM: selected for scalar_to_vector producing v4i32.
3747 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3748                       "vmovd\t{$src, $dst|$dst, $src}",
3749                       [(set VR128X:$dst,
3750                         (v4i32 (scalar_to_vector GR32:$src)))]>,
3751                         EVEX, Sched<[WriteVecMoveFromGpr]>;
// vmovd m32 -> XMM: same pattern with the scalar coming from an i32 load.
3752 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3753                       "vmovd\t{$src, $dst|$dst, $src}",
3754                       [(set VR128X:$dst,
3755                         (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3756                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
// vmovq GR64 -> XMM: selected for scalar_to_vector producing v2i64.
3757 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3758                       "vmovq\t{$src, $dst|$dst, $src}",
3759                         [(set VR128X:$dst,
3760                           (v2i64 (scalar_to_vector GR64:$src)))]>,
3761                       EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
// vmovq m64 -> XMM with opcode 0x6E: no selection pattern. It is marked
// isCodeGenOnly but ForceDisassemble is set as well.
3762 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3763 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3764                       (ins i64mem:$src),
3765                       "vmovq\t{$src, $dst|$dst, $src}", []>,
3766                       EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
// Codegen-only variants that use the scalar FR64X register class so that a
// plain i64 <-> f64 bitconvert can be selected in either direction.
3767 let isCodeGenOnly = 1 in {
3768 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3769                        "vmovq\t{$src, $dst|$dst, $src}",
3770                        [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3771                        EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
3772 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3773                          "vmovq\t{$src, $dst|$dst, $src}",
3774                          [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3775                          EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
3777 } // ExeDomain = SSEPackedInt
3779 // Move Int Doubleword to Single Scalar
// Codegen-only vmovd GR32 -> FR32X, selected for an i32 -> f32 bitconvert.
3781 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3782 def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3783                       "vmovd\t{$src, $dst|$dst, $src}",
3784                       [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3785                       EVEX, Sched<[WriteVecMoveFromGpr]>;
3786 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3788 // Move doubleword from xmm register to r/m32
// Both forms match extraction of element 0 of a v4i32: the rr form writes it
// to a GR32, the mr form stores it to memory.
3790 let ExeDomain = SSEPackedInt in {
3791 def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3792                        "vmovd\t{$src, $dst|$dst, $src}",
3793                        [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3794                                         (iPTR 0)))]>,
3795                        EVEX, Sched<[WriteVecMoveToGpr]>;
3796 def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
3797                        (ins i32mem:$dst, VR128X:$src),
3798                        "vmovd\t{$src, $dst|$dst, $src}",
3799                        [(store (i32 (extractelt (v4i32 VR128X:$src),
3800                                      (iPTR 0))), addr:$dst)]>,
3801                        EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3802 } // ExeDomain = SSEPackedInt
3804 // Move quadword from xmm1 register to r/m64
// Two opcodes are involved: 0x7E (VMOVPQIto64Z*) and 0xD6 (VMOVPQI2QIZ*).
// Only VMOVPQIto64Zrr and VMOVPQI2QIZmr carry selection patterns.
3806 let ExeDomain = SSEPackedInt in {
// vmovq XMM -> GR64: element 0 of a v2i64.
3807 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3808                       "vmovq\t{$src, $dst|$dst, $src}",
3809                       [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3810                                                    (iPTR 0)))]>,
3811                       TB, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>,
3812                       Requires<[HasAVX512]>;
// Memory-destination form of opcode 0x7E: no pattern, isCodeGenOnly with
// ForceDisassemble, and restricted to 64-bit mode.
3814 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3815 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3816                       "vmovq\t{$src, $dst|$dst, $src}", []>, TB, PD,
3817                       EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecStore]>,
3818                       Requires<[HasAVX512, In64BitMode]>;
// Opcode 0xD6 store of element 0 of a v2i64; this is the form that is
// selected for stores and it has no In64BitMode requirement.
3820 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3821                       (ins i64mem:$dst, VR128X:$src),
3822                       "vmovq\t{$src, $dst|$dst, $src}",
3823                       [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3824                               addr:$dst)]>,
3825                       EVEX, TB, PD, REX_W, EVEX_CD8<64, CD8VT1>,
3826                       Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
// Register-register form of opcode 0xD6: no pattern; it is the target of the
// "vmovq.s" assembler alias defined after this block.
3828 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
3829 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3830                              (ins VR128X:$src),
3831                              "vmovq\t{$src, $dst|$dst, $src}", []>,
3832                              EVEX, REX_W, Sched<[SchedWriteVecLogic.XMM]>;
3833 } // ExeDomain = SSEPackedInt
// "vmovq.s" assembler spelling for the opcode-0xD6 register-register move.
// NOTE(review): the trailing 0 is presumably InstAlias's emit flag, making
// this a parse-only alias - confirm against the InstAlias class.
3835 def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
3836                 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
// Select the X86vextractstore64 node on a v2i64 source to the same 0xD6 vmovq
// store that handles the explicit extractelt + store pattern.
3838 let Predicates = [HasAVX512] in {
3839   def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
3840             (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
3843 // Move Scalar Single to Double Int
// Codegen-only vmovd FR32X -> GR32, selected for an f32 -> i32 bitconvert.
3845 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3846 def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3847                       (ins FR32X:$src),
3848                       "vmovd\t{$src, $dst|$dst, $src}",
3849                       [(set GR32:$dst, (bitconvert FR32X:$src))]>,
3850                       EVEX, Sched<[WriteVecMoveToGpr]>;
3851 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3853 // Move Quadword Int to Packed Quadword Int
// vmovq m64 -> XMM (XS-prefixed opcode 0x7E): selected for scalar_to_vector of
// an i64 load producing v2i64.
// NOTE(review): this uses EVEX_CD8<8, CD8VT8> where the other vmovq memory
// forms in this file use EVEX_CD8<64, CD8VT1>; presumably both describe the
// same 8-byte disp8 scaling - confirm.
3855 let ExeDomain = SSEPackedInt in {
3856 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3857                       (ins i64mem:$src),
3858                       "vmovq\t{$src, $dst|$dst, $src}",
3859                       [(set VR128X:$dst,
3860                         (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3861                       EVEX, REX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3862 } // ExeDomain = SSEPackedInt
3864 // Allow "vmovd" but print "vmovq".
// Covers both directions of the GR64 <-> XMM move.
// NOTE(review): the trailing 0 is presumably InstAlias's emit flag, which
// keeps the printer on the instruction's own "vmovq" string - confirm.
3865 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3866                 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
3867 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3868                 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3870 // Conversions between masks and scalar fp.
// Each bitconvert is done in two steps through a general-purpose register:
// a vmovd/vmovq between the FP register and the GPR, and a KMOVD/KMOVQ
// between the GPR and the mask register.
3871 def : Pat<(v32i1 (bitconvert FR32X:$src)),
3872           (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
3873 def : Pat<(f32 (bitconvert VK32:$src)),
3874           (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
3876 def : Pat<(v64i1 (bitconvert FR64X:$src)),
3877           (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
3878 def : Pat<(f64 (bitconvert VK64:$src)),
3879           (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
3881 //===----------------------------------------------------------------------===//
3882 // AVX-512  MOVSH, MOVSS, MOVSD
3883 //===----------------------------------------------------------------------===//
// Defines the register, load and store forms of one scalar move instruction.
//   asm         - assembly mnemonic.
//   OpNode      - DAG node matched by the register-register forms.
//   vzload_frag - load fragment matched by the vector-register `rm` form.
//   _           - type info for the scalar's vector type.
//   prd         - governing predicate (defaults to HasAVX512).
// Opcode 0x10 is used for the register and load forms, 0x11 for the stores.
3885 multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
3886                               X86VectorVTInfo _, Predicate prd = HasAVX512> {
  // Unmasked register form. With prd == HasFP16 it is predicated on HasFP16
  // alone; for any other prd its pattern additionally requires OptForSize.
3887   let Predicates = !if (!eq (prd, HasFP16), [HasFP16], [prd, OptForSize]) in
3888   def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3889              (ins _.RC:$src1, _.RC:$src2),
3890              !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3891              [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3892              _.ExeDomain>, EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>;
3893   let Predicates = [prd] in {
  // Zero-masking register form: X86selects between the move result and zero.
3894   def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3895               (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3896               !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3897               "$dst {${mask}} {z}, $src1, $src2}"),
3898               [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3899                                       (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3900                                       _.ImmAllZerosV)))],
3901               _.ExeDomain>, EVEX, VVVV, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
  // Merge-masking register form: the pass-through $src0 is tied to $dst.
3902   let Constraints = "$src0 = $dst"  in
3903   def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3904              (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3905              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3906              "$dst {${mask}}, $src1, $src2}"),
3907              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3908                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3909                                      (_.VT _.RC:$src0))))],
3910              _.ExeDomain>, EVEX, VVVV, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
  // Unmasked loads, foldable and rematerializable. `rm` produces a vector
  // register via vzload_frag; `rm_alt` produces the scalar register class.
3911   let canFoldAsLoad = 1, isReMaterializable = 1 in {
3912   def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
3913              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3914              [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
3915              _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3916   // _alt version uses FR32/FR64 register class.
3917   let isCodeGenOnly = 1 in
3918   def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3919                  !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3920                  [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3921                  _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3922   }
  // Masked loads: no selection patterns here, so mayLoad is stated explicitly.
3923   let mayLoad = 1, hasSideEffects = 0 in {
3924     let Constraints = "$src0 = $dst" in
3925     def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3926                (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3927                !strconcat(asm, "\t{$src, $dst {${mask}}|",
3928                "$dst {${mask}}, $src}"),
3929                [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
3930     def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3931                (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3932                !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3933                "$dst {${mask}} {z}, $src}"),
3934                [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
3935   }
  // Unmasked store of a scalar-register-class value.
3936   def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
3937              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3938              [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
3939              EVEX, Sched<[WriteFStore]>;
  // Masked store: no pattern; takes a vector-class source and a VK1WM mask
  // (the masked register and load forms above use _.KRCWM instead).
3940   let mayStore = 1, hasSideEffects = 0 in
3941   def mrk: AVX512PI<0x11, MRMDestMem, (outs),
3942               (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
3943               !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
3944               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>;
3945   }
// Instantiate avx512_move_scalar for the vmovss, vmovsd and vmovsh mnemonics.
// Each defm supplies the assembly mnemonic, the move SDNode, the vzload
// fragment and the scalar type info record, followed by the encoding
// modifiers applied to every instruction the multiclass produces.
3948 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
3949                                   VEX_LIG, TB, XS, EVEX_CD8<32, CD8VT1>;
3951 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
3952                                   VEX_LIG, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
// The FP16 variant also passes HasFP16 as a trailing argument (presumably the
// predicate parameter of avx512_move_scalar; its header is not shown here).
3954 defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
3955                                   HasFP16>,
3956                                   VEX_LIG, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
// Patterns that fold a scalar X86selects feeding a scalar move node into the
// masked register forms of the instruction whose name starts with InstrStr.
//   InstrStr - instruction name prefix; "rrk" or "rrkz" is appended and the
//              result is resolved with !cast<Instruction>.
//   OpNode   - the move node matched at the root of each pattern.
//   ZeroFP   - leaf matched as the select false value in the zeroing pattern.
//   _        - vector type info supplying VT, EltVT, RC and FRC.
3958 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
3959                                        PatLeaf ZeroFP, X86VectorVTInfo _> {
// Merge-masking form: the select false value ($src2) is copied into the
// vector register class and passed as the first operand of rrk, ahead of the
// mask; the select true value ($src1) becomes the last operand.
3961 def : Pat<(_.VT (OpNode _.RC:$src0,
3962                         (_.VT (scalar_to_vector
3963                                   (_.EltVT (X86selects VK1WM:$mask,
3964                                                        (_.EltVT _.FRC:$src1),
3965                                                        (_.EltVT _.FRC:$src2))))))),
3966           (!cast<Instruction>(InstrStr#rrk)
3967                         (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
3968                         VK1WM:$mask,
3969                         (_.VT _.RC:$src0),
3970                         (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
// Zero-masking form: when the select false value is ZeroFP, use rrkz, which
// takes no extra register operand before the mask.
3972 def : Pat<(_.VT (OpNode _.RC:$src0,
3973                         (_.VT (scalar_to_vector
3974                                   (_.EltVT (X86selects VK1WM:$mask,
3975                                                        (_.EltVT _.FRC:$src1),
3976                                                        (_.EltVT ZeroFP))))))),
3977           (!cast<Instruction>(InstrStr#rrkz)
3978                         VK1WM:$mask,
3979                         (_.VT _.RC:$src0),
3980                         (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
// Selects the masked scalar store InstrStr#mrk for a 512-bit masked_store
// whose stored value is a 128-bit vector inserted at index 0 of undef.
//   InstrStr - instruction name prefix; "mrk" is appended.
//   _        - type info bundle providing the 128-bit and 512-bit records.
//   Mask     - dag matched as the mask operand of masked_store; the output
//              pattern refers to $mask, so Mask is expected to bind that name.
//   MaskRC   - register class of $mask, which is copied into VK1WM.
3983 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
3984                                         dag Mask, RegisterClass MaskRC> {
3986 def : Pat<(masked_store
3987              (_.info512.VT (insert_subvector undef,
3988                                (_.info128.VT _.info128.RC:$src),
3989                                (iPTR 0))), addr:$dst, Mask),
3990           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3991                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
3992                       _.info128.RC:$src)>;
// Same source pattern as avx512_store_scalar_lowering, but for a mask held in
// a register class that is first placed into an i32 value: $mask is inserted
// at sub-register index `subreg` of an IMPLICIT_DEF, and that i32 is then
// copied into VK1WM for the InstrStr#mrk mask operand.
//   subreg - sub-register index at which MaskRC:$mask is inserted.
3996 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
3997                                                AVX512VLVectorVTInfo _,
3998                                                dag Mask, RegisterClass MaskRC,
3999                                                SubRegIndex subreg> {
4001 def : Pat<(masked_store
4002              (_.info512.VT (insert_subvector undef,
4003                                (_.info128.VT _.info128.RC:$src),
4004                                (iPTR 0))), addr:$dst, Mask),
4005           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4006                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4007                       _.info128.RC:$src)>;
4011 // This matches the more recent codegen from clang that avoids emitting a
4012 // 512-bit masked store directly. Codegen will widen a 128-bit masked store
4013 // to 512 bits on AVX512F-only targets.
// Mask512 is matched against the widened 512-bit store and Mask128 against
// the 128-bit store; both output patterns emit the same InstrStr#mrk with
// $mask inserted into an i32 at `subreg` and copied to VK1WM.
4014 multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4015                                                AVX512VLVectorVTInfo _,
4016                                                dag Mask512, dag Mask128,
4017                                                RegisterClass MaskRC,
4018                                                SubRegIndex subreg> {
4020 // AVX512F pattern.
4021 def : Pat<(masked_store
4022              (_.info512.VT (insert_subvector undef,
4023                                (_.info128.VT _.info128.RC:$src),
4024                                (iPTR 0))), addr:$dst, Mask512),
4025           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4026                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4027                       _.info128.RC:$src)>;
4029 // AVX512VL pattern.
4030 def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4031           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4032                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4033                       _.info128.RC:$src)>;
// Selects the masked scalar loads InstrStr#rmkz / InstrStr#rmk for the low
// 128 bits extracted from a 512-bit masked_load.
//   InstrStr - instruction name prefix; "rmkz" or "rmk" is appended.
//   _        - type info bundle providing the 128-bit and 512-bit records.
//   Mask     - dag matched as the mask operand of masked_load; the output
//              patterns refer to $mask, so Mask is expected to bind that name.
//   MaskRC   - register class of $mask, which is copied into VK1WM.
4036 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4037                                        dag Mask, RegisterClass MaskRC> {
// Third masked_load operand is all zeros: use the zero-masking form.
4039 def : Pat<(_.info128.VT (extract_subvector
4040                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
4041                                         _.info512.ImmAllZerosV)),
4042                            (iPTR 0))),
4043           (!cast<Instruction>(InstrStr#rmkz)
4044                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4045                       addr:$srcAddr)>;
// Third masked_load operand is X86vzmovl of $src widened into undef: use the
// merge-masking form with $src as its first (register) operand.
4047 def : Pat<(_.info128.VT (extract_subvector
4048                 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4049                       (_.info512.VT (insert_subvector undef,
4050                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4051                             (iPTR 0))))),
4052                 (iPTR 0))),
4053           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4054                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4055                       addr:$srcAddr)>;
// Same source patterns as avx512_load_scalar_lowering, but $mask is first
// inserted at sub-register index `subreg` of an IMPLICIT_DEF to form an i32,
// which is then copied into VK1WM for the instruction mask operand.
//   subreg - sub-register index at which MaskRC:$mask is inserted.
4059 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4060                                               AVX512VLVectorVTInfo _,
4061                                               dag Mask, RegisterClass MaskRC,
4062                                               SubRegIndex subreg> {
// All-zeros third operand: zero-masking load (rmkz).
4064 def : Pat<(_.info128.VT (extract_subvector
4065                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
4066                                         _.info512.ImmAllZerosV)),
4067                            (iPTR 0))),
4068           (!cast<Instruction>(InstrStr#rmkz)
4069                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4070                       addr:$srcAddr)>;
// X86vzmovl of $src as third operand: merge-masking load (rmk) with $src as
// its first operand.
4072 def : Pat<(_.info128.VT (extract_subvector
4073                 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4074                       (_.info512.VT (insert_subvector undef,
4075                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4076                             (iPTR 0))))),
4077                 (iPTR 0))),
4078           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4079                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4080                       addr:$srcAddr)>;
4084 // This matches the more recent codegen from clang that avoids emitting a
4085 // 512-bit masked load directly. Codegen will widen a 128-bit masked load
4086 // to 512 bits on AVX512F-only targets.
// Mask512 is matched against the widened 512-bit loads and Mask128 against
// the 128-bit loads. Every output pattern inserts $mask into an i32 at
// `subreg` and copies it to VK1WM before passing it to rmkz / rmk.
4087 multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4088                                               AVX512VLVectorVTInfo _,
4089                                               dag Mask512, dag Mask128,
4090                                               RegisterClass MaskRC,
4091                                               SubRegIndex subreg> {
4092 // AVX512F patterns.
4093 def : Pat<(_.info128.VT (extract_subvector
4094                          (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4095                                         _.info512.ImmAllZerosV)),
4096                            (iPTR 0))),
4097           (!cast<Instruction>(InstrStr#rmkz)
4098                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4099                       addr:$srcAddr)>;
4101 def : Pat<(_.info128.VT (extract_subvector
4102                 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4103                       (_.info512.VT (insert_subvector undef,
4104                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4105                             (iPTR 0))))),
4106                 (iPTR 0))),
4107           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4108                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4109                       addr:$srcAddr)>;
4111 // AVX512VL patterns.
4112 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4113                          _.info128.ImmAllZerosV)),
4114           (!cast<Instruction>(InstrStr#rmkz)
4115                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4116                       addr:$srcAddr)>;
4118 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4119                          (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4120           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4121                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4122                       addr:$srcAddr)>;
// Masked move lowering for the f32 and f64 scalar moves.
4125 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4126 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
// Masked store lowering. Each instantiation gives the mask dag to match; in
// all of them $mask is ANDed with 1 in a GR32, GR16 or GR8 register and the
// result is bitconverted to the mask vector type.
4128 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4129                    (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4130 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4131                    (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4132 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4133                    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
// FP16 (VMOVSHZ) instantiations of the move, store and load lowering
// multiclasses, plus scalar f16 select patterns, all guarded by HasFP16.
4135 let Predicates = [HasFP16] in {
4136 defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
4137 defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4138                    (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4139 defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4140                    (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
// First dag is the 512-bit mask form (v8i1 inserted into an all-zeros v32i1),
// second dag is the 128-bit mask form.
4141 defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4142                    (v32i1 (insert_subvector
4143                            (v32i1 immAllZerosV),
4144                            (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4145                            (iPTR 0))),
4146                    (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4147                    GR8, sub_8bit>;
4149 defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4150                    (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4151 defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4152                    (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4153 defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4154                    (v32i1 (insert_subvector
4155                            (v32i1 immAllZerosV),
4156                            (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4157                            (iPTR 0))),
4158                    (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4159                    GR8, sub_8bit>;
// Scalar f16 select between two registers: emit VMOVSHZrrk with the false
// value ($src2) as its first operand, an IMPLICIT_DEF as the operand after the
// mask and the true value ($src1) last, then copy the result back to FR16X.
4161 def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
4162           (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
4163            (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
4164            VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4165            (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
// Scalar f16 select against fp16imm0: emit the zero-masking form.
4167 def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
4168           (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4169            (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
// subreg2 store lowering for VMOVSSZ / VMOVSDZ. For each defm the first dag is
// the mask shape matched on the widened 512-bit store and the second dag is
// the mask shape matched on the 128-bit store; both are built from
// (and GR8:$mask, 1) bitconverted to v8i1.
4172 defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4173                    (v16i1 (insert_subvector
4174                            (v16i1 immAllZerosV),
4175                            (v4i1 (extract_subvector
4176                                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4177                                   (iPTR 0))),
4178                            (iPTR 0))),
4179                    (v4i1 (extract_subvector
4180                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4181                           (iPTR 0))), GR8, sub_8bit>;
4182 defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4183                    (v8i1
4184                     (extract_subvector
4185                      (v16i1
4186                       (insert_subvector
4187                        (v16i1 immAllZerosV),
4188                        (v2i1 (extract_subvector
4189                               (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4190                               (iPTR 0))),
4191                        (iPTR 0))),
4192                      (iPTR 0))),
4193                    (v2i1 (extract_subvector
4194                           (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4195                           (iPTR 0))), GR8, sub_8bit>;
// Load lowering for VMOVSSZ / VMOVSDZ, using the same mask dags as the
// corresponding store lowering instantiations.
4197 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4198                    (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4199 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4200                    (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4201 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4202                    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4204 defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4205                    (v16i1 (insert_subvector
4206                            (v16i1 immAllZerosV),
4207                            (v4i1 (extract_subvector
4208                                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4209                                   (iPTR 0))),
4210                            (iPTR 0))),
4211                    (v4i1 (extract_subvector
4212                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4213                           (iPTR 0))), GR8, sub_8bit>;
4214 defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4215                    (v8i1
4216                     (extract_subvector
4217                      (v16i1
4218                       (insert_subvector
4219                        (v16i1 immAllZerosV),
4220                        (v2i1 (extract_subvector
4221                               (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4222                               (iPTR 0))),
4223                        (iPTR 0))),
4224                      (iPTR 0))),
4225                    (v2i1 (extract_subvector
4226                           (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4227                           (iPTR 0))), GR8, sub_8bit>;
// Scalar f32 selects lowered to masked VMOVSSZ forms. Scalar values are
// copied into VR128X for the instruction and the result is copied back to
// FR32X.
// Register/register select: false value ($src2) is the first rrk operand.
4229 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4230           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4231            (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4232            VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4233            (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
// Register select against fp32imm0: zero-masking register form.
4235 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4236           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4237            (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
// Select whose true value is a load: merge-masking load with $src0 as the
// first operand.
4239 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
4240           (COPY_TO_REGCLASS
4241            (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
4242                                                        VK1WM:$mask, addr:$src)),
4243            FR32X)>;
// Loaded true value selected against fp32imm0: zero-masking load.
4244 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
4245           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
// The same four patterns for f64, using VMOVSDZ and FR64X.
4247 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4248           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4249            (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4250            VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4251            (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4253 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
4254           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4255            (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4257 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
4258           (COPY_TO_REGCLASS
4259            (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
4260                                                        VK1WM:$mask, addr:$src)),
4261            FR64X)>;
4262 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
4263           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
// Vector-typed X86selects on whole 128-bit registers. The true value ($src1)
// is passed for both trailing register operands of the move.
4266 def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
4267           (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4268 def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
4269           (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
// Vector-typed selects against an all-zeros vector: zero-masking form.
4271 def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
4272           (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4273 def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
4274           (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
// Register-to-register vmovsh / vmovss / vmovsd definitions that use opcode
// 0x11 with the MRMDestReg form. They carry no selection patterns, are marked
// isCodeGenOnly, and set ForceDisassemble. Each mnemonic gets an unmasked
// (rr), merge-masked (rrk, with $src0 tied to $dst) and zero-masked (rrkz)
// variant.
4276 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
// FP16 variants are additionally guarded by HasFP16.
4277   let Predicates = [HasFP16] in {
4278     def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4279         (ins VR128X:$src1, VR128X:$src2),
4280         "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4281         []>, T_MAP5, XS, EVEX, VVVV, VEX_LIG,
4282         Sched<[SchedWriteFShuffle.XMM]>;
4284     let Constraints = "$src0 = $dst" in
4285     def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4286         (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
4287          VR128X:$src1, VR128X:$src2),
4288         "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
4289           "$dst {${mask}}, $src1, $src2}",
4290         []>, T_MAP5, XS, EVEX_K, EVEX, VVVV, VEX_LIG,
4291         Sched<[SchedWriteFShuffle.XMM]>;
4293     def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4294         (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4295         "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
4296           "$dst {${mask}} {z}, $src1, $src2}",
4297         []>, EVEX_KZ, T_MAP5, XS, EVEX, VVVV, VEX_LIG,
4298         Sched<[SchedWriteFShuffle.XMM]>;
4299   }
// Single-precision variants.
4300   def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4301                            (ins VR128X:$src1, VR128X:$src2),
4302                            "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4303                            []>, TB, XS, EVEX, VVVV, VEX_LIG,
4304                            Sched<[SchedWriteFShuffle.XMM]>;
4306   let Constraints = "$src0 = $dst" in
4307   def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4308                              (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4309                                                    VR128X:$src1, VR128X:$src2),
4310                              "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4311                                         "$dst {${mask}}, $src1, $src2}",
4312                              []>, EVEX_K, TB, XS, EVEX, VVVV, VEX_LIG,
4313                              Sched<[SchedWriteFShuffle.XMM]>;
4315   def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4316                          (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4317                          "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4318                                     "$dst {${mask}} {z}, $src1, $src2}",
4319                          []>, EVEX_KZ, TB, XS, EVEX, VVVV, VEX_LIG,
4320                          Sched<[SchedWriteFShuffle.XMM]>;
// Double-precision variants; these also carry REX_W.
4322   def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4323                            (ins VR128X:$src1, VR128X:$src2),
4324                            "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4325                            []>, TB, XD, EVEX, VVVV, VEX_LIG, REX_W,
4326                            Sched<[SchedWriteFShuffle.XMM]>;
4328   let Constraints = "$src0 = $dst" in
4329   def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4330                              (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4331                                                    VR128X:$src1, VR128X:$src2),
4332                              "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4333                                         "$dst {${mask}}, $src1, $src2}",
4334                              []>, EVEX_K, TB, XD, EVEX, VVVV, VEX_LIG,
4335                              REX_W, Sched<[SchedWriteFShuffle.XMM]>;
4337   def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4338                               (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4339                                                           VR128X:$src2),
4340                               "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4341                                          "$dst {${mask}} {z}, $src1, $src2}",
4342                               []>, EVEX_KZ, TB, XD, EVEX, VVVV, VEX_LIG,
4343                               REX_W, Sched<[SchedWriteFShuffle.XMM]>;
// Assembler aliases that map the ".s"-suffixed mnemonics (vmovsh.s, vmovss.s,
// vmovsd.s) onto the corresponding _REV instructions, in unmasked, masked and
// zero-masked forms.
// NOTE(review): the trailing 0 is the last InstAlias argument; presumably it
// controls whether the alias is used when printing - confirm against the
// InstAlias class definition.
4346 def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4347                 (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4348 def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
4349                              "$dst {${mask}}, $src1, $src2}",
4350                 (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
4351                                 VR128X:$src1, VR128X:$src2), 0>;
4352 def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4353                              "$dst {${mask}} {z}, $src1, $src2}",
4354                 (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
4355                                  VR128X:$src1, VR128X:$src2), 0>;
4356 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4357                 (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4358 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4359                              "$dst {${mask}}, $src1, $src2}",
4360                 (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4361                                 VR128X:$src1, VR128X:$src2), 0>;
4362 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4363                              "$dst {${mask}} {z}, $src1, $src2}",
4364                 (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4365                                  VR128X:$src1, VR128X:$src2), 0>;
4366 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4367                 (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4368 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4369                              "$dst {${mask}}, $src1, $src2}",
4370                 (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4371                                 VR128X:$src1, VR128X:$src2), 0>;
4372 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4373                              "$dst {${mask}} {z}, $src1, $src2}",
4374                 (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4375                                  VR128X:$src1, VR128X:$src2), 0>;
// When optimizing for size, lower X86vzmovl on 32-bit element vectors to
// VMOVSSZrr with a zeroed register (AVX512_128_SET0) as the first operand and
// the source as the second.
4377 let Predicates = [HasAVX512, OptForSize] in {
4378   def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4379             (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4380   def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4381             (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4383   // Move low f32 and clear high bits.
  // For 256-bit and 512-bit types the move is done on the sub_xmm
  // sub-register and the result is wrapped with SUBREG_TO_REG.
4384   def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4385             (SUBREG_TO_REG (i32 0),
4386              (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4387               (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4388   def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4389             (SUBREG_TO_REG (i32 0),
4390              (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4391               (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4393   def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4394             (SUBREG_TO_REG (i32 0),
4395              (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4396               (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4397   def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4398             (SUBREG_TO_REG (i32 0),
4399              (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4400               (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4403 // Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4404 // VMOVSS/SD. Unfortunately, this loses the ability to use XMM16-31.
// Both patterns blend the sub_xmm part of the source into a zeroed register
// (V_SET0) and wrap the result with SUBREG_TO_REG. The float pattern uses
// VBLENDPSrri with immediate 1; the integer pattern uses VPBLENDWrri with
// immediate 3.
4405 let Predicates = [HasAVX512, OptForSpeed] in {
4406   def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4407             (SUBREG_TO_REG (i32 0),
4408              (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4409                           (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4410                           (i8 1))), sub_xmm)>;
4411   def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4412             (SUBREG_TO_REG (i32 0),
4413              (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4414                           (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4415                           (i8 3))), sub_xmm)>;
// Scalar float loads into vector registers, selected to VMOVSSZrm /
// VMOVSDZrm.
4418 let Predicates = [HasAVX512] in {
4419   def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4420             (VMOVSSZrm addr:$src)>;
4421   def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4422             (VMOVSDZrm addr:$src)>;
4424   // Represent the same patterns above but in the form they appear for
4425   // 256-bit types
  // The 128-bit load result is placed at sub_xmm with SUBREG_TO_REG.
4426   def : Pat<(v8f32 (X86vzload32 addr:$src)),
4427             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4428   def : Pat<(v4f64 (X86vzload64 addr:$src)),
4429             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4431   // Represent the same patterns above but in the form they appear for
4432   // 512-bit types
4433   def : Pat<(v16f32 (X86vzload32 addr:$src)),
4434             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4435   def : Pat<(v8f64 (X86vzload64 addr:$src)),
4436             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
// FP16 counterparts of the vzmovl / vzload patterns: X86vzmovl on 16-bit
// element vectors is selected to VMOVSHZrr with a zeroed first operand, and
// X86vzload16 is selected to VMOVSHZrm.
4438 let Predicates = [HasFP16] in {
4439   def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
4440             (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
4441   def : Pat<(v8i16 (X86vzmovl (v8i16 VR128X:$src))),
4442             (VMOVSHZrr (v8i16 (AVX512_128_SET0)), VR128X:$src)>;
4444   // FIXME: We need better canonicalization in DAG combine.
4445   def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
4446             (SUBREG_TO_REG (i32 0),
4447              (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4448               (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4449   def : Pat<(v16i16 (X86vzmovl (v16i16 VR256X:$src))),
4450             (SUBREG_TO_REG (i32 0),
4451              (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
4452               (v8i16 (EXTRACT_SUBREG (v16i16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4454   // FIXME: We need better canonicalization in DAG combine.
4455   def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
4456             (SUBREG_TO_REG (i32 0),
4457              (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4458               (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
4459   def : Pat<(v32i16 (X86vzmovl (v32i16 VR512:$src))),
4460             (SUBREG_TO_REG (i32 0),
4461              (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
4462               (v8i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_xmm)))), sub_xmm)>;
  // Zero-extending 16-bit loads; wider types wrap the 128-bit load result
  // with SUBREG_TO_REG at sub_xmm.
4464   def : Pat<(v8f16 (X86vzload16 addr:$src)),
4465             (VMOVSHZrm addr:$src)>;
4467   def : Pat<(v16f16 (X86vzload16 addr:$src)),
4468             (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4470   def : Pat<(v32f16 (X86vzload16 addr:$src)),
4471             (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
// Register form of vmovq (opcode 0x7E, MRMSrcReg) whose pattern matches
// X86vzmovl on v2i64. Defined in the SSEPackedInt execution domain with the
// SchedWriteVecLogic.XMM scheduling class.
4474 let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4475 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4476                                 (ins VR128X:$src),
4477                                 "vmovq\t{$src, $dst|$dst, $src}",
4478                                 [(set VR128X:$dst, (v2i64 (X86vzmovl
4479                                                    (v2i64 VR128X:$src))))]>,
4480                                 EVEX, REX_W;
// Integer scalar_to_vector, vzload and 64-bit vzmovl patterns selected to the
// EVEX movd / movq instructions.
4483 let Predicates = [HasAVX512] in {
  // An any-extended GR8 is inserted into an undefined i32 at sub_8bit and
  // then moved with VMOVDI2PDIZrr.
4484   def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
4485             (VMOVDI2PDIZrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
4486                                               GR8:$src, sub_8bit)))>;
4487   def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4488             (VMOVDI2PDIZrr GR32:$src)>;
4490   def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4491             (VMOV64toPQIZrr GR64:$src)>;
4493   // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
4494   def : Pat<(v4i32 (X86vzload32 addr:$src)),
4495             (VMOVDI2PDIZrm addr:$src)>;
4496   def : Pat<(v8i32 (X86vzload32 addr:$src)),
4497             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4498   def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4499             (VMOVZPQILo2PQIZrr VR128X:$src)>;
4500   def : Pat<(v2i64 (X86vzload64 addr:$src)),
4501             (VMOVQI2PQIZrm addr:$src)>;
4502   def : Pat<(v4i64 (X86vzload64 addr:$src)),
4503             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4505   // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4506   def : Pat<(v16i32 (X86vzload32 addr:$src)),
4507             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4508   def : Pat<(v8i64 (X86vzload64 addr:$src)),
4509             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
  // 64-bit element X86vzmovl on 256-bit and 512-bit types: apply
  // VMOVZPQILo2PQIZrr to the sub_xmm part and wrap with SUBREG_TO_REG.
4511   def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4512             (SUBREG_TO_REG (i32 0),
4513              (v2f64 (VMOVZPQILo2PQIZrr
4514                      (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4515              sub_xmm)>;
4516   def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4517             (SUBREG_TO_REG (i32 0),
4518              (v2i64 (VMOVZPQILo2PQIZrr
4519                      (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4520              sub_xmm)>;
4522   def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4523             (SUBREG_TO_REG (i32 0),
4524              (v2f64 (VMOVZPQILo2PQIZrr
4525                      (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4526              sub_xmm)>;
4527   def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4528             (SUBREG_TO_REG (i32 0),
4529              (v2i64 (VMOVZPQILo2PQIZrr
4530                      (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4531              sub_xmm)>;
4534 //===----------------------------------------------------------------------===//
4535 // AVX-512 - Non-temporals
4536 //===----------------------------------------------------------------------===//
// 512-bit "vmovntdqa" load from memory into VR512 (opcode 0x2A, MRMSrcMem,
// SSEPackedInt domain). The instruction itself carries an empty pattern list;
// the alignednontemporalload `def : Pat` records in this file select it.
4538 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4539                       (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4540                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4541                       EVEX, T8, PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
// 256-bit and 128-bit "vmovntdqa" loads, guarded by HasVLX. They share the
// opcode (0x2A), form and empty pattern list of the 512-bit definition and
// differ in register class, memory operand, scheduling width (YMM / XMM) and
// EVEX vector-length class (EVEX_V256 / EVEX_V128).
4543 let Predicates = [HasVLX] in {
4544   def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4545                        (ins i256mem:$src),
4546                        "vmovntdqa\t{$src, $dst|$dst, $src}",
4547                        [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4548                        EVEX, T8, PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4550   def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4551                       (ins i128mem:$src),
4552                       "vmovntdqa\t{$src, $dst|$dst, $src}",
4553                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4554                       EVEX, T8, PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
// Defines the `mr` (register-to-memory) form of one non-temporal store.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic; the operand string is appended to it.
//   _         - vector type info; supplies MemOp, RC, VT, ExeDomain, EltSize.
//   Sched     - move scheduling info; its MR member becomes SchedRW.
//   st_frag   - store fragment matched by the pattern
//               (default: alignednontemporalstore).
// The definition is created with AddedComplexity = 400.
4557 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4558                         X86SchedWriteMoveLS Sched,
4559                         PatFrag st_frag = alignednontemporalstore> {
4560   let SchedRW = [Sched.MR], AddedComplexity = 400 in
4561   def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4562                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4563                     [(st_frag (_.VT _.RC:$src), addr:$dst)],
4564                     _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
// Instantiates avx512_movnt at all three vector lengths:
//   Z    (info512, Sched.ZMM, EVEX_V512) under HasAVX512;
//   Z256 (info256, Sched.YMM, EVEX_V256) and
//   Z128 (info128, Sched.XMM, EVEX_V128) under HasAVX512 + HasVLX.
// st_frag is not passed, so avx512_movnt's default store fragment is used.
4567 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4568                            AVX512VLVectorVTInfo VTInfo,
4569                            X86SchedWriteMoveLSWidths Sched> {
4570   let Predicates = [HasAVX512] in
4571     defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4573   let Predicates = [HasAVX512, HasVLX] in {
4574     defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4575     defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
4576   }
// Non-temporal store instructions built from avx512_movnt_vl:
//   VMOVNTDQ - opcode 0xE7, i64 vector infos, TB + PD.
//   VMOVNTPD - opcode 0x2B, f64 vector infos, TB + PD + REX_W.
//   VMOVNTPS - opcode 0x2B, f32 vector infos, TB only.
4579 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4580                                 SchedWriteVecMoveLSNT>, TB, PD;
4581 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4582                                 SchedWriteFMoveLSNT>, TB, PD, REX_W;
4583 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4584                                 SchedWriteFMoveLSNT>, TB;
// 512-bit non-temporal patterns (HasAVX512, AddedComplexity = 400).
// Stores of v16i32 / v32i16 / v64i8 all map onto VMOVNTDQZmr.
// NOTE(review): the v8i64 store is presumably covered by the pattern inside
// the VMOVNTDQ defm itself - confirm.
4586 let Predicates = [HasAVX512], AddedComplexity = 400 in {
4587   def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4588             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4589   def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4590             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4591   def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4592             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
// Loads: every listed 512-bit type, floating-point and integer alike, is
// selected to the single VMOVNTDQAZrm instruction.
4594   def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4595             (VMOVNTDQAZrm addr:$src)>;
4596   def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4597             (VMOVNTDQAZrm addr:$src)>;
4598   def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4599             (VMOVNTDQAZrm addr:$src)>;
4600   def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4601             (VMOVNTDQAZrm addr:$src)>;
4602   def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4603             (VMOVNTDQAZrm addr:$src)>;
4604   def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4605             (VMOVNTDQAZrm addr:$src)>;
// 256-bit and 128-bit non-temporal patterns (HasVLX, AddedComplexity = 400).
// They mirror the 512-bit set: integer stores narrower than 64-bit elements
// use VMOVNTDQZ256mr / VMOVNTDQZ128mr, and loads of every listed type use
// VMOVNTDQAZ256rm / VMOVNTDQAZ128rm.
4608 let Predicates = [HasVLX], AddedComplexity = 400 in {
// 256-bit stores.
4609   def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4610             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4611   def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4612             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4613   def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4614             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
// 256-bit loads.
4616   def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4617             (VMOVNTDQAZ256rm addr:$src)>;
4618   def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4619             (VMOVNTDQAZ256rm addr:$src)>;
4620   def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4621             (VMOVNTDQAZ256rm addr:$src)>;
4622   def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4623             (VMOVNTDQAZ256rm addr:$src)>;
4624   def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4625             (VMOVNTDQAZ256rm addr:$src)>;
4626   def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4627             (VMOVNTDQAZ256rm addr:$src)>;
// 128-bit stores.
4629   def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4630             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4631   def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4632             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4633   def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4634             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
// 128-bit loads.
4636   def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4637             (VMOVNTDQAZ128rm addr:$src)>;
4638   def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4639             (VMOVNTDQAZ128rm addr:$src)>;
4640   def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4641             (VMOVNTDQAZ128rm addr:$src)>;
4642   def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4643             (VMOVNTDQAZ128rm addr:$src)>;
4644   def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4645             (VMOVNTDQAZ128rm addr:$src)>;
4646   def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4647             (VMOVNTDQAZ128rm addr:$src)>;
4650 //===----------------------------------------------------------------------===//
4651 // AVX-512 - Integer arithmetic
// Two-operand integer operation with masking variants (via AVX512_maskable):
//   rr - both sources in registers of class _.RC;
//   rm - second source loaded from memory through _.LdFrag.
//   opc          - opcode byte.
//   OpcodeStr    - mnemonic.
//   OpNode       - SelectionDAG node matched by the patterns.
//   _            - vector type info (RC, VT, MemOp, LdFrag).
//   sched        - scheduling class; the rm form uses sched.Folded and
//                  sched.ReadAfterFold.
//   IsCommutable - forwarded twice to AVX512_maskable for the rr form only;
//                  the rm form leaves those arguments at their defaults.
4653 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4654                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
4655                            bit IsCommutable = 0> {
4656   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4657                     (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4658                     "$src2, $src1", "$src1, $src2",
4659                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4660                     IsCommutable, IsCommutable>, AVX512BIBase, EVEX, VVVV,
4661                     Sched<[sched]>;
4663   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4664                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4665                   "$src2, $src1", "$src1, $src2",
4666                   (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4667                   AVX512BIBase, EVEX, VVVV,
4668                   Sched<[sched.Folded, sched.ReadAfterFold]>;
// Extends avx512_binop_rm (inherited with the same arguments) with an `rmb`
// form whose second source is a scalar memory operand loaded through
// _.BroadcastLdFrag. The assembly strings append _.BroadcastStr to the memory
// operand, and the encoding adds EVEX_B.
4671 multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4672                             X86VectorVTInfo _, X86FoldableSchedWrite sched,
4673                             bit IsCommutable = 0> :
4674            avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4675   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4676                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4677                   "${src2}"#_.BroadcastStr#", $src1",
4678                   "$src1, ${src2}"#_.BroadcastStr,
4679                   (_.VT (OpNode _.RC:$src1,
4680                                 (_.BroadcastLdFrag addr:$src2)))>,
4681                   AVX512BIBase, EVEX, VVVV, EVEX_B,
4682                   Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_binop_rm (rr + rm forms, no broadcast form) at the
// three vector lengths:
//   Z    (info512, sched.ZMM, EVEX_V512) under [prd];
//   Z256 (info256, sched.YMM, EVEX_V256) and
//   Z128 (info128, sched.XMM, EVEX_V128) under [prd, HasVLX].
// `prd` is the feature predicate required by the instruction family.
4685 multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4686                               AVX512VLVectorVTInfo VTInfo,
4687                               X86SchedWriteWidths sched, Predicate prd,
4688                               bit IsCommutable = 0> {
4689   let Predicates = [prd] in
4690     defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4691                              IsCommutable>, EVEX_V512;
4693   let Predicates = [prd, HasVLX] in {
4694     defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4695                                 sched.YMM, IsCommutable>, EVEX_V256;
4696     defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4697                                 sched.XMM, IsCommutable>, EVEX_V128;
4698   }
// Same vector-length expansion as avx512_binop_rm_vl, but built on
// avx512_binop_rmb so each length also gets the broadcast (`rmb`) form:
//   Z under [prd]; Z256 and Z128 under [prd, HasVLX].
4701 multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4702                                AVX512VLVectorVTInfo VTInfo,
4703                                X86SchedWriteWidths sched, Predicate prd,
4704                                bit IsCommutable = 0> {
4705   let Predicates = [prd] in
4706     defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4707                              IsCommutable>, EVEX_V512;
4709   let Predicates = [prd, HasVLX] in {
4710     defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4711                                  sched.YMM, IsCommutable>, EVEX_V256;
4712     defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4713                                  sched.XMM, IsCommutable>, EVEX_V128;
4714   }
// 64-bit-element wrapper: avx512_binop_rmb_vl over avx512vl_i64_info, with
// REX_W and EVEX_CD8<64, CD8VF> applied to every generated record.
4717 multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4718                                 X86SchedWriteWidths sched, Predicate prd,
4719                                 bit IsCommutable = 0> {
4720   defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4721                                   sched, prd, IsCommutable>,
4722                                   REX_W, EVEX_CD8<64, CD8VF>;
// 32-bit-element wrapper: avx512_binop_rmb_vl over avx512vl_i32_info, with
// EVEX_CD8<32, CD8VF> (no REX_W, unlike the 64-bit wrapper).
4725 multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4726                                 X86SchedWriteWidths sched, Predicate prd,
4727                                 bit IsCommutable = 0> {
4728   defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4729                                   sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
// 16-bit-element wrapper: avx512_binop_rm_vl (rr/rm only, no broadcast form)
// over avx512vl_i16_info, with EVEX_CD8<16, CD8VF> and WIG.
4732 multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4733                                 X86SchedWriteWidths sched, Predicate prd,
4734                                 bit IsCommutable = 0> {
4735   defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4736                                  sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4737                                  WIG;
// 8-bit-element wrapper: avx512_binop_rm_vl (rr/rm only, no broadcast form)
// over avx512vl_i8_info, with EVEX_CD8<8, CD8VF> and WIG.
4740 multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4741                                 X86SchedWriteWidths sched, Predicate prd,
4742                                 bit IsCommutable = 0> {
4743   defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4744                                  sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
4745                                  WIG;
// Generates both the dword and qword flavours of one operation:
//   Q - opcode opc_q, mnemonic OpcodeStr + "q";
//   D - opcode opc_d, mnemonic OpcodeStr + "d".
// Note the parameter order is (opc_d, opc_q).
4748 multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4749                                  SDNode OpNode, X86SchedWriteWidths sched,
4750                                  Predicate prd, bit IsCommutable = 0> {
4751   defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4752                                    IsCommutable>;
4754   defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
4755                                    IsCommutable>;
// Generates both the byte and word flavours of one operation:
//   W - opcode opc_w, mnemonic OpcodeStr + "w";
//   B - opcode opc_b, mnemonic OpcodeStr + "b".
// Note the parameter order is (opc_b, opc_w).
4758 multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4759                                  SDNode OpNode, X86SchedWriteWidths sched,
4760                                  Predicate prd, bit IsCommutable = 0> {
4761   defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4762                                    IsCommutable>;
4764   defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
4765                                    IsCommutable>;
// Generates all four element widths of one operation. Opcodes are given in
// the order byte, word, dword, qword. The dword/qword forms are guarded by
// HasAVX512 and the byte/word forms by HasBWI.
4768 multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4769                                   bits<8> opc_d, bits<8> opc_q,
4770                                   string OpcodeStr, SDNode OpNode,
4771                                   X86SchedWriteWidths sched,
4772                                   bit IsCommutable = 0> {
4773   defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4774                                     sched, HasAVX512, IsCommutable>,
4775               avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4776                                     sched, HasBWI, IsCommutable>;
// Binary operation whose source, destination and broadcast element types may
// differ. Produces three maskable forms:
//   rr  - both sources in _Src.RC registers;
//   rm  - second source loaded through _Src.LdFrag;
//   rmb - second source is a _Brdct scalar memory operand loaded through
//         _Brdct.BroadcastLdFrag and bitconverted before OpNode is applied.
//   _Src   - type info of the source operands.
//   _Dst   - type info of the result; also the info passed to AVX512_maskable.
//   _Brdct - type info used for the broadcast load and its assembly suffix.
// IsCommutable is forwarded (once) to the rr form only.
4779 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4780                             X86FoldableSchedWrite sched,
4781                             SDNode OpNode,X86VectorVTInfo _Src,
4782                             X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4783                             bit IsCommutable = 0> {
4784   defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4785                             (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4786                             "$src2, $src1","$src1, $src2",
4787                             (_Dst.VT (OpNode
4788                                          (_Src.VT _Src.RC:$src1),
4789                                          (_Src.VT _Src.RC:$src2))),
4790                             IsCommutable>,
4791                             AVX512BIBase, EVEX, VVVV, Sched<[sched]>;
4792   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4793                         (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4794                         "$src2, $src1", "$src1, $src2",
4795                         (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4796                                       (_Src.LdFrag addr:$src2)))>,
4797                         AVX512BIBase, EVEX, VVVV,
4798                         Sched<[sched.Folded, sched.ReadAfterFold]>;
4800   defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4801                     (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4802                     OpcodeStr,
4803                     "${src2}"#_Brdct.BroadcastStr#", $src1",
4804                      "$src1, ${src2}"#_Brdct.BroadcastStr,
4805                     (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4806                                  (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
4807                     AVX512BIBase, EVEX, VVVV, EVEX_B,
4808                     Sched<[sched.Folded, sched.ReadAfterFold]>;
// Integer add / subtract / saturating add-sub / multiply / average
// instruction families. For the *_vl_all users the opcode order is
// byte, word, dword, qword; for the *_vl_bw users it is byte, word.
// The trailing 0/1 argument is IsCommutable: 1 for the add, multiply and
// average families, 0 for the subtract families. Families followed by `T8`
// add that class to their records.
4811 defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4812                                     SchedWriteVecALU, 1>;
4813 defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4814                                     SchedWriteVecALU, 0>;
4815 defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
4816                                     SchedWriteVecALU, HasBWI, 1>;
4817 defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
4818                                     SchedWriteVecALU, HasBWI, 0>;
4819 defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
4820                                      SchedWriteVecALU, HasBWI, 1>;
4821 defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
4822                                      SchedWriteVecALU, HasBWI, 0>;
4823 defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4824                                     SchedWritePMULLD, HasAVX512, 1>, T8;
4825 defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4826                                     SchedWriteVecIMul, HasBWI, 1>;
4827 defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4828                                     SchedWriteVecIMul, HasDQI, 1>, T8;
4829 defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
4830                                     HasBWI, 1>;
4831 defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
4832                                      HasBWI, 1>;
4833 defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
4834                                       SchedWriteVecIMul, HasBWI, 1>, T8;
4835 defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu,
4836                                    SchedWriteVecALU, HasBWI, 1>;
4837 defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
4838                                     SchedWriteVecIMul, HasAVX512, 1>, T8;
4839 defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
4840                                      SchedWriteVecIMul, HasAVX512, 1>;
// Instantiates avx512_binop_rm2 at all three vector lengths with separate
// source and destination type infos:
//   NAME#Z    under [prd], broadcast info v8i64_info;
//   NAME#Z256 under [HasVLX, prd], broadcast info v4i64x_info;
//   NAME#Z128 under [HasVLX, prd], broadcast info v2i64x_info.
// Every instantiation is tagged EVEX_CD8<64, CD8VF> and REX_W, and the
// broadcast type is always a 64-bit-element vector regardless of _SrcVTInfo.
4842 multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
4843                             X86SchedWriteWidths sched,
4844                             AVX512VLVectorVTInfo _SrcVTInfo,
4845                             AVX512VLVectorVTInfo _DstVTInfo,
4846                             SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
4847   let Predicates = [prd] in
4848     defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
4849                                  _SrcVTInfo.info512, _DstVTInfo.info512,
4850                                  v8i64_info, IsCommutable>,
4851                                   EVEX_V512, EVEX_CD8<64, CD8VF>, REX_W;
4852   let Predicates = [HasVLX, prd] in {
4853     defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
4854                                       _SrcVTInfo.info256, _DstVTInfo.info256,
4855                                       v4i64x_info, IsCommutable>,
4856                                       EVEX_V256, EVEX_CD8<64, CD8VF>, REX_W;
4857     defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
4858                                       _SrcVTInfo.info128, _DstVTInfo.info128,
4859                                       v2i64x_info, IsCommutable>,
4860                                      EVEX_V128, EVEX_CD8<64, CD8VF>, REX_W;
4861   }
// vpmultishiftqb (opcode 0x83, T8): byte-vector source and destination infos,
// matched against X86multishift, guarded by HasVBMI, not commutable.
4864 defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
4865                                 avx512vl_i8_info, avx512vl_i8_info,
4866                                 X86multishift, HasVBMI, 0>, T8;
// Broadcast-memory (`rmb`) form for pack-style operations whose result type
// (_Dst) differs from the source type (_Src). The second source is a _Src
// scalar memory operand loaded through _Src.BroadcastLdFrag and bitconverted
// before OpNode is applied. Encoding adds EVEX_B and uses
// EVEX_CD8<_Src.EltSize, CD8VF>.
4868 multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4869                             X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4870                             X86FoldableSchedWrite sched> {
4871   defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4872                     (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4873                     OpcodeStr,
4874                     "${src2}"#_Src.BroadcastStr#", $src1",
4875                      "$src1, ${src2}"#_Src.BroadcastStr,
4876                     (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4877                                  (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
4878                     EVEX, VVVV, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4879                     Sched<[sched.Folded, sched.ReadAfterFold]>;
// Register (`rr`) and full-memory (`rm`) forms for operations that read
// _Src-typed operands and produce a _Dst-typed result. Both forms use
// EVEX_CD8<_Src.EltSize, CD8VF>. IsCommutable is forwarded twice to
// AVX512_maskable for the rr form only.
4882 multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4883                             SDNode OpNode,X86VectorVTInfo _Src,
4884                             X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
4885                             bit IsCommutable = 0> {
4886   defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4887                             (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4888                             "$src2, $src1","$src1, $src2",
4889                             (_Dst.VT (OpNode
4890                                          (_Src.VT _Src.RC:$src1),
4891                                          (_Src.VT _Src.RC:$src2))),
4892                             IsCommutable, IsCommutable>,
4893                             EVEX_CD8<_Src.EltSize, CD8VF>, EVEX, VVVV, Sched<[sched]>;
4894   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4895                         (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4896                         "$src2, $src1", "$src1, $src2",
4897                         (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4898                                       (_Src.LdFrag addr:$src2)))>,
4899                          EVEX, VVVV, EVEX_CD8<_Src.EltSize, CD8VF>,
4900                          Sched<[sched.Folded, sched.ReadAfterFold]>;
// i32 -> i16 pack family at all three vector lengths. Each length combines
// avx512_packs_rm (rr/rm) with avx512_packs_rmb (broadcast form), scheduled
// as SchedWriteShuffle. Z requires HasBWI; Z256/Z128 require HasBWI + HasVLX.
4903 multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4904                                     SDNode OpNode> {
4905   let Predicates = [HasBWI] in
4906   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
4907                                  v32i16_info, SchedWriteShuffle.ZMM>,
4908                 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
4909                                  v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
4910   let Predicates = [HasBWI, HasVLX] in {
4911     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
4912                                      v16i16x_info, SchedWriteShuffle.YMM>,
4913                      avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
4914                                       v16i16x_info, SchedWriteShuffle.YMM>,
4915                                       EVEX_V256;
4916     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
4917                                      v8i16x_info, SchedWriteShuffle.XMM>,
4918                      avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
4919                                       v8i16x_info, SchedWriteShuffle.XMM>,
4920                                       EVEX_V128;
4921   }
// i16 -> i8 pack family at all three vector lengths. Unlike the i32 -> i16
// family, only avx512_packs_rm is used (no broadcast form), and every record
// is tagged WIG. Z requires HasBWI; Z256/Z128 require HasBWI + HasVLX.
4923 multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4924                             SDNode OpNode> {
4925   let Predicates = [HasBWI] in
4926   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
4927                                 SchedWriteShuffle.ZMM>, EVEX_V512, WIG;
4928   let Predicates = [HasBWI, HasVLX] in {
4929     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
4930                                      v32i8x_info, SchedWriteShuffle.YMM>,
4931                                      EVEX_V256, WIG;
4932     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
4933                                      v16i8x_info, SchedWriteShuffle.XMM>,
4934                                      EVEX_V128, WIG;
4935   }
// Multiply-add family whose result elements (_Dst) differ from the source
// elements (_Src). Reuses avx512_packs_rm (rr/rm forms) at all three vector
// lengths with SchedWriteVecIMul scheduling. Z requires HasBWI; Z256/Z128
// require HasBWI + HasVLX. IsCommutable is forwarded to avx512_packs_rm.
4938 multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4939                             SDNode OpNode, AVX512VLVectorVTInfo _Src,
4940                             AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
4941   let Predicates = [HasBWI] in
4942   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
4943                                 _Dst.info512, SchedWriteVecIMul.ZMM,
4944                                 IsCommutable>, EVEX_V512;
4945   let Predicates = [HasBWI, HasVLX] in {
4946     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
4947                                      _Dst.info256, SchedWriteVecIMul.YMM,
4948                                      IsCommutable>, EVEX_V256;
4949     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
4950                                      _Dst.info128, SchedWriteVecIMul.XMM,
4951                                      IsCommutable>, EVEX_V128;
4952   }
// Pack instructions. The signed variants match X86Packss and the unsigned
// variants match X86Packus. VPACKUSDW is the only one that uses the
// AVX5128IBase class; the other three use AVX512BIBase.
4955 defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4956 defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4957 defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4958 defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
// Multiply-add instructions built from avx512_vpmadd:
//   VPMADDUBSW - i8 sources, i16 result, opcode 0x04, T8; IsCommutable is
//                left at its default (0).
//   VPMADDWD   - i16 sources, i32 result, opcode 0xF5; IsCommutable = 1.
4960 defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4961                      avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8, WIG;
4962 defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4963                      avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, WIG;
// Packed integer min/max instructions, grouped as signed max, unsigned max,
// signed min, unsigned min. All are commutable (last argument 1) and use
// SchedWriteVecALU. Byte and word forms require HasBWI; dword and qword forms
// require HasAVX512. The D and Q forms of each operation share one opcode.
4965 defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4966                                     SchedWriteVecALU, HasBWI, 1>, T8;
4967 defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4968                                     SchedWriteVecALU, HasBWI, 1>;
4969 defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
4970                                     SchedWriteVecALU, HasAVX512, 1>, T8;
4971 defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
4972                                     SchedWriteVecALU, HasAVX512, 1>, T8;
4974 defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4975                                     SchedWriteVecALU, HasBWI, 1>;
4976 defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
4977                                     SchedWriteVecALU, HasBWI, 1>, T8;
4978 defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
4979                                     SchedWriteVecALU, HasAVX512, 1>, T8;
4980 defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
4981                                     SchedWriteVecALU, HasAVX512, 1>, T8;
4983 defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
4984                                     SchedWriteVecALU, HasBWI, 1>, T8;
4985 defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
4986                                     SchedWriteVecALU, HasBWI, 1>;
4987 defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
4988                                     SchedWriteVecALU, HasAVX512, 1>, T8;
4989 defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
4990                                     SchedWriteVecALU, HasAVX512, 1>, T8;
4992 defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
4993                                     SchedWriteVecALU, HasBWI, 1>;
4994 defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
4995                                     SchedWriteVecALU, HasBWI, 1>, T8;
4996 defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
4997                                     SchedWriteVecALU, HasAVX512, 1>, T8;
4998 defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
4999                                     SchedWriteVecALU, HasAVX512, 1>, T8;
5001 // PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
// Each pattern inserts the narrow operand(s) into an undefined v8i64
// (INSERT_SUBREG into IMPLICIT_DEF), runs the 512-bit VPMULLQZrr / VPMULLQZrmb
// and extracts the low ymm / xmm sub-register as the result. The broadcast
// variants pass the address straight to the rmb form.
5002 let Predicates = [HasDQI, NoVLX] in {
5003   def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5004             (EXTRACT_SUBREG
5005                 (VPMULLQZrr
5006                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5007                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5008              sub_ymm)>;
5009   def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5010             (EXTRACT_SUBREG
5011                 (VPMULLQZrmb
5012                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5013                     addr:$src2),
5014              sub_ymm)>;
5016   def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5017             (EXTRACT_SUBREG
5018                 (VPMULLQZrr
5019                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5020                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5021              sub_xmm)>;
5022   def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5023             (EXTRACT_SUBREG
5024                 (VPMULLQZrmb
5025                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5026                     addr:$src2),
5027              sub_xmm)>;
// Lowers 256-bit (v4i64) and 128-bit (v2i64) uses of OpNode onto a 512-bit
// instruction.
//   Instr  - name prefix of the 512-bit instruction; "rr" or "rmb" is
//            appended and the result is cast to an Instruction.
//   OpNode - SelectionDAG node to match.
// The narrow operand(s) are inserted into an undefined v8i64, the 512-bit
// instruction is applied, and the low ymm / xmm sub-register is extracted.
// The multiclass sets no predicates itself; the instantiation site does.
5030 multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
5031   def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
5032             (EXTRACT_SUBREG
5033                 (!cast<Instruction>(Instr#"rr")
5034                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5035                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5036              sub_ymm)>;
5037   def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5038             (EXTRACT_SUBREG
5039                 (!cast<Instruction>(Instr#"rmb")
5040                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5041                     addr:$src2),
5042              sub_ymm)>;
5044   def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
5045             (EXTRACT_SUBREG
5046                 (!cast<Instruction>(Instr#"rr")
5047                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5048                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5049              sub_xmm)>;
5050   def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5051             (EXTRACT_SUBREG
5052                 (!cast<Instruction>(Instr#"rmb")
5053                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5054                     addr:$src2),
5055              sub_xmm)>;
// With AVX512 but without VLX, route the 128/256-bit 64-bit-element
// umax / umin / smax / smin through the 512-bit VPMAXUQZ / VPMINUQZ /
// VPMAXSQZ / VPMINSQZ instructions via avx512_min_max_lowering.
5058 let Predicates = [HasAVX512, NoVLX] in {
5059   defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
5060   defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
5061   defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
5062   defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
5065 //===----------------------------------------------------------------------===//
5066 // AVX-512  Logical Instructions
5067 //===----------------------------------------------------------------------===//
// Bitwise logical instructions in dword and qword flavours. Each family
// passes the same opcode for both element widths. and / or / xor are marked
// commutable; VPANDN (matching X86andnp) leaves IsCommutable at its default
// of 0.
5069 defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
5070                                    SchedWriteVecLogic, HasAVX512, 1>;
5071 defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
5072                                   SchedWriteVecLogic, HasAVX512, 1>;
5073 defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
5074                                    SchedWriteVecLogic, HasAVX512, 1>;
5075 defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5076                                     SchedWriteVecLogic, HasAVX512>;
// Byte- and word-element logic ops on 128/256-bit vectors (HasVLX only).
// and/or/xor/X86andnp on v16i8, v8i16, v32i8 and v16i16 values are selected
// to the Q-suffixed VPAND/VPOR/VPXOR/VPANDN Z128/Z256 instructions.
// For each register width the register-register (rr) patterns come first,
// followed by the patterns whose second operand is a vector load (rm).
5078 let Predicates = [HasVLX] in {
  // 128-bit, register-register.
5079   def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
5080             (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5081   def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
5082             (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5084   def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
5085             (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5086   def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
5087             (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5089   def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
5090             (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5091   def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
5092             (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5094   def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
5095             (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5096   def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
5097             (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
  // 128-bit, second operand loaded from memory.
5099   def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
5100             (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5101   def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
5102             (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5104   def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
5105             (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5106   def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
5107             (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5109   def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
5110             (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5111   def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
5112             (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5114   def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
5115             (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5116   def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
5117             (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
  // 256-bit, register-register.
5119   def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5120             (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5121   def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5122             (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5124   def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5125             (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5126   def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5127             (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5129   def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5130             (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5131   def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5132             (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5134   def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5135             (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5136   def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5137             (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
  // 256-bit, second operand loaded from memory.
5139   def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5140             (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5141   def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5142             (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5144   def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5145             (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5146   def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5147             (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5149   def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5150             (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5151   def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5152             (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5154   def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5155             (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5156   def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5157             (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
// Byte- and word-element logic ops on 512-bit vectors (HasAVX512).
// and/or/xor/X86andnp on v64i8 and v32i16 values are selected to the
// Q-suffixed VPANDQZ/VPORQZ/VPXORQZ/VPANDNQZ instructions: register-register
// (rr) patterns first, then patterns whose second operand is a load (rm).
5160 let Predicates = [HasAVX512] in {
5161   def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5162             (VPANDQZrr VR512:$src1, VR512:$src2)>;
5163   def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5164             (VPANDQZrr VR512:$src1, VR512:$src2)>;
5166   def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5167             (VPORQZrr VR512:$src1, VR512:$src2)>;
5168   def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5169             (VPORQZrr VR512:$src1, VR512:$src2)>;
5171   def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5172             (VPXORQZrr VR512:$src1, VR512:$src2)>;
5173   def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5174             (VPXORQZrr VR512:$src1, VR512:$src2)>;
5176   def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5177             (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5178   def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5179             (VPANDNQZrr VR512:$src1, VR512:$src2)>;
  // Second operand loaded from memory.
5181   def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5182             (VPANDQZrm VR512:$src1, addr:$src2)>;
5183   def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5184             (VPANDQZrm VR512:$src1, addr:$src2)>;
5186   def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5187             (VPORQZrm VR512:$src1, addr:$src2)>;
5188   def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5189             (VPORQZrm VR512:$src1, addr:$src2)>;
5191   def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5192             (VPXORQZrm VR512:$src1, addr:$src2)>;
5193   def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5194             (VPXORQZrm VR512:$src1, addr:$src2)>;
5196   def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5197             (VPANDNQZrm VR512:$src1, addr:$src2)>;
5198   def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5199             (VPANDNQZrm VR512:$src1, addr:$src2)>;
5202 // Patterns to catch vselect with different type than logic op.
//
// Parameters:
//   InstrStr - instruction name prefix; the rrk/rrkz/rmk/rmkz suffixes are
//              appended to pick the masked instruction variant.
//   OpNode   - the logic DAG node being matched.
//   _        - vector type info of the vselect_mask result (supplies VT, RC,
//              KRCWM and ImmAllZerosV).
//   IntInfo  - vector type info of the logic op, whose result is bitconverted
//              before feeding the vselect_mask.
//
// For both the register and the load form there are two patterns: one where
// the false operand of the select is $src0 (k suffix, $src0 passed as the first
// instruction operand) and one where it is all zeros (kz suffix).
5203 multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5204                                     X86VectorVTInfo _,
5205                                     X86VectorVTInfo IntInfo> {
5206   // Masked register-register logical operations.
5207   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5208                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5209                    _.RC:$src0)),
5210             (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5211              _.RC:$src1, _.RC:$src2)>;
5213   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5214                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5215                    _.ImmAllZerosV)),
5216             (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5217              _.RC:$src2)>;
5219   // Masked register-memory logical operations.
5220   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5221                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5222                                             (load addr:$src2)))),
5223                    _.RC:$src0)),
5224             (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5225              _.RC:$src1, addr:$src2)>;
5226   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5227                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5228                                             (load addr:$src2)))),
5229                    _.ImmAllZerosV)),
5230             (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5231              addr:$src2)>;
// Broadcast-load counterpart of avx512_logical_lowering: matches a
// vselect_mask over a bitconverted logic op whose second operand is
// IntInfo.BroadcastLdFrag, and selects the rmbk (false operand $src0) or rmbkz
// (false operand all zeros) variant of InstrStr.
// Parameters have the same meaning as in avx512_logical_lowering: `_`
// describes the select type and IntInfo the logic-op type.
5234 multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5235                                          X86VectorVTInfo _,
5236                                          X86VectorVTInfo IntInfo> {
5237   // Register-broadcast logical operations.
5238   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5239                    (bitconvert
5240                     (IntInfo.VT (OpNode _.RC:$src1,
5241                                  (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5242                    _.RC:$src0)),
5243             (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5244              _.RC:$src1, addr:$src2)>;
5245   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5246                    (bitconvert
5247                     (IntInfo.VT (OpNode _.RC:$src1,
5248                                  (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5249                    _.ImmAllZerosV)),
5250             (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
5251              _.RC:$src1, addr:$src2)>;
// Expands avx512_logical_lowering over the three vector widths.
// The Z128 and Z256 instantiations (info128/info256) require HasVLX; the Z
// instantiation (info512) requires HasAVX512.
//   SelectInfo - per-width type info for the vselect result.
//   IntInfo    - per-width type info for the logic op.
5254 multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5255                                          AVX512VLVectorVTInfo SelectInfo,
5256                                          AVX512VLVectorVTInfo IntInfo> {
5257 let Predicates = [HasVLX] in {
5258   defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5259                                  IntInfo.info128>;
5260   defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5261                                  IntInfo.info256>;
5263 let Predicates = [HasAVX512] in {
5264   defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
5265                                  IntInfo.info512>;
// Expands avx512_logical_lowering_bcast over the three vector widths.
// The Z128 and Z256 instantiations (info128/info256) require HasVLX; the Z
// instantiation (info512) requires HasAVX512.
//   SelectInfo - per-width type info for the vselect result.
//   IntInfo    - per-width type info for the logic op and its broadcast load.
5269 multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5270                                                AVX512VLVectorVTInfo SelectInfo,
5271                                                AVX512VLVectorVTInfo IntInfo> {
5272 let Predicates = [HasVLX] in {
5273   defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5274                                        SelectInfo.info128, IntInfo.info128>;
5275   defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5276                                        SelectInfo.info256, IntInfo.info256>;
5278 let Predicates = [HasAVX512] in {
5279   defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5280                                        SelectInfo.info512, IntInfo.info512>;
// Enumerates the (select type, logic-op type) combinations that need extra
// masked-logic patterns for one logic instruction family.
//   InstrStr - instruction name stem; "Q" or "D" is appended here according
//              to the element size of the select type (i64/f64 use "Q",
//              i32/f32 use "D").
//   OpNode   - the logic DAG node being matched.
// Integer select types skip the combination where select and logic types are
// identical; the FP select types list all four integer logic types.
5284 multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5285   // i64 vselect with i32/i16/i8 logic op
5286   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5287                                        avx512vl_i32_info>;
5288   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5289                                        avx512vl_i16_info>;
5290   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5291                                        avx512vl_i8_info>;
5293   // i32 vselect with i64/i16/i8 logic op
5294   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5295                                        avx512vl_i64_info>;
5296   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5297                                        avx512vl_i16_info>;
5298   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5299                                        avx512vl_i8_info>;
5301   // f32 vselect with i64/i32/i16/i8 logic op
5302   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5303                                        avx512vl_i64_info>;
5304   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5305                                        avx512vl_i32_info>;
5306   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5307                                        avx512vl_i16_info>;
5308   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5309                                        avx512vl_i8_info>;
5311   // f64 vselect with i64/i32/i16/i8 logic op
5312   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5313                                        avx512vl_i64_info>;
5314   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5315                                        avx512vl_i32_info>;
5316   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5317                                        avx512vl_i16_info>;
5318   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5319                                        avx512vl_i8_info>;
  // Broadcast-load forms: f32 select with an i32 logic op and f64 select with
  // an i64 logic op.
5321   defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5322                                              avx512vl_f32_info,
5323                                              avx512vl_i32_info>;
5324   defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
5325                                              avx512vl_f64_info,
5326                                              avx512vl_i64_info>;
// Emit the mixed-type masked logic patterns for each of the four logic
// instruction families, pairing the name stem with its DAG node.
5329 defm : avx512_logical_lowering_types<"VPAND", and>;
5330 defm : avx512_logical_lowering_types<"VPOR",  or>;
5331 defm : avx512_logical_lowering_types<"VPXOR", xor>;
5332 defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5334 //===----------------------------------------------------------------------===//
5335 // AVX-512  FP arithmetic
5336 //===----------------------------------------------------------------------===//
// Scalar FP binary operation: defines four records for one opcode.
//   rr_Int / rm_Int - maskable forms on the vector register class (_.RC),
//                     matched via VecNode; rm_Int takes its second operand
//                     through _.ScalarIntMemFrags.
//   rr / rm         - isCodeGenOnly forms on the scalar register class
//                     (_.FRC), matched via OpNode and predicated on HasAVX512.
// Parameters:
//   opc, OpcodeStr - opcode byte and assembly mnemonic.
//   _              - vector type info for the element type.
//   OpNode         - pattern operator for the FRC forms.
//   VecNode        - DAG node for the _Int forms.
//   sched          - scheduling class; memory forms use sched.Folded and
//                    sched.ReadAfterFold.
//   IsCommutable   - applied to the rr form only.
// Everything here is marked as using MXCSR and possibly raising an FP
// exception.
5338 multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5339                             SDPatternOperator OpNode, SDNode VecNode,
5340                             X86FoldableSchedWrite sched, bit IsCommutable> {
5341   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5342   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5343                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5344                            "$src2, $src1", "$src1, $src2",
5345                            (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5346                            Sched<[sched]>;
5348   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5349                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5350                          "$src2, $src1", "$src1, $src2",
5351                          (_.VT (VecNode _.RC:$src1,
5352                                         (_.ScalarIntMemFrags addr:$src2)))>,
5353                          Sched<[sched.Folded, sched.ReadAfterFold]>;
5354   let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5355   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5356                          (ins _.FRC:$src1, _.FRC:$src2),
5357                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5358                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5359                           Sched<[sched]> {
5360     let isCommutable = IsCommutable;
5361   }
5362   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5363                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5364                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5365                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5366                          (_.ScalarLdFrag addr:$src2)))]>,
5367                          Sched<[sched.Folded, sched.ReadAfterFold]>;
5368   }
5369   }
// Scalar FP binary operation with an explicit rounding-control operand.
// Defines rrb_Int: a maskable register form taking AVX512RC:$rc as a third
// input, matched as (VecNode src1, src2, (i32 timm:$rc)) and tagged with
// EVEX_B and EVEX_RC. The record is marked as using MXCSR.
5372 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5373                                   SDNode VecNode, X86FoldableSchedWrite sched> {
5374   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5375   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5376                           (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5377                           "$rc, $src2, $src1", "$src1, $src2, $rc",
5378                           (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5379                           (i32 timm:$rc))>,
5380                           EVEX_B, EVEX_RC, Sched<[sched]>;
// Scalar FP binary operation with a {sae} variant.
// Defines the same rr_Int/rm_Int (VecNode, tagged SIMD_EXC) and codegen-only
// rr/rm (OpNode on _.FRC, HasAVX512) records as avx512_fp_scalar, plus
// rrb_Int: a maskable register form printed with "{sae}", matched via SaeNode
// and tagged EVEX_B.
// The rrb_Int form is marked as using MXCSR but, unlike rr/rm, is not given
// mayRaiseFPException here.
//   IsCommutable - applied to the rr form only.
5382 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5383                                 SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5384                                 X86FoldableSchedWrite sched, bit IsCommutable> {
5385   let ExeDomain = _.ExeDomain in {
5386   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5387                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5388                            "$src2, $src1", "$src1, $src2",
5389                            (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5390                            Sched<[sched]>, SIMD_EXC;
5392   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5393                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5394                          "$src2, $src1", "$src1, $src2",
5395                          (_.VT (VecNode _.RC:$src1,
5396                                         (_.ScalarIntMemFrags addr:$src2)))>,
5397                          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
5399   let isCodeGenOnly = 1, Predicates = [HasAVX512],
5400       Uses = [MXCSR], mayRaiseFPException = 1 in {
5401   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5402                          (ins _.FRC:$src1, _.FRC:$src2),
5403                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5404                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5405                           Sched<[sched]> {
5406     let isCommutable = IsCommutable;
5407   }
5408   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5409                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5410                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5411                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5412                          (_.ScalarLdFrag addr:$src2)))]>,
5413                          Sched<[sched.Folded, sched.ReadAfterFold]>;
5414   }
5416   let Uses = [MXCSR] in
5417   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5418                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5419                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5420                             (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5421                             EVEX_B, Sched<[sched]>;
5422   }
// Scalar FP binary op with embedded-rounding variants, for three element
// types. Each defm combines avx512_fp_scalar (OpNode/VecNode) with
// avx512_fp_scalar_round (RndNode):
//   SSZ - "ss" suffix, f32x_info, sched.PS.Scl, XS prefix, CD8 32.
//   SDZ - "sd" suffix, f64x_info, sched.PD.Scl, XD prefix + REX_W, CD8 64.
//   SHZ - "sh" suffix, f16x_info, sched.PH.Scl, T_MAP5 + XS, CD8 16;
//         additionally predicated on HasFP16.
5425 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5426                                 SDNode VecNode, SDNode RndNode,
5427                                 X86SchedWriteSizes sched, bit IsCommutable> {
5428   defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5429                               sched.PS.Scl, IsCommutable>,
5430              avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5431                               sched.PS.Scl>,
5432                               TB, XS, EVEX, VVVV, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5433   defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5434                               sched.PD.Scl, IsCommutable>,
5435              avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5436                               sched.PD.Scl>,
5437                               TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5438   let Predicates = [HasFP16] in
5439     defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
5440                                 VecNode, sched.PH.Scl, IsCommutable>,
5441                avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
5442                                 sched.PH.Scl>,
5443                                 T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>;
// Scalar FP binary op with {sae} variants, for three element types. Each
// defm instantiates avx512_fp_scalar_sae:
//   SSZ - "ss" suffix, f32x_info, sched.PS.Scl, XS prefix, CD8 32.
//   SDZ - "sd" suffix, f64x_info, sched.PD.Scl, XD prefix + REX_W, CD8 64.
//   SHZ - "sh" suffix, f16x_info, sched.PH.Scl, T_MAP5 + XS, CD8 16;
//         additionally predicated on HasFP16.
5446 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5447                               SDNode VecNode, SDNode SaeNode,
5448                               X86SchedWriteSizes sched, bit IsCommutable> {
5449   defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5450                               VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
5451                               TB, XS, EVEX, VVVV, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5452   defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5453                               VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
5454                               TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5455   let Predicates = [HasFP16] in {
5456     defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
5457                                 VecNode, SaeNode, sched.PH.Scl, IsCommutable>,
5458                                 T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>;
5459   }
// Scalar FP arithmetic instructions.
// VADD/VMUL/VSUB/VDIV use the embedded-rounding multiclass; VMIN/VMAX use the
// {sae} multiclass. The last argument is IsCommutable: 1 for add and mul,
// 0 for sub, div, min and max.
5461 defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
5462                                  SchedWriteFAddSizes, 1>;
5463 defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
5464                                  SchedWriteFMulSizes, 1>;
5465 defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
5466                                  SchedWriteFAddSizes, 0>;
5467 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
5468                                  SchedWriteFDivSizes, 0>;
5469 defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5470                                SchedWriteFCmpSizes, 0>;
5471 defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5472                                SchedWriteFCmpSizes, 0>;
5474 // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5475 // X86fminc and X86fmaxc instead of X86fmin and X86fmax
//
// Defines codegen-only scalar rr/rm records on _.FRC, predicated on
// HasAVX512, matched via OpNode. The rr form is unconditionally marked
// isCommutable; the rm form loads its second operand through _.ScalarLdFrag.
5476 multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5477                                     X86VectorVTInfo _, SDNode OpNode,
5478                                     X86FoldableSchedWrite sched> {
5479   let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5480   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5481                          (ins _.FRC:$src1, _.FRC:$src2),
5482                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5483                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5484                           Sched<[sched]> {
5485     let isCommutable = 1;
5486   }
5487   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5488                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5489                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5490                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5491                          (_.ScalarLdFrag addr:$src2)))]>,
5492                          Sched<[sched.Folded, sched.ReadAfterFold]>;
5493   }
// Commutable scalar min/max records for f32 (ss), f64 (sd) and f16 (sh),
// matched via X86fminc / X86fmaxc. They reuse opcodes 0x5D (min) and 0x5F
// (max) and are all tagged SIMD_EXC.
// NOTE(review): the f16 (SH) records carry no HasFP16 predicate here, only
// the HasAVX512 predicate set inside the multiclass - confirm this is
// intended.
5495 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5496                                          SchedWriteFCmp.Scl>, TB, XS,
5497                                          EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5499 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5500                                          SchedWriteFCmp.Scl>, TB, XD,
5501                                          REX_W, EVEX, VVVV, VEX_LIG,
5502                                          EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5504 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5505                                          SchedWriteFCmp.Scl>, TB, XS,
5506                                          EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5508 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5509                                          SchedWriteFCmp.Scl>, TB, XD,
5510                                          REX_W, EVEX, VVVV, VEX_LIG,
5511                                          EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5513 defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
5514                                          SchedWriteFCmp.Scl>, T_MAP5, XS,
5515                                          EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC;
5517 defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
5518                                          SchedWriteFCmp.Scl>, T_MAP5, XS,
5519                                          EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC;
// Packed FP binary operation for one vector type: defines rr (register),
// rm (full-vector load) and rmb (broadcast load, EVEX_B) records through
// AVX512_maskable_split.
// Parameters:
//   opc, OpcodeStr      - opcode byte and mnemonic stem; `suffix` is appended.
//   OpNode, MaskOpNode  - the two pattern operators handed to
//                         AVX512_maskable_split (presumably for the unmasked
//                         and masked patterns respectively - confirm against
//                         AVX512_maskable_split).
//   _                   - vector type info.
//   sched               - scheduling class; load forms use sched.Folded and
//                         sched.ReadAfterFold.
//   IsCommutable        - forwarded to the rr form only.
//   IsKCommutable       - forwarded twice to the rr form; defaults to
//                         IsCommutable.
//   suffix              - mnemonic suffix; defaults to _.Suffix.
//   ClobberConstraint   - forwarded to all three forms.
//   MayRaiseFPException - value for mayRaiseFPException; defaults to 1.
// All records are marked hasSideEffects = 0 and as using MXCSR; rm and rmb are
// also marked mayLoad = 1.
5521 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5522                             SDPatternOperator MaskOpNode,
5523                             X86VectorVTInfo _, X86FoldableSchedWrite sched,
5524                             bit IsCommutable,
5525                             bit IsKCommutable = IsCommutable,
5526                             string suffix = _.Suffix,
5527                             string ClobberConstraint = "",
5528                             bit MayRaiseFPException = 1> {
5529   let ExeDomain = _.ExeDomain, hasSideEffects = 0,
5530       Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in {
5531   defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
5532                                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix,
5533                                  "$src2, $src1", "$src1, $src2",
5534                                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
5535                                  (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint,
5536                                  IsCommutable, IsKCommutable, IsKCommutable>, EVEX, VVVV, Sched<[sched]>;
5537   let mayLoad = 1 in {
5538     defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5539                                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix,
5540                                    "$src2, $src1", "$src1, $src2",
5541                                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5542                                    (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5543                                    ClobberConstraint>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
5544     defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5545                                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix,
5546                                     "${src2}"#_.BroadcastStr#", $src1",
5547                                     "$src1, ${src2}"#_.BroadcastStr,
5548                                     (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5549                                     (MaskOpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5550                                     ClobberConstraint>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5551     }
5552   }
// Packed FP binary operation with an explicit rounding-control operand.
// Defines rrb: a maskable register form taking AVX512RC:$rc as a third input,
// matched as (OpNodeRnd src1, src2, (i32 timm:$rc)) and tagged EVEX_B and
// EVEX_RC. The record is marked as using MXCSR.
//   suffix            - mnemonic suffix; defaults to _.Suffix.
//   ClobberConstraint - forwarded to AVX512_maskable after vselect_mask.
5555 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5556                                   SDPatternOperator OpNodeRnd,
5557                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
5558                                   string suffix = _.Suffix,
5559                                   string ClobberConstraint = ""> {
5560   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5561   defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5562                   (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix,
5563                   "$rc, $src2, $src1", "$src1, $src2, $rc",
5564                   (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))),
5565                   0, 0, 0, vselect_mask, ClobberConstraint>,
5566                   EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
// Packed FP binary operation with a {sae} variant.
// Defines rrb: a maskable register form printed with "{sae}", matched via
// OpNodeSAE and tagged EVEX_B. The record is marked as using MXCSR.
5569 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5570                                 SDPatternOperator OpNodeSAE,
5571                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5572   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5573   defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5574                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5575                   "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5576                   (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5577                   EVEX, VVVV, EVEX_B, Sched<[sched]>;
// Packed single/double FP binary operation at all three vector widths.
//   PSZ / PDZ       - 512-bit f32 / f64 forms, predicated on `prd`.
//   PSZ128 / PSZ256 - 128/256-bit f32 forms, predicated on `prd` and HasVLX.
//   PDZ128 / PDZ256 - 128/256-bit f64 forms, predicated on `prd` and HasVLX.
// Parameters:
//   IsCommutable      - commutability flag passed to every form except PDZ128.
//   IsPD128Commutable - used as the IsCommutable argument of PDZ128 only,
//                       which receives IsCommutable as its next argument
//                       (IsKCommutable); defaults to IsCommutable.
5580 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5581                              SDPatternOperator MaskOpNode,
5582                              Predicate prd, X86SchedWriteSizes sched,
5583                              bit IsCommutable = 0,
5584                              bit IsPD128Commutable = IsCommutable> {
5585   let Predicates = [prd] in {
5586   defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
5587                               sched.PS.ZMM, IsCommutable>, EVEX_V512, TB,
5588                               EVEX_CD8<32, CD8VF>;
5589   defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
5590                               sched.PD.ZMM, IsCommutable>, EVEX_V512, TB, PD, REX_W,
5591                               EVEX_CD8<64, CD8VF>;
5592   }
5594     // Define only if AVX512VL feature is present.
5595   let Predicates = [prd, HasVLX] in {
5596     defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
5597                                    sched.PS.XMM, IsCommutable>, EVEX_V128, TB,
5598                                    EVEX_CD8<32, CD8VF>;
5599     defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
5600                                    sched.PS.YMM, IsCommutable>, EVEX_V256, TB,
5601                                    EVEX_CD8<32, CD8VF>;
5602     defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
5603                                    sched.PD.XMM, IsPD128Commutable,
5604                                    IsCommutable>, EVEX_V128, TB, PD, REX_W,
5605                                    EVEX_CD8<64, CD8VF>;
5606     defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
5607                                    sched.PD.YMM, IsCommutable>, EVEX_V256, TB, PD, REX_W,
5608                                    EVEX_CD8<64, CD8VF>;
5609   }
// Packed half-precision FP binary operation at all three vector widths.
//   PHZ             - 512-bit form (v32f16_info), predicated on HasFP16.
//   PHZ128 / PHZ256 - 128/256-bit forms, predicated on HasVLX and HasFP16.
// All forms use the T_MAP5 opcode map and EVEX_CD8<16, CD8VF>.
5612 multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5613                               SDPatternOperator MaskOpNode,
5614                               X86SchedWriteSizes sched, bit IsCommutable = 0> {
5615   let Predicates = [HasFP16] in {
5616     defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
5617                                 sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5,
5618                                 EVEX_CD8<16, CD8VF>;
5619   }
5620   let Predicates = [HasVLX, HasFP16] in {
5621     defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
5622                                    sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5,
5623                                    EVEX_CD8<16, CD8VF>;
5624     defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
5625                                    sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5,
5626                                    EVEX_CD8<16, CD8VF>;
5627   }
// Embedded-rounding variants of a packed FP binary operation. Only 512-bit
// forms are defined: PHZ (v32f16_info, predicated on HasFP16), PSZ
// (v16f32_info) and PDZ (v8f64_info), each via avx512_fp_round_packed.
// The whole multiclass is wrapped in `let Uses = [MXCSR]`.
5630 let Uses = [MXCSR] in
5631 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5632                                    X86SchedWriteSizes sched> {
5633   let Predicates = [HasFP16] in {
5634     defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5635                                       v32f16_info>,
5636                                       EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
5637   }
5638   defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5639                                     v16f32_info>,
5640                                     EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
5641   defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5642                                     v8f64_info>,
5643                                     EVEX_V512, TB, PD, REX_W,EVEX_CD8<64, CD8VF>;
// Same layout as avx512_fp_binop_p_round, but each 512-bit form is built
// from avx512_fp_sae_packed instead of avx512_fp_round_packed (presumably
// the suppress-all-exceptions forms; the helper is defined elsewhere).
//   PHZ - v32f16_info, T_MAP5, predicated on HasFP16
//   PSZ - v16f32_info, TB
//   PDZ - v8f64_info,  TB, PD, REX_W
// The enclosing `let` sets Uses = [MXCSR] for the definitions made here.
5646 let Uses = [MXCSR] in
5647 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5648                                  X86SchedWriteSizes sched> {
5649   let Predicates = [HasFP16] in {
5650     defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5651                                     v32f16_info>,
5652                                     EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
5653   }
5654   defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5655                                   v16f32_info>,
5656                                   EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
5657   defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5658                                   v8f64_info>,
5659                                   EVEX_V512, TB, PD, REX_W,EVEX_CD8<64, CD8VF>;
// Packed FP arithmetic instructions.  Each defm combines three multiclasses
// under one name prefix:
//   avx512_fp_binop_p   - PS/PD forms, passed HasAVX512
//   avx512_fp_binop_ph  - PH (f16) forms
//   avx512_fp_binop_p_round or avx512_fp_binop_p_sae - extra 512-bit forms
// VADD and VMUL pass 1 as the trailing commutable argument; VSUB and VDIV
// omit it.  VADD/VMUL/VSUB/VDIV use the any_f* node as OpNode and the plain
// f* node as MaskOpNode.
5662 defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
5663                               SchedWriteFAddSizes, 1>,
5664             avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
5665             avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5666 defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
5667                               SchedWriteFMulSizes, 1>,
5668             avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
5669             avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5670 defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
5671                               SchedWriteFAddSizes>,
5672             avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
5673             avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5674 defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
5675                               SchedWriteFDivSizes>,
5676             avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
5677             avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
// VMIN/VMAX use the same node (X86fmin/X86fmax) for both OpNode and
// MaskOpNode, pass 0 explicitly as the commutable argument, and use the _sae
// multiclass (X86fminSAE/X86fmaxSAE) rather than the _round one.
5678 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
5679                               SchedWriteFCmpSizes, 0>,
5680             avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin, SchedWriteFCmpSizes, 0>,
5681             avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5682 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
5683                               SchedWriteFCmpSizes, 0>,
5684             avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>,
5685             avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
// Code-gen-only duplicates of VMIN/VMAX: same opcodes (0x5D/0x5F) and the
// same mnemonic strings ("vmin"/"vmax"), but matched from X86fminc/X86fmaxc
// and passed 1 as the trailing commutable argument.  No _round/_sae forms
// are instantiated for them.
5686 let isCodeGenOnly = 1 in {
5687   defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
5688                                  SchedWriteFCmpSizes, 1>,
5689                avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
5690                                  SchedWriteFCmpSizes, 1>;
5691   defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
5692                                  SchedWriteFCmpSizes, 1>,
5693                avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
5694                                  SchedWriteFCmpSizes, 1>;
// Packed FP bitwise logic instructions (vand/vandn/vor/vxor), built from
// avx512_fp_binop_p with HasDQI as the predicate argument.  Both pattern
// operators are null_frag, so these defms supply no selection DAG node of
// their own.  The `let` overrides Uses to an empty register list and
// mayRaiseFPException to 0 for everything defined inside it.
// VANDN passes 0 as the commutable argument; the other three pass 1.
5696 let Uses = []<Register>, mayRaiseFPException = 0 in {
5697 defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
5698                                SchedWriteFLogicSizes, 1>;
5699 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
5700                                SchedWriteFLogicSizes, 0>;
5701 defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
5702                                SchedWriteFLogicSizes, 1>;
5703 defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
5704                                SchedWriteFLogicSizes, 1>;
// Packed two-operand FP instruction in three AVX512_maskable forms for the
// vector type described by `_`:
//   rr  - both sources in registers
//   rm  - second source loaded as a full vector via _.LdFrag
//   rmb - second source loaded via _.BroadcastLdFrag (adds EVEX_B)
// The mnemonic is OpcodeStr with _.Suffix appended.  All forms use
// _.ExeDomain, list MXCSR in Uses and set mayRaiseFPException = 1.
5707 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5708                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5709   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5710   defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5711                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5712                   "$src2, $src1", "$src1, $src2",
5713                   (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5714                   EVEX, VVVV, Sched<[sched]>;
5715   defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5716                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5717                   "$src2, $src1", "$src1, $src2",
5718                   (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5719                   EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Broadcast form: the memory operand is a single scalar element
  // (_.ScalarMemOp) and the assembly strings append _.BroadcastStr.
5720   defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5721                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5722                    "${src2}"#_.BroadcastStr#", $src1",
5723                    "$src1, ${src2}"#_.BroadcastStr,
5724                    (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5725                    EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5726   }
// Scalar counterpart of avx512_fp_scalef_p, built on AVX512_maskable_scalar:
//   rr - both sources in registers
//   rm - second source is _.IntScalarMemOp, matched via _.ScalarIntMemFrags
// The mnemonic is OpcodeStr with _.Suffix appended.  Unlike the packed
// multiclass, no EVEX/VVVV mixins are attached here; the instantiation site
// is expected to add them (avx512_fp_scalef_all does).
5729 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5730                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5731   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5732   defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5733                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5734                   "$src2, $src1", "$src1, $src2",
5735                   (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5736                   Sched<[sched]>;
5737   defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5738                   (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
5739                   "$src2, $src1", "$src1, $src2",
5740                   (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
5741                   Sched<[sched.Folded, sched.ReadAfterFold]>;
5742   }
// Instantiates every packed and scalar form of a scalef-style instruction.
//   opc       - opcode used for the packed forms
//   opcScaler - opcode used for the scalar forms
//   OpcodeStr - mnemonic stem; type suffixes are appended further down
//   sched     - per-width scheduling classes (ZMM/YMM/XMM/Scl)
// The 512-bit packed forms and the scalar forms also inherit a rounding
// variant (avx512_fp_round_packed with X86scalefRnd, avx512_fp_scalar_round
// with X86scalefsRnd).  The 128/256-bit forms use avx512_fp_scalef_p only.
// f16 forms use T_MAP6; f32/f64 forms use T8.
5745 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5746                                 X86SchedWriteWidths sched> {
5747   let Predicates = [HasFP16] in {
5748     defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
5749                avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
5750                                 EVEX_V512, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
    // avx512_fp_scalef_scalar appends _.Suffix itself, whereas
    // avx512_fp_scalar_round is handed the suffixed name (OpcodeStr#"sh").
5751     defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
5752                avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
5753                              EVEX, VVVV, T_MAP6, PD, EVEX_CD8<16, CD8VT1>;
5754   }
  // 512-bit f32/f64 packed forms and f32/f64 scalar forms get no predicate
  // from this multiclass.
5755   defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
5756              avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
5757                               EVEX_V512, EVEX_CD8<32, CD8VF>, T8, PD;
5758   defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
5759              avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
5760                               EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
5761   defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
5762              avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
5763                                     X86scalefsRnd, sched.Scl>,
5764                                     EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8, PD;
5765   defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
5766              avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
5767                                     X86scalefsRnd, sched.Scl>,
5768                                     EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>, REX_W, T8, PD;
5770   // Define only if AVX512VL feature is present.
5771   let Predicates = [HasVLX] in {
5772     defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
5773                                    EVEX_V128, EVEX_CD8<32, CD8VF>, T8, PD;
5774     defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
5775                                    EVEX_V256, EVEX_CD8<32, CD8VF>, T8, PD;
5776     defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
5777                                    EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
5778     defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
5779                                    EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
5780   }
  // 128/256-bit f16 forms need both FP16 and VLX.
5782   let Predicates = [HasFP16, HasVLX] in {
5783     defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
5784                                    EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6, PD;
5785     defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
5786                                    EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6, PD;
5787   }
// vscalef: packed forms use opcode 0x2C, scalar forms 0x2D; all forms are
// scheduled with SchedWriteFAdd.
5789 defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", SchedWriteFAdd>;
5791 //===----------------------------------------------------------------------===//
5792 // AVX-512  VPTESTM instructions
5793 //===----------------------------------------------------------------------===//
// Register/register (rr) and register/memory (rm) forms of a test
// instruction that writes a mask register (_.KRC).  Both are built on
// AVX512_maskable_cmp with null_frag patterns.  Only the rm form carries
// EVEX_CD8<_.EltSize, CD8VF>.
5795 multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
5796                          X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5797   // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
5798   // There are just too many permutations due to commutability and bitcasts.
  // hasSideEffects/mayLoad are spelled out explicitly here; presumably they
  // cannot be inferred because the patterns are null_frag.
5799   let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  // NOTE(review): the trailing 1 is presumably AVX512_maskable_cmp's
  // commutable flag; confirm against its definition.  rm omits it.
5800   defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5801                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5802                       "$src2, $src1", "$src1, $src2",
5803                    (null_frag), (null_frag), 1>,
5804                    EVEX, VVVV, Sched<[sched]>;
5805   let mayLoad = 1 in
5806   defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5807                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5808                        "$src2, $src1", "$src1, $src2",
5809                    (null_frag), (null_frag)>,
5810                    EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
5811                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5812   }
// Broadcast-memory (rmb) form of the test instruction: the second source is
// a scalar memory operand (_.ScalarMemOp) printed with _.BroadcastStr, and
// the record carries EVEX_B.  Like avx512_vptest it has null_frag patterns,
// with mayLoad = 1 and hasSideEffects = 0 set explicitly.
5815 multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
5816                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5817   let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
5818   defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5819                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5820                     "${src2}"#_.BroadcastStr#", $src1",
5821                     "$src1, ${src2}"#_.BroadcastStr,
5822                     (null_frag), (null_frag)>,
5823                     EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
5824                     Sched<[sched.Folded, sched.ReadAfterFold]>;
// Expands one element type (described by the AVX512VLVectorVTInfo `_`) into
// the three vector widths.  Each width combines avx512_vptest (rr/rm) with
// avx512_vptest_mb (rmb).
//   Z          - _.info512, predicated on HasAVX512
//   Z256, Z128 - _.info256 / _.info128, predicated on HasAVX512 and HasVLX
5827 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
5828                                   X86SchedWriteWidths sched,
5829                                   AVX512VLVectorVTInfo _> {
5830   let Predicates  = [HasAVX512] in
5831   defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>,
5832            avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
5834   let Predicates = [HasAVX512, HasVLX] in {
5835   defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>,
5836               avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
5837   defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>,
5838               avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
5839   }
// Dword and qword variants of the test instruction.  Both share `opc`; the
// mnemonic gets a "d" or "q" suffix and the Q variant additionally carries
// REX_W.
5842 multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
5843                             X86SchedWriteWidths sched> {
5844   defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
5845                                  avx512vl_i32_info>;
5846   defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
5847                                  avx512vl_i64_info>, REX_W;
// Word and byte variants of the test instruction.  Only avx512_vptest
// (rr/rm) is instantiated; avx512_vptest_mb is not used here, so there is no
// broadcast form.  Both share `opc`; the word variants carry REX_W and the
// byte variants do not.
//   WZ, BZ         - 512-bit, predicated on HasBWI
//   W/B Z256, Z128 - predicated on HasVLX and HasBWI
5850 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5851                             X86SchedWriteWidths sched> {
5852   let Predicates = [HasBWI] in {
5853   defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
5854                             v32i16_info>, EVEX_V512, REX_W;
5855   defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
5856                             v64i8_info>, EVEX_V512;
5857   }
5859   let Predicates = [HasVLX, HasBWI] in {
5860   defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
5861                             v16i16x_info>, EVEX_V256, REX_W;
5862   defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
5863                             v8i16x_info>, EVEX_V128, REX_W;
5864   defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
5865                             v32i8x_info>, EVEX_V256;
5866   defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
5867                             v16i8x_info>, EVEX_V128;
5868   }
// Convenience multiclass with no body of its own: it inherits the word/byte
// forms (using opc_wb) and the dword/qword forms (using opc_dq).
5871 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5872                                    X86SchedWriteWidths sched> :
5873   avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
5874   avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
// vptestm and vptestnm use the same opcodes (0x26 for byte/word, 0x27 for
// dword/qword) and the same scheduling class; they differ only in the
// trailing prefix mixin (PD for vptestm, XS for vptestnm).
5876 defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
5877                                          SchedWriteVecLogic>, T8, PD;
5878 defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
5879                                          SchedWriteVecLogic>, T8, XS;
5881 //===----------------------------------------------------------------------===//
5882 // AVX-512  Shift instructions
5883 //===----------------------------------------------------------------------===//
// Shift/rotate by an 8-bit immediate, in two AVX512_maskable forms:
//   ri - vector source in a register, encoded with format ImmFormR
//   mi - vector source loaded from memory (_.LdFrag), format ImmFormM
// In both, $src2 is a u8imm operand matched as (i8 timm).  The mi form is
// scheduled with sched.Folded only.
5885 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5886                             string OpcodeStr, SDNode OpNode,
5887                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5888   let ExeDomain = _.ExeDomain in {
5889   defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5890                    (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5891                       "$src2, $src1", "$src1, $src2",
5892                    (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
5893                    Sched<[sched]>;
5894   defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5895                    (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5896                        "$src2, $src1", "$src1, $src2",
5897                    (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
5898                           (i8 timm:$src2)))>,
5899                    Sched<[sched.Folded]>;
5900   }
// Broadcast-memory (mbi) form of the immediate shift: the vector source is a
// scalar memory operand (_.ScalarMemOp) matched via _.BroadcastLdFrag and
// printed with _.BroadcastStr; the record carries EVEX_B.  The immediate is
// a u8imm matched as (i8 timm).
5903 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5904                              string OpcodeStr, SDNode OpNode,
5905                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5906   let ExeDomain = _.ExeDomain in
5907   defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5908                    (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5909       "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
5910      (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
5911      EVEX_B, Sched<[sched.Folded]>;
// Shift of a full vector ($src1, type _.VT) by a count held in a 128-bit
// operand ($src2, type SrcVT):
//   rr - count in a VR128X register
//   rm - count loaded from an i128mem operand
// SrcVT is independent of the width of `_`; only $src1 and $dst scale with
// the vector type.
5914 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5915                             X86FoldableSchedWrite sched, ValueType SrcVT,
5916                             X86VectorVTInfo _> {
5917    // src2 is always 128-bit
5918   let ExeDomain = _.ExeDomain in {
5919   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5920                    (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5921                       "$src2, $src1", "$src1, $src2",
5922                    (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
5923                    AVX512BIBase, EVEX, VVVV, Sched<[sched]>;
5924   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5925                    (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5926                        "$src2, $src1", "$src1, $src2",
5927                    (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
5928                    AVX512BIBase,
5929                    EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
5930   }
// Expands avx512_shift_rrm into the three vector widths of VTInfo.
//   Z          - info512, predicated on `prd`
//   Z256, Z128 - info256 / info128, predicated on `prd` and HasVLX
// The EVEX_CD8 tuple kind differs per width (CD8VQ / CD8VH / CD8VF for
// 512 / 256 / 128).
// NOTE(review): presumably this keeps the disp8 scaling matched to the
// 128-bit count operand used by avx512_shift_rrm; confirm against the CD8
// definitions.
5933 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5934                               X86SchedWriteWidths sched, ValueType SrcVT,
5935                               AVX512VLVectorVTInfo VTInfo,
5936                               Predicate prd> {
5937   let Predicates = [prd] in
5938   defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
5939                                VTInfo.info512>, EVEX_V512,
5940                                EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5941   let Predicates = [prd, HasVLX] in {
5942   defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
5943                                VTInfo.info256>, EVEX_V256,
5944                                EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5945   defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
5946                                VTInfo.info128>, EVEX_V128,
5947                                EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
5948   }
// Dword, qword and word variants of a shift-by-128-bit-count instruction,
// each with its own opcode (opcd / opcq / opcw) and count type
// (v4i32 / v2i64 / v8i16).  D and Q are predicated on HasAVX512, W on
// HasBWI; the Q variant additionally carries REX_W.
5951 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5952                               string OpcodeStr, SDNode OpNode,
5953                               X86SchedWriteWidths sched> {
5954   defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
5955                               avx512vl_i32_info, HasAVX512>;
5956   defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
5957                               avx512vl_i64_info, HasAVX512>, REX_W;
5958   defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
5959                               avx512vl_i16_info, HasBWI>;
// Expands the immediate-shift forms into the three vector widths of VTInfo.
// Each width combines avx512_shift_rmi (ri/mi) with avx512_shift_rmbi (mbi).
//   Z          - info512, predicated on HasAVX512
//   Z256, Z128 - info256 / info128, predicated on HasAVX512 and HasVLX
5962 multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5963                                   string OpcodeStr, SDNode OpNode,
5964                                   X86SchedWriteWidths sched,
5965                                   AVX512VLVectorVTInfo VTInfo> {
5966   let Predicates = [HasAVX512] in
5967   defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5968                               sched.ZMM, VTInfo.info512>,
5969              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
5970                                VTInfo.info512>, EVEX_V512;
5971   let Predicates = [HasAVX512, HasVLX] in {
5972   defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5973                               sched.YMM, VTInfo.info256>,
5974              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
5975                                VTInfo.info256>, EVEX_V256;
5976   defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5977                               sched.XMM, VTInfo.info128>,
5978              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
5979                                VTInfo.info128>, EVEX_V128;
5980   }
// Word-element immediate shifts.  Only avx512_shift_rmi (ri/mi) is used, so
// no broadcast (mbi) form is generated.  All records carry WIG.
//   WZ           - v32i16_info, predicated on HasBWI
//   WZ256, WZ128 - v16i16x_info / v8i16x_info, predicated on HasVLX + HasBWI
// OpcodeStr is used as given; callers pass the full "…w" mnemonic.
5983 multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
5984                               string OpcodeStr, SDNode OpNode,
5985                               X86SchedWriteWidths sched> {
5986   let Predicates = [HasBWI] in
5987   defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5988                                sched.ZMM, v32i16_info>, EVEX_V512, WIG;
5989   let Predicates = [HasVLX, HasBWI] in {
5990   defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5991                                sched.YMM, v16i16x_info>, EVEX_V256, WIG;
5992   defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5993                                sched.XMM, v8i16x_info>, EVEX_V128, WIG;
5994   }
// Dword and qword immediate shifts.  The mnemonic gets a "d" or "q" suffix;
// D uses opcd with EVEX_CD8<32, CD8VF>, Q uses opcq with EVEX_CD8<64, CD8VF>
// plus REX_W.  Because Q adds REX_W, callers may pass the same value for
// opcd and opcq.
5997 multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
5998                                Format ImmFormR, Format ImmFormM,
5999                                string OpcodeStr, SDNode OpNode,
6000                                X86SchedWriteWidths sched> {
6001   defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
6002                                  sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
6003   defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
6004                                  sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;
// Immediate-count shifts and rotates.  The individual instructions are told
// apart by the Format pair (MRM0/1/2/4/6) rather than by opcode: the dword
// forms all use 0x72 and the word forms 0x71.  VPSRA, VPROR and VPROL pass
// 0x72 for the qword form as well (avx512_shift_rmi_dq adds REX_W to Q),
// while VPSRL and VPSLL pass 0x73.  VPROR/VPROL have no word form.
6007 defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
6008                                  SchedWriteVecShiftImm>,
6009              avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
6010                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6012 defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
6013                                  SchedWriteVecShiftImm>,
6014              avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
6015                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6017 defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
6018                                  SchedWriteVecShiftImm>,
6019              avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
6020                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6022 defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
6023                                  SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6024 defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
6025                                  SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
// Shifts whose count comes from a 128-bit register/memory operand.  These
// reuse the VPSLL/VPSRA/VPSRL defm prefixes; the generated record names
// differ through the suffixes added by the multiclasses (…rr/…rm here versus
// …ri/…mi/…mbi above).  Arguments are the dword, qword and word opcodes.
6027 defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
6028                                 SchedWriteVecShift>;
6029 defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
6030                                 SchedWriteVecShift>;
6031 defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
6032                                 SchedWriteVecShift>;
6034 // Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Four selection patterns, active under HasAVX512 + NoVLX, that widen a
// 128/256-bit 64-bit-element arithmetic right shift to the 512-bit
// instruction.  Each one:
//   1. places $src1 in the low sub_xmm/sub_ymm of an IMPLICIT_DEF v8i64,
//   2. runs VPSRAQZrr (count in VR128X) or VPSRAQZri (immediate count),
//   3. extracts the same subregister from the result.
// The remaining lanes of the 512-bit value come from IMPLICIT_DEF and are
// not part of the extracted result.
6035 let Predicates = [HasAVX512, NoVLX] in {
  // X86vsra, v4i64: count in a 128-bit register.
6036   def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
6037             (EXTRACT_SUBREG (v8i64
6038               (VPSRAQZrr
6039                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6040                  VR128X:$src2)), sub_ymm)>;
  // X86vsra, v2i64: count in a 128-bit register.
6042   def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6043             (EXTRACT_SUBREG (v8i64
6044               (VPSRAQZrr
6045                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6046                  VR128X:$src2)), sub_xmm)>;
  // X86vsrai, v4i64: immediate count.
6048   def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
6049             (EXTRACT_SUBREG (v8i64
6050               (VPSRAQZri
6051                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6052                  timm:$src2)), sub_ymm)>;
  // X86vsrai, v2i64: immediate count.
6054   def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
6055             (EXTRACT_SUBREG (v8i64
6056               (VPSRAQZri
6057                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6058                  timm:$src2)), sub_xmm)>;
6061 //===-------------------------------------------------------------------===//
6062 // Variable Bit Shifts
6063 //===-------------------------------------------------------------------===//
// Per-element variable shift: both the value ($src1) and the count ($src2)
// are full vectors of type _.VT.
//   rr - count in a register
//   rm - count loaded as a full vector via _.LdFrag; adds
//        EVEX_CD8<_.EltSize, CD8VF>
// Both forms are built on AVX512_maskable with AVX5128IBase.
6065 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
6066                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6067   let ExeDomain = _.ExeDomain in {
6068   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6069                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6070                       "$src2, $src1", "$src1, $src2",
6071                    (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
6072                    AVX5128IBase, EVEX, VVVV, Sched<[sched]>;
6073   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6074                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6075                        "$src2, $src1", "$src1, $src2",
6076                    (_.VT (OpNode _.RC:$src1,
6077                    (_.VT (_.LdFrag addr:$src2))))>,
6078                    AVX5128IBase, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
6079                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6080   }
// Broadcast-memory (rmb) form of the variable shift: the count is a scalar
// memory operand (_.ScalarMemOp) matched via _.BroadcastLdFrag and printed
// with _.BroadcastStr; the record carries EVEX_B.
6083 multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
6084                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6085   let ExeDomain = _.ExeDomain in
6086   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6087                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6088                     "${src2}"#_.BroadcastStr#", $src1",
6089                     "$src1, ${src2}"#_.BroadcastStr,
6090                     (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
6091                     AVX5128IBase, EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
6092                     Sched<[sched.Folded, sched.ReadAfterFold]>;
// Expands the variable shift into the three vector widths of `_`.  Each
// width combines avx512_var_shift (rr/rm) with avx512_var_shift_mb (rmb).
//   Z          - _.info512, predicated on HasAVX512
//   Z256, Z128 - _.info256 / _.info128, predicated on HasAVX512 and HasVLX
6095 multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6096                                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
6097   let Predicates  = [HasAVX512] in
6098   defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
6099            avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
6101   let Predicates = [HasAVX512, HasVLX] in {
6102   defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
6103               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
6104   defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
6105               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
6106   }
// Dword and qword variants of a variable shift/rotate.  Both share `opc`;
// the mnemonic gets a "d" or "q" suffix and the Q variant additionally
// carries REX_W.
6109 multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
6110                                   SDNode OpNode, X86SchedWriteWidths sched> {
6111   defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
6112                                  avx512vl_i32_info>;
6113   defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
6114                                  avx512vl_i64_info>, REX_W;
6117 // Use 512bit version to implement 128/256 bit in case NoVLX.
// Emits two anonymous patterns, guarded by predicate list `p`, that select
// a 256-bit and a 128-bit OpNode by widening to the 512-bit instruction.
//   _         - element type family; info128/info256/info512 give the types
//   OpcodeStr - record-name stem; the instruction used is OpcodeStr#"Zrr",
//               looked up with !cast<Instruction>
// Both operands are inserted into the low sub_ymm/sub_xmm of an
// IMPLICIT_DEF 512-bit value, and the same subregister is extracted from
// the result.
6118 multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6119                                      SDNode OpNode, list<Predicate> p> {
6120   let Predicates = p in {
  // 256-bit source pattern.
6121   def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6122                                   (_.info256.VT _.info256.RC:$src2))),
6123             (EXTRACT_SUBREG
6124                 (!cast<Instruction>(OpcodeStr#"Zrr")
6125                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6126                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6127              sub_ymm)>;
  // 128-bit source pattern.
6129   def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6130                                   (_.info128.VT _.info128.RC:$src2))),
6131             (EXTRACT_SUBREG
6132                 (!cast<Instruction>(OpcodeStr#"Zrr")
6133                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6134                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
6135              sub_xmm)>;
6136   }
// Word-element variable shifts.  Only avx512_var_shift (rr/rm) is used, so
// no broadcast (rmb) form is generated.  All records carry REX_W.
//   WZ           - v32i16_info, predicated on HasBWI
//   WZ256, WZ128 - v16i16x_info / v8i16x_info, predicated on HasVLX + HasBWI
// OpcodeStr is used as given; callers pass the full "…w" mnemonic.
6138 multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6139                               SDNode OpNode, X86SchedWriteWidths sched> {
6140   let Predicates = [HasBWI] in
6141   defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6142               EVEX_V512, REX_W;
6143   let Predicates = [HasVLX, HasBWI] in {
6145   defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6146               EVEX_V256, REX_W;
6147   defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
6148               EVEX_V128, REX_W;
6149   }
// Variable (per-element count) shifts.  Each defm combines the dword/qword
// forms (avx512_var_shift_types) with the word forms (avx512_var_shift_w),
// which use a separate opcode.
6152 defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
6153               avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
6155 defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
6156               avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
6158 defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
6159               avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
// Variable rotates match the generic rotr/rotl nodes and have dword/qword
// forms only.
6161 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6162 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
// NoVLX fallbacks: select 128/256-bit operations through the 512-bit "Zrr"
// record named by the string argument.  The qword arithmetic shift needs
// HasAVX512; the word shifts need HasBWI.
6164 defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
6165 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
6166 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
6167 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
6170 // Use the 512-bit VPROLV (variable count) and VPROL (immediate count) instructions to implement v2i64/v4i64 + v4i32/v8i32 rotates when VLX is unavailable (NoVLX).
// Eight selection patterns, active under HasAVX512 + NoVLX, that widen
// 128/256-bit left rotates to 512-bit instructions.  Every pattern inserts
// its vector operand(s) into the low sub_xmm/sub_ymm of an IMPLICIT_DEF
// 512-bit value, runs the 512-bit instruction, and extracts the same
// subregister from the result.
//   rotl      (vector count)    -> VPROLVQZrr / VPROLVDZrr
//   X86vrotli (immediate count) -> VPROLQZri  / VPROLDZri
6171 let Predicates = [HasAVX512, NoVLX] in {
  // Variable-count rotates, 64-bit elements.
6172   def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6173             (EXTRACT_SUBREG (v8i64
6174               (VPROLVQZrr
6175                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6176                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6177                        sub_xmm)>;
6178   def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6179             (EXTRACT_SUBREG (v8i64
6180               (VPROLVQZrr
6181                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6182                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6183                        sub_ymm)>;
  // Variable-count rotates, 32-bit elements.
6185   def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6186             (EXTRACT_SUBREG (v16i32
6187               (VPROLVDZrr
6188                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6189                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6190                         sub_xmm)>;
6191   def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6192             (EXTRACT_SUBREG (v16i32
6193               (VPROLVDZrr
6194                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6195                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6196                         sub_ymm)>;
  // Immediate-count rotates, 64-bit elements.
6198   def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
6199             (EXTRACT_SUBREG (v8i64
6200               (VPROLQZri
6201                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6202                         timm:$src2)), sub_xmm)>;
6203   def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
6204             (EXTRACT_SUBREG (v8i64
6205               (VPROLQZri
6206                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6207                        timm:$src2)), sub_ymm)>;
  // Immediate-count rotates, 32-bit elements.
6209   def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
6210             (EXTRACT_SUBREG (v16i32
6211               (VPROLDZri
6212                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6213                         timm:$src2)), sub_xmm)>;
6214   def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
6215             (EXTRACT_SUBREG (v16i32
6216               (VPROLDZri
6217                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6218                         timm:$src2)), sub_ymm)>;
// Use the 512-bit VPRORV / VPROR-immediate instructions to implement
// v2i64/v4i64 and v4i32/v8i32 right rotates when VLX is not available.
// Every pattern follows the same recipe: place each vector source in the low
// subregister of an otherwise undefined 512-bit value (INSERT_SUBREG on
// IMPLICIT_DEF), apply the ZMM instruction, then extract the low subregister
// of the result.
let Predicates = [HasAVX512, NoVLX] in {
  // Variable rotate count, 64-bit elements.
  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  // Variable rotate count, 32-bit elements.
  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG
              (v16i32 (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG
              (v16i32 (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  // Immediate rotate count, 64-bit elements.
  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                timm:$src2)),
              sub_xmm)>;
  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                timm:$src2)),
              sub_ymm)>;

  // Immediate rotate count, 32-bit elements.
  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG
              (v16i32 (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                timm:$src2)),
              sub_xmm)>;
  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG
              (v16i32 (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                timm:$src2)),
              sub_ymm)>;
}
6272 //===-------------------------------------------------------------------===//
6273 // 1-src variable permutation VPERMW/D/Q
6274 //===-------------------------------------------------------------------===//
// Instantiates the 512-bit (Z) and 256-bit (Z256) forms of a variable
// permute by combining avx512_var_shift with avx512_var_shift_mb.
// No 128-bit form is defined by this multiclass.
//   opc        - opcode byte.
//   OpcodeStr  - assembly mnemonic.
//   OpNode     - selection DAG node to match.
//   sched      - scheduling class forwarded unchanged to both widths.
//   _          - per-width vector type information.
multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>,
           EVEX_V512;

  // The 256-bit form additionally requires VLX.
  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>,
              EVEX_V256;
}
// Instantiates the 512-bit (Z) and 256-bit (Z256) forms of an
// immediate-controlled permute by combining avx512_shift_rmi with
// avx512_shift_rmbi. No 128-bit form is defined by this multiclass.
//   opc                 - opcode byte.
//   ImmFormR / ImmFormM - instruction formats; both go to avx512_shift_rmi,
//                         only ImmFormM goes to avx512_shift_rmbi.
//   OpcodeStr           - assembly mnemonic.
//   OpNode              - selection DAG node to match.
//   sched               - scheduling class forwarded unchanged to both widths.
//   VTInfo              - per-width vector type information.
multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR,
                                  Format ImmFormM, string OpcodeStr,
                                  SDNode OpNode, X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                            sched, VTInfo.info512>,
           avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                             sched, VTInfo.info512>,
           EVEX_V512;

  // The 256-bit form additionally requires VLX.
  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info256>,
              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                                sched, VTInfo.info256>,
              EVEX_V256;
}
// Instantiates the 512-, 256- and 128-bit forms of a variable permute using
// avx512_var_shift only (no avx512_var_shift_mb form is added here).
//   opc        - opcode byte.
//   OpcodeStr  - assembly mnemonic.
//   prd        - feature predicate required by every width.
//   OpNode     - selection DAG node to match.
//   sched      - scheduling class forwarded unchanged to all widths.
//   _          - per-width vector type information.
multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
                           Predicate prd, SDNode OpNode,
                           X86FoldableSchedWrite sched,
                           AVX512VLVectorVTInfo _> {
  let Predicates = [prd] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
           EVEX_V512;

  // The narrower forms additionally require VLX.
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
                EVEX_V256;
    defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
                EVEX_V128;
  }
}
// Word and byte variable permutes. Both use opcode 0x8D; the word form adds
// REX_W and they are gated on different features (BWI vs. VBMI).
defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
                              WriteVarShuffle256, avx512vl_i16_info>,
              REX_W;
defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
                              WriteVarShuffle256, avx512vl_i8_info>;

// Dword/qword integer variable permutes (opcode 0x36); the qword form adds
// REX_W.
defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i32_info>;
defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i64_info>,
              REX_W;

// Single/double precision variable permutes (opcode 0x16); the double form
// adds REX_W.
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f64_info>,
               REX_W;

// Immediate-controlled 64-bit element permutes. These share the VPERMQ and
// VPERMPD name prefixes with the variable forms above.
defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
                                     X86VPermi, WriteShuffle256,
                                     avx512vl_i64_info>,
              EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
                                      X86VPermi, WriteFShuffle256,
                                      avx512vl_f64_info>,
               EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
6337 //===----------------------------------------------------------------------===//
6338 // AVX-512 - VPERMIL
6339 //===----------------------------------------------------------------------===//
// Defines the variable-control VPERMIL forms for one vector width: register
// control (rr), full-vector memory control (rm) and broadcast memory control
// (rmb).
//   OpcVar     - opcode byte.
//   OpcodeStr  - assembly mnemonic.
//   OpNode     - selection DAG node to match.
//   sched      - scheduling class; the memory forms use its Folded and
//                ReadAfterFold members.
//   _          - type information for the data operand and the result.
//   Ctrl       - type information for the control operand.
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
                             X86VectorVTInfo Ctrl> {
  // Control vector in a register.
  defm rr : AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1,
                                          (Ctrl.VT Ctrl.RC:$src2)))>,
            T8, PD, EVEX, VVVV, Sched<[sched]>;

  // Control vector loaded from memory.
  defm rm : AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1,
                                          (Ctrl.VT
                                            (Ctrl.LdFrag addr:$src2))))>,
            T8, PD, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Control vector produced by a broadcast load. The memory operand class and
  // the broadcast suffix come from `_`, the load fragment from `Ctrl`.
  defm rmb : AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                             "${src2}"#_.BroadcastStr#", $src1",
                             "$src1, ${src2}"#_.BroadcastStr,
                             (_.VT (OpNode _.RC:$src1,
                                           (Ctrl.VT
                                             (Ctrl.BroadcastLdFrag
                                               addr:$src2))))>,
             T8, PD, EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Instantiates avx512_permil_vec with X86VPermilpv for all three vector
// widths, picking the matching ZMM/XMM/YMM member of `sched` for each.
//   OpcodeStr  - assembly mnemonic.
//   OpcVar     - opcode byte.
//   sched      - per-width scheduling classes.
//   _          - per-width type information for the data operand.
//   Ctrl       - per-width type information for the control operand.
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    X86SchedWriteWidths sched,
                                    AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
                               _.info512, Ctrl.info512>,
             EVEX_V512;
  }

  // The 128- and 256-bit forms additionally require VLX.
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
                                  _.info128, Ctrl.info128>,
                EVEX_V128;
    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
                                  _.info256, Ctrl.info256>,
                EVEX_V256;
  }
}
// Combines the two VPERMIL flavours under one name:
//   - the variable-control forms (opcode OpcVar) via avx512_permil_vec_common;
//   - the immediate-control forms (opcode OpcImm) via avx512_shift_rmi_sizes
//     matching X86VPermilpi.
// `_` describes the data vectors and `Ctrl` the control vectors used by the
// variable forms.
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl> {
  defm NAME : avx512_permil_vec_common<OpcodeStr, OpcVar,
                                       SchedWriteFVarShuffle, _, Ctrl>;
  defm NAME : avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                     X86VPermilpi, SchedWriteFShuffle, _>,
              EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
// VPERMILPS: f32 data with i32 control; immediate opcode 0x04, variable
// opcode 0x0C.
let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C,
                               avx512vl_f32_info, avx512vl_i32_info>;

// VPERMILPD: f64 data with i64 control; immediate opcode 0x05, variable
// opcode 0x0D, plus REX_W.
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D,
                               avx512vl_f64_info, avx512vl_i64_info>,
                 REX_W;
6401 //===----------------------------------------------------------------------===//
6402 // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6403 //===----------------------------------------------------------------------===//
// All three shuffles use opcode 0x70 and differ in the encoding base class
// (AVX512BIi8Base / AVX512XSIi8Base / AVX512XDIi8Base) and the matched node.
defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                                      X86PShufd, SchedWriteShuffle,
                                      avx512vl_i32_info>,
               EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;

defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw, SchedWriteShuffle>,
               EVEX, AVX512XSIi8Base;

defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw, SchedWriteShuffle>,
               EVEX, AVX512XDIi8Base;
6415 //===----------------------------------------------------------------------===//
6416 // AVX-512 - VPSHUFB
6417 //===----------------------------------------------------------------------===//
// Instantiates the 512-, 256- and 128-bit byte-shuffle forms through
// avx512_var_shift, using the v64i8/v32i8x/v16i8x type infos and the matching
// ZMM/YMM/XMM member of `sched`.
//   opc        - opcode byte.
//   OpcodeStr  - assembly mnemonic.
//   OpNode     - selection DAG node to match.
//   sched      - per-width scheduling classes.
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
           EVEX_V512;

  // The narrower forms additionally require VLX.
  let Predicates = [HasVLX, HasBWI] in {
    defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM,
                                 v32i8x_info>,
                EVEX_V256;
    defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM,
                                 v16i8x_info>,
                EVEX_V128;
  }
}
// VPSHUFB: opcode 0x00, matched from X86pshufb, marked WIG.
defm VPSHUFB : avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
                                   SchedWriteVarShuffle>,
               WIG;
6436 //===----------------------------------------------------------------------===//
6437 // Move Low to High and High to Low packed FP Instructions
6438 //===----------------------------------------------------------------------===//
// Register-register VMOVLHPS (opcode 0x16) on 128-bit registers, selected
// from X86Movlhps.
def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst,
                                  (v4f32 (X86Movlhps VR128X:$src1,
                                                     VR128X:$src2)))]>,
                  Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV;

// Register-register VMOVHLPS (opcode 0x12), selected from X86Movhlps.
// Only this definition is marked commutable.
let isCommutable = 1 in
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst,
                                  (v4f32 (X86Movhlps VR128X:$src1,
                                                     VR128X:$src2)))]>,
                  Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV;
6452 //===----------------------------------------------------------------------===//
6453 // VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
6455 //===----------------------------------------------------------------------===//
// Defines the load form (rm) of a VMOVH*/VMOVL* instruction: a register
// source combined with a 64-bit memory operand.
//   opc        - opcode byte.
//   OpcodeStr  - assembly mnemonic.
//   OpNode     - pattern operator applied to ($src1, loaded value).
//   _          - vector type information (register class, VT, ExeDomain).
// The pattern loads an f64, turns it into a v2f64 with scalar_to_vector and
// bitconverts that to _.VT before handing it to OpNode.
multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNode,
                                  X86VectorVTInfo _> {
  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.RC:$src1, f64mem:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set _.RC:$dst,
                        (OpNode _.RC:$src1,
                                (_.VT (bitconvert
                                  (v2f64 (scalar_to_vector
                                    (loadf64 addr:$src2)))))))]>,
           Sched<[SchedWriteFShuffle.XMM.Folded,
                  SchedWriteFShuffle.XMM.ReadAfterFold]>,
           EVEX, VVVV;
}
// VMOVHPS and VMOVLPS are instantiated with null_frag, i.e. without a
// pattern: the Movlhps node should only be created in SSE1, and a MOVLPS
// pattern is even more complex. The PD forms match X86Unpckl / X86Movsd.
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
                                          v4f32x_info>,
                   EVEX_CD8<32, CD8VT2>, TB;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
                                          v2f64x_info>,
                   EVEX_CD8<64, CD8VT1>, TB, PD, REX_W;
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
                                          v4f32x_info>,
                   EVEX_CD8<32, CD8VT2>, TB;
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
                                          v2f64x_info>,
                   EVEX_CD8<64, CD8VT1>, TB, PD, REX_W;
// Extra selection patterns that fold an X86vzload64 memory operand into the
// VMOVHPD / VMOVLPD load forms.
let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;

  // VMOVLPD patterns
  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
}
// Store forms of VMOVHPS/VMOVHPD (opcode 0x17) and VMOVLPS/VMOVLPD (opcode
// 0x13). All four share the WriteFStore scheduling class.
let SchedRW = [WriteFStore] in {
  // The PS stores have no pattern, so mayStore/hasSideEffects are set
  // explicitly. Each of these `let`s covers only the single def after it.
  let mayStore = 1, hasSideEffects = 0 in
  def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
                                (ins f64mem:$dst, VR128X:$src),
                                "vmovhps\t{$src, $dst|$dst, $src}",
                                []>,
                      EVEX, EVEX_CD8<32, CD8VT2>;

  // Stores element 0 of X86Unpckh($src, $src).
  def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
                                (ins f64mem:$dst, VR128X:$src),
                                "vmovhpd\t{$src, $dst|$dst, $src}",
                                [(store
                                   (f64 (extractelt
                                     (v2f64 (X86Unpckh VR128X:$src,
                                                       VR128X:$src)),
                                     (iPTR 0))),
                                   addr:$dst)]>,
                      EVEX, EVEX_CD8<64, CD8VT1>, REX_W;

  let mayStore = 1, hasSideEffects = 0 in
  def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
                                (ins f64mem:$dst, VR128X:$src),
                                "vmovlps\t{$src, $dst|$dst, $src}",
                                []>,
                      EVEX, EVEX_CD8<32, CD8VT2>;

  // Stores element 0 of $src.
  def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
                                (ins f64mem:$dst, VR128X:$src),
                                "vmovlpd\t{$src, $dst|$dst, $src}",
                                [(store
                                   (f64 (extractelt (v2f64 VR128X:$src),
                                                    (iPTR 0))),
                                   addr:$dst)]>,
                      EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
} // SchedRW
// Additional VMOVHPD store pattern: storing element 0 of
// X86VPermilpi($src, 1) is selected as a VMOVHPD store of $src.
let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(store (f64 (extractelt
                          (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
                          (iPTR 0))),
                   addr:$dst),
            (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
}
6526 //===----------------------------------------------------------------------===//
6527 // FMA - Fused Multiply Operations
// Packed FMA, 213 operand order: register (r), memory (m) and broadcast
// memory (mb) forms for one vector width.
//   opc         - opcode byte.
//   OpcodeStr   - assembly mnemonic.
//   OpNode      - operator used in the first pattern argument.
//   MaskOpNode  - node used in the second pattern argument.
//   sched       - scheduling class; the memory forms use its Folded and
//                 ReadAfterFold members.
//   _           - vector type information.
// $src1 is tied to $dst. Every pattern passes operands to the node in the
// order ($src2, $src1, $src3).
// NOTE(review): the trailing "1, 1" / "1, 0" arguments are forwarded to
// AVX512_maskable_fma; presumably commutability flags -- confirm against its
// definition.
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr,
                               SDPatternOperator OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm r : AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
               (ins _.RC:$src2, _.RC:$src3),
               OpcodeStr, "$src3, $src2", "$src2, $src3",
               (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
               (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
             EVEX, VVVV, Sched<[sched]>;

    defm m : AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
               (ins _.RC:$src2, _.MemOp:$src3),
               OpcodeStr, "$src3, $src2", "$src2, $src3",
               (_.VT (OpNode _.RC:$src2, _.RC:$src1,
                             (_.LdFrag addr:$src3))),
               (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1,
                                 (_.LdFrag addr:$src3))), 1, 0>,
             EVEX, VVVV,
             Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;

    // The broadcast suffix is pasted onto ${src3} in both assembly syntaxes.
    defm mb : AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src2, _.ScalarMemOp:$src3),
                OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
                "$src2, ${src3}"#_.BroadcastStr,
                (OpNode _.RC:$src2, _.RC:$src1,
                        (_.VT (_.BroadcastLdFrag addr:$src3))),
                (MaskOpNode _.RC:$src2, _.RC:$src1,
                            (_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
              EVEX, VVVV, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
  }
}
// Packed FMA, 213 operand order: register form with an explicit rounding
// control operand ($rc).
//   opc        - opcode byte.
//   OpcodeStr  - assembly mnemonic.
//   OpNode     - rounding node; the same node is used for both pattern
//                arguments.
//   sched      - scheduling class.
//   _          - vector type information.
// $src1 is tied to $dst and operands are passed to OpNode as
// ($src2, $src1, $src3, $rc). Unlike avx512_fma3p_213_rm, this `let` does not
// set mayRaiseFPException.
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, Uses = [MXCSR] in
  defm rb : AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
              OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
              (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3,
                            (i32 timm:$rc))),
              (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3,
                            (i32 timm:$rc))), 1, 1>,
            EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
}
// Instantiates the 213-order packed FMA for all three vector widths.
// The 512-bit form (Z) gets both the r/m/mb forms and the rounding-control
// form; the 256- and 128-bit forms get only r/m/mb and also require VLX.
//   opc         - opcode byte.
//   OpcodeStr   - assembly mnemonic.
//   OpNode      - operator for the first pattern argument of the r/m/mb forms.
//   MaskOpNode  - node for the second pattern argument of the r/m/mb forms.
//   OpNodeRnd   - node for the rounding-control form.
//   sched       - per-width scheduling classes.
//   _           - per-width vector type information.
//   prd         - base feature predicate (defaults to HasAVX512).
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr,
                                   SDPatternOperator OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _,
                                   Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                 sched.ZMM, _.info512>,
             avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                   _.info512>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
// Instantiates the 213-order packed FMA for the three element types by
// appending "ph", "ps" or "pd" to OpcodeStr:
//   PH - f16, predicated on HasFP16, encoded with T_MAP6, PD;
//   PS - f32, default predicate, encoded with T8, PD;
//   PD - f64, default predicate, encoded with T8, PD, REX_W.
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr,
                              SDPatternOperator OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd> {
  defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f16_info, HasFP16>,
            T_MAP6, PD;
  defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f32_info>,
            T8, PD;
  defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f64_info>,
            T8, PD, REX_W;
}
// 213-order packed FMA instruction families. The template arguments are, in
// order: opcode, mnemonic stem, OpNode, MaskOpNode, OpNodeRnd.
defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213",
                                       any_fma, fma, X86FmaddRnd>;
defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213",
                                       X86any_Fmsub, X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213",
                                       X86Fmaddsub, X86Fmaddsub,
                                       X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213",
                                       X86Fmsubadd, X86Fmsubadd,
                                       X86FmsubaddRnd>;
defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213",
                                       X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213",
                                       X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd>;
// Packed FMA, 231 operand order: register (r), memory (m) and broadcast
// memory (mb) forms for one vector width.
//   opc         - opcode byte.
//   OpcodeStr   - assembly mnemonic.
//   OpNode      - operator used in the first pattern argument of m and mb.
//   MaskOpNode  - node used in the second pattern argument.
//   sched       - scheduling class; the memory forms use its Folded and
//                 ReadAfterFold members.
//   _           - vector type information.
// $src1 is tied to $dst. Patterns pass operands to the node in the order
// ($src2, $src3, $src1). The register form passes null_frag as its first
// pattern argument and only supplies the MaskOpNode pattern.
// NOTE(review): the trailing "1, 1" / "1, 0" arguments are forwarded to
// AVX512_maskable_fma; presumably commutability flags -- confirm against its
// definition.
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr,
                               SDPatternOperator OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm r : AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
               (ins _.RC:$src2, _.RC:$src3),
               OpcodeStr, "$src3, $src2", "$src2, $src3",
               (null_frag),
               (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
             EVEX, VVVV, Sched<[sched]>;

    defm m : AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
               (ins _.RC:$src2, _.MemOp:$src3),
               OpcodeStr, "$src3, $src2", "$src2, $src3",
               (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3),
                             _.RC:$src1)),
               (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3),
                                 _.RC:$src1)), 1, 0>,
             EVEX, VVVV,
             Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;

    defm mb : AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src2, _.ScalarMemOp:$src3),
                OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
                "$src2, ${src3}"#_.BroadcastStr,
                (_.VT (OpNode _.RC:$src2,
                              (_.VT (_.BroadcastLdFrag addr:$src3)),
                              _.RC:$src1)),
                (_.VT (MaskOpNode _.RC:$src2,
                                  (_.VT (_.BroadcastLdFrag addr:$src3)),
                                  _.RC:$src1)), 1, 0>,
              EVEX, VVVV, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
  }
}
// Packed FMA, 231 operand order: register form with an explicit rounding
// control operand ($rc).
//   opc        - opcode byte.
//   OpcodeStr  - assembly mnemonic.
//   OpNode     - rounding node used in the second pattern argument.
//   sched      - scheduling class.
//   _          - vector type information.
// $src1 is tied to $dst. The first pattern argument is null_frag; the second
// passes operands to OpNode as ($src2, $src3, $src1, $rc). Unlike
// avx512_fma3p_231_rm, this `let` does not set mayRaiseFPException.
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, Uses = [MXCSR] in
  defm rb : AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
              OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
              (null_frag),
              (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1,
                            (i32 timm:$rc))),
              1, 1>,
            EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
}
// Instantiates the 231-order packed FMA for all three vector widths.
// The 512-bit form (Z) gets both the r/m/mb forms and the rounding-control
// form; the 256- and 128-bit forms get only r/m/mb and also require VLX.
//   opc         - opcode byte.
//   OpcodeStr   - assembly mnemonic.
//   OpNode      - operator for the first pattern argument of the r/m/mb forms.
//   MaskOpNode  - node for the second pattern argument of the r/m/mb forms.
//   OpNodeRnd   - node for the rounding-control form.
//   sched       - per-width scheduling classes.
//   _           - per-width vector type information.
//   prd         - base feature predicate (defaults to HasAVX512).
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr,
                                   SDPatternOperator OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _,
                                   Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                 sched.ZMM, _.info512>,
             avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                   _.info512>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
// Instantiates the 231-order packed FMA for the three element types by
// appending "ph", "ps" or "pd" to OpcodeStr:
//   PH - f16, predicated on HasFP16, encoded with T_MAP6, PD;
//   PS - f32, default predicate, encoded with T8, PD;
//   PD - f64, default predicate, encoded with T8, PD, REX_W.
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr,
                              SDPatternOperator OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd> {
  defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f16_info, HasFP16>,
            T_MAP6, PD;
  defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f32_info>,
            T8, PD;
  defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f64_info>,
            T8, PD, REX_W;
}
// 231-order packed FMA instruction families. The template arguments are, in
// order: opcode, mnemonic stem, OpNode, MaskOpNode, OpNodeRnd.
defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231",
                                       any_fma, fma, X86FmaddRnd>;
defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231",
                                       X86any_Fmsub, X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231",
                                       X86Fmaddsub, X86Fmaddsub,
                                       X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231",
                                       X86Fmsubadd, X86Fmsubadd,
                                       X86FmsubaddRnd>;
defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231",
                                       X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231",
                                       X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd>;
// Packed FMA, 132 operand order: register (r), memory (m) and broadcast
// memory (mb) forms for one vector width.
//   opc         - opcode byte.
//   OpcodeStr   - assembly mnemonic.
//   OpNode      - operator used in the first pattern argument of m and mb.
//   MaskOpNode  - node used in the second pattern argument.
//   sched       - scheduling class; the memory forms use its Folded and
//                 ReadAfterFold members.
//   _           - vector type information.
// $src1 is tied to $dst. The register form passes null_frag as its first
// pattern argument and matches MaskOpNode with ($src1, $src3, $src2).
// NOTE(review): the trailing "1, 1" / "1, 0" arguments are forwarded to
// AVX512_maskable_fma; presumably commutability flags -- confirm against its
// definition.
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr,
                               SDPatternOperator OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm r : AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
               (ins _.RC:$src2, _.RC:$src3),
               OpcodeStr, "$src3, $src2", "$src2, $src3",
               (null_frag),
               (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
             EVEX, VVVV, Sched<[sched]>;

    // The pattern is written in 312 order so that the load sits in a
    // different position from the 213 and 231 patterns; this helps
    // tablegen's duplicate pattern detection.
    defm m : AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
               (ins _.RC:$src2, _.MemOp:$src3),
               OpcodeStr, "$src3, $src2", "$src2, $src3",
               (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1,
                             _.RC:$src2)),
               (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1,
                                 _.RC:$src2)), 1, 0>,
             EVEX, VVVV,
             Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;

    // The pattern is written in 312 order so that the load sits in a
    // different position from the 213 and 231 patterns; this helps
    // tablegen's duplicate pattern detection.
    defm mb : AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src2, _.ScalarMemOp:$src3),
                OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
                "$src2, ${src3}"#_.BroadcastStr,
                (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
                              _.RC:$src1, _.RC:$src2)),
                (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
                                  _.RC:$src1, _.RC:$src2)), 1, 0>,
              EVEX, VVVV, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
  }
}
// Packed FMA, 132 operand order: register form with an explicit rounding
// control operand ($rc).
//   opc        - opcode byte.
//   OpcodeStr  - assembly mnemonic.
//   OpNode     - rounding node used in the second pattern argument.
//   sched      - scheduling class.
//   _          - vector type information.
// $src1 is tied to $dst. The first pattern argument is null_frag; the second
// passes operands to OpNode as ($src1, $src3, $src2, $rc). Unlike
// avx512_fma3p_132_rm, this `let` does not set mayRaiseFPException.
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, Uses = [MXCSR] in
  defm rb : AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
              OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
              (null_frag),
              (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2,
                            (i32 timm:$rc))),
              1, 1>,
            EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
}
// Instantiates a packed 132-form FMA at every vector width described by `_`:
//   Z    (info512) - avx512_fma3p_132_rm plus avx512_fma3_132_round, under [prd].
//   Z256 (info256) - avx512_fma3p_132_rm only, under [HasVLX, prd].
//   Z128 (info128) - avx512_fma3p_132_rm only, under [HasVLX, prd].
// Each width gets the matching EVEX_V* class and EVEX_CD8<EltSize, CD8VF>.
// `prd` defaults to HasAVX512; OpNodeRnd is used only by the 512-bit rounding
// variant.
6771 multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6772                                    SDNode MaskOpNode, SDNode OpNodeRnd,
6773                                    X86SchedWriteWidths sched,
6774                                    AVX512VLVectorVTInfo _,
6775                                    Predicate prd = HasAVX512> {
       // 512-bit: register/memory/broadcast forms plus the rounding-control form.
6776   let Predicates = [prd] in {
6777     defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6778                                       sched.ZMM, _.info512>,
6779                   avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6780                                         _.info512>,
6781                               EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6782   }
       // 256-bit and 128-bit: additionally require HasVLX; no rounding-control form.
6783   let Predicates = [HasVLX, prd] in {
6784     defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6785                                     sched.YMM, _.info256>,
6786                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6787     defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6788                                     sched.XMM, _.info128>,
6789                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6790   }
// Expands one packed 132-form FMA into its three element-type flavours, all
// scheduled with SchedWriteFMA:
//   PH - OpcodeStr#"ph", avx512vl_f16_info, predicate HasFP16, T_MAP6, PD.
//   PS - OpcodeStr#"ps", avx512vl_f32_info, default predicate,  T8, PD.
//   PD - OpcodeStr#"pd", avx512vl_f64_info, default predicate,  T8, PD, REX_W.
6793 multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6794                               SDNode MaskOpNode, SDNode OpNodeRnd > {
6795     defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6796                                       OpNodeRnd, SchedWriteFMA,
6797                                       avx512vl_f16_info, HasFP16>, T_MAP6, PD;
6798     defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6799                                       OpNodeRnd, SchedWriteFMA,
6800                                       avx512vl_f32_info>, T8, PD;
6801     defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6802                                       OpNodeRnd, SchedWriteFMA,
6803                                       avx512vl_f64_info>, T8, PD, REX_W;
// Packed 132-form FMA instruction families. Arguments to avx512_fma3p_132_f
// are, in order: opcode byte, mnemonic stem, OpNode, MaskOpNode, OpNodeRnd.
// The add/sub-alternating families pass the same node for OpNode and
// MaskOpNode; the others pass an `any_`-prefixed node as OpNode.
6806 defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
6807                                        fma, X86FmaddRnd>;
6808 defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
6809                                        X86Fmsub, X86FmsubRnd>;
6810 defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
6811                                        X86Fmaddsub, X86FmaddsubRnd>;
6812 defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
6813                                        X86Fmsubadd, X86FmsubaddRnd>;
6814 defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
6815                                        X86Fnmadd, X86FnmaddRnd>;
6816 defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
6817                                        X86Fnmsub, X86FnmsubRnd>;
6819 // Scalar FMA
// Shared body for one operand-order form of a scalar FMA. It defines:
//   r_Int / m_Int / rb_Int - maskable variants on the vector class _.RC, built
//                            with AVX512_maskable_3src_scalar and a null_frag
//                            pattern.
//   r / m / rb             - isCodeGenOnly, isCommutable variants on the scalar
//                            class _.FRC, whose patterns come from the RHS_r,
//                            RHS_m and RHS_b dag arguments respectively.
// When MaskOnlyReg is set, `r` and `rb` get an empty pattern list; `m` always
// uses [RHS_m]. $src1 is tied to $dst for every variant. The rounding-control
// variants (rb_Int, rb) take an extra AVX512RC:$rc operand and read MXCSR.
6820 multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6821                                dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
6822 let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
6823   defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6824           (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
6825           "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6826           EVEX, VVVV, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
6828   let mayLoad = 1 in
6829   defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6830           (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
6831           "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6832           EVEX, VVVV, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
6833                           SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
6835   let Uses = [MXCSR] in
6836   defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6837          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6838          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
6839          EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
       // Codegen-only scalar-register (FRC) forms; these carry the ISel patterns.
6841   let isCodeGenOnly = 1, isCommutable = 1 in {
6842     def r     : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
6843                      (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6844                      !strconcat(OpcodeStr,
6845                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6846                      !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV, SIMD_EXC;
6847     def m     : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
6848                     (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6849                     !strconcat(OpcodeStr,
6850                                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6851                     [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
6852                                      SchedWriteFMA.Scl.ReadAfterFold]>, EVEX, VVVV, SIMD_EXC;
6854     let Uses = [MXCSR] in
6855     def rb    : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
6856                      (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
6857                      !strconcat(OpcodeStr,
6858                               "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
6859                      !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
6860                      Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV;
6861   }// isCodeGenOnly = 1
6862 }// Constraints = "$src1 = $dst"
// Instantiates avx512_fma3s_common three times, once per operand-order form
// (213, 231, 132), each with its own opcode and FRC patterns. Record names are
// NAME#<form>#SUFF#Z and mnemonics are OpcodeStr#<form>#_.Suffix. For each form
// the three dags passed are the register, memory (load in $src3) and
// rounding-control patterns. MaskOnlyReg is 0 for 213 and 1 for 231 and 132,
// so only the 213 form keeps its register and rounding patterns on the FRC
// instructions.
6865 multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6866                             string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
6867                             X86VectorVTInfo _, string SUFF> {
6868   let ExeDomain = _.ExeDomain in {
6869   defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
6870                 // Operands for intrinsic are in 123 order to preserve passthru
6871                 // semantics.
6872                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6873                          _.FRC:$src3))),
6874                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6875                          (_.ScalarLdFrag addr:$src3)))),
6876                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
6877                          _.FRC:$src3, (i32 timm:$rc)))), 0>;
6879   defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
6880                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6881                                           _.FRC:$src1))),
6882                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
6883                             (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
6884                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
6885                          _.FRC:$src1, (i32 timm:$rc)))), 1>;
6887   // One pattern is in 312 order so that the load is in a different place from the
6888   // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6889   defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
6890                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6891                          _.FRC:$src2))),
6892                 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6893                                  _.FRC:$src1, _.FRC:$src2))),
6894                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
6895                          _.FRC:$src2, (i32 timm:$rc)))), 1>;
6896   }
// Defines all scalar flavours of one FMA operation from its three opcodes
// (213, 231, 132 forms):
//   SS - f32x_info, EVEX_CD8<32, CD8VT1>, T8, PD            (HasAVX512)
//   SD - f64x_info, EVEX_CD8<64, CD8VT1>, T8, PD, REX_W     (HasAVX512)
//   SH - f16x_info, EVEX_CD8<16, CD8VT1>, T_MAP6, PD        (HasFP16)
// All three are marked VEX_LIG.
6899 multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6900                         string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
6901   let Predicates = [HasAVX512] in {
6902     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6903                                  OpNodeRnd, f32x_info, "SS">,
6904                                  EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD;
6905     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6906                                  OpNodeRnd, f64x_info, "SD">,
6907                                  EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD;
6908   }
6909   let Predicates = [HasFP16] in {
6910     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6911                                  OpNodeRnd, f16x_info, "SH">,
6912                                  EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6, PD;
6913   }
// Scalar FMA instruction families. Arguments to avx512_fma3s are, in order:
// the 213, 231 and 132 opcode bytes, the mnemonic stem, OpNode and OpNodeRnd.
6916 defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
6917 defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
6918 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
6919 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
// Selection patterns that map a scalar FMA whose result is merged back into
// the vector $src1 -- Move($src1, scalar_to_vector(fma ...)) -- onto the
// vector-register "_Int" instruction variants.
//   Op / MaskedOp / RndOp - FMA node used in the unmasked, masked and
//                           rounding-control patterns respectively.
//   Prefix / Suffix       - instruction name pieces; the target record is
//                           Prefix#<form>#Suffix#"Z"#<variant>.
//   Move                  - node that merges element 0 into $src1.
//   _                     - vector type info (VT, EltVT, FRC, ScalarLdFrag).
//   ZeroFP                - zero constant used as the false value of a
//                           zero-masking select.
//   prd                   - predicate guarding all patterns (default HasAVX512).
// The operand form (213 / 231 / 132) is chosen by where element 0 of $src1 and
// the load sit among the FMA operands:
//   213: (Op $src2, src1[0], $src3)   231: (Op $src2, $src3, src1[0])
//   132: (Op src1[0], $src3, $src2)
// Masked patterns wrap the FMA in X86selects_mask: a false value of src1[0]
// selects the merge-masking "k" variant, ZeroFP the zero-masking "kz" variant.
6921 multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
6922                                       SDNode RndOp, string Prefix,
6923                                       string Suffix, SDNode Move,
6924                                       X86VectorVTInfo _, PatLeaf ZeroFP,
6925                                       Predicate prd = HasAVX512> {
6926   let Predicates = [prd] in {
         // Unmasked register and memory forms.
6927     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6928                 (Op _.FRC:$src2,
6929                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6930                     _.FRC:$src3))))),
6931               (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
6932                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6933                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6935     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6936                 (Op _.FRC:$src2, _.FRC:$src3,
6937                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6938               (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
6939                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6940                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6942     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6943                 (Op _.FRC:$src2,
6944                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6945                     (_.ScalarLdFrag addr:$src3)))))),
6946               (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
6947                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6948                addr:$src3)>;
6950     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6951                 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6952                     (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
6953               (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
6954                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6955                addr:$src3)>;
6957     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6958                 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6959                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6960               (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
6961                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6962                addr:$src3)>;
         // Merge-masked forms: the select's false value is element 0 of $src1.
6964     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6965                (X86selects_mask VK1WM:$mask,
6966                 (MaskedOp _.FRC:$src2,
6967                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6968                     _.FRC:$src3),
6969                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6970               (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
6971                VR128X:$src1, VK1WM:$mask,
6972                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6973                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6975     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6976                (X86selects_mask VK1WM:$mask,
6977                 (MaskedOp _.FRC:$src2,
6978                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6979                     (_.ScalarLdFrag addr:$src3)),
6980                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6981               (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
6982                VR128X:$src1, VK1WM:$mask,
6983                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6985     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6986                (X86selects_mask VK1WM:$mask,
6987                 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6988                           (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
6989                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6990               (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
6991                VR128X:$src1, VK1WM:$mask,
6992                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6994     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6995                (X86selects_mask VK1WM:$mask,
6996                 (MaskedOp _.FRC:$src2, _.FRC:$src3,
6997                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6998                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6999               (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
7000                VR128X:$src1, VK1WM:$mask,
7001                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7002                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7004     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7005                (X86selects_mask VK1WM:$mask,
7006                 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7007                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7008                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7009               (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
7010                VR128X:$src1, VK1WM:$mask,
7011                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
         // Zero-masked forms: the select's false value is ZeroFP.
7013     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7014                (X86selects_mask VK1WM:$mask,
7015                 (MaskedOp _.FRC:$src2,
7016                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7017                           _.FRC:$src3),
7018                 (_.EltVT ZeroFP)))))),
7019               (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
7020                VR128X:$src1, VK1WM:$mask,
7021                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7022                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7024     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7025                (X86selects_mask VK1WM:$mask,
7026                 (MaskedOp _.FRC:$src2, _.FRC:$src3,
7027                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7028                 (_.EltVT ZeroFP)))))),
7029               (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
7030                VR128X:$src1, VK1WM:$mask,
7031                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7032                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7034     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7035                (X86selects_mask VK1WM:$mask,
7036                 (MaskedOp _.FRC:$src2,
7037                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7038                           (_.ScalarLdFrag addr:$src3)),
7039                 (_.EltVT ZeroFP)))))),
7040               (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
7041                VR128X:$src1, VK1WM:$mask,
7042                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
         // 132 form: the load must be the second multiplicand and $src2 the
         // addend, matching the 132 Zm_Int and Zm_Intk patterns in this
         // multiclass. With the operands the other way round the pattern would
         // describe src1*src2+mem, which is the 213 computation.
7044     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7045                (X86selects_mask VK1WM:$mask,
7046                 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7047                           (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
7048                 (_.EltVT ZeroFP)))))),
7049               (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
7050                VR128X:$src1, VK1WM:$mask,
7051                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7053     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7054                (X86selects_mask VK1WM:$mask,
7055                 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7056                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7057                 (_.EltVT ZeroFP)))))),
7058               (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
7059                VR128X:$src1, VK1WM:$mask,
7060                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7062     // Patterns with rounding mode.
7063     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7064                 (RndOp _.FRC:$src2,
7065                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7066                        _.FRC:$src3, (i32 timm:$rc)))))),
7067               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
7068                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7069                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7071     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7072                 (RndOp _.FRC:$src2, _.FRC:$src3,
7073                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7074                        (i32 timm:$rc)))))),
7075               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
7076                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7077                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7079     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7080                (X86selects_mask VK1WM:$mask,
7081                 (RndOp _.FRC:$src2,
7082                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7083                        _.FRC:$src3, (i32 timm:$rc)),
7084                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7085               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
7086                VR128X:$src1, VK1WM:$mask,
7087                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7088                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7090     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7091                (X86selects_mask VK1WM:$mask,
7092                 (RndOp _.FRC:$src2, _.FRC:$src3,
7093                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7094                        (i32 timm:$rc)),
7095                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7096               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
7097                VR128X:$src1, VK1WM:$mask,
7098                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7099                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7101     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7102                (X86selects_mask VK1WM:$mask,
7103                 (RndOp _.FRC:$src2,
7104                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7105                        _.FRC:$src3, (i32 timm:$rc)),
7106                 (_.EltVT ZeroFP)))))),
7107               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
7108                VR128X:$src1, VK1WM:$mask,
7109                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7110                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7112     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7113                (X86selects_mask VK1WM:$mask,
7114                 (RndOp _.FRC:$src2, _.FRC:$src3,
7115                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7116                        (i32 timm:$rc)),
7117                 (_.EltVT ZeroFP)))))),
7118               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
7119                VR128X:$src1, VK1WM:$mask,
7120                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7121                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7122   }
// Instantiate the scalar FMA merge patterns for each operation (VFMADD, VFMSUB,
// VFNMADD, VFNMSUB) and element type:
//   SH - X86Movsh, v8f16x_info, fp16imm0, predicated on HasFP16
//   SS - X86Movss, v4f32x_info, fp32imm0, default predicate
//   SD - X86Movsd, v2f64x_info, fp64imm0, default predicate
7124 defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
7125                                   X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7126 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
7127                                   X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7128 defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
7129                                   X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7130 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
7131                                   X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7133 defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7134                                   "SS", X86Movss, v4f32x_info, fp32imm0>;
7135 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7136                                   "SS", X86Movss, v4f32x_info, fp32imm0>;
7137 defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7138                                   "SS", X86Movss, v4f32x_info, fp32imm0>;
7139 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7140                                   "SS", X86Movss, v4f32x_info, fp32imm0>;
7142 defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7143                                   "SD", X86Movsd, v2f64x_info, fp64imm0>;
7144 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7145                                   "SD", X86Movsd, v2f64x_info, fp64imm0>;
7146 defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7147                                   "SD", X86Movsd, v2f64x_info, fp64imm0>;
7148 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7149                                   "SD", X86Movsd, v2f64x_info, fp64imm0>;
7151 //===----------------------------------------------------------------------===//
7152 // AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
7153 //===----------------------------------------------------------------------===//
// Register (r), full-vector memory (m) and broadcast memory (mb) forms of a
// 52-bit multiply-add for one vector type `_`. $src1 is both the accumulator
// input and the destination (tied by the enclosing Constraints). In every
// pattern the node takes ($src2, $src3-or-load, $src1).
7154 let Constraints = "$src1 = $dst" in {
7155 multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
7156                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
7157   // NOTE: The SDNode has the multiply operands first with the add last.
7158   // This enables commuted load patterns to be autogenerated by tablegen.
7159   let ExeDomain = _.ExeDomain in {
7160   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
7161           (ins _.RC:$src2, _.RC:$src3),
7162           OpcodeStr, "$src3, $src2", "$src2, $src3",
7163           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
7164           T8, PD, EVEX, VVVV, Sched<[sched]>;
7166   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7167           (ins _.RC:$src2, _.MemOp:$src3),
7168           OpcodeStr, "$src3, $src2", "$src2, $src3",
7169           (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
7170           T8, PD, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
7171                                 sched.ReadAfterFold]>;
7173   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7174             (ins _.RC:$src2, _.ScalarMemOp:$src3),
7175             OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
7176             !strconcat("$src2, ${src3}", _.BroadcastStr ),
7177             (OpNode _.RC:$src2,
7178                     (_.VT (_.BroadcastLdFrag addr:$src3)),
7179                     _.RC:$src1)>,
7180             T8, PD, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
7181                                           sched.ReadAfterFold]>;
7182   }
7184 } // Constraints = "$src1 = $dst"
// Instantiates avx512_pmadd52_rm at each vector width of `_`:
//   Z    (info512, sched.ZMM) under [HasIFMA]
//   Z256 (info256, sched.YMM) and Z128 (info128, sched.XMM) under
//        [HasVLX, HasIFMA]
// Each width gets the matching EVEX_V* class and EVEX_CD8<EltSize, CD8VF>.
7186 multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7187                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
7188   let Predicates = [HasIFMA] in {
7189     defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
7190                       EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7191   }
7192   let Predicates = [HasVLX, HasIFMA] in {
7193     defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
7194                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7195     defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
7196                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
7197   }
// The two IFMA instructions: opcode 0xb4 uses x86vpmadd52l and opcode 0xb5
// uses x86vpmadd52h. Both use SchedWriteVecIMul, the i64 vector type family
// and REX_W.
7200 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
7201                                          SchedWriteVecIMul, avx512vl_i64_info>,
7202                                          REX_W;
7203 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
7204                                          SchedWriteVecIMul, avx512vl_i64_info>,
7205                                          REX_W;
7207 //===----------------------------------------------------------------------===//
7208 // AVX-512  Scalar convert from signed integer to float/double
7209 //===----------------------------------------------------------------------===//
// Scalar integer-to-floating-point conversion without a rounding-control
// operand. It defines:
//   rr / rm         - isCodeGenOnly forms on the scalar class DstVT.FRC with no
//                     patterns attached (empty pattern list).
//   rr_Int / rm_Int - forms on the vector class DstVT.RC, selected from
//                     (OpNode $src1, $src2) with a GPR or a ld_frag load.
//   an AT&T-only InstAlias mapping "v"#asm#mem to the rr_Int form.
// `mem` is the size suffix appended to the mnemonic. `_Uses` (default
// [MXCSR]) and `_mayRaiseFPException` (default 1) let an instantiation
// override the implicit-use list and the FP-exception flag.
7211 multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
7212                     RegisterClass SrcRC, X86VectorVTInfo DstVT,
7213                     X86MemOperand x86memop, PatFrag ld_frag, string asm,
7214                     string mem, list<Register> _Uses = [MXCSR],
7215                     bit _mayRaiseFPException = 1> {
7216 let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
7217     mayRaiseFPException = _mayRaiseFPException in {
7218   let hasSideEffects = 0, isCodeGenOnly = 1 in {
7219     def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7220               (ins DstVT.FRC:$src1, SrcRC:$src),
7221               !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7222               EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7223     let mayLoad = 1 in
7224       def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7225               (ins DstVT.FRC:$src1, x86memop:$src),
7226               asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
7227               EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
7228   } // hasSideEffects = 0
7229   def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7230                 (ins DstVT.RC:$src1, SrcRC:$src2),
7231                 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7232                 [(set DstVT.RC:$dst,
7233                       (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
7234                EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7236   def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7237                 (ins DstVT.RC:$src1, x86memop:$src2),
7238                 asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7239                 [(set DstVT.RC:$dst,
7240                       (OpNode (DstVT.VT DstVT.RC:$src1),
7241                                (ld_frag addr:$src2)))]>,
7242                 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
7244   def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7245                   (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
7246                   DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
// Rounding-control variant of the scalar integer-to-FP conversion. It defines
// rrb_Int on the vector class DstVT.RC, selected from
// (OpNode $src1, $src2, timm:$rc), plus an AT&T-only InstAlias mapping
// "v"#asm#mem to it. The instruction reads MXCSR and is encoded with EVEX_B
// and EVEX_RC.
7249 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7250                                X86FoldableSchedWrite sched, RegisterClass SrcRC,
7251                                X86VectorVTInfo DstVT, string asm,
7252                                string mem> {
7253   let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
7254   def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7255               (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7256               !strconcat(asm,
7257                   "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7258               [(set DstVT.RC:$dst,
7259                     (OpNode (DstVT.VT DstVT.RC:$src1),
7260                              SrcRC:$src2,
7261                              (i32 timm:$rc)))]>,
7262               EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7263   def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
7264                   (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
7265                   DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
// Combines the rounding-control form (avx512_vcvtsi_round, using OpNodeRnd)
// with the plain forms (avx512_vcvtsi, using OpNode and its default _Uses and
// _mayRaiseFPException) under one NAME, and marks them VEX_LIG.
7268 multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7269                                 X86FoldableSchedWrite sched,
7270                                 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7271                                 X86MemOperand x86memop, PatFrag ld_frag,
7272                                 string asm, string mem> {
7273   defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7274               avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7275                             ld_frag, asm, mem>, VEX_LIG;
// Signed integer to scalar FP conversions (opcode 0x2A), from GR32 or GR64 to
// f32 (XS prefix) or f64 (XD prefix). The 64-bit-source forms add REX_W.
// VCVTSI2SDZ (GR32 -> f64) is the exception: it uses avx512_vcvtsi directly
// with null_frag, an empty Uses list and mayRaiseFPException = 0, so it has no
// rounding-control form.
// NOTE(review): presumably because every i32 is exactly representable as a
// double, so rounding and FP exceptions cannot occur -- confirm.
7278 let Predicates = [HasAVX512] in {
7279 defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7280                                  WriteCvtI2SS, GR32,
7281                                  v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7282                                  TB, XS, EVEX_CD8<32, CD8VT1>;
7283 defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7284                                  WriteCvtI2SS, GR64,
7285                                  v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7286                                  TB, XS, REX_W, EVEX_CD8<64, CD8VT1>;
7287 defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7288                                  v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
7289                                  TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7290 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7291                                  WriteCvtI2SD, GR64,
7292                                  v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7293                                  TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
// AT&T-only aliases that accept the suffix-less mnemonics with a memory
// operand and map them to the i32mem rm_Int forms.
7295 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7296               (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7297 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7298               (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7300 def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
7301           (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7302 def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
7303           (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7304 def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
7305           (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7306 def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
7307           (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7309 def : Pat<(f32 (any_sint_to_fp GR32:$src)),
7310           (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7311 def : Pat<(f32 (any_sint_to_fp GR64:$src)),
7312           (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7313 def : Pat<(f64 (any_sint_to_fp GR32:$src)),
7314           (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7315 def : Pat<(f64 (any_sint_to_fp GR64:$src)),
7316           (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
// Unsigned GR32/GR64 -> scalar f32/f64 conversions (opcode 0x7B). The layout
// mirrors the signed group: four defms, two "att" aliases, then memory- and
// register-source selection patterns.
7318 defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7319                                   WriteCvtI2SS, GR32,
7320                                   v4f32x_info, i32mem, loadi32,
7321                                   "cvtusi2ss", "l">, TB, XS, EVEX_CD8<32, CD8VT1>;
7322 defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7323                                   WriteCvtI2SS, GR64,
7324                                   v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7325                                   TB, XS, REX_W, EVEX_CD8<64, CD8VT1>;
// NOTE(review): as with the signed GR32 -> f64 form, this one uses
// avx512_vcvtsi with null_frag only, so no round-control records are created
// for it - presumably because the conversion is always exact; confirm.
7326 defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7327                                   i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
7328                                   TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7329 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7330                                   WriteCvtI2SD, GR64,
7331                                   v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7332                                   TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
// "att"-variant aliases: the suffix-less mnemonic with a memory source
// resolves to the 32-bit (i32mem) rm_Int record.
7334 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7335               (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7336 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7337               (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
// Scalar selection patterns, memory source. The pass-through operand is
// IMPLICIT_DEF.
7339 def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
7340           (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7341 def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
7342           (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7343 def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
7344           (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7345 def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
7346           (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
// Scalar selection patterns, register source.
7348 def : Pat<(f32 (any_uint_to_fp GR32:$src)),
7349           (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7350 def : Pat<(f32 (any_uint_to_fp GR64:$src)),
7351           (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7352 def : Pat<(f64 (any_uint_to_fp GR32:$src)),
7353           (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7354 def : Pat<(f64 (any_uint_to_fp GR64:$src)),
7355           (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7358 //===----------------------------------------------------------------------===//
7359 // AVX-512  Scalar convert from float/double to integer
7360 //===----------------------------------------------------------------------===//
// Defines the vector-register ("_Int") forms of a scalar fp -> integer
// conversion, plus suffixed "att" assembler aliases for each of them.
//   opc       - opcode byte.
//   SrcVT     - source vector info; supplies RC, VT, IntScalarMemOp,
//               ScalarIntMemFrags and ExeDomain.
//   DstVT     - destination info; only its RC is used here.
//   OpNode    - node matched by rr_Int and rm_Int.
//   OpNodeRnd - node matched by rrb_Int together with an i32 timm rounding
//               operand.
//   sched     - scheduling class (sched.Folded/ReadAfterFold for rm_Int).
//   asm       - mnemonic; the aliases below prepend "v" to it (presumably the
//               SI class does the same for the record itself - confirm).
//   aliasStr  - suffix appended to the mnemonic in the aliases (call sites
//               pass "{l}" or "{q}").
//   prd       - predicate guarding the instruction records.
7362 multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7363                                   X86VectorVTInfo DstVT, SDNode OpNode,
7364                                   SDNode OpNodeRnd,
7365                                   X86FoldableSchedWrite sched, string asm,
7366                                   string aliasStr, Predicate prd = HasAVX512> {
7367   let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
    // Register source, default rounding.
7368     def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7369                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7370                 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7371                 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    // Register source with an explicit rounding-control operand ($rc).
    // This record sets Uses = [MXCSR] but, unlike rr_Int/rm_Int, does not
    // carry SIMD_EXC.
7372     let Uses = [MXCSR] in
7373     def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7374                  !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7375                  [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7376                  EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7377                  Sched<[sched]>;
    // Memory source, matched through SrcVT.ScalarIntMemFrags.
7378     def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7379                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7380                 [(set DstVT.RC:$dst, (OpNode
7381                       (SrcVT.ScalarIntMemFrags addr:$src)))]>,
7382                 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7383   } // Predicates = [prd]
  // "att"-variant aliases spelling the mnemonic as "v" # asm # aliasStr, one
  // per record above. They sit outside the Predicates/ExeDomain let.
7385   def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7386           (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7387   def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7388           (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7389   def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7390           (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7391                                           SrcVT.IntScalarMemOp:$src), 0, "att">;
7394 // Convert float/double to signed/unsigned int 32/64
// Eight instantiations of avx512_cvt_s_int_round covering
// {f32, f64} source x {i32, i64} destination x {signed, unsigned}.
// Signed forms use opcode 0x2D with X86cvts2si/X86cvts2siRnd; unsigned forms
// use 0x79 with X86cvts2usi/X86cvts2usiRnd. f32 sources use XS and
// EVEX_CD8<32, CD8VT1>; f64 sources use XD and EVEX_CD8<64, CD8VT1>.
// 64-bit destinations add REX_W and the "{q}" alias suffix.
7395 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7396                                    X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7397                                    TB, XS, EVEX_CD8<32, CD8VT1>;
7398 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7399                                    X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7400                                    TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
7401 defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7402                                    X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7403                                    TB, XS, EVEX_CD8<32, CD8VT1>;
7404 defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7405                                    X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7406                                    TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
7407 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7408                                    X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7409                                    TB, XD, EVEX_CD8<64, CD8VT1>;
7410 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7411                                    X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7412                                    TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7413 defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7414                                    X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7415                                    TB, XD, EVEX_CD8<64, CD8VT1>;
7416 defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7417                                    X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7418                                    TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
// Defines codegen-only (isCodeGenOnly = 1) rr/rm records for a scalar
// fp -> integer conversion operating on the scalar FP register class
// (SrcVT.FRC) rather than the vector register class.
//   opc    - opcode byte.
//   asm    - full mnemonic, used as-is in the asm string.
//   SrcVT  - source info; supplies FRC, ScalarMemOp, ScalarLdFrag, ExeDomain.
//   DstVT  - destination info; only its RC is used.
//   OpNode - node matched directly on the FRC / loaded scalar operand.
//   sched  - scheduling class (sched.Folded/ReadAfterFold for rm).
7420 multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
7421                         X86VectorVTInfo DstVT, SDNode OpNode,
7422                         X86FoldableSchedWrite sched> {
7423   let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
7424     let isCodeGenOnly = 1 in {
    // Register source.
7425     def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
7426                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7427                 [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
7428                 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    // Memory source, matched via SrcVT.ScalarLdFrag.
7429     def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
7430                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7431                 [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
7432                 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7433     }
7434   } // Predicates = [HasAVX512]
// Adds codegen-only rr/rm records under the same NAME prefixes as the _Int
// forms: lrint is bound to the 32-bit-destination records and llrint to the
// 64-bit-destination (REX_W) records, for both f32 and f64 sources.
7437 defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
7438                        lrint, WriteCvtSS2I>, TB, XS, EVEX_CD8<32, CD8VT1>;
7439 defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
7440                        llrint, WriteCvtSS2I>, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>;
7441 defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
7442                        lrint, WriteCvtSD2I>, TB, XD, EVEX_CD8<64, CD8VT1>;
7443 defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
7444                        llrint, WriteCvtSD2I>, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>;
// Extra patterns for lrint producing an i64 result: these select the
// 64-bit-destination rr/rm records, whose own built-in pattern is llrint.
7446 let Predicates = [HasAVX512] in {
7447   def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
7448   def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
7450   def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
7451   def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
7454 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
7455 // which produce unnecessary vmovs{s,d} instructions
// Each pattern matches X86Movss/X86Movsd of $dst with a scalar_to_vector of an
// integer -> fp conversion and selects the corresponding *_Int record, with
// $dst as the pass-through vector operand. Signed conversions come first, in
// the order: f32 from GR64, i64 load, GR32, i32 load; then the same for f64.
7456 let Predicates = [HasAVX512] in {
7457 def : Pat<(v4f32 (X86Movss
7458                    (v4f32 VR128X:$dst),
7459                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
7460           (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7462 def : Pat<(v4f32 (X86Movss
7463                    (v4f32 VR128X:$dst),
7464                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
7465           (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7467 def : Pat<(v4f32 (X86Movss
7468                    (v4f32 VR128X:$dst),
7469                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
7470           (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7472 def : Pat<(v4f32 (X86Movss
7473                    (v4f32 VR128X:$dst),
7474                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
7475           (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
// Signed integer -> f64, merged into a v2f64 via X86Movsd.
7477 def : Pat<(v2f64 (X86Movsd
7478                    (v2f64 VR128X:$dst),
7479                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
7480           (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7482 def : Pat<(v2f64 (X86Movsd
7483                    (v2f64 VR128X:$dst),
7484                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
7485           (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7487 def : Pat<(v2f64 (X86Movsd
7488                    (v2f64 VR128X:$dst),
7489                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
7490           (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7492 def : Pat<(v2f64 (X86Movsd
7493                    (v2f64 VR128X:$dst),
7494                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
7495           (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
// Unsigned counterparts: X86Movss/X86Movsd of $dst with a scalar_to_vector of
// any_uint_to_fp selects the VCVTUSI* *_Int records, with $dst as the
// pass-through vector operand. Unsigned integer -> f32 first.
7497 def : Pat<(v4f32 (X86Movss
7498                    (v4f32 VR128X:$dst),
7499                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
7500           (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7502 def : Pat<(v4f32 (X86Movss
7503                    (v4f32 VR128X:$dst),
7504                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
7505           (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7507 def : Pat<(v4f32 (X86Movss
7508                    (v4f32 VR128X:$dst),
7509                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
7510           (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7512 def : Pat<(v4f32 (X86Movss
7513                    (v4f32 VR128X:$dst),
7514                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
7515           (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
// Unsigned integer -> f64, merged into a v2f64 via X86Movsd.
7517 def : Pat<(v2f64 (X86Movsd
7518                    (v2f64 VR128X:$dst),
7519                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
7520           (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7522 def : Pat<(v2f64 (X86Movsd
7523                    (v2f64 VR128X:$dst),
7524                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
7525           (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7527 def : Pat<(v2f64 (X86Movsd
7528                    (v2f64 VR128X:$dst),
7529                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
7530           (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7532 def : Pat<(v2f64 (X86Movsd
7533                    (v2f64 VR128X:$dst),
7534                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
7535           (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7536 } // Predicates = [HasAVX512]
7538 // Convert float/double to signed/unsigned int 32/64 with truncation
// Defines five records per instantiation - codegen-only rr/rm on the scalar
// FP register class, and rr_Int/rrb_Int/rm_Int on the vector register class -
// followed by suffixed "att" aliases for the three _Int records.
//   opc        - opcode byte.
//   asm        - full mnemonic, used as-is by both records and aliases.
//   _SrcRC     - source vector info (despite the name, an X86VectorVTInfo).
//   _DstRC     - destination vector info; only its RC is used.
//   OpNode     - node matched by the codegen-only rr/rm records.
//   OpNodeInt  - node matched by rr_Int and rm_Int.
//   OpNodeSAE  - node matched by rrb_Int (the "{sae}" form).
//   sched      - scheduling class (Folded/ReadAfterFold for memory forms).
//   aliasStr   - suffix appended to asm in the aliases.
//   prd        - predicate guarding the instruction records.
7539 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7540                             X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
7541                             SDNode OpNodeInt, SDNode OpNodeSAE,
7542                             X86FoldableSchedWrite sched, string aliasStr,
7543                             Predicate prd = HasAVX512> {
7544 let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
  // Codegen-only scalar-register forms (FRC source / scalar load).
7545   let isCodeGenOnly = 1 in {
7546   def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7547               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7548               [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7549               EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7550   def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7551               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7552               [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7553               EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7554   }
  // Vector-register source.
7556   def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7557             !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7558            [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7559            EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  // "{sae}" form: sets EVEX_B and Uses = [MXCSR]; it takes no rounding operand
  // and, unlike the other records here, does not carry SIMD_EXC.
7560   let Uses = [MXCSR] in
7561   def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7562             !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7563             [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7564                                   EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
  // Memory source, matched through _SrcRC.ScalarIntMemFrags.
7565   def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7566               (ins _SrcRC.IntScalarMemOp:$src),
7567               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7568               [(set _DstRC.RC:$dst,
7569                 (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
7570               EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7571 } // Predicates = [prd]
  // "att"-variant aliases spelling the mnemonic as asm # aliasStr, one per
  // _Int record. They sit outside the Predicates/ExeDomain let.
7573   def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7574           (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7575   def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7576           (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7577   def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7578           (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7579                                           _SrcRC.IntScalarMemOp:$src), 0, "att">;
// Truncating fp -> signed integer conversions (opcode 0x2C): any_fp_to_sint
// for the scalar records, X86cvtts2Int / X86cvtts2IntSAE for the _Int records.
// 64-bit destinations add REX_W and the "{q}" alias suffix.
7582 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7583                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7584                         "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>;
7585 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7586                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7587                         "{q}">, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>;
7588 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7589                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7590                         "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>;
7591 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7592                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7593                         "{q}">, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>;
// Truncating fp -> unsigned integer conversions (opcode 0x78): any_fp_to_uint
// for the scalar records, X86cvtts2UInt / X86cvtts2UIntSAE for the _Int records.
7595 defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7596                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7597                         "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>;
7598 defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7599                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7600                         "{q}">, TB, XS,REX_W, EVEX_CD8<32, CD8VT1>;
7601 defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7602                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7603                         "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>;
7604 defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7605                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7606                         "{q}">, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7608 //===----------------------------------------------------------------------===//
7609 // AVX-512  Convert from float to double and back
7610 //===----------------------------------------------------------------------===//
// Scalar fp -> fp conversion (different element type), without rounding or SAE
// operands. Produces maskable rr_Int/rm_Int records on vector registers and
// pattern-less codegen-only rr/rm records on scalar FP registers. The whole
// multiclass is wrapped in Uses = [MXCSR], mayRaiseFPException = 1.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   _         - destination vector info (also the type of pass-through $src1).
//   _Src      - source vector info for $src2.
//   OpNode    - node applied to ($src1, $src2) in the _Int patterns.
//   sched     - scheduling class (Folded/ReadAfterFold for memory forms).
7612 let Uses = [MXCSR], mayRaiseFPException = 1 in
7613 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7614                                 X86VectorVTInfo _Src, SDNode OpNode,
7615                                 X86FoldableSchedWrite sched> {
  // Vector-register source.
7616   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7617                          (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7618                          "$src2, $src1", "$src1, $src2",
7619                          (_.VT (OpNode (_.VT _.RC:$src1),
7620                                        (_Src.VT _Src.RC:$src2)))>,
7621                          EVEX, VVVV, VEX_LIG, Sched<[sched]>;
  // Memory source, matched through _Src.ScalarIntMemFrags.
7622   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7623                          (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7624                          "$src2, $src1", "$src1, $src2",
7625                          (_.VT (OpNode (_.VT _.RC:$src1),
7626                                   (_Src.ScalarIntMemFrags addr:$src2)))>,
7627                          EVEX, VVVV, VEX_LIG,
7628                          Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Codegen-only FRC forms with empty pattern lists; they are selected only
  // through standalone Pat records elsewhere in the file.
7630   let isCodeGenOnly = 1, hasSideEffects = 0 in {
7631     def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7632                (ins _.FRC:$src1, _Src.FRC:$src2),
7633                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7634                EVEX, VVVV, VEX_LIG, Sched<[sched]>;
    // mayLoad is set explicitly because there is no pattern to infer it from.
7635     let mayLoad = 1 in
7636     def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7637                (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7638                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7639                EVEX, VVVV, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7640   }
7643 // Scalar Conversion with SAE - suppress all exceptions
// Defines a single maskable rrb_Int record: register-register form with
// "{sae}" in the asm string and EVEX_B set, matched by OpNodeSAE on
// ($src1, $src2). There is no memory form and no rounding operand.
7644 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7645                                     X86VectorVTInfo _Src, SDNode OpNodeSAE,
7646                                     X86FoldableSchedWrite sched> {
7647   let Uses = [MXCSR] in
7648   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7649                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7650                         "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7651                         (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7652                                          (_Src.VT _Src.RC:$src2)))>,
7653                         EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>;
7656 // Scalar Conversion with rounding control (RC)
// Defines a single maskable rrb_Int record: register-register form with an
// extra AVX512RC:$rc operand, matched by OpNodeRnd on ($src1, $src2, timm $rc)
// and encoded with EVEX_B and EVEX_RC. There is no memory form.
7657 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7658                                    X86VectorVTInfo _Src, SDNode OpNodeRnd,
7659                                    X86FoldableSchedWrite sched> {
7660   let Uses = [MXCSR] in
7661   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7662                         (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7663                         "$rc, $src2, $src1", "$src1, $src2, $rc",
7664                         (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7665                                          (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
7666                         EVEX, VVVV, VEX_LIG, Sched<[sched]>,
7667                         EVEX_B, EVEX_RC;
// Narrowing scalar fp conversion: emits NAME#Z records by combining
// avx512_cvt_fp_scalar (OpNode) with avx512_cvt_fp_rc_scalar (OpNodeRnd), i.e.
// the plain forms plus a rounding-control rrb_Int. Note the argument order:
// callers pass (_src, _dst) but the helpers receive (_dst, _src).
// EVEX_CD8 is scaled by the source element size.
// NOTE(review): ExeDomain is fixed to SSEPackedSingle regardless of the
// _src/_dst element types - confirm this is intended.
7669 multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
7670                                       SDNode OpNode, SDNode OpNodeRnd,
7671                                       X86FoldableSchedWrite sched,
7672                                       X86VectorVTInfo _src, X86VectorVTInfo _dst,
7673                                       Predicate prd = HasAVX512> {
7674   let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7675     defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7676              avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7677                                OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
7678   }
// Widening scalar fp conversion: emits NAME#Z records by combining
// avx512_cvt_fp_scalar (OpNode) with avx512_cvt_fp_sae_scalar (OpNodeSAE), i.e.
// the plain forms plus a "{sae}" rrb_Int instead of a rounding-control one.
// Callers pass (_src, _dst); the helpers receive (_dst, _src).
// EVEX_CD8 is scaled by the source element size.
// NOTE(review): ExeDomain is fixed to SSEPackedSingle regardless of the
// _src/_dst element types - confirm this is intended.
7681 multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
7682                                        SDNode OpNode, SDNode OpNodeSAE,
7683                                        X86FoldableSchedWrite sched,
7684                                        X86VectorVTInfo _src, X86VectorVTInfo _dst,
7685                                        Predicate prd = HasAVX512> {
7686   let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7687     defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7688              avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7689              EVEX_CD8<_src.EltSize, CD8VT1>;
7690   }
// Scalar fp <-> fp conversion instantiations. Narrowing forms use
// avx512_cvt_fp_scalar_trunc with X86frounds/X86froundsRnd; widening forms use
// avx512_cvt_fp_scalar_extend with X86fpexts/X86fpextsSAE.
// f64 <-> f32 (default HasAVX512 predicate, TB map):
7692 defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
7693                                          X86froundsRnd, WriteCvtSD2SS, f64x_info,
7694                                          f32x_info>, TB, XD, REX_W;
7695 defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
7696                                           X86fpextsSAE, WriteCvtSS2SD, f32x_info,
7697                                           f64x_info>, TB, XS;
// Half-precision variants, guarded by HasFP16 and encoded in T_MAP5/T_MAP6.
// They reuse the WriteCvtSD2SS / WriteCvtSS2SD scheduling classes.
7698 defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
7699                                           X86froundsRnd, WriteCvtSD2SS, f64x_info,
7700                                           f16x_info, HasFP16>, T_MAP5, XD, REX_W;
7701 defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
7702                                           X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7703                                           f64x_info, HasFP16>, T_MAP5, XS;
7704 defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
7705                                           X86froundsRnd, WriteCvtSD2SS, f32x_info,
7706                                           f16x_info, HasFP16>, T_MAP5;
7707 defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
7708                                           X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7709                                           f32x_info, HasFP16>, T_MAP6;
// Selection patterns for the scalar any_fpextend / any_fpround nodes onto the
// codegen-only rr/rm records defined above; the pass-through operand is
// IMPLICIT_DEF. The load-folding (rm) patterns additionally require OptForSize
// (presumably to avoid a false dependency on the undefined pass-through
// register when optimizing for speed - confirm).
// f32 -> f64:
7711 def : Pat<(f64 (any_fpextend FR32X:$src)),
7712           (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7713           Requires<[HasAVX512]>;
7714 def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
7715           (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7716           Requires<[HasAVX512, OptForSize]>;
// f64 -> f32 (register form only):
7718 def : Pat<(f32 (any_fpround FR64X:$src)),
7719           (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7720            Requires<[HasAVX512]>;
// f16 -> f32:
7722 def : Pat<(f32 (any_fpextend FR16X:$src)),
7723           (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
7724           Requires<[HasFP16]>;
7725 def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
7726           (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
7727           Requires<[HasFP16, OptForSize]>;
// f16 -> f64:
7729 def : Pat<(f64 (any_fpextend FR16X:$src)),
7730           (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
7731           Requires<[HasFP16]>;
7732 def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
7733           (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7734           Requires<[HasFP16, OptForSize]>;
// f32 -> f16 and f64 -> f16 (register forms only):
7736 def : Pat<(f16 (any_fpround FR32X:$src)),
7737           (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
7738            Requires<[HasFP16]>;
7739 def : Pat<(f16 (any_fpround FR64X:$src)),
7740           (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
7741            Requires<[HasFP16]>;
// Vector-level forms: converting element 0 of $src and merging it into $dst
// via X86Movss/X86Movsd selects the rr_Int record directly, with $dst as the
// pass-through operand.
7743 def : Pat<(v4f32 (X86Movss
7744                    (v4f32 VR128X:$dst),
7745                    (v4f32 (scalar_to_vector
7746                      (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7747           (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7748           Requires<[HasAVX512]>;
7750 def : Pat<(v2f64 (X86Movsd
7751                    (v2f64 VR128X:$dst),
7752                    (v2f64 (scalar_to_vector
7753                      (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7754           (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7755           Requires<[HasAVX512]>;
7757 //===----------------------------------------------------------------------===//
7758 // AVX-512  Vector convert from signed/unsigned integer to float/double
7759 //          and from float/double to signed/unsigned integer
7760 //===----------------------------------------------------------------------===//
// Generic packed conversion: defines maskable rr (register), rm (full-vector
// memory) and rmb (broadcast memory) records, all under
// Uses = [MXCSR], mayRaiseFPException = 1.
//   opc        - opcode byte.
//   OpcodeStr  - mnemonic.
//   _          - destination vector info.
//   _Src       - source vector info.
//   OpNode     - node used in the unmasked patterns.
//   MaskOpNode - node used inside the vselect_mask (merge/zero) patterns.
//   sched      - scheduling class (sched.Folded for memory forms).
//   Broadcast  - broadcast suffix appended to "${src}" in the rmb asm string.
//   Alias      - suffix appended to OpcodeStr for the rm form only.
//   MemOp      - memory operand type for the rm form.
//   MaskRC     - mask register class for $mask.
//   LdDAG      - unmasked pattern for the rm form (default: OpNode of a
//                _Src.LdFrag load).
//   MaskLdDAG  - masked-pattern counterpart of LdDAG (default: MaskOpNode of
//                the same load).
7762 multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7763                           X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode,
7764                           X86FoldableSchedWrite sched,
7765                           string Broadcast = _.BroadcastStr,
7766                           string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7767                           RegisterClass MaskRC = _.KRCWM,
7768                           dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
7769                           dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
7770 let Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Register source: unmasked, merge-masked ($src0) and zero-masked patterns.
7771   defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
7772                          (ins _Src.RC:$src),
7773                          (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
7774                          (ins MaskRC:$mask, _Src.RC:$src),
7775                           OpcodeStr, "$src", "$src",
7776                          (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7777                          (vselect_mask MaskRC:$mask,
7778                                        (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7779                                        _.RC:$src0),
7780                          (vselect_mask MaskRC:$mask,
7781                                        (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7782                                        _.ImmAllZerosV)>,
7783                          EVEX, Sched<[sched]>;
  // Full-vector memory source: uses the caller-overridable LdDAG/MaskLdDAG and
  // the mnemonic with Alias appended.
7785   defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7786                          (ins MemOp:$src),
7787                          (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
7788                          (ins MaskRC:$mask, MemOp:$src),
7789                          OpcodeStr#Alias, "$src", "$src",
7790                          LdDAG,
7791                          (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
7792                          (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
7793                          EVEX, Sched<[sched.Folded]>;
  // Broadcast memory source (EVEX_B): a scalar memory operand loaded through
  // _Src.BroadcastLdFrag.
7795   defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7796                          (ins _Src.ScalarMemOp:$src),
7797                          (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
7798                          (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
7799                          OpcodeStr,
7800                          "${src}"#Broadcast, "${src}"#Broadcast,
7801                          (_.VT (OpNode (_Src.VT
7802                                   (_Src.BroadcastLdFrag addr:$src))
7803                             )),
7804                          (vselect_mask MaskRC:$mask,
7805                                        (_.VT
7806                                         (MaskOpNode
7807                                          (_Src.VT
7808                                           (_Src.BroadcastLdFrag addr:$src)))),
7809                                        _.RC:$src0),
7810                          (vselect_mask MaskRC:$mask,
7811                                        (_.VT
7812                                         (MaskOpNode
7813                                          (_Src.VT
7814                                           (_Src.BroadcastLdFrag addr:$src)))),
7815                                        _.ImmAllZerosV)>,
7816                          EVEX, EVEX_B, Sched<[sched.Folded]>;
7817   }
7819 // Conversion with SAE - suppress all exceptions
// Defines a single maskable rrb record: register source, "{sae}" in the asm
// string, EVEX_B set, matched by OpNodeSAE. No memory form or rounding operand.
7820 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7821                               X86VectorVTInfo _Src, SDNode OpNodeSAE,
7822                               X86FoldableSchedWrite sched> {
7823   let Uses = [MXCSR] in
7824   defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7825                         (ins _Src.RC:$src), OpcodeStr,
7826                         "{sae}, $src", "$src, {sae}",
7827                         (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
7828                         EVEX, EVEX_B, Sched<[sched]>;
7831 // Conversion with rounding control (RC)
// Defines a single maskable rrb record: register source plus an AVX512RC:$rc
// operand, matched by OpNodeRnd with an i32 timm, encoded with EVEX_B and
// EVEX_RC. No memory form.
7832 multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7833                          X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
7834                          X86FoldableSchedWrite sched> {
7835   let Uses = [MXCSR] in
7836   defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7837                         (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
7838                         "$rc, $src", "$src, $rc",
7839                         (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
7840                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
7843 // Similar to avx512_vcvt_fp, but uses an extload for the memory form: the same "extload"#_Src.VTName PatFrag is passed as both trailing arguments.
7844 multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7845                                 X86VectorVTInfo _Src, SDPatternOperator OpNode,
7846                                 SDNode MaskOpNode,
7847                                 X86FoldableSchedWrite sched,
7848                                 string Broadcast = _.BroadcastStr,
7849                                 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7850                                 RegisterClass MaskRC = _.KRCWM>
7851   : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
7852                    Alias, MemOp, MaskRC,
7853                    (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
7854                    (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
7856 // Extend [Float to Double, Half to Float]. _dst supplies the destination vector infos and _src the source vector infos.
7857 multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
7858                              AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
7859                              X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
7860   let Predicates = [prd] in { // 512-bit destination: regular forms plus the SAE form.
7861     defm Z : avx512_vcvt_fpextend<opc, OpcodeStr,  _dst.info512, _src.info256,
7862                             any_fpextend, fpextend, sched.ZMM>,
7863              avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
7864                                 X86vfpextSAE, sched.ZMM>, EVEX_V512;
7865   }
7866   let Predicates = [prd, HasVLX] in { // 128/256-bit destinations additionally require HasVLX.
7867     defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
7868                                X86any_vfpext, X86vfpext, sched.XMM,
7869                                _dst.info128.BroadcastStr,
7870                                "", f64mem>, EVEX_V128;
7871     defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
7872                                any_fpextend, fpextend, sched.YMM>, EVEX_V256;
7873   }
7876 // Truncate [Double to Float, Float to Half]. _dst supplies the destination vector infos and _src the source vector infos.
7877 multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
7878                             AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
7879                             X86SchedWriteWidths sched, Predicate prd = HasAVX512,
7880                             PatFrag bcast128 = _src.info128.BroadcastLdFrag,
7881                             PatFrag loadVT128 = _src.info128.LdFrag,
7882                             RegisterClass maskRC128 = _src.info128.KRCWM> {
7883   let Predicates = [prd] in { // 512-bit source: regular forms plus the rounding-control form.
7884     defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
7885                             X86any_vfpround, X86vfpround, sched.ZMM>,
7886              avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
7887                                X86vfproundRnd, sched.ZMM>, EVEX_V512;
7888   }
7889   let Predicates = [prd, HasVLX] in {
7890     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
7891                                null_frag, null_frag, sched.XMM,
7892                                _src.info128.BroadcastStr, "{x}",
7893                                f128mem, maskRC128>, EVEX_V128;
7894     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
7895                                X86any_vfpround, X86vfpround,
7896                                sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;
7898     // Special patterns to allow use of X86vmfpround for masking on the Z128 forms.
7899     // The Z128 instruction patterns themselves have been disabled with null_frag.
7900     def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
7901               (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
7902     def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
7903                             maskRC128:$mask),
7904               (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
7905     def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
7906                             maskRC128:$mask),
7907               (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;
7909     def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
7910               (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
7911     def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
7912                             maskRC128:$mask),
7913               (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
7914     def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
7915                             maskRC128:$mask),
7916               (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;
7918     def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
7919               (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
7920     def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
7921                             (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
7922               (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
7923     def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
7924                             _dst.info128.ImmAllZerosV, maskRC128:$mask),
7925               (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
7926   }
7928   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", // "x"-suffixed aliases for the Z128 register and broadcast forms follow.
7929                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7930   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7931                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7932                   VK2WM:$mask, VR128X:$src), 0, "att">;
7933   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
7934                   "$dst {${mask}} {z}, $src}",
7935                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7936                   VK2WM:$mask, VR128X:$src), 0, "att">;
7937   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7938                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7939   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
7940                   "$dst {${mask}}, ${src}{1to2}}",
7941                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7942                   VK2WM:$mask, f64mem:$src), 0, "att">;
7943   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7944                   "$dst {${mask}} {z}, ${src}{1to2}}",
7945                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7946                   VK2WM:$mask, f64mem:$src), 0, "att">;
7948   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", // "y"-suffixed aliases for the Z256 register and broadcast forms follow.
7949                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7950   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7951                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7952                   VK4WM:$mask, VR256X:$src), 0, "att">;
7953   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
7954                   "$dst {${mask}} {z}, $src}",
7955                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7956                   VK4WM:$mask, VR256X:$src), 0, "att">;
7957   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7958                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7959   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
7960                   "$dst {${mask}}, ${src}{1to4}}",
7961                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7962                   VK4WM:$mask, f64mem:$src), 0, "att">;
7963   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7964                   "$dst {${mask}} {z}, ${src}{1to4}}",
7965                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7966                   VK4WM:$mask, f64mem:$src), 0, "att">;
7969 defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps", // Destination info is f32, source info is f64.
7970                                   avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
7971                                   REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
7972 defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd", // Destination info is f64, source info is f32.
7973                                    avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
7974                                    TB, EVEX_CD8<32, CD8VH>;
7976 // Extend Half to Double. Every width uses v8f16x_info as the source vector info.
7977 multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
7978                             X86SchedWriteWidths sched> {
7979   let Predicates = [HasFP16] in {
7980     defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
7981                                   any_fpextend, fpextend, sched.ZMM>,
7982              avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
7983                                 X86vfpextSAE, sched.ZMM>, EVEX_V512;
7984     def : Pat<(v8f64 (extloadv8f16 addr:$src)), // Extra pattern: select Zrm for a v8f16 extending load to v8f64.
7985                 (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
7986   }
7987   let Predicates = [HasFP16, HasVLX] in { // 128/256-bit destinations pass explicit broadcast strings and memory operands.
7988     defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
7989                                      X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
7990                                      f32mem>, EVEX_V128;
7991     defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
7992                                      X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
7993                                      f64mem>, EVEX_V256;
7994   }
7997 // Truncate Double to Half. Every width uses v8f16x_info as the destination vector info.
7998 multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
7999   let Predicates = [HasFP16] in {
8000     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
8001                             X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
8002              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
8003                                X86vfproundRnd, sched.ZMM>, EVEX_V512;
8004   }
8005   let Predicates = [HasFP16, HasVLX] in { // Z128 and Z256 are declared with null_frag, i.e. without built-in selection patterns.
8006     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
8007                                null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8008                                VK2WM>, EVEX_V128;
8009     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
8010                                null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
8011                                VK4WM>, EVEX_V256;
8012   }
8013   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", // "x"-suffixed aliases for the Z128 register and broadcast forms follow.
8014                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8015                   VR128X:$src), 0, "att">;
8016   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8017                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8018                   VK2WM:$mask, VR128X:$src), 0, "att">;
8019   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8020                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8021                   VK2WM:$mask, VR128X:$src), 0, "att">;
8022   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8023                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8024                   i64mem:$src), 0, "att">;
8025   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8026                   "$dst {${mask}}, ${src}{1to2}}",
8027                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8028                   VK2WM:$mask, i64mem:$src), 0, "att">;
8029   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8030                   "$dst {${mask}} {z}, ${src}{1to2}}",
8031                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8032                   VK2WM:$mask, i64mem:$src), 0, "att">;
8034   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", // "y"-suffixed aliases for the Z256 register and broadcast forms follow.
8035                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8036                   VR256X:$src), 0, "att">;
8037   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8038                   "$dst {${mask}}, $src}",
8039                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8040                   VK4WM:$mask, VR256X:$src), 0, "att">;
8041   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8042                   "$dst {${mask}} {z}, $src}",
8043                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8044                   VK4WM:$mask, VR256X:$src), 0, "att">;
8045   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8046                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8047                   i64mem:$src), 0, "att">;
8048   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8049                   "$dst {${mask}}, ${src}{1to4}}",
8050                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8051                   VK4WM:$mask, i64mem:$src), 0, "att">;
8052   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8053                   "$dst {${mask}} {z}, ${src}{1to4}}",
8054                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8055                   VK4WM:$mask, i64mem:$src), 0, "att">;
8057   def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}", // "z"-suffixed aliases for the 512-bit (Z) register and broadcast forms follow.
8058                   (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
8059                   VR512:$src), 0, "att">;
8060   def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
8061                   "$dst {${mask}}, $src}",
8062                   (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
8063                   VK8WM:$mask, VR512:$src), 0, "att">;
8064   def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
8065                   "$dst {${mask}} {z}, $src}",
8066                   (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
8067                   VK8WM:$mask, VR512:$src), 0, "att">;
8068   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
8069                   (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
8070                   i64mem:$src), 0, "att">;
8071   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
8072                   "$dst {${mask}}, ${src}{1to8}}",
8073                   (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
8074                   VK8WM:$mask, i64mem:$src), 0, "att">;
8075   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
8076                   "$dst {${mask}} {z}, ${src}{1to8}}",
8077                   (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
8078                   VK8WM:$mask, i64mem:$src), 0, "att">;
8081 defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info, // Destination info is f16, source info is f32; predicate is HasFP16.
8082                                    avx512vl_f32_info, SchedWriteCvtPD2PS,
8083                                    HasFP16>, T_MAP5, PD, EVEX_CD8<32, CD8VF>;
8084 defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info, // Destination info is f32, source info is f16; predicate is HasFP16.
8085                                     avx512vl_f16_info, SchedWriteCvtPS2PD,
8086                                     HasFP16>, T_MAP6, PD, EVEX_CD8<16, CD8VH>;
8087 defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>, // Double -> Half truncation records.
8088                                  REX_W, T_MAP5, PD, EVEX_CD8<64, CD8VF>;
8089 defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>, // Half -> Double extension records.
8090                                  T_MAP5, EVEX_CD8<16, CD8VQ>;
8092 let Predicates = [HasFP16, HasVLX] in {
8093   // Special patterns to allow use of X86vmfpround for masking on the VCVTPD2PHZ128/Z256 forms.
8094   // Their instruction patterns have been disabled with null_frag.
8095   def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))), // v4f64 register source (Z256).
8096             (VCVTPD2PHZ256rr VR256X:$src)>;
8097   def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
8098                           VK4WM:$mask)),
8099             (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
8100   def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
8101                           VK4WM:$mask),
8102             (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
8104   def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))), // v4f64 full-vector load source (Z256).
8105             (VCVTPD2PHZ256rm addr:$src)>;
8106   def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
8107                           VK4WM:$mask),
8108             (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8109   def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
8110                           VK4WM:$mask),
8111             (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;
8113   def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))), // v4f64 broadcast-load source (Z256).
8114             (VCVTPD2PHZ256rmb addr:$src)>;
8115   def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8116                           (v8f16 VR128X:$src0), VK4WM:$mask),
8117             (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8118   def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8119                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
8120             (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
8122   def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))), // v2f64 register source (Z128).
8123             (VCVTPD2PHZ128rr VR128X:$src)>;
8124   def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
8125                           VK2WM:$mask),
8126             (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8127   def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
8128                           VK2WM:$mask),
8129             (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
8131   def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))), // v2f64 full-vector load source (Z128).
8132             (VCVTPD2PHZ128rm addr:$src)>;
8133   def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
8134                           VK2WM:$mask),
8135             (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8136   def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
8137                           VK2WM:$mask),
8138             (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;
8140   def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))), // v2f64 broadcast-load source (Z128).
8141             (VCVTPD2PHZ128rmb addr:$src)>;
8142   def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8143                           (v8f16 VR128X:$src0), VK2WM:$mask),
8144             (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8145   def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8146                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
8147             (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
8150 // Convert Signed/Unsigned Doubleword to Double
8151 let Uses = []<Register>, mayRaiseFPException = 0 in // Records below get an empty Uses list and mayRaiseFPException = 0.
8152 multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8153                            SDNode MaskOpNode, SDPatternOperator OpNode128,
8154                            SDNode MaskOpNode128,
8155                            X86SchedWriteWidths sched> {
8156   // No rounding in this op, so only the plain avx512_vcvt_fp forms are instantiated.
8157   let Predicates = [HasAVX512] in
8158     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
8159                             MaskOpNode, sched.ZMM>, EVEX_V512;
8161   let Predicates = [HasVLX] in {
8162     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
8163                                OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
8164                                "", i64mem, VK2WM, // The two dags below match a loadi64 placed in a vector via scalar_to_vector and bitcast to v4i32.
8165                                (v2f64 (OpNode128 (bc_v4i32
8166                                 (v2i64
8167                                  (scalar_to_vector (loadi64 addr:$src)))))),
8168                                (v2f64 (MaskOpNode128 (bc_v4i32
8169                                 (v2i64
8170                                  (scalar_to_vector (loadi64 addr:$src))))))>,
8171                                EVEX_V128;
8172     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
8173                                MaskOpNode, sched.YMM>, EVEX_V256;
8174   }
8177 // Convert Signed/Unsigned Doubleword to Float. The 512-bit form also gets a rounding-control variant via OpNodeRnd.
8178 multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8179                            SDNode MaskOpNode, SDNode OpNodeRnd,
8180                            X86SchedWriteWidths sched> {
8181   let Predicates = [HasAVX512] in
8182     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
8183                             MaskOpNode, sched.ZMM>,
8184              avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
8185                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8187   let Predicates = [HasVLX] in { // 128/256-bit forms have no rounding-control variant.
8188     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
8189                                MaskOpNode, sched.XMM>, EVEX_V128;
8190     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
8191                                MaskOpNode, sched.YMM>, EVEX_V256;
8192   }
8195 // Convert Float to Signed/Unsigned Doubleword with truncation. The 512-bit form also gets an SAE variant via OpNodeSAE.
8196 multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8197                             SDNode MaskOpNode,
8198                             SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8199   let Predicates = [HasAVX512] in {
8200     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8201                             MaskOpNode, sched.ZMM>,
8202              avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
8203                                 OpNodeSAE, sched.ZMM>, EVEX_V512;
8204   }
8205   let Predicates = [HasVLX] in { // 128/256-bit forms have no SAE variant.
8206     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8207                                MaskOpNode, sched.XMM>, EVEX_V128;
8208     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8209                                MaskOpNode, sched.YMM>, EVEX_V256;
8210   }
8213 // Convert Float to Signed/Unsigned Doubleword. The 512-bit form also gets a rounding-control variant via OpNodeRnd.
8214 multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8215                            SDNode MaskOpNode, SDNode OpNodeRnd,
8216                            X86SchedWriteWidths sched> {
8217   let Predicates = [HasAVX512] in {
8218     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8219                             MaskOpNode, sched.ZMM>,
8220              avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
8221                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
8222   }
8223   let Predicates = [HasVLX] in { // 128/256-bit forms have no rounding-control variant.
8224     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8225                                MaskOpNode, sched.XMM>, EVEX_V128;
8226     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8227                                MaskOpNode, sched.YMM>, EVEX_V256;
8228   }
8231 // Convert Double to Signed/Unsigned Doubleword with truncation. The 512-bit form also gets an SAE variant via OpNodeSAE.
8232 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8233                             SDNode MaskOpNode, SDNode OpNodeSAE,
8234                             X86SchedWriteWidths sched> {
8235   let Predicates = [HasAVX512] in {
8236     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8237                             MaskOpNode, sched.ZMM>,
8238              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
8239                                 OpNodeSAE, sched.ZMM>, EVEX_V512;
8240   }
8241   let Predicates = [HasVLX] in {
8242     // We need "x"/"y" suffixes in order to distinguish between 128 and 256
8243     // memory forms of these instructions in Asm Parser. They have the same
8244     // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8245     // for the same reason.
8246     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8247                                null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8248                                VK2WM>, EVEX_V128;
8249     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8250                                MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8251   }
8253   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", // "x"-suffixed aliases for the Z128 register and broadcast forms follow.
8254                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8255                   VR128X:$src), 0, "att">;
8256   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8257                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8258                   VK2WM:$mask, VR128X:$src), 0, "att">;
8259   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8260                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8261                   VK2WM:$mask, VR128X:$src), 0, "att">;
8262   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8263                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8264                   f64mem:$src), 0, "att">;
8265   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8266                   "$dst {${mask}}, ${src}{1to2}}",
8267                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8268                   VK2WM:$mask, f64mem:$src), 0, "att">;
8269   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8270                   "$dst {${mask}} {z}, ${src}{1to2}}",
8271                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8272                   VK2WM:$mask, f64mem:$src), 0, "att">;
8274   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", // "y"-suffixed aliases for the Z256 register and broadcast forms follow.
8275                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8276                   VR256X:$src), 0, "att">;
8277   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8278                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8279                   VK4WM:$mask, VR256X:$src), 0, "att">;
8280   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8281                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8282                   VK4WM:$mask, VR256X:$src), 0, "att">;
8283   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8284                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8285                   f64mem:$src), 0, "att">;
8286   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8287                   "$dst {${mask}}, ${src}{1to4}}",
8288                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8289                   VK4WM:$mask, f64mem:$src), 0, "att">;
8290   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8291                   "$dst {${mask}} {z}, ${src}{1to4}}",
8292                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8293                   VK4WM:$mask, f64mem:$src), 0, "att">;
8296 // Convert Double to Signed/Unsigned Doubleword. The 512-bit form also gets a rounding-control variant via OpNodeRnd.
8297 multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8298                            SDNode MaskOpNode, SDNode OpNodeRnd,
8299                            X86SchedWriteWidths sched> {
8300   let Predicates = [HasAVX512] in {
8301     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8302                             MaskOpNode, sched.ZMM>,
8303              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
8304                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8305   }
8306   let Predicates = [HasVLX] in {
8307     // We need "x"/"y" suffixes in order to distinguish between 128 and 256
8308     // memory forms of these instructions in Asm Parser. They have the same
8309     // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8310     // for the same reason.
8311     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8312                                null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8313                                VK2WM>, EVEX_V128;
8314     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8315                                MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8316   }
8318   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", // "x"-suffixed aliases for the Z128 register and broadcast forms follow.
8319                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
8320   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8321                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8322                   VK2WM:$mask, VR128X:$src), 0, "att">;
8323   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8324                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8325                   VK2WM:$mask, VR128X:$src), 0, "att">;
8326   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8327                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8328                   f64mem:$src), 0, "att">;
8329   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8330                   "$dst {${mask}}, ${src}{1to2}}",
8331                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8332                   VK2WM:$mask, f64mem:$src), 0, "att">;
8333   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8334                   "$dst {${mask}} {z}, ${src}{1to2}}",
8335                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8336                   VK2WM:$mask, f64mem:$src), 0, "att">;
8338   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", // "y"-suffixed aliases for the Z256 register and broadcast forms follow.
8339                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
8340   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8341                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8342                   VK4WM:$mask, VR256X:$src), 0, "att">;
8343   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8344                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8345                   VK4WM:$mask, VR256X:$src), 0, "att">;
8346   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8347                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8348                   f64mem:$src), 0, "att">;
8349   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8350                   "$dst {${mask}}, ${src}{1to4}}",
8351                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8352                   VK4WM:$mask, f64mem:$src), 0, "att">;
8353   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8354                   "$dst {${mask}} {z}, ${src}{1to4}}",
8355                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8356                   VK4WM:$mask, f64mem:$src), 0, "att">;
8359 // Convert Double to Signed/Unsigned Quadword. The 512-bit form also gets a rounding-control variant via OpNodeRnd.
8360 multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8361                            SDNode MaskOpNode, SDNode OpNodeRnd,
8362                            X86SchedWriteWidths sched> {
8363   let Predicates = [HasDQI] in {
8364     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8365                             MaskOpNode, sched.ZMM>,
8366              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
8367                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8368   }
8369   let Predicates = [HasDQI, HasVLX] in { // 128/256-bit forms have no rounding-control variant.
8370     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8371                                MaskOpNode, sched.XMM>, EVEX_V128;
8372     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8373                                MaskOpNode, sched.YMM>, EVEX_V256;
8374   }
8377 // Convert Double to Signed/Unsigned Quadword with truncation
// Same width layout as the non-truncating f64 -> i64 multiclass (Z under
// HasDQI, Z128/Z256 under HasDQI + HasVLX), except that the 512-bit form mixes
// in avx512_vcvt_fp_sae instead of a rounding-control variant. OpNodeRnd is
// only consumed by that avx512_vcvt_fp_sae instantiation.
8378 multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8379                             SDNode MaskOpNode, SDNode OpNodeRnd,
8380                             X86SchedWriteWidths sched> {
8381   let Predicates = [HasDQI] in {
8382     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8383                             MaskOpNode, sched.ZMM>,
8384              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
8385                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
8386   }
8387   let Predicates = [HasDQI, HasVLX] in {
8388     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8389                                MaskOpNode, sched.XMM>, EVEX_V128;
8390     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8391                                MaskOpNode, sched.YMM>, EVEX_V256;
8392   }
8395 // Convert Signed/Unsigned Quadword to Double
// Packed i64 -> f64 conversion in three widths:
//   Z    - v8i64 -> v8f64, under HasDQI; also mixes in avx512_vcvt_fp_rc,
//          which is the only user of OpNodeRnd here.
//   Z128 - v2i64 -> v2f64, under HasDQI + HasVLX.
//   Z256 - v4i64 -> v4f64, under HasDQI + HasVLX.
8396 multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8397                            SDNode MaskOpNode, SDNode OpNodeRnd,
8398                            X86SchedWriteWidths sched> {
8399   let Predicates = [HasDQI] in {
8400     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
8401                             MaskOpNode, sched.ZMM>,
8402              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
8403                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8404   }
8405   let Predicates = [HasDQI, HasVLX] in {
8406     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
8407                                MaskOpNode, sched.XMM>, EVEX_V128;
8408     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
8409                                MaskOpNode, sched.YMM>, EVEX_V256;
8410   }
8413 // Convert Float to Signed/Unsigned Quadword
// Packed f32 -> i64 conversion. The source vector has the same element count
// as the destination only for Z (v8f32 -> v8i64) and Z256 (v4f32 -> v4i64);
// Z128 produces v2i64 from a v4f32 source type and therefore overrides the
// broadcast string, memory operand, mask class and load patterns explicitly.
8414 multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8415                            SDNode MaskOpNode, SDNode OpNodeRnd,
8416                            X86SchedWriteWidths sched> {
8417   let Predicates = [HasDQI] in {
8418     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8419                             MaskOpNode, sched.ZMM>,
8420              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
8421                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8422   }
8423   let Predicates = [HasDQI, HasVLX] in {
8424     // Explicitly specified broadcast string, since we take only 2 elements
8425     // from v4f32x_info source
    // The two trailing DAGs are the unmasked (OpNode) and masked (MaskOpNode)
    // memory patterns: a 64-bit scalar load (loadf64) placed in a v2f64 and
    // reinterpreted as v4f32 via bc_v4f32.
8426     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8427                                MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8428                                (v2i64 (OpNode (bc_v4f32
8429                                 (v2f64
8430                                  (scalar_to_vector (loadf64 addr:$src)))))),
8431                                (v2i64 (MaskOpNode (bc_v4f32
8432                                 (v2f64
8433                                  (scalar_to_vector (loadf64 addr:$src))))))>,
8434                                EVEX_V128;
8435     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8436                                MaskOpNode, sched.YMM>, EVEX_V256;
8437   }
8440 // Convert Float to Signed/Unsigned Quadword with truncation
// Truncating packed f32 -> i64 conversion. Layout matches the non-truncating
// f32 -> i64 multiclass, except that the 512-bit form mixes in
// avx512_vcvt_fp_sae (the only user of OpNodeRnd) instead of a
// rounding-control variant.
8441 multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8442                             SDNode MaskOpNode, SDNode OpNodeRnd,
8443                             X86SchedWriteWidths sched> {
8444   let Predicates = [HasDQI] in {
8445     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8446                             MaskOpNode, sched.ZMM>,
8447              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
8448                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
8449   }
8450   let Predicates = [HasDQI, HasVLX] in {
8451     // Explicitly specified broadcast string, since we take only 2 elements
8452     // from v4f32x_info source
    // The two trailing DAGs are the unmasked (OpNode) and masked (MaskOpNode)
    // memory patterns: a 64-bit scalar load (loadf64) placed in a v2f64 and
    // reinterpreted as v4f32 via bc_v4f32.
8453     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8454                                MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8455                                (v2i64 (OpNode (bc_v4f32
8456                                 (v2f64
8457                                  (scalar_to_vector (loadf64 addr:$src)))))),
8458                                (v2i64 (MaskOpNode (bc_v4f32
8459                                 (v2f64
8460                                  (scalar_to_vector (loadf64 addr:$src))))))>,
8461                                EVEX_V128;
8462     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8463                                MaskOpNode, sched.YMM>, EVEX_V256;
8464   }
8467 // Convert Signed/Unsigned Quadword to Float
8468 // Also Convert Signed/Unsigned Doubleword to Half
// Shared multiclass for integer -> FP conversions whose result elements are
// half as wide as the source elements. _dst/_src supply the per-width vector
// infos and prd selects the gating feature (HasDQI by default).
//   OpNode / MaskOpNode     - unmasked / masked nodes for the Z and Z256 forms.
//   OpNode128 / OpNode128M  - nodes used by the hand-written Z128 patterns.
//   OpNodeRnd               - node for the 512-bit avx512_vcvt_fp_rc variant.
8469 multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8470                                  SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
8471                                  SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
8472                                  AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8473                                  X86SchedWriteWidths sched, Predicate prd = HasDQI> {
8474   let Predicates = [prd] in {
    // 512-bit source produces a 256-bit-typed result (_dst.info256).
8475     defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
8476                             MaskOpNode, sched.ZMM>,
8477              avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
8478                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8479   }
8480   let Predicates = [prd, HasVLX] in {
8481     // We need "x"/"y" suffixes in order to distinguish between 128 and 256
8482     // memory forms of these instructions in Asm Parser. They have the same
8483     // dest type - _dst.info128. We also specify the broadcast string explicitly
8484     // due to the same reason.
8485     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
8486                                null_frag, sched.XMM, _src.info128.BroadcastStr,
8487                                "{x}", i128mem, _src.info128.KRCWM>,
8488                                EVEX_V128;
8489     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
8490                                MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
8491                                "{y}">, EVEX_V256;
8493     // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
8494     // patterns have been disabled with null_frag.
    // Register source: unmasked, merge-masked ($src0 pass-through) and
    // zero-masked forms.
8495     def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
8496               (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
8497     def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
8498                              _src.info128.KRCWM:$mask),
8499               (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
8500     def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
8501                              _src.info128.KRCWM:$mask),
8502               (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;
    // Full-vector memory source (via _src.info128.LdFrag).
8504     def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
8505               (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
8506     def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
8507                              _src.info128.KRCWM:$mask),
8508               (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8509     def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
8510                              _src.info128.KRCWM:$mask),
8511               (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;
    // Broadcast memory source.
    // NOTE(review): the broadcast fragment is hard-coded to X86VBroadcastld64,
    // but this multiclass is also instantiated with avx512vl_i32_info sources
    // (VCVTDQ2PH / VCVTUDQ2PH) - confirm a 64-bit broadcast load is intended
    // for those instantiations.
8513     def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
8514               (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
8515     def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8516                              (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
8517               (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8518     def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8519                              _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
8520               (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
8521   }
  // Assembler aliases (registered for the "att" variant) that spell the
  // mnemonic with an explicit "x" suffix for the Z128 register and broadcast
  // forms. These sit outside the Predicates blocks above.
  // NOTE(review): VK2WM, i64mem and "{1to2}" are hard-coded here, while the
  // Z128 instructions above take _src.info128.KRCWM and
  // _src.info128.BroadcastStr - confirm these aliases are also correct for the
  // avx512vl_i32_info instantiations (VCVTDQ2PH / VCVTUDQ2PH).
8523   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8524                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8525                   VR128X:$src), 0, "att">;
8526   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8527                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8528                   VK2WM:$mask, VR128X:$src), 0, "att">;
8529   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8530                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8531                   VK2WM:$mask, VR128X:$src), 0, "att">;
8532   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8533                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8534                   i64mem:$src), 0, "att">;
8535   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8536                   "$dst {${mask}}, ${src}{1to2}}",
8537                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8538                   VK2WM:$mask, i64mem:$src), 0, "att">;
8539   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8540                   "$dst {${mask}} {z}, ${src}{1to2}}",
8541                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8542                   VK2WM:$mask, i64mem:$src), 0, "att">;
  // Same set of aliases with a "y" suffix for the Z256 forms (VR256X source,
  // VK4WM mask, "{1to4}" broadcast). The NOTE(review) above applies here too.
8544   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8545                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8546                   VR256X:$src), 0, "att">;
8547   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8548                   "$dst {${mask}}, $src}",
8549                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8550                   VK4WM:$mask, VR256X:$src), 0, "att">;
8551   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8552                   "$dst {${mask}} {z}, $src}",
8553                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8554                   VK4WM:$mask, VR256X:$src), 0, "att">;
8555   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8556                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8557                   i64mem:$src), 0, "att">;
8558   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8559                   "$dst {${mask}}, ${src}{1to4}}",
8560                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8561                   VK4WM:$mask, i64mem:$src), 0, "att">;
8562   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8563                   "$dst {${mask}} {z}, ${src}{1to4}}",
8564                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8565                   VK4WM:$mask, i64mem:$src), 0, "att">;
// Instantiations of the packed integer <-> floating-point conversion
// multiclasses. Each defm supplies the opcode byte, the mnemonic, the SDNodes
// used for unmasked / masked / rounding-or-SAE selection, the scheduling
// class, and then the encoding mix-ins (opcode map and prefix, REX_W where
// present, and the EVEX_CD8 element size / tuple type).
8568 defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
8569                                  X86any_VSintToFP, X86VSintToFP,
8570                                  SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>;
8572 defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
8573                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8574                                 TB, EVEX_CD8<32, CD8VF>;
8576 defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
8577                                  X86cvttp2si, X86cvttp2siSAE,
8578                                  SchedWriteCvtPS2DQ>, TB, XS, EVEX_CD8<32, CD8VF>;
8580 defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
8581                                  X86cvttp2si, X86cvttp2siSAE,
8582                                  SchedWriteCvtPD2DQ>,
8583                                  TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
8585 defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
8586                                  X86cvttp2ui, X86cvttp2uiSAE,
8587                                  SchedWriteCvtPS2DQ>, TB, EVEX_CD8<32, CD8VF>;
8589 defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
8590                                  X86cvttp2ui, X86cvttp2uiSAE,
8591                                  SchedWriteCvtPD2DQ>,
8592                                  TB, REX_W, EVEX_CD8<64, CD8VF>;
8594 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
8595                                   uint_to_fp, X86any_VUintToFP, X86VUintToFP,
8596                                   SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>;
8598 defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
8599                                  uint_to_fp, X86VUintToFpRnd,
8600                                  SchedWriteCvtDQ2PS>, TB, XD, EVEX_CD8<32, CD8VF>;
8602 defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
8603                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD,
8604                                  EVEX_CD8<32, CD8VF>;
8606 defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
8607                                  X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, TB, XD,
8608                                  REX_W, EVEX_CD8<64, CD8VF>;
8610 defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
8611                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8612                                  TB, EVEX_CD8<32, CD8VF>;
8614 defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
8615                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W,
8616                                  TB, EVEX_CD8<64, CD8VF>;
8618 defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
8619                                  X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, REX_W,
8620                                  TB, PD, EVEX_CD8<64, CD8VF>;
8622 defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
8623                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD,
8624                                  EVEX_CD8<32, CD8VH>;
8626 defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
8627                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W,
8628                                  TB, PD, EVEX_CD8<64, CD8VF>;
8630 defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
8631                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, TB, PD,
8632                                  EVEX_CD8<32, CD8VH>;
8634 defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
8635                                  X86cvttp2si, X86cvttp2siSAE,
8636                                  SchedWriteCvtPD2DQ>, REX_W,
8637                                  TB, PD, EVEX_CD8<64, CD8VF>;
8639 defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
8640                                  X86cvttp2si, X86cvttp2siSAE,
8641                                  SchedWriteCvtPS2DQ>, TB, PD,
8642                                  EVEX_CD8<32, CD8VH>;
8644 defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
8645                                  X86cvttp2ui, X86cvttp2uiSAE,
8646                                  SchedWriteCvtPD2DQ>, REX_W,
8647                                  TB, PD, EVEX_CD8<64, CD8VF>;
8649 defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8650                                  X86cvttp2ui, X86cvttp2uiSAE,
8651                                  SchedWriteCvtPS2DQ>, TB, PD,
8652                                  EVEX_CD8<32, CD8VH>;
8654 defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
8655                             sint_to_fp, X86VSintToFpRnd,
8656                             SchedWriteCvtDQ2PD>, REX_W, TB, XS, EVEX_CD8<64, CD8VF>;
8658 defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
8659                             uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
8660                             REX_W, TB, XS, EVEX_CD8<64, CD8VF>;
// The four instantiations below share avx512_cvtqq2ps_dq2ph: the *DQ2PH pair
// passes i32 sources / f16 destinations and gates on HasFP16, the *QQ2PS pair
// passes i64 sources / f32 destinations and keeps the default predicate.
8662 defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp,
8663                             X86any_VSintToFP, X86VMSintToFP,
8664                             X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8665                             SchedWriteCvtDQ2PS, HasFP16>,
8666                             T_MAP5, EVEX_CD8<32, CD8VF>;
8668 defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp,
8669                             X86any_VUintToFP, X86VMUintToFP,
8670                             X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8671                             SchedWriteCvtDQ2PS, HasFP16>, T_MAP5, XD,
8672                             EVEX_CD8<32, CD8VF>;
8674 defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp,
8675                             X86any_VSintToFP, X86VMSintToFP,
8676                             X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8677                             SchedWriteCvtDQ2PS>, REX_W, TB,
8678                             EVEX_CD8<64, CD8VF>;
8680 defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp,
8681                             X86any_VUintToFP, X86VMUintToFP,
8682                             X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8683                             SchedWriteCvtDQ2PS>, REX_W, TB, XD,
8684                             EVEX_CD8<64, CD8VF>;
// Hand-written selection patterns for the 128-bit packed-double -> 32-bit
// integer conversions (VCVT[T]PD2[U]DQZ128*). Each group covers register,
// full-vector load (loadv2f64) and 64-bit broadcast-load sources, in
// unmasked, merge-masked ($src0 pass-through) and zero-masked variants.
8686 let Predicates = [HasVLX] in {
8687   // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8688   // patterns have been disabled with null_frag.
8689   def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8690             (VCVTPD2DQZ128rr VR128X:$src)>;
8691   def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8692                           VK2WM:$mask),
8693             (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8694   def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8695                           VK2WM:$mask),
8696             (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8698   def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8699             (VCVTPD2DQZ128rm addr:$src)>;
8700   def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8701                           VK2WM:$mask),
8702             (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8703   def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8704                           VK2WM:$mask),
8705             (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8707   def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
8708             (VCVTPD2DQZ128rmb addr:$src)>;
8709   def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8710                           (v4i32 VR128X:$src0), VK2WM:$mask),
8711             (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8712   def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8713                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8714             (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8716   // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8717   // patterns have been disabled with null_frag.
8718   def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
8719             (VCVTTPD2DQZ128rr VR128X:$src)>;
8720   def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8721                           VK2WM:$mask),
8722             (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8723   def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8724                           VK2WM:$mask),
8725             (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8727   def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
8728             (VCVTTPD2DQZ128rm addr:$src)>;
8729   def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8730                           VK2WM:$mask),
8731             (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8732   def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8733                           VK2WM:$mask),
8734             (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8736   def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
8737             (VCVTTPD2DQZ128rmb addr:$src)>;
8738   def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8739                           (v4i32 VR128X:$src0), VK2WM:$mask),
8740             (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8741   def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8742                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8743             (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8745   // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8746   // patterns have been disabled with null_frag.
8747   def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8748             (VCVTPD2UDQZ128rr VR128X:$src)>;
8749   def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8750                            VK2WM:$mask),
8751             (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8752   def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8753                            VK2WM:$mask),
8754             (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8756   def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8757             (VCVTPD2UDQZ128rm addr:$src)>;
8758   def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8759                            VK2WM:$mask),
8760             (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8761   def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8762                            VK2WM:$mask),
8763             (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8765   def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
8766             (VCVTPD2UDQZ128rmb addr:$src)>;
8767   def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8768                            (v4i32 VR128X:$src0), VK2WM:$mask),
8769             (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8770   def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8771                            v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8772             (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8774   // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
8775   // patterns have been disabled with null_frag.
8776   def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
8777             (VCVTTPD2UDQZ128rr VR128X:$src)>;
8778   def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8779                           VK2WM:$mask),
8780             (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8781   def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8782                           VK2WM:$mask),
8783             (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8785   def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
8786             (VCVTTPD2UDQZ128rm addr:$src)>;
8787   def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8788                           VK2WM:$mask),
8789             (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8790   def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8791                           VK2WM:$mask),
8792             (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8794   def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
8795             (VCVTTPD2UDQZ128rmb addr:$src)>;
8796   def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8797                           (v4i32 VR128X:$src0), VK2WM:$mask),
8798             (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8799   def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8800                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8801             (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
// Extra memory-folding patterns for the 128-bit float -> quadword conversions
// (VCVT[T]PS2[U]QQZ128). They match a source of the form
// (bc_v4f32 (v2f64 (X86vzload64 addr))) and select the rm / rmk / rmkz
// instruction forms. The masked variants are expressed with vselect_mask,
// using either a pass-through register ($src0) or an all-zeros vector as the
// false operand.
8804 let Predicates = [HasDQI, HasVLX] in {
  // vcvtps2qq
8805   def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8806             (VCVTPS2QQZ128rm addr:$src)>;
8807   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8808                                  (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8809                                  VR128X:$src0)),
8810             (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8811   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8812                                  (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8813                                  v2i64x_info.ImmAllZerosV)),
8814             (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
  // vcvtps2uqq
8816   def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8817             (VCVTPS2UQQZ128rm addr:$src)>;
8818   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8819                                  (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8820                                  VR128X:$src0)),
8821             (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8822   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8823                                  (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8824                                  v2i64x_info.ImmAllZerosV)),
8825             (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
  // vcvttps2qq: the unmasked pattern uses X86any_cvttp2si, the masked ones
  // use X86cvttp2si.
8827   def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8828             (VCVTTPS2QQZ128rm addr:$src)>;
8829   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8830                                  (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8831                                  VR128X:$src0)),
8832             (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8833   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8834                                  (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8835                                  v2i64x_info.ImmAllZerosV)),
8836             (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
  // vcvttps2uqq: the unmasked pattern uses X86any_cvttp2ui, the masked ones
  // use X86cvttp2ui.
8838   def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8839             (VCVTTPS2UQQZ128rm addr:$src)>;
8840   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8841                                  (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8842                                  VR128X:$src0)),
8843             (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8844   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8845                                  (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8846                                  v2i64x_info.ImmAllZerosV)),
8847             (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
// Extra memory-folding patterns for the 128-bit doubleword -> double
// conversions (VCVT[U]DQ2PDZ128). They match a source of the form
// (bc_v4i32 (v2i64 (X86vzload64 addr))) and select the rm / rmk / rmkz forms.
// The unmasked patterns use the X86any_V[SU]intToFP nodes; the vselect_mask
// patterns use X86V[SU]intToFP.
8850 let Predicates = [HasVLX] in {
  // vcvtdq2pd
8851   def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8852             (VCVTDQ2PDZ128rm addr:$src)>;
8853   def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8854                                  (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8855                                  VR128X:$src0)),
8856             (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8857   def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8858                                  (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8859                                  v2f64x_info.ImmAllZerosV)),
8860             (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
  // vcvtudq2pd
8862   def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8863             (VCVTUDQ2PDZ128rm addr:$src)>;
8864   def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8865                                  (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8866                                  VR128X:$src0)),
8867             (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8868   def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8869                                  (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8870                                  v2f64x_info.ImmAllZerosV)),
8871             (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8874 //===----------------------------------------------------------------------===//
8875 // Half precision conversion instructions
8876 //===----------------------------------------------------------------------===//
// vcvtph2ps (opcode 0x13): register (rr) and memory (rm) source forms.
//   _dest / _src - destination and source vector infos.
//   x86memop     - memory operand class for the rm form.
//   ld_dag       - DAG describing the memory source; it is wrapped in _src.VT.
//   sched        - scheduling class; the rm form uses sched.Folded.
// Each form passes two DAGs to AVX512_maskable_split, one built on
// X86any_cvtph2ps and one on X86cvtph2ps (presumably the unmasked and masked
// selection patterns - confirm against AVX512_maskable_split).
// All records are marked as using MXCSR and as possibly raising FP exceptions.
8878 let Uses = [MXCSR], mayRaiseFPException = 1 in
8879 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8880                            X86MemOperand x86memop, dag ld_dag,
8881                            X86FoldableSchedWrite sched> {
8882   defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
8883                             (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
8884                             (X86any_cvtph2ps (_src.VT _src.RC:$src)),
8885                             (X86cvtph2ps (_src.VT _src.RC:$src))>,
8886                             T8, PD, Sched<[sched]>;
8887   defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
8888                             (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
8889                             (X86any_cvtph2ps (_src.VT ld_dag)),
8890                             (X86cvtph2ps (_src.VT ld_dag))>,
8891                             T8, PD, Sched<[sched.Folded]>;
// vcvtph2ps register form with the "{sae}" assembly operand (rrb). It is
// selected from X86cvtph2psSAE and encoded with EVEX_B. Unlike the plain
// vcvtph2ps forms, only Uses = [MXCSR] is set here; mayRaiseFPException is
// not.
8894 multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8895                                X86FoldableSchedWrite sched> {
8896   let Uses = [MXCSR] in
8897   defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
8898                              (ins _src.RC:$src), "vcvtph2ps",
8899                              "{sae}, $src", "$src, {sae}",
8900                              (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
8901                              T8, PD, EVEX_B, Sched<[sched]>;
// 512-bit vcvtph2ps: v16i16 source (f256mem for the memory form, matched with
// a plain load) to v16f32, combining the base rr/rm forms with the {sae} form.
8904 let Predicates = [HasAVX512] in
8905   defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
8906                                     (load addr:$src), WriteCvtPH2PSZ>,
8907                     avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
8908                     EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
// 256-bit and 128-bit vcvtph2ps (predicate HasVLX). Neither gets a {sae}
// form. The 128-bit memory form uses an f64mem operand and matches an
// X86vzload64 that is bitconverted from v2i64.
8910 let Predicates = [HasVLX] in {
8911   defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
8912                        (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
8913                        EVEX_CD8<32, CD8VH>;
8914   defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
8915                        (bitconvert (v2i64 (X86vzload64 addr:$src))),
8916                        WriteCvtPH2PS>, EVEX, EVEX_V128,
8917                        EVEX_CD8<32, CD8VH>;
8919   // Pattern match vcvtph2ps of a scalar i64 load.
       // The loadi64 is wrapped in scalar_to_vector/bitconvert and folded into
       // the memory form VCVTPH2PSZ128rm.
8920   def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
8921               (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
8922             (VCVTPH2PSZ128rm addr:$src)>;
// avx512_cvtps2ph - vcvtps2ph (opcode 0x1D) taking an immediate $src2
// (i32u8imm operand).
//   _dest / _src : vector type infos for the result and the source operand.
//   x86memop     : memory operand class for the memory-destination forms.
//   RR / MR      : scheduling classes for register and memory destinations.
// Defined records:
//   rr   : register destination, selected from X86any_cvtps2ph.
//   rrk  : masked (EVEX_K), $src0 tied to $dst and passed to X86mcvtps2ph.
//   rrkz : masked (EVEX_KZ), _dest.ImmAllZerosV passed in place of $src0.
//   mr   : memory destination, no pattern.
//   mrk  : masked memory destination (EVEX_K), no pattern.
// All records get GenericDomain, Uses = [MXCSR] and mayRaiseFPException.
// NOTE(review): mrk takes its mask from _dest.KRCWM whereas rrk/rrkz use
// _src.KRCWM - confirm this difference is intended.
8925 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8926                            X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
8927 let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8928   def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8929              (ins _src.RC:$src1, i32u8imm:$src2),
8930              "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8931              [(set _dest.RC:$dst,
8932                    (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
8933              Sched<[RR]>;
8934   let Constraints = "$src0 = $dst" in
8935   def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8936              (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8937              "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
8938              [(set _dest.RC:$dst,
8939                    (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8940                                  _dest.RC:$src0, _src.KRCWM:$mask))]>,
8941              Sched<[RR]>, EVEX_K;
8942   def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8943              (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8944              "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
8945              [(set _dest.RC:$dst,
8946                    (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8947                                  _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
8948              Sched<[RR]>, EVEX_KZ;
       // Memory-destination forms carry empty pattern lists, so mayStore and
       // hasSideEffects are spelled out explicitly.
8949   let hasSideEffects = 0, mayStore = 1 in {
8950     def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
8951                (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
8952                "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8953                Sched<[MR]>;
8954     def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
8955                (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8956                "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
8957                 EVEX_K, Sched<[MR]>;
8958   }
// avx512_cvtps2ph_sae - "{sae}" register forms of vcvtps2ph (opcode 0x1D),
// all encoded with EVEX_B:
//   rrb   : unmasked, selected from X86cvtps2phSAE.
//   rrbk  : masked (EVEX_K), $src0 tied to $dst, selected from
//           X86mcvtps2phSAE.
//   rrbkz : masked (EVEX_KZ), _dest.ImmAllZerosV in place of $src0.
// Note that the scheduling parameter is itself named Sched, the same
// identifier as the Sched<> class it is handed to below.
8962 multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8963                                SchedWrite Sched> {
8964   let hasSideEffects = 0, Uses = [MXCSR] in {
8965     def rrb : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8966               (ins _src.RC:$src1, i32u8imm:$src2),
8967               "vcvtps2ph\t{$src2, {sae}, $src1, $dst|$dst, $src1, {sae}, $src2}",
8968               [(set _dest.RC:$dst,
8969                     (X86cvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
8970               EVEX_B, Sched<[Sched]>;
8971     let Constraints = "$src0 = $dst" in
8972     def rrbk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8973               (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8974               "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}}|$dst {${mask}}, $src1, {sae}, $src2}",
8975               [(set _dest.RC:$dst,
8976                     (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
8977                                   _dest.RC:$src0, _src.KRCWM:$mask))]>,
8978               EVEX_B, Sched<[Sched]>, EVEX_K;
8979     def rrbkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8980               (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8981               "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, {sae}, $src2}",
8982               [(set _dest.RC:$dst,
8983                     (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
8984                                   _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
8985               EVEX_B, Sched<[Sched]>, EVEX_KZ;
// 512-bit vcvtps2ph (predicate HasAVX512): v16f32 source, v16i16x result,
// f256mem memory operand, plus the {sae} register forms.
8989 let Predicates = [HasAVX512] in {
8990   defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
8991                                     WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
8992                     avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
8993                                         EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
       // A store of the v16i16 conversion result selects the memory-destination
       // form, which has no pattern of its own in avx512_cvtps2ph.
8995   def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
8996             (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
// 256-bit and 128-bit vcvtps2ph (predicate HasVLX), followed by the store
// patterns that select their memory-destination (mr) forms.
8999 let Predicates = [HasVLX] in {
9000   defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
9001                                        WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
9002                                        EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
9003   defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
9004                                        WriteCvtPS2PH, WriteCvtPS2PHSt>,
9005                                        EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
       // 128-bit source: storing element 0 of the result, viewed either as
       // v2f64 or as v2i64, selects VCVTPS2PHZ128mr.
9007   def : Pat<(store (f64 (extractelt
9008                          (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9009                          (iPTR 0))), addr:$dst),
9010             (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9011   def : Pat<(store (i64 (extractelt
9012                          (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9013                          (iPTR 0))), addr:$dst),
9014             (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
       // 256-bit source: storing the whole v8i16 result selects VCVTPS2PHZ256mr.
9015   def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
9016             (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
9019 //  Unordered/Ordered scalar fp compare with Sae and set EFLAGS
// avx512_ord_cmp_sae - single register-register "{sae}" record (rrb) with
// no outputs and no selection pattern.
//   opc       : opcode byte.
//   _         : vector type info supplying the register class of both sources.
//   OpcodeStr : mnemonic, used as given.
//   d         : execution domain.
//   sched     : scheduling class (defaults to WriteFComX).
// The record is marked EVEX, EVEX_B, VEX_LIG and EVEX_V128. EFLAGS is not
// listed here; the instantiations in this file wrap it in Defs = [EFLAGS].
9020 multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
9021                               string OpcodeStr, Domain d,
9022                               X86FoldableSchedWrite sched = WriteFComX> {
9023   let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in
9024   def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
9025                   !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
9026                   EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
// {sae} forms of vucomiss/vucomisd (opcode 0x2E) and vcomiss/vcomisd
// (opcode 0x2F). All define EFLAGS and are predicated on HasAVX512; the
// double-precision variants add REX_W and use a 64-bit EVEX_CD8 element.
9029 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9030   defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
9031                                    AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9032   defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
9033                                    AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
9034   defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
9035                                    AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9036   defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
9037                                    AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
// Non-{sae} scalar compares, built from sse12_ord_cmp and
// sse12_ord_cmp_int. They share the VUCOMIS*/VCOMIS* defm prefixes with
// the {sae} definitions and likewise define EFLAGS under HasAVX512.
//   - FR32X/FR64X forms: ucomis* selects from X86any_fcmp, comis* from
//     X86strict_fcmps.
//   - VR128X forms (isCodeGenOnly): ucomis* selects from X86ucomi, comis*
//     from X86comi, with ssmem/sdmem memory operands.
// NOTE(review): the mnemonic strings here carry no leading "v", unlike the
// {sae} definitions; presumably the prefix is added elsewhere - confirm.
9040 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9041   defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
9042                                  "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9043                                  EVEX_CD8<32, CD8VT1>;
9044   defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
9045                                   "ucomisd", SSEPackedDouble>, TB, PD, EVEX,
9046                                   VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9047   defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
9048                                  "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9049                                  EVEX_CD8<32, CD8VT1>;
9050   defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
9051                                  "comisd", SSEPackedDouble>, TB, PD, EVEX,
9052                                   VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9053   let isCodeGenOnly = 1 in {
9054     defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
9055                           sse_load_f32, "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9056                           EVEX_CD8<32, CD8VT1>;
9057     defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
9058                           sse_load_f64, "ucomisd", SSEPackedDouble>, TB, PD, EVEX,
9059                           VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9061     defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
9062                           sse_load_f32, "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9063                           EVEX_CD8<32, CD8VT1>;
9064     defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
9065                           sse_load_f64, "comisd", SSEPackedDouble>, TB, PD, EVEX,
9066                           VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9067   }
// Half-precision compares vucomish (0x2E) and vcomish (0x2F), predicated
// on HasFP16 and defining EFLAGS. They mirror the single/double
// definitions: a {sae} form, an FR16X form and an isCodeGenOnly VR128X
// form each, all in T_MAP5 with a 16-bit EVEX_CD8 element.
// NOTE(review): these f16 records are given the SSEPackedSingle domain -
// confirm that is the intended execution domain.
9070 let Defs = [EFLAGS], Predicates = [HasFP16] in {
9071   defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
9072                                 SSEPackedSingle>, AVX512PSIi8Base, T_MAP5,
9073                                 EVEX_CD8<16, CD8VT1>;
9074   defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
9075                                 SSEPackedSingle>, AVX512PSIi8Base, T_MAP5,
9076                                 EVEX_CD8<16, CD8VT1>;
9077   defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
9078                                 "ucomish", SSEPackedSingle>, T_MAP5, EVEX,
9079                                 VEX_LIG, EVEX_CD8<16, CD8VT1>;
9080   defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
9081                                 "comish", SSEPackedSingle>, T_MAP5, EVEX,
9082                                 VEX_LIG, EVEX_CD8<16, CD8VT1>;
9083   let isCodeGenOnly = 1 in {
9084     defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
9085                                 sse_load_f16, "ucomish", SSEPackedSingle>,
9086                                 T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9088     defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
9089                                 sse_load_f16, "comish", SSEPackedSingle>,
9090                                 T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9091   }
9094 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
// Two-source scalar records built with AVX512_maskable_scalar:
//   rr : both sources in registers.
//   rm : second source from memory (_.IntScalarMemOp, matched through
//        _.ScalarIntMemFrags).
// Parameters:
//   opc / OpcodeStr : opcode byte and mnemonic.
//   OpNode          : SDNode applied to ($src1, $src2).
//   sched           : scheduling class; rm uses Folded and ReadAfterFold.
//   _               : vector type info (register class, VT, ExeDomain).
//   prd             : predicate for both records, HasAVX512 by default.
9095 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9096                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
9097                          Predicate prd = HasAVX512> {
9098   let Predicates = [prd], ExeDomain = _.ExeDomain in {
9099   defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9100                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9101                            "$src2, $src1", "$src1, $src2",
9102                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9103                            EVEX, VVVV, VEX_LIG, Sched<[sched]>;
9104   defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9105                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9106                          "$src2, $src1", "$src1, $src2",
9107                          (OpNode (_.VT _.RC:$src1),
9108                           (_.ScalarIntMemFrags addr:$src2))>, EVEX, VVVV, VEX_LIG,
9109                           Sched<[sched.Folded, sched.ReadAfterFold]>;
// Scalar reciprocal (opcode 0x4D) and reciprocal-square-root (opcode 0x4F)
// instantiations of avx512_fp14_s. The half-precision vrcpsh/vrsqrtsh use
// HasFP16 and T_MAP6; the ss/sd variants keep the default predicate and
// use T8.
// NOTE(review): only the ss/sd variants sit inside 'let Uses = [MXCSR]';
// the sh variants are defined outside it - confirm this is intended.
9113 defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
9114                                f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
9115                                T_MAP6, PD;
9116 defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
9117                                  SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
9118                                  EVEX_CD8<16, CD8VT1>, T_MAP6, PD;
9119 let Uses = [MXCSR] in {
9120 defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
9121                                f32x_info>, EVEX_CD8<32, CD8VT1>,
9122                                T8, PD;
9123 defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
9124                                f64x_info>, REX_W, EVEX_CD8<64, CD8VT1>,
9125                                T8, PD;
9126 defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
9127                                  SchedWriteFRsqrt.Scl, f32x_info>,
9128                                  EVEX_CD8<32, CD8VT1>, T8, PD;
9129 defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
9130                                  SchedWriteFRsqrt.Scl, f64x_info>, REX_W,
9131                                  EVEX_CD8<64, CD8VT1>, T8, PD;
9134 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
// One-source packed records built with AVX512_maskable:
//   r  : register source.
//   m  : full-vector memory source (_.MemOp, loaded with _.LdFrag).
//   mb : broadcast memory source (_.ScalarMemOp, _.BroadcastLdFrag), marked
//        EVEX_B; the asm operand is "${src}" followed by _.BroadcastStr.
// All three hard-code the T8, PD modifiers here.
9135 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
9136                          X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9137   let ExeDomain = _.ExeDomain in {
9138   defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9139                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
9140                          (_.VT (OpNode _.RC:$src))>, EVEX, T8, PD,
9141                          Sched<[sched]>;
9142   defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9143                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9144                          (OpNode (_.VT
9145                            (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8, PD,
9146                          Sched<[sched.Folded, sched.ReadAfterFold]>;
9147   defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9148                           (ins _.ScalarMemOp:$src), OpcodeStr,
9149                           "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9150                           (OpNode (_.VT
9151                             (_.BroadcastLdFrag addr:$src)))>,
9152                           EVEX, T8, PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9153   }
// avx512_fp14_p_vl_all - instantiates avx512_fp14_p for every vector width
// and element type:
//   14PSZ / 14PDZ       : 512-bit ps/pd, mnemonic suffix "14ps"/"14pd".
//   14PSZ128/256, 14PDZ128/256 : 128/256-bit ps/pd, predicated on HasVLX.
//   PHZ                 : 512-bit ph (HasFP16), mnemonic suffix "ph".
//   PHZ128 / PHZ256     : 128/256-bit ph (HasFP16 and HasVLX).
// The ps/pd groups are wrapped in Uses = [MXCSR]; the ph groups are not.
// The ph variants append T_MAP6 on top of the T8 already set inside
// avx512_fp14_p (presumably the later modifier wins - confirm).
9156 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
9157                                 X86SchedWriteWidths sched> {
9158   let Uses = [MXCSR] in {
9159   defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
9160                              v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
9161   defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
9162                              v8f64_info>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
9163   }
9164   let Predicates = [HasFP16] in
9165   defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
9166                            v32f16_info>, EVEX_V512, T_MAP6, EVEX_CD8<16, CD8VF>;
9168   // Define only if AVX512VL feature is present.
9169   let Predicates = [HasVLX], Uses = [MXCSR] in {
9170     defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9171                                   OpNode, sched.XMM, v4f32x_info>,
9172                                   EVEX_V128, EVEX_CD8<32, CD8VF>;
9173     defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9174                                   OpNode, sched.YMM, v8f32x_info>,
9175                                   EVEX_V256, EVEX_CD8<32, CD8VF>;
9176     defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9177                                   OpNode, sched.XMM, v2f64x_info>,
9178                                   EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>;
9179     defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9180                                   OpNode, sched.YMM, v4f64x_info>,
9181                                   EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>;
9182   }
9183   let Predicates = [HasFP16, HasVLX] in {
9184     defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9185                                 OpNode, sched.XMM, v8f16x_info>,
9186                                 EVEX_V128, T_MAP6, EVEX_CD8<16, CD8VF>;
9187     defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9188                                 OpNode, sched.YMM, v16f16x_info>,
9189                                 EVEX_V256, T_MAP6, EVEX_CD8<16, CD8VF>;
9190   }
// Packed instantiations: the "vrsqrt"/"vrcp" stems are completed with
// "14ps", "14pd" or "ph" inside avx512_fp14_p_vl_all.
9193 defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
9194 defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;
9196 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
// Two-source scalar records built with AVX512_maskable_scalar:
//   r  : register sources, selected from OpNode, marked SIMD_EXC.
//   rb : register sources with "{sae}", selected from OpNodeSAE, marked
//        EVEX_B and not SIMD_EXC.
//   m  : second source from memory (_.IntScalarMemOp), selected from
//        OpNode, marked SIMD_EXC.
// All three list MXCSR in Uses. EVEX/VVVV and the opcode map are supplied
// by the instantiating multiclasses, not here.
9197 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
9198                          SDNode OpNode, SDNode OpNodeSAE,
9199                          X86FoldableSchedWrite sched> {
9200   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
9201   defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9202                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9203                            "$src2, $src1", "$src1, $src2",
9204                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9205                            Sched<[sched]>, SIMD_EXC;
9207   defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9208                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9209                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
9210                             (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9211                             EVEX_B, Sched<[sched]>;
9213   defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9214                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9215                          "$src2, $src1", "$src1, $src2",
9216                          (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
9217                          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9218   }
// avx512_eri_s - single- (SSZ, "ss") and double-precision (SDZ, "sd")
// instantiations of avx512_fp28_s; the sd variant additionally sets REX_W
// and a 64-bit EVEX_CD8 element size.
9221 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9222                         SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9223   defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
9224                            sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD, EVEX, VVVV;
9225   defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
9226                            sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD, EVEX, VVVV;
// avx512_vgetexpsh - half-precision (SHZ, "sh") instantiation of
// avx512_fp28_s, predicated on HasFP16 and placed in T_MAP6.
// NOTE(review): unlike avx512_eri_s, no VEX_LIG is applied here - confirm.
9229 multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
9230                         SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9231   let Predicates = [HasFP16] in
9232   defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode,  OpNodeSAE, sched>,
9233                EVEX_CD8<16, CD8VT1>, T_MAP6, PD, EVEX, VVVV;
// Scalar vrcp28 (0xCB) and vrsqrt28 (0xCD), predicated on HasERI.
9236 let Predicates = [HasERI] in {
9237   defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
9238                                SchedWriteFRcp.Scl>;
9239   defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
9240                                SchedWriteFRsqrt.Scl>;
// Scalar vgetexp (0x43): ss/sd through avx512_eri_s and sh through
// avx512_vgetexpsh. This defm sits outside the HasERI block above.
9243 defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9244                               SchedWriteFRnd.Scl>,
9245                  avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9246                                   SchedWriteFRnd.Scl>;
9247 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
// One-source packed records built with AVX512_maskable:
//   r  : register source.
//   m  : full-vector memory source (_.MemOp, loaded with _.LdFrag).
//   mb : broadcast memory source (_.ScalarMemOp, _.BroadcastLdFrag), EVEX_B.
// All three list MXCSR in Uses and set mayRaiseFPException. Unlike
// avx512_fp14_p, no EVEX or opcode-map modifier is applied here; the
// instantiations add them.
9249 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9250                          SDNode OpNode, X86FoldableSchedWrite sched> {
9251   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9252   defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9253                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
9254                          (OpNode (_.VT _.RC:$src))>,
9255                          Sched<[sched]>;
9257   defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9258                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9259                          (OpNode (_.VT
9260                              (bitconvert (_.LdFrag addr:$src))))>,
9261                           Sched<[sched.Folded, sched.ReadAfterFold]>;
9263   defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9264                          (ins _.ScalarMemOp:$src), OpcodeStr,
9265                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9266                          (OpNode (_.VT
9267                                   (_.BroadcastLdFrag addr:$src)))>,
9268                          EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9269   }
// avx512_fp28_p_sae - register-only "{sae}" packed record (rb), marked
// EVEX_B. Callers in this file pass the SAE flavour of the node as OpNode.
// It lists MXCSR in Uses but does not set mayRaiseFPException.
9271 multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9272                          SDNode OpNode, X86FoldableSchedWrite sched> {
9273   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
9274   defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9275                         (ins _.RC:$src), OpcodeStr,
9276                         "{sae}, $src", "$src, {sae}",
9277                         (OpNode (_.VT _.RC:$src))>,
9278                         EVEX_B, Sched<[sched]>;
// avx512_eri - 512-bit ps (PSZ) and pd (PDZ) instantiations, each
// combining the regular forms (avx512_fp28_p with OpNode) and the {sae}
// form (avx512_fp28_p_sae with OpNodeSAE). Only sched.ZMM is used.
9281 multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
9282                        SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9283    defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
9284               avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
9285               T8, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
9286    defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
9287               avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
9288               T8, PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
// avx512_fp_unaryop_packed - 128-bit and 256-bit ps/pd instantiations of
// avx512_fp28_p, predicated on HasVLX. No {sae} form is added at these
// widths.
9291 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
9292                                   SDNode OpNode, X86SchedWriteWidths sched> {
9293   // Define only if AVX512VL feature is present.
9294   let Predicates = [HasVLX] in {
9295     defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
9296                                 sched.XMM>,
9297                                 EVEX_V128, T8, PD, EVEX_CD8<32, CD8VF>;
9298     defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
9299                                 sched.YMM>,
9300                                 EVEX_V256, T8, PD, EVEX_CD8<32, CD8VF>;
9301     defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
9302                                 sched.XMM>,
9303                                 EVEX_V128, REX_W, T8, PD, EVEX_CD8<64, CD8VF>;
9304     defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
9305                                 sched.YMM>,
9306                                 EVEX_V256, REX_W, T8, PD, EVEX_CD8<64, CD8VF>;
9307   }
// avx512_vgetexp_fp16 - half-precision packed instantiations in T_MAP6:
//   PHZ             : 512-bit (HasFP16), regular forms plus the {sae} form.
//   PHZ128 / PHZ256 : 128/256-bit (HasFP16 and HasVLX), regular forms only.
9310 multiclass  avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
9311                        SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9312   let Predicates = [HasFP16] in
9313   defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
9314               avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
9315               T_MAP6, PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
9316   let Predicates = [HasFP16, HasVLX] in {
9317     defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
9318                                      EVEX_V128, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
9319     defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
9320                                      EVEX_V256, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
9321   }
// Packed 512-bit vrsqrt28 (0xCC), vrcp28 (0xCA) and vexp2 (0xC8),
// predicated on HasERI. EVEX is applied here because avx512_eri and
// avx512_fp28_p do not add it.
9323 let Predicates = [HasERI] in {
9324  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
9325                             SchedWriteFRsqrt>, EVEX;
9326  defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
9327                             SchedWriteFRcp>, EVEX;
9328  defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
9329                             SchedWriteFAdd>, EVEX;
// Packed vgetexp (0x42): 512-bit ps/pd (avx512_eri), all ph widths
// (avx512_vgetexp_fp16) and 128/256-bit ps/pd (avx512_fp_unaryop_packed).
// This defm sits outside the HasERI block above.
9331 defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9332                             SchedWriteFRnd>,
9333                  avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9334                                      SchedWriteFRnd>,
9335                  avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
9336                                           SchedWriteFRnd>, EVEX;
// avx512_sqrt_packed_round - register-only packed sqrt record (rb) with an
// extra AVX512RC:$rc operand, selected from X86fsqrtRnd and marked EVEX_B
// and EVEX_RC. MXCSR is not listed here; avx512_sqrt_packed_all_round adds
// Uses = [MXCSR] around its instantiations.
9338 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
9339                                     X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9340   let ExeDomain = _.ExeDomain in
9341   defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9342                          (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
9343                          (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
9344                          EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
// avx512_sqrt_packed - packed sqrt records built with
// AVX512_maskable_split:
//   r  : register source.
//   m  : full-vector memory source (_.MemOp, _.LdFrag).
//   mb : broadcast memory source (_.ScalarMemOp, _.BroadcastLdFrag), EVEX_B.
// Each passes two pattern DAGs, the first on any_fsqrt and the second on
// fsqrt (presumably unmasked vs. masked selection - confirm against
// AVX512_maskable_split). All list MXCSR in Uses and set
// mayRaiseFPException.
9347 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
9348                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9349   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9350   defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
9351                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
9352                          (_.VT (any_fsqrt _.RC:$src)),
9353                          (_.VT (fsqrt _.RC:$src))>, EVEX,
9354                          Sched<[sched]>;
9355   defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9356                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9357                          (any_fsqrt (_.VT (_.LdFrag addr:$src))),
9358                          (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
9359                          Sched<[sched.Folded, sched.ReadAfterFold]>;
9360   defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9361                           (ins _.ScalarMemOp:$src), OpcodeStr,
9362                           "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9363                           (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
9364                           (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
9365                           EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9366   }
// avx512_sqrt_packed_all - instantiates avx512_sqrt_packed for every
// element type and vector width:
//   PHZ, PHZ128, PHZ256 : half precision, T_MAP5, scheduled from sched.PH;
//                         HasFP16 (plus HasVLX for 128/256-bit).
//   PSZ, PSZ128, PSZ256 : single precision, TB, scheduled from sched.PS.
//   PDZ, PDZ128, PDZ256 : double precision, REX_W, TB, PD, sched.PD.
// The 128/256-bit ps/pd variants are predicated on HasVLX. The outer 'let'
// repeats the Uses/mayRaiseFPException settings that avx512_sqrt_packed
// already applies to its own records.
9369 let Uses = [MXCSR], mayRaiseFPException = 1 in
9370 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
9371                                   X86SchedWriteSizes sched> {
9372   let Predicates = [HasFP16] in
9373   defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9374                                 sched.PH.ZMM, v32f16_info>,
9375                                 EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
9376   let Predicates = [HasFP16, HasVLX] in {
9377     defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9378                                      sched.PH.XMM, v8f16x_info>,
9379                                      EVEX_V128, T_MAP5, EVEX_CD8<16, CD8VF>;
9380     defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9381                                      sched.PH.YMM, v16f16x_info>,
9382                                      EVEX_V256, T_MAP5, EVEX_CD8<16, CD8VF>;
9383   }
9384   defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9385                                 sched.PS.ZMM, v16f32_info>,
9386                                 EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
9387   defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9388                                 sched.PD.ZMM, v8f64_info>,
9389                                 EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
9390   // Define only if AVX512VL feature is present.
9391   let Predicates = [HasVLX] in {
9392     defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9393                                      sched.PS.XMM, v4f32x_info>,
9394                                      EVEX_V128, TB, EVEX_CD8<32, CD8VF>;
9395     defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9396                                      sched.PS.YMM, v8f32x_info>,
9397                                      EVEX_V256, TB, EVEX_CD8<32, CD8VF>;
9398     defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9399                                      sched.PD.XMM, v2f64x_info>,
9400                                      EVEX_V128, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
9401     defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9402                                      sched.PD.YMM, v4f64x_info>,
9403                                      EVEX_V256, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
9404   }
// avx512_sqrt_packed_all_round - 512-bit instantiations of
// avx512_sqrt_packed_round for ph (HasFP16, T_MAP5), ps (TB) and pd
// (REX_W, TB, PD). No 128/256-bit variants are defined. The outer 'let'
// supplies Uses = [MXCSR].
9407 let Uses = [MXCSR] in
9408 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
9409                                         X86SchedWriteSizes sched> {
9410   let Predicates = [HasFP16] in
9411   defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
9412                                       sched.PH.ZMM, v32f16_info>,
9413                                       EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
9414   defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
9415                                       sched.PS.ZMM, v16f32_info>,
9416                                       EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
9417   defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
9418                                       sched.PD.ZMM, v8f64_info>,
9419                                       EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
// avx512_sqrt_scalar - scalar sqrt records plus the selection patterns for
// plain scalar any_fsqrt.
//   opc / OpcodeStr : opcode byte and mnemonic.
//   sched           : scheduling class.
//   _               : vector type info (RC, FRC, EltVT, memory operands).
//   Name            : record-name prefix; the patterns look up Name#Zr and
//                     Name#Zm with !cast<Instruction>.
//   prd             : predicate, HasAVX512 by default.
// Records:
//   r_Int / m_Int : vector-register forms selected from X86fsqrts, SIMD_EXC.
//   rb_Int        : as r_Int with an AVX512RC:$rc operand, selected from
//                   X86fsqrtRnds; EVEX_B, EVEX_RC, Uses = [MXCSR].
//   r / m         : isCodeGenOnly forms on _.FRC with empty patterns.
9422 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9423                               X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
9424   let ExeDomain = _.ExeDomain, Predicates = [prd] in {
9425     defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9426                          (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9427                          "$src2, $src1", "$src1, $src2",
9428                          (X86fsqrts (_.VT _.RC:$src1),
9429                                     (_.VT _.RC:$src2))>,
9430                          Sched<[sched]>, SIMD_EXC;
9431     defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9432                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9433                          "$src2, $src1", "$src1, $src2",
9434                          (X86fsqrts (_.VT _.RC:$src1),
9435                                     (_.ScalarIntMemFrags addr:$src2))>,
9436                          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9437     let Uses = [MXCSR] in
9438     defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9439                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
9440                          "$rc, $src2, $src1", "$src1, $src2, $rc",
9441                          (X86fsqrtRnds (_.VT _.RC:$src1),
9442                                      (_.VT _.RC:$src2),
9443                                      (i32 timm:$rc))>,
9444                          EVEX_B, EVEX_RC, Sched<[sched]>;
         // Scalar-register forms carry no patterns of their own; they are
         // selected through the explicit Pat<> records further down.
9446     let isCodeGenOnly = 1, hasSideEffects = 0 in {
9447       def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9448                 (ins _.FRC:$src1, _.FRC:$src2),
9449                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9450                 Sched<[sched]>, SIMD_EXC;
9451       let mayLoad = 1 in
9452         def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9453                   (ins _.FRC:$src1, _.ScalarMemOp:$src2),
9454                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9455                   Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9456     }
9457   }
       // any_fsqrt of a scalar register selects Name#Zr, with IMPLICIT_DEF as
       // the first source operand.
9459   let Predicates = [prd] in {
9460     def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
9461               (!cast<Instruction>(Name#Zr)
9462                   (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
9463   }
       // The load-folding form Name#Zm is selected only under OptForSize.
9465   let Predicates = [prd, OptForSize] in {
9466     def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
9467               (!cast<Instruction>(Name#Zm)
9468                   (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
9469   }
// Instantiates avx512_sqrt_scalar for the "sh", "ss" and "sd" mnemonic
// suffixes using f16x_info, f32x_info and f64x_info respectively.
// Only the SH variant overrides the predicate (HasFP16) and uses T_MAP5;
// SS/SD keep the default predicate and use TB. SD additionally sets REX_W.
9472 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
9473                                   X86SchedWriteSizes sched> {
9474   defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>,
9475                         EVEX_CD8<16, CD8VT1>, EVEX, VVVV, T_MAP5, XS;
9476   defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
9477                         EVEX_CD8<32, CD8VT1>, EVEX, VVVV, TB, XS;
9478   defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
9479                         EVEX_CD8<64, CD8VT1>, EVEX, VVVV, TB, XD, REX_W;
// Packed VSQRT instructions (opcode 0x51): the plain packed forms plus the
// packed forms produced by avx512_sqrt_packed_all_round.
9482 defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
9483              avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
// Scalar VSQRT instructions (same opcode), all tagged VEX_LIG.
9485 defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
// Scalar round-scale definitions for one element type.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic string.
//   sched     - scheduling class; memory forms use sched.Folded and
//               sched.ReadAfterFold.
//   _         - vector type info supplying RC, FRC, VT, EltVT, the scalar
//               memory operands/fragments and ExeDomain.
// Every form takes an immediate operand (i32u8imm:$src3, matched as timm).
// NOTE(review): the codegen-only forms and the Pat records below hard-code
// HasAVX512, while VRNDSCALESHZ is instantiated under
// `let Predicates = [HasFP16]` - confirm which predicate ends up applying.
9487 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
9488                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9489   let ExeDomain = _.ExeDomain in {
  // Maskable register form, selected from X86RndScales.
9490   defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9491                            (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9492                            "$src3, $src2, $src1", "$src1, $src2, $src3",
9493                            (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9494                            (i32 timm:$src3)))>,
9495                            Sched<[sched]>, SIMD_EXC;
  // Maskable register form printed with "{sae}", selected from
  // X86RndScalesSAE. Tagged EVEX_B; lists MXCSR in Uses and does not mix in
  // SIMD_EXC.
9497   let Uses = [MXCSR] in
9498   defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9499                          (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9500                          "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
9501                          (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9502                          (i32 timm:$src3)))>, EVEX_B,
9503                          Sched<[sched]>;
  // Maskable memory form; the second source is loaded via
  // _.ScalarIntMemFrags.
9505   defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9506                          (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
9507                          OpcodeStr,
9508                          "$src3, $src2, $src1", "$src1, $src2, $src3",
9509                          (_.VT (X86RndScales _.RC:$src1,
9510                                 (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
9511                          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  // Pattern-less codegen-only forms on the scalar FP register class (_.FRC);
  // these are the NAME#r / NAME#m records referenced by the Pat records below.
9513   let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
9514     def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9515                (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
9516                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9517                []>, Sched<[sched]>, SIMD_EXC;
9519     let mayLoad = 1 in
9520       def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9521                  (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9522                  OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9523                  []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9524   }
9525   }
  // Select scalar X86any_VRndScale to NAME#r, passing IMPLICIT_DEF as the
  // first source operand.
9527   let Predicates = [HasAVX512] in {
9528     def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
9529               (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
9530                _.FRC:$src1, timm:$src2))>;
9531   }
  // The load-folding form (NAME#m) is only selected when OptForSize is also
  // satisfied.
9533   let Predicates = [HasAVX512, OptForSize] in {
9534     def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
9535               (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
9536                addr:$src1, timm:$src2))>;
9537   }
// Scalar round-scale instantiations.
// SH: opcode 0x0A, f16x_info, AVX512PSIi8Base + TA, wrapped in HasFP16.
// Unlike the SS/SD variants below, it does not list VEX_LIG.
9540 let Predicates = [HasFP16] in
9541 defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
9542                                            SchedWriteFRnd.Scl, f16x_info>,
9543                                            AVX512PSIi8Base, TA, EVEX, VVVV,
9544                                            EVEX_CD8<16, CD8VT1>;
// SS: opcode 0x0A, f32x_info, AVX512AIi8Base.
9546 defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
9547                                            SchedWriteFRnd.Scl, f32x_info>,
9548                                            AVX512AIi8Base, EVEX, VVVV, VEX_LIG,
9549                                            EVEX_CD8<32, CD8VT1>;
// SD: opcode 0x0B, f64x_info, AVX512AIi8Base with REX_W.
9551 defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
9552                                            SchedWriteFRnd.Scl, f64x_info>,
9553                                            REX_W, AVX512AIi8Base, EVEX, VVVV, VEX_LIG,
9554                                            EVEX_CD8<64, CD8VT1>;
// Selection patterns that fold a masked scalar operation, expressed as
// Move(src1, scalar_to_vector(X86selects_mask(...))), into the masked
// "V"#OpcPrefix#r_Intk / r_Intkz instructions.
//   OpNode        - scalar operation applied to element 0 of $src2.
//   OpcPrefix     - instruction name without the leading "V" and without the
//                   r_Intk / r_Intkz suffix.
//   Move          - node that merges the scalar result into $src1.
//   Mask          - dag matched as the select condition.
//   _             - vector type info (only _.VT is used here).
//   ZeroFP        - leaf matched as the "false" value in the zeroing pattern.
//   OutMask       - dag emitted as the instruction's mask operand.
//   BasePredicate - predicate guarding both patterns.
9556 multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
9557                                 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
9558                                 dag OutMask, Predicate BasePredicate> {
9559   let Predicates = [BasePredicate] in {
    // Merge-masking: the select's false value is element 0 of $dst, which is
    // passed as the instruction's first operand.
9560     def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9561                (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9562                (extractelt _.VT:$dst, (iPTR 0))))),
9563               (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9564                _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
    // Zero-masking: the select's false value is ZeroFP.
9566     def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9567                (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9568                ZeroFP))),
9569               (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9570                OutMask, _.VT:$src2, _.VT:$src1)>;
9571   }
// Masked scalar fsqrt patterns for the SH, SS and SD sqrt instructions.
// In each case the matched mask is a GR32 value truncated to i8 and turned
// into a v1i1 vector, and the emitted mask operand is $mask copied to the
// VK1WM register class. The SH variant requires HasFP16; SS/SD require
// HasAVX512.
9574 defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
9575                             (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info,
9576                             fp16imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasFP16>;
9577 defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9578                             (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9579                             fp32imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9580 defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9581                             (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9582                             fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9585 //-------------------------------------------------
9586 // Integer truncate and extend operations
9587 //-------------------------------------------------
// Defines the five forms of one truncate instruction at a single vector
// width: rr, rrk, rrkz (register destination) and mr, mrk (memory
// destination).
//   opc       - opcode byte.
//   OpcodeStr - mnemonic string.
//   OpNode    - node matched by the unmasked rr pattern.
//   MaskNode  - operator matched by the rrk / rrkz patterns; it takes
//               (source, pass-through, mask).
//   sched     - scheduling class; store forms use sched.Folded.
//   SrcInfo   - type info of the source vector (also supplies the mask
//               register class KRCWM).
//   DestInfo  - type info of the destination vector.
//   x86memop  - memory operand type used by the store forms.
9589 multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9590                               SDPatternOperator MaskNode,
9591                               X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9592                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9593   let ExeDomain = DestInfo.ExeDomain in {
  // Unmasked register form.
9594   def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9595              (ins SrcInfo.RC:$src),
9596              OpcodeStr # "\t{$src, $dst|$dst, $src}",
9597              [(set DestInfo.RC:$dst,
9598                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9599              EVEX, Sched<[sched]>;
  // Merge-masked register form: $src0 is tied to $dst and is the
  // pass-through operand of MaskNode.
9600   let Constraints = "$src0 = $dst" in
9601   def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9602              (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9603              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9604              [(set DestInfo.RC:$dst,
9605                    (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9606                              (DestInfo.VT DestInfo.RC:$src0),
9607                              SrcInfo.KRCWM:$mask))]>,
9608              EVEX, EVEX_K, Sched<[sched]>;
  // Zero-masked register form: the pass-through is DestInfo.ImmAllZerosV.
9609   def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9610              (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9611              OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9612              [(set DestInfo.RC:$dst,
9613                    (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9614                              DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9615              EVEX, EVEX_KZ, Sched<[sched]>;
9616   }
  // Store forms. They are defined without patterns ([]); the multiclass
  // avx512_trunc_mr_lowering supplies Pat records that reference the mr and
  // mrk records by name.
9618   let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9619     def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9620                (ins x86memop:$dst, SrcInfo.RC:$src),
9621                OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9622                EVEX, Sched<[sched.Folded]>;
9624     def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9625                (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9626                OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9627                EVEX, EVEX_K, Sched<[sched.Folded]>;
9628   }//mayStore = 1, hasSideEffects = 0
// Selection patterns for the truncating-store forms of one instruction.
//   SrcInfo    - type info of the stored source vector; its ZSuffix selects
//                the width-specific instruction.
//   truncFrag  - fragment matched for the unmasked truncating store.
//   mtruncFrag - fragment matched for the masked truncating store.
//   Name       - instruction name prefix; the patterns emit
//                Name#SrcInfo.ZSuffix#mr and Name#SrcInfo.ZSuffix#mrk.
9631 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9632                                     PatFrag truncFrag, PatFrag mtruncFrag,
9633                                     string Name> {
  // Unmasked truncating store.
9635   def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9636             (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
9637                                     addr:$dst, SrcInfo.RC:$src)>;
  // Masked truncating store.
9639   def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9640                         SrcInfo.KRCWM:$mask),
9641             (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
9642                             addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
// Instantiates avx512_trunc_common plus avx512_trunc_mr_lowering at the
// 128-, 256- and 512-bit source widths (Z128, Z256, Z).
//   OpNode128/256/512     - unmasked node used at each width.
//   MaskNode128/256/512   - masked operator used at each width.
//   sched                 - per-width scheduling classes (XMM/YMM/ZMM).
//   VTSrcInfo             - source type info for all three widths.
//   DestInfoZ128/Z256/Z   - destination type info at each width.
//   x86memopZ128/Z256/Z   - store memory operand at each width.
//   truncFrag, mtruncFrag - store fragments forwarded to
//                           avx512_trunc_mr_lowering.
//   prd                   - base predicate (defaults to HasAVX512).
9645 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9646                         SDNode OpNode256, SDNode OpNode512,
9647                         SDPatternOperator MaskNode128,
9648                         SDPatternOperator MaskNode256,
9649                         SDPatternOperator MaskNode512,
9650                         X86SchedWriteWidths sched,
9651                         AVX512VLVectorVTInfo VTSrcInfo,
9652                         X86VectorVTInfo DestInfoZ128,
9653                         X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9654                         X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9655                         X86MemOperand x86memopZ, PatFrag truncFrag,
9656                         PatFrag mtruncFrag, Predicate prd = HasAVX512>{
  // The 128- and 256-bit forms additionally require HasVLX.
9658   let Predicates = [HasVLX, prd] in {
9659     defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched.XMM,
9660                              VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9661                 avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag,
9662                                          mtruncFrag, NAME>, EVEX_V128;
9664     defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched.YMM,
9665                              VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9666                 avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag,
9667                                          mtruncFrag, NAME>, EVEX_V256;
9668   }
  // The 512-bit form only requires prd.
9669   let Predicates = [prd] in
9670     defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched.ZMM,
9671                              VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9672                 avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag,
9673                                          mtruncFrag, NAME>, EVEX_V512;
// Truncate from i64 element vectors (avx512vl_i64_info) to i8 elements.
// All three widths use InVecNode / InVecMaskNode and a v16i8x_info
// destination; the store operands are i16mem, i32mem and i64mem for the
// 128-, 256- and 512-bit sources respectively.
9676 multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr,
9677                            X86SchedWriteWidths sched, PatFrag StoreNode,
9678                            PatFrag MaskedStoreNode, SDNode InVecNode,
9679                            SDPatternOperator InVecMaskNode> {
9680   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9681                           InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9682                           avx512vl_i64_info, v16i8x_info, v16i8x_info,
9683                           v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9684                           MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
// Truncate from i64 element vectors (avx512vl_i64_info) to i16 elements.
// The 128- and 256-bit widths use InVecNode / InVecMaskNode; only the
// 512-bit width uses OpNode / MaskNode. The destination is v8i16x_info at
// every width; store operands are i32mem, i64mem and i128mem.
9687 multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9688                            SDPatternOperator MaskNode,
9689                            X86SchedWriteWidths sched, PatFrag StoreNode,
9690                            PatFrag MaskedStoreNode, SDNode InVecNode,
9691                            SDPatternOperator InVecMaskNode> {
9692   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9693                           InVecMaskNode, InVecMaskNode, MaskNode, sched,
9694                           avx512vl_i64_info, v8i16x_info, v8i16x_info,
9695                           v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9696                           MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
// Truncate from i64 element vectors (avx512vl_i64_info) to i32 elements.
// Only the 128-bit width uses InVecNode / InVecMaskNode; the 256- and
// 512-bit widths use OpNode / MaskNode. Destinations are v4i32x_info,
// v4i32x_info and v8i32x_info; store operands are i64mem, i128mem and
// i256mem.
9699 multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9700                            SDPatternOperator MaskNode,
9701                            X86SchedWriteWidths sched, PatFrag StoreNode,
9702                            PatFrag MaskedStoreNode, SDNode InVecNode,
9703                            SDPatternOperator InVecMaskNode> {
9704   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9705                           InVecMaskNode, MaskNode, MaskNode, sched,
9706                           avx512vl_i64_info, v4i32x_info, v4i32x_info,
9707                           v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9708                           MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
// Truncate from i32 element vectors (avx512vl_i32_info) to i8 elements.
// The 128- and 256-bit widths use InVecNode / InVecMaskNode; only the
// 512-bit width uses OpNode / MaskNode. The destination is v16i8x_info at
// every width; store operands are i32mem, i64mem and i128mem.
9711 multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
9712                            SDPatternOperator MaskNode,
9713                            X86SchedWriteWidths sched, PatFrag StoreNode,
9714                            PatFrag MaskedStoreNode, SDNode InVecNode,
9715                            SDPatternOperator InVecMaskNode> {
9716   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9717                           InVecMaskNode, InVecMaskNode, MaskNode, sched,
9718                           avx512vl_i32_info, v16i8x_info, v16i8x_info,
9719                           v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
9720                           MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
// Truncate from i32 element vectors (avx512vl_i32_info) to i16 elements.
// Only the 128-bit width uses InVecNode / InVecMaskNode; the 256- and
// 512-bit widths use OpNode / MaskNode. Destinations are v8i16x_info,
// v8i16x_info and v16i16x_info; store operands are i64mem, i128mem and
// i256mem.
9723 multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9724                            SDPatternOperator MaskNode,
9725                            X86SchedWriteWidths sched, PatFrag StoreNode,
9726                            PatFrag MaskedStoreNode, SDNode InVecNode,
9727                            SDPatternOperator InVecMaskNode> {
9728   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9729                           InVecMaskNode, MaskNode, MaskNode, sched,
9730                           avx512vl_i32_info, v8i16x_info, v8i16x_info,
9731                           v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
9732                           MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
// Truncate from i16 element vectors (avx512vl_i16_info) to i8 elements.
// Only the 128-bit width uses InVecNode / InVecMaskNode; the 256- and
// 512-bit widths use OpNode / MaskNode. Destinations are v16i8x_info,
// v16i8x_info and v32i8x_info; store operands are i64mem, i128mem and
// i256mem. Unlike the other avx512_trunc_* wrappers, this one passes HasBWI
// as the base predicate.
9735 multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9736                            SDPatternOperator MaskNode,
9737                            X86SchedWriteWidths sched, PatFrag StoreNode,
9738                            PatFrag MaskedStoreNode, SDNode InVecNode,
9739                            SDPatternOperator InVecMaskNode> {
9740   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9741                           InVecMaskNode, MaskNode, MaskNode, sched,
9742                           avx512vl_i16_info, v16i8x_info, v16i8x_info,
9743                           v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
9744                           MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
// Truncate instruction families. Each source/destination element pair has
// three variants: the plain form uses trunc / X86vtrunc, the "S" form uses
// X86vtruncs and the "US" form uses X86vtruncus (presumably signed- and
// unsigned-saturating truncation - confirm against the node definitions).
// All of them share SchedWriteVecTruncate.

// i64 -> i8
9747 defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",
9748                                   SchedWriteVecTruncate, truncstorevi8,
9749                                   masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9750 defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",
9751                                   SchedWriteVecTruncate, truncstore_s_vi8,
9752                                   masked_truncstore_s_vi8, X86vtruncs,
9753                                   X86vmtruncs>;
9754 defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb",
9755                                   SchedWriteVecTruncate, truncstore_us_vi8,
9756                                   masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>;
// i64 -> i16
9758 defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
9759                                   SchedWriteVecTruncate, truncstorevi16,
9760                                   masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9761 defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, select_truncs,
9762                                   SchedWriteVecTruncate, truncstore_s_vi16,
9763                                   masked_truncstore_s_vi16, X86vtruncs,
9764                                   X86vmtruncs>;
9765 defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
9766                                   select_truncus, SchedWriteVecTruncate,
9767                                   truncstore_us_vi16, masked_truncstore_us_vi16,
9768                                   X86vtruncus, X86vmtruncus>;
// i64 -> i32
9770 defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
9771                                   SchedWriteVecTruncate, truncstorevi32,
9772                                   masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
9773 defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, select_truncs,
9774                                   SchedWriteVecTruncate, truncstore_s_vi32,
9775                                   masked_truncstore_s_vi32, X86vtruncs,
9776                                   X86vmtruncs>;
9777 defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
9778                                   select_truncus, SchedWriteVecTruncate,
9779                                   truncstore_us_vi32, masked_truncstore_us_vi32,
9780                                   X86vtruncus, X86vmtruncus>;
// i32 -> i8
9782 defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
9783                                   SchedWriteVecTruncate, truncstorevi8,
9784                                   masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9785 defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
9786                                   SchedWriteVecTruncate, truncstore_s_vi8,
9787                                   masked_truncstore_s_vi8, X86vtruncs,
9788                                   X86vmtruncs>;
9789 defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus,
9790                                   select_truncus, SchedWriteVecTruncate,
9791                                   truncstore_us_vi8, masked_truncstore_us_vi8,
9792                                   X86vtruncus, X86vmtruncus>;
// i32 -> i16
9794 defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
9795                                   SchedWriteVecTruncate, truncstorevi16,
9796                                   masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9797 defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
9798                                   SchedWriteVecTruncate, truncstore_s_vi16,
9799                                   masked_truncstore_s_vi16, X86vtruncs,
9800                                   X86vmtruncs>;
9801 defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
9802                                   select_truncus, SchedWriteVecTruncate,
9803                                   truncstore_us_vi16, masked_truncstore_us_vi16,
9804                                   X86vtruncus, X86vmtruncus>;
// i16 -> i8
9806 defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
9807                                   SchedWriteVecTruncate, truncstorevi8,
9808                                   masked_truncstorevi8, X86vtrunc,
9809                                   X86vmtrunc>;
9810 defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
9811                                   SchedWriteVecTruncate, truncstore_s_vi8,
9812                                   masked_truncstore_s_vi8, X86vtruncs,
9813                                   X86vmtruncs>;
9814 defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
9815                                   select_truncus, SchedWriteVecTruncate,
9816                                   truncstore_us_vi8, masked_truncstore_us_vi8,
9817                                   X86vtruncus, X86vmtruncus>;
// Under HasAVX512 with NoVLX, 256-bit truncates are selected to the 512-bit
// (Zrr) instruction: the YMM source is inserted into an IMPLICIT_DEF 512-bit
// value at sub_ymm, truncated, and the low XMM of the result is extracted.
9819 let Predicates = [HasAVX512, NoVLX] in {
9820 def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
9821          (v8i16 (EXTRACT_SUBREG
9822                  (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
9823                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
9824 def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
9825          (v4i32 (EXTRACT_SUBREG
9826                  (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
9827                                            VR256X:$src, sub_ymm)))), sub_xmm))>;
// Same widening scheme for v16i16 -> v16i8 under HasBWI with NoVLX: the
// YMM source is widened to v32i16 and truncated with VPMOVWBZrr, and the
// low XMM of the result is extracted.
9830 let Predicates = [HasBWI, NoVLX] in {
9831 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9832          (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
9833                                             VR256X:$src, sub_ymm))), sub_xmm))>;
9836 // Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
// Maps a masked-truncate node onto the rrk / rrkz forms of an instruction.
//   InstrName - full instruction name without the rrk / rrkz suffix.
//   OpNode    - masked truncate node taking (source, pass-through, mask).
//   DestInfo  - type info of the result vector.
//   SrcInfo   - type info of the source vector (also supplies KRCWM).
9837 multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
9838                            X86VectorVTInfo DestInfo,
9839                            X86VectorVTInfo SrcInfo> {
  // Register pass-through -> merge-masked form.
9840   def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9841                                  DestInfo.RC:$src0,
9842                                  SrcInfo.KRCWM:$mask)),
9843             (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
9844                                                  SrcInfo.KRCWM:$mask,
9845                                                  SrcInfo.RC:$src)>;
  // All-zeros pass-through -> zero-masked form.
9847   def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9848                                  DestInfo.ImmAllZerosV,
9849                                  SrcInfo.KRCWM:$mask)),
9850             (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
9851                                                   SrcInfo.RC:$src)>;
// Masked-truncate lowerings for the 256-bit i32 -> i16 instructions
// (requires HasVLX).
9854 let Predicates = [HasVLX] in {
9855 defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
9856 defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
9857 defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
// Masked-truncate lowerings for the 512-bit i32 -> i16, i32 -> i8 and
// i64 -> i16 instructions (requires HasAVX512).
9860 let Predicates = [HasAVX512] in {
9861 defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
9862 defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
9863 defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
9865 defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
9866 defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
9867 defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
9869 defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
9870 defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
9871 defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
// Register (rr) and memory (rm) forms of one extend instruction at a single
// vector width, both built through AVX512_maskable.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic string.
//   sched     - scheduling class; the memory form uses sched.Folded.
//   DestInfo  - type info of the result vector.
//   SrcInfo   - type info of the register source vector.
//   x86memop  - memory operand type of the rm form.
//   LdFrag    - load fragment whose result is the rm form's pattern.
//   OpNode    - node applied to the register source in the rr pattern.
9874 multiclass avx512_pmovx_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9875               X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
9876               X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
9877   let ExeDomain = DestInfo.ExeDomain in {
9878   defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
9879                     (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
9880                     (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
9881                   EVEX, Sched<[sched]>;
9883   defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
9884                   (ins x86memop:$src), OpcodeStr ,"$src", "$src",
9885                   (DestInfo.VT (LdFrag addr:$src))>,
9886                 EVEX, Sched<[sched.Folded]>;
9887   }
// Extend i8 elements to i16 at the 128-, 256- and 512-bit result widths.
//   OpNode    - extend node used for the 256- and 512-bit forms.
//   InVecNode - in-vector extend node used for the 128-bit form.
//   ExtTy     - prefix used to build the default load fragment name
//               (ExtTy#"extloadvi8").
// Z128/Z256 require HasVLX and HasBWI; Z requires HasBWI.
9890 multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
9891           SDNode OpNode, SDNode InVecNode, string ExtTy,
9892           X86SchedWriteWidths sched,
9893           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9894   let Predicates = [HasVLX, HasBWI] in {
9895     defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v8i16x_info,
9896                     v16i8x_info, i64mem, LdFrag, InVecNode>,
9897                      EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V128, WIG;
9899     defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v16i16x_info,
9900                     v16i8x_info, i128mem, LdFrag, OpNode>,
9901                      EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V256, WIG;
9902   }
9903   let Predicates = [HasBWI] in {
9904     defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v32i16_info,
9905                     v32i8x_info, i256mem, LdFrag, OpNode>,
9906                      EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V512, WIG;
9907   }
// Extend i8 elements to i32 at the 128-, 256- and 512-bit result widths.
//   OpNode    - extend node used only for the 512-bit form.
//   InVecNode - in-vector extend node used for the 128- and 256-bit forms.
//   ExtTy     - prefix used to build the default load fragment name
//               (ExtTy#"extloadvi8").
// The register source is v16i8x_info at every width.
// Z128/Z256 require HasVLX and HasAVX512; Z requires HasAVX512.
9910 multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
9911           SDNode OpNode, SDNode InVecNode, string ExtTy,
9912           X86SchedWriteWidths sched,
9913           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9914   let Predicates = [HasVLX, HasAVX512] in {
9915     defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
9916                    v16i8x_info, i32mem, LdFrag, InVecNode>,
9917                          EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V128, WIG;
9919     defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
9920                    v16i8x_info, i64mem, LdFrag, InVecNode>,
9921                          EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V256, WIG;
9922   }
9923   let Predicates = [HasAVX512] in {
9924     defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
9925                    v16i8x_info, i128mem, LdFrag, OpNode>,
9926                          EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V512, WIG;
9927   }
// Extend i8 elements to i64 at the 128-, 256- and 512-bit result widths.
// This multiclass has no OpNode parameter: all three widths use InVecNode
// with a v16i8x_info register source.
//   ExtTy - prefix used to build the default load fragment name
//           (ExtTy#"extloadvi8").
// Z128/Z256 require HasVLX and HasAVX512; Z requires HasAVX512.
9930 multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
9931                               SDNode InVecNode, string ExtTy,
9932                               X86SchedWriteWidths sched,
9933                               PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9934   let Predicates = [HasVLX, HasAVX512] in {
9935     defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
9936                    v16i8x_info, i16mem, LdFrag, InVecNode>,
9937                      EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V128, WIG;
9939     defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
9940                    v16i8x_info, i32mem, LdFrag, InVecNode>,
9941                      EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V256, WIG;
9942   }
9943   let Predicates = [HasAVX512] in {
9944     defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
9945                    v16i8x_info, i64mem, LdFrag, InVecNode>,
9946                      EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V512, WIG;
9947   }
// Extend i16 elements to i32 at the 128-, 256- and 512-bit result widths.
//   OpNode    - extend node used for the 256- and 512-bit forms.
//   InVecNode - in-vector extend node used for the 128-bit form.
//   ExtTy     - prefix used to build the default load fragment name
//               (ExtTy#"extloadvi16").
// Z128/Z256 require HasVLX and HasAVX512; Z requires HasAVX512.
9950 multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
9951          SDNode OpNode, SDNode InVecNode, string ExtTy,
9952          X86SchedWriteWidths sched,
9953          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9954   let Predicates = [HasVLX, HasAVX512] in {
9955     defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
9956                    v8i16x_info, i64mem, LdFrag, InVecNode>,
9957                      EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V128, WIG;
9959     defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
9960                    v8i16x_info, i128mem, LdFrag, OpNode>,
9961                      EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V256, WIG;
9962   }
9963   let Predicates = [HasAVX512] in {
9964     defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
9965                    v16i16x_info, i256mem, LdFrag, OpNode>,
9966                      EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V512, WIG;
9967   }
// Extend i16 elements to i64 at the 128-, 256- and 512-bit result widths.
//   OpNode    - extend node used only for the 512-bit form.
//   InVecNode - in-vector extend node used for the 128- and 256-bit forms.
//   ExtTy     - prefix used to build the default load fragment name
//               (ExtTy#"extloadvi16").
// The register source is v8i16x_info at every width.
// Z128/Z256 require HasVLX and HasAVX512; Z requires HasAVX512.
9970 multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
9971          SDNode OpNode, SDNode InVecNode, string ExtTy,
9972          X86SchedWriteWidths sched,
9973          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9974   let Predicates = [HasVLX, HasAVX512] in {
9975     defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
9976                    v8i16x_info, i32mem, LdFrag, InVecNode>,
9977                      EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V128, WIG;
9979     defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
9980                    v8i16x_info, i64mem, LdFrag, InVecNode>,
9981                      EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V256, WIG;
9982   }
9983   let Predicates = [HasAVX512] in {
9984     defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
9985                    v8i16x_info, i128mem, LdFrag, OpNode>,
9986                      EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V512, WIG;
9987   }
// Extend i32 elements to i64 at the 128-, 256- and 512-bit result widths.
//   OpNode    - extend node used for the 256- and 512-bit forms.
//   InVecNode - in-vector extend node used for the 128-bit form.
//   ExtTy     - prefix used to build the default load fragment name
//               (ExtTy#"extloadvi32").
// Unlike the other avx512_pmovx_* multiclasses, these definitions do not
// list WIG.
// Z128/Z256 require HasVLX and HasAVX512; Z requires HasAVX512.
9990 multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
9991          SDNode OpNode, SDNode InVecNode, string ExtTy,
9992          X86SchedWriteWidths sched,
9993          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
9995   let Predicates = [HasVLX, HasAVX512] in {
9996     defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
9997                    v4i32x_info, i64mem, LdFrag, InVecNode>,
9998                      EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V128;
10000     defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
10001                    v4i32x_info, i128mem, LdFrag, OpNode>,
10002                      EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V256;
10003   }
10004   let Predicates = [HasAVX512] in {
10005     defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
10006                    v8i32x_info, i256mem, LdFrag, OpNode>,
10007                      EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V512;
10008   }
// Zero-extend families: zext / zext_invec nodes, "z" load-fragment prefix,
// opcodes 0x30-0x35.
10011 defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z", SchedWriteVecExtend>;
10012 defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z", SchedWriteVecExtend>;
10013 defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq",       zext_invec, "z", SchedWriteVecExtend>;
10014 defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z", SchedWriteVecExtend>;
10015 defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z", SchedWriteVecExtend>;
10016 defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z", SchedWriteVecExtend>;
// Sign-extend families: sext / sext_invec nodes, "s" load-fragment prefix,
// opcodes 0x20-0x25.
10018 defm VPMOVSXBW: avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s", SchedWriteVecExtend>;
10019 defm VPMOVSXBD: avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s", SchedWriteVecExtend>;
10020 defm VPMOVSXBQ: avx512_pmovx_bq<0x22, "vpmovsxbq",       sext_invec, "s", SchedWriteVecExtend>;
10021 defm VPMOVSXWD: avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s", SchedWriteVecExtend>;
10022 defm VPMOVSXWQ: avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s", SchedWriteVecExtend>;
10023 defm VPMOVSXDQ: avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s", SchedWriteVecExtend>;
10026 // Patterns that we also need any extend versions of. aext_vector_inreg
10027 // is currently legalized to zext_vector_inreg.
// Each pattern maps ExtOp applied to a whole-vector load onto the matching
// <OpcPrefix><variant>rm instruction. OpcPrefix is concatenated with the
// variant suffix and resolved with !cast, so the named instruction must exist.
// Patterns producing byte/word-element sources additionally require HasBWI.
10028 multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
10029   // 256-bit patterns
10030   let Predicates = [HasVLX, HasBWI] in {
10031     def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
10032               (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
10033   }
10035   let Predicates = [HasVLX] in {
10036     def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
10037               (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
10039     def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
10040               (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
10041   }
10043   // 512-bit patterns
10044   let Predicates = [HasBWI] in {
10045     def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
10046               (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
10047   }
10048   let Predicates = [HasAVX512] in {
10049     def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
10050               (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
10051     def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
10052               (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
10054     def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
10055               (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
10057     def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
10058               (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
10059   }
// Extends AVX512_pmovx_patterns_base with InVecOp patterns whose source is a
// partial-width load: either scalar_to_vector of a scalar load or an
// X86vzload*, bitcast (bc_*) to the narrow-element vector type. Each pattern
// folds the load into the matching <OpcPrefix><variant>rm instruction.
// For 64-bit sources both the integer (loadi64 -> v2i64) and the floating
// point (loadf64 -> v2f64) scalar_to_vector forms are matched; the vector
// type wrapped around scalar_to_vector must agree with the scalar load type.
10062 multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
10063                                  SDNode InVecOp> :
10064     AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
10065   // 128-bit patterns
10066   let Predicates = [HasVLX, HasBWI] in {
10067   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10068             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10069   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10070             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10071   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10072             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10073   }
10074   let Predicates = [HasVLX] in {
10075   def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10076             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10077   def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10078             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10080   def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
10081             (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
10083   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10084             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10085   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10086             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10087   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10088             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10090   def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10091             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10092   def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
10093             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10095   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10096             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10097   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10098             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10099   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
10100             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10101   }
  // 256-bit patterns
10102   let Predicates = [HasVLX] in {
10103   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10104             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  // An f64 scalar load produces a v2f64 vector, as in the 128-/512-bit patterns.
10105   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10106             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10107   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10108             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10110   def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10111             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10112   def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10113             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10115   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10116             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  // An f64 scalar load produces a v2f64 vector, as in the 128-/512-bit patterns.
10117   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10118             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10119   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10120             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10121   }
10122   // 512-bit patterns
10123   let Predicates = [HasAVX512] in {
10124   def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10125             (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10126   def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10127             (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10128   def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10129             (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10130   }
// Emit the load-folding patterns once for the sign-extending family and once
// for the zero-extending family.
10133 defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
10134 defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
10136 // Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
10137 // ext+trunc aggressively making it impossible to legalize the DAG to this
10138 // pattern directly.
// Both patterns first widen the v16i16 source (register or load) to v16i32
// with VPMOVZXWDZ and then feed that into VPMOVDBZrr to produce the v16i8.
10139 let Predicates = [HasAVX512, NoBWI] in {
10140 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
10141          (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
10142 def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
10143          (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
10146 //===----------------------------------------------------------------------===//
10147 // GATHER - SCATTER Operations
10149 // FIXME: Improve scheduling of gather/scatter instructions.
// Defines the `rm` gather instruction for vector type `_`.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic stem; `_.Suffix` is appended to it.
//   memop     - vector-index memory operand.
//   MaskRC    - mask register class (defaults to _.KRCWM).
// $src1 is tied to $dst, $mask is tied to the written-back $mask_wb output,
// and $dst is marked earlyclobber. The pattern list is empty, so no ISel
// pattern is attached here.
10150 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10151                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10152   let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
10153       ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
10154   def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
10155             (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
10156             !strconcat(OpcodeStr#_.Suffix,
10157             "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
10158             []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10159             Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
// Instantiates avx512_gather for the "d" (dopc) and "q" (qopc) index forms at
// 512, 256 and 128 bits; every variant carries REX_W. Record names are built
// as NAME#{D,Q}#SUFF#Z{,256,128}. The 256- and 128-bit variants require
// HasVLX. Used below with the f64 and i64 vector infos.
10162 multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
10163                         AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10164   defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
10165                                       vy512xmem>, EVEX_V512, REX_W;
10166   defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
10167                                       vz512mem>, EVEX_V512, REX_W;
10168 let Predicates = [HasVLX] in {
10169   defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10170                               vx256xmem>, EVEX_V256, REX_W;
10171   defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
10172                               vy256xmem>, EVEX_V256, REX_W;
10173   defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10174                               vx128xmem>, EVEX_V128, REX_W;
10175   defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10176                               vx128xmem>, EVEX_V128, REX_W;
// Instantiates avx512_gather for the "d" (dopc) and "q" (qopc) index forms at
// 512, 256 and 128 bits, without REX_W. The "q"-index variants use a narrower
// data vector than their encoding length: info256 under EVEX_V512 and info128
// under EVEX_V256. The 128-bit "q" variant also overrides the mask register
// class with VK2WM. The 256- and 128-bit variants require HasVLX. Used below
// with the f32 and i32 vector infos.
10180 multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
10181                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10182   defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
10183                                        EVEX_V512;
10184   defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
10185                                        EVEX_V512;
10186 let Predicates = [HasVLX] in {
10187   defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10188                                           vy256xmem>, EVEX_V256;
10189   defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10190                                           vy128xmem>, EVEX_V256;
10191   defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10192                                           vx128xmem>, EVEX_V128;
10193   defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10194                                           vx64xmem, VK2WM>, EVEX_V128;
// Floating-point gathers use opcodes 0x92 (dword index) / 0x93 (qword index);
// integer gathers use 0x90 / 0x91. Each defm combines the REX_W family
// (PD / Q suffix) with the non-REX_W family (PS / D suffix).
10199 defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
10200                avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
10202 defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
10203                 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
// Defines the `mr` scatter instruction for vector type `_`.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic stem; `_.Suffix` is appended to it.
//   memop     - vector-index memory operand (the destination).
//   MaskRC    - mask register class (defaults to _.KRCWM).
// The only output is the written-back mask $mask_wb, which is tied to $mask.
// The pattern list is empty, so no ISel pattern is attached here.
10205 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10206                           X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10208 let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
10209     hasSideEffects = 0 in
10211   def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
10212             (ins memop:$dst, MaskRC:$mask, _.RC:$src),
10213             !strconcat(OpcodeStr#_.Suffix,
10214             "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
10215             []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10216             Sched<[WriteStore]>;
// Instantiates avx512_scatter for the "d" (dopc) and "q" (qopc) index forms
// at 512, 256 and 128 bits; every variant carries REX_W. Record names are
// built as NAME#{D,Q}#SUFF#Z{,256,128}. The 256- and 128-bit variants require
// HasVLX. The memory operand choices mirror avx512_gather_q_pd.
10219 multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
10220                         AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10221   defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
10222                                       vy512xmem>, EVEX_V512, REX_W;
10223   defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
10224                                       vz512mem>, EVEX_V512, REX_W;
10225 let Predicates = [HasVLX] in {
10226   defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10227                               vx256xmem>, EVEX_V256, REX_W;
10228   defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
10229                               vy256xmem>, EVEX_V256, REX_W;
10230   defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10231                               vx128xmem>, EVEX_V128, REX_W;
10232   defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10233                               vx128xmem>, EVEX_V128, REX_W;
// Instantiates avx512_scatter for the "d" (dopc) and "q" (qopc) index forms
// at 512, 256 and 128 bits, without REX_W. The "q"-index variants use a
// narrower data vector than their encoding length (info256 under EVEX_V512,
// info128 under EVEX_V256), and the 128-bit "q" variant overrides the mask
// register class with VK2WM. The 256- and 128-bit variants require HasVLX.
10237 multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
10238                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10239   defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
10240                                        EVEX_V512;
10241   defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
10242                                        EVEX_V512;
10243 let Predicates = [HasVLX] in {
10244   defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10245                                           vy256xmem>, EVEX_V256;
10246   defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10247                                           vy128xmem>, EVEX_V256;
10248   defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10249                                           vx128xmem>, EVEX_V128;
10250   defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10251                                           vx64xmem, VK2WM>, EVEX_V128;
// Floating-point scatters use opcodes 0xA2 (dword index) / 0xA3 (qword
// index); integer scatters use 0xA0 / 0xA1. Each defm combines the REX_W
// family (PD / Q suffix) with the non-REX_W family (PS / D suffix).
10255 defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
10256                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
10258 defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
10259                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
10261 // prefetch
// Defines the `m` gather/scatter prefetch instruction. It has no outputs and
// takes a mask register (KRC) plus a vector-index memory operand. The record
// is gated on HasPFI and marked both mayLoad and mayStore; the ModRM /digit
// that selects the specific prefetch is supplied by the Format argument F.
10262 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
10263                        RegisterClass KRC, X86MemOperand memop> {
10264   let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
10265   def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
10266             !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
10267             EVEX, EVEX_K, Sched<[WriteLoad]>;
// Prefetch instantiations. Opcode 0xC6 is used for the dword-index ("D")
// forms and 0xC7 for the qword-index ("Q") forms. The ModRM format selects
// the operation: MRM1m = gather PF0, MRM2m = gather PF1, MRM5m = scatter PF0,
// MRM6m = scatter PF1. The *PD forms add REX_W; only the *DPS forms use the
// 16-bit mask class VK16WM, all others use VK8WM.
10270 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
10271                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10273 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
10274                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10276 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
10277                      VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10279 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
10280                      VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10282 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
10283                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10285 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
10286                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10288 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
10289                      VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10291 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
10292                      VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10294 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
10295                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10297 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
10298                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10300 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
10301                      VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10303 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
10304                      VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10306 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
10307                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10309 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
10310                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10312 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
10313                      VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10315 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
10316                      VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
// Defines the `rr` mask-to-vector instruction for vector type Vec: the ISel
// pattern sign-extends the mask register Vec.KRC:$src into Vec.RC:$dst.
// The mnemonic is OpcodeStr with Vec.Suffix appended. Note that the template
// argument named Sched shadows the Sched<> class name inside `Sched<[Sched]>`.
10318 multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr, SchedWrite Sched> {
10319 def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
10320                   !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
10321                   [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
10322                   EVEX, Sched<[Sched]>;
// Instantiates cvt_by_vec_width at 512 bits (Z, predicate `prd`) and at
// 256/128 bits (Z256/Z128, predicates `prd` and HasVLX), using the
// WriteVecMoveZ / WriteVecMoveY / WriteVecMoveX scheduling classes.
10325 multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
10326                                  string OpcodeStr, Predicate prd> {
10327 let Predicates = [prd] in
10328   defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteVecMoveZ>, EVEX_V512;
10330   let Predicates = [prd, HasVLX] in {
10331     defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr, WriteVecMoveY>, EVEX_V256;
10332     defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr, WriteVecMoveX>, EVEX_V128;
10333   }
// Mask-to-vector moves. Byte/word forms use opcode 0x28 and require HasBWI;
// dword/qword forms use opcode 0x38 and require HasDQI. The W and Q forms add
// REX_W. All share the "vpmovm2" stem; the element suffix comes from the
// vector info's Suffix field.
10336 defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
10337 defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , REX_W;
10338 defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
10339 defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , REX_W;
// Defines the `rr` vector-to-mask instruction for vector type `_`. The ISel
// pattern sets the mask to X86pcmpgtm(all-zeros, $src), i.e. a per-element
// "0 > src" compare. Unlike cvt_by_vec_width, OpcodeStr is used as-is with no
// suffix appended.
10341 multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
10342     def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
10343                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
10344                         [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
10345                         EVEX, Sched<[WriteMove]>;
10348 // Use 512bit version to implement 128/256 bit in case NoVLX.
// The narrow source register is inserted (INSERT_SUBREG at _.SubRegIdx) into
// an IMPLICIT_DEF of the wider ExtendInfo type, the <Name>Zrr instruction is
// run on that, and the resulting mask is copied to the narrow type's mask
// register class _.KRC.
10349 multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
10350                                            X86VectorVTInfo _,
10351                                            string Name> {
10353   def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
10354             (_.KVT (COPY_TO_REGCLASS
10355                      (!cast<Instruction>(Name#"Zrr")
10356                        (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
10357                                       _.RC:$src, _.SubRegIdx)),
10358                    _.KRC))>;
// Instantiates the vector-to-mask instruction at 512 bits (Z, predicate
// `prd`) and at 256/128 bits (Z256/Z128, `prd` + HasVLX). When VLX is not
// available (`prd` + NoVLX) the 256/128-bit cases are instead covered by
// patterns that reuse the 512-bit instruction, looked up via NAME.
10361 multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
10362                                    AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10363   let Predicates = [prd] in
10364     defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
10365                                             EVEX_V512;
10367   let Predicates = [prd, HasVLX] in {
10368     defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
10369                                               EVEX_V256;
10370     defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
10371                                                EVEX_V128;
10372   }
10373   let Predicates = [prd, NoVLX] in {
10374     defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
10375     defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
10376   }
// Vector-to-mask moves. Byte/word forms use opcode 0x29 and require HasBWI;
// dword/qword forms use opcode 0x39 and require HasDQI. The W and Q forms add
// REX_W.
10379 defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
10380                                               avx512vl_i8_info, HasBWI>;
10381 defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
10382                                               avx512vl_i16_info, HasBWI>, REX_W;
10383 defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
10384                                               avx512vl_i32_info, HasDQI>;
10385 defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
10386                                               avx512vl_i64_info, HasDQI>, REX_W;
10388 // Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
10389 // is available, but BWI is not. We can't handle this in lowering because
10390 // a target independent DAG combine likes to combine sext and trunc.
// Both patterns expand the v16i1 mask to v16i32 with VPMOVM2DZrr and then
// narrow that with VPMOVDBZrr (to v16i8) or VPMOVDWZrr (to v16i16).
10391 let Predicates = [HasDQI, NoBWI] in {
10392   def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
10393             (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
10394   def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
10395             (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
// With VLX available, a v8i1 -> v8i16 sign extension goes through the
// 256-bit forms: expand the mask to v8i32, then narrow with VPMOVDWZ256rr.
10398 let Predicates = [HasDQI, NoBWI, HasVLX] in {
10399   def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
10400             (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
10403 //===----------------------------------------------------------------------===//
10404 // AVX-512 - COMPRESS and EXPAND
// Defines the compress instruction forms for vector type `_`:
//   rr  - register form built through AVX512_maskable with null_frag, so no
//         pattern is attached here (see compress_by_vec_width_lowering).
//   mr  - unmasked store form; the brace-less `let mayStore = 1,
//         hasSideEffects = 0 in` applies to this def only.
//   mrk - masked store form (EVEX_K) taking a _.KRCWM mask.
// Both memory forms have empty pattern lists and use sched.Folded.
10407 multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
10408                                  string OpcodeStr, X86FoldableSchedWrite sched> {
10409   defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
10410               (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10411               (null_frag)>, AVX5128IBase,
10412               Sched<[sched]>;
10414   let mayStore = 1, hasSideEffects = 0 in
10415   def mr : AVX5128I<opc, MRMDestMem, (outs),
10416               (ins _.MemOp:$dst, _.RC:$src),
10417               OpcodeStr # "\t{$src, $dst|$dst, $src}",
10418               []>, EVEX_CD8<_.EltSize, CD8VT1>,
10419               Sched<[sched.Folded]>;
10421   def mrk : AVX5128I<opc, MRMDestMem, (outs),
10422               (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
10423               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
10424               []>,
10425               EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10426               Sched<[sched.Folded]>;
// ISel patterns for the compress instructions named <Name><_.ZSuffix>*:
//   X86mCompressingStore             -> mrk  (masked store form)
//   X86compress with register src0   -> rrk  (merge-masked register form)
//   X86compress with all-zeros src0  -> rrkz (zero-masked register form)
10429 multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10430   def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
10431             (!cast<Instruction>(Name#_.ZSuffix#mrk)
10432                             addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
10434   def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10435             (!cast<Instruction>(Name#_.ZSuffix#rrk)
10436                             _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10437   def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10438             (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10439                             _.KRCWM:$mask, _.RC:$src)>;
// Instantiates the compress instructions and their lowering patterns at
// 512 bits (Z, predicate Pred) and at 256/128 bits (Z256/Z128, Pred +
// HasVLX). Pred defaults to HasAVX512. The same `sched` class is passed for
// every vector width.
10442 multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
10443                                  X86FoldableSchedWrite sched,
10444                                  AVX512VLVectorVTInfo VTInfo,
10445                                  Predicate Pred = HasAVX512> {
10446   let Predicates = [Pred] in
10447   defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
10448            compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10450   let Predicates = [Pred, HasVLX] in {
10451     defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
10452                 compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10453     defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
10454                 compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10455   }
10458 // FIXME: Is there a better scheduler class for VPCOMPRESS?
// Integer compresses use opcode 0x8B and floating-point compresses 0x8A; the
// 64-bit element forms (Q / PD) add REX_W. All use WriteVarShuffle256.
10459 defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
10460                                           avx512vl_i32_info>, EVEX;
10461 defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
10462                                           avx512vl_i64_info>, EVEX, REX_W;
10463 defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
10464                                           avx512vl_f32_info>, EVEX;
10465 defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
10466                                           avx512vl_f64_info>, EVEX, REX_W;
10468 // expand
// Defines the expand instruction forms for vector type `_`, both built
// through AVX512_maskable with null_frag (patterns are supplied separately by
// expand_by_vec_width_lowering):
//   rr - register source.
//   rm - memory source (_.MemOp), scheduled as sched.Folded/ReadAfterFold.
10469 multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
10470                                  string OpcodeStr, X86FoldableSchedWrite sched> {
10471   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10472               (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10473               (null_frag)>, AVX5128IBase,
10474               Sched<[sched]>;
10476   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10477               (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
10478               (null_frag)>,
10479             AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
10480             Sched<[sched.Folded, sched.ReadAfterFold]>;
// ISel patterns for the expand instructions named <Name><_.ZSuffix>*:
//   X86mExpandingLoad, passthru undef        -> rmkz
//   X86mExpandingLoad, passthru all-zeros    -> rmkz
//   X86mExpandingLoad, passthru register     -> rmk
//   X86expand with register src0             -> rrk
//   X86expand with all-zeros src0            -> rrkz
10483 multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10485   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
10486             (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10487                                         _.KRCWM:$mask, addr:$src)>;
10489   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
10490             (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10491                                         _.KRCWM:$mask, addr:$src)>;
10493   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
10494                                                (_.VT _.RC:$src0))),
10495             (!cast<Instruction>(Name#_.ZSuffix#rmk)
10496                             _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
10498   def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10499             (!cast<Instruction>(Name#_.ZSuffix#rrk)
10500                             _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10501   def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10502             (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10503                             _.KRCWM:$mask, _.RC:$src)>;
// Instantiates the expand instructions and their lowering patterns at
// 512 bits (Z, predicate Pred) and at 256/128 bits (Z256/Z128, Pred +
// HasVLX). Pred defaults to HasAVX512. The same `sched` class is passed for
// every vector width.
10506 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
10507                                X86FoldableSchedWrite sched,
10508                                AVX512VLVectorVTInfo VTInfo,
10509                                Predicate Pred = HasAVX512> {
10510   let Predicates = [Pred] in
10511   defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
10512            expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10514   let Predicates = [Pred, HasVLX] in {
10515     defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
10516                 expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10517     defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
10518                 expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10519   }
10522 // FIXME: Is there a better scheduler class for VPEXPAND?
// Integer expands use opcode 0x89 and floating-point expands 0x88; the
// 64-bit element forms (Q / PD) add REX_W. All use WriteVarShuffle256.
10523 defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
10524                                       avx512vl_i32_info>, EVEX;
10525 defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
10526                                       avx512vl_i64_info>, EVEX, REX_W;
10527 defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
10528                                       avx512vl_f32_info>, EVEX;
10529 defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
10530                                       avx512vl_f64_info>, EVEX, REX_W;
10532 // Handles instructions of the form  reg_vec1 = op(reg_vec, imm)
10533 //                                               op(mem_vec, imm)
10534 //                                               op(broadcast(eltVt), imm)
10535 // All instructions are created with FROUND_CURRENT.
// Three forms are defined through AVX512_maskable_split, which takes OpNode
// for the unmasked pattern and MaskOpNode for the masked ones:
//   rri  - register source.
//   rmi  - full-vector memory source (_.LdFrag).
//   rmbi - broadcast scalar memory source (_.BroadcastLdFrag), EVEX_B set.
// All three read MXCSR and are marked mayRaiseFPException.
10536 multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
10537                                       SDPatternOperator OpNode,
10538                                       SDPatternOperator MaskOpNode,
10539                                       X86FoldableSchedWrite sched,
10540                                       X86VectorVTInfo _> {
10541   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10542   defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
10543                       (ins _.RC:$src1, i32u8imm:$src2),
10544                       OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10545                       (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
10546                       (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
10547                       Sched<[sched]>;
10548   defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10549                     (ins _.MemOp:$src1, i32u8imm:$src2),
10550                     OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10551                     (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10552                             (i32 timm:$src2)),
10553                     (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10554                                 (i32 timm:$src2))>,
10555                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10556   defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10557                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10558                     OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
10559                     "${src1}"#_.BroadcastStr#", $src2",
10560                     (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10561                             (i32 timm:$src2)),
10562                     (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10563                                 (i32 timm:$src2))>, EVEX_B,
10564                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10565   }
10568 // Handles instructions of the form  reg_vec1 = op(reg_vec2, imm), {sae}
// Defines the single register form `rrib`, which takes one vector register
// and an 8-bit immediate, prints "{sae}" in its assembly string and sets
// EVEX_B. It reads MXCSR; mayRaiseFPException is not set by this multiclass.
10569 multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10570                                           SDNode OpNode, X86FoldableSchedWrite sched,
10571                                           X86VectorVTInfo _> {
10572   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10573   defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10574                       (ins _.RC:$src1, i32u8imm:$src2),
10575                       OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
10576                       "$src1, {sae}, $src2",
10577                       (OpNode (_.VT _.RC:$src1),
10578                               (i32 timm:$src2))>,
10579                       EVEX_B, Sched<[sched]>;
// Instantiates the unary FP-with-immediate instructions for all vector
// widths. The 512-bit variant (Z, predicate prd) gets both the regular forms
// and the {sae} form (OpNodeSAE); the 128/256-bit variants (prd + HasVLX) get
// only the regular forms.
10582 multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10583             AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
10584             SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
10585             Predicate prd>{
10586   let Predicates = [prd] in {
10587     defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10588                                            sched.ZMM, _.info512>,
10589                 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
10590                                                sched.ZMM, _.info512>, EVEX_V512;
10591   }
10592   let Predicates = [prd, HasVLX] in {
10593     defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10594                                            sched.XMM, _.info128>, EVEX_V128;
10595     defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10596                                            sched.YMM, _.info256>, EVEX_V256;
10597   }
10600 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10601 //                               op(reg_vec2,mem_vec,imm)
10602 //                               op(reg_vec2,broadcast(eltVt),imm)
10603 //all instructions are created with FROUND_CURRENT
// Defines three maskable records that differ only in the second source:
//   rri  - register, rmi - full-vector load, rmbi - broadcast load (EVEX_B).
// All three are marked as using MXCSR and as possibly raising FP exceptions.
10604 multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10605                                 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10606   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10607   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10608                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10609                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10610                       (OpNode (_.VT _.RC:$src1),
10611                               (_.VT _.RC:$src2),
10612                               (i32 timm:$src3))>,
10613                       Sched<[sched]>;
10614   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10615                     (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10616                     OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10617                     (OpNode (_.VT _.RC:$src1),
10618                             (_.VT (bitconvert (_.LdFrag addr:$src2))),
10619                             (i32 timm:$src3))>,
10620                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10621   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10622                     (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10623                     OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10624                     "$src1, ${src2}"#_.BroadcastStr#", $src3",
10625                     (OpNode (_.VT _.RC:$src1),
10626                             (_.VT (_.BroadcastLdFrag addr:$src2)),
10627                             (i32 timm:$src3))>, EVEX_B,
10628                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10629   }
10632 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10633 //                               op(reg_vec2,mem_vec,imm)
// Two-source operation with an i8 immediate where the result type (DestInfo)
// may differ from the source type (SrcInfo). Defines only the register (rri)
// and full-vector-load (rmi) forms; no broadcast form is defined here.
10634 multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10635                               X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10636                               X86VectorVTInfo SrcInfo>{
10637   let ExeDomain = DestInfo.ExeDomain, ImmT = Imm8 in {
10638   defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10639                   (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10640                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10641                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10642                                (SrcInfo.VT SrcInfo.RC:$src2),
10643                                (i8 timm:$src3)))>,
10644                   Sched<[sched]>;
10645   defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10646                 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10647                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10648                 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10649                              (SrcInfo.VT (bitconvert
10650                                                 (SrcInfo.LdFrag addr:$src2))),
10651                              (i8 timm:$src3)))>,
10652                 Sched<[sched.Folded, sched.ReadAfterFold]>;
10653   }
10656 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10657 //                               op(reg_vec2,mem_vec,imm)
10658 //                               op(reg_vec2,broadcast(eltVt),imm)
// Inherits the rri/rmi forms from avx512_3Op_rm_imm8 with the same type info
// `_` used for both destination and source, and adds the broadcast-load form
// rmbi (tagged EVEX_B).
10659 multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10660                            X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10661   avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10663   let ExeDomain = _.ExeDomain, ImmT = Imm8 in
10664   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10665                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10666                     OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10667                     "$src1, ${src2}"#_.BroadcastStr#", $src3",
10668                     (OpNode (_.VT _.RC:$src1),
10669                             (_.VT (_.BroadcastLdFrag addr:$src2)),
10670                             (i8 timm:$src3))>, EVEX_B,
10671                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10674 //handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10675 //                                      op(reg_vec2,mem_scalar,imm)
// Scalar FP operation with an immediate, built on AVX512_maskable_scalar.
// Defines rri (register second source) and rmi (memory second source, taken
// as _.IntScalarMemOp and matched through _.ScalarIntMemFrags). Both are
// marked as using MXCSR and as possibly raising FP exceptions.
10676 multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10677                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10678   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10679   defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10680                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10681                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10682                       (OpNode (_.VT _.RC:$src1),
10683                               (_.VT _.RC:$src2),
10684                               (i32 timm:$src3))>,
10685                       Sched<[sched]>;
10686   defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10687                     (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
10688                     OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10689                     (OpNode (_.VT _.RC:$src1),
10690                             (_.ScalarIntMemFrags addr:$src2),
10691                             (i32 timm:$src3))>,
10692                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10693   }
10696 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Two-source packed FP operation with an immediate, register-only {sae} form.
// Defines one maskable record, rrib, tagged EVEX_B. Unlike
// avx512_fp_packed_imm, this block sets Uses = [MXCSR] but does not set
// mayRaiseFPException.
10697 multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10698                                     SDNode OpNode, X86FoldableSchedWrite sched,
10699                                     X86VectorVTInfo _> {
10700   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10701   defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10702                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10703                       OpcodeStr, "$src3, {sae}, $src2, $src1",
10704                       "$src1, $src2, {sae}, $src3",
10705                       (OpNode (_.VT _.RC:$src1),
10706                               (_.VT _.RC:$src2),
10707                               (i32 timm:$src3))>,
10708                       EVEX_B, Sched<[sched]>;
10711 //handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Scalar counterpart of the packed {sae} form: one register-only record built
// on AVX512_maskable_scalar and tagged EVEX_B. The record is spelled
// NAME#rrib here, whereas the packed multiclasses write plain `rrib`.
10712 multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10713                                     X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10714   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10715   defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10716                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10717                       OpcodeStr, "$src3, {sae}, $src2, $src1",
10718                       "$src1, $src2, {sae}, $src3",
10719                       (OpNode (_.VT _.RC:$src1),
10720                               (_.VT _.RC:$src2),
10721                               (i32 timm:$src3))>,
10722                       EVEX_B, Sched<[sched]>;
// Instantiates the two-source packed-FP-with-immediate forms per vector width.
//   Z    (info512, under [prd]): avx512_fp_packed_imm plus the {sae} form from
//        avx512_fp_sae_packed_imm (using OpNodeSAE).
//   Z128 / Z256 (under [prd, HasVLX]): avx512_fp_packed_imm only.
10725 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
10726             AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10727             SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10728   let Predicates = [prd] in {
10729     defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10730                 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
10731                                   EVEX_V512;
10733   }
10734   let Predicates = [prd, HasVLX] in {
10735     defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10736                                   EVEX_V128;
10737     defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10738                                   EVEX_V256;
10739   }
// Instantiates avx512_3Op_rm_imm8 (register and full-load forms, separate
// destination/source type info) for 512-, 128- and 256-bit vectors.
// Z is gated on [Pred]; Z128/Z256 on [Pred, HasVLX]. Pred defaults to HasBWI.
// Every instantiation is tagged AVX512AIi8Base, EVEX, VVVV.
10742 multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
10743                    X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
10744                    AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
10745   let Predicates = [Pred] in {
10746     defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
10747                            SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX, VVVV;
10748   }
10749   let Predicates = [Pred, HasVLX] in {
10750     defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
10751                            SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX, VVVV;
10752     defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
10753                            SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX, VVVV;
10754   }
// Instantiates avx512_3Op_imm8 (register, full-load and broadcast forms) for
// 512-, 128- and 256-bit vectors. Z is gated on [Pred]; Z128/Z256 on
// [Pred, HasVLX]. Pred defaults to HasAVX512. No encoding-prefix classes are
// attached here; the instantiating defm is expected to supply them.
10757 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
10758                                   bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
10759                                   Predicate Pred = HasAVX512> {
10760   let Predicates = [Pred] in {
10761     defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10762                                 EVEX_V512;
10763   }
10764   let Predicates = [Pred, HasVLX] in {
10765     defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10766                                 EVEX_V128;
10767     defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10768                                 EVEX_V256;
10769   }
// Scalar wrapper: under [prd], defines a single Z instantiation that combines
// the rri/rmi forms (avx512_fp_scalar_imm, OpNode) with the {sae} rrib form
// (avx512_fp_sae_scalar_imm, OpNodeSAE). Both use the XMM scheduling class.
10772 multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
10773                   X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
10774                   SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
10775   let Predicates = [prd] in {
10776      defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
10777               avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
10778   }
// Instantiates the unary packed-FP-with-immediate family for three element
// types:
//   PH (f16): reuses opcPs and is always gated on HasFP16 (not on prd);
//             encoded with AVX512PSIi8Base, TA.
//   PS (f32): opcPs, gated on prd; AVX512AIi8Base.
//   PD (f64): opcPd, gated on prd; AVX512AIi8Base plus REX_W.
10781 multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
10782                     bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
10783                     SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
10784                     X86SchedWriteWidths sched, Predicate prd>{
10785   defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
10786                             opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
10787                             AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
10788   defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
10789                             opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10790                             AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
10791   defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
10792                             opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10793                             AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, REX_W;
// Unary packed instructions with an immediate (PH/PS/PD, all vector widths).
// The two opcode arguments are (opcPs, opcPd); VREDUCE and VGETMANT pass the
// same opcode for both, VRNDSCALE uses 0x08 / 0x09. VRNDSCALE is also the only
// one that passes different unmasked and masked nodes.
10796 defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
10797                               X86VReduce, X86VReduce, X86VReduceSAE,
10798                               SchedWriteFRnd, HasDQI>;
10799 defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
10800                               X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
10801                               SchedWriteFRnd, HasAVX512>;
10802 defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
10803                               X86VGetMant, X86VGetMant, X86VGetMantSAE,
10804                               SchedWriteFRnd, HasAVX512>;
// Two-source packed VRANGE (opcode 0x50, HasDQI); the PD form adds REX_W.
10806 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
10807                                                 0x50, X86VRange, X86VRangeSAE,
10808                                                 SchedWriteFAdd, HasDQI>,
10809       AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
10810 defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
10811                                                 0x50, X86VRange, X86VRangeSAE,
10812                                                 SchedWriteFAdd, HasDQI>,
10813       AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
// Scalar VRANGE (opcode 0x51, HasDQI).
10815 defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
10816       f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10817       AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
10818 defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
10819       0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10820       AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
// Scalar VREDUCE (opcode 0x57). SD/SS are gated on HasDQI; SH is gated on
// HasFP16 and uses AVX512PSIi8Base, TA instead of AVX512AIi8Base.
10822 defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
10823       0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10824       AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
10825 defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
10826       0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10827       AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
10828 defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
10829       0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
10830       AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>;
// Scalar VGETMANT (opcode 0x27). SD/SS are gated on HasAVX512; SH is gated on
// HasFP16 and uses AVX512PSIi8Base, TA instead of AVX512AIi8Base.
10832 defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
10833       0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10834       AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
10835 defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
10836       0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10837       AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
10838 defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
10839       0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
10840       AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>;
// Shuffle forms built on X86Shuf128 with an i8 immediate.
// The X86Shuf128 node is typed as CastInfo.VT and its result is bitconverted
// to the instruction's own type _.VT. Defines:
//   rri  - both sources in registers,
//   rmi  - second source loaded with CastInfo.LdFrag,
//   rmbi - second source broadcast-loaded with _.BroadcastLdFrag (EVEX_B);
//          note this uses `_`, not CastInfo, for the broadcast element type.
10842 multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
10843                                           X86FoldableSchedWrite sched,
10844                                           X86VectorVTInfo _,
10845                                           X86VectorVTInfo CastInfo> {
10846   let ExeDomain = _.ExeDomain in {
10847   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10848                   (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10849                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10850                   (_.VT (bitconvert
10851                          (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
10852                                                   (i8 timm:$src3)))))>,
10853                   Sched<[sched]>;
10854   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10855                 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10856                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10857                 (_.VT
10858                  (bitconvert
10859                   (CastInfo.VT (X86Shuf128 _.RC:$src1,
10860                                            (CastInfo.LdFrag addr:$src2),
10861                                            (i8 timm:$src3)))))>,
10862                 Sched<[sched.Folded, sched.ReadAfterFold]>;
10863   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10864                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10865                     OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10866                     "$src1, ${src2}"#_.BroadcastStr#", $src3",
10867                     (_.VT
10868                      (bitconvert
10869                       (CastInfo.VT
10870                        (X86Shuf128 _.RC:$src1,
10871                                    (_.BroadcastLdFrag addr:$src2),
10872                                    (i8 timm:$src3)))))>, EVEX_B,
10873                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10874   }
// Instantiates avx512_shuff_packed_128_common for 512-bit (Z, HasAVX512) and
// 256-bit (Z256, HasAVX512 + HasVLX) vectors. No 128-bit variant is defined.
// The single X86FoldableSchedWrite `sched` is used for both widths.
10877 multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
10878                                    AVX512VLVectorVTInfo _,
10879                                    AVX512VLVectorVTInfo CastInfo, bits<8> opc>{
10880   let Predicates = [HasAVX512] in
10881   defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10882                                           _.info512, CastInfo.info512>, EVEX_V512;
10884   let Predicates = [HasAVX512, HasVLX] in
10885   defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10886                                              _.info256, CastInfo.info256>, EVEX_V256;
// 128-bit-lane shuffles. The FP forms use opcode 0x23 and the integer forms
// 0x43. The 32x4 variants pass the 64-bit-element info as CastInfo (so the
// shuffle node itself is matched on the 64-bit-element type); the 64x2
// variants use the same info for both and add REX_W.
10889 defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
10890       avx512vl_f32_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
10891 defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
10892       avx512vl_f64_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
10893 defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
10894       avx512vl_i32_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
10895 defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
10896       avx512vl_i64_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
// VALIGN forms built on X86VAlign with an i8 immediate:
//   rri  - both sources in registers,
//   rmi  - second source is a full-vector load (bitconverted),
//   rmbi - second source is a broadcast load (EVEX_B).
// The rmbi pattern omits the outer (_.VT ...) wrapper that rri/rmi carry; its
// broadcast operand is still explicitly typed as _.VT.
10898 multiclass avx512_valign<bits<8> opc, string OpcodeStr,
10899                          X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10900   let ExeDomain = _.ExeDomain in {
10901   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10902                   (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10903                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10904                   (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
10905                   Sched<[sched]>;
10906   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10907                 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10908                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10909                 (_.VT (X86VAlign _.RC:$src1,
10910                                  (bitconvert (_.LdFrag addr:$src2)),
10911                                  (i8 timm:$src3)))>,
10912                 Sched<[sched.Folded, sched.ReadAfterFold]>;
10914   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10915                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10916                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10917                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10918                    (X86VAlign _.RC:$src1,
10919                               (_.VT (_.BroadcastLdFrag addr:$src2)),
10920                               (i8 timm:$src3))>, EVEX_B,
10921                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10922   }
// Instantiates avx512_valign (opcode 0x03) for 512-, 128- and 256-bit vectors.
// Z is gated on [HasAVX512]; Z128/Z256 on [HasAVX512, HasVLX]. Each is tagged
// AVX512AIi8Base, EVEX, VVVV plus the matching EVEX_V* length class.
10925 multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
10926                                 AVX512VLVectorVTInfo _> {
10927   let Predicates = [HasAVX512] in {
10928     defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
10929                                 AVX512AIi8Base, EVEX, VVVV, EVEX_V512;
10930   }
10931   let Predicates = [HasAVX512, HasVLX] in {
10932     defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
10933                                 AVX512AIi8Base, EVEX, VVVV, EVEX_V128;
10934     // NOTE(review): this comment used to read "We can't really override the 256-bit version so change it back to unset.", but no field is overridden or unset on this defm - it mirrors Z128 with YMM/info256/EVEX_V256. Presumably stale; confirm and remove.
10935     defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
10936                                 AVX512AIi8Base, EVEX, VVVV, EVEX_V256;
10937   }
// VALIGND / VALIGNQ: 32- and 64-bit element variants of avx512_valign_common;
// the Q form adds REX_W.
10940 defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
10941                                    avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
10942 defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
10943                                    avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
10944                                    REX_W;
// VPALIGNR: byte-element form (opcode 0x0F) with register and full-load
// variants only; it relies on avx512_common_3Op_rm_imm8's default predicate
// (HasBWI) and uses i8 info for both destination and source.
10946 defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
10947                                          SchedWriteShuffle, avx512vl_i8_info,
10948                                          avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
10950 // Fragments to help convert valignq into masked valignd. Or valignq/valignd
10951 // into vpalignr.
// Each transform rescales the immediate by a constant factor and returns it
// through getI8Imm:
//   ValignqImm32XForm: x2 (used below for 64-bit -> 32-bit element patterns)
//   ValignqImm8XForm:  x8 (used below for 64-bit -> 8-bit element patterns)
//   ValigndImm8XForm:  x4 (used below for 32-bit -> 8-bit element patterns)
10952 def ValignqImm32XForm : SDNodeXForm<timm, [{
10953   return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
10954 }]>;
10955 def ValignqImm8XForm : SDNodeXForm<timm, [{
10956   return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
10957 }]>;
10958 def ValigndImm8XForm : SDNodeXForm<timm, [{
10959   return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
10960 }]>;
// Selection patterns for a masked select (in type To.VT) whose data operand is
// a bitconvert of OpNode computed in a different type From.VT. Each pattern
// selects the To-typed instruction named OpcodeStr#<suffix> and rewrites the
// immediate with ImmXForm. Four patterns are defined:
//   rrik  - register sources, merge-masking with $src0
//   rrikz - register sources, zero-masking
//   rmik  - second source loaded via From.LdFrag, merge-masking with $src0
//   rmikz - second source loaded via From.LdFrag, zero-masking
10962 multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
10963                                         X86VectorVTInfo From, X86VectorVTInfo To,
10964                                         SDNodeXForm ImmXForm> {
10965   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10966                                  (bitconvert
10967                                   (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10968                                                    timm:$src3))),
10969                                  To.RC:$src0)),
10970             (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
10971                                                   To.RC:$src1, To.RC:$src2,
10972                                                   (ImmXForm timm:$src3))>;
10974   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10975                                  (bitconvert
10976                                   (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10977                                                    timm:$src3))),
10978                                  To.ImmAllZerosV)),
10979             (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
10980                                                    To.RC:$src1, To.RC:$src2,
10981                                                    (ImmXForm timm:$src3))>;
10983   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10984                                  (bitconvert
10985                                   (From.VT (OpNode From.RC:$src1,
10986                                                    (From.LdFrag addr:$src2),
10987                                            timm:$src3))),
10988                                  To.RC:$src0)),
10989             (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
10990                                                   To.RC:$src1, addr:$src2,
10991                                                   (ImmXForm timm:$src3))>;
10993   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10994                                  (bitconvert
10995                                   (From.VT (OpNode From.RC:$src1,
10996                                                    (From.LdFrag addr:$src2),
10997                                            timm:$src3))),
10998                                  To.ImmAllZerosV)),
10999             (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
11000                                                    To.RC:$src1, addr:$src2,
11001                                                    (ImmXForm timm:$src3))>;
// Extends avx512_vpalign_mask_lowering with patterns whose second source is a
// broadcast load in the To element type, bitconverted to From.VT. Adds:
//   rmbi   - unmasked: the From-typed OpNode itself is selected to the
//            To-typed broadcast instruction
//   rmbik  - merge-masking with $src0
//   rmbikz - zero-masking
// As in the base multiclass, the immediate is rewritten with ImmXForm.
11004 multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
11005                                            X86VectorVTInfo From,
11006                                            X86VectorVTInfo To,
11007                                            SDNodeXForm ImmXForm> :
11008       avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
11009   def : Pat<(From.VT (OpNode From.RC:$src1,
11010                              (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
11011                              timm:$src3)),
11012             (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
11013                                                   (ImmXForm timm:$src3))>;
11015   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11016                                  (bitconvert
11017                                   (From.VT (OpNode From.RC:$src1,
11018                                            (bitconvert
11019                                             (To.VT (To.BroadcastLdFrag addr:$src2))),
11020                                            timm:$src3))),
11021                                  To.RC:$src0)),
11022             (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
11023                                                    To.RC:$src1, addr:$src2,
11024                                                    (ImmXForm timm:$src3))>;
11026   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11027                                  (bitconvert
11028                                   (From.VT (OpNode From.RC:$src1,
11029                                            (bitconvert
11030                                             (To.VT (To.BroadcastLdFrag addr:$src2))),
11031                                            timm:$src3))),
11032                                  To.ImmAllZerosV)),
11033             (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
11034                                                     To.RC:$src1, addr:$src2,
11035                                                     (ImmXForm timm:$src3))>;
11038 let Predicates = [HasAVX512] in {
11039   // For 512-bit we lower to the widest element type we can. So we only need
11040   // to handle converting valignq to valignd.
11041   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
11042                                          v16i32_info, ValignqImm32XForm>;
11045 let Predicates = [HasVLX] in {
11046   // For 128-bit we lower to the widest element type we can. So we only need
11047   // to handle converting valignq to valignd.
11048   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
11049                                          v4i32x_info, ValignqImm32XForm>;
11050   // For 256-bit we lower to the widest element type we can. So we only need
11051   // to handle converting valignq to valignd.
11052   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
11053                                          v8i32x_info, ValignqImm32XForm>;
11056 let Predicates = [HasVLX, HasBWI] in {
11057   // We can turn 128-bit VALIGND/VALIGNQ into VPALIGNR. NOTE(review): this comment used to say "128 and 256 bit", but only VPALIGNRZ128 patterns are instantiated here - confirm the 256-bit omission is intended.
11058   defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
11059                                       v16i8x_info, ValignqImm8XForm>;
11060   defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
11061                                       v16i8x_info, ValigndImm8XForm>;
// VDBPSADBW (opcode 0x42): i8-element sources producing an i16-element
// result; register and full-load forms only, under the default HasBWI
// predicate of avx512_common_3Op_rm_imm8.
11064 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
11065                 SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
11066                 EVEX_CD8<8, CD8VF>;
// Unary vector operation with register (rr) and full-vector-load (rm) forms.
// Both are maskable and tagged EVEX, AVX5128IBase. The rm form additionally
// carries EVEX_CD8<_.EltSize, CD8VF> and is scheduled with sched.Folded only
// (there is no register source to read after the fold).
11068 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
11069                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11070   let ExeDomain = _.ExeDomain in {
11071   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11072                     (ins _.RC:$src1), OpcodeStr,
11073                     "$src1", "$src1",
11074                     (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
11075                     Sched<[sched]>;
11077   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11078                   (ins _.MemOp:$src1), OpcodeStr,
11079                   "$src1", "$src1",
11080                   (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
11081             EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
11082             Sched<[sched.Folded]>;
11083   }
// Extends avx512_unary_rm with a broadcast-load form, rmb (EVEX_B).
// NOTE(review): unlike rr/rm in avx512_unary_rm, rmb is not wrapped in
// `let ExeDomain = _.ExeDomain` - confirm this is intended.
11086 multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
11087                             X86FoldableSchedWrite sched, X86VectorVTInfo _> :
11088            avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
11089   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11090                   (ins _.ScalarMemOp:$src1), OpcodeStr,
11091                   "${src1}"#_.BroadcastStr,
11092                   "${src1}"#_.BroadcastStr,
11093                   (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
11094              EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
11095              Sched<[sched.Folded]>;
// Instantiates avx512_unary_rm (rr/rm, no broadcast) for each vector width:
// Z under [prd]; Z256 and Z128 under [prd, HasVLX].
11098 multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11099                               X86SchedWriteWidths sched,
11100                               AVX512VLVectorVTInfo VTInfo, Predicate prd> {
11101   let Predicates = [prd] in
11102     defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11103                              EVEX_V512;
11105   let Predicates = [prd, HasVLX] in {
11106     defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11107                               EVEX_V256;
11108     defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
11109                               EVEX_V128;
11110   }
// Instantiates avx512_unary_rmb (rr/rm/rmb, i.e. including the broadcast
// form) for each vector width: Z under [prd]; Z256 and Z128 under
// [prd, HasVLX].
11113 multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11114                                X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
11115                                Predicate prd> {
11116   let Predicates = [prd] in
11117     defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11118                               EVEX_V512;
11120   let Predicates = [prd, HasVLX] in {
11121     defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11122                                  EVEX_V256;
11123     defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
11124                                  EVEX_V128;
11125   }
// Dword/qword element variants of a unary op, both with broadcast forms.
// Q appends "q" to the mnemonic, uses opc_q and adds REX_W; D appends "d" and
// uses opc_d.
11128 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
11129                                  SDNode OpNode, X86SchedWriteWidths sched,
11130                                  Predicate prd> {
11131   defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
11132                                avx512vl_i64_info, prd>, REX_W;
11133   defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
11134                                avx512vl_i32_info, prd>;
// Defines the word (W) and byte (B) element variants of a unary operation.
// The mnemonic is OpcodeStr with a "w" or "b" suffix appended. Both variants
// go through avx512_unary_rm_vl and are tagged WIG.
11137 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
11138                                  SDNode OpNode, X86SchedWriteWidths sched,
11139                                  Predicate prd> {
11140   defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
11141                               avx512vl_i16_info, prd>, WIG;
11142   defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
11143                               avx512vl_i8_info, prd>, WIG;
// Defines a unary operation for all four integer element sizes in one go.
// The dword/qword variants are guarded by HasAVX512 and the byte/word
// variants by HasBWI.
11146 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
11147                                   bits<8> opc_d, bits<8> opc_q,
11148                                   string OpcodeStr, SDNode OpNode,
11149                                   X86SchedWriteWidths sched> {
11150   defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
11151                                     HasAVX512>,
11152               avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
11153                                     HasBWI>;
// Vector absolute value, matched from the generic `abs` node. Opcodes are
// passed in byte, word, dword, qword order: 0x1C, 0x1D, 0x1E, 0x1F.
11156 defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
11157                                     SchedWriteVecALU>;
11159 // VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// Only the 64-bit element types (v4i64, v2i64) are handled here. The source
// is placed in the low part of an otherwise undefined v8i64 register,
// VPABSQZrr runs on the full 512 bits, and the original-width subregister is
// extracted from the result.
11160 let Predicates = [HasAVX512, NoVLX] in {
11161   def : Pat<(v4i64 (abs VR256X:$src)),
11162             (EXTRACT_SUBREG
11163                 (VPABSQZrr
11164                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
11165              sub_ymm)>;
11166   def : Pat<(v2i64 (abs VR128X:$src)),
11167             (EXTRACT_SUBREG
11168                 (VPABSQZrr
11169                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
11170              sub_xmm)>;
11173 // Use 512bit version to implement 128/256 bit.
// Generic form of the widening trick, active when `prd` holds but VLX is not
// available (NoVLX). InstrStr is the instruction name without the "Zrr"
// suffix; the 512-bit register-register record is looked up by name. For both
// the 256-bit and 128-bit types, the source is inserted into an undefined
// 512-bit value, the 512-bit instruction is applied, and the matching
// subregister is extracted.
11174 multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
11175                                  AVX512VLVectorVTInfo _, Predicate prd> {
11176   let Predicates = [prd, NoVLX] in {
11177     def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
11178               (EXTRACT_SUBREG
11179                 (!cast<Instruction>(InstrStr # "Zrr")
11180                   (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11181                                  _.info256.RC:$src1,
11182                                  _.info256.SubRegIdx)),
11183               _.info256.SubRegIdx)>;
11185     def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
11186               (EXTRACT_SUBREG
11187                 (!cast<Instruction>(InstrStr # "Zrr")
11188                   (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11189                                  _.info128.RC:$src1,
11190                                  _.info128.SubRegIdx)),
11191               _.info128.SubRegIdx)>;
11192   }
// Leading-zero count (matched from `ctlz`) and conflict detection (matched
// from X86Conflict), both guarded by HasCDI. The dword and qword forms share
// one opcode byte; avx512_unary_rm_vl_dq adds REX_W to the qword form.
11195 defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
11196                                         SchedWriteVecIMul, HasCDI>;
11198 // FIXME: Is there a better scheduler class for VPCONFLICT?
11199 defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
11200                                         SchedWriteVecALU, HasCDI>;
11202 // VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
11203 defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
11204 defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
11206 //===---------------------------------------------------------------------===//
11207 // Counts number of ones - VPOPCNTD and VPOPCNTQ
11208 //===---------------------------------------------------------------------===//
11210 // FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
// Matched from the generic `ctpop` node and guarded by HasVPOPCNTDQ. The
// dword and qword forms share opcode 0x55.
11211 defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
11212                                      SchedWriteVecALU, HasVPOPCNTDQ>;
// Without VLX, 128/256-bit ctpop is implemented with the 512-bit instruction.
11214 defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
11215 defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
11217 //===---------------------------------------------------------------------===//
11218 // Replicate Single FP - MOVSHDUP and MOVSLDUP
11219 //===---------------------------------------------------------------------===//
// Thin wrapper that instantiates avx512_unary_rm_vl with the f32 type info
// and the HasAVX512 predicate, then applies the TB and XS encoding mix-ins.
11221 multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
11222                             X86SchedWriteWidths sched> {
11223   defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
11224                                       avx512vl_f32_info, HasAVX512>, TB, XS;
11227 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
11228                                   SchedWriteFShuffle>;
11229 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
11230                                   SchedWriteFShuffle>;
11232 //===----------------------------------------------------------------------===//
11233 // AVX-512 - MOVDDUP
11234 //===----------------------------------------------------------------------===//
// 128-bit MOVDDUP. Unlike the wider forms, this one is matched as a
// broadcast: the register form (rr) matches X86VBroadcast of a vector
// register, and the memory form (rm) takes a scalar memory operand and
// matches the type's broadcast-load fragment.
11236 multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
11237                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11238   let ExeDomain = _.ExeDomain in {
11239   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11240                    (ins _.RC:$src), OpcodeStr, "$src", "$src",
11241                    (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
11242                    Sched<[sched]>;
11243   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11244                  (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
11245                  (_.VT (_.BroadcastLdFrag addr:$src))>,
11246                  EVEX, EVEX_CD8<_.EltSize, CD8VH>,
11247                  Sched<[sched.Folded]>;
11248   }
// Instantiates MOVDDUP at all three widths. The 512-bit and 256-bit forms
// use avx512_unary_rm with the X86Movddup node; the 128-bit form uses the
// dedicated avx512_movddup_128 multiclass. Z256 and Z128 require
// HasAVX512 and HasVLX; no predicate is set on Z within this multiclass.
11251 multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
11252                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
11253   defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
11254                            VTInfo.info512>, EVEX_V512;
11256   let Predicates = [HasAVX512, HasVLX] in {
11257     defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
11258                                 VTInfo.info256>, EVEX_V256;
11259     defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
11260                                    VTInfo.info128>, EVEX_V128;
11261   }
// Wraps avx512_movddup_common with the f64 type info and applies the TB, XD
// and REX_W encoding mix-ins.
11264 multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
11265                           X86SchedWriteWidths sched> {
11266   defm NAME:      avx512_movddup_common<opc, OpcodeStr, sched,
11267                                         avx512vl_f64_info>, TB, XD, REX_W;
11270 defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;
// Select VMOVDDUPZ128 for a v2f64 broadcast of a scalar f64 that lives in a
// register. The scalar is first copied into the VR128X register class. Three
// forms are covered: unmasked (rr), merge-masked with passthru $src0 (rrk),
// and zero-masked (rrkz).
11272 let Predicates = [HasVLX] in {
11273 def : Pat<(v2f64 (X86VBroadcast f64:$src)),
11274           (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11276 def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11277                         (v2f64 VR128X:$src0)),
11278           (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
11279                            (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11280 def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11281                         immAllZerosV),
11282           (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11285 //===----------------------------------------------------------------------===//
11286 // AVX-512 - Unpack Instructions
11287 //===----------------------------------------------------------------------===//
// The FP unpacks reuse avx512_fp_binop_p, so the surrounding `let` clears
// Uses and mayRaiseFPException for them.
// NOTE(review): presumably avx512_fp_binop_p otherwise sets these for FP
// arithmetic; its definition is outside this section - confirm.
11289 let Uses = []<Register>, mayRaiseFPException = 0 in {
11290 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
11291                                  SchedWriteFShuffleSizes, 0, 1>;
11292 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
11293                                  SchedWriteFShuffleSizes>;
// Integer unpacks. The byte and word forms require HasBWI; the dword and
// qword forms require HasAVX512.
11296 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
11297                                        SchedWriteShuffle, HasBWI>;
11298 defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
11299                                        SchedWriteShuffle, HasBWI>;
11300 defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
11301                                        SchedWriteShuffle, HasBWI>;
11302 defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
11303                                        SchedWriteShuffle, HasBWI>;
11305 defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
11306                                        SchedWriteShuffle, HasAVX512>;
11307 defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
11308                                        SchedWriteShuffle, HasAVX512>;
11309 defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
11310                                         SchedWriteShuffle, HasAVX512>;
11311 defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
11312                                         SchedWriteShuffle, HasAVX512>;
11314 //===----------------------------------------------------------------------===//
11315 // AVX-512 - Extract & Insert Integer Instructions
11316 //===----------------------------------------------------------------------===//
// Store-to-memory form (mr) shared by the byte and word extracts. The
// pattern matches a store of the extract node's result truncated to the
// element type. The lane index $src2 is a target immediate.
11318 multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11319                                                             X86VectorVTInfo _> {
11320   def mr : AVX512Ii8<opc, MRMDestMem, (outs),
11321               (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11322               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11323               [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
11324                        addr:$dst)]>,
11325               EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
// Byte extract, guarded by HasBWI. The register form (rr) writes a
// GR32orGR64 destination and matches X86pextrb; the memory form comes from
// avx512_extract_elt_bw_m. Both use opcode 0x14 with the TA, PD mix-ins.
11328 multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
11329   let Predicates = [HasBWI] in {
11330     def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
11331                   (ins _.RC:$src1, u8imm:$src2),
11332                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11333                   [(set GR32orGR64:$dst,
11334                         (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
11335                   EVEX, TA, PD, Sched<[WriteVecExtract]>;
11337     defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TA, PD;
11338   }
// Word extract, guarded by HasBWI. Two register encodings are defined:
//  - rr     (0xC5, MRMSrcReg, TB/PD) carries the X86pextrw selection pattern.
//  - rr_REV (0x15, MRMDestReg, TA/PD) has no pattern; it is marked
//    isCodeGenOnly with ForceDisassemble set.
// The memory form comes from avx512_extract_elt_bw_m with opcode 0x15.
11341 multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
11342   let Predicates = [HasBWI] in {
11343     def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
11344                   (ins _.RC:$src1, u8imm:$src2),
11345                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11346                   [(set GR32orGR64:$dst,
11347                         (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
11348                   EVEX, TB, PD, Sched<[WriteVecExtract]>;
11350     let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
11351     def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
11352                    (ins _.RC:$src1, u8imm:$src2),
11353                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
11354                    EVEX, TA, PD, Sched<[WriteVecExtract]>;
11356     defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TA, PD;
11357   }
// Dword/qword extract, guarded by HasDQI. GRC is the general-purpose
// register class of the destination. Both the register (rr) and store (mr)
// forms use opcode 0x16 and match the generic `extractelt` node with an
// `imm` lane index.
11360 multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
11361                                                             RegisterClass GRC> {
11362   let Predicates = [HasDQI] in {
11363     def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
11364                   (ins _.RC:$src1, u8imm:$src2),
11365                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11366                   [(set GRC:$dst,
11367                       (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
11368                   EVEX, TA, PD, Sched<[WriteVecExtract]>;
11370     def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
11371                 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11372                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11373                 [(store (extractelt (_.VT _.RC:$src1),
11374                                     imm:$src2),addr:$dst)]>,
11375                 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TA, PD,
11376                 Sched<[WriteVecExtractSt]>;
11377   }
// EVEX element-extract instructions for each integer element size. The byte
// and word forms are WIG; the qword form adds REX_W and is what separates it
// from the dword form, since both share opcode 0x16.
11380 defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, WIG;
11381 defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, WIG;
11382 defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
11383 defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, REX_W;
// Load-folding form (rm) shared by all element inserts: inserts a scalar
// loaded through LdFrag into vector $src1 at lane $src3. `immoperator`
// selects how the lane index is matched (callers pass `timm` or `imm`).
11385 multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11386                                             X86VectorVTInfo _, PatFrag LdFrag,
11387                                             SDPatternOperator immoperator> {
11388   def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
11389       (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
11390       OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11391       [(set _.RC:$dst,
11392           (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
11393       EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
// Byte/word insert, guarded by HasBWI. The register form (rr) takes the
// scalar from a GR32orGR64 operand and matches OpNode with a target
// immediate lane index; the memory form comes from avx512_insert_elt_m.
11396 multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
11397                                             X86VectorVTInfo _, PatFrag LdFrag> {
11398   let Predicates = [HasBWI] in {
11399     def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11400         (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
11401         OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11402         [(set _.RC:$dst,
11403             (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX, VVVV,
11404         Sched<[WriteVecInsert]>;
11406     defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
11407   }
// Dword/qword insert, guarded by HasDQI. Both forms match the generic
// `insertelt` node with an `imm` lane index. The register form takes the
// scalar from GRC; the memory form uses the type's scalar load fragment.
11410 multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
11411                                          X86VectorVTInfo _, RegisterClass GRC> {
11412   let Predicates = [HasDQI] in {
11413     def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11414         (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
11415         OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11416         [(set _.RC:$dst,
11417             (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
11418         EVEX, VVVV, TA, PD, Sched<[WriteVecInsert]>;
11420     defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
11421                                     _.ScalarLdFrag, imm>, TA, PD;
11422   }
// EVEX element-insert instructions for each integer element size. The byte
// and word forms load through extending loads (extloadi8 / extloadi16). The
// dword and qword forms share opcode 0x22, with REX_W added to the qword one.
11425 defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
11426                                      extloadi8>, TA, PD, WIG;
11427 defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
11428                                      extloadi16>, TB, PD, WIG;
11429 defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
11430 defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, REX_W;
// Without BWI, insert a byte obtained by bitcasting a v8i1 mask: the mask
// register is copied to GR32 and fed to VPINSRBrr (not the EVEX VPINSRBZrr,
// which is defined under HasBWI).
11432 let Predicates = [HasAVX512, NoBWI] in {
11433   def : Pat<(X86pinsrb VR128:$src1,
11434                        (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
11435                        timm:$src3),
11436             (VPINSRBrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
11437                        timm:$src3)>;
// With BWI, select VPINSRBZrr for two any-extended byte sources:
//  - a GR8 value, placed in the low 8 bits of an undefined i32 via
//    INSERT_SUBREG;
//  - a v8i1 mask bitcast to i8, copied from VK8 to GR32.
11440 let Predicates = [HasBWI] in {
11441   def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
11442             (VPINSRBZrr VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
11443                         GR8:$src2, sub_8bit), timm:$src3)>;
11444   def : Pat<(X86pinsrb VR128:$src1,
11445                        (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
11446                        timm:$src3),
11447             (VPINSRBZrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
11448                         timm:$src3)>;
11451 // Always select FP16 instructions if available.
// These patterns implement f16 load, store and i16<->f16 bitcasts with
// VPINSRW/VPEXTRW on lane 0. AddedComplexity = -10 lowers their priority so
// that other patterns for the same nodes are preferred when they apply.
11452 let Predicates = [HasBWI], AddedComplexity = -10 in {
11453   def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (VPINSRWZrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16X)>;
11454   def : Pat<(store f16:$src, addr:$dst), (VPEXTRWZmr addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>;
11455   def : Pat<(i16 (bitconvert f16:$src)), (EXTRACT_SUBREG (VPEXTRWZrr (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0), sub_16bit)>;
11456   def : Pat<(f16 (bitconvert i16:$src)), (COPY_TO_REGCLASS (VPINSRWZrr (v8i16 (IMPLICIT_DEF)), (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit), 0), FR16X)>;
11459 //===----------------------------------------------------------------------===//
11460 // VSHUFPS - VSHUFPD Operations
11461 //===----------------------------------------------------------------------===//
// Wraps avx512_common_3Op_imm8 with opcode 0xC6 and the X86Shufp node. The
// EVEX_CD8 element size is taken from the 512-bit type info.
11463 multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
11464   defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
11465                                     SchedWriteFShuffle>,
11466                                     EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
11467                                     TA, EVEX, VVVV;
11470 defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, TB;
11471 defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, TB, PD, REX_W;
11473 //===----------------------------------------------------------------------===//
11474 // AVX-512 - Byte shift Left/Right
11475 //===----------------------------------------------------------------------===//
// Defines the register (ri) and memory (mi) forms of a shift that takes an
// i8 target-immediate shift amount. MRMr and MRMm are the instruction
// formats for the two forms. The memory form loads its vector operand
// through the type's load fragment and bitconverts it to _.VT.
11477 multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
11478                                Format MRMm, string OpcodeStr,
11479                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11480   def ri : AVX512<opc, MRMr,
11481              (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
11482              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11483              [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
11484              Sched<[sched]>;
11485   def mi : AVX512<opc, MRMm,
11486            (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
11487            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11488            [(set _.RC:$dst,(_.VT (OpNode
11489                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
11490                                  (i8 timm:$src2))))]>,
11491            Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_shift_packed at 512, 256 and 128 bits using the byte
// vector types (v64i8, v32i8, v16i8). Z needs only `prd`; Z256 and Z128 also
// need HasVLX.
11494 multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
11495                                    Format MRMm, string OpcodeStr,
11496                                    X86SchedWriteWidths sched, Predicate prd>{
11497   let Predicates = [prd] in
11498     defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11499                                  sched.ZMM, v64i8_info>, EVEX_V512;
11500   let Predicates = [prd, HasVLX] in {
11501     defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11502                                     sched.YMM, v32i8x_info>, EVEX_V256;
11503     defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11504                                     sched.XMM, v16i8x_info>, EVEX_V128;
11505   }
// Byte-wise left and right shifts. Both use opcode 0x73 and differ only in
// the format pair: MRM7r/MRM7m for the left shift, MRM3r/MRM3m for the right.
11507 defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
11508                                        SchedWriteShuffle, HasBWI>,
11509                                        AVX512PDIi8Base, EVEX, VVVV, WIG;
11510 defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
11511                                        SchedWriteShuffle, HasBWI>,
11512                                        AVX512PDIi8Base, EVEX, VVVV, WIG;
// Two-source operation whose result type (_dst) differs from its source
// type (_src). The register form (rr) is marked commutable; the memory form
// (rm) folds a load of the second source, bitconverted to the source type.
11514 multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
11515                                 string OpcodeStr, X86FoldableSchedWrite sched,
11516                                 X86VectorVTInfo _dst, X86VectorVTInfo _src> {
11517   let isCommutable = 1 in
11518   def rr : AVX512BI<opc, MRMSrcReg,
11519              (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
11520              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11521              [(set _dst.RC:$dst,(_dst.VT
11522                                 (OpNode (_src.VT _src.RC:$src1),
11523                                         (_src.VT _src.RC:$src2))))]>,
11524              Sched<[sched]>;
11525   def rm : AVX512BI<opc, MRMSrcMem,
11526            (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
11527            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11528            [(set _dst.RC:$dst,(_dst.VT
11529                               (OpNode (_src.VT _src.RC:$src1),
11530                               (_src.VT (bitconvert
11531                                         (_src.LdFrag addr:$src2))))))]>,
11532            Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_psadbw_packed at 512, 256 and 128 bits. Sources are
// byte vectors and results are i64 vectors of the same total width
// (v64i8 -> v8i64, v32i8 -> v4i64, v16i8 -> v2i64). Z needs only `prd`;
// Z256 and Z128 also need HasVLX.
11535 multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11536                                     string OpcodeStr, X86SchedWriteWidths sched,
11537                                     Predicate prd> {
11538   let Predicates = [prd] in
11539     defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11540                                   v8i64_info, v64i8_info>, EVEX_V512;
11541   let Predicates = [prd, HasVLX] in {
11542     defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11543                                      v4i64x_info, v32i8x_info>, EVEX_V256;
11544     defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11545                                      v2i64x_info, v16i8x_info>, EVEX_V128;
11546   }
// VPSADBW at all three widths, matched from X86psadbw and guarded by HasBWI.
11549 defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11550                                         SchedWritePSADBW, HasBWI>, EVEX, VVVV, WIG;
// The VPTERNLOG immediate is an 8-entry truth table. Each transform below
// permutes its bits to match a permutation of the three source operands.
// Judging by the bit swaps, operand 0 is the most significant bit of the
// table index and operand 2 the least significant.
11552 // Transforms to swizzle an immediate to enable better matching when
11553 // memory operand isn't in the right place.
11554 def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
11555   // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
11556   uint8_t Imm = N->getZExtValue();
11557   // Swap bits 1/4 and 3/6.
  // Bits 0, 2, 5 and 7 (mask 0xa5) are unaffected by the swap and are kept.
11558   uint8_t NewImm = Imm & 0xa5;
11559   if (Imm & 0x02) NewImm |= 0x10;
11560   if (Imm & 0x10) NewImm |= 0x02;
11561   if (Imm & 0x08) NewImm |= 0x40;
11562   if (Imm & 0x40) NewImm |= 0x08;
11563   return getI8Imm(NewImm, SDLoc(N));
11564 }]>;
11565 def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
11566   // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
11567   uint8_t Imm = N->getZExtValue();
11568   // Swap bits 2/4 and 3/5.
  // Bits 0, 1, 6 and 7 (mask 0xc3) are unaffected by the swap and are kept.
11569   uint8_t NewImm = Imm & 0xc3;
11570   if (Imm & 0x04) NewImm |= 0x10;
11571   if (Imm & 0x10) NewImm |= 0x04;
11572   if (Imm & 0x08) NewImm |= 0x20;
11573   if (Imm & 0x20) NewImm |= 0x08;
11574   return getI8Imm(NewImm, SDLoc(N));
11575 }]>;
11576 def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
11577   // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11578   uint8_t Imm = N->getZExtValue();
11579   // Swap bits 1/2 and 5/6.
  // Bits 0, 3, 4 and 7 (mask 0x99) are unaffected by the swap and are kept.
11580   uint8_t NewImm = Imm & 0x99;
11581   if (Imm & 0x02) NewImm |= 0x04;
11582   if (Imm & 0x04) NewImm |= 0x02;
11583   if (Imm & 0x20) NewImm |= 0x40;
11584   if (Imm & 0x40) NewImm |= 0x20;
11585   return getI8Imm(NewImm, SDLoc(N));
11586 }]>;
11587 def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
11588   // Convert a VPTERNLOG immediate by moving operand 1 to the end.
11589   uint8_t Imm = N->getZExtValue();
11590   // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  // Only bits 0 and 7 (mask 0x81) stay in place under this rotation.
11591   uint8_t NewImm = Imm & 0x81;
11592   if (Imm & 0x02) NewImm |= 0x04;
11593   if (Imm & 0x04) NewImm |= 0x10;
11594   if (Imm & 0x08) NewImm |= 0x40;
11595   if (Imm & 0x10) NewImm |= 0x02;
11596   if (Imm & 0x20) NewImm |= 0x08;
11597   if (Imm & 0x40) NewImm |= 0x20;
11598   return getI8Imm(NewImm, SDLoc(N));
11599 }]>;
11600 def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
11601   // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
11602   uint8_t Imm = N->getZExtValue();
11603   // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  // Only bits 0 and 7 (mask 0x81) stay in place under this rotation.
11604   uint8_t NewImm = Imm & 0x81;
11605   if (Imm & 0x02) NewImm |= 0x10;
11606   if (Imm & 0x04) NewImm |= 0x02;
11607   if (Imm & 0x08) NewImm |= 0x20;
11608   if (Imm & 0x10) NewImm |= 0x04;
11609   if (Imm & 0x20) NewImm |= 0x40;
11610   if (Imm & 0x40) NewImm |= 0x08;
11611   return getI8Imm(NewImm, SDLoc(N));
11612 }]>;
// Defines the three-source ternary-logic instruction for one vector type,
// plus extra selection patterns.
//
// Instruction forms (all with $src1 tied to $dst):
//   rri  - third source in a register
//   rmi  - third source is a full-vector load
//   rmbi - third source is a broadcast load (EVEX_B)
//
// The instructions always take operands as ($src1 tied/passthru, $src2,
// $src3 register-or-memory). The extra patterns match nodes whose operands
// appear in a different order - for example the passthru or the load in
// another position - and rewrite the immediate with the matching
// VPTERNLOG*_imm8 transform so the selected instruction computes the same
// function.
//
// `Name` is the instruction base name; masked records are looked up by
// concatenating Name, _.ZSuffix and the form suffix (rrik, rmik, rmikz,
// rmbik, rmbikz).
11614 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11615                           X86FoldableSchedWrite sched, X86VectorVTInfo _,
11616                           string Name>{
11617   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11618   defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11619                       (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11620                       OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11621                       (OpNode (_.VT _.RC:$src1),
11622                               (_.VT _.RC:$src2),
11623                               (_.VT _.RC:$src3),
11624                               (i8 timm:$src4)), 1, 1>,
11625                       AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>;
11626   defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11627                     (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11628                     OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11629                     (OpNode (_.VT _.RC:$src1),
11630                             (_.VT _.RC:$src2),
11631                             (_.VT (bitconvert (_.LdFrag addr:$src3))),
11632                             (i8 timm:$src4)), 1, 0>,
11633                     AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
11634                     Sched<[sched.Folded, sched.ReadAfterFold]>;
11635   defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11636                     (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11637                     OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
11638                     "$src2, ${src3}"#_.BroadcastStr#", $src4",
11639                     (OpNode (_.VT _.RC:$src1),
11640                             (_.VT _.RC:$src2),
11641                             (_.VT (_.BroadcastLdFrag addr:$src3)),
11642                             (i8 timm:$src4)), 1, 0>, EVEX_B,
11643                     AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
11644                     Sched<[sched.Folded, sched.ReadAfterFold]>;
11645   }// Constraints = "$src1 = $dst"
11647   // Additional patterns for matching passthru operand in other positions.
  // The passthru ($src1) is the third or second node operand here, so the
  // immediate is rewritten to move it back to the first position.
11648   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11649                    (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11650                    _.RC:$src1)),
11651             (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11652              _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11653   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11654                    (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
11655                    _.RC:$src1)),
11656             (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11657              _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11659   // Additional patterns for matching zero masking with loads in other
11660   // positions.
11661   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11662                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
11663                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11664                    _.ImmAllZerosV)),
11665             (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11666              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11667   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11668                    (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11669                     _.RC:$src2, (i8 timm:$src4)),
11670                    _.ImmAllZerosV)),
11671             (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11672              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11674   // Additional patterns for matching masked loads with different
11675   // operand orders.
11676   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11677                    (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11678                     _.RC:$src2, (i8 timm:$src4)),
11679                    _.RC:$src1)),
11680             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11681              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11682   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11683                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
11684                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11685                    _.RC:$src1)),
11686             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11687              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11688   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11689                    (OpNode _.RC:$src2, _.RC:$src1,
11690                     (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
11691                    _.RC:$src1)),
11692             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11693              _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11694   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11695                    (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11696                     _.RC:$src1, (i8 timm:$src4)),
11697                    _.RC:$src1)),
11698             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11699              _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11700   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11701                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
11702                     _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11703                    _.RC:$src1)),
11704             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11705              _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11707   // Additional patterns for matching zero masking with broadcasts in other
11708   // positions.
11709   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11710                    (OpNode (_.BroadcastLdFrag addr:$src3),
11711                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11712                    _.ImmAllZerosV)),
11713             (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11714              _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11715              (VPTERNLOG321_imm8 timm:$src4))>;
11716   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11717                    (OpNode _.RC:$src1,
11718                     (_.BroadcastLdFrag addr:$src3),
11719                     _.RC:$src2, (i8 timm:$src4)),
11720                    _.ImmAllZerosV)),
11721             (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11722              _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11723              (VPTERNLOG132_imm8 timm:$src4))>;
11725   // Additional patterns for matching masked broadcasts with different
11726   // operand orders.
11727   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11728                    (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
11729                     _.RC:$src2, (i8 timm:$src4)),
11730                    _.RC:$src1)),
11731             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11732              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11733   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11734                    (OpNode (_.BroadcastLdFrag addr:$src3),
11735                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11736                    _.RC:$src1)),
11737             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11738              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11739   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11740                    (OpNode _.RC:$src2, _.RC:$src1,
11741                     (_.BroadcastLdFrag addr:$src3),
11742                     (i8 timm:$src4)), _.RC:$src1)),
11743             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11744              _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11745   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11746                    (OpNode _.RC:$src2,
11747                     (_.BroadcastLdFrag addr:$src3),
11748                     _.RC:$src1, (i8 timm:$src4)),
11749                    _.RC:$src1)),
11750             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11751              _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11752   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11753                    (OpNode (_.BroadcastLdFrag addr:$src3),
11754                     _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11755                    _.RC:$src1)),
11756             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11757              _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
// Instantiates avx512_ternlog (opcode 0x25, node X86vpternlog) once per vector
// width of the given AVX512VLVectorVTInfo:
//   Z    - info512, requires HasAVX512, EVEX_V512
//   Z128 - info128, requires HasAVX512 + HasVLX, EVEX_V128
//   Z256 - info256, requires HasAVX512 + HasVLX, EVEX_V256
// NAME is forwarded as the last template argument of avx512_ternlog.
11760 multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
11761                                  AVX512VLVectorVTInfo _> {
11762   let Predicates = [HasAVX512] in
11763     defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
11764                                _.info512, NAME>, EVEX_V512;
11765   let Predicates = [HasAVX512, HasVLX] in {
11766     defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
11767                                _.info128, NAME>, EVEX_V128;
11768     defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
11769                                _.info256, NAME>, EVEX_V256;
11770   }
// Dword (i32) and qword (i64) element variants. Both use SchedWriteVecALU; the
// qword variant additionally carries REX_W.
11773 defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
11774                                         avx512vl_i32_info>;
11775 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
11776                                         avx512vl_i64_info>, REX_W;
11778 // Patterns to implement vnot using vpternlog instead of creating all ones
11779 // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11780 // so that the result is only dependent on src0. But we use the same source
11781 // for all operands to prevent a false dependency.
11782 // TODO: We should maybe have a more generalized algorithm for folding to
11783 // vpternlog.
// 512-bit vnot: every integer element type (v64i8, v32i16, v16i32, v8i64) is
// selected to the same qword instruction, with $src in all three source slots
// and immediate 15.
11784 let Predicates = [HasAVX512] in {
11785   def : Pat<(v64i8 (vnot VR512:$src)),
11786             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11787   def : Pat<(v32i16 (vnot VR512:$src)),
11788             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11789   def : Pat<(v16i32 (vnot VR512:$src)),
11790             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11791   def : Pat<(v8i64 (vnot VR512:$src)),
11792             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
// 128/256-bit vnot when AVX512 is available but VLX is not (NoVLX): the source
// is placed into an IMPLICIT_DEF v8i64 via INSERT_SUBREG (sub_xmm or sub_ymm),
// the 512-bit VPTERNLOGQZrri is applied with that widened value in all three
// source slots and immediate 15, and the result is narrowed back with
// EXTRACT_SUBREG using the same subregister index.
11795 let Predicates = [HasAVX512, NoVLX] in {
  // 128-bit element types, widened through sub_xmm.
11796   def : Pat<(v16i8 (vnot VR128X:$src)),
11797             (EXTRACT_SUBREG
11798              (VPTERNLOGQZrri
11799               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11800               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11801               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11802               (i8 15)), sub_xmm)>;
11803   def : Pat<(v8i16 (vnot VR128X:$src)),
11804             (EXTRACT_SUBREG
11805              (VPTERNLOGQZrri
11806               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11807               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11808               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11809               (i8 15)), sub_xmm)>;
11810   def : Pat<(v4i32 (vnot VR128X:$src)),
11811             (EXTRACT_SUBREG
11812              (VPTERNLOGQZrri
11813               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11814               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11815               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11816               (i8 15)), sub_xmm)>;
11817   def : Pat<(v2i64 (vnot VR128X:$src)),
11818             (EXTRACT_SUBREG
11819              (VPTERNLOGQZrri
11820               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11821               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11822               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11823               (i8 15)), sub_xmm)>;
  // 256-bit element types, widened through sub_ymm.
11825   def : Pat<(v32i8 (vnot VR256X:$src)),
11826             (EXTRACT_SUBREG
11827              (VPTERNLOGQZrri
11828               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11829               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11830               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11831               (i8 15)), sub_ymm)>;
11832   def : Pat<(v16i16 (vnot VR256X:$src)),
11833             (EXTRACT_SUBREG
11834              (VPTERNLOGQZrri
11835               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11836               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11837               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11838               (i8 15)), sub_ymm)>;
11839   def : Pat<(v8i32 (vnot VR256X:$src)),
11840             (EXTRACT_SUBREG
11841              (VPTERNLOGQZrri
11842               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11843               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11844               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11845               (i8 15)), sub_ymm)>;
11846   def : Pat<(v4i64 (vnot VR256X:$src)),
11847             (EXTRACT_SUBREG
11848              (VPTERNLOGQZrri
11849               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11850               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11851               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11852               (i8 15)), sub_ymm)>;
// 128/256-bit vnot when VLX is available: the Z128/Z256 qword instructions are
// used directly on VR128X/VR256X, with $src in all three source slots and
// immediate 15, for every integer element type of that width.
11855 let Predicates = [HasVLX] in {
11856   def : Pat<(v16i8 (vnot VR128X:$src)),
11857             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11858   def : Pat<(v8i16 (vnot VR128X:$src)),
11859             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11860   def : Pat<(v4i32 (vnot VR128X:$src)),
11861             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11862   def : Pat<(v2i64 (vnot VR128X:$src)),
11863             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11865   def : Pat<(v32i8 (vnot VR256X:$src)),
11866             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11867   def : Pat<(v16i16 (vnot VR256X:$src)),
11868             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11869   def : Pat<(v8i32 (vnot VR256X:$src)),
11870             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11871   def : Pat<(v4i64 (vnot VR256X:$src)),
11872             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11875 //===----------------------------------------------------------------------===//
11876 // AVX-512 - FixupImm
11877 //===----------------------------------------------------------------------===//
// Packed VFIXUPIMM definitions for one vector type `_`; `TblVT` gives the type
// used for the third (table) operand in the selection patterns.
//   rri  - register source            (MRMSrcReg)
//   rmi  - full-vector memory source  (MRMSrcMem, loaded with TblVT.LdFrag)
//   rmbi - broadcast memory source    (MRMSrcMem, TblVT.BroadcastLdFrag, EVEX_B)
// All three tie $src1 to $dst, list MXCSR in Uses and set mayRaiseFPException.
// The memory forms use the folded scheduling pair; the register form uses the
// plain write.
11879 multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
11880                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
11881                                   X86VectorVTInfo TblVT>{
11882   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
11883       Uses = [MXCSR], mayRaiseFPException = 1 in {
11884     defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11885                         (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11886                          OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11887                         (X86VFixupimm (_.VT _.RC:$src1),
11888                                       (_.VT _.RC:$src2),
11889                                       (TblVT.VT _.RC:$src3),
11890                                       (i32 timm:$src4))>, Sched<[sched]>;
11891     defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11892                       (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
11893                       OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11894                       (X86VFixupimm (_.VT _.RC:$src1),
11895                                     (_.VT _.RC:$src2),
11896                                     (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
11897                                     (i32 timm:$src4))>,
11898                       Sched<[sched.Folded, sched.ReadAfterFold]>;
11899     defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11900                       (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11901                     OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
11902                     "$src2, ${src3}"#_.BroadcastStr#", $src4",
11903                       (X86VFixupimm (_.VT _.RC:$src1),
11904                                     (_.VT _.RC:$src2),
11905                                     (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
11906                                     (i32 timm:$src4))>,
11907                     EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
11908   } // Constraints = "$src1 = $dst"
// Everything from avx512_fixupimm_packed, plus the rrib register form that
// prints "{sae}" in its assembly string, selects X86VFixupimmSAE and sets
// EVEX_B. The let below lists MXCSR in Uses but, unlike the inherited forms,
// does not set mayRaiseFPException.
11911 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
11912                                       X86FoldableSchedWrite sched,
11913                                       X86VectorVTInfo _, X86VectorVTInfo TblVT>
11914   : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
11915 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
11916   defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11917                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11918                       OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
11919                       "$src2, $src3, {sae}, $src4",
11920                       (X86VFixupimmSAE (_.VT _.RC:$src1),
11921                                        (_.VT _.RC:$src2),
11922                                        (TblVT.VT _.RC:$src3),
11923                                        (i32 timm:$src4))>,
11924                       EVEX_B, Sched<[sched]>;
11925   }
// Scalar VFIXUPIMM definitions. All forms require HasAVX512 and tie $src1 to
// $dst; `_src3VT` supplies the value type and register class used for $src3 in
// the selection patterns.
//   rri  - register source, selects X86VFixupimms, SIMD_EXC
//   rrib - register source printed with "{sae}", selects X86VFixupimmSAEs,
//          EVEX_B, lists MXCSR in Uses
//   rmi  - scalar memory source loaded via _src3VT.ScalarLdFrag and wrapped in
//          scalar_to_vector, SIMD_EXC
11928 multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
11929                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
11930                                   X86VectorVTInfo _src3VT> {
11931   let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
11932       ExeDomain = _.ExeDomain in {
11933     defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11934                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11935                       OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11936                       (X86VFixupimms (_.VT _.RC:$src1),
11937                                      (_.VT _.RC:$src2),
11938                                      (_src3VT.VT _src3VT.RC:$src3),
11939                                      (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
    // rrib has only register operands (MRMSrcReg), so it is scheduled with the
    // plain write like rri above, not with the load-folded pair used by rmi.
11940     let Uses = [MXCSR] in
11941     defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11942                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11943                       OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
11944                       "$src2, $src3, {sae}, $src4",
11945                       (X86VFixupimmSAEs (_.VT _.RC:$src1),
11946                                         (_.VT _.RC:$src2),
11947                                         (_src3VT.VT _src3VT.RC:$src3),
11948                                         (i32 timm:$src4))>,
11949                       EVEX_B, Sched<[sched]>;
11950     defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
11951                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11952                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11953                      (X86VFixupimms (_.VT _.RC:$src1),
11954                                     (_.VT _.RC:$src2),
11955                                     (_src3VT.VT (scalar_to_vector
11956                                               (_src3VT.ScalarLdFrag addr:$src3))),
11957                                     (i32 timm:$src4))>,
11958                      Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
11959   }
// Instantiates the packed forms with opcode 0x54 and mnemonic "vfixupimm" for
// every vector width, pairing each _Vec info with the same-width _Tbl info:
//   Z    - HasAVX512; uses the _sae multiclass, so it also gets rrib
//   Z128 - HasAVX512 + HasVLX; plain packed multiclass
//   Z256 - HasAVX512 + HasVLX; plain packed multiclass
11962 multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
11963                                       AVX512VLVectorVTInfo _Vec,
11964                                       AVX512VLVectorVTInfo _Tbl> {
11965   let Predicates = [HasAVX512] in
11966     defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
11967                                 _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
11968                                 EVEX, VVVV, EVEX_V512;
11969   let Predicates = [HasAVX512, HasVLX] in {
11970     defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
11971                             _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
11972                             EVEX, VVVV, EVEX_V128;
11973     defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
11974                             _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
11975                             EVEX, VVVV, EVEX_V256;
11976   }
// Scalar SS/SD forms use opcode 0x55 with SchedWriteFAdd.Scl and take an
// integer vector type (v4i32x_info / v2i64x_info) for $src3. Packed PS/PD forms
// pair the f32/f64 vector infos with the i32/i64 ones. The 64-bit element
// variants (SD, PD) add REX_W.
11979 defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11980                                            SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
11981                           AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
11982 defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11983                                            SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
11984                           AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
11985 defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
11986                          avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11987 defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
11988                          avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;
11990 // Patterns used to select SSE scalar fp arithmetic instructions from
11991 // either:
11993 // (1) a scalar fp operation followed by a blend
11995 // The effect is that the backend no longer emits unnecessary vector
11996 // insert instructions immediately after SSE scalar fp instructions
11997 // like addss or mulss.
11999 // For example, given the following code:
12000 //   __m128 foo(__m128 A, __m128 B) {
12001 //     A[0] += B[0];
12002 //     return A;
12003 //   }
12005 // Previously we generated:
12006 //   addss %xmm0, %xmm1
12007 //   movss %xmm1, %xmm0
12009 // We now generate:
12010 //   addss %xmm1, %xmm0
12012 // (2) a vector packed single/double fp operation followed by a vector insert
12014 // The effect is that the backend converts the packed fp instruction
12015 // followed by a vector insert into a single SSE scalar fp instruction.
12017 // For example, given the following code:
12018 //   __m128 foo(__m128 A, __m128 B) {
12019 //     __m128 C = A + B;
12020 //     return (__m128) {C[0], A[1], A[2], A[3]};
12021 //   }
12023 // Previously we generated:
12024 //   addps %xmm0, %xmm1
12025 //   movss %xmm1, %xmm0
12027 // We now generate:
12028 //   addss %xmm1, %xmm0
12030 // TODO: Some canonicalization in lowering would simplify the number of
12031 // patterns we have to try to match.
// Selection patterns that fold "scalar op on element 0 + MoveNode insert" into
// a single V<OpcPrefix>Z*_Int instruction (all under HasAVX512).
//   Op       - operator matched in the unmasked patterns
//   MaskedOp - node matched inside X86selects_mask in the masked patterns
//   OpcPrefix- instruction stem; the selected record is "V"#OpcPrefix#"Z..."
//   MoveNode - the insert node wrapping the scalar result
//   _        - vector type info (VT, EltVT, FRC, ScalarLdFrag)
//   ZeroFP   - zero-constant leaf recognised as the zero-masking passthrough
// Each of the three groups has a register form (rr, scalar source copied to
// VR128X with COPY_TO_REGCLASS) and a memory form (rm, scalar load folded).
12032 multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
12033                                           string OpcPrefix, SDNode MoveNode,
12034                                           X86VectorVTInfo _, PatLeaf ZeroFP> {
12035   let Predicates = [HasAVX512] in {
12036     // extracted scalar math op with insert via MoveNode
12037     def : Pat<(MoveNode
12038                (_.VT VR128X:$dst),
12039                (_.VT (scalar_to_vector
12040                       (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12041                           _.FRC:$src)))),
12042               (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
12043                (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
12044     def : Pat<(MoveNode
12045                (_.VT VR128X:$dst),
12046                (_.VT (scalar_to_vector
12047                       (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12048                           (_.ScalarLdFrag addr:$src))))),
12049               (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;
12051     // extracted merge-masked scalar math op with insert via MoveNode; the
    // select's false value $src0 becomes the instruction's passthrough operand
12052     def : Pat<(MoveNode (_.VT VR128X:$src1),
12053                (scalar_to_vector
12054                 (X86selects_mask VK1WM:$mask,
12055                             (MaskedOp (_.EltVT
12056                                        (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12057                                       _.FRC:$src2),
12058                             _.FRC:$src0))),
12059               (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
12060                (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12061                VK1WM:$mask, _.VT:$src1,
12062                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12063     def : Pat<(MoveNode (_.VT VR128X:$src1),
12064                (scalar_to_vector
12065                 (X86selects_mask VK1WM:$mask,
12066                             (MaskedOp (_.EltVT
12067                                        (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12068                                       (_.ScalarLdFrag addr:$src2)),
12069                             _.FRC:$src0))),
12070               (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
12071                (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12072                VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12074     // extracted zero-masked scalar math op with insert via MoveNode; the
    // select's false value is ZeroFP, so the kz form is used
12075     def : Pat<(MoveNode (_.VT VR128X:$src1),
12076                (scalar_to_vector
12077                 (X86selects_mask VK1WM:$mask,
12078                             (MaskedOp (_.EltVT
12079                                        (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12080                                       _.FRC:$src2), (_.EltVT ZeroFP)))),
12081       (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intkz")
12082           VK1WM:$mask, _.VT:$src1,
12083           (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12084     def : Pat<(MoveNode (_.VT VR128X:$src1),
12085                (scalar_to_vector
12086                 (X86selects_mask VK1WM:$mask,
12087                             (MaskedOp (_.EltVT
12088                                        (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12089                                       (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
12090       (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12091   }
// Instantiate the patterns for add/sub/mul/div on each scalar FP width:
//   SS - X86Movss, v4f32x_info, fp32imm0
//   SD - X86Movsd, v2f64x_info, fp64imm0
//   SH - X86Movsh, v8f16x_info, fp16imm0
// The unmasked operator is the any_* form; the masked node is the plain
// fadd/fsub/fmul/fdiv.
12094 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
12095 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
12096 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
12097 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
12099 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
12100 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
12101 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
12102 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
12104 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>;
12105 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>;
12106 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>;
12107 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>;
// Unary counterpart of the scalar math patterns (under HasAVX512): matches
// Move($dst, scalar_to_vector(OpNode(extractelt($src, 0)))) and selects
// "V"#OpcPrefix#"Zr_Int" with $dst and $src as its two operands.
12109 multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix,
12110                                              SDNode Move, X86VectorVTInfo _> {
12111   let Predicates = [HasAVX512] in {
12112     def : Pat<(_.VT (Move _.VT:$dst,
12113                      (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
12114               (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
12115   }
// Square root is the only unary op instantiated here: SS, SD and SH variants,
// each with its matching move node and vector type info.
12118 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
12119 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
12120 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>;
12122 //===----------------------------------------------------------------------===//
12123 // AES instructions
12124 //===----------------------------------------------------------------------===//
// EVEX-encoded variants of one AES instruction, built on AESI_binop_rm_int.
// The intrinsic is looked up by name with !cast<Intrinsic>:
//   Z128 - HasVLX + HasVAES,    intrinsic IntPrefix,         VR128X / i128mem
//   Z256 - HasVLX + HasVAES,    intrinsic IntPrefix#"_256",  VR256X / i256mem
//   Z    - HasAVX512 + HasVAES, intrinsic IntPrefix#"_512",  VR512  / i512mem
12126 multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
12127   let Predicates = [HasVLX, HasVAES] in {
12128     defm Z128 : AESI_binop_rm_int<Op, OpStr,
12129                                   !cast<Intrinsic>(IntPrefix),
12130                                   loadv2i64, 0, VR128X, i128mem>,
12131                   EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V128, WIG;
12132     defm Z256 : AESI_binop_rm_int<Op, OpStr,
12133                                   !cast<Intrinsic>(IntPrefix#"_256"),
12134                                   loadv4i64, 0, VR256X, i256mem>,
12135                   EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V256, WIG;
12136     }
12137     let Predicates = [HasAVX512, HasVAES] in
12138     defm Z    : AESI_binop_rm_int<Op, OpStr,
12139                                   !cast<Intrinsic>(IntPrefix#"_512"),
12140                                   loadv8i64, 0, VR512, i512mem>,
12141                   EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V512, WIG;
// The four AES round instructions occupy consecutive opcodes 0xDC-0xDF; each
// is paired with the int_x86_aesni_* intrinsic name prefix of the same name.
12144 defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
12145 defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
12146 defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
12147 defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
12149 //===----------------------------------------------------------------------===//
12150 // PCLMUL instructions - Carry less multiplication
12151 //===----------------------------------------------------------------------===//
// EVEX-encoded carry-less multiply. The 512-bit form requires HasAVX512 +
// HasVPCLMULQDQ; the 128/256-bit forms require HasVLX + HasVPCLMULQDQ. Each
// width passes its own register class, memory operand, load fragment and
// intrinsic to the shared vpclmulqdq multiclass.
12153 let Predicates = [HasAVX512, HasVPCLMULQDQ] in
12154 defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
12155                               EVEX, VVVV, EVEX_V512, EVEX_CD8<64, CD8VF>, WIG;
12157 let Predicates = [HasVLX, HasVPCLMULQDQ] in {
12158 defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
12159                               EVEX, VVVV, EVEX_V128, EVEX_CD8<64, CD8VF>, WIG;
12161 defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
12162                                 int_x86_pclmulqdq_256>, EVEX, VVVV, EVEX_V256,
12163                                 EVEX_CD8<64, CD8VF>, WIG;
12166 // Aliases
// One vpclmulqdq_aliases instantiation per instruction name defined above.
12167 defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
12168 defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
12169 defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
12171 //===----------------------------------------------------------------------===//
12172 // VBMI2
12173 //===----------------------------------------------------------------------===//
// Variable-count VBMI2 shift, register (r) and full-vector memory (m) forms for
// one vector type VTI. $src1 is tied to $dst and is also the first OpNode
// operand; $src2 and $src3 are the explicit inputs. The memory form loads
// $src3 with VTI.LdFrag and uses the folded scheduling pair.
12175 multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
12176                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12177   let Constraints = "$src1 = $dst",
12178       ExeDomain   = VTI.ExeDomain in {
12179     defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12180                 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12181                 "$src3, $src2", "$src2, $src3",
12182                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
12183                 T8, PD, EVEX, VVVV, Sched<[sched]>;
12184     defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12185                 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12186                 "$src3, $src2", "$src2, $src3",
12187                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12188                         (VTI.VT (VTI.LdFrag addr:$src3))))>,
12189                 T8, PD, EVEX, VVVV,
12190                 Sched<[sched.Folded, sched.ReadAfterFold]>;
12191   }
// Everything from VBMI2_shift_var_rm, plus the broadcast-memory form (mb):
// $src3 is a scalar memory operand loaded with VTI.BroadcastLdFrag, the
// assembly string appends VTI.BroadcastStr, and EVEX_B is set.
12194 multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12195                                X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
12196          : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
12197   let Constraints = "$src1 = $dst",
12198       ExeDomain   = VTI.ExeDomain in
12199   defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12200               (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
12201               "${src3}"#VTI.BroadcastStr#", $src2",
12202               "$src2, ${src3}"#VTI.BroadcastStr,
12203               (OpNode VTI.RC:$src1, VTI.RC:$src2,
12204                (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12205               T8, PD, EVEX, VVVV, EVEX_B,
12206               Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates VBMI2_shift_var_rm (no broadcast form) for every vector width:
// Z (info512) requires HasVBMI2; Z256/Z128 additionally require HasVLX.
12209 multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
12210                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12211   let Predicates = [HasVBMI2] in
12212   defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12213                                    EVEX_V512;
12214   let Predicates = [HasVBMI2, HasVLX] in {
12215     defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12216                                    EVEX_V256;
12217     defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12218                                    EVEX_V128;
12219   }
// Same width/predicate layout as VBMI2_shift_var_rm_common, but built on
// VBMI2_shift_var_rmb so each width also gets the broadcast-memory form.
12222 multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
12223                                       X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12224   let Predicates = [HasVBMI2] in
12225   defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12226                                     EVEX_V512;
12227   let Predicates = [HasVBMI2, HasVLX] in {
12228     defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12229                                     EVEX_V256;
12230     defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12231                                     EVEX_V128;
12232   }
// Word/dword/qword variants of a variable-count shift.
//   W - opcode wOp,  i16 elements, rm_common (no broadcast form), REX_W
//   D - opcode dqOp, i32 elements, rmb_common
//   Q - opcode dqOp, i64 elements, rmb_common, REX_W
// The mnemonic is Prefix with "w", "d" or "q" appended.
12234 multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
12235                            SDNode OpNode, X86SchedWriteWidths sched> {
12236   defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
12237              avx512vl_i16_info>, REX_W, EVEX_CD8<16, CD8VF>;
12238   defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
12239              avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12240   defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
12241              avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
// Word/dword/qword variants of an immediate-count shift, all gated on HasVBMI2.
//   W - opcode wOp,  built with avx512_common_3Op_rm_imm8, REX_W
//   D - opcode dqOp, built with avx512_common_3Op_imm8
//   Q - opcode dqOp, built with avx512_common_3Op_imm8, REX_W
// Note the two helper multiclasses take their template arguments in a
// different order.
12244 multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
12245                            SDNode OpNode, X86SchedWriteWidths sched> {
12246   defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
12247              avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
12248              REX_W, EVEX_CD8<16, CD8VF>;
12249   defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
12250              OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
12251   defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
12252              sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
12255 // Concat & Shift
// Variable-count forms (V suffix) select X86VShldv/X86VShrdv; immediate-count
// forms select X86VShld/X86VShrd. Left and right shifts use opcode pairs
// 0x70/0x71 and 0x72/0x73 respectively (word / dword+qword).
12256 defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
12257 defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
12258 defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
12259 defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
12261 // Compress
// Byte and word compress share opcode 0x63; the word form adds REX_W.
12262 defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
12263                                          avx512vl_i8_info, HasVBMI2>, EVEX;
12264 defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
12265                                           avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
12266 // Expand
// Byte and word expand share opcode 0x62; the word form adds REX_W.
12267 defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
12268                                       avx512vl_i8_info, HasVBMI2>, EVEX;
12269 defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
12270                                       avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
12272 //===----------------------------------------------------------------------===//
12273 // VNNI
12274 //===----------------------------------------------------------------------===//
// VNNI forms for one vector type VTI: register (r), full-vector memory (m) and
// broadcast memory (mb). The "$src1 = $dst" constraint is applied to the whole
// multiclass, and $src1 is also the first OpNode operand.
// IsCommutable is forwarded (twice) to the register form only; the memory
// forms leave those template arguments at their defaults. Both memory forms
// use EVEX_CD8<32, CD8VF> and list ReadAfterFold twice after the folded write.
12276 let Constraints = "$src1 = $dst" in
12277 multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12278                     X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12279                     bit IsCommutable> {
12280   let ExeDomain = VTI.ExeDomain in {
12281   defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12282                                    (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12283                                    "$src3, $src2", "$src2, $src3",
12284                                    (VTI.VT (OpNode VTI.RC:$src1,
12285                                             VTI.RC:$src2, VTI.RC:$src3)),
12286                                    IsCommutable, IsCommutable>,
12287                                    EVEX, VVVV, T8, PD, Sched<[sched]>;
12288   defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12289                                    (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12290                                    "$src3, $src2", "$src2, $src3",
12291                                    (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12292                                             (VTI.VT (VTI.LdFrag addr:$src3))))>,
12293                                    EVEX, VVVV, EVEX_CD8<32, CD8VF>, T8, PD,
12294                                    Sched<[sched.Folded, sched.ReadAfterFold,
12295                                           sched.ReadAfterFold]>;
12296   defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12297                                    (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
12298                                    OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
12299                                    "$src2, ${src3}"#VTI.BroadcastStr,
12300                                    (OpNode VTI.RC:$src1, VTI.RC:$src2,
12301                                     (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12302                                    EVEX, VVVV, EVEX_CD8<32, CD8VF>, EVEX_B,
12303                                    T8, PD, Sched<[sched.Folded, sched.ReadAfterFold,
12304                                                 sched.ReadAfterFold]>;
12305   }
// VNNI_common - Instantiates VNNI_rmb for the three vector widths:
//   Z    - 512-bit (v16i32_info), requires HasVNNI
//   Z256 - 256-bit (v8i32x_info), requires HasVNNI and HasVLX
//   Z128 - 128-bit (v4i32x_info), requires HasVNNI and HasVLX
// IsCommutable is forwarded unchanged to every width.
12308 multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
12309                        X86SchedWriteWidths sched, bit IsCommutable> {
12310   let Predicates = [HasVNNI] in
12311   defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
12312                            IsCommutable>, EVEX_V512;
12313   let Predicates = [HasVNNI, HasVLX] in {
12314     defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
12315                            IsCommutable>, EVEX_V256;
12316     defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
12317                            IsCommutable>, EVEX_V128;
12318   }
12321 // FIXME: Is there a better scheduler class for VPDP?
// The final template argument is VNNI_common's IsCommutable bit: it is 0 for
// the VPDPBUSD/VPDPBUSDS forms and 1 for the VPDPWSSD/VPDPWSSDS forms.
12322 defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
12323 defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
12324 defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
12325 defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
12327 // Patterns to match VPDPWSSD from existing instructions/intrinsics.
// Each pattern matches an `add` of $src1 with an X86vpmaddwd_su result and
// selects the VPDPWSSD form of the same width. The register variant takes
// $src3 in a register; the memory variant folds a `load` of $src3.
// 512-bit patterns need only HasVNNI.
12328 let Predicates = [HasVNNI] in {
12329   def : Pat<(v16i32 (add VR512:$src1,
12330                          (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
12331             (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
12332   def : Pat<(v16i32 (add VR512:$src1,
12333                          (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
12334             (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
// 256-bit and 128-bit patterns additionally need HasVLX.
12336 let Predicates = [HasVNNI,HasVLX] in {
12337   def : Pat<(v8i32 (add VR256X:$src1,
12338                         (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
12339             (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
12340   def : Pat<(v8i32 (add VR256X:$src1,
12341                         (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
12342             (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
12343   def : Pat<(v4i32 (add VR128X:$src1,
12344                         (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
12345             (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
12346   def : Pat<(v4i32 (add VR128X:$src1,
12347                         (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
12348             (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
12351 //===----------------------------------------------------------------------===//
12352 // Bit Algorithms
12353 //===----------------------------------------------------------------------===//
12355 // FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
// Byte and word population count. Both select the generic `ctpop` node, share
// opcode 0x54 and are gated on HasBITALG; the word form adds REX_W.
12356 defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
12357                                    avx512vl_i8_info, HasBITALG>;
12358 defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
12359                                    avx512vl_i16_info, HasBITALG>, REX_W;
// avx512_unary_lowering is instantiated for the same node, type info and
// predicate; it refers to the instructions above by name string.
12361 defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
12362 defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
// VPSHUFBITQMB_rm - Register (rr) and full-vector load (rm) forms of
// vpshufbitqmb (opcode 0x8F) for the vector type VTI. The destination uses
// register class VTI.KRC. Each form hands AVX512_maskable_cmp two patterns:
// one built on X86Vpshufbitqmb and one built on X86Vpshufbitqmb_su.
12364 multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12365   defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
12366                                 (ins VTI.RC:$src1, VTI.RC:$src2),
12367                                 "vpshufbitqmb",
12368                                 "$src2, $src1", "$src1, $src2",
12369                                 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12370                                 (VTI.VT VTI.RC:$src2)),
12371                                 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12372                                 (VTI.VT VTI.RC:$src2))>, EVEX, VVVV, T8, PD,
12373                                 Sched<[sched]>;
        // Memory form: $src2 is loaded with VTI.LdFrag.
12374   defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
12375                                 (ins VTI.RC:$src1, VTI.MemOp:$src2),
12376                                 "vpshufbitqmb",
12377                                 "$src2, $src1", "$src1, $src2",
12378                                 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12379                                 (VTI.VT (VTI.LdFrag addr:$src2))),
12380                                 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12381                                 (VTI.VT (VTI.LdFrag addr:$src2)))>,
12382                                 EVEX, VVVV, EVEX_CD8<8, CD8VF>, T8, PD,
12383                                 Sched<[sched.Folded, sched.ReadAfterFold]>;
// VPSHUFBITQMB_common - Instantiates VPSHUFBITQMB_rm for the 512-bit width
// (requires HasBITALG) and for the 256/128-bit widths (additionally requires
// HasVLX), taking the per-width type info from VTI.
12386 multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12387   let Predicates = [HasBITALG] in
12388   defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
12389   let Predicates = [HasBITALG, HasVLX] in {
12390     defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
12391     defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
12392   }
12395 // FIXME: Is there a better scheduler class for VPSHUFBITQMB?
// Instantiated on byte element vectors (avx512vl_i8_info).
12396 defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
12398 //===----------------------------------------------------------------------===//
12399 // GFNI
12400 //===----------------------------------------------------------------------===//
// GF2P8MULB_avx512_common - Instantiates avx512_binop_rm on byte vectors:
//   Z    - v64i8_info, requires HasGFNI and HasAVX512
//   Z256 - v32i8x_info, requires HasGFNI and HasVLX
//   Z128 - v16i8x_info, requires HasGFNI and HasVLX
// NOTE(review): the trailing `1` is presumably avx512_binop_rm's commutable
// flag; confirm against that multiclass, which is outside this excerpt.
12402 multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12403                                    X86SchedWriteWidths sched> {
12404   let Predicates = [HasGFNI, HasAVX512] in
12405   defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
12406                                 EVEX_V512;
12407   let Predicates = [HasGFNI, HasVLX] in {
12408     defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
12409                                 EVEX_V256;
12410     defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
12411                                 EVEX_V128;
12412   }
// EVEX-encoded vgf2p8mulb: opcode 0xCF with the T8 prefix class, selecting
// X86GF2P8mulb.
12415 defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
12416                                           SchedWriteVecALU>,
12417                                           EVEX_CD8<8, CD8VF>, T8;
// GF2P8AFFINE_avx512_rmb_imm - Inherits the forms produced by
// avx512_3Op_rm_imm8 and adds a broadcast-memory form (rmbi).
//   VTI     - vector type of the destination and $src1
//   BcstVTI - vector type used for the broadcast load of $src2; its result is
//             bitconverted before being passed to OpNode
// The broadcast is loaded with X86VBroadcastld64 and $src3 is an i8 timm.
12419 multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
12420                                       X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12421                                       X86VectorVTInfo BcstVTI>
12422            : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
12423   let ExeDomain = VTI.ExeDomain in
12424   defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12425                 (ins VTI.RC:$src1, BcstVTI.ScalarMemOp:$src2, u8imm:$src3),
12426                 OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
12427                 "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
12428                 (OpNode (VTI.VT VTI.RC:$src1),
12429                  (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
12430                  (i8 timm:$src3))>, EVEX_B,
12431                  Sched<[sched.Folded, sched.ReadAfterFold]>;
// GF2P8AFFINE_avx512_common - Instantiates GF2P8AFFINE_avx512_rmb_imm for the
// three vector widths, pairing each byte vector type with the 64-bit-element
// vector type of the same width for the broadcast form:
//   Z    - v64i8_info / v8i64_info, requires HasGFNI and HasAVX512
//   Z256 - v32i8x_info / v4i64x_info, requires HasGFNI and HasVLX
//   Z128 - v16i8x_info / v2i64x_info, requires HasGFNI and HasVLX
12434 multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12435                                      X86SchedWriteWidths sched> {
12436   let Predicates = [HasGFNI, HasAVX512] in
12437   defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
12438                                            v64i8_info, v8i64_info>, EVEX_V512;
12439   let Predicates = [HasGFNI, HasVLX] in {
12440     defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
12441                                            v32i8x_info, v4i64x_info>, EVEX_V256;
12442     defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
12443                                            v16i8x_info, v2i64x_info>, EVEX_V128;
12444   }
// EVEX-encoded GF(2^8) affine instructions. The inverse form uses opcode 0xCF
// and the plain form uses 0xCE; both carry REX_W and the AVX512AIi8Base
// encoding class.
12447 defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12448                          X86GF2P8affineinvqb, SchedWriteVecIMul>,
12449                          EVEX, VVVV, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
12450 defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12451                          X86GF2P8affineqb, SchedWriteVecIMul>,
12452                          EVEX, VVVV, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
12455 //===----------------------------------------------------------------------===//
12456 // AVX5124FMAPS
12457 //===----------------------------------------------------------------------===//
// AVX5124FMAPS memory-form instructions. All four are defined with empty
// pattern lists, so no ISel pattern is attached here. The enclosing `let`
// marks them as loads without other side effects, ties $src1 to $dst, and
// records the MXCSR use and possible FP exception.
12459 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12460     Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
// Packed forms: 512-bit destination (EVEX_V512), f128mem source.
12461 defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12462                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12463                     "v4fmaddps", "$src3, $src2", "$src2, $src3",
12464                     []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12465                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12467 defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12468                      (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12469                      "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12470                      []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12471                      Sched<[SchedWriteFMA.ZMM.Folded]>;
// Scalar forms: VR128X destination, VEX_LIG, f128mem source.
12473 defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12474                     (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
12475                     "v4fmaddss", "$src3, $src2", "$src2, $src3",
12476                     []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>,
12477                     Sched<[SchedWriteFMA.Scl.Folded]>;
12479 defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12480                      (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12481                      "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12482                      []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>,
12483                      Sched<[SchedWriteFMA.Scl.Folded]>;
12486 //===----------------------------------------------------------------------===//
12487 // AVX5124VNNIW
12488 //===----------------------------------------------------------------------===//
// AVX5124VNNIW memory-form instructions. Both are defined with empty pattern
// lists, so no ISel pattern is attached here. They are 512-bit, read an
// f128mem operand, and tie $src1 to $dst. Both reuse the SchedWriteFMA
// scheduling class.
12490 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12491     Constraints = "$src1 = $dst" in {
12492 defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12493                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12494                      "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12495                     []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12496                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12498 defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12499                      (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12500                      "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12501                      []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12502                      Sched<[SchedWriteFMA.ZMM.Folded]>;
// Pseudo instructions that store and load a VK16PAIR register to and from
// memory. Neither has an ISel pattern; they are only flagged as a store or a
// load and given a scheduling class.
12505 let hasSideEffects = 0 in {
12506   let mayStore = 1, SchedRW = [WriteFStoreX] in
12507   def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12508   let mayLoad = 1, SchedRW = [WriteFLoadX] in
12509   def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12512 //===----------------------------------------------------------------------===//
12513 // VP2INTERSECT
12514 //===----------------------------------------------------------------------===//
// avx512_vp2intersect_modes - Emits the three operand forms of vp2intersect
// (opcode 0x68) for the vector type `_`:
//   rr  - both sources in registers
//   rm  - $src2 loaded with _.LdFrag (wrapped in a bitconvert)
//   rmb - $src2 loaded with _.BroadcastLdFrag, encoded with EVEX_B
// The destination uses register class _.KRPC, and the mnemonic is
// "vp2intersect" followed by _.Suffix.
12516 multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
12517   def rr : I<0x68, MRMSrcReg,
12518                   (outs _.KRPC:$dst),
12519                   (ins _.RC:$src1, _.RC:$src2),
12520                   !strconcat("vp2intersect", _.Suffix,
12521                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12522                   [(set _.KRPC:$dst, (X86vp2intersect
12523                             _.RC:$src1, (_.VT _.RC:$src2)))]>,
12524                   EVEX, VVVV, T8, XD, Sched<[sched]>;
12526   def rm : I<0x68, MRMSrcMem,
12527                   (outs _.KRPC:$dst),
12528                   (ins  _.RC:$src1, _.MemOp:$src2),
12529                   !strconcat("vp2intersect", _.Suffix,
12530                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12531                   [(set _.KRPC:$dst, (X86vp2intersect
12532                             _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12533                   EVEX, VVVV, T8, XD, EVEX_CD8<_.EltSize, CD8VF>,
12534                   Sched<[sched.Folded, sched.ReadAfterFold]>;
12536   def rmb : I<0x68, MRMSrcMem,
12537                   (outs _.KRPC:$dst),
12538                   (ins _.RC:$src1, _.ScalarMemOp:$src2),
12539                   !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12540                              ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12541                   [(set _.KRPC:$dst, (X86vp2intersect
12542                              _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
12543                   EVEX, VVVV, T8, XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
12544                   Sched<[sched.Folded, sched.ReadAfterFold]>;
// avx512_vp2intersect - Instantiates avx512_vp2intersect_modes for the 512-bit
// width (requires HasAVX512 and HasVP2INTERSECT) and for the 256/128-bit
// widths (additionally requires HasVLX).
12547 multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
12548   let Predicates  = [HasAVX512, HasVP2INTERSECT] in
12549     defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
12551   let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12552     defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
12553     defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
12554   }
// Dword and qword element instantiations of vp2intersect; the qword form adds
// REX_W.
12557 let ExeDomain = SSEPackedInt in {
12558 defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
12559 defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, REX_W;
// avx512_binop_all2 - Instantiates avx512_binop_rm2 at 512, 256 and 128 bits
// for a binary operation whose source and destination vector types differ.
//   _SrcVTInfo / _DstVTInfo - per-width source and destination type info
//   prd                     - predicate required for every width; the 256/128
//                             bit forms additionally require HasVLX
//   IsCommutable            - forwarded to avx512_binop_rm2 (defaults to 0)
// The source info is passed to avx512_binop_rm2 twice, as the first and third
// VT-info arguments. Every width uses EVEX_CD8<32, CD8VF>.
12562 multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
12563                              X86SchedWriteWidths sched,
12564                              AVX512VLVectorVTInfo _SrcVTInfo,
12565                              AVX512VLVectorVTInfo _DstVTInfo,
12566                              SDNode OpNode, Predicate prd,
12567                              bit IsCommutable = 0> {
12568   let Predicates = [prd] in
12569     defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
12570                                    _SrcVTInfo.info512, _DstVTInfo.info512,
12571                                    _SrcVTInfo.info512, IsCommutable>,
12572                                    EVEX_V512, EVEX_CD8<32, CD8VF>;
12573   let Predicates = [HasVLX, prd] in {
12574     defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
12575                                       _SrcVTInfo.info256, _DstVTInfo.info256,
12576                                       _SrcVTInfo.info256, IsCommutable>,
12577                                      EVEX_V256, EVEX_CD8<32, CD8VF>;
12578     defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
12579                                       _SrcVTInfo.info128, _DstVTInfo.info128,
12580                                       _SrcVTInfo.info128, IsCommutable>,
12581                                       EVEX_V128, EVEX_CD8<32, CD8VF>;
12582   }
// vcvtne2ps2bf16: f32 sources and a bf16 destination, selecting
// X86cvtne2ps2bf16. Requires HasBF16 and is not commutable (last argument 0).
12585 let ExeDomain = SSEPackedSingle in
12586 defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12587                                         SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
12588                                         avx512vl_f32_info, avx512vl_bf16_info,
12589                                         X86cvtne2ps2bf16, HasBF16, 0>, T8, XD;
12591 // Truncate Float to BFloat16
// avx512_cvtps2bf16 - Instantiates avx512_vcvt_fp for the f32 -> bf16
// conversion at 512 bits (HasBF16) and at 256/128 bits (HasBF16 and HasVLX).
// All three widths are defined with an empty Uses list and
// mayRaiseFPException = 0.
// The Z128 form passes null_frag for both the normal and the masked node, so
// avx512_vcvt_fp attaches no selection pattern to it.
// The InstAlias records add "x"- and "y"-suffixed mnemonics for the Z128 and
// Z256 forms; the memory-operand aliases carry the "intel" variant tag.
12592 multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12593                              X86SchedWriteWidths sched> {
12594   let ExeDomain = SSEPackedSingle in {
12595   let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
12596     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16bf16x_info, v16f32_info,
12597                             X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12598   }
12599   let Predicates = [HasBF16, HasVLX] in {
12600     let Uses = []<Register>, mayRaiseFPException = 0 in {
12601     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v4f32x_info,
12602                                null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12603                                VK4WM>, EVEX_V128;
12604     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v8f32x_info,
12605                                X86cvtneps2bf16, X86cvtneps2bf16,
12606                                sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12607     }
12608   } // Predicates = [HasBF16, HasVLX]
12609   } // ExeDomain = SSEPackedSingle
12611   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12612                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12613                   VR128X:$src), 0>;
12614   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12615                   (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12616                   f128mem:$src), 0, "intel">;
12617   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12618                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12619                   VR256X:$src), 0>;
12620   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12621                   (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12622                   f256mem:$src), 0, "intel">;
// vcvtneps2bf16: opcode 0x72 with the T8 and XS prefix classes.
12625 defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
12626                                        SchedWriteCvtPD2PS>, T8, XS,
12627                                        EVEX_CD8<32, CD8VF>;
// Selection patterns for the BF16 instructions that require HasVLX: the
// 128-bit vcvtneps2bf16 forms (unmasked, merge-masked and zero-masked, for
// register, load and broadcast-load sources), the 128/256-bit intrinsics,
// bf16 vector broadcasts, and the v8f32 -> v8bf16 X86vfpround lowering.
12629 let Predicates = [HasBF16, HasVLX] in {
12630   // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
12631   // patterns have been disabled with null_frag.
        // Register source.
12632   def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
12633             (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12634   def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8bf16 VR128X:$src0),
12635                               VK4WM:$mask),
12636             (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
12637   def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8bf16x_info.ImmAllZerosV,
12638                               VK4WM:$mask),
12639             (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
        // Full-vector load source.
12641   def : Pat<(v8bf16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
12642             (VCVTNEPS2BF16Z128rm addr:$src)>;
12643   def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8bf16 VR128X:$src0),
12644                               VK4WM:$mask),
12645             (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12646   def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8bf16x_info.ImmAllZerosV,
12647                               VK4WM:$mask),
12648             (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
        // 32-bit broadcast load source.
12650   def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32
12651                                      (X86VBroadcastld32 addr:$src)))),
12652             (VCVTNEPS2BF16Z128rmb addr:$src)>;
12653   def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12654                               (v8bf16 VR128X:$src0), VK4WM:$mask),
12655             (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12656   def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12657                               v8bf16x_info.ImmAllZerosV, VK4WM:$mask),
12658             (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
        // 128-bit and 256-bit intrinsics map directly onto the unmasked forms.
12660   def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (v4f32 VR128X:$src))),
12661             (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12662   def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src))),
12663             (VCVTNEPS2BF16Z128rm addr:$src)>;
12665   def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (v8f32 VR256X:$src))),
12666             (VCVTNEPS2BF16Z256rr VR256X:$src)>;
12667   def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src))),
12668             (VCVTNEPS2BF16Z256rm addr:$src)>;
        // bf16 vector broadcasts are selected to the word broadcast VPBROADCASTW.
12670   def : Pat<(v8bf16 (X86VBroadcastld16 addr:$src)),
12671             (VPBROADCASTWZ128rm addr:$src)>;
12672   def : Pat<(v16bf16 (X86VBroadcastld16 addr:$src)),
12673             (VPBROADCASTWZ256rm addr:$src)>;
12675   def : Pat<(v8bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12676             (VPBROADCASTWZ128rr VR128X:$src)>;
12677   def : Pat<(v16bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12678             (VPBROADCASTWZ256rr VR128X:$src)>;
        // v8f32 -> v8bf16 rounding is selected to the 256-bit conversion.
12680   def : Pat<(v8bf16 (X86vfpround (v8f32 VR256X:$src))),
12681             (VCVTNEPS2BF16Z256rr VR256X:$src)>;
12682   def : Pat<(v8bf16 (X86vfpround (loadv8f32 addr:$src))),
12683             (VCVTNEPS2BF16Z256rm addr:$src)>;
12685   // TODO: No scalar broadcast because legal scalar bf16 is not supported yet.
// 512-bit BF16 patterns, which need only HasBF16: v32bf16 broadcasts are
// selected to VPBROADCASTWZ, and v16f32 -> v16bf16 X86vfpround is selected to
// the 512-bit vcvtneps2bf16.
12688 let Predicates = [HasBF16] in {
12689   def : Pat<(v32bf16 (X86VBroadcastld16 addr:$src)),
12690             (VPBROADCASTWZrm addr:$src)>;
12692   def : Pat<(v32bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12693             (VPBROADCASTWZrr VR128X:$src)>;
12695   def : Pat<(v16bf16 (X86vfpround (v16f32 VR512:$src))),
12696             (VCVTNEPS2BF16Zrr VR512:$src)>;
12697   def : Pat<(v16bf16 (X86vfpround (loadv16f32 addr:$src))),
12698             (VCVTNEPS2BF16Zrm addr:$src)>;
12699   // TODO: No scalar broadcast because legal scalar bf16 is not supported yet.
// avx512_dpbf16ps_rm - Emits the register (r), full-vector load (m) and
// broadcast load (mb) forms of a three-source instruction whose accumulator
// and result use type info `_` and whose two other sources use `src_v`.
// $src1 is tied to $dst.
// In the mb form the memory operand is f32mem and the asm broadcast suffix is
// taken from `_`, while the broadcast load fragment and its type come from
// `src_v`.
12702 let Constraints = "$src1 = $dst" in {
12703 multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
12704                               X86FoldableSchedWrite sched,
12705                               X86VectorVTInfo _, X86VectorVTInfo src_v> {
12706   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12707                            (ins src_v.RC:$src2, src_v.RC:$src3),
12708                            OpcodeStr, "$src3, $src2", "$src2, $src3",
12709                            (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
12710                            EVEX, VVVV, Sched<[sched]>;
12712   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12713                                (ins src_v.RC:$src2, src_v.MemOp:$src3),
12714                                OpcodeStr, "$src3, $src2", "$src2, $src3",
12715                                (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12716                                (src_v.LdFrag addr:$src3)))>, EVEX, VVVV,
12717                                Sched<[sched.Folded, sched.ReadAfterFold]>;
12719   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12720                   (ins src_v.RC:$src2, f32mem:$src3),
12721                   OpcodeStr,
12722                   !strconcat("${src3}", _.BroadcastStr,", $src2"),
12723                   !strconcat("$src2, ${src3}", _.BroadcastStr),
12724                   (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12725                   (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
12726                   EVEX_B, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
12729 } // Constraints = "$src1 = $dst"
// avx512_dpbf16ps_sizes - Instantiates avx512_dpbf16ps_rm for the 512-bit
// width (requires prd) and for the 256/128-bit widths (requires HasVLX and
// prd). `_` supplies the accumulator/result type info and `src_v` the source
// type info for each width.
12731 multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
12732                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
12733                                  AVX512VLVectorVTInfo src_v, Predicate prd> {
12734   let Predicates = [prd] in {
12735     defm Z    : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
12736                                    src_v.info512>, EVEX_V512;
12737   }
12738   let Predicates = [HasVLX, prd] in {
12739     defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
12740                                    src_v.info256>, EVEX_V256;
12741     defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
12742                                    src_v.info128>, EVEX_V128;
12743   }
// vdpbf16ps: f32 accumulator/result with bf16 sources, selecting X86dpbf16ps.
// Opcode 0x52 with the T8 and XS prefix classes; requires HasBF16.
12746 let ExeDomain = SSEPackedSingle in
12747 defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
12748                                        avx512vl_f32_info, avx512vl_bf16_info,
12749                                        HasBF16>, T8, XS, EVEX_CD8<32, CD8VF>;
12751 //===----------------------------------------------------------------------===//
12752 // AVX512FP16
12753 //===----------------------------------------------------------------------===//
// vmovw into a vector register (opcode 0x6E, T_MAP5/PD, EVEX), gated on
// HasFP16. The register form takes a GR32 source and has no pattern of its
// own. The memory form loads an i16 and selects scalar_to_vector into v8i16.
12755 let Predicates = [HasFP16] in {
12756 // Move word (r/m16) to packed word
12757 def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
12758                       "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
12759 def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
12760                       "vmovw\t{$src, $dst|$dst, $src}",
12761                       [(set VR128X:$dst,
12762                         (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
12763                       T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;
// Selection patterns that route GPR sources through VMOVW2SHrr.
// GR16 sources are first widened by inserting them at sub_16bit into an
// IMPLICIT_DEF, because VMOVW2SHrr takes a GR32 operand.
// i16 -> f16 bitcast: move to a vector register, then copy into FR16X.
12765 def : Pat<(f16 (bitconvert GR16:$src)),
12766           (f16 (COPY_TO_REGCLASS
12767                 (VMOVW2SHrr
12768                  (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
12769                 FR16X))>;
12770 def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))),
12771           (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
// A GR32 masked with 0xffff and zero-extended into a vector (X86vzmovl) is
// matched directly to VMOVW2SHrr on the unmasked register.
12772 def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))),
12773           (VMOVW2SHrr GR32:$src)>;
12774 // FIXME: We should really find a way to improve these patterns.
// The 256-bit and 512-bit variants wrap the 128-bit result with SUBREG_TO_REG
// at sub_xmm.
12775 def : Pat<(v8i32 (X86vzmovl
12776                   (insert_subvector undef,
12777                                     (v4i32 (scalar_to_vector
12778                                             (and GR32:$src, 0xffff))),
12779                                     (iPTR 0)))),
12780           (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
12781 def : Pat<(v16i32 (X86vzmovl
12782                    (insert_subvector undef,
12783                                      (v4i32 (scalar_to_vector
12784                                              (and GR32:$src, 0xffff))),
12785                                      (iPTR 0)))),
12786           (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
12788 def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))),
12789           (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
12791 // AVX 128-bit movw instructions write zeros in the high 128-bit part.
// The 256-bit zero-extending 16-bit load therefore reuses VMOVWrm and wraps
// the result with SUBREG_TO_REG at sub_xmm.
12792 def : Pat<(v8i16 (X86vzload16 addr:$src)),
12793           (VMOVWrm addr:$src)>;
12794 def : Pat<(v16i16 (X86vzload16 addr:$src)),
12795           (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
12797 // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
12798 def : Pat<(v32i16 (X86vzload16 addr:$src)),
12799           (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
// 16-bit loads extended to i32 (extloadi16 / zextloadi16) and placed in a
// vector are also selected to VMOVWrm.
12801 def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))),
12802           (VMOVWrm addr:$src)>;
12803 def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))),
12804           (VMOVWrm addr:$src)>;
12805 def : Pat<(v8i32 (X86vzmovl
12806                   (insert_subvector undef,
12807                                     (v4i32 (scalar_to_vector
12808                                             (i32 (zextloadi16 addr:$src)))),
12809                                     (iPTR 0)))),
12810           (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
12811 def : Pat<(v16i32 (X86vzmovl
12812                    (insert_subvector undef,
12813                                      (v4i32 (scalar_to_vector
12814                                              (i32 (zextloadi16 addr:$src)))),
12815                                      (iPTR 0)))),
12816           (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
12818 // Move word from xmm register to r/m16
// Opcode 0x7E, T_MAP5/PD, EVEX. The register form writes a GR32 and has no
// pattern of its own; the memory form stores element 0 of a v8i16 as an i16.
12819 def VMOVSH2Wrr  : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
12820                        "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveToGpr]>;
12821 def VMOVWmr  : AVX512<0x7E, MRMDestMem, (outs),
12822                        (ins i16mem:$dst, VR128X:$src),
12823                        "vmovw\t{$src, $dst|$dst, $src}",
12824                        [(store (i16 (extractelt (v8i16 VR128X:$src),
12825                                      (iPTR 0))), addr:$dst)]>,
12826                        T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;
// i16 results are taken from the GR32 output of VMOVSH2Wrr with
// EXTRACT_SUBREG at sub_16bit. An FR16X source is first copied into VR128X.
12828 def : Pat<(i16 (bitconvert FR16X:$src)),
12829           (i16 (EXTRACT_SUBREG
12830                 (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)),
12831                 sub_16bit))>;
12832 def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
12833           (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>;
12835 // Allow "vmovw" to use GR64
// These repeat the two register forms (opcodes 0x6E and 0x7E) with GR64
// operands and REX_W. Neither has an ISel pattern.
12836 let hasSideEffects = 0 in {
12837   def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
12838                      "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
12839   def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
12840                      "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>;
12844 // Convert 16-bit float to i16/u16
// avx512_cvtph2w - Instantiates a vector conversion between _Src and _Dst at
// all three widths, gated on HasFP16 (plus HasVLX for 128/256 bits).
//   OpNode     - node for the unmasked pattern
//   MaskOpNode - node for the masked patterns
//   OpNodeRnd  - node for the 512-bit form built by avx512_vcvt_fp_rc
// Only the 512-bit form also inherits avx512_vcvt_fp_rc; the 128/256-bit
// forms use avx512_vcvt_fp alone.
12845 multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12846                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12847                           AVX512VLVectorVTInfo _Dst,
12848                           AVX512VLVectorVTInfo _Src,
12849                           X86SchedWriteWidths sched> {
12850   let Predicates = [HasFP16] in {
12851     defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
12852                             OpNode, MaskOpNode, sched.ZMM>,
12853              avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
12854                                OpNodeRnd, sched.ZMM>, EVEX_V512;
12855   }
12856   let Predicates = [HasFP16, HasVLX] in {
12857     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
12858                                OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
12859     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
12860                                OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
12861   }
12864 // Convert 16-bit float to i16/u16 truncate
// Same three-width layout as avx512_cvtph2w, except that the 512-bit form
// combines avx512_vcvt_fp with avx512_vcvt_fp_sae (built from OpNodeRnd)
// instead of avx512_vcvt_fp_rc. The 128/256-bit forms additionally require
// HasVLX and have no _sae companion.
12865 multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12866                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12867                            AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src,
12868                            X86SchedWriteWidths sched> {
12869   let Predicates = [HasFP16] in {
12870     defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
12871                             OpNode, MaskOpNode, sched.ZMM>,
12872              avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
12873                                OpNodeRnd, sched.ZMM>, EVEX_V512;
12874   }
12875   let Predicates = [HasFP16, HasVLX] in {
12876     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
12877                                OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
12878     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
12879                                OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
12880   }
// Packed conversions between f16 and 16-bit integer vectors. All six use
// T_MAP5, EVEX_CD8<16, CD8VF> and SchedWriteCvtPD2DQ. The truncating forms use
// opcode 0x7C, all others 0x7D; instructions sharing an opcode are told apart
// by the prefix class that follows T_MAP5 (none, PD, XS or XD).
// f16 -> i16 forms pass (avx512vl_i16_info, avx512vl_f16_info) as (_Dst, _Src);
// i16 -> f16 forms pass them in the opposite order.
12883 defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
12884                                 X86cvtp2UIntRnd, avx512vl_i16_info,
12885                                 avx512vl_f16_info, SchedWriteCvtPD2DQ>,
12886                                 T_MAP5, EVEX_CD8<16, CD8VF>;
12887 defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
12888                                 X86VUintToFpRnd, avx512vl_f16_info,
12889                                 avx512vl_i16_info, SchedWriteCvtPD2DQ>,
12890                                 T_MAP5, XD, EVEX_CD8<16, CD8VF>;
12891 defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si,
12892                                 X86cvttp2si, X86cvttp2siSAE,
12893                                 avx512vl_i16_info, avx512vl_f16_info,
12894                                 SchedWriteCvtPD2DQ>, T_MAP5, PD, EVEX_CD8<16, CD8VF>;
12895 defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui,
12896                                 X86cvttp2ui, X86cvttp2uiSAE,
12897                                 avx512vl_i16_info, avx512vl_f16_info,
12898                                 SchedWriteCvtPD2DQ>, T_MAP5, EVEX_CD8<16, CD8VF>;
12899 defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
12900                                 X86cvtp2IntRnd, avx512vl_i16_info,
12901                                 avx512vl_f16_info, SchedWriteCvtPD2DQ>,
12902                                 T_MAP5, PD, EVEX_CD8<16, CD8VF>;
12903 defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
12904                                 X86VSintToFpRnd, avx512vl_f16_info,
12905                                 avx512vl_i16_info, SchedWriteCvtPD2DQ>,
12906                                 T_MAP5, XS, EVEX_CD8<16, CD8VF>;
12908 // Convert Half to Signed/Unsigned Doubleword
// Half -> doubleword conversion at three widths, with fixed type infos:
//   Z     - v16f16x -> v16i32, requires HasFP16; adds the avx512_vcvt_fp_rc
//           form built from OpNodeRnd.
//   Z256  - v8f16x  -> v8i32x, requires HasFP16 and HasVLX.
//   Z128  - v8f16x_info source -> v4i32x, requires HasFP16 and HasVLX. Because
//           the source info describes 8 elements but the result has 4, the
//           "{1to4}" string and the f64mem operand are passed explicitly.
12909 multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12910                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12911                            X86SchedWriteWidths sched> {
12912   let Predicates = [HasFP16] in {
12913     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
12914                             MaskOpNode, sched.ZMM>,
12915              avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
12916                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
12917   }
12918   let Predicates = [HasFP16, HasVLX] in {
12919     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
12920                                MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
12921     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
12922                                MaskOpNode, sched.YMM>, EVEX_V256;
12923   }
12926 // Convert Half to Signed/Unsigned Doubleword with truncation
// Truncating counterpart of avx512_cvtph2dq: identical type infos and
// predicates, but the 512-bit form pairs avx512_vcvt_fp with
// avx512_vcvt_fp_sae (built from OpNodeRnd) rather than avx512_vcvt_fp_rc.
// Z128 again passes "{1to4}" and f64mem explicitly because its v8f16x_info
// source describes more elements than the v4i32x result holds.
12927 multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12928                             SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12929                             X86SchedWriteWidths sched> {
12930   let Predicates = [HasFP16] in {
12931     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
12932                             MaskOpNode, sched.ZMM>,
12933              avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
12934                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
12935   }
12936   let Predicates = [HasFP16, HasVLX] in {
12937     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
12938                                MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
12939     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
12940                                MaskOpNode, sched.YMM>, EVEX_V256;
12941   }
// Half -> signed/unsigned doubleword instantiations. All use T_MAP5,
// EVEX_CD8<16, CD8VH> and SchedWriteCvtPS2DQ.
// VCVTPH2DQ and VCVTTPH2DQ share opcode 0x5B and differ by prefix class
// (PD vs XS); the unsigned forms use 0x79 / 0x78 with no prefix class listed.
12945 defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
12946                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
12947                                  EVEX_CD8<16, CD8VH>;
12948 defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
12949                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5,
12950                                  EVEX_CD8<16, CD8VH>;
12952 defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
12953                                 X86cvttp2si, X86cvttp2siSAE,
12954                                 SchedWriteCvtPS2DQ>, T_MAP5, XS,
12955                                 EVEX_CD8<16, CD8VH>;
12957 defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
12958                                  X86cvttp2ui, X86cvttp2uiSAE,
12959                                  SchedWriteCvtPS2DQ>, T_MAP5,
12960                                  EVEX_CD8<16, CD8VH>;
12962 // Convert Half to Signed/Unsigned Quadword
// Half -> quadword conversion at three widths. Every width reads from a
// v8f16x_info source:
//   Z     - produces v8i64, requires HasFP16; adds the avx512_vcvt_fp_rc form
//           built from OpNodeRnd.
//   Z128  - produces v2i64x, requires HasFP16 and HasVLX; "{1to2}" / f32mem.
//   Z256  - produces v4i64x, requires HasFP16 and HasVLX; "{1to4}" / f64mem.
12963 multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12964                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12965                            X86SchedWriteWidths sched> {
12966   let Predicates = [HasFP16] in {
12967     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
12968                             MaskOpNode, sched.ZMM>,
12969              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
12970                                OpNodeRnd, sched.ZMM>, EVEX_V512;
12971   }
12972   let Predicates = [HasFP16, HasVLX] in {
12973     // Explicitly specified broadcast string, since we take only 2 elements
12974     // from v8f16x_info source
12975     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
12976                                MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
12977                                EVEX_V128;
12978     // Explicitly specified broadcast string, since we take only 4 elements
12979     // from v8f16x_info source
12980     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
12981                                MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
12982                                EVEX_V256;
12983   }
12986 // Convert Half to Signed/Unsigned Quadword with truncation
// Truncating counterpart of avx512_cvtph2qq: same type infos, predicates,
// broadcast strings and memory operands, but the 512-bit form pairs
// avx512_vcvt_fp with avx512_vcvt_fp_sae (built from OpNodeRnd) rather than
// avx512_vcvt_fp_rc.
12987 multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12988                             SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12989                             X86SchedWriteWidths sched> {
12990   let Predicates = [HasFP16] in {
12991     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
12992                             MaskOpNode, sched.ZMM>,
12993              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
12994                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
12995   }
12996   let Predicates = [HasFP16, HasVLX] in {
12997     // Explicitly specified broadcast string, since we take only 2 elements
12998     // from v8f16x_info source
12999     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
13000                                MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
13001     // Explicitly specified broadcast string, since we take only 4 elements
13002     // from v8f16x_info source
13003     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
13004                                MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
13005   }
// Half -> signed/unsigned quadword instantiations. All four use T_MAP5, PD,
// EVEX_CD8<16, CD8VQ> and SchedWriteCvtPS2DQ, and are distinguished purely by
// opcode: 0x7B (ph2qq), 0x79 (ph2uqq), 0x7A (tph2qq), 0x78 (tph2uqq).
13008 defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
13009                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
13010                                  EVEX_CD8<16, CD8VQ>;
13012 defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
13013                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
13014                                  EVEX_CD8<16, CD8VQ>;
13016 defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
13017                                  X86cvttp2si, X86cvttp2siSAE,
13018                                  SchedWriteCvtPS2DQ>, T_MAP5, PD,
13019                                  EVEX_CD8<16, CD8VQ>;
13021 defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
13022                                  X86cvttp2ui, X86cvttp2uiSAE,
13023                                  SchedWriteCvtPS2DQ>, T_MAP5, PD,
13024                                  EVEX_CD8<16, CD8VQ>;
13026 // Convert Signed/Unsigned Quadword to Half
// Quadword -> half conversion. All three widths write a v8f16x_info
// destination; only the source width differs (v8i64 / v4i64x / v2i64x).
// The 128- and 256-bit instruction definitions are created with null_frag for
// both OpNode and MaskOpNode, so they carry no selection patterns of their own.
// The rest of the multiclass declares "att"-variant InstAliases that accept
// the mnemonic with an explicit x / y / z suffix for register and broadcast
// operands, in unmasked, masked ({k}) and zero-masked ({k}{z}) forms.
13027 multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13028                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13029                            X86SchedWriteWidths sched> {
13030   // we need "x"/"y"/"z" suffixes in order to distinguish between 128, 256 and
13031   // 512 memory forms of these instructions in Asm Parser. They have the same
13032   // dest type - 'v8f16x_info'. We also specify the broadcast string explicitly
13033   // due to the same reason.
13034   let Predicates = [HasFP16] in {
13035     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
13036                             MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
13037              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
13038                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13039   }
13040   let Predicates = [HasFP16, HasVLX] in {
13041     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
13042                                null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
13043                                i128mem, VK2WM>, EVEX_V128;
13044     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
13045                                null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
13046                                i256mem, VK4WM>, EVEX_V256;
13047   }
  // "x"-suffixed aliases -> Z128 forms (VR128X source, VK2WM mask, {1to2}).
13049   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
13050                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
13051                   VR128X:$src), 0, "att">;
13052   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
13053                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
13054                   VK2WM:$mask, VR128X:$src), 0, "att">;
13055   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
13056                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
13057                   VK2WM:$mask, VR128X:$src), 0, "att">;
13058   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
13059                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
13060                   i64mem:$src), 0, "att">;
13061   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
13062                   "$dst {${mask}}, ${src}{1to2}}",
13063                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
13064                   VK2WM:$mask, i64mem:$src), 0, "att">;
13065   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
13066                   "$dst {${mask}} {z}, ${src}{1to2}}",
13067                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
13068                   VK2WM:$mask, i64mem:$src), 0, "att">;
  // "y"-suffixed aliases -> Z256 forms (VR256X source, VK4WM mask, {1to4}).
13070   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
13071                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
13072                   VR256X:$src), 0, "att">;
13073   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
13074                   "$dst {${mask}}, $src}",
13075                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
13076                   VK4WM:$mask, VR256X:$src), 0, "att">;
13077   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
13078                   "$dst {${mask}} {z}, $src}",
13079                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
13080                   VK4WM:$mask, VR256X:$src), 0, "att">;
13081   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
13082                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
13083                   i64mem:$src), 0, "att">;
13084   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
13085                   "$dst {${mask}}, ${src}{1to4}}",
13086                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
13087                   VK4WM:$mask, i64mem:$src), 0, "att">;
13088   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
13089                   "$dst {${mask}} {z}, ${src}{1to4}}",
13090                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
13091                   VK4WM:$mask, i64mem:$src), 0, "att">;
  // "z"-suffixed aliases -> Z (512-bit) forms (VR512 source, VK8WM mask,
  // {1to8}).
13093   def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
13094                   (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
13095                   VR512:$src), 0, "att">;
13096   def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
13097                   "$dst {${mask}}, $src}",
13098                   (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
13099                   VK8WM:$mask, VR512:$src), 0, "att">;
13100   def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
13101                   "$dst {${mask}} {z}, $src}",
13102                   (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
13103                   VK8WM:$mask, VR512:$src), 0, "att">;
13104   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
13105                   (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
13106                   i64mem:$src), 0, "att">;
13107   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
13108                   "$dst {${mask}}, ${src}{1to8}}",
13109                   (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
13110                   VK8WM:$mask, i64mem:$src), 0, "att">;
13111   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
13112                   "$dst {${mask}} {z}, ${src}{1to8}}",
13113                   (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
13114                   VK8WM:$mask, i64mem:$src), 0, "att">;
// Signed / unsigned quadword -> half instantiations. Both use REX_W, T_MAP5,
// EVEX_CD8<64, CD8VF> and SchedWriteCvtDQ2PS. The signed form uses opcode 0x5B
// with no prefix class listed; the unsigned form uses opcode 0x7A with XD.
13117 defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
13118                             X86VSintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5,
13119                             EVEX_CD8<64, CD8VF>;
13121 defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
13122                             X86VUintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5, XD,
13123                             EVEX_CD8<64, CD8VF>;
13125 // Convert half to signed/unsigned int 32/64
// Scalar f16 -> integer conversions, all gated on HasFP16 and encoded with
// T_MAP5, XS, EVEX_CD8<16, CD8VT1>. Each mnemonic is instantiated twice: once
// for i32x_info with the "{l}" suffix and once for i64x_info with the "{q}"
// suffix plus REX_W.
// Opcodes: 0x2D cvtsh2si, 0x79 cvtsh2usi (avx512_cvt_s_int_round).
13126 defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
13127                                    X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
13128                                    T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13129 defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
13130                                    X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
13131                                    T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
13132 defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
13133                                    X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
13134                                    T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13135 defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
13136                                    X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
13137                                    T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
// Truncating forms (avx512_cvt_s_all): 0x2C vcvttsh2si, 0x78 vcvttsh2usi.
// Note the full mnemonic, including the leading "v", is passed here, unlike
// the avx512_cvt_s_int_round instantiations above.
13139 defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
13140                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13141                         "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13142 defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
13143                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13144                         "{q}", HasFP16>, REX_W, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13145 defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
13146                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13147                         "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13148 defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
13149                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13150                         "{q}", HasFP16>, T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
// Scalar integer -> f16 conversions and their selection patterns, all gated on
// HasFP16.
13152 let Predicates = [HasFP16] in {
  // Instruction definitions: signed (opcode 0x2A) and unsigned (opcode 0x7B),
  // each for GR32/i32mem ("l") and GR64/i64mem ("q", with REX_W). The CD8
  // element size follows the integer width (32 or 64).
13153   defm VCVTSI2SHZ  : avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
13154                                    v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
13155                                    T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
13156   defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
13157                                    v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
13158                                    T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>;
13159   defm VCVTUSI2SHZ   : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
13160                                     v8f16x_info, i32mem, loadi32,
13161                                     "cvtusi2sh","l">, T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
13162   defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
13163                                     v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
13164                                     T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>;
  // "att" aliases: the mnemonic written without a size suffix and with a
  // memory operand maps to the 32-bit (i32mem) *_Int memory form.
13165   def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13166               (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13168   def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13169               (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
  // Plain scalar f16 conversions select the non-_Int forms; the first
  // instruction operand is supplied as IMPLICIT_DEF.
13172   def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
13173             (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13174   def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
13175             (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13177   def : Pat<(f16 (any_sint_to_fp GR32:$src)),
13178             (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13179   def : Pat<(f16 (any_sint_to_fp GR64:$src)),
13180             (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13182   def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
13183             (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13184   def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
13185             (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13187   def : Pat<(f16 (any_uint_to_fp GR32:$src)),
13188             (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13189   def : Pat<(f16 (any_uint_to_fp GR64:$src)),
13190             (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13192   // Patterns used for matching vcvtsi2sh intrinsic sequences from clang
13193   // which produce unnecessary vmovsh instructions
  // Each pattern folds X86Movsh($dst, scalar_to_vector(int-to-fp(x))) into the
  // corresponding *_Int instruction, with $dst as its vector operand.
13194   def : Pat<(v8f16 (X86Movsh
13195                      (v8f16 VR128X:$dst),
13196                      (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
13197             (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13199   def : Pat<(v8f16 (X86Movsh
13200                      (v8f16 VR128X:$dst),
13201                      (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
13202             (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13204   def : Pat<(v8f16 (X86Movsh
13205                      (v8f16 VR128X:$dst),
13206                      (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
13207             (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13209   def : Pat<(v8f16 (X86Movsh
13210                      (v8f16 VR128X:$dst),
13211                      (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
13212             (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13214   def : Pat<(v8f16 (X86Movsh
13215                      (v8f16 VR128X:$dst),
13216                      (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
13217             (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13219   def : Pat<(v8f16 (X86Movsh
13220                      (v8f16 VR128X:$dst),
13221                      (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
13222             (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13224   def : Pat<(v8f16 (X86Movsh
13225                      (v8f16 VR128X:$dst),
13226                      (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
13227             (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13229   def : Pat<(v8f16 (X86Movsh
13230                      (v8f16 VR128X:$dst),
13231                      (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
13232             (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13233 } // Predicates = [HasFP16]
// Selection patterns for the 128- and 256-bit VCVTQQ2PH / VCVTUQQ2PH
// instructions. Patterns come in groups of three per source kind (register,
// full-vector load, 64-bit broadcast load):
//   1. unmasked         - X86any_V{S,U}intToFP         -> rr / rm / rmb
//   2. merge-masked     - X86VM{S,U}intToFP with $src0 -> rrk / rmk / rmbk
//   3. zero-masked      - X86VM{S,U}intToFP with ImmAllZerosV
//                                                      -> rrkz / rmkz / rmbkz
// v4i64 sources use VK4WM masks and the Z256 forms; v2i64 sources use VK2WM
// masks and the Z128 forms. The result type is v8f16 in every case.
13235 let Predicates = [HasFP16, HasVLX] in {
13236   // Special patterns to allow use of X86VMSintToFP for masking. Instruction
13237   // patterns have been disabled with null_frag.
13238   def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
13239             (VCVTQQ2PHZ256rr VR256X:$src)>;
13240   def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13241                            VK4WM:$mask),
13242             (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13243   def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13244                            VK4WM:$mask),
13245             (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13247   def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
13248             (VCVTQQ2PHZ256rm addr:$src)>;
13249   def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13250                            VK4WM:$mask),
13251             (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13252   def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13253                            VK4WM:$mask),
13254             (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13256   def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13257             (VCVTQQ2PHZ256rmb addr:$src)>;
13258   def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13259                            (v8f16 VR128X:$src0), VK4WM:$mask),
13260             (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13261   def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13262                            v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13263             (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13265   def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
13266             (VCVTQQ2PHZ128rr VR128X:$src)>;
13267   def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13268                            VK2WM:$mask),
13269             (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13270   def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13271                            VK2WM:$mask),
13272             (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13274   def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
13275             (VCVTQQ2PHZ128rm addr:$src)>;
13276   def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13277                            VK2WM:$mask),
13278             (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13279   def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13280                            VK2WM:$mask),
13281             (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13283   def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13284             (VCVTQQ2PHZ128rmb addr:$src)>;
13285   def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13286                            (v8f16 VR128X:$src0), VK2WM:$mask),
13287             (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13288   def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13289                            v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13290             (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
13292   // Special patterns to allow use of X86VMUintToFP for masking. Instruction
13293   // patterns have been disabled with null_frag.
13294   def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
13295             (VCVTUQQ2PHZ256rr VR256X:$src)>;
13296   def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13297                            VK4WM:$mask),
13298             (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13299   def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13300                            VK4WM:$mask),
13301             (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13303   def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
13304             (VCVTUQQ2PHZ256rm addr:$src)>;
13305   def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13306                            VK4WM:$mask),
13307             (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13308   def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13309                            VK4WM:$mask),
13310             (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13312   def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13313             (VCVTUQQ2PHZ256rmb addr:$src)>;
13314   def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13315                            (v8f16 VR128X:$src0), VK4WM:$mask),
13316             (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13317   def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13318                            v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13319             (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13321   def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
13322             (VCVTUQQ2PHZ128rr VR128X:$src)>;
13323   def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13324                            VK2WM:$mask),
13325             (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13326   def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13327                            VK2WM:$mask),
13328             (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13330   def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
13331             (VCVTUQQ2PHZ128rm addr:$src)>;
13332   def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13333                            VK2WM:$mask),
13334             (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13335   def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13336                            VK2WM:$mask),
13337             (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13339   def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13340             (VCVTUQQ2PHZ128rmb addr:$src)>;
13341   def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13342                            (v8f16 VR128X:$src0), VK2WM:$mask),
13343             (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13344   def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13345                            v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13346             (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
// Three-source maskable instruction forms for one vector type `_`. The
// enclosing Constraints string ties $src1 to $dst and marks $dst as
// early-clobber. In every pattern the OpNode operands are ordered
// ($src2, $src3, $src1), i.e. the tied operand comes last.
//   r  - $src3 is a register; the only form that receives IsCommutable.
//   m  - $src3 is a full-vector memory operand loaded via _.LdFrag.
//   mb - $src3 is a scalar memory operand broadcast via _.BroadcastLdFrag;
//        tagged EVEX_B and printed with _.BroadcastStr.
13349 let Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13350   multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> {
13351     defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13352             (ins _.RC:$src2, _.RC:$src3),
13353             OpcodeStr, "$src3, $src2", "$src2, $src3",
13354             (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX, VVVV;
13356     defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13357             (ins _.RC:$src2, _.MemOp:$src3),
13358             OpcodeStr, "$src3, $src2", "$src2, $src3",
13359             (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX, VVVV;
13361     defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13362             (ins _.RC:$src2, _.ScalarMemOp:$src3),
13363             OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr),
13364             (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX, VVVV;
13365   }
13366 } // Constraints = "@earlyclobber $dst, $src1 = $dst"
// Register form with an explicit rounding-control operand. `rb` takes
// $src2, $src3 and an AVX512RC:$rc operand, passes $rc to OpNode as an i32
// timm after ($src2, $src3, $src1), and is tagged EVEX_B and EVEX_RC. Uses the
// same "@earlyclobber $dst, $src1 = $dst" constraints as avx512_cfmaop_rm.
13368 multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
13369                                  X86VectorVTInfo _> {
13370   let Constraints = "@earlyclobber $dst, $src1 = $dst" in
13371   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13372           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
13373           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
13374           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>,
13375           EVEX, VVVV, EVEX_B, EVEX_RC;
// Instantiates the avx512_cfmaop_rm forms at three widths, all on f32 vector
// type infos:
//   Z    - v16f32_info, requires HasFP16; also adds avx512_cfmaop_round built
//          from OpNodeRnd. Scheduled as WriteFMAZ.
//   Z256 - v8f32x_info, requires HasVLX and HasFP16. Scheduled as WriteFMAY.
//   Z128 - v4f32x_info, requires HasVLX and HasFP16. Scheduled as WriteFMAX.
// IsCommutable is forwarded to avx512_cfmaop_rm at every width.
13379 multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> {
13380   let Predicates = [HasFP16] in {
13381     defm Z    : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>,
13382                 avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
13383                       EVEX_V512, Sched<[WriteFMAZ]>;
13384   }
13385   let Predicates = [HasVLX, HasFP16] in {
13386     defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>;
13387     defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>;
13388   }
// Instantiates the three vector-length variants (Z, Z256, Z128) of a packed
// two-source op on top of avx512_fp_packed / avx512_fp_round_packed.
//   opc, OpcodeStr - forwarded unchanged to every variant.
//   OpNode         - forwarded to avx512_fp_packed.
//   MaskOpNode     - forwarded to avx512_fp_packed right after OpNode.
//   OpNodeRnd      - forwarded to avx512_fp_round_packed (Z only).
//   IsCommutable   - passed twice, in consecutive positions, to avx512_fp_packed.
// Every instantiation passes the strings "" and "@earlyclobber $dst"; the
// avx512_fp_packed ones also pass a trailing literal 0.
// NOTE(review): the meaning of those trailing positional arguments is defined
// by avx512_fp_packed / avx512_fp_round_packed, which are not visible here --
// confirm there before changing them.
13391 multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13392                                  SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> {
// Z variant: predicated on HasFP16 only; the only one with a
// avx512_fp_round_packed form.
13393   let Predicates = [HasFP16] in {
13394     defm Z    : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
13395                                  WriteFMAZ, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>,
13396                 avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info,
13397                                        "", "@earlyclobber $dst">, EVEX_V512;
13398   }
// Z256 / Z128 variants: additionally require HasVLX.
13399   let Predicates = [HasVLX, HasFP16] in {
13400     defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
13401                                  WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256;
13402     defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
13403                                  WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128;
13404   }
// Packed instruction definitions. Every record defined in this block gets
// Uses = [MXCSR].
// Each pair below shares an opcode byte and differs in mnemonic, nodes, the
// XS / XD class and the IsCommutable flag (1 for the first of each pair, 0 for
// the "vfc*" one). All four use T_MAP6 and EVEX_CD8<32, CD8VF>.
13408 let Uses = [MXCSR] in {
// Opcode 0x56, built with avx512_cfmaop_common.
13409   defm VFMADDCPH  : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>,
13410                                     T_MAP6, XS, EVEX_CD8<32, CD8VF>;
13411   defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>,
13412                                     T_MAP6, XD, EVEX_CD8<32, CD8VF>;
// Opcode 0xD6, built with avx512_cfmulop_common. The same node is passed for
// both the OpNode and MaskOpNode parameters.
13414   defm VFMULCPH  : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc,
13415                                          x86vfmulcRnd, 1>, T_MAP6, XS, EVEX_CD8<32, CD8VF>;
13416   defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc,
13417                                          x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6, XD, EVEX_CD8<32, CD8VF>;
// Three-source op on VR128X operands, defined through AVX512_maskable_3src
// with v4f32x_info. Produces three records:
//   r  - register/register form (MRMSrcReg), matched with OpNode.
//   m  - register/memory form (MRMSrcMem); $src3 is an ssmem operand loaded
//        through sse_load_f32, matched with OpNode.
//   rb - register form with an extra AVX512RC:$rc operand, matched with
//        OpNodeRnd and tagged EVEX_B, EVEX_RC.
// Parameters:
//   opc, OpcodeStr - forwarded to every AVX512_maskable_3src instantiation.
//   OpNode         - node for the r and m forms.
//   OpNodeRnd      - node for the rb form.
//   IsCommutable   - forwarded to the r form only.
13421 multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
13422                                    bit IsCommutable> {
// All three forms require HasFP16, tie $src1 to $dst and mark $dst earlyclobber.
13423   let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13424     defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13425                         (ins VR128X:$src2, VR128X:$src3), OpcodeStr,
13426                         "$src3, $src2", "$src2, $src3",
13427                         (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
13428                         Sched<[WriteFMAX]>;
13429     defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
13430                         (ins VR128X:$src2, ssmem:$src3), OpcodeStr,
13431                         "$src3, $src2", "$src2, $src3",
13432                         (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
13433                         Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
13434     defm rb : AVX512_maskable_3src<opc,  MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13435                         (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
13436                         "$rc, $src3, $src2", "$src2, $src3, $rc",
13437                         (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
13438                         EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
13439   }
// Two-source op on VR128X operands, defined through AVX512_maskable with
// f32x_info. Produces three records:
//   rr  - register/register form (MRMSrcReg), matched with OpNode.
//   rm  - register/memory form (MRMSrcMem); $src2 is an ssmem operand loaded
//         through sse_load_f32, matched with OpNode.
//   rrb - register form with an extra AVX512RC:$rc operand, matched with
//         OpNodeRnd and tagged EVEX_B, EVEX_RC.
// Every form passes X86selects and the string "@earlyclobber $dst" to
// AVX512_maskable.
// Parameters:
//   opc, OpcodeStr - forwarded to every AVX512_maskable instantiation.
//   OpNode         - node for the rr and rm forms.
//   OpNodeRnd      - node for the rrb form.
//   IsCommutable   - passed in three consecutive positions for rr only;
//                    rm and rrb pass literal 0, 0, 0 in those positions.
// NOTE(review): the three flag positions presumably correspond to the
// IsCommutable / IsKCommutable / IsKZCommutable trio seen on
// AVX512_maskable_custom -- confirm against AVX512_maskable.
13442 multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13443                                      SDNode OpNodeRnd, bit IsCommutable> {
13444   let Predicates = [HasFP16] in {
13445     defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13446                         (ins VR128X:$src1, VR128X:$src2), OpcodeStr,
13447                         "$src2, $src1", "$src1, $src2",
13448                         (v4f32 (OpNode VR128X:$src1, VR128X:$src2)),
13449                         IsCommutable, IsCommutable, IsCommutable,
13450                         X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>;
13451     defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
13452                         (ins VR128X:$src1, ssmem:$src2), OpcodeStr,
13453                         "$src2, $src1", "$src1, $src2",
13454                         (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))),
13455                         0, 0, 0, X86selects, "@earlyclobber $dst">,
13456                         Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
13457     defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13458                         (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr,
13459                         "$rc, $src2, $src1", "$src1, $src2, $rc",
13460                         (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)),
13461                         0, 0, 0, X86selects, "@earlyclobber $dst">,
13462                         EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
13463   }
// Scalar ("sh") instruction definitions. Every record defined in this block
// gets Uses = [MXCSR].
// Each pair below shares an opcode byte and differs in mnemonic, nodes, the
// XS / XD class and the IsCommutable flag (1 for the first of each pair, 0 for
// the "vfc*" one). All four use T_MAP6, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX
// and VVVV.
13466 let Uses = [MXCSR] in {
// Opcode 0x57, built with avx512_cfmaop_sh_common.
13467   defm VFMADDCSHZ  : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>,
13468                                     T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV;
13469   defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>,
13470                                     T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV;
// Opcode 0xD7, built with avx512_cfmbinop_sh_common. Unlike the 0x57 pair
// above, these two also carry VEX_LIG.
13472   defm VFMULCSHZ  : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>,
13473                                     T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV;
13474   defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>,
13475                                     T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV;