Add gfx950 mfma instructions to ROCDL dialect (#123361)
[llvm-project.git] / llvm / lib / Target / X86 / X86InstrAMX.td
blob1beaaafb159e30ae96c83d9aae948b40d853ef2a
1 //===---- X86InstrAMX.td - AMX Instruction Set Extension --*- tablegen -*--===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file describes the instructions that make up the Intel AMX instruction
10 // set.
12 //===----------------------------------------------------------------------===//
14 //===----------------------------------------------------------------------===//
15 // AMX instructions
// Tile configuration and tile load/store instructions, written as a multiclass
// so they can be instantiated more than once. `Suffix` is appended to every
// record name, and `HasEGPR` is the extra predicate listed next to HasAMXTILE
// and In64BitMode. No VEX/EVEX mixin is applied inside the multiclass itself.
17 multiclass AMX_TILE_COMMON<string Suffix, Predicate HasEGPR> {
18 let Predicates = [HasAMXTILE, HasEGPR, In64BitMode] in {
  // LDTILECFG is flagged as side-effecting and as defining TMM0-TMM7.
19   let hasSideEffects = 1,
20       Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
21   def LDTILECFG#Suffix : I<0x49, MRM0m, (outs), (ins opaquemem:$src),
22                            "ldtilecfg\t$src",
23                            [(int_x86_ldtilecfg addr:$src)]>,
24                          T8, PS;
  // STTILECFG shares opcode 0x49 / MRM0m with LDTILECFG; it uses the PD mixin
  // where LDTILECFG uses PS.
25   let hasSideEffects = 1 in
26   def STTILECFG#Suffix : I<0x49, MRM0m, (outs), (ins opaquemem:$src),
27                            "sttilecfg\t$src",
28                            [(int_x86_sttilecfg addr:$src)]>,
29                          T8, PD;
  // The two tile loads share opcode 0x4b and differ in the prefix mixin
  // (XD vs PD). Neither carries a selection pattern.
30   let mayLoad = 1 in
31   def TILELOADD#Suffix : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
32                            (ins sibmem:$src),
33                            "tileloadd\t{$src, $dst|$dst, $src}", []>,
34                          T8, XD;
35   let mayLoad = 1 in
36   def TILELOADDT1#Suffix : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
37                              (ins sibmem:$src),
38                              "tileloaddt1\t{$src, $dst|$dst, $src}", []>,
39                            T8, PD;
  // The tile store also uses opcode 0x4b, with the MRMDestMemFSIB form and the
  // XS mixin. It has no outs; the memory operand is $dst.
40   let mayStore = 1 in
41   def TILESTORED#Suffix : I<0x4b, MRMDestMemFSIB, (outs),
42                             (ins sibmem:$dst, TILE:$src),
43                             "tilestored\t{$src, $dst|$dst, $src}", []>,
44                           T8, XS;
// Everything in this block is scheduled as WriteSystem: the two
// AMX_TILE_COMMON instantiations, TILERELEASE/TILEZERO, and the tile pseudos.
45 let SchedRW = [WriteSystem] in {
  // Unsuffixed records use NoEGPR + VEX; "_EVEX"-suffixed records use
  // HasEGPR + EVEX + NoCD8.
46   defm "" : AMX_TILE_COMMON<"", NoEGPR>, VEX;
47   defm "" : AMX_TILE_COMMON<"_EVEX", HasEGPR>, EVEX, NoCD8;
48   let Predicates = [HasAMXTILE, In64BitMode] in {
    // TILERELEASE is modelled as defining all eight tile registers.
49     let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
50     def TILERELEASE : I<0x49, MRM_C0, (outs), (ins),
51                         "tilerelease", [(int_x86_tilerelease)]>, VEX, T8, PS;
52     def TILEZERO : I<0x49, MRMr0, (outs TILE:$dst), (ins),
53                      "tilezero\t$dst", []>,
54                      VEX, T8, XD;
55     // Pseudo instruction for RA.
    // The pseudos below whose names end in V take two GR16 operands where the
    // real instructions take none.
56     let isPseudo = true, mayLoad = 1, hasSideEffects = 1,
57         Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
58     def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src), []>;
59     let isPseudo = true, mayLoad = 1 in
60     def PTILELOADDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
61                                                      GR16:$src2,
62                                                      opaquemem:$src3), []>;
63     let isPseudo = true, mayLoad = 1 in
64     def PTILELOADDT1V : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
65                                                        GR16:$src2,
66                                                        opaquemem:$src3), []>;
67     let isPseudo = true, mayStore = 1 in
68     def PTILESTOREDV : PseudoI<(outs), (ins GR16:$src1,
69                                             GR16:$src2, opaquemem:$src3,
70                                             TILE:$src4), []>;
    // PTILEZEROV is selected from int_x86_tilezero_internal and is marked
    // rematerializable, as cheap as a move, and foldable as a load.
71     let isPseudo = true, isReMaterializable = 1, isAsCheapAsAMove = 1,
72         canFoldAsLoad = 1, usesCustomInserter = 1 in
73       def PTILEZEROV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2),
74                                 [(set TILE:$dst, (int_x86_tilezero_internal
75                                   GR16:$src1, GR16:$src2))]>;
76     let usesCustomInserter = 1 in {
77       // Pseudo instructions, using immediates instead of tile registers.
78       // To be translated to the actual instructions in X86ISelLowering.cpp
79       let mayLoad = 1 in
80       def PTILELOADD : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>;
81       let mayLoad = 1 in
82       def PTILELOADDT1 : PseudoI<(outs), (ins u8imm:$src1,
83                                           sibmem:$src2), []>;
84       let mayStore = 1 in
85       def PTILESTORED : PseudoI<(outs), (ins i8mem:$dst, u8imm:$src), []>;
86       def PTILEZERO : PseudoI<(outs), (ins u8imm:$src),
87                               [(int_x86_tilezero timm:$src)]>;
88     }
89   } // Predicates
90 } // SchedRW
// AMX-INT8: TDPBSSD / TDPBSUD / TDPBUSD / TDPBUUD. All four share opcode 0x5e
// and the MRMSrcReg4VOp3 form; they differ only in the prefix mixin after T8
// (XD, XS, PD, and none respectively).
91 let Predicates = [HasAMXINT8, In64BitMode] in {
92   let SchedRW = [WriteSystem] in {
    // $src1 is tied to $dst, so the asm strings print $dst in its place.
93     let Constraints = "$src1 = $dst" in {
94       def TDPBSSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
95                       (ins TILE:$src1, TILE:$src2, TILE:$src3),
96                       "tdpbssd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
97                       VEX, VVVV, T8, XD;
98       def TDPBSUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
99                       (ins TILE:$src1, TILE:$src2, TILE:$src3),
100                       "tdpbsud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
101                       VEX, VVVV, T8, XS;
102       def TDPBUSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
103                       (ins TILE:$src1, TILE:$src2, TILE:$src3),
104                       "tdpbusd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
105                       VEX, VVVV, T8, PD;
106       def TDPBUUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
107                       (ins TILE:$src1, TILE:$src2, TILE:$src3),
108                       "tdpbuud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
109                       VEX, VVVV, T8;
110     }
111     // Pseudo instruction for RA.
    // Each *V pseudo takes three GR16 operands followed by three TILE operands
    // and is selected from the matching *_internal intrinsic. The first TILE
    // input ($src4) is tied to $dst.
112     let isPseudo = true, Constraints = "$src4 = $dst" in {
113       def PTDPBSSDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
114                               GR16:$src2, GR16:$src3, TILE:$src4,
115                               TILE:$src5, TILE:$src6),
116                               [(set TILE: $dst,
117                               (int_x86_tdpbssd_internal GR16:$src1, GR16:$src2,
118                               GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
119       def PTDPBSUDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
120                               GR16:$src2, GR16:$src3, TILE:$src4,
121                               TILE:$src5, TILE:$src6),
122                               [(set TILE: $dst,
123                               (int_x86_tdpbsud_internal GR16:$src1, GR16:$src2,
124                                GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
125       def PTDPBUSDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
126                               GR16:$src2, GR16:$src3, TILE:$src4,
127                               TILE:$src5, TILE:$src6),
128                               [(set TILE: $dst,
129                               (int_x86_tdpbusd_internal GR16:$src1, GR16:$src2,
130                               GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
131       def PTDPBUUDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
132                               GR16:$src2, GR16:$src3, TILE:$src4,
133                               TILE:$src5, TILE:$src6),
134                               [(set TILE: $dst,
135                               (int_x86_tdpbuud_internal GR16:$src1, GR16:$src2,
136                               GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
137     }
138     let usesCustomInserter = 1 in {
139       // Pseudo instructions, using immediates instead of tile registers.
140       // To be translated to the actual instructions in X86ISelLowering.cpp
141       def PTDPBSSD : PseudoI<(outs), (ins u8imm:$src1,
142                              u8imm:$src2, u8imm:$src3),
143                              [(int_x86_tdpbssd timm:$src1,
144                                timm:$src2, timm:$src3)]>;
145       def PTDPBSUD : PseudoI<(outs), (ins u8imm:$src1,
146                              u8imm:$src2, u8imm:$src3),
147                              [(int_x86_tdpbsud timm:$src1,
148                                timm:$src2, timm:$src3)]>;
149       def PTDPBUSD : PseudoI<(outs), (ins u8imm:$src1,
150                              u8imm:$src2, u8imm:$src3),
151                              [(int_x86_tdpbusd timm:$src1,
152                                timm:$src2, timm:$src3)]>;
153       def PTDPBUUD : PseudoI<(outs), (ins u8imm:$src1,
154                              u8imm:$src2, u8imm:$src3),
155                              [(int_x86_tdpbuud timm:$src1,
156                                timm:$src2, timm:$src3)]>;
157     }
158   }
159 } // HasAMXINT8, In64BitMode
// AMX-BF16: the TDPBF16PS instruction (opcode 0x5c, XS mixin) together with
// its register-form pseudo and its immediate-form pseudo.
160 let Predicates = [HasAMXBF16, In64BitMode] in {
161   let SchedRW = [WriteSystem] in {
162     let Constraints = "$src1 = $dst" in
163     def TDPBF16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
164                       (ins TILE:$src1, TILE:$src2, TILE:$src3),
165                       "tdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
166                       []>, VEX, VVVV, T8, XS;
167     // Pseudo instruction for RA.
    // Selected from int_x86_tdpbf16ps_internal; $src4 is tied to $dst.
168     let isPseudo = true, Constraints = "$src4 = $dst" in
169       def PTDPBF16PSV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
170                                  GR16:$src2, GR16:$src3, TILE:$src4,
171                                  TILE:$src5, TILE:$src6),
172                                  [(set TILE: $dst,
173                                   (int_x86_tdpbf16ps_internal GR16:$src1,
174                                    GR16:$src2, GR16:$src3, TILE:$src4,
175                                    TILE:$src5, TILE:$src6))]>;
176     let usesCustomInserter = 1 in {
177       // Pseudo instructions, using immediates instead of tile registers.
178       // To be translated to the actual instructions in X86ISelLowering.cpp
179       def PTDPBF16PS : PseudoI<(outs), (ins u8imm:$src1,
180                                u8imm:$src2, u8imm:$src3),
181                                [(int_x86_tdpbf16ps timm:$src1,
182                                  timm:$src2, timm:$src3)]>;
183     }
184   }
185 } // HasAMXBF16, In64BitMode
186 // AMX-FP16
// TDPFP16PS uses the same opcode (0x5c) and form as TDPBF16PS, but with the XD
// mixin instead of XS.
187 let Predicates = [HasAMXFP16, In64BitMode] in {
188   let SchedRW = [WriteSystem] in {
    // The asm string names $src1 where the INT8/BF16 definitions name $dst;
    // the constraint ties the two to the same operand.
189     let Constraints = "$src1 = $dst" in {
190       def TDPFP16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
191                         (ins TILE:$src1, TILE:$src2, TILE:$src3),
192                         "tdpfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
193                         []>, VEX, VVVV, T8, XD;
194     }
195     // Pseudo instruction for RA.
    // Selected from int_x86_tdpfp16ps_internal; $src4 is tied to $dst.
196     let isPseudo = true, Constraints = "$src4 = $dst" in {
197       def PTDPFP16PSV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
198                                  GR16:$src2, GR16:$src3, TILE:$src4,
199                                  TILE:$src5, TILE:$src6),
200                                  [(set TILE: $dst,
201                                   (int_x86_tdpfp16ps_internal GR16:$src1,
202                                    GR16:$src2, GR16:$src3, TILE:$src4,
203                                    TILE:$src5, TILE:$src6))]>;
204     }
    // Immediate-operand pseudo selected from int_x86_tdpfp16ps; it is expanded
    // through the custom inserter.
205     let  usesCustomInserter = 1 in {
206       def PTDPFP16PS : PseudoI<(outs), (ins u8imm:$src1,
207                                u8imm:$src2, u8imm:$src3),
208                                [(int_x86_tdpfp16ps timm:$src1,
209                                  timm:$src2, timm:$src3)]>;
210     }
211   }
212 } // HasAMXFP16, In64BitMode
// AMX-COMPLEX: TCMMIMFP16PS and TCMMRLFP16PS, plus their register-form (*V)
// and immediate-form pseudos. Both real instructions use opcode 0x6c.
213 let Predicates = [HasAMXCOMPLEX, In64BitMode] in {
214   let SchedRW = [WriteSystem] in {
215     let Constraints = "$src1 = $dst" in {
      // TCMMIMFP16PS carries the PD mixin; TCMMRLFP16PS carries WIG and no
      // prefix mixin after T8.
216       def TCMMIMFP16PS   : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
217                             (ins TILE:$src1, TILE:$src2, TILE:$src3),
218                             "tcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
219                             []>, T8, PD, VEX, VVVV;
220       def TCMMRLFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
221                             (ins TILE:$src1, TILE:$src2, TILE:$src3),
222                             "tcmmrlfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
223                             []>, VEX, VVVV, WIG, T8;
224     } // Constraints = "$src1 = $dst"
    // Register-form pseudos selected from the *_internal intrinsics; the first
    // TILE input ($src4) is tied to $dst.
225     let Constraints = "$src4 = $dst" in {
226       def PTCMMIMFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
227                                   GR16:$src2, GR16:$src3, TILE:$src4,
228                                   TILE:$src5, TILE:$src6),
229                                   [(set TILE: $dst,
230                                   (int_x86_tcmmimfp16ps_internal GR16:$src1, GR16:$src2,
231                                    GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
232       def PTCMMRLFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
233                                   GR16:$src2, GR16:$src3, TILE:$src4,
234                                   TILE:$src5, TILE:$src6),
235                                   [(set TILE: $dst,
236                                   (int_x86_tcmmrlfp16ps_internal GR16:$src1, GR16:$src2,
237                                    GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
238     }
    // Immediate-operand pseudos, expanded through the custom inserter.
239     let usesCustomInserter = 1 in {
240       def PTCMMIMFP16PS : PseudoI<(outs), (ins u8imm:$src1,
241                                 u8imm:$src2, u8imm:$src3),
242                                 [(int_x86_tcmmimfp16ps timm:$src1,
243                                   timm:$src2, timm:$src3)]>;
244       def PTCMMRLFP16PS : PseudoI<(outs), (ins u8imm:$src1,
245                                 u8imm:$src2, u8imm:$src3),
246                                 [(int_x86_tcmmrlfp16ps timm:$src1,
247                                   timm:$src2, timm:$src3)]>;
248     }
249   } // SchedRW = [WriteSystem]
250 // AMX-FP8
// Four instructions built from one helper class, followed by their
// immediate-form and register-form (*V) pseudos.
251 let Predicates = [HasAMXFP8, In64BitMode] in {
252   let SchedRW = [WriteSystem] in {
253     let Constraints = "$src1 = $dst" in {
      // Shared shape for the FP8 instructions: a three-TILE MRMSrcReg4VOp3
      // instruction with no selection pattern. `Opstr` is the mnemonic
      // prepended to the operand string.
254       class AMX_FP8_BASE<bits<8> Opcode, string Opstr> :
255         I<Opcode, MRMSrcReg4VOp3, (outs TILE:$dst),
256           (ins TILE:$src1, TILE:$src2, TILE:$src3),
257           !strconcat(Opstr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
258           []>, VEX, VVVV;
259     }
    // All four share opcode 0xfd and T_MAP5; only the prefix mixin differs.
260     def TDPBF8PS : AMX_FP8_BASE<0xfd, "tdpbf8ps">, T_MAP5, PS;
261     def TDPBHF8PS : AMX_FP8_BASE<0xfd, "tdpbhf8ps">, T_MAP5, XD;
262     def TDPHBF8PS : AMX_FP8_BASE<0xfd, "tdphbf8ps">, T_MAP5, XS;
263     def TDPHF8PS : AMX_FP8_BASE<0xfd, "tdphf8ps">, T_MAP5, PD;
264     let usesCustomInserter = 1 in {
265       // Pseudo instructions, using immediates instead of tile registers.
266       // To be translated to the actual instructions in X86ISelLowering.cpp
267       def PTDPBF8PS : PseudoI<(outs),
268                               (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
269                               [(int_x86_tdpbf8ps timm:$src1, timm:$src2,
270                                 timm:$src3)]>;
271       def PTDPBHF8PS : PseudoI<(outs),
272                                (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
273                                [(int_x86_tdpbhf8ps timm:$src1, timm:$src2,
274                                  timm:$src3)]>;
275       def PTDPHBF8PS : PseudoI<(outs),
276                                (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
277                                [(int_x86_tdphbf8ps timm:$src1, timm:$src2,
278                                  timm:$src3)]>;
279       def PTDPHF8PS : PseudoI<(outs),
280                               (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
281                               [(int_x86_tdphf8ps timm:$src1, timm:$src2,
282                                 timm:$src3)]>;
283     }
    // Register-form pseudos selected from the *_internal intrinsics; the first
    // TILE input ($src4) is tied to $dst.
284     let Constraints = "$src4 = $dst" in {
285       def PTDPBF8PSV : PseudoI<(outs TILE:$dst),
286                                (ins GR16:$src1, GR16:$src2, GR16:$src3,
287                                     TILE:$src4, TILE:$src5, TILE:$src6),
288                                [(set TILE:$dst,
289                                 (int_x86_tdpbf8ps_internal GR16:$src1,
290                                  GR16:$src2, GR16:$src3, TILE:$src4,
291                                  TILE:$src5, TILE:$src6))]>;
292       def PTDPBHF8PSV : PseudoI<(outs TILE:$dst),
293                                (ins GR16:$src1, GR16:$src2, GR16:$src3,
294                                     TILE:$src4, TILE:$src5, TILE:$src6),
295                                [(set TILE:$dst,
296                                 (int_x86_tdpbhf8ps_internal GR16:$src1,
297                                  GR16:$src2, GR16:$src3, TILE:$src4,
298                                  TILE:$src5, TILE:$src6))]>;
299       def PTDPHBF8PSV : PseudoI<(outs TILE:$dst),
300                                (ins GR16:$src1, GR16:$src2, GR16:$src3,
301                                     TILE:$src4, TILE:$src5, TILE:$src6),
302                                [(set TILE:$dst,
303                                 (int_x86_tdphbf8ps_internal GR16:$src1,
304                                  GR16:$src2, GR16:$src3, TILE:$src4,
305                                  TILE:$src5, TILE:$src6))]>;
306       def PTDPHF8PSV : PseudoI<(outs TILE:$dst),
307                                (ins GR16:$src1, GR16:$src2, GR16:$src3,
308                                     TILE:$src4, TILE:$src5, TILE:$src6),
309                                [(set TILE:$dst,
310                                 (int_x86_tdphf8ps_internal GR16:$src1,
311                                  GR16:$src2, GR16:$src3, TILE:$src4,
312                                  TILE:$src5, TILE:$src6))]>;
313     }
314   }
// Pattern-less pseudos that store and load a TILEPair register class operand
// to/from an opaque memory operand.
// NOTE(review): presumably used for spilling/reloading tile pairs — confirm
// against the users of these opcodes.
315 let Predicates = [HasAMXTILE, In64BitMode], isPseudo = true, SchedRW = [WriteSystem] in {
316   let mayStore = 1 in
317   def PTILEPAIRSTORE : PseudoI<(outs), (ins opaquemem:$src1, TILEPair:$src2), []>;
318   let mayLoad = 1 in
319   def PTILEPAIRLOAD : PseudoI<(outs TILEPair:$dst), (ins opaquemem:$src), []>;
// Generates the four t2rpntlvw* tile-pair loads: Z0, Z0..T1, Z1, Z1..T1.
//   op1    - opcode for the forms without "t1"
//   op2    - opcode for the "t1" forms
//   rs     - inserted into the record name as given and into the mnemonic
//            lower-cased
//   suffix - appended to the record name only
// Z0 forms carry the PS mixin and Z1 forms the PD mixin. None has a selection
// pattern.
320 multiclass T2RPNTLVW_Base<bits<8> op1, bits<8> op2, string rs, string suffix> {
321   def Z0#rs#suffix    : I<op1, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
322                           "t2rpntlvwz0" #!tolower(rs)# "\t{$src, $dst|$dst, $src}", []>, PS;
323   def Z0#rs#T1#suffix : I<op2, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
324                           "t2rpntlvwz0" #!tolower(rs)# "t1\t{$src, $dst|$dst, $src}", []>, PS;
325   def Z1#rs#suffix    : I<op1, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
326                           "t2rpntlvwz1" #!tolower(rs)# "\t{$src, $dst|$dst, $src}", []>, PD;
327   def Z1#rs#T1#suffix : I<op2, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
328                           "t2rpntlvwz1" #!tolower(rs)# "t1\t{$src, $dst|$dst, $src}", []>, PD;
// Four instantiations of T2RPNTLVW_Base. The two without "RS" use opcodes
// 0x6e/0x6f in T8; the two "RS" ones use 0xf8/0xf9 in T_MAP5 and additionally
// require HasAMXMOVRS. Each pair has a VEX form and an "_EVEX"-suffixed
// EVEX + NoCD8 form that additionally requires HasEGPR.
329 let Predicates = [HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in
330   defm T2RPNTLVW : T2RPNTLVW_Base<0x6e, 0x6f, "", "">, T8, VEX;
331 let Predicates = [HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in
332   defm T2RPNTLVW : T2RPNTLVW_Base<0x6e, 0x6f, "", "_EVEX">, T8, EVEX, NoCD8;
333 let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in
334   defm T2RPNTLVW : T2RPNTLVW_Base<0xf8, 0xf9, "RS", "">, T_MAP5, VEX;
335 let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in
336   defm T2RPNTLVW : T2RPNTLVW_Base<0xf8, 0xf9, "RS", "_EVEX">, T_MAP5, EVEX, NoCD8;
// AMX-TRANSPOSE: the TTRANSPOSED instruction, the register-form pseudos for
// the t2rpntlvw* loads and for ttransposed, and the immediate-form pseudos.
337 let Predicates = [HasAMXTRANSPOSE, In64BitMode] in {
338   let SchedRW = [WriteSystem] in {
339     def TTRANSPOSED : I<0x5f, MRMSrcReg, (outs TILE:$dst), (ins TILE:$src),
340                         "ttransposed\t{$src, $dst|$dst, $src}", []>, VEX, T8, XS;
    // Register-form pseudos producing a TILEPair from three GR16 operands and
    // an opaque memory operand. None has a selection pattern.
341     let isPseudo = true in {
342       def PT2RPNTLVWZ0V : PseudoI<(outs TILEPair:$dst),
343                                   (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
344                                   []>;
345       def PT2RPNTLVWZ0T1V : PseudoI<(outs TILEPair:$dst),
346                                   (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
347                                   []>;
348       def PT2RPNTLVWZ1V : PseudoI<(outs TILEPair:$dst),
349                                   (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
350                                   []>;
351       def PT2RPNTLVWZ1T1V : PseudoI<(outs TILEPair:$dst),
352                                   (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
353                                   []>;
354     }
    // Register-form transpose pseudo, selected from
    // int_x86_ttransposed_internal.
355     def PTTRANSPOSEDV : PseudoI<(outs TILE:$dst),
356                                 (ins GR16:$src1, GR16:$src2, TILE:$src),
357                                 [(set TILE: $dst,
358                                  (int_x86_ttransposed_internal GR16:$src1, GR16:$src2,
359                                   TILE:$src))]>;
    // Immediate-operand pseudos, expanded through the custom inserter. Only
    // PTTRANSPOSED has a selection pattern.
360     let usesCustomInserter = 1 in {
361       def PT2RPNTLVWZ0 : PseudoI<(outs), (ins u8imm:$dst,
362                                  sibmem:$src1), []>;
363       def PT2RPNTLVWZ0T1 : PseudoI<(outs), (ins u8imm:$dst,
364                                    sibmem:$src1), []>;
365       def PT2RPNTLVWZ1 : PseudoI<(outs), (ins u8imm:$dst,
366                                  sibmem:$src1), []>;
367       def PT2RPNTLVWZ1T1 : PseudoI<(outs), (ins u8imm:$dst,
368                                    sibmem:$src1), []>;
369       def PTTRANSPOSED : PseudoI<(outs), (ins u8imm:$dst, u8imm:$src),
370                                  [(int_x86_ttransposed timm:$dst, timm:$src)]>;
371     }
372   }
373 } // HasAMXTRANSPOSE, In64BitMode
// TTDPBF16PS requires both HasAMXBF16 and HasAMXTRANSPOSE. Defines the real
// instruction (opcode 0x6c, XS mixin), the register-form pseudo, and the
// immediate-form pseudo.
374 let Predicates = [HasAMXBF16, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in {
375   let Constraints = "$src1 = $dst" in
376     def TTDPBF16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
377                        (ins TILE:$src1, TILE:$src2, TILE:$src3),
378                        "ttdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
379                        []>, VEX, VVVV, T8,XS;
  // Selected from int_x86_ttdpbf16ps_internal; $src4 is tied to $dst.
380   let Constraints = "$src4 = $dst" in
381     def PTTDPBF16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
382                                 GR16:$src2, GR16:$src3, TILE:$src4,
383                                 TILE:$src5, TILE:$src6),
384                                 [(set TILE: $dst,
385                                   (int_x86_ttdpbf16ps_internal GR16:$src1, GR16:$src2,
386                                    GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
  // Immediate-operand form, expanded through the custom inserter.
387   let usesCustomInserter = 1 in
388     def PTTDPBF16PS : PseudoI<(outs), (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
389                               [(int_x86_ttdpbf16ps timm:$src1, timm:$src2, timm:$src3)]>;
// TTDPFP16PS requires both HasAMXFP16 and HasAMXTRANSPOSE. It has the same
// opcode (0x6c) and form as TTDPBF16PS, with the XD mixin instead of XS.
390 let Predicates = [HasAMXFP16, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in {
391   let Constraints = "$src1 = $dst" in
392     def TTDPFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
393                        (ins TILE:$src1, TILE:$src2, TILE:$src3),
394                        "ttdpfp16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
395                        []>, VEX, VVVV, T8,XD;
  // Selected from int_x86_ttdpfp16ps_internal; $src4 is tied to $dst.
396   let Constraints = "$src4 = $dst" in
397     def PTTDPFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
398                                 GR16:$src2, GR16:$src3, TILE:$src4,
399                                 TILE:$src5, TILE:$src6),
400                                 [(set TILE: $dst,
401                                   (int_x86_ttdpfp16ps_internal GR16:$src1, GR16:$src2,
402                                    GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
  // Immediate-operand form, expanded through the custom inserter.
403   let usesCustomInserter = 1 in
404     def PTTDPFP16PS : PseudoI<(outs), (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
405                               [(int_x86_ttdpfp16ps timm:$src1, timm:$src2, timm:$src3)]>;
// Instructions requiring both HasAMXCOMPLEX and HasAMXTRANSPOSE. All four real
// instructions use opcode 0x6b and are told apart by the prefix mixin
// (XD, XS, PS, PD).
406 let Predicates = [HasAMXCOMPLEX, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in {
  // Three-TILE forms, with $src1 tied to $dst.
407   let Constraints = "$src1 = $dst" in {
408     def TTCMMIMFP16PS : I<0x6b, MRMSrcReg4VOp3, (outs TILE:$dst),
409                           (ins TILE:$src1, TILE:$src2, TILE:$src3),
410                           "ttcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
411                           []>, VEX, VVVV, T8,XD;
412     def TTCMMRLFP16PS: I<0x6b, MRMSrcReg4VOp3, (outs TILE:$dst),
413                          (ins TILE:$src1, TILE:$src2, TILE:$src3),
414                          "ttcmmrlfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
415                          []>, VEX, VVVV, T8,XS;
416     def TCONJTCMMIMFP16PS : I<0x6b, MRMSrcReg4VOp3, (outs TILE:$dst),
417                           (ins TILE:$src1, TILE:$src2, TILE:$src3),
418                           "tconjtcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
419                           []>, VEX, VVVV, WIG, T8,PS;
420   }
  // Two-operand form (one TILE in, one TILE out); it sits outside the tied
  // constraint block above.
421   def TCONJTFP16 : I<0x6b, MRMSrcReg, (outs TILE:$dst), (ins TILE:$src),
422                      "tconjtfp16\t{$src, $dst|$dst, $src}", []>, VEX, T8,PD;
  // Register-form pseudos selected from the *_internal intrinsics; the first
  // TILE input ($src4) is tied to $dst.
423   let Constraints = "$src4 = $dst" in {
424     def PTTCMMIMFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
425                                   GR16:$src2, GR16:$src3, TILE:$src4,
426                                   TILE:$src5, TILE:$src6),
427                                   [(set TILE: $dst,
428                                     (int_x86_ttcmmimfp16ps_internal GR16:$src1, GR16:$src2,
429                                      GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
430     def PTTCMMRLFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
431                                   GR16:$src2, GR16:$src3, TILE:$src4,
432                                   TILE:$src5, TILE:$src6),
433                                   [(set TILE: $dst,
434                                     (int_x86_ttcmmrlfp16ps_internal GR16:$src1, GR16:$src2,
435                                      GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
436     def PTCONJTCMMIMFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
437                                       GR16:$src2, GR16:$src3, TILE:$src4,
438                                       TILE:$src5, TILE:$src6),
439                                       [(set TILE: $dst,
440                                         (int_x86_tconjtcmmimfp16ps_internal GR16:$src1, GR16:$src2,
441                                          GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
442   }
  // Register-form pseudo for tconjtfp16: two GR16 operands and one TILE input,
  // with no tied operand.
443   def PTCONJTFP16V : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2, TILE:$src3),
444                              [(set TILE: $dst, (int_x86_tconjtfp16_internal GR16:$src1, GR16:$src2, TILE:$src3))]>;
  // Immediate-operand pseudos, expanded through the custom inserter.
445   let usesCustomInserter = 1 in {
446     def PTTCMMIMFP16PS : PseudoI<(outs), (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
447                                  [(int_x86_ttcmmimfp16ps timm:$src1, timm:$src2, timm:$src3)]>;
448     def PTTCMMRLFP16PS : PseudoI<(outs), (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
449                                  [(int_x86_ttcmmrlfp16ps timm:$src1, timm:$src2, timm:$src3)]>;
450     def PTCONJTCMMIMFP16PS : PseudoI<(outs), (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
451                                      [(int_x86_tconjtcmmimfp16ps timm:$src1, timm:$src2, timm:$src3)]>;
452     def PTCONJTFP16 : PseudoI<(outs), (ins u8imm:$dst, u8imm:$src),
453                               [(int_x86_tconjtfp16 timm:$dst, timm:$src)]>;
454   }
// Pseudos for the "RS" variants of the t2rpntlvw* loads; they require both
// HasAMXMOVRS and HasAMXTRANSPOSE. None has a selection pattern.
455 let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in {
  // Register-form pseudos: three GR16 operands plus an opaque memory operand,
  // producing a TILEPair.
456   let isPseudo = true in {
457     def PT2RPNTLVWZ0RSV   : PseudoI<(outs TILEPair:$dst),
458                               (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
459                               []>;
460     def PT2RPNTLVWZ0RST1V : PseudoI<(outs TILEPair:$dst),
461                               (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
462                               []>;
463     def PT2RPNTLVWZ1RSV   : PseudoI<(outs TILEPair:$dst),
464                               (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
465                               []>;
466     def PT2RPNTLVWZ1RST1V : PseudoI<(outs TILEPair:$dst),
467                               (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
468                               []>;
469   }
  // Immediate-operand pseudos, expanded through the custom inserter.
470   let  usesCustomInserter = 1 in {
471     def PT2RPNTLVWZ0RS   : PseudoI<(outs), (ins u8imm:$dst, sibmem:$src1), []>;
472     def PT2RPNTLVWZ0RST1 : PseudoI<(outs), (ins u8imm:$dst, sibmem:$src1), []>;
473     def PT2RPNTLVWZ1RS   : PseudoI<(outs), (ins u8imm:$dst, sibmem:$src1), []>;
474     def PT2RPNTLVWZ1RST1 : PseudoI<(outs), (ins u8imm:$dst, sibmem:$src1), []>;
475   }
476 } // HasAMXMOVRS, HasAMXTRANSPOSE
// Generates the tileloaddrs / tileloaddrst1 pair. `suffix` is appended to the
// record names: the first record is named by the defm prefix plus `suffix`,
// the second inserts "T1" before it. Both use opcode 0x4a in T8 and differ in
// the prefix mixin (XD vs PD). Neither has a selection pattern.
477 multiclass TILELOADDRS_Base<string suffix> {
478   def suffix    : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst), (ins sibmem:$src1),
479                     "tileloaddrs\t{$src1, $dst|$dst, $src1}", []>, T8, XD;
480   def T1#suffix : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst), (ins sibmem:$src1),
481                     "tileloaddrst1\t{$src1, $dst|$dst, $src1}", []>, T8, PD;
// Two instantiations of TILELOADDRS_Base: an unsuffixed VEX form, and an
// "_EVEX"-suffixed EVEX + NoCD8 form that additionally requires HasEGPR.
482 let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in
483   defm TILELOADDRS : TILELOADDRS_Base<"">, VEX;
484 let Predicates = [HasAMXMOVRS, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in
485   defm TILELOADDRS : TILELOADDRS_Base<"_EVEX">, EVEX, NoCD8;
// Pseudos for the tileloaddrs / tileloaddrst1 loads. All are marked mayLoad
// and none has a selection pattern.
486 let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in {
  // Register-form pseudos: two GR16 operands plus an opaque memory operand,
  // producing a TILE.
487   let isPseudo = true, mayLoad = 1 in {
488     def PTILELOADDRSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
489                                                   GR16:$src2,
490                                                   opaquemem:$src3), []>;
491     def PTILELOADDRST1V : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
492                                                     GR16:$src2,
493                                                     opaquemem:$src3), []>;
494   }
  // Immediate-operand pseudos, expanded through the custom inserter.
495   let usesCustomInserter = 1, mayLoad = 1 in {
496     def PTILELOADDRS : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>;
497     def PTILELOADDRST1 : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>;
498   }
499 } // HasAMXMOVRS, In64BitMode
// Generates the two tcvtrowd2ps forms, which read a TILE and write a VR512:
//   rri - second source is an i32u8imm immediate (Ii8, opcode 0x7, TA map)
//   rre - second source is a GR32 register (opcode 0x4A, T8 map, VVVV)
// Both carry the XS mixin and are EVEX / EVEX_V512. Neither has a selection
// pattern.
500 multiclass m_tcvtrowd2ps {
501   let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
502     let SchedRW = [WriteSystem] in {
503       def rri : Ii8<0x7, MRMSrcReg, (outs VR512:$dst),
504                     (ins TILE:$src1, i32u8imm:$src2),
505                     "tcvtrowd2ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
506                     []>, TA,XS, EVEX, EVEX_V512;
507       def rre : I<0x4A, MRMSrcReg4VOp3, (outs VR512:$dst),
508                   (ins TILE:$src1, GR32:$src2),
509                   "tcvtrowd2ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
510                   []>, T8,XS, EVEX, VVVV, EVEX_V512;
511     }
512   } // HasAMXAVX512, HasAVX10_2_512, In64BitMode
// Instantiates m_tcvtrowd2ps under the TCVTROWD2PS prefix, yielding the rri
// and rre records.
513 defm TCVTROWD2PS : m_tcvtrowd2ps;
// Pseudo instructions selected from the tcvtrow* intrinsics. All require
// HasAMXAVX512, HasAVX10_2_512 and In64BitMode and are scheduled as
// WriteSystem. Two flavours are defined here:
//   * PTCVTROWD2PSrri / PTCVTROWD2PSrre - matched from int_x86_tcvtrowd2ps
//     with the first operand as a target immediate (timm); expanded by the
//     custom inserter.
//   * P<op>rriV / P<op>rreV - matched from the corresponding
//     int_x86_<op>_internal intrinsic; operands are two GR16 values, a TILE
//     register and a fourth operand that is an immediate (rri) or GR32 (rre).
// NOTE(review): the GR16 pair presumably carries the tile shape -- confirm
// against the AMX lowering code.
569 let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
570   let SchedRW = [WriteSystem] in {
        // Immediate-tile forms of tcvtrowd2ps; only these two use the custom
        // inserter in this block.
571     let  usesCustomInserter = 1 in {
572       def PTCVTROWD2PSrri : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, i32u8imm:$src2),
573                                     [(set VR512:$dst, (int_x86_tcvtrowd2ps timm:$src1, imm:$src2))]>;
574       def PTCVTROWD2PSrre : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, GR32:$src2),
575                                     [(set VR512:$dst, (int_x86_tcvtrowd2ps timm:$src1, GR32:$src2))]>;
576     }
        // tcvtrowd2ps, TILE-register forms.
578     def PTCVTROWD2PSrriV : PseudoI<(outs VR512:$dst),
579                                    (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
580                                     [(set VR512: $dst,
581                                       (int_x86_tcvtrowd2ps_internal GR16:$src1, GR16:$src2,
582                                        TILE:$src3, imm:$src4))]>;
583     def PTCVTROWD2PSrreV : PseudoI<(outs VR512:$dst),
584                                    (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
585                                    [(set VR512: $dst,
586                                      (int_x86_tcvtrowd2ps_internal GR16:$src1, GR16:$src2,
587                                       TILE:$src3, GR32:$src4))]>;
        // tcvtrowps2bf16h, TILE-register forms.
588     def PTCVTROWPS2BF16HrriV : PseudoI<(outs VR512:$dst),
589                                        (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
590                                        [(set VR512: $dst,
591                                          (int_x86_tcvtrowps2bf16h_internal GR16:$src1, GR16:$src2,
592                                           TILE:$src3, imm:$src4))]>;
593     def PTCVTROWPS2BF16HrreV : PseudoI<(outs VR512:$dst),
594                                        (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
595                                        [(set VR512: $dst,
596                                          (int_x86_tcvtrowps2bf16h_internal GR16:$src1, GR16:$src2,
597                                           TILE:$src3, GR32:$src4))]>;
        // tcvtrowps2bf16l, TILE-register forms.
598     def PTCVTROWPS2BF16LrriV : PseudoI<(outs VR512:$dst),
599                                        (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
600                                        [(set VR512: $dst,
601                                          (int_x86_tcvtrowps2bf16l_internal GR16:$src1, GR16:$src2,
602                                           TILE:$src3, imm:$src4))]>;
603     def PTCVTROWPS2BF16LrreV : PseudoI<(outs VR512:$dst),
604                                        (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
605                                        [(set VR512: $dst,
606                                          (int_x86_tcvtrowps2bf16l_internal GR16:$src1, GR16:$src2,
607                                           TILE:$src3, GR32:$src4))]>;
        // tcvtrowps2phh, TILE-register forms.
608     def PTCVTROWPS2PHHrriV : PseudoI<(outs VR512:$dst),
609                                      (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
610                                      [(set VR512: $dst,
611                                        (int_x86_tcvtrowps2phh_internal GR16:$src1, GR16:$src2,
612                                         TILE:$src3, imm:$src4))]>;
613     def PTCVTROWPS2PHHrreV : PseudoI<(outs VR512:$dst),
614                                      (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
615                                      [(set VR512: $dst,
616                                        (int_x86_tcvtrowps2phh_internal GR16:$src1, GR16:$src2,
617                                         TILE:$src3, GR32:$src4))]>;
        // tcvtrowps2phl, TILE-register forms.
618     def PTCVTROWPS2PHLrriV : PseudoI<(outs VR512:$dst),
619                                      (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
620                                      [(set VR512: $dst,
621                                        (int_x86_tcvtrowps2phl_internal GR16:$src1, GR16:$src2,
622                                         TILE:$src3, imm:$src4))]>;
623     def PTCVTROWPS2PHLrreV : PseudoI<(outs VR512:$dst),
624                                      (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
625                                      [(set VR512: $dst,
626                                        (int_x86_tcvtrowps2phl_internal GR16:$src1, GR16:$src2,
627                                         TILE:$src3, GR32:$src4))]>;
628   }
// Shared template for a TILE -> VR512 instruction that exists in a register
// form and an immediate form, together with its custom-inserter pseudos.
//
// Parameters:
//   Opcode1 - opcode of the register form (rre).
//   Opcode2 - opcode of the immediate form (rri).
//   Opstr   - assembly mnemonic; also appended to "int_x86_" to look up the
//             intrinsic that the pseudos are selected from.
//   P1      - OpPrefix applied to rre.
//   P2      - OpPrefix applied to rri.
//
// Records produced for an instantiation named NAME:
//   NAMErre, NAMErri   - encoded instructions, no selection pattern.
//   PNAMErre, PNAMErri - pseudos matched from int_x86_<Opstr> with the first
//                        operand as a target immediate (timm).
631 multiclass AMXAVX512_BASE<bits<8> Opcode1, bits<8> Opcode2, string Opstr,
632                                 Prefix P1, Prefix P2> {
633   let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode], SchedRW = [WriteSystem] in {
        // Register form: $src2 in a GR32, T8 map, uses the VVVV field.
634     let OpPrefix = P1 in
635       def rre : I<Opcode1, MRMSrcReg4VOp3, (outs VR512:$dst),
636                   (ins TILE:$src1, GR32:$src2),
637                   !strconcat(Opstr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
638                   []>, EVEX, VVVV, EVEX_V512, T8;
        // Immediate form: $src2 is an i32u8imm, TA map.
639     let OpPrefix = P2 in
640       def rri : Ii8<Opcode2, MRMSrcReg, (outs VR512:$dst),
641                     (ins TILE:$src1, i32u8imm:$src2),
642                     !strconcat(Opstr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
643                     []>, EVEX, EVEX_V512, TA;
        // Pseudos carrying the selection patterns; the record names are built
        // explicitly as "P" # NAME # suffix.
644     let usesCustomInserter = 1 in {
645       def "P"#NAME#"rre" : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, GR32:$src2),
646                                    [(set VR512:$dst,
647                                     (!cast<Intrinsic>("int_x86_"#Opstr) timm:$src1, GR32:$src2))]>;
648       def "P"#NAME#"rri" : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, i32u8imm:$src2),
649                                    [(set VR512:$dst,
650                                     (!cast<Intrinsic>("int_x86_"#Opstr) timm:$src1, imm:$src2))]>;
651     }
652   }
// Instantiations of AMXAVX512_BASE. Argument order:
//   <rre opcode, rri opcode, mnemonic, rre prefix, rri prefix>.
// All four share rre opcode 0x6d; the "H" variants use rri opcode 0x07 and the
// "L" variants 0x77, so the variants are told apart by opcode plus prefix.
655 defm TCVTROWPS2PHH : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2phh", PS, PS>;
656 defm TCVTROWPS2PHL : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2phl", PD, XD>;
657 defm TCVTROWPS2BF16H : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2bf16h", XD, XD>;
658 defm TCVTROWPS2BF16L : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2bf16l", XS, XS>;
// Encoded forms of tilemovrow. Both read a TILE ($src1) and define a VR512
// ($dst); they differ only in how $src2 is supplied:
//   rri - $src2 is an immediate (u8imm), opcode 0x7, TA map, PD prefix.
//   rre - $src2 is a GR32 register, opcode 0x4A, T8 map, PD prefix, and the
//         instruction additionally uses the VVVV field.
// Neither form has a selection pattern.
// NOTE(review): rri uses u8imm here whereas the tcvtrowd2ps rri form uses
// i32u8imm -- confirm the difference is intentional.
660 multiclass m_tilemovrow {
661   let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
662     let SchedRW = [WriteSystem] in {
663       def rri : Ii8<0x7, MRMSrcReg, (outs VR512:$dst),
664                     (ins TILE:$src1, u8imm:$src2),
665                     "tilemovrow\t{$src2, $src1, $dst|$dst, $src1, $src2}",
666                     []>, TA,PD, EVEX, EVEX_V512;
667       def rre : I<0x4A, MRMSrcReg4VOp3, (outs VR512:$dst),
668                   (ins TILE:$src1, GR32:$src2),
669                   "tilemovrow\t{$src2, $src1, $dst|$dst, $src1, $src2}",
670                   []>, T8,PD, EVEX, VVVV, EVEX_V512;
671     }
672   } // HasAMXAVX512, HasAVX10_2_512, In64BitMode
// Instantiates the multiclass above with the name prefix TILEMOVROW, giving
// the records TILEMOVROWrri and TILEMOVROWrre.
675 defm TILEMOVROW : m_tilemovrow;
// Pseudo instructions selected from the tilemovrow intrinsics. All require
// HasAMXAVX512, HasAVX10_2_512 and In64BitMode and are scheduled as
// WriteSystem.
677 let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
678   let SchedRW = [WriteSystem] in {
        // Matched from int_x86_tilemovrow with the first operand as a target
        // immediate (timm); expanded by the custom inserter.
679     let  usesCustomInserter = 1 in {
680       def PTILEMOVROWrri : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, i32u8imm:$src2),
681                                    [(set VR512:$dst, (int_x86_tilemovrow timm:$src1, imm:$src2))]>;
682       def PTILEMOVROWrre : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, GR32:$src2),
683                                    [(set VR512:$dst, (int_x86_tilemovrow timm:$src1, GR32:$src2))]>;
684     }
        // "V" forms matched from int_x86_tilemovrow_internal: two GR16
        // operands, a TILE register, then an immediate (rri) or GR32 (rre).
        // NOTE(review): the GR16 pair presumably carries the tile shape --
        // confirm against the AMX lowering code.
686     def PTILEMOVROWrriV : PseudoI<(outs VR512:$dst),
687                                   (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
688                                   [(set VR512: $dst,
689                                     (int_x86_tilemovrow_internal GR16:$src1, GR16:$src2,
690                                      TILE:$src3, imm:$src4))]>;
691     def PTILEMOVROWrreV : PseudoI<(outs VR512:$dst),
692                                   (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
693                                   [(set VR512: $dst,
694                                     (int_x86_tilemovrow_internal GR16:$src1, GR16:$src2,
695                                      TILE:$src3, GR32:$src4))]>;
696   }
// tmmultf32ps: the encoded instruction plus its two pseudo forms. Everything
// here requires HasAMXTF32 and In64BitMode and is scheduled as WriteSystem.
699 let Predicates = [HasAMXTF32, In64BitMode] in {
700   let SchedRW = [WriteSystem] in {
        // Encoded instruction. $src1 is tied to $dst, which is why the
        // assembly string prints only $dst, $src2 and $src3.
701     let Constraints = "$src1 = $dst" in {
702       def TMMULTF32PS: I<0x48, MRMSrcReg4VOp3, (outs TILE:$dst),
703                          (ins TILE:$src1, TILE:$src2, TILE:$src3),
704                          "tmmultf32ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
705                          []>, VEX, VVVV, T8, PD;
706     }
        // "V" pseudo matched from int_x86_tmmultf32ps_internal: three GR16
        // operands followed by three TILE operands, the first TILE ($src4)
        // being tied to the result.
        // NOTE(review): the GR16 operands presumably carry the tile
        // dimensions -- confirm against the AMX lowering code.
707     let Constraints = "$src4 = $dst" in {
708       def PTMMULTF32PSV : PseudoI<(outs TILE:$dst),
709                                   (ins GR16:$src1, GR16:$src2, GR16:$src3,
710                                    TILE:$src4, TILE:$src5, TILE:$src6),
711                                   [(set TILE:$dst,
712                                     (int_x86_tmmultf32ps_internal GR16:$src1,
713                                      GR16:$src2, GR16:$src3, TILE:$src4,
714                                      TILE:$src5, TILE:$src6))]>;
715     }
        // Immediate form matched from int_x86_tmmultf32ps: three 8-bit target
        // immediates and no outputs; expanded by the custom inserter.
716     let usesCustomInserter = 1 in {
717       def PTMMULTF32PS : PseudoI<(outs),
718                                  (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
719                                  [(int_x86_tmmultf32ps timm:$src1, timm:$src2,
720                                    timm:$src3)]>;
721     }
722   } // SchedRW = [WriteSystem]
723 } // HasAMXTF32, In64BitMode
// ttmmultf32ps: the encoded instruction plus its two pseudo forms. Everything
// here requires HasAMXTF32, HasAMXTRANSPOSE and In64BitMode and is scheduled
// as WriteSystem. The encoding matches TMMULTF32PS (opcode 0x48, T8 map)
// except that the prefix is PS instead of PD.
725 let Predicates = [HasAMXTF32, HasAMXTRANSPOSE, In64BitMode] in {
726   let SchedRW = [WriteSystem] in {
        // Encoded instruction. $src1 is tied to $dst, which is why the
        // assembly string prints only $dst, $src2 and $src3.
727     let Constraints = "$src1 = $dst" in {
728       def TTMMULTF32PS: I<0x48, MRMSrcReg4VOp3, (outs TILE:$dst),
729                          (ins TILE:$src1, TILE:$src2, TILE:$src3),
730                          "ttmmultf32ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
731                          []>, VEX, VVVV, T8, PS;
732     }
        // "V" pseudo matched from int_x86_ttmmultf32ps_internal: three GR16
        // operands followed by three TILE operands, the first TILE ($src4)
        // being tied to the result.
        // NOTE(review): the GR16 operands presumably carry the tile
        // dimensions -- confirm against the AMX lowering code.
733     let Constraints = "$src4 = $dst" in {
734       def PTTMMULTF32PSV : PseudoI<(outs TILE:$dst),
735                                    (ins GR16:$src1, GR16:$src2, GR16:$src3,
736                                     TILE:$src4, TILE:$src5, TILE:$src6),
737                                    [(set TILE:$dst,
738                                      (int_x86_ttmmultf32ps_internal GR16:$src1,
739                                       GR16:$src2, GR16:$src3, TILE:$src4,
740                                       TILE:$src5, TILE:$src6))]>;
741     }
        // Immediate form matched from int_x86_ttmmultf32ps: three 8-bit target
        // immediates and no outputs; expanded by the custom inserter.
742     let usesCustomInserter = 1 in {
743       def PTTMMULTF32PS : PseudoI<(outs),
744                                   (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
745                                   [(int_x86_ttmmultf32ps timm:$src1, timm:$src2,
746                                     timm:$src3)]>;
747     }
748   } // SchedRW = [WriteSystem]
749 } // HasAMXTF32, HasAMXTRANSPOSE, In64BitMode