1 //===---- X86InstrAMX.td - AMX Instruction Set Extension --*- tablegen -*--===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file describes the instructions that make up the Intel AMX instruction set.
12 //===----------------------------------------------------------------------===//
14 //===----------------------------------------------------------------------===//
// Tile configuration, load and store instructions that exist in two encodings.
// Suffix is appended to every record name; HasEGPR is an extra predicate that
// is added to the Predicates list. The defm lines that follow this multiclass
// instantiate it with ("", NoEGPR) under VEX and ("_EVEX", HasEGPR) under EVEX.
17 multiclass AMX_TILE_COMMON<string Suffix, Predicate HasEGPR> {
18 let Predicates = [HasAMXTILE, HasEGPR, In64BitMode] in {
// LDTILECFG is declared to define all eight tile registers TMM0-TMM7.
19 let hasSideEffects = 1,
20 Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
21 def LDTILECFG#Suffix : I<0x49, MRM0m, (outs), (ins opaquemem:$src),
23 [(int_x86_ldtilecfg addr:$src)]>,
// STTILECFG shares opcode 0x49 and form MRM0m with LDTILECFG.
25 let hasSideEffects = 1 in
26 def STTILECFG#Suffix : I<0x49, MRM0m, (outs), (ins opaquemem:$src),
28 [(int_x86_sttilecfg addr:$src)]>,
// The tile load/store records below all use opcode 0x4b and carry an empty
// selection pattern ([]).
31 def TILELOADD#Suffix : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
33 "tileloadd\t{$src, $dst|$dst, $src}", []>,
36 def TILELOADDT1#Suffix : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
38 "tileloaddt1\t{$src, $dst|$dst, $src}", []>,
41 def TILESTORED#Suffix : I<0x4b, MRMDestMemFSIB, (outs),
42 (ins sibmem:$dst, TILE:$src),
43 "tilestored\t{$src, $dst|$dst, $src}", []>,
// Core AMX-TILE records; this let applies SchedRW = [WriteSystem] to them.
48 let SchedRW = [WriteSystem] in {
// Two instantiations of AMX_TILE_COMMON with an empty defm name: unsuffixed
// records (VEX, NoEGPR) and "_EVEX"-suffixed records (EVEX, HasEGPR, NoCD8).
49 defm "" : AMX_TILE_COMMON<"", NoEGPR>, VEX;
50 defm "" : AMX_TILE_COMMON<"_EVEX", HasEGPR>, EVEX, NoCD8;
52 let Predicates = [HasAMXTILE, In64BitMode] in {
// TILERELEASE is declared to define TMM0-TMM7 and is selected from
// int_x86_tilerelease.
53 let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
54 def TILERELEASE : I<0x49, MRM_C0, (outs), (ins),
55 "tilerelease", [(int_x86_tilerelease)]>, VEX, T8, PS;
56 def TILEZERO : I<0x49, MRMr0, (outs TILE:$dst), (ins),
57 "tilezero\t$dst", []>,
60 // Pseudo instructions for register allocation (RA).
// NOTE(review): the GR16 operands of the ...V pseudos presumably carry the
// tile shape; confirm against the lowering code.
61 let isPseudo = true, mayLoad = 1, hasSideEffects = 1,
62 Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
63 def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src), []>;
64 let isPseudo = true, mayLoad = 1 in
65 def PTILELOADDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
67 opaquemem:$src3), []>;
68 let isPseudo = true, mayLoad = 1 in
69 def PTILELOADDT1V : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
71 opaquemem:$src3), []>;
72 let isPseudo = true, mayStore = 1 in
73 def PTILESTOREDV : PseudoI<(outs), (ins GR16:$src1,
74 GR16:$src2, opaquemem:$src3,
// PTILEZEROV is flagged rematerializable and as cheap as a move; it is
// selected from int_x86_tilezero_internal.
76 let isPseudo = true, isReMaterializable = 1, isAsCheapAsAMove = 1,
77 canFoldAsLoad = 1, usesCustomInserter = 1 in
78 def PTILEZEROV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2),
79 [(set TILE:$dst, (int_x86_tilezero_internal
80 GR16:$src1, GR16:$src2))]>;
82 let usesCustomInserter = 1 in {
83 // Pseudo instructions, using immediates instead of tile registers.
84 // To be translated to the actual instructions in X86ISelLowering.cpp
86 def PTILELOADD : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>;
88 def PTILELOADDT1 : PseudoI<(outs), (ins u8imm:$src1,
91 def PTILESTORED : PseudoI<(outs), (ins i8mem:$dst, u8imm:$src), []>;
92 def PTILEZERO : PseudoI<(outs), (ins u8imm:$src),
93 [(int_x86_tilezero timm:$src)]>;
// AMX-INT8 records, gated on HasAMXINT8 and In64BitMode.
98 let Predicates = [HasAMXINT8, In64BitMode] in {
99 let SchedRW = [WriteSystem] in {
// The four TDPB*D records share opcode 0x5e and form MRMSrcReg4VOp3; the
// constraint ties $src1 to $dst.
100 let Constraints = "$src1 = $dst" in {
101 def TDPBSSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
102 (ins TILE:$src1, TILE:$src2, TILE:$src3),
103 "tdpbssd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
105 def TDPBSUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
106 (ins TILE:$src1, TILE:$src2, TILE:$src3),
107 "tdpbsud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
109 def TDPBUSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
110 (ins TILE:$src1, TILE:$src2, TILE:$src3),
111 "tdpbusd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
113 def TDPBUUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
114 (ins TILE:$src1, TILE:$src2, TILE:$src3),
115 "tdpbuud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
119 // Pseudo instructions for register allocation (RA).
// Each ...V pseudo takes three GR16 operands and three TILE operands, ties
// $src4 to $dst, and references the matching *_internal intrinsic.
120 let isPseudo = true, Constraints = "$src4 = $dst" in {
121 def PTDPBSSDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
122 GR16:$src2, GR16:$src3, TILE:$src4,
123 TILE:$src5, TILE:$src6),
125 (int_x86_tdpbssd_internal GR16:$src1, GR16:$src2,
126 GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
127 def PTDPBSUDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
128 GR16:$src2, GR16:$src3, TILE:$src4,
129 TILE:$src5, TILE:$src6),
131 (int_x86_tdpbsud_internal GR16:$src1, GR16:$src2,
132 GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
133 def PTDPBUSDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
134 GR16:$src2, GR16:$src3, TILE:$src4,
135 TILE:$src5, TILE:$src6),
137 (int_x86_tdpbusd_internal GR16:$src1, GR16:$src2,
138 GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
139 def PTDPBUUDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
140 GR16:$src2, GR16:$src3, TILE:$src4,
141 TILE:$src5, TILE:$src6),
143 (int_x86_tdpbuud_internal GR16:$src1, GR16:$src2,
144 GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
147 let usesCustomInserter = 1 in {
148 // Pseudo instructions, using immediates instead of tile registers.
149 // To be translated to the actual instructions in X86ISelLowering.cpp
// All three operands are u8imm and are matched as timm in the patterns.
150 def PTDPBSSD : PseudoI<(outs), (ins u8imm:$src1,
151 u8imm:$src2, u8imm:$src3),
152 [(int_x86_tdpbssd timm:$src1,
153 timm:$src2, timm:$src3)]>;
154 def PTDPBSUD : PseudoI<(outs), (ins u8imm:$src1,
155 u8imm:$src2, u8imm:$src3),
156 [(int_x86_tdpbsud timm:$src1,
157 timm:$src2, timm:$src3)]>;
158 def PTDPBUSD : PseudoI<(outs), (ins u8imm:$src1,
159 u8imm:$src2, u8imm:$src3),
160 [(int_x86_tdpbusd timm:$src1,
161 timm:$src2, timm:$src3)]>;
162 def PTDPBUUD : PseudoI<(outs), (ins u8imm:$src1,
163 u8imm:$src2, u8imm:$src3),
164 [(int_x86_tdpbuud timm:$src1,
165 timm:$src2, timm:$src3)]>;
// AMX-BF16 records, gated on HasAMXBF16 and In64BitMode.
170 let Predicates = [HasAMXBF16, In64BitMode] in {
171 let SchedRW = [WriteSystem] in {
// Real instruction: opcode 0x5c, $src1 tied to $dst, no selection pattern.
172 let Constraints = "$src1 = $dst" in
173 def TDPBF16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
174 (ins TILE:$src1, TILE:$src2, TILE:$src3),
175 "tdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
176 []>, VEX, VVVV, T8, XS;
178 // Pseudo instruction for register allocation (RA).
179 let isPseudo = true, Constraints = "$src4 = $dst" in
180 def PTDPBF16PSV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
181 GR16:$src2, GR16:$src3, TILE:$src4,
182 TILE:$src5, TILE:$src6),
184 (int_x86_tdpbf16ps_internal GR16:$src1,
185 GR16:$src2, GR16:$src3, TILE:$src4,
186 TILE:$src5, TILE:$src6))]>;
188 let usesCustomInserter = 1 in {
189 // Pseudo instructions, using immediates instead of tile registers.
190 // To be translated to the actual instructions in X86ISelLowering.cpp
191 def PTDPBF16PS : PseudoI<(outs), (ins u8imm:$src1,
192 u8imm:$src2, u8imm:$src3),
193 [(int_x86_tdpbf16ps timm:$src1,
194 timm:$src2, timm:$src3)]>;
197 } // HasAMXBF16, In64BitMode
// AMX-FP16 records, gated on HasAMXFP16 and In64BitMode.
200 let Predicates = [HasAMXFP16, In64BitMode] in {
201 let SchedRW = [WriteSystem] in {
// The asm string names $src1 where sibling records name $dst; the constraint
// below ties the two operands together.
202 let Constraints = "$src1 = $dst" in {
203 def TDPFP16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
204 (ins TILE:$src1, TILE:$src2, TILE:$src3),
205 "tdpfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
206 []>, VEX, VVVV, T8, XD;
209 // Pseudo instruction for register allocation (RA).
210 let isPseudo = true, Constraints = "$src4 = $dst" in {
211 def PTDPFP16PSV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
212 GR16:$src2, GR16:$src3, TILE:$src4,
213 TILE:$src5, TILE:$src6),
215 (int_x86_tdpfp16ps_internal GR16:$src1,
216 GR16:$src2, GR16:$src3, TILE:$src4,
217 TILE:$src5, TILE:$src6))]>;
// Immediate-operand pseudo expanded through the custom inserter.
220 let usesCustomInserter = 1 in {
221 def PTDPFP16PS : PseudoI<(outs), (ins u8imm:$src1,
222 u8imm:$src2, u8imm:$src3),
223 [(int_x86_tdpfp16ps timm:$src1,
224 timm:$src2, timm:$src3)]>;
227 } // HasAMXFP16, In64BitMode
// AMX-COMPLEX records, gated on HasAMXCOMPLEX and In64BitMode.
229 let Predicates = [HasAMXCOMPLEX, In64BitMode] in {
230 let SchedRW = [WriteSystem] in {
// Both real instructions use opcode 0x6c with $src1 tied to $dst; their
// trailing encoding class lists differ.
231 let Constraints = "$src1 = $dst" in {
232 def TCMMIMFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
233 (ins TILE:$src1, TILE:$src2, TILE:$src3),
234 "tcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
235 []>, T8, PD, VEX, VVVV;
236 def TCMMRLFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
237 (ins TILE:$src1, TILE:$src2, TILE:$src3),
238 "tcmmrlfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
239 []>, VEX, VVVV, WIG, T8;
241 } // Constraints = "$src1 = $dst"
// ...V pseudos: three GR16 operands plus three TILE operands, with $src4 tied
// to $dst; each references the matching *_internal intrinsic.
243 let Constraints = "$src4 = $dst" in {
244 def PTCMMIMFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
245 GR16:$src2, GR16:$src3, TILE:$src4,
246 TILE:$src5, TILE:$src6),
248 (int_x86_tcmmimfp16ps_internal GR16:$src1, GR16:$src2,
249 GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
250 def PTCMMRLFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
251 GR16:$src2, GR16:$src3, TILE:$src4,
252 TILE:$src5, TILE:$src6),
254 (int_x86_tcmmrlfp16ps_internal GR16:$src1, GR16:$src2,
255 GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
// Immediate-operand pseudos expanded through the custom inserter.
258 let usesCustomInserter = 1 in {
259 def PTCMMIMFP16PS : PseudoI<(outs), (ins u8imm:$src1,
260 u8imm:$src2, u8imm:$src3),
261 [(int_x86_tcmmimfp16ps timm:$src1,
262 timm:$src2, timm:$src3)]>;
263 def PTCMMRLFP16PS : PseudoI<(outs), (ins u8imm:$src1,
264 u8imm:$src2, u8imm:$src3),
265 [(int_x86_tcmmrlfp16ps timm:$src1,
266 timm:$src2, timm:$src3)]>;
268 } // SchedRW = [WriteSystem]
// AMX-FP8 records, gated on HasAMXFP8 and In64BitMode.
272 let Predicates = [HasAMXFP8, In64BitMode] in {
273 let SchedRW = [WriteSystem] in {
274 let Constraints = "$src1 = $dst" in {
// Shared shape for the FP8 instructions: the caller supplies the opcode byte
// and the mnemonic, and the operand part of the asm string is appended here.
275 class AMX_FP8_BASE<bits<8> Opcode, string Opstr> :
276 I<Opcode, MRMSrcReg4VOp3, (outs TILE:$dst),
277 (ins TILE:$src1, TILE:$src2, TILE:$src3),
278 !strconcat(Opstr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
// All four records use opcode 0xfd in T_MAP5 and differ only in the prefix
// class (PS, XD, XS, PD).
282 def TDPBF8PS : AMX_FP8_BASE<0xfd, "tdpbf8ps">, T_MAP5, PS;
283 def TDPBHF8PS : AMX_FP8_BASE<0xfd, "tdpbhf8ps">, T_MAP5, XD;
284 def TDPHBF8PS : AMX_FP8_BASE<0xfd, "tdphbf8ps">, T_MAP5, XS;
285 def TDPHF8PS : AMX_FP8_BASE<0xfd, "tdphf8ps">, T_MAP5, PD;
287 let usesCustomInserter = 1 in {
288 // Pseudo instructions, using immediates instead of tile registers.
289 // To be translated to the actual instructions in X86ISelLowering.cpp
290 def PTDPBF8PS : PseudoI<(outs),
291 (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
292 [(int_x86_tdpbf8ps timm:$src1, timm:$src2,
294 def PTDPBHF8PS : PseudoI<(outs),
295 (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
296 [(int_x86_tdpbhf8ps timm:$src1, timm:$src2,
298 def PTDPHBF8PS : PseudoI<(outs),
299 (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
300 [(int_x86_tdphbf8ps timm:$src1, timm:$src2,
302 def PTDPHF8PS : PseudoI<(outs),
303 (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
304 [(int_x86_tdphf8ps timm:$src1, timm:$src2,
// ...V pseudos: three GR16 operands plus three TILE operands, with $src4 tied
// to $dst; each references the matching *_internal intrinsic.
308 let Constraints = "$src4 = $dst" in {
309 def PTDPBF8PSV : PseudoI<(outs TILE:$dst),
310 (ins GR16:$src1, GR16:$src2, GR16:$src3,
311 TILE:$src4, TILE:$src5, TILE:$src6),
313 (int_x86_tdpbf8ps_internal GR16:$src1,
314 GR16:$src2, GR16:$src3, TILE:$src4,
315 TILE:$src5, TILE:$src6))]>;
316 def PTDPBHF8PSV : PseudoI<(outs TILE:$dst),
317 (ins GR16:$src1, GR16:$src2, GR16:$src3,
318 TILE:$src4, TILE:$src5, TILE:$src6),
320 (int_x86_tdpbhf8ps_internal GR16:$src1,
321 GR16:$src2, GR16:$src3, TILE:$src4,
322 TILE:$src5, TILE:$src6))]>;
323 def PTDPHBF8PSV : PseudoI<(outs TILE:$dst),
324 (ins GR16:$src1, GR16:$src2, GR16:$src3,
325 TILE:$src4, TILE:$src5, TILE:$src6),
327 (int_x86_tdphbf8ps_internal GR16:$src1,
328 GR16:$src2, GR16:$src3, TILE:$src4,
329 TILE:$src5, TILE:$src6))]>;
330 def PTDPHF8PSV : PseudoI<(outs TILE:$dst),
331 (ins GR16:$src1, GR16:$src2, GR16:$src3,
332 TILE:$src4, TILE:$src5, TILE:$src6),
334 (int_x86_tdphf8ps_internal GR16:$src1,
335 GR16:$src2, GR16:$src3, TILE:$src4,
336 TILE:$src5, TILE:$src6))]>;
// Pattern-less pseudos that take or produce a TILEPair operand together with
// an opaquemem operand.
// NOTE(review): presumably used to spill and reload tile pairs; confirm
// against the code that expands them.
341 let Predicates = [HasAMXTILE, In64BitMode], isPseudo = true, SchedRW = [WriteSystem] in {
343 def PTILEPAIRSTORE : PseudoI<(outs), (ins opaquemem:$src1, TILEPair:$src2), []>;
345 def PTILEPAIRLOAD : PseudoI<(outs TILEPair:$dst), (ins opaquemem:$src), []>;
// Generates the four T2RPNTLVW tile-pair load records for one encoding.
//   op1    - opcode of the records without the T1 infix
//   op2    - opcode of the records with the T1 infix
//   rs     - infix added to the record name and, lower-cased, to the mnemonic
//   suffix - record-name suffix (the instantiations pass "" or "_EVEX")
// The Z0 records use prefix class PS and the Z1 records use PD.
348 multiclass T2RPNTLVW_Base<bits<8> op1, bits<8> op2, string rs, string suffix> {
349 def Z0#rs#suffix : I<op1, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
350 "t2rpntlvwz0" #!tolower(rs)# "\t{$src, $dst|$dst, $src}", []>, PS;
351 def Z0#rs#T1#suffix : I<op2, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
352 "t2rpntlvwz0" #!tolower(rs)# "t1\t{$src, $dst|$dst, $src}", []>, PS;
353 def Z1#rs#suffix : I<op1, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
354 "t2rpntlvwz1" #!tolower(rs)# "\t{$src, $dst|$dst, $src}", []>, PD;
355 def Z1#rs#T1#suffix : I<op2, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
356 "t2rpntlvwz1" #!tolower(rs)# "t1\t{$src, $dst|$dst, $src}", []>, PD;
// Plain forms: opcodes 0x6e/0x6f in T8, first VEX and then EVEX (needs HasEGPR).
359 let Predicates = [HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in
360 defm T2RPNTLVW : T2RPNTLVW_Base<0x6e, 0x6f, "", "">, T8, VEX;
362 let Predicates = [HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in
363 defm T2RPNTLVW : T2RPNTLVW_Base<0x6e, 0x6f, "", "_EVEX">, T8, EVEX, NoCD8;
// "RS" forms: opcodes 0xf8/0xf9 in T_MAP5, additionally gated on HasAMXMOVRS.
365 let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in
366 defm T2RPNTLVW : T2RPNTLVW_Base<0xf8, 0xf9, "RS", "">, T_MAP5, VEX;
368 let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in
369 defm T2RPNTLVW : T2RPNTLVW_Base<0xf8, 0xf9, "RS", "_EVEX">, T_MAP5, EVEX, NoCD8;
// AMX-TRANSPOSE records, gated on HasAMXTRANSPOSE and In64BitMode.
371 let Predicates = [HasAMXTRANSPOSE, In64BitMode] in {
372 let SchedRW = [WriteSystem] in {
// Real instruction: tile source, tile destination, no selection pattern.
373 def TTRANSPOSED : I<0x5f, MRMSrcReg, (outs TILE:$dst), (ins TILE:$src),
374 "ttransposed\t{$src, $dst|$dst, $src}", []>, VEX, T8, XS;
// ...V pseudos for the tile-pair loads: three GR16 operands plus an opaquemem
// operand, producing a TILEPair.
375 let isPseudo = true in {
376 def PT2RPNTLVWZ0V : PseudoI<(outs TILEPair:$dst),
377 (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
379 def PT2RPNTLVWZ0T1V : PseudoI<(outs TILEPair:$dst),
380 (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
382 def PT2RPNTLVWZ1V : PseudoI<(outs TILEPair:$dst),
383 (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
385 def PT2RPNTLVWZ1T1V : PseudoI<(outs TILEPair:$dst),
386 (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
// PTTRANSPOSEDV references int_x86_ttransposed_internal.
390 def PTTRANSPOSEDV : PseudoI<(outs TILE:$dst),
391 (ins GR16:$src1, GR16:$src2, TILE:$src),
393 (int_x86_ttransposed_internal GR16:$src1, GR16:$src2,
// Immediate-operand pseudos expanded through the custom inserter.
396 let usesCustomInserter = 1 in {
397 def PT2RPNTLVWZ0 : PseudoI<(outs), (ins u8imm:$dst,
399 def PT2RPNTLVWZ0T1 : PseudoI<(outs), (ins u8imm:$dst,
401 def PT2RPNTLVWZ1 : PseudoI<(outs), (ins u8imm:$dst,
403 def PT2RPNTLVWZ1T1 : PseudoI<(outs), (ins u8imm:$dst,
405 def PTTRANSPOSED : PseudoI<(outs), (ins u8imm:$dst, u8imm:$src),
406 [(int_x86_ttransposed timm:$dst, timm:$src)]>;
409 } // HasAMXTRANSPOSE, In64BitMode
// TTDPBF16PS records; they require both HasAMXBF16 and HasAMXTRANSPOSE.
411 let Predicates = [HasAMXBF16, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in {
// Real instruction: opcode 0x6c, $src1 tied to $dst.
412 let Constraints = "$src1 = $dst" in
413 def TTDPBF16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
414 (ins TILE:$src1, TILE:$src2, TILE:$src3),
415 "ttdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
416 []>, VEX, VVVV, T8,XS;
// ...V pseudo referencing int_x86_ttdpbf16ps_internal, $src4 tied to $dst.
417 let Constraints = "$src4 = $dst" in
418 def PTTDPBF16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
419 GR16:$src2, GR16:$src3, TILE:$src4,
420 TILE:$src5, TILE:$src6),
422 (int_x86_ttdpbf16ps_internal GR16:$src1, GR16:$src2,
423 GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
// Immediate-operand pseudo expanded through the custom inserter.
424 let usesCustomInserter = 1 in
425 def PTTDPBF16PS : PseudoI<(outs), (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
426 [(int_x86_ttdpbf16ps timm:$src1, timm:$src2, timm:$src3)]>;
// TTDPFP16PS records; they require both HasAMXFP16 and HasAMXTRANSPOSE.
429 let Predicates = [HasAMXFP16, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in {
// Real instruction: opcode 0x6c, $src1 tied to $dst.
430 let Constraints = "$src1 = $dst" in
431 def TTDPFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
432 (ins TILE:$src1, TILE:$src2, TILE:$src3),
433 "ttdpfp16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
434 []>, VEX, VVVV, T8,XD;
// ...V pseudo referencing int_x86_ttdpfp16ps_internal, $src4 tied to $dst.
435 let Constraints = "$src4 = $dst" in
436 def PTTDPFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
437 GR16:$src2, GR16:$src3, TILE:$src4,
438 TILE:$src5, TILE:$src6),
440 (int_x86_ttdpfp16ps_internal GR16:$src1, GR16:$src2,
441 GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
// Immediate-operand pseudo expanded through the custom inserter.
442 let usesCustomInserter = 1 in
443 def PTTDPFP16PS : PseudoI<(outs), (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
444 [(int_x86_ttdpfp16ps timm:$src1, timm:$src2, timm:$src3)]>;
// Records that require both HasAMXCOMPLEX and HasAMXTRANSPOSE.
447 let Predicates = [HasAMXCOMPLEX, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in {
// The three-source instructions share opcode 0x6b and differ in prefix class
// (XD, XS, PS); $src1 is tied to $dst.
448 let Constraints = "$src1 = $dst" in {
449 def TTCMMIMFP16PS : I<0x6b, MRMSrcReg4VOp3, (outs TILE:$dst),
450 (ins TILE:$src1, TILE:$src2, TILE:$src3),
451 "ttcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
452 []>, VEX, VVVV, T8,XD;
453 def TTCMMRLFP16PS: I<0x6b, MRMSrcReg4VOp3, (outs TILE:$dst),
454 (ins TILE:$src1, TILE:$src2, TILE:$src3),
455 "ttcmmrlfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
456 []>, VEX, VVVV, T8,XS;
457 def TCONJTCMMIMFP16PS : I<0x6b, MRMSrcReg4VOp3, (outs TILE:$dst),
458 (ins TILE:$src1, TILE:$src2, TILE:$src3),
459 "tconjtcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
460 []>, VEX, VVVV, WIG, T8,PS;
// TCONJTFP16 takes a single tile source and uses prefix class PD.
462 def TCONJTFP16 : I<0x6b, MRMSrcReg, (outs TILE:$dst), (ins TILE:$src),
463 "tconjtfp16\t{$src, $dst|$dst, $src}", []>, VEX, T8,PD;
// ...V pseudos referencing the matching *_internal intrinsics, with $src4
// tied to $dst.
465 let Constraints = "$src4 = $dst" in {
466 def PTTCMMIMFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
467 GR16:$src2, GR16:$src3, TILE:$src4,
468 TILE:$src5, TILE:$src6),
470 (int_x86_ttcmmimfp16ps_internal GR16:$src1, GR16:$src2,
471 GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
472 def PTTCMMRLFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
473 GR16:$src2, GR16:$src3, TILE:$src4,
474 TILE:$src5, TILE:$src6),
476 (int_x86_ttcmmrlfp16ps_internal GR16:$src1, GR16:$src2,
477 GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
478 def PTCONJTCMMIMFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
479 GR16:$src2, GR16:$src3, TILE:$src4,
480 TILE:$src5, TILE:$src6),
482 (int_x86_tconjtcmmimfp16ps_internal GR16:$src1, GR16:$src2,
483 GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
// Single-source ...V pseudo: two GR16 operands plus one TILE operand.
485 def PTCONJTFP16V : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2, TILE:$src3),
486 [(set TILE: $dst, (int_x86_tconjtfp16_internal GR16:$src1, GR16:$src2, TILE:$src3))]>;
// Immediate-operand pseudos expanded through the custom inserter.
488 let usesCustomInserter = 1 in {
489 def PTTCMMIMFP16PS : PseudoI<(outs), (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
490 [(int_x86_ttcmmimfp16ps timm:$src1, timm:$src2, timm:$src3)]>;
491 def PTTCMMRLFP16PS : PseudoI<(outs), (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
492 [(int_x86_ttcmmrlfp16ps timm:$src1, timm:$src2, timm:$src3)]>;
493 def PTCONJTCMMIMFP16PS : PseudoI<(outs), (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
494 [(int_x86_tconjtcmmimfp16ps timm:$src1, timm:$src2, timm:$src3)]>;
495 def PTCONJTFP16 : PseudoI<(outs), (ins u8imm:$dst, u8imm:$src),
496 [(int_x86_tconjtfp16 timm:$dst, timm:$src)]>;
// Pseudos for the "RS" tile-pair loads; they require HasAMXMOVRS and
// HasAMXTRANSPOSE.
500 let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in {
// ...V pseudos: three GR16 operands plus an opaquemem operand, producing a
// TILEPair.
501 let isPseudo = true in {
502 def PT2RPNTLVWZ0RSV : PseudoI<(outs TILEPair:$dst),
503 (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
505 def PT2RPNTLVWZ0RST1V : PseudoI<(outs TILEPair:$dst),
506 (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
508 def PT2RPNTLVWZ1RSV : PseudoI<(outs TILEPair:$dst),
509 (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
511 def PT2RPNTLVWZ1RST1V : PseudoI<(outs TILEPair:$dst),
512 (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
// Immediate-operand pseudos: the destination is a u8imm; no selection pattern.
515 let usesCustomInserter = 1 in {
516 def PT2RPNTLVWZ0RS : PseudoI<(outs), (ins u8imm:$dst, sibmem:$src1), []>;
517 def PT2RPNTLVWZ0RST1 : PseudoI<(outs), (ins u8imm:$dst, sibmem:$src1), []>;
518 def PT2RPNTLVWZ1RS : PseudoI<(outs), (ins u8imm:$dst, sibmem:$src1), []>;
519 def PT2RPNTLVWZ1RST1 : PseudoI<(outs), (ins u8imm:$dst, sibmem:$src1), []>;
521 } // HasAMXMOVRS, HasAMXTRANSPOSE
// Generates the two TILELOADDRS records for one encoding. `suffix` is the
// record-name suffix; the first def is named by the suffix alone and the
// second by "T1" plus the suffix. Both use opcode 0x4a in T8 and differ in
// prefix class (XD for the plain form, PD for the T1 form).
523 multiclass TILELOADDRS_Base<string suffix> {
524 def suffix : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst), (ins sibmem:$src1),
525 "tileloaddrs\t{$src1, $dst|$dst, $src1}", []>, T8, XD;
526 def T1#suffix : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst), (ins sibmem:$src1),
527 "tileloaddrst1\t{$src1, $dst|$dst, $src1}", []>, T8, PD;
// VEX instantiation with no suffix, then EVEX instantiation (needs HasEGPR)
// with the "_EVEX" suffix.
530 let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in
531 defm TILELOADDRS : TILELOADDRS_Base<"">, VEX;
533 let Predicates = [HasAMXMOVRS, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in
534 defm TILELOADDRS : TILELOADDRS_Base<"_EVEX">, EVEX, NoCD8;
// Pseudos for the TILELOADDRS loads, gated on HasAMXMOVRS and In64BitMode.
536 let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in {
// ...V pseudos: marked mayLoad, tile result, no selection pattern.
537 let isPseudo = true, mayLoad = 1 in {
538 def PTILELOADDRSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
540 opaquemem:$src3), []>;
541 def PTILELOADDRST1V : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
543 opaquemem:$src3), []>;
// Immediate-operand pseudos: the first operand is a u8imm and there is no
// register result.
546 let usesCustomInserter = 1, mayLoad = 1 in {
547 def PTILELOADDRS : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>;
548 def PTILELOADDRST1 : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>;
550 } // HasAMXMOVRS, In64BitMode
// TCVTROWD2PS in two forms, both producing a VR512 result from a TILE source:
//   rri - second source is an immediate (Ii8, opcode 0x7, TA map)
//   rre - second source is a GR32 register (opcode 0x4A, T8 map, VVVV)
552 multiclass m_tcvtrowd2ps {
553 let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
554 let SchedRW = [WriteSystem] in {
555 def rri : Ii8<0x7, MRMSrcReg, (outs VR512:$dst),
556 (ins TILE:$src1, i32u8imm:$src2),
557 "tcvtrowd2ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
558 []>, TA,XS, EVEX, EVEX_V512;
559 def rre : I<0x4A, MRMSrcReg4VOp3, (outs VR512:$dst),
560 (ins TILE:$src1, GR32:$src2),
561 "tcvtrowd2ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
562 []>, T8,XS, EVEX, VVVV, EVEX_V512;
564 } // HasAMXAVX512, HasAVX10_2_512, In64BitMode
// Single instantiation; the defm name TCVTROWD2PS prefixes the rri/rre names.
567 defm TCVTROWD2PS : m_tcvtrowd2ps;
// Pseudos for the tile-row conversion instructions that produce a VR512
// result, gated on HasAMXAVX512, HasAVX10_2_512 and In64BitMode.
569 let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
570 let SchedRW = [WriteSystem] in {
// Immediate-tile forms of TCVTROWD2PS: the tile is given as a u8imm (matched
// as timm) and the second operand is either an immediate or a GR32.
571 let usesCustomInserter = 1 in {
572 def PTCVTROWD2PSrri : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, i32u8imm:$src2),
573 [(set VR512:$dst, (int_x86_tcvtrowd2ps timm:$src1, imm:$src2))]>;
574 def PTCVTROWD2PSrre : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, GR32:$src2),
575 [(set VR512:$dst, (int_x86_tcvtrowd2ps timm:$src1, GR32:$src2))]>;
// ...V forms: two GR16 operands, a TILE operand and the second operand as an
// immediate (rriV) or GR32 (rreV); each pair references the same *_internal
// intrinsic.
578 def PTCVTROWD2PSrriV : PseudoI<(outs VR512:$dst),
579 (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
581 (int_x86_tcvtrowd2ps_internal GR16:$src1, GR16:$src2,
582 TILE:$src3, imm:$src4))]>;
583 def PTCVTROWD2PSrreV : PseudoI<(outs VR512:$dst),
584 (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
586 (int_x86_tcvtrowd2ps_internal GR16:$src1, GR16:$src2,
587 TILE:$src3, GR32:$src4))]>;
588 def PTCVTROWPS2BF16HrriV : PseudoI<(outs VR512:$dst),
589 (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
591 (int_x86_tcvtrowps2bf16h_internal GR16:$src1, GR16:$src2,
592 TILE:$src3, imm:$src4))]>;
593 def PTCVTROWPS2BF16HrreV : PseudoI<(outs VR512:$dst),
594 (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
596 (int_x86_tcvtrowps2bf16h_internal GR16:$src1, GR16:$src2,
597 TILE:$src3, GR32:$src4))]>;
598 def PTCVTROWPS2BF16LrriV : PseudoI<(outs VR512:$dst),
599 (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
601 (int_x86_tcvtrowps2bf16l_internal GR16:$src1, GR16:$src2,
602 TILE:$src3, imm:$src4))]>;
603 def PTCVTROWPS2BF16LrreV : PseudoI<(outs VR512:$dst),
604 (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
606 (int_x86_tcvtrowps2bf16l_internal GR16:$src1, GR16:$src2,
607 TILE:$src3, GR32:$src4))]>;
608 def PTCVTROWPS2PHHrriV : PseudoI<(outs VR512:$dst),
609 (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
611 (int_x86_tcvtrowps2phh_internal GR16:$src1, GR16:$src2,
612 TILE:$src3, imm:$src4))]>;
613 def PTCVTROWPS2PHHrreV : PseudoI<(outs VR512:$dst),
614 (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
616 (int_x86_tcvtrowps2phh_internal GR16:$src1, GR16:$src2,
617 TILE:$src3, GR32:$src4))]>;
618 def PTCVTROWPS2PHLrriV : PseudoI<(outs VR512:$dst),
619 (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
621 (int_x86_tcvtrowps2phl_internal GR16:$src1, GR16:$src2,
622 TILE:$src3, imm:$src4))]>;
623 def PTCVTROWPS2PHLrreV : PseudoI<(outs VR512:$dst),
624 (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
626 (int_x86_tcvtrowps2phl_internal GR16:$src1, GR16:$src2,
627 TILE:$src3, GR32:$src4))]>;
// Generates the real and pseudo records for one tile-row conversion.
//   Opcode1 - opcode of the register form (rre, T8 map)
//   Opcode2 - opcode of the immediate form (rri, Ii8, TA map)
//   Opstr   - mnemonic; also used to look up the intrinsic "int_x86_"#Opstr
//   P1, P2  - prefix arguments
// NOTE(review): P1 and P2 are not referenced on the lines visible here;
// confirm where they are applied.
631 multiclass AMXAVX512_BASE<bits<8> Opcode1, bits<8> Opcode2, string Opstr,
632 Prefix P1, Prefix P2> {
633 let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode], SchedRW = [WriteSystem] in {
635 def rre : I<Opcode1, MRMSrcReg4VOp3, (outs VR512:$dst),
636 (ins TILE:$src1, GR32:$src2),
637 !strconcat(Opstr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
638 []>, EVEX, VVVV, EVEX_V512, T8;
640 def rri : Ii8<Opcode2, MRMSrcReg, (outs VR512:$dst),
641 (ins TILE:$src1, i32u8imm:$src2),
642 !strconcat(Opstr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
643 []>, EVEX, EVEX_V512, TA;
// Immediate-tile pseudos; their record names are built explicitly as
// "P" # NAME # "rre" / "rri".
644 let usesCustomInserter = 1 in {
645 def "P"#NAME#"rre" : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, GR32:$src2),
647 (!cast<Intrinsic>("int_x86_"#Opstr) timm:$src1, GR32:$src2))]>;
648 def "P"#NAME#"rri" : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, i32u8imm:$src2),
650 (!cast<Intrinsic>("int_x86_"#Opstr) timm:$src1, imm:$src2))]>;
// The "H" instantiations pass opcodes (0x6d, 0x07) and the "L" instantiations
// pass (0x6d, 0x77); the prefix arguments differ between the four.
655 defm TCVTROWPS2PHH : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2phh", PS, PS>;
656 defm TCVTROWPS2PHL : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2phl", PD, XD>;
657 defm TCVTROWPS2BF16H : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2bf16h", XD, XD>;
658 defm TCVTROWPS2BF16L : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2bf16l", XS, XS>;
// TILEMOVROW in two forms, both producing a VR512 result from a TILE source:
//   rri - second source is an immediate (Ii8, opcode 0x7, TA map)
//   rre - second source is a GR32 register (opcode 0x4A, T8 map, VVVV)
// Both forms use prefix class PD.
660 multiclass m_tilemovrow {
661 let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
662 let SchedRW = [WriteSystem] in {
663 def rri : Ii8<0x7, MRMSrcReg, (outs VR512:$dst),
664 (ins TILE:$src1, u8imm:$src2),
665 "tilemovrow\t{$src2, $src1, $dst|$dst, $src1, $src2}",
666 []>, TA,PD, EVEX, EVEX_V512;
667 def rre : I<0x4A, MRMSrcReg4VOp3, (outs VR512:$dst),
668 (ins TILE:$src1, GR32:$src2),
669 "tilemovrow\t{$src2, $src1, $dst|$dst, $src1, $src2}",
670 []>, T8,PD, EVEX, VVVV, EVEX_V512;
672 } // HasAMXAVX512, HasAVX10_2_512, In64BitMode
// Single instantiation; the defm name TILEMOVROW prefixes the rri/rre names.
675 defm TILEMOVROW : m_tilemovrow;
// Pseudos for TILEMOVROW, gated on HasAMXAVX512, HasAVX10_2_512 and
// In64BitMode.
677 let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
678 let SchedRW = [WriteSystem] in {
// Immediate-tile forms: the tile is given as a u8imm (matched as timm).
679 let usesCustomInserter = 1 in {
680 def PTILEMOVROWrri : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, i32u8imm:$src2),
681 [(set VR512:$dst, (int_x86_tilemovrow timm:$src1, imm:$src2))]>;
682 def PTILEMOVROWrre : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, GR32:$src2),
683 [(set VR512:$dst, (int_x86_tilemovrow timm:$src1, GR32:$src2))]>;
// ...V forms: two GR16 operands plus a TILE operand; both reference
// int_x86_tilemovrow_internal.
686 def PTILEMOVROWrriV : PseudoI<(outs VR512:$dst),
687 (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
689 (int_x86_tilemovrow_internal GR16:$src1, GR16:$src2,
690 TILE:$src3, imm:$src4))]>;
691 def PTILEMOVROWrreV : PseudoI<(outs VR512:$dst),
692 (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
694 (int_x86_tilemovrow_internal GR16:$src1, GR16:$src2,
695 TILE:$src3, GR32:$src4))]>;
// AMX-TF32 records, gated on HasAMXTF32 and In64BitMode.
699 let Predicates = [HasAMXTF32, In64BitMode] in {
700 let SchedRW = [WriteSystem] in {
// Real instruction: opcode 0x48, prefix class PD, $src1 tied to $dst.
701 let Constraints = "$src1 = $dst" in {
702 def TMMULTF32PS: I<0x48, MRMSrcReg4VOp3, (outs TILE:$dst),
703 (ins TILE:$src1, TILE:$src2, TILE:$src3),
704 "tmmultf32ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
705 []>, VEX, VVVV, T8, PD;
// ...V pseudo referencing int_x86_tmmultf32ps_internal, $src4 tied to $dst.
707 let Constraints = "$src4 = $dst" in {
708 def PTMMULTF32PSV : PseudoI<(outs TILE:$dst),
709 (ins GR16:$src1, GR16:$src2, GR16:$src3,
710 TILE:$src4, TILE:$src5, TILE:$src6),
712 (int_x86_tmmultf32ps_internal GR16:$src1,
713 GR16:$src2, GR16:$src3, TILE:$src4,
714 TILE:$src5, TILE:$src6))]>;
// Immediate-operand pseudo expanded through the custom inserter.
716 let usesCustomInserter = 1 in {
717 def PTMMULTF32PS : PseudoI<(outs),
718 (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
719 [(int_x86_tmmultf32ps timm:$src1, timm:$src2,
722 } // SchedRW = [WriteSystem]
// Records that require both HasAMXTF32 and HasAMXTRANSPOSE.
725 let Predicates = [HasAMXTF32, HasAMXTRANSPOSE, In64BitMode] in {
726 let SchedRW = [WriteSystem] in {
// Real instruction: opcode 0x48 (the same opcode as TMMULTF32PS) with prefix
// class PS; $src1 is tied to $dst.
727 let Constraints = "$src1 = $dst" in {
728 def TTMMULTF32PS: I<0x48, MRMSrcReg4VOp3, (outs TILE:$dst),
729 (ins TILE:$src1, TILE:$src2, TILE:$src3),
730 "ttmmultf32ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
731 []>, VEX, VVVV, T8, PS;
// ...V pseudo referencing int_x86_ttmmultf32ps_internal, $src4 tied to $dst.
733 let Constraints = "$src4 = $dst" in {
734 def PTTMMULTF32PSV : PseudoI<(outs TILE:$dst),
735 (ins GR16:$src1, GR16:$src2, GR16:$src3,
736 TILE:$src4, TILE:$src5, TILE:$src6),
738 (int_x86_ttmmultf32ps_internal GR16:$src1,
739 GR16:$src2, GR16:$src3, TILE:$src4,
740 TILE:$src5, TILE:$src6))]>;
// Immediate-operand pseudo expanded through the custom inserter.
742 let usesCustomInserter = 1 in {
743 def PTTMMULTF32PS : PseudoI<(outs),
744 (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
745 [(int_x86_ttmmultf32ps timm:$src1, timm:$src2,
748 } // SchedRW = [WriteSystem]
749 } // HasAMXTF32, HasAMXTRANSPOSE