1 //===---- X86InstrAMX.td - AMX Instruction Set Extension --*- tablegen -*--===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file describes the instructions that make up the Intel AMX instruction
12 //===----------------------------------------------------------------------===//
14 //===----------------------------------------------------------------------===//
17 multiclass AMX_TILE_COMMON<string Suffix, Predicate HasEGPR> {
18 let Predicates = [HasAMXTILE, HasEGPR, In64BitMode] in {
19 let hasSideEffects = 1,
20 Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
21 def LDTILECFG#Suffix : I<0x49, MRM0m, (outs), (ins opaquemem:$src),
23 [(int_x86_ldtilecfg addr:$src)]>,
25 let hasSideEffects = 1 in
26 def STTILECFG#Suffix : I<0x49, MRM0m, (outs), (ins opaquemem:$src),
28 [(int_x86_sttilecfg addr:$src)]>,
31 def TILELOADD#Suffix : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
33 "tileloadd\t{$src, $dst|$dst, $src}", []>,
36 def TILELOADDT1#Suffix : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
38 "tileloaddt1\t{$src, $dst|$dst, $src}", []>,
41 def TILESTORED#Suffix : I<0x4b, MRMDestMemFSIB, (outs),
42 (ins sibmem:$dst, TILE:$src),
43 "tilestored\t{$src, $dst|$dst, $src}", []>,
48 let SchedRW = [WriteSystem] in {
// Instantiate AMX_TILE_COMMON twice: once with an empty name suffix under the
// NoEGPR predicate using VEX, and once with the "_EVEX" suffix under the
// HasEGPR predicate using EVEX and NoCD8.
49 defm "" : AMX_TILE_COMMON<"", NoEGPR>, VEX;
50 defm "" : AMX_TILE_COMMON<"_EVEX", HasEGPR>, EVEX, NoCD8;
52 let Predicates = [HasAMXTILE, In64BitMode] in {
// TILERELEASE takes no explicit operands and is selected for the
// int_x86_tilerelease intrinsic. All eight tile registers (TMM0-TMM7) are
// listed as implicit definitions of the instruction.
53 let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
54 def TILERELEASE : I<0x49, MRM_C0, (outs), (ins),
55 "tilerelease", [(int_x86_tilerelease)]>, VEX, T8, PS;
56 def TILEZERO : I<0x49, MRMr0, (outs TILE:$dst), (ins),
57 "tilezero\t$dst", []>,
60 // Pseudo instruction for RA.
// PLDTILECFGV: pseudo with a single memory operand and no selection pattern.
// It is flagged as a load with side effects and lists TMM0-TMM7 as implicit
// definitions.
61 let isPseudo = true, mayLoad = 1, hasSideEffects = 1,
62 Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
63 def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src), []>;
64 let isPseudo = true, mayLoad = 1 in
65 def PTILELOADDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
67 opaquemem:$src3), []>;
68 let isPseudo = true, mayLoad = 1 in
69 def PTILELOADDT1V : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
71 opaquemem:$src3), []>;
72 let isPseudo = true, mayStore = 1 in
73 def PTILESTOREDV : PseudoI<(outs), (ins GR16:$src1,
74 GR16:$src2, opaquemem:$src3,
76 let isPseudo = true, isReMaterializable = 1, isAsCheapAsAMove = 1,
78 def PTILEZEROV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2),
79 [(set TILE:$dst, (int_x86_tilezero_internal
80 GR16:$src1, GR16:$src2))]>;
82 let usesCustomInserter = 1 in {
83 // Pseudo instructions, using immediates instead of tile registers.
84 // To be translated to the actual instructions in X86ISelLowering.cpp
86 def PTILELOADD : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>;
88 def PTILELOADDT1 : PseudoI<(outs), (ins u8imm:$src1,
// PTILESTORED: immediate-form store pseudo. Operands are a memory destination
// and a u8imm source; no selection pattern is attached.
// NOTE(review): this uses i8mem for the address while PTILELOADD uses sibmem
// -- confirm this difference is intentional.
91 def PTILESTORED : PseudoI<(outs), (ins i8mem:$dst, u8imm:$src), []>;
// PTILEZERO: immediate-form pseudo with one u8imm operand and no outputs,
// selected for int_x86_tilezero with a target-immediate argument.
92 def PTILEZERO : PseudoI<(outs), (ins u8imm:$src),
93 [(int_x86_tilezero timm:$src)]>;
98 let Predicates = [HasAMXINT8, In64BitMode] in {
99 let SchedRW = [WriteSystem] in {
100 let Constraints = "$src1 = $dst" in {
101 def TDPBSSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
102 (ins TILE:$src1, TILE:$src2, TILE:$src3),
103 "tdpbssd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
105 def TDPBSUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
106 (ins TILE:$src1, TILE:$src2, TILE:$src3),
107 "tdpbsud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
109 def TDPBUSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
110 (ins TILE:$src1, TILE:$src2, TILE:$src3),
111 "tdpbusd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
113 def TDPBUUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
114 (ins TILE:$src1, TILE:$src2, TILE:$src3),
115 "tdpbuud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
119 // Pseudo instruction for RA.
120 let isPseudo = true, Constraints = "$src4 = $dst" in {
121 def PTDPBSSDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
122 GR16:$src2, GR16:$src3, TILE:$src4,
123 TILE:$src5, TILE:$src6),
125 (int_x86_tdpbssd_internal GR16:$src1, GR16:$src2,
126 GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
127 def PTDPBSUDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
128 GR16:$src2, GR16:$src3, TILE:$src4,
129 TILE:$src5, TILE:$src6),
131 (int_x86_tdpbsud_internal GR16:$src1, GR16:$src2,
132 GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
133 def PTDPBUSDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
134 GR16:$src2, GR16:$src3, TILE:$src4,
135 TILE:$src5, TILE:$src6),
137 (int_x86_tdpbusd_internal GR16:$src1, GR16:$src2,
138 GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
139 def PTDPBUUDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
140 GR16:$src2, GR16:$src3, TILE:$src4,
141 TILE:$src5, TILE:$src6),
143 (int_x86_tdpbuud_internal GR16:$src1, GR16:$src2,
144 GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
147 let usesCustomInserter = 1 in {
148 // Pseudo instructions, using immediates instead of tile registers.
149 // To be translated to the actual instructions in X86ISelLowering.cpp
// Immediate-form pseudos for the four TDPB* variants (SSD, SUD, USD, UUD).
// Each takes three u8imm operands, has no outputs, and is selected for the
// matching int_x86_tdpb* intrinsic with target-immediate arguments.
150 def PTDPBSSD : PseudoI<(outs), (ins u8imm:$src1,
151 u8imm:$src2, u8imm:$src3),
152 [(int_x86_tdpbssd timm:$src1,
153 timm:$src2, timm:$src3)]>;
// Same operand shape as PTDPBSSD; selected for int_x86_tdpbsud.
154 def PTDPBSUD : PseudoI<(outs), (ins u8imm:$src1,
155 u8imm:$src2, u8imm:$src3),
156 [(int_x86_tdpbsud timm:$src1,
157 timm:$src2, timm:$src3)]>;
// Same operand shape as PTDPBSSD; selected for int_x86_tdpbusd.
158 def PTDPBUSD : PseudoI<(outs), (ins u8imm:$src1,
159 u8imm:$src2, u8imm:$src3),
160 [(int_x86_tdpbusd timm:$src1,
161 timm:$src2, timm:$src3)]>;
// Same operand shape as PTDPBSSD; selected for int_x86_tdpbuud.
162 def PTDPBUUD : PseudoI<(outs), (ins u8imm:$src1,
163 u8imm:$src2, u8imm:$src3),
164 [(int_x86_tdpbuud timm:$src1,
165 timm:$src2, timm:$src3)]>;
170 let Predicates = [HasAMXBF16, In64BitMode] in {
171 let SchedRW = [WriteSystem] in {
// TDPBF16PS: three TILE inputs and one TILE output, with $src1 tied to $dst
// by the constraint below. The asm string prints $dst, $src2 and $src3; no
// selection pattern is attached.
172 let Constraints = "$src1 = $dst" in
173 def TDPBF16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
174 (ins TILE:$src1, TILE:$src2, TILE:$src3),
175 "tdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
176 []>, VEX, VVVV, T8, XS;
178 // Pseudo instruction for RA.
179 let isPseudo = true, Constraints = "$src4 = $dst" in
180 def PTDPBF16PSV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
181 GR16:$src2, GR16:$src3, TILE:$src4,
182 TILE:$src5, TILE:$src6),
184 (int_x86_tdpbf16ps_internal GR16:$src1,
185 GR16:$src2, GR16:$src3, TILE:$src4,
186 TILE:$src5, TILE:$src6))]>;
188 let usesCustomInserter = 1 in {
189 // Pseudo instructions, using immediates instead of tile registers.
190 // To be translated to the actual instructions in X86ISelLowering.cpp
// PTDPBF16PS: immediate-form pseudo with three u8imm operands and no outputs,
// selected for int_x86_tdpbf16ps with target-immediate arguments.
191 def PTDPBF16PS : PseudoI<(outs), (ins u8imm:$src1,
192 u8imm:$src2, u8imm:$src3),
193 [(int_x86_tdpbf16ps timm:$src1,
194 timm:$src2, timm:$src3)]>;
197 } // HasAMXBF16, In64BitMode
200 let Predicates = [HasAMXFP16, In64BitMode] in {
201 let SchedRW = [WriteSystem] in {
202 let Constraints = "$src1 = $dst" in {
// TDPFP16PS: three TILE inputs and one TILE output; no selection pattern is
// attached. The asm string names $src1 rather than $dst for the destination;
// the enclosing constraint "$src1 = $dst" ties the two operands together.
203 def TDPFP16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
204 (ins TILE:$src1, TILE:$src2, TILE:$src3),
205 "tdpfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
206 []>, VEX, VVVV, T8, XD;
209 // Pseudo instruction for RA.
210 let isPseudo = true, Constraints = "$src4 = $dst" in {
211 def PTDPFP16PSV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
212 GR16:$src2, GR16:$src3, TILE:$src4,
213 TILE:$src5, TILE:$src6),
215 (int_x86_tdpfp16ps_internal GR16:$src1,
216 GR16:$src2, GR16:$src3, TILE:$src4,
217 TILE:$src5, TILE:$src6))]>;
220 let usesCustomInserter = 1 in {
// PTDPFP16PS: immediate-form pseudo (defined under usesCustomInserter) with
// three u8imm operands and no outputs, selected for int_x86_tdpfp16ps with
// target-immediate arguments.
221 def PTDPFP16PS : PseudoI<(outs), (ins u8imm:$src1,
222 u8imm:$src2, u8imm:$src3),
223 [(int_x86_tdpfp16ps timm:$src1,
224 timm:$src2, timm:$src3)]>;
227 } // HasAMXFP16, In64BitMode
229 let Predicates = [HasAMXCOMPLEX, In64BitMode] in {
230 let SchedRW = [WriteSystem] in {
231 let Constraints = "$src1 = $dst" in {
// TCMMIMFP16PS: three TILE inputs and one TILE output; no selection pattern.
// The asm string names $src1 for the destination, which the enclosing
// constraint "$src1 = $dst" ties to $dst. Encoded with T8, PD, VEX, VVVV.
232 def TCMMIMFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
233 (ins TILE:$src1, TILE:$src2, TILE:$src3),
234 "tcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
235 []>, T8, PD, VEX, VVVV;
// TCMMRLFP16PS: same opcode byte (0x6c), form and operand list as
// TCMMIMFP16PS; it differs in the trailing encoding classes
// (VEX, VVVV, WIG, T8 with no PD).
236 def TCMMRLFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
237 (ins TILE:$src1, TILE:$src2, TILE:$src3),
238 "tcmmrlfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
239 []>, VEX, VVVV, WIG, T8;
241 } // Constraints = "$src1 = $dst"
243 let Constraints = "$src4 = $dst" in {
244 def PTCMMIMFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
245 GR16:$src2, GR16:$src3, TILE:$src4,
246 TILE:$src5, TILE:$src6),
248 (int_x86_tcmmimfp16ps_internal GR16:$src1, GR16:$src2,
249 GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
250 def PTCMMRLFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
251 GR16:$src2, GR16:$src3, TILE:$src4,
252 TILE:$src5, TILE:$src6),
254 (int_x86_tcmmrlfp16ps_internal GR16:$src1, GR16:$src2,
255 GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
258 let usesCustomInserter = 1 in {
// Immediate-form pseudos (defined under usesCustomInserter). Each takes three
// u8imm operands, has no outputs, and is selected for the matching
// int_x86_tcmm*fp16ps intrinsic with target-immediate arguments.
259 def PTCMMIMFP16PS : PseudoI<(outs), (ins u8imm:$src1,
260 u8imm:$src2, u8imm:$src3),
261 [(int_x86_tcmmimfp16ps timm:$src1,
262 timm:$src2, timm:$src3)]>;
// Same operand shape as PTCMMIMFP16PS; selected for int_x86_tcmmrlfp16ps.
263 def PTCMMRLFP16PS : PseudoI<(outs), (ins u8imm:$src1,
264 u8imm:$src2, u8imm:$src3),
265 [(int_x86_tcmmrlfp16ps timm:$src1,
266 timm:$src2, timm:$src3)]>;
268 } // SchedRW = [WriteSystem]