1 //===-- SIInstrInfo.td -----------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
// Wavefront-size predicates. Each one pairs a codegen-side check of
// Subtarget->getWavefrontSize() with an assembler predicate on the matching
// FeatureWavefrontSize32 / FeatureWavefrontSize64 subtarget feature.
9 def isWave32 : Predicate<"Subtarget->getWavefrontSize() == 32">,
10 AssemblerPredicate <(all_of FeatureWavefrontSize32)>;
11 def isWave64 : Predicate<"Subtarget->getWavefrontSize() == 64">,
12 AssemblerPredicate <(all_of FeatureWavefrontSize64)>;
// A MnemonicAlias (From -> To, optionally restricted to VariantName) that
// additionally inherits PredicateControl.
// NOTE(review): presumably this lets an alias be gated by subtarget
// predicates; confirm against the definition of PredicateControl.
14 class AMDGPUMnemonicAlias<string From, string To, string VariantName = "">
15 : MnemonicAlias<From, To, VariantName>, PredicateControl;
17 // Except for the NONE field, this must be kept in sync with the
18 // SIEncodingFamily enum in SIInstrInfo.cpp and the columns of the
19 // getMCOpcodeGen table.
20 def SIEncodingFamily {
36 //===----------------------------------------------------------------------===//
38 //===----------------------------------------------------------------------===//
40 class GFXGen<Predicate pred, string dn, string suffix, int sub> {
41 Predicate AssemblerPredicate = pred;
42 string DecoderNamespace = dn;
43 string Suffix = suffix;
// One GFXGen record per encoding generation. The arguments are, in order:
// the assembler predicate, the decoder namespace, the name suffix, and the
// SIEncodingFamily value passed as the `sub` parameter.
47 def GFX12Gen : GFXGen<isGFX12Only, "GFX12", "_gfx12", SIEncodingFamily.GFX12>;
48 def GFX11Gen : GFXGen<isGFX11Only, "GFX11", "_gfx11", SIEncodingFamily.GFX11>;
49 def GFX10Gen : GFXGen<isGFX10Only, "GFX10", "_gfx10", SIEncodingFamily.GFX10>;
51 //===----------------------------------------------------------------------===//
53 //===----------------------------------------------------------------------===//
55 def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;
57 def SDTSBufferLoad : SDTypeProfile<1, 3,
59 SDTCisVT<1, v4i32>, // rsrc
60 SDTCisVT<2, i32>, // offset(imm)
61 SDTCisVT<3, i32>]>; // cachepolicy
// SBUFFER_LOAD node family. Every variant uses the SDTSBufferLoad type
// profile and the same properties (SDNPMayLoad, SDNPMemOperand); they differ
// only in the AMDGPUISD opcode (full load, signed/unsigned byte and short).
// Unlike the SIbuffer_load nodes, none of these lists SDNPHasChain.
63 def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD", SDTSBufferLoad,
64 [SDNPMayLoad, SDNPMemOperand]>;
66 def SIsbuffer_load_byte : SDNode<"AMDGPUISD::SBUFFER_LOAD_BYTE", SDTSBufferLoad,
67 [SDNPMayLoad, SDNPMemOperand]>;
69 def SIsbuffer_load_ubyte
70 : SDNode<"AMDGPUISD::SBUFFER_LOAD_UBYTE", SDTSBufferLoad,
71 [SDNPMayLoad, SDNPMemOperand]>;
73 def SIsbuffer_load_short
74 : SDNode<"AMDGPUISD::SBUFFER_LOAD_SHORT", SDTSBufferLoad,
75 [SDNPMayLoad, SDNPMemOperand]>;
77 def SIsbuffer_load_ushort
78 : SDNode<"AMDGPUISD::SBUFFER_LOAD_USHORT", SDTSBufferLoad,
79 [SDNPMayLoad, SDNPMemOperand]>;
81 def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT",
82 SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>]>,
83 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue]
86 def SDTAtomic2_f32 : SDTypeProfile<1, 2, [
87 SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1>
90 // load_d16_{lo|hi} ptr, tied_input
91 def SIload_d16 : SDTypeProfile<1, 2, [
97 def SDTtbuffer_load : SDTypeProfile<1, 8,
99 SDTCisVT<1, v4i32>, // rsrc
100 SDTCisVT<2, i32>, // vindex(VGPR)
101 SDTCisVT<3, i32>, // voffset(VGPR)
102 SDTCisVT<4, i32>, // soffset(SGPR)
103 SDTCisVT<5, i32>, // offset(imm)
104 SDTCisVT<6, i32>, // format(imm)
105 SDTCisVT<7, i32>, // cachepolicy, swizzled buffer(imm)
106 SDTCisVT<8, i1> // idxen(imm)
109 def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", SDTtbuffer_load,
110 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
111 def SItbuffer_load_d16 : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT_D16",
113 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
115 def SDTtbuffer_store : SDTypeProfile<0, 9,
117 SDTCisVT<1, v4i32>, // rsrc
118 SDTCisVT<2, i32>, // vindex(VGPR)
119 SDTCisVT<3, i32>, // voffset(VGPR)
120 SDTCisVT<4, i32>, // soffset(SGPR)
121 SDTCisVT<5, i32>, // offset(imm)
122 SDTCisVT<6, i32>, // format(imm)
123 SDTCisVT<7, i32>, // cachepolicy, swizzled buffer(imm)
124 SDTCisVT<8, i1> // idxen(imm)
127 def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store,
128 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
129 def SItbuffer_store_d16 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_D16",
131 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
133 def SDTBufferLoad : SDTypeProfile<1, 7,
135 SDTCisVT<1, v4i32>, // rsrc
136 SDTCisVT<2, i32>, // vindex(VGPR)
137 SDTCisVT<3, i32>, // voffset(VGPR)
138 SDTCisVT<4, i32>, // soffset(SGPR)
139 SDTCisVT<5, i32>, // offset(imm)
140 SDTCisVT<6, i32>, // cachepolicy, swizzled buffer(imm)
141 SDTCisVT<7, i1>]>; // idxen(imm)
// BUFFER_LOAD node family. Every variant below shares the SDTBufferLoad type
// profile and the property list [SDNPMemOperand, SDNPHasChain, SDNPMayLoad];
// only the AMDGPUISD opcode changes: plain load, ubyte/ushort/byte/short
// loads, the matching *_TFE forms, and the FORMAT / FORMAT_TFE loads.
143 def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
144 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
145 def SIbuffer_load_ubyte : SDNode <"AMDGPUISD::BUFFER_LOAD_UBYTE", SDTBufferLoad,
146 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
147 def SIbuffer_load_ushort : SDNode <"AMDGPUISD::BUFFER_LOAD_USHORT", SDTBufferLoad,
148 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
149 def SIbuffer_load_byte : SDNode <"AMDGPUISD::BUFFER_LOAD_BYTE", SDTBufferLoad,
150 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
151 def SIbuffer_load_short: SDNode <"AMDGPUISD::BUFFER_LOAD_SHORT", SDTBufferLoad,
152 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
153 def SIbuffer_load_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_TFE", SDTBufferLoad,
154 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
155 def SIbuffer_load_ubyte_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_UBYTE_TFE", SDTBufferLoad,
156 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
157 def SIbuffer_load_ushort_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_USHORT_TFE", SDTBufferLoad,
158 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
159 def SIbuffer_load_byte_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_BYTE_TFE", SDTBufferLoad,
160 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
161 def SIbuffer_load_short_tfe: SDNode <"AMDGPUISD::BUFFER_LOAD_SHORT_TFE", SDTBufferLoad,
162 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
163 def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad,
164 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
165 def SIbuffer_load_format_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_TFE", SDTBufferLoad,
166 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
167 def SIbuffer_load_format_d16 : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_D16",
169 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
171 def SDTBufferStore : SDTypeProfile<0, 8,
173 SDTCisVT<1, v4i32>, // rsrc
174 SDTCisVT<2, i32>, // vindex(VGPR)
175 SDTCisVT<3, i32>, // voffset(VGPR)
176 SDTCisVT<4, i32>, // soffset(SGPR)
177 SDTCisVT<5, i32>, // offset(imm)
178 SDTCisVT<6, i32>, // cachepolicy, swizzled buffer(imm)
179 SDTCisVT<7, i1>]>; // idxen(imm)
181 def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore,
182 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
183 def SIbuffer_store_byte: SDNode <"AMDGPUISD::BUFFER_STORE_BYTE",
185 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
186 def SIbuffer_store_short : SDNode <"AMDGPUISD::BUFFER_STORE_SHORT",
188 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
189 def SIbuffer_store_format : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT",
191 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
192 def SIbuffer_store_format_d16 : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT_D16",
194 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
196 multiclass SDBufferAtomic<string opcode> {
197 def "" : SDNode <opcode,
199 [SDTCisVT<2, v4i32>, // rsrc
200 SDTCisVT<3, i32>, // vindex(VGPR)
201 SDTCisVT<4, i32>, // voffset(VGPR)
202 SDTCisVT<5, i32>, // soffset(SGPR)
203 SDTCisVT<6, i32>, // offset(imm)
204 SDTCisVT<7, i32>, // cachepolicy(imm)
205 SDTCisVT<8, i1>]>, // idxen(imm)
206 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
208 def "_noret" : PatFrag<
209 (ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
210 node:$offset, node:$cachepolicy, node:$idxen),
211 (!cast<SDNode>(NAME) node:$vdata_in, node:$rsrc, node:$vindex,
212 node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
// Instantiate SDBufferAtomic once per buffer atomic opcode. Each defm yields
// the SDNode itself (the multiclass's "" def, so it carries the defm name
// unchanged) plus a "<name>_noret" PatFrag wrapping that node.
218 defm SIbuffer_atomic_swap : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SWAP">;
219 defm SIbuffer_atomic_add : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_ADD">;
220 defm SIbuffer_atomic_sub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SUB">;
221 defm SIbuffer_atomic_smin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMIN">;
222 defm SIbuffer_atomic_umin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMIN">;
223 defm SIbuffer_atomic_smax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMAX">;
224 defm SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">;
225 defm SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">;
226 defm SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">;
227 defm SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">;
228 defm SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">;
229 defm SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">;
230 defm SIbuffer_atomic_csub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_CSUB">;
231 defm SIbuffer_atomic_fadd : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD">;
232 defm SIbuffer_atomic_fmin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMIN">;
233 defm SIbuffer_atomic_fmax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMAX">;
234 defm SIbuffer_atomic_cond_sub_u32 : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_COND_SUB_U32">;
236 def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
238 [SDTCisVT<3, v4i32>, // rsrc
239 SDTCisVT<4, i32>, // vindex(VGPR)
240 SDTCisVT<5, i32>, // voffset(VGPR)
241 SDTCisVT<6, i32>, // soffset(SGPR)
242 SDTCisVT<7, i32>, // offset(imm)
243 SDTCisVT<8, i32>, // cachepolicy(imm)
244 SDTCisVT<9, i1>]>, // idxen(imm)
245 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
248 def SIbuffer_atomic_cmpswap_noret : PatFrag<
249 (ops node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset,
250 node:$soffset, node:$offset, node:$cachepolicy, node:$idxen),
251 (SIbuffer_atomic_cmpswap node:$src, node:$cmp, node:$rsrc, node:$vindex,
252 node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
257 class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode,
259 [SDTCisPtrTy<0>, // vaddr
260 SDTCisVT<1, ty>]>, // vdata
261 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
264 def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
265 SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>
268 def SIlds : SDNode<"AMDGPUISD::LDS",
269 SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]>
272 def SIload_d16_lo : SDNode<"AMDGPUISD::LOAD_D16_LO",
274 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
277 def SIload_d16_lo_u8 : SDNode<"AMDGPUISD::LOAD_D16_LO_U8",
279 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
282 def SIload_d16_lo_i8 : SDNode<"AMDGPUISD::LOAD_D16_LO_I8",
284 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
287 def SIload_d16_hi : SDNode<"AMDGPUISD::LOAD_D16_HI",
289 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
292 def SIload_d16_hi_u8 : SDNode<"AMDGPUISD::LOAD_D16_HI_U8",
294 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
297 def SIload_d16_hi_i8 : SDNode<"AMDGPUISD::LOAD_D16_HI_I8",
299 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
302 def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
303 SDTypeProfile<0 ,1, [SDTCisInt<0>]>,
304 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
307 def SIfptrunc_round_upward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_UPWARD",
311 def SIfptrunc_round_downward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_DOWNWARD",
315 //===----------------------------------------------------------------------===//
317 //===----------------------------------------------------------------------===//
// Type query: `ret` is 1 when SrcVT is an integer type other than i1
// (SrcVT.isInteger is set and SrcVT.Value differs from i1.Value), else 0.
319 class isIntType<ValueType SrcVT> {
320 bit ret = !and(SrcVT.isInteger, !ne(SrcVT.Value, i1.Value));
323 //===----------------------------------------------------------------------===//
324 // SDNodes PatFrags for loads/stores with a glue input.
325 // These SDNodes and PatFrags cover local loads and stores, so that an
326 // s_mov_b32 m0, -1 can be glued to the memory instructions.
328 // These mirror the regular load/store PatFrags and rely on special
329 // processing during Select() to add the glued copy.
331 //===----------------------------------------------------------------------===//
333 def AMDGPUld_glue : SDNode <"ISD::LOAD", SDTLoad,
334 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
337 def AMDGPUatomic_ld_glue : SDNode <"ISD::ATOMIC_LOAD", SDTAtomicLoad,
338 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
341 def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr)> {
346 def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> {
348 let IsNonExtLoad = 1;
351 def atomic_load_8_glue : PatFrag<(ops node:$ptr),
352 (AMDGPUatomic_ld_glue node:$ptr)> {
357 def atomic_load_16_glue : PatFrag<(ops node:$ptr),
358 (AMDGPUatomic_ld_glue node:$ptr)> {
363 def atomic_load_32_glue : PatFrag<(ops node:$ptr),
364 (AMDGPUatomic_ld_glue node:$ptr)> {
369 def atomic_load_64_glue : PatFrag<(ops node:$ptr),
370 (AMDGPUatomic_ld_glue node:$ptr)> {
375 def extload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
377 let IsAnyExtLoad = 1;
380 def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
382 let IsSignExtLoad = 1;
385 def zextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
387 let IsZeroExtLoad = 1;
390 def extloadi8_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
395 def zextloadi8_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
400 def extloadi16_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
405 def zextloadi16_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
410 def sextloadi8_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
415 def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
421 let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
422 def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> {
423 let IsNonExtLoad = 1;
426 def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>;
427 def sextloadi8_local_m0 : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>;
428 def zextloadi8_local_m0 : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>;
430 def extloadi16_local_m0 : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>;
431 def sextloadi16_local_m0 : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>;
432 def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>;
433 } // End let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces
435 def load_align8_local_m0 : PatFrag<(ops node:$ptr),
436 (load_local_m0 node:$ptr)> {
438 int MinAlignment = 8;
441 def load_align16_local_m0 : PatFrag<(ops node:$ptr),
442 (load_local_m0 node:$ptr)> {
444 int MinAlignment = 16;
// Local-address-space atomic loads with a glue input. Each *_local_m0
// fragment wraps the atomic_load_<N>_glue fragment of the same width; the
// enclosing let marks them atomic and restricts them to the address spaces
// listed in LoadAddress_local.
447 let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
448 def atomic_load_8_local_m0 : PatFrag<(ops node:$ptr),
449 (atomic_load_8_glue node:$ptr)>;
450 def atomic_load_16_local_m0 : PatFrag<(ops node:$ptr),
451 (atomic_load_16_glue node:$ptr)>;
452 def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
453 (atomic_load_32_glue node:$ptr)>;
454 def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr),
455 (atomic_load_64_glue node:$ptr)>;
456 } // End let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces
459 def AMDGPUst_glue : SDNode <"ISD::STORE", SDTStore,
460 [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
463 def AMDGPUatomic_st_glue : SDNode <"ISD::ATOMIC_STORE", SDTAtomicStore,
464 [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
467 def unindexedstore_glue : PatFrag<(ops node:$val, node:$ptr),
468 (AMDGPUst_glue node:$val, node:$ptr)> {
473 def store_glue : PatFrag<(ops node:$val, node:$ptr),
474 (unindexedstore_glue node:$val, node:$ptr)> {
476 let IsTruncStore = 0;
479 def truncstore_glue : PatFrag<(ops node:$val, node:$ptr),
480 (unindexedstore_glue node:$val, node:$ptr)> {
482 let IsTruncStore = 1;
485 def truncstorei8_glue : PatFrag<(ops node:$val, node:$ptr),
486 (truncstore_glue node:$val, node:$ptr)> {
489 let IsTruncStore = 1;
492 def truncstorei16_glue : PatFrag<(ops node:$val, node:$ptr),
493 (truncstore_glue node:$val, node:$ptr)> {
496 let IsTruncStore = 1;
499 let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
500 def store_local_m0 : PatFrag<(ops node:$val, node:$ptr),
501 (store_glue node:$val, node:$ptr)>;
502 def truncstorei8_local_m0 : PatFrag<(ops node:$val, node:$ptr),
503 (truncstorei8_glue node:$val, node:$ptr)>;
504 def truncstorei16_local_m0 : PatFrag<(ops node:$val, node:$ptr),
505 (truncstorei16_glue node:$val, node:$ptr)>;
508 def store_align8_local_m0 : PatFrag <(ops node:$value, node:$ptr),
509 (store_local_m0 node:$value, node:$ptr)>,
514 def store_align16_local_m0 : PatFrag <(ops node:$value, node:$ptr),
515 (store_local_m0 node:$value, node:$ptr)>,
520 let PredicateCode = [{return cast<MemSDNode>(N)->getAlign() < 4;}],
521 GISelPredicateCode = [{return (*MI.memoperands_begin())->getAlign() < 4;}],
522 AddressSpaces = [ AddrSpaces.Local ] in {
523 def load_align_less_than_4_local : PatFrag<(ops node:$ptr),
524 (load_local node:$ptr)> {
526 let IsNonExtLoad = 1;
529 def load_align_less_than_4_local_m0 : PatFrag<(ops node:$ptr),
530 (load_local_m0 node:$ptr)> {
532 let IsNonExtLoad = 1;
535 def store_align_less_than_4_local : PatFrag <(ops node:$value, node:$ptr),
536 (store_local node:$value, node:$ptr)> {
538 let IsTruncStore = 0;
541 def store_align_less_than_4_local_m0 : PatFrag <(ops node:$value, node:$ptr),
542 (store_local_m0 node:$value, node:$ptr)> {
544 let IsTruncStore = 0;
// Glued atomic store fragment; forwards both operands unchanged to
// AMDGPUatomic_st_glue.
// NOTE(review): the operands are named ($ptr, $value) here and in the
// 16/32/64-bit variants, but the atomic_store_<N>_local_m0 wrappers pass
// ($val, $ptr) into these fragments positionally, and the non-atomic
// store_glue fragments also use (val, ptr). Binding is positional, so this is
// a naming inconsistency only; confirm the operand order of SDTAtomicStore
// and rename whichever side is misleading.
548 def atomic_store_8_glue : PatFrag <
549 (ops node:$ptr, node:$value),
550 (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
555 def atomic_store_16_glue : PatFrag <
556 (ops node:$ptr, node:$value),
557 (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
562 def atomic_store_32_glue : PatFrag <
563 (ops node:$ptr, node:$value),
564 (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
569 def atomic_store_64_glue : PatFrag <
570 (ops node:$ptr, node:$value),
571 (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
// Local-address-space atomic stores with a glue input. Each *_local_m0
// fragment wraps the atomic_store_<N>_glue fragment of the same width and
// passes ($val, $ptr) through in that order; the enclosing let marks them
// atomic and restricts them to the StoreAddress_local address spaces.
576 let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
577 def atomic_store_8_local_m0 : PatFrag<(ops node:$val, node:$ptr),
578 (atomic_store_8_glue node:$val, node:$ptr)>;
579 def atomic_store_16_local_m0 : PatFrag<(ops node:$val, node:$ptr),
580 (atomic_store_16_glue node:$val, node:$ptr)>;
581 def atomic_store_32_local_m0 : PatFrag<(ops node:$val, node:$ptr),
582 (atomic_store_32_glue node:$val, node:$ptr)>;
583 def atomic_store_64_local_m0 : PatFrag<(ops node:$val, node:$ptr),
584 (atomic_store_64_glue node:$val, node:$ptr)>;
585 } // End let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces
588 //===----------------------------------------------------------------------===//
589 // SDNodes PatFrags for a16 loads and stores with 3 components.
590 // v3f16/v3i16 is widened to v4f16/v4i16, so we need to match on the memory
592 //===----------------------------------------------------------------------===//
594 class mubuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag <
595 (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
596 node:$auxiliary, node:$idxen),
597 (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
598 node:$auxiliary, node:$idxen)> {
603 class mubuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag <
604 (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
605 node:$auxiliary, node:$idxen),
606 (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
607 node:$auxiliary, node:$idxen)> {
612 class mtbuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag <
613 (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
614 node:$format, node:$auxiliary, node:$idxen),
615 (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
616 node:$format, node:$auxiliary, node:$idxen)> {
621 class mtbuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag <
622 (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
623 node:$format, node:$auxiliary, node:$idxen),
624 (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
625 node:$format, node:$auxiliary, node:$idxen)> {
630 //===----------------------------------------------------------------------===//
631 // SDNodes PatFrags for d16 loads
632 //===----------------------------------------------------------------------===//
634 class LoadD16Frag <SDPatternOperator op> : PatFrag<
635 (ops node:$ptr, node:$tied_in),
636 (op node:$ptr, node:$tied_in)> {
640 foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
641 let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
643 def load_d16_hi_#as : LoadD16Frag <SIload_d16_hi>;
645 def az_extloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_u8> {
649 def sextloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_i8> {
653 def load_d16_lo_#as : LoadD16Frag <SIload_d16_lo>;
655 def az_extloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_u8> {
659 def sextloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_i8> {
663 } // End let AddressSpaces = ...
664 } // End foreach AddrSpace
666 def lshr_rev : PatFrag <
667 (ops node:$src1, node:$src0),
671 def ashr_rev : PatFrag <
672 (ops node:$src1, node:$src0),
676 def lshl_rev : PatFrag <
677 (ops node:$src1, node:$src0),
681 def add_ctpop : PatFrag <
682 (ops node:$src0, node:$src1),
683 (add (ctpop $src0), $src1)
687 (ops node:$src0, node:$src1),
688 (not (xor $src0, $src1))
692 def shl#I#_add : PatFrag <
693 (ops node:$src0, node:$src1),
694 (add (shl_oneuse $src0, (i32 I)), $src1)> {
695 // FIXME: Poor substitute for disabling pattern in SelectionDAG
696 let PredicateCode = [{return false;}];
697 let GISelPredicateCode = [{return true;}];
701 multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
702 SDTypeProfile tc = SDTAtomic2,
706 !if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name, tc,
707 [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
710 let AddressSpaces = StoreAddress_local.AddrSpaces in {
713 defm _local_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
714 defm _local_m0 : noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
716 defm _local_m0 : binary_atomic_op_fp <!cast<SDNode>(NAME#"_glue")>;
717 defm _local_m0 : noret_binary_atomic_op_fp <!cast<SDNode>(NAME#"_glue")>;
721 let AddressSpaces = StoreAddress_region.AddrSpaces in {
723 defm _region_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
724 defm _region_m0 : noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
726 defm _region_m0 : binary_atomic_op_fp <!cast<SDNode>(NAME#"_glue")>;
727 defm _region_m0 : noret_binary_atomic_op_fp <!cast<SDNode>(NAME#"_glue")>;
// Glued (m0) atomic read-modify-write fragments, one SIAtomicM0Glue2
// instantiation per operation. The integer operations rely on the multiclass
// defaults (is_amdgpu = 0, tc = SDTAtomic2). The three FP operations keep
// is_amdgpu = 0, switch the type profile to SDTAtomic2_f32, and pass 0 as the
// fourth argument.
// NOTE(review): the fourth parameter's declaration is not visible in this
// excerpt; presumably it is an "is integer" flag that selects the *_fp
// atomic-op multiclasses. Confirm against SIAtomicM0Glue2.
732 defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
733 defm atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
734 defm atomic_load_uinc_wrap : SIAtomicM0Glue2 <"LOAD_UINC_WRAP">;
735 defm atomic_load_udec_wrap : SIAtomicM0Glue2 <"LOAD_UDEC_WRAP">;
736 defm atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">;
737 defm atomic_load_min : SIAtomicM0Glue2 <"LOAD_MIN">;
738 defm atomic_load_max : SIAtomicM0Glue2 <"LOAD_MAX">;
739 defm atomic_load_or : SIAtomicM0Glue2 <"LOAD_OR">;
740 defm atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">;
741 defm atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">;
742 defm atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">;
743 defm atomic_swap : SIAtomicM0Glue2 <"SWAP">;
744 defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32, 0>;
745 defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 0, SDTAtomic2_f32, 0>;
746 defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 0, SDTAtomic2_f32, 0>;
748 def as_i1timm : SDNodeXForm<timm, [{
749 return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1);
752 def as_i8imm : SDNodeXForm<imm, [{
753 return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i8);
// Rewrites a target immediate (timm) into a target constant.
// NOTE(review): despite the "i8" in the name, this builds an MVT::i16
// constant from the sign-extended value, whereas as_i8imm builds an MVT::i8
// constant from the zero-extended value. Confirm whether the i16 type is
// intentional before relying on this transform for 8-bit operands.
756 def as_i8timm : SDNodeXForm<timm, [{
757 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
760 def as_i16imm : SDNodeXForm<imm, [{
761 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
764 def as_i16timm : SDNodeXForm<timm, [{
765 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
768 def as_i32imm: SDNodeXForm<imm, [{
769 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
772 def as_i32timm: SDNodeXForm<timm, [{
773 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
776 def as_i64imm: SDNodeXForm<imm, [{
777 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
780 def cond_as_i32imm: SDNodeXForm<cond, [{
781 return CurDAG->getTargetConstant(N->get(), SDLoc(N), MVT::i32);
784 // Copied from the AArch64 backend:
785 def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
786 return CurDAG->getTargetConstant(
787 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
790 def frameindex_to_targetframeindex : SDNodeXForm<frameindex, [{
791 auto FI = cast<FrameIndexSDNode>(N);
792 return CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
795 // Copied from the AArch64 backend:
796 def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
797 return CurDAG->getTargetConstant(
798 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
// Transform that extracts bit number `bitnum` from an immediate. The C++
// body is assembled by pasting `bitnum` into the shift expression; the
// selected bit ((Imm >> bitnum) & 1) is returned as an i1 target constant.
801 class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
802 uint64_t Imm = N->getZExtValue();
803 unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1;
804 return CurDAG->getTargetConstant(Bit, SDLoc(N), MVT::i1);
807 def SIMM16bit : TImmLeaf <i32,
808 [{return isInt<16>(Imm) || isUInt<16>(Imm);}],
// Matches an i64 immediate whose upper 32 bits are all zero, i.e. whose value
// is unchanged by masking with 0xffffffff.
812 def i64imm_32bit : ImmLeaf<i64, [{
813 return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
816 def InlineImm64 : IntImmLeaf<i64, [{
817 return isInlineImmediate(Imm);
820 def InlineImmFP32 : FPImmLeaf<f32, [{
821 return isInlineImmediate(Imm);
824 def InlineImmFP64 : FPImmLeaf<f64, [{
825 return isInlineImmediate(Imm);
829 class VGPRImm <dag frag> : PatLeaf<frag, [{
833 def NegateImm : SDNodeXForm<imm, [{
834 return CurDAG->getConstant(-N->getSExtValue(), SDLoc(N), MVT::i32);
837 // TODO: When FP inline imm values work?
// Both leaves below (i32 and i16) accept immediates in the closed range
// [-64, -17]: strictly below -16 and no smaller than -64.
838 def NegSubInlineConst32 : ImmLeaf<i32, [{
839 return Imm < -16 && Imm >= -64;
842 def NegSubInlineIntConst16 : ImmLeaf<i16, [{
843 return Imm < -16 && Imm >= -64;
846 def ShiftAmt32Imm : ImmLeaf <i32, [{
850 def fp16_zeros_high_16bits : PatLeaf<(f16 VGPR_32:$src), [{
851 return fp16SrcZerosHighBits(N->getOpcode());
854 def is_canonicalized : PatLeaf<(fAny srcvalue:$src), [{
855 const SITargetLowering &Lowering =
856 *static_cast<const SITargetLowering *>(getTargetLowering());
857 return Lowering.isCanonicalized(*CurDAG, SDValue(N, 0));
859 let GISelPredicateCode = [{
860 const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
861 MF.getSubtarget().getTargetLowering());
862 const MachineOperand &Dst = MI.getOperand(0);
864 return TLI->isCanonicalized(Dst.getReg(), MF);
868 //===----------------------------------------------------------------------===//
869 // MUBUF/SMEM Patterns
870 //===----------------------------------------------------------------------===//
872 def extract_cpol : SDNodeXForm<timm, [{
873 return CurDAG->getTargetConstant(
874 N->getZExtValue() & (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12
876 : AMDGPU::CPol::ALL_pregfx12),
880 def extract_swz : SDNodeXForm<timm, [{
882 N->getZExtValue() & (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12
884 : AMDGPU::CPol::SWZ_pregfx12);
885 return CurDAG->getTargetConstant(Swizzle, SDLoc(N), MVT::i8);
888 def extract_cpol_set_glc : SDNodeXForm<timm, [{
889 const uint32_t cpol = N->getZExtValue() & (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12
891 : AMDGPU::CPol::ALL_pregfx12);
892 return CurDAG->getTargetConstant(cpol | AMDGPU::CPol::GLC, SDLoc(N), MVT::i8);
895 //===----------------------------------------------------------------------===//
897 //===----------------------------------------------------------------------===//
899 def SOPPBrTarget : CustomOperand<OtherVT> {
900 let PrintMethod = "printOperand";
901 let EncoderMethod = "getSOPPBrEncoding";
902 let DecoderMethod = "decodeSOPPBrTarget";
903 let OperandType = "OPERAND_PCREL";
906 def si_ga : Operand<iPTR>;
908 def InterpSlot : CustomOperand<i32>;
910 // It appears to be necessary to create a separate operand for this to
911 // be able to parse attr<num> with no space.
912 def InterpAttr : CustomOperand<i32>;
914 def InterpAttrChan : ImmOperand<i32>;
916 def SplitBarrier : ImmOperand<i32> {
917 let OperandNamespace = "AMDGPU";
918 let OperandType = "OPERAND_INLINE_SPLIT_BARRIER_INT32";
919 let DecoderMethod = "decodeSplitBarrier";
920 let PrintMethod = "printOperand";
923 def VReg32OrOffClass : AsmOperandClass {
924 let Name = "VReg32OrOff";
925 let ParserMethod = "parseVReg32OrOff";
// CustomOperand immediates. Swizzle and Endpgm are i16 and pass 1 as
// CustomOperand's second template argument, the slot NamedIntOperand forwards
// its `Optional` bit into; the remaining operands are i32 and leave that
// argument at its default.
928 def SendMsg : CustomOperand<i32>;
930 def Swizzle : CustomOperand<i16, 1>;
932 def Endpgm : CustomOperand<i16, 1>;
934 def SWaitCnt : CustomOperand<i32>;
936 def DepCtr : CustomOperand<i32>;
938 def SDelayALU : CustomOperand<i32>;
940 include "SIInstrFormats.td"
941 include "VIInstrFormats.td"
943 def BoolReg : AsmOperandClass {
944 let Name = "BoolReg";
945 let ParserMethod = "parseBoolReg";
946 let RenderMethod = "addRegOperands";
949 class BoolRC : RegisterOperand<SReg_1> {
950 let ParserMatchClass = BoolReg;
951 let DecoderMethod = "decodeBoolReg";
954 def SSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
955 let ParserMatchClass = BoolReg;
956 let DecoderMethod = "decodeBoolReg";
959 def VOPDstS64orS32 : BoolRC {
960 let PrintMethod = "printVOPDst";
963 // SCSrc_i1 is the operand for pseudo instructions only.
964 // Boolean immediates shall not be exposed to codegen instructions.
965 def SCSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
966 let OperandNamespace = "AMDGPU";
967 let OperandType = "OPERAND_REG_IMM_INT32";
968 let ParserMatchClass = BoolReg;
969 let DecoderMethod = "decodeBoolReg";
972 // ===----------------------------------------------------------------------===//
973 // ExpSrc* Special cases for exp src operands which are printed as
974 // "off" depending on en operand.
975 // ===----------------------------------------------------------------------===//
977 def ExpSrc0 : RegisterOperand<VGPR_32> {
978 let PrintMethod = "printExpSrc0";
979 let ParserMatchClass = VReg32OrOffClass;
982 def ExpSrc1 : RegisterOperand<VGPR_32> {
983 let PrintMethod = "printExpSrc1";
984 let ParserMatchClass = VReg32OrOffClass;
987 def ExpSrc2 : RegisterOperand<VGPR_32> {
988 let PrintMethod = "printExpSrc2";
989 let ParserMatchClass = VReg32OrOffClass;
992 def ExpSrc3 : RegisterOperand<VGPR_32> {
993 let PrintMethod = "printExpSrc3";
994 let ParserMatchClass = VReg32OrOffClass;
// Register operand over VS_32 parameterised by value type. From `vt` it
// derives:
//   - OperandType:   "OPERAND_REG_INLINE_C_" # ("FP" if vt.isFP else "INT")
//                    # vt.Size
//   - DecoderMethod: "decodeSDWASrc" # vt.Size
// The encoder is always getSDWASrcEncoding. Instantiated below for i32, i16,
// f32 and f16.
997 class SDWASrc<ValueType vt> : RegisterOperand<VS_32> {
998 let OperandNamespace = "AMDGPU";
999 string Type = !if(vt.isFP, "FP", "INT");
1000 let OperandType = "OPERAND_REG_INLINE_C_"#Type#vt.Size;
1001 let DecoderMethod = "decodeSDWASrc"#vt.Size;
1002 let EncoderMethod = "getSDWASrcEncoding";
1005 def SDWASrc_i32 : SDWASrc<i32>;
1006 def SDWASrc_i16 : SDWASrc<i16>;
1007 def SDWASrc_f32 : SDWASrc<f32>;
1008 def SDWASrc_f16 : SDWASrc<f16>;
1010 def SDWAVopcDst : BoolRC {
1011 let OperandNamespace = "AMDGPU";
1012 let OperandType = "OPERAND_SDWA_VOPC_DST";
1013 let EncoderMethod = "getSDWAVopcDstEncoding";
1014 let DecoderMethod = "decodeSDWAVopcDst";
1015 let PrintMethod = "printVOPDst";
// Integer operand parsed through parseIntWithPrefix, keyed by Prefix.
// The predicate matches only operands whose immediate type is ImmTy.
// Validator is a C++ callable over int64_t that gets spliced into the default
// ConvertMethod; the default Validator accepts every value, and individual
// definitions narrow it (e.g. to "isUInt<8>") or replace ConvertMethod
// outright.
// NOTE(review): the last template parameter (`name`) and the field that
// receives the parser lambda are declared on lines missing from this
// excerpt - confirm against the full file.
1018 class NamedIntOperand<ValueType Type, string Prefix, bit Optional = 1,
1020 : CustomOperand<Type, Optional, name> {
1021 let PredicateMethod =
1022 "getPredicate([](const AMDGPUOperand &Op) -> bool { "#
1023 "return Op.isImmTy(AMDGPUOperand::"#ImmTy#"); })";
1024 string Validator = "[](int64_t V) { return true; }";
1025 string ConvertMethod = "[](int64_t &V) { return "#Validator#"(V); }";
1027 "[this](OperandVector &Operands) -> ParseStatus { "#
1028 "return parseIntWithPrefix(\""#Prefix#"\", Operands, "#
1029 "AMDGPUOperand::"#ImmTy#", "#ConvertMethod#"); }";
// i1 operand identified in assembly by Id. It is parsed through parseNamedBit
// and printed through printNamedBit, both of which receive Id; the predicate
// matches operands whose immediate type is ImmTy. Name defaults to the name
// of the def being created.
// NOTE(review): the field that receives the parser lambda is assigned on a
// line missing from this excerpt.
1032 class NamedBitOperand<string Id, string Name = NAME>
1033 : CustomOperand<i1, 1, Name> {
1034 let PredicateMethod = "isImmTy<AMDGPUOperand::"#ImmTy#">";
1036 "[this](OperandVector &Operands) -> ParseStatus { "#
1037 "return parseNamedBit(\""#Id#"\", Operands, AMDGPUOperand::"#ImmTy#"); }";
1038 let PrintMethod = "[this](const MCInst *MI, unsigned OpNo, "#
1039 "const MCSubtargetInfo &STI, raw_ostream &O) { "#
1040 "printNamedBit(MI, OpNo, O, \""#Id#"\"); }";
// Variant of the CustomOperand Op that carries Value (typed as Op.Type) as
// its default operand value, while reusing Op's ParserMatchClass and
// PrintMethod so it parses and prints exactly like Op.
1043 class DefaultOperand<CustomOperand Op, int Value>
1044 : OperandWithDefaultOps<Op.Type, (ops (Op.Type Value))>,
1045 CustomOperandProps<1> {
1046 let ParserMatchClass = Op.ParserMatchClass;
1047 let PrintMethod = Op.PrintMethod;
1050 class SDWAOperand<string Id, string Name = NAME>
1051 : CustomOperand<i32, 1, Name> {
1053 "[this](OperandVector &Operands) -> ParseStatus { "#
1054 "return parseSDWASel(Operands, \""#Id#"\", AMDGPUOperand::"#ImmTy#"); }";
// i32 operand with a default value of 0 that is parsed through
// parseOperandArrayWithPrefix, keyed by Id and tagged with ImmTy.
// NOTE(review): the field that receives the parser lambda is assigned on a
// line missing from this excerpt.
1057 class ArrayOperand0<string Id, string Name = NAME>
1058 : OperandWithDefaultOps<i32, (ops (i32 0))>,
1059 CustomOperandProps<1, Name> {
1061 "[this](OperandVector &Operands) -> ParseStatus { "#
1062 "return parseOperandArrayWithPrefix(\""#Id#"\", Operands, "#
1063 "AMDGPUOperand::"#ImmTy#"); }";
1066 let ImmTy = "ImmTyOffset" in
1067 def flat_offset : CustomOperand<i32, 1, "FlatOffset">;
1068 def Offset : NamedIntOperand<i32, "offset">;
1069 let Validator = "isUInt<8>" in {
1070 def Offset0 : NamedIntOperand<i8, "offset0">;
1071 def Offset1 : NamedIntOperand<i8, "offset1">;
1074 def gds : NamedBitOperand<"gds", "GDS">;
1076 def omod : CustomOperand<i32, 1, "OModSI">;
1077 def omod0 : DefaultOperand<omod, 0>;
1079 // We need to make the cases with a default of 0 distinct from no
1080 // default to help deal with some cases where the operand appears
1081 // before a mandatory operand.
1082 def Clamp : NamedBitOperand<"clamp">;
1083 def Clamp0 : DefaultOperand<Clamp, 0>;
1084 def highmod : NamedBitOperand<"high", "High">;
1086 def CPol : CustomOperand<i32, 1>;
1087 def CPol_0 : DefaultOperand<CPol, 0>;
1088 def CPol_GLC1 : DefaultOperand<CPol, 1>;
1089 def CPol_GLC : ValuePredicatedOperand<CPol, "Op.getImm() & CPol::GLC">;
1090 def CPol_NonGLC : ValuePredicatedOperand<CPol, "!(Op.getImm() & CPol::GLC)", 1>;
1091 def CPol_GLC_WithDefault : DefaultOperand<CPol_GLC, !shl(1, CPolBit.GLC)>;
1092 def CPol_NonGLC_WithDefault : DefaultOperand<CPol_NonGLC, 0>;
1094 def TFE : NamedBitOperand<"tfe">;
1095 def UNorm : NamedBitOperand<"unorm">;
1096 def DA : NamedBitOperand<"da">;
1097 def R128A16 : CustomOperand<i1, 1>;
1098 def A16 : NamedBitOperand<"a16">;
1099 def D16 : NamedBitOperand<"d16">;
1100 def LWE : NamedBitOperand<"lwe">;
1101 def exp_compr : NamedBitOperand<"compr", "ExpCompr">;
1102 def exp_vm : NamedBitOperand<"vm", "ExpVM">;
1104 def FORMAT : CustomOperand<i8>;
1106 def DMask : NamedIntOperand<i16, "dmask">;
1107 def Dim : CustomOperand<i8>;
1109 def dst_sel : SDWAOperand<"dst_sel", "SDWADstSel">;
1110 def src0_sel : SDWAOperand<"src0_sel", "SDWASrc0Sel">;
1111 def src1_sel : SDWAOperand<"src1_sel", "SDWASrc1Sel">;
1112 def dst_unused : CustomOperand<i32, 1, "SDWADstUnused">;
1114 def op_sel0 : ArrayOperand0<"op_sel", "OpSel">;
1115 def op_sel_hi0 : ArrayOperand0<"op_sel_hi", "OpSelHi">;
1116 def neg_lo0 : ArrayOperand0<"neg_lo", "NegLo">;
1117 def neg_hi0 : ArrayOperand0<"neg_hi", "NegHi">;
1119 def IndexKey16bit : CustomOperand<i32, 1>;
1120 def IndexKey8bit : CustomOperand<i32, 1>;
1122 def dpp8 : CustomOperand<i32, 0, "DPP8">;
1123 def dpp_ctrl : CustomOperand<i32, 0, "DPPCtrl">;
1125 let DefaultValue = "0xf" in {
1126 def DppRowMask : NamedIntOperand<i32, "row_mask">;
1127 def DppBankMask : NamedIntOperand<i32, "bank_mask">;
1129 def DppBoundCtrl : NamedIntOperand<i1, "bound_ctrl"> {
1130 let ConvertMethod = "[this] (int64_t &BC) -> bool { return convertDppBoundCtrl(BC); }";
1133 let DecoderMethod = "decodeDpp8FI" in
1134 def Dpp8FI : NamedIntOperand<i32, "fi", 1, "DppFI">;
1135 def Dpp16FI : NamedIntOperand<i32, "fi", 1, "DppFI">;
1137 def blgp : CustomOperand<i32, 1, "BLGP">;
1138 def CBSZ : NamedIntOperand<i32, "cbsz"> {
1139 let Validator = "isUInt<3>";
1141 def ABID : NamedIntOperand<i32, "abid"> {
1142 let Validator = "isUInt<4>";
1144 def hwreg : CustomOperand<i32, 0, "Hwreg">;
1146 def exp_tgt : CustomOperand<i32, 0, "ExpTgt">;
1148 def WaitVDST : NamedIntOperand<i8, "wait_vdst"> {
1149 let Validator = "isUInt<4>";
1151 def WaitEXP : NamedIntOperand<i8, "wait_exp"> {
1152 let Validator = "isUInt<3>";
1154 def WaitVAVDst : NamedIntOperand<i8, "wait_va_vdst"> {
1155 let Validator = "isUInt<4>";
1157 def WaitVMVSrc : NamedIntOperand<i8, "wait_vm_vsrc"> {
1158 let Validator = "isUInt<1>";
1161 def ByteSel : NamedIntOperand<i8, "byte_sel"> {
1162 let Validator = "isUInt<2>";
// Immediate operand whose OperandType (OPERAND_KIMM<size>) and print method
// (printU<size>ImmOperand) are derived from vt.Size. Every size shares the
// decodeOperand_KImmFP decoder.
1165 class KImmFPOperand<ValueType vt> : ImmOperand<vt> {
1166 let OperandNamespace = "AMDGPU";
1167 let OperandType = "OPERAND_KIMM"#vt.Size;
1168 let PrintMethod = "printU"#vt.Size#"ImmOperand";
1169 let DecoderMethod = "decodeOperand_KImmFP";
1172 // 32-bit VALU immediate operand that uses the constant bus.
1173 def KImmFP32 : KImmFPOperand<i32>;
1175 // 32-bit VALU immediate operand with a 16-bit value that uses the
1177 def KImmFP16 : KImmFPOperand<i16>;
1179 class FPInputModsMatchClass <int opSize> : AsmOperandClass {
1180 let Name = "RegOrImmWithFP"#opSize#"InputMods";
1181 let ParserMethod = "parseRegOrImmWithFPInputMods";
1182 let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
1185 class FPVCSrcInputModsMatchClass <int opSize> : FPInputModsMatchClass <opSize> {
1186 let Name = "RegOrInlineImmWithFP"#opSize#"InputMods";
1187 let PredicateMethod = "isRegOrInlineImmWithFP"#opSize#"InputMods";
1190 def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
1191 def FPT16InputModsMatchClass : FPInputModsMatchClass<16> {
1192 let Name = "RegOrImmWithFPT16InputMods";
1193 let PredicateMethod = "isRegOrImmWithFPT16InputMods";
1195 def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
1196 def FP64InputModsMatchClass : FPInputModsMatchClass<64>;
1198 class FP16VCSrcInputModsMatchClass<bit IsFake16>
1199 : FPVCSrcInputModsMatchClass<16> {
1200 let Name = !if(IsFake16, "RegOrInlineImmWithFPFake16InputMods",
1201 "RegOrInlineImmWithFPT16InputMods");
1202 let PredicateMethod = "isRegOrInlineImmWithFP16InputMods<" #
1203 !if(IsFake16, "true", "false") # ">";
1205 def FP32VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<32>;
// Common base for the source-modifier operands in this file: an i32 operand
// of type OPERAND_INPUT_MODS whose assembler match class is supplied by the
// caller. Derived classes add the print method.
1207 class InputMods <AsmOperandClass matchClass> : Operand <i32> {
1208 let OperandNamespace = "AMDGPU";
1209 let OperandType = "OPERAND_INPUT_MODS";
1210 let ParserMatchClass = matchClass;
1213 class FPInputMods <FPInputModsMatchClass matchClass> : InputMods <matchClass> {
1214 let PrintMethod = "printOperandAndFPInputMods";
1217 def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;
1218 def FPT16InputMods : FPInputMods<FPT16InputModsMatchClass>;
1219 def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
1220 def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;
1222 class FP16VCSrcInputMods<bit IsFake16>
1223 : FPInputMods<FP16VCSrcInputModsMatchClass<IsFake16>>;
1224 def FP32VCSrcInputMods : FPInputMods<FP32VCSrcInputModsMatchClass>;
1226 class IntInputModsMatchClass <int opSize> : AsmOperandClass {
1227 let Name = "RegOrImmWithInt"#opSize#"InputMods";
1228 let ParserMethod = "parseRegOrImmWithIntInputMods";
1229 let PredicateMethod = "isRegOrImmWithInt"#opSize#"InputMods";
1231 class IntVCSrcInputModsMatchClass <int opSize> : IntInputModsMatchClass <opSize> {
1232 let Name = "RegOrInlineImmWithInt"#opSize#"InputMods";
1233 let PredicateMethod = "isRegOrInlineImmWithInt"#opSize#"InputMods";
1235 def IntT16InputModsMatchClass : IntInputModsMatchClass<16> {
1236 let Name = "RegOrImmWithIntT16InputMods";
1237 let PredicateMethod = "isRegOrImmWithIntT16InputMods";
1239 def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
1240 def Int64InputModsMatchClass : IntInputModsMatchClass<64>;
1241 def Int32VCSrcInputModsMatchClass : IntVCSrcInputModsMatchClass<32>;
1243 class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> {
1244 let PrintMethod = "printOperandAndIntInputMods";
1246 def IntT16InputMods : IntInputMods<IntT16InputModsMatchClass>;
1247 def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
1248 def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
1249 def Int32VCSrcInputMods : IntInputMods<Int32VCSrcInputModsMatchClass>;
1251 class OpSelModsMatchClass : AsmOperandClass {
1252 let Name = "OpSelMods";
1253 let ParserMethod = "parseRegOrImm";
1254 let PredicateMethod = "isRegOrImm";
1257 def IntOpSelModsMatchClass : OpSelModsMatchClass;
1258 def IntOpSelMods : InputMods<IntOpSelModsMatchClass>;
1260 class FPSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
1261 let Name = "SDWAWithFP"#opSize#"InputMods";
1262 let ParserMethod = "parseRegOrImmWithFPInputMods";
1263 let PredicateMethod = "isSDWAFP"#opSize#"Operand";
1266 def FP16SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<16>;
1267 def FP32SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<32>;
1269 class FPSDWAInputMods <FPSDWAInputModsMatchClass matchClass> :
1270 InputMods <matchClass> {
1271 let PrintMethod = "printOperandAndFPInputMods";
1274 def FP16SDWAInputMods : FPSDWAInputMods<FP16SDWAInputModsMatchClass>;
1275 def FP32SDWAInputMods : FPSDWAInputMods<FP32SDWAInputModsMatchClass>;
1277 def FPVRegInputModsMatchClass : AsmOperandClass {
1278 let Name = "VRegWithFPInputMods";
1279 let ParserMethod = "parseRegWithFPInputMods";
1280 let PredicateMethod = "isVRegWithInputMods";
1283 class FPT16VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
1284 let Name = !if(IsFake16, "Fake16VRegWithFPInputMods",
1285 "T16VRegWithFPInputMods");
1286 let ParserMethod = "parseRegWithFPInputMods";
1287 let PredicateMethod = "isT16VRegWithInputMods<" #
1288 !if(IsFake16, "true", "false") # ">";
1291 def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
1292 let PrintMethod = "printOperandAndFPInputMods";
1295 class FPT16VRegInputMods<bit IsFake16>
1296 : InputMods <FPT16VRegInputModsMatchClass<IsFake16>> {
1297 let PrintMethod = "printOperandAndFPInputMods";
1300 class IntSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
1301 let Name = "SDWAWithInt"#opSize#"InputMods";
1302 let ParserMethod = "parseRegOrImmWithIntInputMods";
1303 let PredicateMethod = "isSDWAInt"#opSize#"Operand";
1306 def Int16SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<16>;
1307 def Int32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32>;
1308 def Bin32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32> {
1309 let Name = "SDWAWithBin32InputMods";
1310 let ParserMethod = "parseRegOrImm";
1313 class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass> :
1314 InputMods <matchClass> {
1315 let PrintMethod = "printOperandAndIntInputMods";
1318 def Int16SDWAInputMods : IntSDWAInputMods<Int16SDWAInputModsMatchClass>;
1319 def Int32SDWAInputMods : IntSDWAInputMods<Int32SDWAInputModsMatchClass>;
1320 def Bin32SDWAInputMods : IntSDWAInputMods<Bin32SDWAInputModsMatchClass>;
1322 def IntVRegInputModsMatchClass : AsmOperandClass {
1323 let Name = "VRegWithIntInputMods";
1324 let ParserMethod = "parseRegWithIntInputMods";
1325 let PredicateMethod = "isVRegWithInputMods";
1328 class IntT16VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
1329 let Name = !if(IsFake16, "Fake16VRegWithIntInputMods",
1330 "T16VRegWithIntInputMods");
1331 let ParserMethod = "parseRegWithIntInputMods";
1332 let PredicateMethod = "isT16VRegWithInputMods<" #
1333 !if(IsFake16, "true", "false") # ">";
1336 class IntT16VRegInputMods<bit IsFake16>
1337 : InputMods <IntT16VRegInputModsMatchClass<IsFake16>> {
1338 let PrintMethod = "printOperandAndIntInputMods";
1341 def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
1342 let PrintMethod = "printOperandAndIntInputMods";
1345 class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
1346 let Name = "PackedFP"#opSize#"InputMods";
1347 let ParserMethod = "parseRegOrImmWithFPInputMods";
1348 let PredicateMethod = "isPackedFP"#opSize#"InputMods";
1351 class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
1352 let Name = "PackedInt"#opSize#"InputMods";
1353 let ParserMethod = "parseRegOrImm";
1354 let PredicateMethod = "isRegOrImm";
1355 // let PredicateMethod = "isPackedInt"#opSize#"InputMods";
1358 def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
1359 def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;
1361 class PackedFPInputMods <PackedFPInputModsMatchClass matchClass> : InputMods <matchClass> {
1362 let PrintMethod = "printOperandAndFPInputMods";
1365 class PackedIntInputMods <PackedIntInputModsMatchClass matchClass> : InputMods <matchClass> {
1366 //let PrintMethod = "printPackedIntInputMods";
1369 def PackedF16InputMods : PackedFPInputMods<PackedF16InputModsMatchClass>;
1370 def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>;
1372 //===----------------------------------------------------------------------===//
1374 //===----------------------------------------------------------------------===//
1376 def DS1Addr1Offset : ComplexPattern<iPTR, 2, "SelectDS1Addr1Offset">;
1377 def DS64Bit4ByteAligned : ComplexPattern<iPTR, 3, "SelectDS64Bit4ByteAligned">;
1378 def DS128Bit8ByteAligned : ComplexPattern<iPTR, 3, "SelectDS128Bit8ByteAligned">;
1380 def MOVRELOffset : ComplexPattern<iPTR, 2, "SelectMOVRELOffset">;
1382 def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
1384 // Modifiers for floating point instructions.
1385 def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
1387 // VOP3 modifiers used for instructions that do not read canonicalized
1388 // floating point values (i.e. integer operations with FP source
1390 def VOP3ModsNonCanonicalizing : ComplexPattern<untyped, 2,
1391 "SelectVOP3ModsNonCanonicalizing">;
1393 def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;
1395 def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
1397 def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;
1399 def VOP3PModsDOT : ComplexPattern<untyped, 2, "SelectVOP3PModsDOT">;
1400 def VOP3PModsNeg : ComplexPattern<untyped, 1, "SelectVOP3PModsNeg">;
1401 def WMMAOpSelVOP3PMods : ComplexPattern<untyped, 1, "SelectWMMAOpSelVOP3PMods">;
1403 def WMMAModsF32NegAbs : ComplexPattern<untyped, 2, "SelectWMMAModsF32NegAbs">;
1404 def WMMAModsF16Neg : ComplexPattern<untyped, 2, "SelectWMMAModsF16Neg">;
1405 def WMMAModsF16NegAbs : ComplexPattern<untyped, 2, "SelectWMMAModsF16NegAbs">;
1406 def WMMAVISrc : ComplexPattern<untyped, 1, "SelectWMMAVISrc">;
1407 def SWMMACIndex8 : ComplexPattern<untyped, 2, "SelectSWMMACIndex8">;
1408 def SWMMACIndex16 : ComplexPattern<untyped, 2, "SelectSWMMACIndex16">;
1410 def VOP3OpSel : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;
1412 def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;
1414 def VOP3PMadMixModsExt : ComplexPattern<untyped, 2, "SelectVOP3PMadMixModsExt">;
1415 def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">;
1417 def VINTERPMods : ComplexPattern<untyped, 2, "SelectVINTERPMods">;
1418 def VINTERPModsHi : ComplexPattern<untyped, 2, "SelectVINTERPModsHi">;
1420 //===----------------------------------------------------------------------===//
1421 // SI assembler operands
1422 //===----------------------------------------------------------------------===//
1427 int FLAT_SCR = 0x68;
1430 // This should be kept in sync with SISrcMods enum
1465 int FLAT_SCR_LO = 20;
1466 int FLAT_SCR_HI = 21;
1467 int XNACK_MASK = 22;
1468 int POPS_PACKER = 25;
1469 int SHADER_CYCLES = 29;
// Packs a hardware-register descriptor into a single integer: Reg is OR-ed
// with (Size - 1) shifted left by 11, and the result is masked with 65535
// (the low 16 bits).
// NOTE(review): the term that places Offset in the value is on a line missing
// from this excerpt - confirm its shift amount against the full file.
1472 class getHwRegImm<int Reg, int Offset = 0, int Size = 32> {
1473 int ret = !and(!or(Reg,
1475 !shl(!add(Size, -1), 11)), 65535);
1478 //===----------------------------------------------------------------------===//
1480 // SI Instruction multiclass helpers.
1482 // Instructions with _32 take 32-bit operands.
1483 // Instructions with _64 take 64-bit operands.
1485 // VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
1486 // encoding is the standard encoding, but instructions that make use of
1487 // any of the instruction modifiers must use the 64-bit encoding.
1489 // Instructions with _e32 use the 32-bit encoding.
1490 // Instructions with _e64 use the 64-bit encoding.
1492 //===----------------------------------------------------------------------===//
1494 class SIMCInstr <string pseudo, int subtarget> {
1495 string PseudoInstr = pseudo;
1496 int Subtarget = subtarget;
1499 //===----------------------------------------------------------------------===//
1500 // Vector ALU classes
1501 //===----------------------------------------------------------------------===//
// Counts the leading typed sources: the first SrcN whose type is `untyped`
// ends the count, giving 0, 1 or 2.
// NOTE(review): the declaration of the result field and the fall-through
// value for three typed sources are on lines missing from this excerpt -
// presumably `int ret` and 3; confirm against the full file.
1503 class getNumSrcArgs<ValueType Src0, ValueType Src1, ValueType Src2> {
1505 !if (!eq(Src0.Value, untyped.Value), 0,
1506 !if (!eq(Src1.Value, untyped.Value), 1, // VOP1
1507 !if (!eq(Src2.Value, untyped.Value), 2, // VOP2
1511 // Returns the register class to use for the destination of VOP[123C]
1512 // instructions for the given VT.
// Dispatch is on VT.Size: 256/128/64/32-bit types map to VOPDstOperand over
// the matching VGPR class. 16-bit types use op16, which is the plain 32-bit
// VGPR operand unless IsTrue16 is set; with IsTrue16, IsVOP3Encoding selects
// VOPDstOperand_t16 over VOPDstOperand_t16Lo128. Any other size falls through
// to VOPDstS64orS32.
1513 class getVALUDstForVT<ValueType VT, bit IsTrue16 = 0, bit IsVOP3Encoding = 0> {
1514 defvar op16 = !if(IsTrue16, !if (IsVOP3Encoding, VOPDstOperand_t16,
1515 VOPDstOperand_t16Lo128),
1516 VOPDstOperand<VGPR_32>);
1517 RegisterOperand ret = !cond(!eq(VT.Size, 256) : VOPDstOperand<VReg_256>,
1518 !eq(VT.Size, 128) : VOPDstOperand<VReg_128>,
1519 !eq(VT.Size, 64) : VOPDstOperand<VReg_64>,
1520 !eq(VT.Size, 32) : VOPDstOperand<VGPR_32>,
1521 !eq(VT.Size, 16) : op16,
1522 1 : VOPDstS64orS32); // else VT == i1
1525 class getVALUDstForVT_fake16<ValueType VT> {
1526 RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
1527 !if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
1528 !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
1529 !if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32_Lo128>,
1530 VOPDstS64orS32)))); // else VT == i1
1533 // Returns the register class to use for the destination of VOP[12C]
1534 // instructions with SDWA extension
1535 class getSDWADstForVT<ValueType VT> {
1536 RegisterOperand ret = !if(!eq(VT.Size, 1),
1537 SDWAVopcDst, // VOPC
1538 VOPDstOperand<VGPR_32>); // VOP1/2 32-bit dst
1541 // Returns the register class to use for source 0 of VOP[12C]
1542 // instructions for the given VT.
1543 class getVOPSrc0ForVT<ValueType VT, bit IsTrue16, bit IsFake16 = 1> {
1544 RegisterOperand ret =
1545 !cond(!eq(VT, i64) : VSrc_b64,
1546 !eq(VT, f64) : VSrc_f64,
1547 !eq(VT, i32) : VSrc_b32,
1548 !eq(VT, f32) : VSrc_f32,
1549 !eq(VT, i16) : !if(IsTrue16,
1550 !if(IsFake16, VSrcFake16_b16_Lo128, VSrcT_b16_Lo128),
1552 !eq(VT, f16) : !if(IsTrue16,
1553 !if(IsFake16, VSrcFake16_f16_Lo128, VSrcT_f16_Lo128),
1555 !eq(VT, bf16) : !if(IsTrue16,
1556 !if(IsFake16, VSrcFake16_bf16_Lo128, VSrcT_bf16_Lo128),
1558 !eq(VT, v2i16) : VSrc_v2b16,
1559 !eq(VT, v2f16) : VSrc_v2f16,
1560 !eq(VT, v2bf16) : VSrc_v2bf16,
1561 !eq(VT, v4f16) : AVSrc_64,
1562 !eq(VT, v4bf16) : AVSrc_64,
// Returns the scalar source operand for VT: SSrc_b64 for 64-bit types and
// SSrc_b32 for every other size.
1566 class getSOPSrcForVT<ValueType VT> {
1567 RegisterOperand ret = !if(!eq(VT.Size, 64), SSrc_b64, SSrc_b32);
1570 // Returns the vreg register class to use for source operand given VT
1571 class getVregSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 0> {
1572 RegisterOperand ret =
1573 !cond(!eq(VT.Size, 128) : RegisterOperand<VReg_128>,
1574 !eq(VT.Size, 96) : RegisterOperand<VReg_96>,
1575 !eq(VT.Size, 64) : RegisterOperand<VReg_64>,
1576 !eq(VT.Size, 48) : RegisterOperand<VReg_64>,
1577 !eq(VT.Size, 16) : !if(IsTrue16,
1578 !if(IsFake16, VGPRSrc_32_Lo128, VGPRSrc_16_Lo128),
1579 RegisterOperand<VGPR_32>),
1580 1 : RegisterOperand<VGPR_32>);
// Returns the SDWA source operand for VT. 16-bit types select the *16
// operands and every other size the *32 operands; VT.isFP then picks the
// floating-point (retFlt) or integer (retInt) candidate.
1583 class getSDWASrcForVT <ValueType VT> {
1584 RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32);
1585 RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32);
1586 RegisterOperand ret = !if(VT.isFP, retFlt, retInt);
1589 // Returns the register class to use for sources of VOP3 instructions for the
1591 class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> {
1592 RegisterOperand ret =
1593 !cond(!eq(VT, f64) : VSrc_f64,
1594 !eq(VT, f32) : VSrc_f32,
1595 !eq(VT, f16) : !if(IsTrue16, VSrcT_f16, VSrc_f16),
1596 !eq(VT, bf16) : !if(IsTrue16, VSrcT_bf16, VSrc_bf16),
1597 !eq(VT, i16) : !if(IsTrue16, VSrcT_b16, VSrc_b16),
1598 !eq(VT, i1) : SSrc_i1,
1599 !eq(VT, v2f32) : VSrc_v2f32,
1600 !eq(VT, v2i32) : VSrc_v2b32,
1601 !eq(VT, v2f16) : VSrc_v2f16,
1602 !eq(VT, v2bf16) : VSrc_v2bf16,
1603 !eq(VT, v2i16) : VSrc_v2b16,
1604 !eq(VT, v4f16) : AVSrc_64,
1605 !eq(VT, v4bf16) : AVSrc_64,
1606 !eq(VT.Size, 128) : VRegSrc_128,
1607 !eq(VT.Size, 96) : VRegSrc_96,
1608 !eq(VT.Size, 64) : VSrc_b64,
1612 // Src2 of VOP3 DPP instructions cannot be a literal
1613 class getVOP3DPPSrcForVT<ValueType VT> {
1614 RegisterOperand ret =
1615 !cond(!eq(VT, i1) : SSrc_i1,
1616 !eq(VT, i16) : VCSrc_b16,
1617 !eq(VT, f16) : VCSrc_f16,
1618 !eq(VT, bf16) : VCSrc_bf16,
1619 !eq(VT, v2i16) : VCSrc_v2b16,
1620 !eq(VT, v2f16) : VCSrc_v2f16,
1621 !eq(VT, v2bf16) : VCSrc_v2bf16,
1622 !eq(VT, f32) : VCSrc_f32,
1626 // Float or packed int
1627 class isModifierType<ValueType SrcVT> {
1628 bit ret = !or(!eq(SrcVT.Value, f16.Value),
1629 !eq(SrcVT.Value, bf16.Value),
1630 !eq(SrcVT.Value, f32.Value),
1631 !eq(SrcVT.Value, f64.Value),
1632 !eq(SrcVT.Value, v2f16.Value),
1633 !eq(SrcVT.Value, v2i16.Value),
1634 !eq(SrcVT.Value, v2bf16.Value),
1635 !eq(SrcVT.Value, v2f32.Value),
1636 !eq(SrcVT.Value, v2i32.Value),
1637 !eq(SrcVT.Value, v4f16.Value),
1638 !eq(SrcVT.Value, v4i16.Value),
1639 !eq(SrcVT.Value, v4bf16.Value),
1640 !eq(SrcVT.Value, v4f32.Value),
1641 !eq(SrcVT.Value, v4i32.Value),
1642 !eq(SrcVT.Value, v8f16.Value),
1643 !eq(SrcVT.Value, v8i16.Value),
1644 !eq(SrcVT.Value, v8bf16.Value),
1645 !eq(SrcVT.Value, v8f32.Value),
1646 !eq(SrcVT.Value, v8i32.Value),
1647 !eq(SrcVT.Value, v16f16.Value),
1648 !eq(SrcVT.Value, v16i16.Value),
1649 !eq(SrcVT.Value, v16bf16.Value));
1652 // Return type of input modifiers operand for specified input operand
// Selection is by VT.Size first and VT.isFP second:
//   64-bit -> FP64InputMods / Int64InputMods
//   16-bit -> FP16InputMods / IntOpSelMods, or FPT16InputMods /
//             IntT16InputMods when IsTrue16 is set
//   other  -> FP32InputMods / Int32InputMods
1653 class getSrcMod <ValueType VT, bit IsTrue16 = 0> {
1654 Operand ret = !if(!eq(VT.Size, 64),
1655 !if(VT.isFP, FP64InputMods, Int64InputMods),
1656 !if(!eq(VT.Size, 16),
1657 !if(VT.isFP, !if(IsTrue16, FPT16InputMods, FP16InputMods),
1658 !if(IsTrue16, IntT16InputMods, IntOpSelMods)),
1659 !if(VT.isFP, FP32InputMods, Int32InputMods)));
1662 class getOpSelMod <ValueType VT> {
1663 Operand ret = !cond(!eq(VT, f16) : FP16InputMods,
1664 !eq(VT, bf16) : FP16InputMods,
1665 !eq(VT, v2f16) : PackedF16InputMods,
1666 !eq(VT, v2bf16) : PackedF16InputMods,
1670 // Return type of input modifiers operand for specified input operand for DPP
// Floating-point types get FPVRegInputMods, all other types IntVRegInputMods.
1671 class getSrcModDPP <ValueType VT> {
1672 Operand ret = !if(VT.isFP, FPVRegInputMods, IntVRegInputMods);
1675 class getSrcModDPP_t16 <ValueType VT, bit IsFake16 = 1> {
1678 !if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
1679 FPT16VRegInputMods<IsFake16>, FPVRegInputMods),
1680 !if (!eq(VT.Value, i16.Value),
1681 IntT16VRegInputMods<IsFake16>, IntVRegInputMods));
1684 // Return type of input modifiers operand for specified input operand for DPP
1685 class getSrcModVOP3DPP <ValueType VT, bit IsFake16 = 1> {
1688 !if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
1689 FP16VCSrcInputMods<IsFake16>, FP32VCSrcInputMods),
1690 Int32VCSrcInputMods);
1693 // Return type of input modifiers operand for specified input operand for SDWA
// bf16 shares FP16SDWAInputMods with f16. Any type other than f16, f32, i16
// or bf16 falls back to Int32SDWAInputMods.
1694 class getSrcModSDWA <ValueType VT> {
1695 Operand ret = !if(!eq(VT.Value, f16.Value), FP16SDWAInputMods,
1696 !if(!eq(VT.Value, f32.Value), FP32SDWAInputMods,
1697 !if(!eq(VT.Value, i16.Value), Int16SDWAInputMods,
1698 !if(!eq(VT.Value, bf16.Value), FP16SDWAInputMods,
1699 Int32SDWAInputMods))));
1702 // Returns the input arguments for VOP[12C] instructions for the given SrcVT.
1703 class getIns32 <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs> {
1704 dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0), // VOP1
1705 !if(!eq(NumSrcArgs, 2), (ins Src0RC:$src0, Src1RC:$src1), // VOP2
1709 // Returns the input arguments for VOP3 instructions for the given SrcVT.
1710 class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
1711 RegisterOperand Src2RC, int NumSrcArgs,
1712 bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
1713 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
1716 !if (!eq(NumSrcArgs, 0),
1717 // VOP1 without input operands (V_NOP, V_CLREXCP)
1720 !if (!eq(NumSrcArgs, 1),
1722 // VOP1 with modifiers
1724 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1725 Clamp0:$clamp, omod0:$omod),
1727 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, Clamp0:$clamp),
1728 (ins Src0Mod:$src0_modifiers, Src0RC:$src0)))
1730 // VOP1 without modifiers
1732 (ins Src0RC:$src0, Clamp0:$clamp),
1735 !if (!eq(NumSrcArgs, 2),
1737 // VOP2 with modifiers
1739 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1740 Src1Mod:$src1_modifiers, Src1RC:$src1,
1741 Clamp0:$clamp, omod0:$omod),
1742 !con((ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1743 Src1Mod:$src1_modifiers, Src1RC:$src1),
1744 !if(HasClamp, (ins Clamp0:$clamp), (ins))))
1746 // VOP2 without modifiers
1748 (ins Src0RC:$src0, Src1RC:$src1, Clamp0:$clamp),
1749 (ins Src0RC:$src0, Src1RC:$src1))
1752 /* NumSrcArgs == 3 */,
1755 // VOP3 with modifiers
1757 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1758 Src1Mod:$src1_modifiers, Src1RC:$src1,
1759 Src2Mod:$src2_modifiers, Src2RC:$src2,
1760 Clamp0:$clamp, omod0:$omod),
1762 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1763 Src1Mod:$src1_modifiers, Src1RC:$src1,
1764 Src2Mod:$src2_modifiers, Src2RC:$src2,
1766 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1767 Src1Mod:$src1_modifiers, Src1RC:$src1,
1768 Src2Mod:$src2_modifiers, Src2RC:$src2))),
1769 // VOP3 with modifiers except src2
1771 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1772 Src1Mod:$src1_modifiers, Src1RC:$src1,
1773 Src2RC:$src2, Clamp0:$clamp, omod0:$omod),
1775 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1776 Src1Mod:$src1_modifiers, Src1RC:$src1,
1777 Src2RC:$src2, Clamp0:$clamp),
1778 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1779 Src1Mod:$src1_modifiers, Src1RC:$src1,
1782 // VOP3 without modifiers
1784 (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2, Clamp0:$clamp),
1785 (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2))
// Returns the getIns64 operand list for the given sources and flags, with
// op_sel0:$op_sel appended when HasOpSel is set.
1789 class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC,
1790 RegisterOperand Src2RC, int NumSrcArgs,
1791 bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
1792 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel> {
1793 // getIns64 handles clamp and omod. implicit mutex between vop3p and omod
1794 dag base = getIns64 <Src0RC, Src1RC, Src2RC, NumSrcArgs,
1795 HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
1796 Src0Mod, Src1Mod, Src2Mod>.ret;
1797 dag opsel = (ins op_sel0:$op_sel);
1798 dag ret = !con(base, !if(HasOpSel, opsel, (ins)));
// Returns the input operand list for VOP3P instructions. It starts from
// getInsVOP3Base with source modifiers (including src2) forced on and omod
// forced off, then appends op_sel_hi (only when HasOpSel is set) followed by
// neg_lo and neg_hi, which are always present.
1801 class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
1802 RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasOpSel,
1803 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
1804 dag base = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs,
1805 HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/,
1806 0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod, HasOpSel>.ret;
1808 dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi);
1809 dag vop3p_neg = (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi);
1811 dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), vop3p_neg);
1812 dag ret = !con(base, vop3pFields);
1815 class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC,
1816 RegisterOperand Src2RC, int NumSrcArgs,
1817 bit HasClamp, bit HasOMod,
1818 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
1819 dag ret = getInsVOP3Base<Src0RC, Src1RC,
1821 HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/, HasOMod,
1822 Src0Mod, Src1Mod, Src2Mod, /*HasOpSel=*/1>.ret;
1825 class getInsDPPBase <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
1826 RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
1827 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld> {
1828 dag ret = !if(!eq(NumSrcArgs, 0),
1829 // VOP1 without input operands (V_NOP)
1832 !if(HasOld ,(ins OldRC:$old), (ins)),
1833 !if (!eq(NumSrcArgs, 1),
1835 // VOP1_DPP with modifiers
1836 (ins Src0Mod:$src0_modifiers, Src0RC:$src0)
1838 // VOP1_DPP without modifiers
1841 !if (!eq(NumSrcArgs, 2),
1843 // VOP2_DPP with modifiers
1844 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1845 Src1Mod:$src1_modifiers, Src1RC:$src1)
1847 // VOP2_DPP without modifiers
1848 (ins Src0RC:$src0, Src1RC:$src1)
1850 /* NumSrcArgs == 3, VOP3 */,
1852 // VOP3_DPP with modifiers
1853 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1854 Src1Mod:$src1_modifiers, Src1RC:$src1,
1855 Src2Mod:$src2_modifiers, Src2RC:$src2)
1857 // VOP3_DPP without modifiers
1858 (ins Src0RC:$src0, Src1RC:$src1,
1867 class getInsDPP <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
1868 RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
1869 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
1870 dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
1871 HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret,
1872 (ins dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
1873 DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl));
1876 class getInsDPP16 <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
1877 RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
1878 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
1879 dag ret = !con(getInsDPP<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
1880 HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret,
1884 class getInsDPP8 <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
1885 RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
1886 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
1887 dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
1888 HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret,
1889 (ins dpp8:$dpp8, Dpp8FI:$fi));
1892 class getInsVOP3DPPBase<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld> {
1893 dag old = ( ins OldRC:$old );
1894 dag base = VOP3Base;
1896 !if(!and(HasOld,!ne(NumSrcArgs, 0)), old, (ins)),
1901 class getInsVOP3DPP<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
1902 dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret,
1903 (ins dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
1904 DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl));
1907 class getInsVOP3DPP16<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
1908 dag ret = !con(getInsVOP3DPP<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret,
1912 class getInsVOP3DPP8<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
1913 dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret,
1914 (ins dpp8:$dpp8, Dpp8FI:$fi));
// Selects the SDWA input operand list by NumSrcArgs (0, 1 or 2); any other
// count falls through to the empty (ins) at the end of the chain.
// For one and two sources, HasSDWAOMod decides whether omod:$omod is part of
// the list. For two sources, a 1-bit DstVT selects a shorter form that has
// Clamp and the source selectors but no dst_sel/dst_unused.
1918 class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs,
1919 bit HasSDWAOMod, Operand Src0Mod, Operand Src1Mod,
1922 dag ret = !if(!eq(NumSrcArgs, 0),
1923 // VOP1 without input operands (V_NOP)
1925 !if(!eq(NumSrcArgs, 1),
1927 !if(!not(HasSDWAOMod),
1928 // VOP1_SDWA without omod
1929 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1931 dst_sel:$dst_sel, dst_unused:$dst_unused,
1932 src0_sel:$src0_sel),
1933 // VOP1_SDWA with omod
1934 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1935 Clamp:$clamp, omod:$omod,
1936 dst_sel:$dst_sel, dst_unused:$dst_unused,
1937 src0_sel:$src0_sel)),
1938 !if(!eq(NumSrcArgs, 2),
1939 !if(!eq(DstVT.Size, 1),
// 1-bit destination: no dst_sel/dst_unused operands in this form.
1941 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1942 Src1Mod:$src1_modifiers, Src1RC:$src1,
1943 Clamp:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
1945 !if(!not(HasSDWAOMod),
1946 // VOP2_SDWA without omod
1947 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1948 Src1Mod:$src1_modifiers, Src1RC:$src1,
1950 dst_sel:$dst_sel, dst_unused:$dst_unused,
1951 src0_sel:$src0_sel, src1_sel:$src1_sel),
1952 // VOP2_SDWA with omod
1953 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1954 Src1Mod:$src1_modifiers, Src1RC:$src1,
1955 Clamp:$clamp, omod:$omod,
1956 dst_sel:$dst_sel, dst_unused:$dst_unused,
1957 src0_sel:$src0_sel, src1_sel:$src1_sel))),
1958 (ins)/* endif */)));
// Output operand list for DPP forms. When HasDst is set, a 1-bit DstVT yields
// an empty (outs) and every other type yields a single DstRCDPP:$vdst.
1962 class getOutsDPP <bit HasDst, ValueType DstVT, RegisterOperand DstRCDPP> {
1963 dag ret = !if(HasDst,
1964 !if(!eq(DstVT.Size, 1),
1965 (outs), // no dst for VOPC, we use "vcc"-token as dst in SDWA VOPC instructions
1966 (outs DstRCDPP:$vdst)),
// Output operand list for SDWA forms. When HasDst is set the single output
// uses DstRCSDWA and is named $sdst for a 1-bit DstVT, $vdst otherwise.
1971 class getOutsSDWA <bit HasDst, ValueType DstVT, RegisterOperand DstRCSDWA> {
1972 dag ret = !if(HasDst,
1973 !if(!eq(DstVT.Size, 1),
1974 (outs DstRCSDWA:$sdst),
1975 (outs DstRCSDWA:$vdst)),
1979 // Returns the assembly string for the inputs and outputs of a VOP[12C]
// The destination (printed only when HasDst is set) is followed by one
// ", $srcN" per source for NumSrcArgs of 1, 2 or 3; any other count adds no
// sources. Every source carries a leading ", ", so with HasDst = 0 the result
// starts with a comma and is meant to be appended to an existing string.
1981 class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
1982 string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC
1983 string src0 = ", $src0";
1984 string src1 = ", $src1";
1985 string src2 = ", $src2";
1986 string ret = !if(HasDst, dst, "") #
1987 !if(!eq(NumSrcArgs, 1), src0, "") #
1988 !if(!eq(NumSrcArgs, 2), src0#src1, "") #
1989 !if(!eq(NumSrcArgs, 3), src0#src1#src2, "");
// Operand strings for one component of a VOPD instruction. XorY is appended
// to every operand name. The second source is named $vsrc1 (not $src1).
// src0 is used when NumSrcArgs >= 1 and src1 when NumSrcArgs >= 2.
1992 class getAsmVOPDPart <int NumSrcArgs, string XorY> {
1993 string dst = "$vdst" # XorY;
1994 string src0 = ", $src0" # XorY;
1995 string src1 = ", $vsrc1" # XorY;
1997 !if(!ge(NumSrcArgs, 1), src0, "") #
1998 !if(!ge(NumSrcArgs, 2), src1, "");
2001 // Returns the assembly string for the inputs and outputs of a VOP3P
// Each srcN string carries its own separator: a source gets a trailing comma
// unless it is the last one. The result is
// "$vdst, <sources><op_sel><neg modifiers><clamp>", where the op_sel, neg and
// clamp parts are empty unless HasOpSel / HasModifiers / HasClamp are set.
2003 class getAsmVOP3P <int NumSrcArgs, bit HasModifiers,
2004 bit HasClamp, bit HasOpSel> {
2005 string dst = "$vdst";
2006 string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
2007 string src1 = !if(!eq(NumSrcArgs, 1), "",
2008 !if(!eq(NumSrcArgs, 2), " $src1",
2010 string src2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
2012 string mods = !if(HasModifiers, "$neg_lo$neg_hi", "");
2013 string clamp = !if(HasClamp, "$clamp", "");
2014 string opsel = !if(HasOpSel, "$op_sel$op_sel_hi", "");
2016 // Each modifier is printed as an array of bits for each operand, so
2017 // all operands are printed as part of src0_modifiers.
2018 string ret = dst#", "#src0#src1#src2#opsel#mods#clamp;
// Assembly operand string for VOP3 instructions that use op_sel.
// isrcN are the plain "$srcN" spellings and fsrcN the "$srcN_modifiers"
// spellings; SrcNHasMods chooses between them per source. "$op_sel" is always
// emitted, while $clamp and $omod appear only when HasClamp / HasOMod are set.
2021 class getAsmVOP3OpSel <int NumSrcArgs,
2027 string dst = "$vdst";
2029 string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
2030 string isrc1 = !if(!eq(NumSrcArgs, 1), "",
2031 !if(!eq(NumSrcArgs, 2), " $src1",
2033 string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
2035 string fsrc0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
2036 string fsrc1 = !if(!eq(NumSrcArgs, 1), "",
2037 !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
2038 " $src1_modifiers,"));
2039 string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
2041 string src0 = !if(Src0HasMods, fsrc0, isrc0);
2042 string src1 = !if(Src1HasMods, fsrc1, isrc1);
2043 string src2 = !if(Src2HasMods, fsrc2, isrc2);
2045 string clamp = !if(HasClamp, "$clamp", "");
2046 string omod = !if(HasOMod, "$omod", "");
2047 string ret = dst#", "#src0#src1#src2#"$op_sel"#clamp#omod;
// Assembly operand string for DPP forms: dst # args followed by the DPP
// control operands. Without modifiers the source list is taken from getAsm32,
// instantiated with HasDst = 0 so that only the sources are produced.
2050 class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
2051 string dst = !if(HasDst,
2052 !if(!eq(DstVT.Size, 1),
2055 ""); // use $sdst for VOPC
2056 string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
2057 string src1 = !if(!eq(NumSrcArgs, 1), "",
2058 !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
2059 " $src1_modifiers,"));
2060 string args = !if(!not(HasModifiers),
2061 getAsm32<0, NumSrcArgs, DstVT>.ret,
2063 string ret = dst#args#" $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
// DPP16 assembly string: getAsmDPP's string with "$fi" appended.
2066 class getAsmDPP16 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
2067 string ret = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret#"$fi";
// DPP8 assembly string. Inherits dst and args from getAsmDPP but overrides
// ret so that the DPP control suffix is replaced by " $dpp8$fi".
2070 class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32>
2071 : getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>{
2072 let ret = dst#args#" $dpp8$fi";
// Assembly operand string shared by the VOP3-encoded forms.
// Each source is printed either as "$srcN" or as "$srcN_modifiers" depending
// on SrcNHasMods. ret is dst alone when NumSrcArgs is 0; otherwise it is dst
// followed by ", ", the sources, and then opsel, bytesel, 3PMods, clamp and
// omod in that order (each empty unless its controlling bit is set).
2075 class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp,
2076 bit HasOpSel, bit HasOMod, bit IsVOP3P,
2077 bit HasModifiers, bit Src0HasMods,
2078 bit Src1HasMods, bit Src2HasMods, ValueType DstVT = i32,
2079 bit HasByteSel = 0> {
2080 string dst = !if(HasDst,
2081 !if(!eq(DstVT.Size, 1),
2084 ""); // use $sdst for VOPC
2085 string src0nomods = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
2086 string src1nomods = !if(!eq(NumSrcArgs, 1), "",
2087 !if(!eq(NumSrcArgs, 2), " $src1",
2089 string src2nomods = !if(!eq(NumSrcArgs, 3), " $src2", "");
2091 string src0mods = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
2092 string src1mods = !if(!eq(NumSrcArgs, 1), "",
2093 !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
2094 " $src1_modifiers,"));
2095 string src2mods = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
2097 string src0 = !if(Src0HasMods, src0mods, src0nomods);
2098 string src1 = !if(Src1HasMods, src1mods, src1nomods);
2099 string src2 = !if(Src2HasMods, src2mods, src2nomods);
2100 string opsel = !if(HasOpSel, "$op_sel", "");
2101 string bytesel = !if(HasByteSel, "$byte_sel", "");
// For VOP3P: $op_sel_hi (when HasOpSel) and the neg_lo/neg_hi modifiers
// (when HasModifiers).
2102 string 3PMods = !if(IsVOP3P,
2103 !if(HasOpSel, "$op_sel_hi", "")
2104 #!if(HasModifiers, "$neg_lo$neg_hi", ""),
2106 string clamp = !if(HasClamp, "$clamp", "");
2107 string omod = !if(HasOMod, "$omod", "");
2109 string ret = dst#!if(!gt(NumSrcArgs,0),", "#src0#src1#src2#opsel#bytesel#3PMods#clamp#omod, "");
// Appends the DPP control operands to an already-built VOP3 operand string.
2113 class getAsmVOP3DPP<string base> {
2114 string ret = base # " $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
// VOP3 DPP16 assembly string: getAsmVOP3DPP's string with "$fi" appended.
2117 class getAsmVOP3DPP16<string base> {
2118 string ret = getAsmVOP3DPP<base>.ret # "$fi";
// VOP3 DPP8 assembly string: the base operand string followed by " $dpp8$fi".
2121 class getAsmVOP3DPP8<string base> {
2122 string ret = base # " $dpp8$fi";
// Assembly operand string for SDWA forms, assembled as dst # args # sdwa.
// Sources are always printed through their $srcN_modifiers operands. The
// sdwa part lists the selector operands; with two sources and a 1-bit DstVT
// only the source selectors are printed.
2126 class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
2127 string dst = !if(HasDst,
2128 !if(!eq(DstVT.Size, 1),
2129 " vcc", // use vcc token as dst for VOPC instructions
2132 string src0 = "$src0_modifiers";
2133 string src1 = "$src1_modifiers";
2134 string args = !if(!eq(NumSrcArgs, 0),
2136 !if(!eq(NumSrcArgs, 1),
2138 ", "#src0#", "#src1#"$clamp"
2141 string sdwa = !if(!eq(NumSrcArgs, 0),
2143 !if(!eq(NumSrcArgs, 1),
2144 " $dst_sel $dst_unused $src0_sel",
2145 !if(!eq(DstVT.Size, 1),
2146 " $src0_sel $src1_sel", // No dst_sel and dst_unused for VOPC
2147 " $dst_sel $dst_unused $src0_sel $src1_sel"
2151 string ret = dst#args#sdwa;
// Assembly operand string for the SDWA9 forms, assembled as dst # args # sdwa.
// out_mods is "$clamp", extended with "$omod" when HasOMod is set; it is
// printed in front of the selector operands except in the two-source form
// with a 1-bit DstVT, which prints only the source selectors.
2154 class getAsmSDWA9 <bit HasDst, bit HasOMod, int NumSrcArgs,
2155 ValueType DstVT = i32> {
2156 string dst = !if(HasDst,
2157 !if(!eq(DstVT.Size, 1),
2161 string src0 = "$src0_modifiers";
2162 string src1 = "$src1_modifiers";
2163 string out_mods = !if(!not(HasOMod), "$clamp", "$clamp$omod");
2164 string args = !if(!eq(NumSrcArgs, 0), "",
2165 !if(!eq(NumSrcArgs, 1),
2170 string sdwa = !if(!eq(NumSrcArgs, 0), "",
2171 !if(!eq(NumSrcArgs, 1),
2172 out_mods#" $dst_sel $dst_unused $src0_sel",
2173 !if(!eq(DstVT.Size, 1),
2174 " $src0_sel $src1_sel", // No dst_sel, dst_unused and output modifiers for VOPC
2175 out_mods#" $dst_sel $dst_unused $src0_sel $src1_sel"
2179 string ret = dst#args#sdwa;
// Computes a bit from NumSrcArgs and the operand widths: the visible tests
// are NumSrcArgs == 3 and whether DstVT, Src0VT or Src1VT is 64 bits wide.
// NOTE(review): the value returned by each branch is not visible in this
// excerpt — confirm against the full file before relying on this summary.
2182 class getHas64BitOps <int NumSrcArgs, ValueType DstVT, ValueType Src0VT,
2184 bit ret = !if(!eq(NumSrcArgs, 3),
2186 !if(!eq(DstVT.Size, 64),
2188 !if(!eq(Src0VT.Size, 64),
2190 !if(!eq(Src1VT.Size, 64),
// Whether a profile can have an SDWA form. Returns 0 for three-source
// profiles and for a 64-bit destination; Src0VT and Src1VT are then tested
// for a 64-bit width as well.
2199 class getHasSDWA <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
2200 ValueType Src1VT = i32> {
2201 bit ret = !if(!eq(NumSrcArgs, 3),
2202 0, // NumSrcArgs == 3 - No SDWA for VOP3
2203 !if(!eq(DstVT.Size, 64),
2204 0, // 64-bit dst - No SDWA for 64-bit operands
2205 !if(!eq(Src0VT.Size, 64),
2207 !if(!eq(Src1VT.Size, 64),
// Whether a profile can have a (non-VOP3) DPP form, decided purely from the
// source count: three-source profiles yield 0.
2216 class getHasDPP <int NumSrcArgs> {
2217 bit ret = !if(!eq(NumSrcArgs, 3),
2218 0, // NumSrcArgs == 3 - No DPP for VOP3
// True when getHasDPP holds and getHas64BitOps does not.
2222 class getHasExt32BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
2223 ValueType Src1VT = i32> {
2224 bit ret = !and(getHasDPP<NumSrcArgs>.ret,
2225 !not(getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret));
// True when both getHasDPP and getHas64BitOps hold.
2228 class getHasExt64BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
2229 ValueType Src1VT = i32> {
2230 bit ret = !and(getHasDPP<NumSrcArgs>.ret,
2231 getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
2234 // Function that checks if instruction supports DPP and SDWA
// The result is the OR of the two checks: it is set when either getHasDPP or
// getHasSDWA holds, not only when both do.
2235 class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
2236 ValueType Src1VT = i32> {
2237 bit ret = !or(getHasDPP<NumSrcArgs>.ret,
2238 getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
2241 // Return an AGPR+VGPR operand class for the given VGPR register class.
// Selection is by RC.Size. The !cond has no catch-all case, so a register
// class whose size is not listed below is rejected by TableGen.
2242 class getLdStRegisterOperand<RegisterClass RC> {
2243 RegisterOperand ret =
2244 !cond(!eq(RC.Size, 32) : AVLdSt_32,
2245 !eq(RC.Size, 64) : AVLdSt_64,
2246 !eq(RC.Size, 96) : AVLdSt_96,
2247 !eq(RC.Size, 128) : AVLdSt_128,
2248 !eq(RC.Size, 160) : AVLdSt_160,
2249 !eq(RC.Size, 1024) : AVLdSt_1024);
// Whether a profile can have a VOP3 DPP form. Unlike getHasDPP this does not
// look at the source count; it tests DstVT and all three source types for a
// 64-bit width, returning 0 for a 64-bit destination.
2252 class getHasVOP3DPP <ValueType DstVT = i32, ValueType Src0VT = i32,
2253 ValueType Src1VT = i32, ValueType Src2VT = i32> {
2254 bit ret = !if(!eq(DstVT.Size, 64),
2255 0, // 64-bit dst No DPP for 64-bit operands
2256 !if(!eq(Src0VT.Size, 64),
2258 !if(!eq(Src1VT.Size, 64),
2260 !if(!eq(Src2VT.Size, 64),
// Describes one VALU operand signature. _ArgVT lists the value types as
// [dst, src0, src1, src2]; every other field (register operands, modifier
// operands, capability bits, operand dags and assembly strings) is derived
// from those four types and from _EnableClamp. Derived profiles in this file
// adjust individual fields with `let`.
2275 class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
2277 field list<ValueType> ArgVT = _ArgVT;
2278 field bit EnableClamp = _EnableClamp;
2279 field bit IsTrue16 = 0;
2280 field bit IsRealTrue16 = 0;
2281 field bit IsInvalidSingleUseConsumer = 0;
2282 field bit IsInvalidSingleUseProducer = 0;
// Value types, by position in ArgVT.
2284 field ValueType DstVT = ArgVT[0];
2285 field ValueType Src0VT = ArgVT[1];
2286 field ValueType Src1VT = ArgVT[2];
2287 field ValueType Src2VT = ArgVT[3];
// Register operands per encoding. The DPP, 64-bit and VOP3 DPP destination
// operands all default to DstRC (DstRCVOP3DPP via DstRC64).
2288 field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
2289 field RegisterOperand DstRCDPP = DstRC;
2290 field RegisterOperand DstRC64 = DstRC;
2291 field RegisterOperand DstRCVOP3DPP = DstRC64;
2292 field RegisterOperand DstRCSDWA = getSDWADstForVT<DstVT>.ret;
2293 field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT, IsTrue16>.ret;
2294 field RegisterOperand Src1RC32 = getVregSrcForVT<Src1VT>.ret;
2295 field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
2296 field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
2297 field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
2298 field RegisterOperand Src0DPP = getVregSrcForVT<Src0VT>.ret;
2299 field RegisterOperand Src1DPP = getVregSrcForVT<Src1VT>.ret;
2300 field RegisterOperand Src2DPP = getVregSrcForVT<Src2VT>.ret;
// Src0 of the VOP3 DPP form is fixed rather than derived from Src0VT.
2301 field RegisterOperand Src0VOP3DPP = VGPRSrc_32;
2302 field RegisterOperand Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT>.ret;
2303 field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret;
2304 field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
// NOTE(review): Src1SDWA is derived from Src0VT, not Src1VT, unlike every
// other Src1* operand field in this class — confirm whether this is
// intentional before relying on it for profiles whose sources differ in type.
2305 field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret;
// Source-modifier operands per encoding.
2306 field Operand Src0Mod = getSrcMod<Src0VT>.ret;
2307 field Operand Src1Mod = getSrcMod<Src1VT>.ret;
2308 field Operand Src2Mod = getSrcMod<Src2VT>.ret;
2309 field Operand Src0ModDPP = getSrcModDPP<Src0VT>.ret;
2310 field Operand Src1ModDPP = getSrcModDPP<Src1VT>.ret;
2311 field Operand Src2ModDPP = getSrcModDPP<Src2VT>.ret;
// Src0 uses getSrcModDPP here while src1/src2 use getSrcModVOP3DPP.
2312 field Operand Src0ModVOP3DPP = getSrcModDPP<Src0VT>.ret;
2313 field Operand Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT>.ret;
2314 field Operand Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT>.ret;
2315 field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
2316 field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret;
// Instruction-kind flags; all default to 0 in the base profile.
2319 field bit IsMAI = 0;
2320 field bit IsVOP3P = 0;
2321 field bit IsDOT = 0;
2322 field bit IsSingle = 0;
2323 field bit IsWMMA = 0;
2324 field bit IsSWMMAC = 0;
2326 field bit IsFP8SrcByteSel = 0;
2327 field bit IsFP8DstByteSel = 0;
2328 field bit IsFP8ByteSel = !or(IsFP8SrcByteSel, IsFP8DstByteSel);
// An operand is considered present when its value type is not `untyped`.
2330 field bit HasDst = !ne(DstVT.Value, untyped.Value);
2331 field bit HasDst32 = HasDst;
2332 field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case
2333 field bit EmitDstSel = EmitDst;
2334 field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret;
2335 field bit HasSrc0 = !ne(Src0VT.Value, untyped.Value);
2336 field bit HasSrc1 = !ne(Src1VT.Value, untyped.Value);
2337 field bit HasSrc2 = !ne(Src2VT.Value, untyped.Value);
2339 field bit HasSrc0FloatMods = Src0VT.isFP;
2340 field bit HasSrc1FloatMods = Src1VT.isFP;
2341 field bit HasSrc2FloatMods = Src2VT.isFP;
2343 field bit HasSrc0IntMods = isIntType<Src0VT>.ret;
2344 field bit HasSrc1IntMods = isIntType<Src1VT>.ret;
2345 field bit HasSrc2IntMods = isIntType<Src2VT>.ret;
// Clamp is available when Src0VT is a modifier type or the profile was
// created with _EnableClamp; the FP/Int variants split that by DstVT.isFP.
2347 field bit HasClamp = !or(isModifierType<Src0VT>.ret, EnableClamp);
2348 field bit HasSDWAClamp = EmitDst;
2349 field bit HasFPClamp = !and(DstVT.isFP, HasClamp);
2350 field bit HasIntClamp = !if(DstVT.isFP, 0, HasClamp);
2351 field bit HasClampLo = HasClamp;
2352 field bit HasClampHi = !and(DstVT.isVector, HasClamp);
2353 field bit HasHigh = 0;
2355 field bit IsPacked = Src0VT.isVector;
2356 field bit HasOpSel = IsPacked;
2357 field bit HasOMod = !if(IsVOP3P, 0, DstVT.isFP);
2358 field bit HasSDWAOMod = DstVT.isFP;
2360 field bit HasModifiers = !or(isModifierType<Src0VT>.ret,
2361 isModifierType<Src1VT>.ret,
2362 isModifierType<Src2VT>.ret,
// src0 gets modifiers whenever the profile has any; src1/src2 additionally
// require their own type to accept float or int modifiers.
2365 field bit HasSrc0Mods = HasModifiers;
2366 field bit HasSrc1Mods = !if(HasModifiers, !or(HasSrc1FloatMods, HasSrc1IntMods), 0);
2367 field bit HasSrc2Mods = !if(HasModifiers, !or(HasSrc2FloatMods, HasSrc2IntMods), 0);
// Which extended encodings (DPP, VOP3 DPP, SDWA) the profile supports.
2369 field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
2370 field bit HasExtVOP3DPP = getHasVOP3DPP<DstVT, Src0VT, Src1VT, Src2VT>.ret;
2371 field bit HasExtDPP = !or(getHasDPP<NumSrcArgs>.ret, HasExtVOP3DPP);
2372 field bit HasExt32BitDPP = getHasExt32BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
2373 field bit HasExt64BitDPP = getHasExt64BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
2374 field bit HasExtSDWA = getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
2375 field bit HasExtSDWA9 = HasExtSDWA;
2376 field int NeedPatGen = PatGenMode.NoPattern;
2378 field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
2379 field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
2380 field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods);
2382 field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs));
2384 // VOP3b instructions are a special case with a second explicit
2385 // output. This is manually overridden for them.
2386 field dag Outs32 = Outs;
2387 field dag Outs64 = !if(HasDst,(outs DstRC64:$vdst),(outs));
2388 field dag OutsDPP = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret;
2389 field dag OutsDPP8 = OutsDPP;
2390 field dag OutsVOP3DPP = getOutsDPP<HasDst, DstVT, DstRCVOP3DPP>.ret;
2391 field dag OutsVOP3DPP8 = OutsVOP3DPP;
2392 field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret;
// Input operand dags, one per encoding.
2394 field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
2395 field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
2396 HasClamp, HasModifiers, HasSrc2Mods,
2397 HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
2398 field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
2399 NumSrcArgs, HasClamp, HasOpSel,
2400 Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
2401 field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
2402 NumSrcArgs, HasClamp, HasOMod,
2403 getOpSelMod<Src0VT>.ret,
2404 getOpSelMod<Src1VT>.ret,
2405 getOpSelMod<Src2VT>.ret>.ret;
2406 field dag InsDPP = !if(HasExtDPP,
2407 getInsDPP<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs,
2408 HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret,
2410 field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs,
2411 HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
2412 field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, Src2DPP,
2413 NumSrcArgs, HasModifiers,
2414 Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
// Two candidate operand lists for the VOP3 DPP forms; InsVOP3Base below
// picks the VOP3P one when IsVOP3P is set.
2415 defvar InsVOP3DPPBase = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP,
2416 Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
2417 Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret;
2418 defvar InsVOP3PDPPBase = getInsVOP3P<Src0VOP3DPP, Src1VOP3DPP,
2419 Src2VOP3DPP, NumSrcArgs, HasClamp, HasOpSel,
2420 Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP>.ret;
2422 field dag InsVOP3Base = !if(IsVOP3P, InsVOP3PDPPBase, InsVOP3DPPBase);
2424 field dag InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
2425 field dag InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
2426 field dag InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
2427 field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
2428 HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
2430 field dag InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X);
2431 // It is a slight misnomer to use the deferred f32 operand type for non-float
2432 // operands, but this operand type will only be used if the other dual
2433 // component is FMAAK or FMAMK
2434 field dag InsVOPDXDeferred = (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X, VGPR_32:$vsrc1X);
2435 field dag InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y);
// NOTE(review): the $src0Y operand type is chosen from Src1VT.Size here,
// whereas the X variant above tests Src0VT.Size — confirm this is intended.
2436 field dag InsVOPDYDeferred = (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y, VGPR_32:$vsrc1Y);
// Assembly operand strings, one per encoding.
2439 field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
2440 field string AsmDPP = !if(HasExtDPP,
2441 getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret, "");
2442 field string AsmDPP16 = getAsmDPP16<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
2443 // DPP8 encoding has no fields for modifiers, and it is enforced by setting
2444 // the asm operand name via this HasModifiers flag
2445 field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0 /*HasModifiers*/, DstVT>.ret;
// HasModifiers is passed four times: once as HasModifiers and once for each
// of the Src0/Src1/Src2HasMods parameters of getAsmVOP3Base.
2446 field string AsmVOP3Base = getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
2447 HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, HasModifiers,
2448 HasModifiers, DstVT, IsFP8ByteSel>.ret;
2449 field string Asm64 = AsmVOP3Base;
2450 field string AsmVOP3P = getAsmVOP3P<NumSrcArgs, HasModifiers, HasClamp, HasOpSel>.ret;
2451 field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
2456 HasSrc2FloatMods>.ret;
2457 field string AsmVOP3DPP = getAsmVOP3DPP<AsmVOP3Base>.ret;
2458 field string AsmVOP3DPP16 = getAsmVOP3DPP16<AsmVOP3Base>.ret;
2459 field string AsmVOP3DPP8 = getAsmVOP3DPP8<AsmVOP3Base>.ret;
2460 field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
2461 field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
2462 field string AsmVOPDX = getAsmVOPDPart<NumSrcArgs, "X">.ret;
2463 field string AsmVOPDY = getAsmVOPDPart<NumSrcArgs, "Y">.ret;
2464 field string TieRegDPP = "$old";
// Profile with the same ArgVT as p but with extension-encoding capability
// bits cleared. Only p.ArgVT is forwarded to VOPProfile, so any other
// customisation carried by p (for example a non-default EnableClamp) is not
// inherited.
2467 class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
2470 let HasExtVOP3DPP = 0;
2471 let HasExt32BitDPP = 0;
2472 let HasExt64BitDPP = 0;
2474 let HasExtSDWA9 = 0;
// Profile with the same ArgVT as p whose NeedPatGen is set to `mode`
// (PatGenMode.NoPattern by default). Only p.ArgVT is forwarded to VOPProfile.
2477 class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern> : VOPProfile <p.ArgVT> {
2478 let NeedPatGen = mode;
2481 // VOPC_Profile_t16, VOPC_NoSdst_Profile_t16, VOPC_Class_Profile_t16,
2482 // VOPC_Class_NoSdst_Profile_t16, and VOP_MAC_F16_t16 do not inherit from this
2483 // class, so copy changes to this class in those profiles
// Rebuilds a profile from P.ArgVT with the register and modifier operands
// re-derived using the IsTrue16 = 1 variants of the helper classes. Only
// P.ArgVT is forwarded to VOPProfile.
2484 class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
2486 let IsRealTrue16 = 1;
2489 let HasModifiers = 1; // All instructions at least have OpSel.
2491 // Most DstVT are 16-bit, but not all.
2492 let DstRC = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 0 /*IsVOP3Encoding*/>.ret;
2493 let DstRC64 = getVALUDstForVT<DstVT>.ret;
2494 let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
2495 let Src1RC32 = getVregSrcForVT<Src1VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
2496 let Src0DPP = getVregSrcForVT<Src0VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
2497 let Src1DPP = getVregSrcForVT<Src1VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
2498 let Src2DPP = getVregSrcForVT<Src2VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
2499 let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0 /*IsFake16*/>.ret;
2500 let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0 /*IsFake16*/>.ret;
2501 let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0 /*IsFake16*/>.ret;
2502 let Src0VOP3DPP = VGPRSrc_16;
2503 let Src0ModVOP3DPP = getSrcModVOP3DPP<Src0VT, 0 /*IsFake16*/>.ret;
2504 let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0 /*IsFake16*/>.ret;
2505 let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0 /*IsFake16*/>.ret;
// NOTE(review): DstRC64 is assigned a second time here (it is also set a few
// lines above with the default getVALUDstForVT arguments). Presumably this
// later assignment is the one that takes effect — confirm, and consider
// dropping the earlier one.
2507 let DstRC64 = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 1 /*IsVOP3Encoding*/>.ret;
2508 let Src0RC64 = getVOP3SrcForVT<Src0VT, 1 /*IsTrue16*/>.ret;
2509 let Src1RC64 = getVOP3SrcForVT<Src1VT, 1 /*IsTrue16*/>.ret;
2510 let Src2RC64 = getVOP3SrcForVT<Src2VT, 1 /*IsTrue16*/>.ret;
2511 let Src0Mod = getSrcMod<Src0VT, 1 /*IsTrue16*/>.ret;
2512 let Src1Mod = getSrcMod<Src1VT, 1 /*IsTrue16*/>.ret;
2513 let Src2Mod = getSrcMod<Src2VT, 1 /*IsTrue16*/>.ret;
// Rebuilds a profile from P.ArgVT with the 32-bit-encoding and DPP operands
// re-derived using the IsFake16 = 1 variants of the helper classes. DstRC64
// keeps the default getVALUDstForVT result. Only P.ArgVT is forwarded to
// VOPProfile.
2516 class VOPProfile_Fake16<VOPProfile P> : VOPProfile<P.ArgVT> {
2518 // Most DstVT are 16-bit, but not all
2519 let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
2520 let DstRC64 = getVALUDstForVT<DstVT>.ret;
2521 let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
2522 let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
2523 let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
2524 let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
2525 let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
2526 let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
2527 let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
2530 def VOP_F16_F16 : VOPProfile<[f16, f16, untyped, untyped]>;
2531 def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>;
2532 def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>;
2533 def VOP_I16_I16 : VOPProfile <[i16, i16, untyped, untyped]>;
2535 def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
2536 def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
2537 def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>;
2538 def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>;
2539 def VOP_I16_I16_I16_ARITH : VOPProfile <[i16, i16, i16, untyped], /*EnableClamp=*/1>;
// NOTE(review): the next three profiles pass five value types. VOPProfile
// reads only ArgVT[0] through ArgVT[3], so the trailing `untyped` looks
// unused — confirm nothing else indexes ArgVT[4] before removing it.
2541 def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>;
2542 def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;
2544 def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32, untyped]>;
2545 def VOP_I32_I16 : VOPProfile <[i32, i16, untyped, untyped]>;
2546 def VOP_I16_I32 : VOPProfile <[i16, i32, untyped, untyped]>;
2548 def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>;
2549 def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>;
2550 def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;
2552 def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>;
2553 def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
2554 def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>;
2555 def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>;
2557 def VOP_F16_V2F16_V2F16_F16 : VOPProfile <[f16, v2f16, v2f16, f16]>;
2558 def VOP_BF16_V2BF16_V2BF16_BF16: VOPProfile <[bf16, v2bf16, v2bf16, bf16]>;
2559 def VOP_F32_V2BF16_V2BF16_F32 : VOPProfile <[f32, v2bf16, v2bf16, f32]>;
2561 def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>;
2563 def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;
2565 def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
2566 def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>;
2567 def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>;
2568 def VOP_F64_F32 : VOPProfile <[f64, f32, untyped, untyped]>;
2569 def VOP_F64_F64 : VOPProfile <[f64, f64, untyped, untyped]>;
2570 def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>;
2571 def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>;
2572 def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>;
2573 def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
2574 def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>;
2575 def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>;
2576 def VOP_I64_I64 : VOPProfile <[i64, i64, untyped, untyped]>;
2578 def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>;
2579 def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;
2580 def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>;
2581 def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>;
2582 def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
2583 def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
2584 def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
2585 def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
2586 def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], /*EnableClamp=*/1>;
2587 def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
2588 def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>;
2590 def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
2591 def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
2592 def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
2594 def VOP_F16_F32_F16_F32 : VOPProfile <[f16, f32, f16, f32]>;
2595 def VOP_F32_F32_F16_F16 : VOPProfile <[f32, f32, f16, f16]>;
2596 def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
2597 def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
2598 def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
2599 def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
2600 def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
2601 def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;
2602 def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>;
2604 def VOP_F32_V2F16_V2F16_F32 : VOPProfile <[f32, v2f16, v2f16, f32]>;
2605 def VOP_I32_V2I16_V2I16_I32 : VOPProfile <[i32, v2i16, v2i16, i32]>;
2607 def VOP_V4F32_F32_F32_V4F32 : VOPProfile <[v4f32, f32, f32, v4f32]>;
2608 def VOP_V16F32_F32_F32_V16F32 : VOPProfile <[v16f32, f32, f32, v16f32]>;
2609 def VOP_V32F32_F32_F32_V32F32 : VOPProfile <[v32f32, f32, f32, v32f32]>;
2610 def VOP_V4F32_V4F16_V4F16_V4F32 : VOPProfile <[v4f32, v4f16, v4f16, v4f32]>;
2611 def VOP_V16F32_V4F16_V4F16_V16F32 : VOPProfile <[v16f32, v4f16, v4f16, v16f32]>;
2612 def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile <[v32f32, v4f16, v4f16, v32f32]>;
2613 def VOP_V4F32_V2I16_V2I16_V4F32 : VOPProfile <[v4f32, v2i16, v2i16, v4f32]>;
2614 def VOP_V16F32_V2I16_V2I16_V16F32 : VOPProfile <[v16f32, v2i16, v2i16, v16f32]>;
2615 def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile <[v32f32, v2i16, v2i16, v32f32]>;
2616 def VOP_V4I32_I32_I32_V4I32 : VOPProfile <[v4i32, i32, i32, v4i32]>;
2617 def VOP_V16I32_I32_I32_V16I32 : VOPProfile <[v16i32, i32, i32, v16i32]>;
2618 def VOP_V32I32_I32_I32_V32I32 : VOPProfile <[v32i32, i32, i32, v32i32]>;
2620 def VOP_V4F64_F64_F64_V4F64 : VOPProfile <[v4f64, f64, f64, v4f64]>;
2621 def VOP_V1F64_F64_F64_V1F64 : VOPProfile <[v1f64, f64, f64, v1f64]>;
2623 def VOP_V2F32_V2F32_V2F32_V2F32 : VOPProfile <[v2f32, v2f32, v2f32, v2f32]>;
2624 def VOP_V2F32_V2F32_V2F32 : VOPProfile <[v2f32, v2f32, v2f32, untyped]>;
2625 def VOP_V2I32_V2I32_V2I32 : VOPProfile <[v2i32, v2i32, v2i32, untyped]>;
2626 def VOP_V4F32_V4I16_V4I16_V4F32 : VOPProfile <[v4f32, v4i16, v4i16, v4f32]>;
2627 def VOP_V16F32_V4I16_V4I16_V16F32 : VOPProfile <[v16f32, v4i16, v4i16, v16f32]>;
2628 def VOP_V32F32_V4I16_V4I16_V32F32 : VOPProfile <[v32f32, v4i16, v4i16, v32f32]>;
2630 def VOP_V4I32_I64_I64_V4I32 : VOPProfile <[v4i32, i64, i64, v4i32]>;
2631 def VOP_V16I32_I64_I64_V16I32 : VOPProfile <[v16i32, i64, i64, v16i32]>;
2632 def VOP_V4F32_V2F32_V2F32_V4F32 : VOPProfile <[v4f32, v2f32, v2f32, v4f32]>;
2633 def VOP_V16F32_V2F32_V2F32_V16F32 : VOPProfile <[v16f32, v2f32, v2f32, v16f32]>;
2634 def VOP_V4F32_I64_I64_V4F32 : VOPProfile <[v4f32, i64, i64, v4f32]>;
2635 def VOP_V16F32_I64_I64_V16F32 : VOPProfile <[v16f32, i64, i64, v16f32]>;
2637 def VOP_V4F32_V4F16_V8F16_I32 : VOPProfile <[v4f32, v4f16, v8f16, i32]>;
2638 def VOP_V16F32_V4F16_V8F16_I32 : VOPProfile <[v16f32, v4f16, v8f16, i32]>;
2639 def VOP_V4F32_V4I16_V8I16_I32 : VOPProfile <[v4f32, v4i16, v8i16, i32]>;
2640 def VOP_V16F32_V4I16_V8I16_I32 : VOPProfile <[v16f32, v4i16, v8i16, i32]>;
2641 def VOP_V4I32_V2I32_V4I32_I32 : VOPProfile <[v4i32, v2i32, v4i32, i32]>;
2642 def VOP_V16I32_V2I32_V4I32_I32 : VOPProfile <[v16i32, v2i32, v4i32, i32]>;
2643 def VOP_V4F32_V2I32_V4I32_I32 : VOPProfile <[v4f32, v2i32, v4i32, i32]>;
2644 def VOP_V16F32_V2I32_V4I32_I32 : VOPProfile <[v16f32, v2i32, v4i32, i32]>;
2646 class Commutable_REV <string revOp, bit isOrig> {
2647 string RevOp = revOp;
2648 bit IsOrig = isOrig;
2651 //===----------------------------------------------------------------------===//
2652 // Interpolation opcodes
2653 //===----------------------------------------------------------------------===//
2655 class VINTRPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVINTRPDst">;
2657 class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
2658 VINTRPCommon <outs, ins, "", pattern>,
2659 SIMCInstr<opName, SIEncodingFamily.NONE> {
2661 let isCodeGenOnly = 1;
2664 // FIXME-GFX10: WIP.
2665 class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins,
2666 string asm, int encodingFamily> :
2667 VINTRPCommon <outs, ins, asm, []>,
2669 SIMCInstr<opName, encodingFamily> {
2672 class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
2674 VINTRPCommon <outs, ins, asm, []>,
2676 SIMCInstr<opName, SIEncodingFamily.VI> {
2677 let AssemblerPredicate = isGFX8GFX9;
2678 let DecoderNamespace = "GFX8";
2681 // FIXME-GFX10: WIP.
// Defines, for one interpolation opcode, a pseudo instruction plus the real
// _si, _vi and _gfx10 encodings. Only the pseudo receives `pattern`. The
// _gfx10 variant reuses VINTRP_Real_si with the GFX10 encoding family and its
// own assembler predicate / decoder namespace.
2682 multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm,
2683 list<dag> pattern = []> {
2684 def "" : VINTRP_Pseudo <NAME, outs, ins, pattern>;
2686 let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
2687 def _si : VINTRP_Real_si <op, NAME, outs, ins, asm, SIEncodingFamily.SI>;
2688 } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
2690 def _vi : VINTRP_Real_vi <op, NAME, outs, ins, asm>;
2692 let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
2693 def _gfx10 : VINTRP_Real_si<op, NAME, outs, ins, asm, SIEncodingFamily.GFX10>;
2694 } // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
2697 //===----------------------------------------------------------------------===//
2698 // Vector instruction mappings
2699 //===----------------------------------------------------------------------===//
2701 // Maps an opcode in e32 form to its e64 equivalent
2702 def getVOPe64 : InstrMapping {
2703 let FilterClass = "VOP";
2704 let RowFields = ["OpName"];
2705 let ColFields = ["Size", "VOP3"];
2706 let KeyCol = ["4", "0"];
2707 let ValueCols = [["8", "1"]];
2710 // Maps an opcode in e64 form to its e32 equivalent
2711 def getVOPe32 : InstrMapping {
2712 let FilterClass = "VOP";
2713 let RowFields = ["OpName"];
2714 let ColFields = ["Size", "VOP3"];
2715 let KeyCol = ["8", "1"];
2716 let ValueCols = [["4", "0"]];
2719 // Maps ordinary instructions to their SDWA counterparts
2720 def getSDWAOp : InstrMapping {
2721 let FilterClass = "VOP";
2722 let RowFields = ["OpName"];
2723 let ColFields = ["AsmVariantName"];
2724 let KeyCol = ["Default"];
2725 let ValueCols = [["SDWA"]];
2728 // Maps SDWA instructions to their ordinary counterparts
// Inverse of getSDWAOp: the key is the "SDWA" AsmVariantName entry and the
// result is the "Default" entry with the same OpName.
2729 def getBasicFromSDWAOp : InstrMapping {
2730 let FilterClass = "VOP";
2731 let RowFields = ["OpName"];
2732 let ColFields = ["AsmVariantName"];
2733 let KeyCol = ["SDWA"];
2734 let ValueCols = [["Default"]];
2737 // Maps ordinary instructions to their DPP counterparts
// Rows group "VOP" instructions by OpName; the key is the "Default"
// AsmVariantName entry and the result is the "DPP" entry.
2738 def getDPPOp32 : InstrMapping {
2739 let FilterClass = "VOP";
2740 let RowFields = ["OpName"];
2741 let ColFields = ["AsmVariantName"];
2742 let KeyCol = ["Default"];
2743 let ValueCols = [["DPP"]];
// Maps instructions whose AsmVariantName is "VOP3" to the entry with the same
// OpName whose AsmVariantName is "VOP3_DPP" (the VOP3 analogue of getDPPOp32).
2746 def getDPPOp64 : InstrMapping {
2747 let FilterClass = "VOP";
2748 let RowFields = ["OpName"];
2749 let ColFields = ["AsmVariantName"];
2750 let KeyCol = ["VOP3"];
2751 let ValueCols = [["VOP3_DPP"]];
2754 // Maps a commuted opcode to its original version
// Rows group "Commutable_REV" instructions by RevOp; the result is the entry
// with IsOrig = 1.
2755 def getCommuteOrig : InstrMapping {
2756 let FilterClass = "Commutable_REV";
2757 let RowFields = ["RevOp"];
2758 let ColFields = ["IsOrig"];
// NOTE(review): the KeyCol line (listing line 2759) is not visible here;
// presumably it selects IsOrig = 0 - confirm against the full file.
2760 let ValueCols = [["1"]];
2763 // Maps an original opcode to its commuted version
// Rows group "Commutable_REV" instructions by RevOp; the result is the entry
// with IsOrig = 0.
2764 def getCommuteRev : InstrMapping {
2765 let FilterClass = "Commutable_REV";
2766 let RowFields = ["RevOp"];
2767 let ColFields = ["IsOrig"];
// NOTE(review): the KeyCol line (listing line 2768) is not visible here;
// presumably it selects IsOrig = 1 - confirm against the full file.
2769 let ValueCols = [["0"]];
// Maps a pseudo instruction (the SIMCInstr entry whose Subtarget equals
// SIEncodingFamily.NONE) to the entries sharing its PseudoInstr value, with
// one result column per encoding family listed in ValueCols.
2772 def getMCOpcodeGen : InstrMapping {
2773 let FilterClass = "SIMCInstr";
2774 let RowFields = ["PseudoInstr"];
2775 let ColFields = ["Subtarget"];
2776 let KeyCol = [!cast<string>(SIEncodingFamily.NONE)];
2777 // These columns must be kept in sync with the SIEncodingFamily enumeration.
2778 let ValueCols = [[!cast<string>(SIEncodingFamily.SI)],
2779 [!cast<string>(SIEncodingFamily.VI)],
2780 [!cast<string>(SIEncodingFamily.SDWA)],
2781 [!cast<string>(SIEncodingFamily.SDWA9)],
2782 // GFX80 encoding is added to work around a multiple matching
2783 // issue for buffer instructions with unpacked d16 data. This
2784 // does not actually change the encoding, and thus may be
// NOTE(review): the end of the sentence above (listing line 2785) is not
// visible here - confirm against the full file.
2786 [!cast<string>(SIEncodingFamily.GFX80)],
2787 [!cast<string>(SIEncodingFamily.GFX9)],
2788 [!cast<string>(SIEncodingFamily.GFX10)],
2789 [!cast<string>(SIEncodingFamily.SDWA10)],
2790 [!cast<string>(SIEncodingFamily.GFX90A)],
2791 [!cast<string>(SIEncodingFamily.GFX940)],
2792 [!cast<string>(SIEncodingFamily.GFX11)],
2793 [!cast<string>(SIEncodingFamily.GFX12)]];
2796 // Get equivalent SOPK instruction.
// Rows group "SOPKInstTable" instructions by BaseCmpOp; the result is the
// entry with IsSOPK = 1.
2797 def getSOPKOp : InstrMapping {
2798 let FilterClass = "SOPKInstTable";
2799 let RowFields = ["BaseCmpOp"];
2800 let ColFields = ["IsSOPK"];
// NOTE(review): the KeyCol line (listing line 2801) is not visible here;
// presumably it selects IsSOPK = 0 - confirm against the full file.
2802 let ValueCols = [["1"]];
// Looks up the IsAddr64 = 1 entry among "MUBUFAddr64Table" instructions that
// share the key instruction's OpName.
2805 def getAddr64Inst : InstrMapping {
2806 let FilterClass = "MUBUFAddr64Table";
2807 let RowFields = ["OpName"];
2808 let ColFields = ["IsAddr64"];
// NOTE(review): the KeyCol line (listing line 2809) is not visible here;
// presumably the key is the IsAddr64 = 0 form - confirm against the full file.
2810 let ValueCols = [["1"]];
// Same filter class, row field, column field and result column
// (IsAddr64 = 1) as getAddr64Inst.
// NOTE(review): the two mappings presumably differ only in KeyCol, which is
// not visible for either of them - confirm against the full file.
2813 def getIfAddr64Inst : InstrMapping {
2814 let FilterClass = "MUBUFAddr64Table";
2815 let RowFields = ["OpName"];
2816 let ColFields = ["IsAddr64"];
// NOTE(review): the KeyCol line (listing line 2817) is not visible here.
2818 let ValueCols = [["1"]];
2821 // Maps a GLOBAL to its SADDR form.
// Rows group "GlobalSaddrTable" instructions by SaddrOp; the result is the
// entry with IsSaddr = 1.
2822 def getGlobalSaddrOp : InstrMapping {
2823 let FilterClass = "GlobalSaddrTable";
2824 let RowFields = ["SaddrOp"];
2825 let ColFields = ["IsSaddr"];
// NOTE(review): the KeyCol line (listing line 2826) is not visible here;
// presumably it selects IsSaddr = 0 - confirm against the full file.
2827 let ValueCols = [["1"]];
2830 // Maps a GLOBAL SADDR to its VADDR form.
// Rows group "GlobalSaddrTable" instructions by SaddrOp; the result is the
// entry with IsSaddr = 0.
2831 def getGlobalVaddrOp : InstrMapping {
2832 let FilterClass = "GlobalSaddrTable";
2833 let RowFields = ["SaddrOp"];
2834 let ColFields = ["IsSaddr"];
// NOTE(review): the KeyCol line (listing line 2835) is not visible here;
// presumably it selects IsSaddr = 1 - confirm against the full file.
2836 let ValueCols = [["0"]];
2839 // Maps a v_cmpx opcode with sdst to opcode without sdst.
// Rows group "VCMPXNoSDstTable" instructions by NoSDstOp; the result is the
// entry with HasSDst = 0.
2840 def getVCMPXNoSDstOp : InstrMapping {
2841 let FilterClass = "VCMPXNoSDstTable";
2842 let RowFields = ["NoSDstOp"];
2843 let ColFields = ["HasSDst"];
// NOTE(review): the KeyCol line (listing line 2844) is not visible here;
// presumably it selects HasSDst = 1 - confirm against the full file.
2845 let ValueCols = [["0"]];
2848 // Maps a SOPP to a SOPP with S_NOP
// Rows group "SOPPRelaxTable" instructions by KeyName; the result is the
// entry with IsRelaxed = 1.
2849 def getSOPPWithRelaxation : InstrMapping {
2850 let FilterClass = "SOPPRelaxTable";
2851 let RowFields = ["KeyName"];
2852 let ColFields = ["IsRelaxed"];
// NOTE(review): the KeyCol line (listing line 2853) is not visible here;
// presumably it selects IsRelaxed = 0 - confirm against the full file.
2854 let ValueCols = [["1"]];
2857 // Maps flat scratch opcodes by addressing modes
// This mapping and the three getFlatScratchInst* mappings that follow share
// the "FlatScratchInst" filter class and group rows by SVOp; they differ only
// in which Mode is the key and which is the result. Here: Mode "SS" -> "ST".
2858 def getFlatScratchInstSTfromSS : InstrMapping {
2859 let FilterClass = "FlatScratchInst";
2860 let RowFields = ["SVOp"];
2861 let ColFields = ["Mode"];
2862 let KeyCol = ["SS"];
2863 let ValueCols = [["ST"]];
// Maps a flat scratch opcode with Mode "SV" to the entry with the same SVOp
// whose Mode is "SS".
2866 def getFlatScratchInstSSfromSV : InstrMapping {
2867 let FilterClass = "FlatScratchInst";
2868 let RowFields = ["SVOp"];
2869 let ColFields = ["Mode"];
2870 let KeyCol = ["SV"];
2871 let ValueCols = [["SS"]];
// Maps a flat scratch opcode with Mode "SVS" to the entry with the same SVOp
// whose Mode is "SV".
2874 def getFlatScratchInstSVfromSVS : InstrMapping {
2875 let FilterClass = "FlatScratchInst";
2876 let RowFields = ["SVOp"];
2877 let ColFields = ["Mode"];
2878 let KeyCol = ["SVS"];
2879 let ValueCols = [["SV"]];
// Maps a flat scratch opcode with Mode "SS" to the entry with the same SVOp
// whose Mode is "SV" (the reverse direction of getFlatScratchInstSSfromSV).
2882 def getFlatScratchInstSVfromSS : InstrMapping {
2883 let FilterClass = "FlatScratchInst";
2884 let RowFields = ["SVOp"];
2885 let ColFields = ["Mode"];
2886 let KeyCol = ["SS"];
2887 let ValueCols = [["SV"]];
// Rows group "MFMATable" instructions by FMAOp; the result is the entry with
// IsMac = 0.
// NOTE(review): the name suggests the IsMac = 0 form is the early-clobber
// variant - confirm where MFMATable is defined.
2890 def getMFMAEarlyClobberOp : InstrMapping {
2891 let FilterClass = "MFMATable";
2892 let RowFields = ["FMAOp"];
2893 let ColFields = ["IsMac"];
// NOTE(review): the KeyCol line (listing line 2894) is not visible here;
// presumably it selects IsMac = 1 - confirm against the full file.
2895 let ValueCols = [["0"]];
2898 // Maps a v_cmp instruction to its v_cmpx equivalent.
// Rows group "VCMPVCMPXTable" instructions by VCMPOp; the result is the entry
// with IsVCMPX = 1.
2899 def getVCMPXOpFromVCMP : InstrMapping {
2900 let FilterClass = "VCMPVCMPXTable";
2901 let RowFields = ["VCMPOp"];
2902 let ColFields = ["IsVCMPX"];
// NOTE(review): the KeyCol line (listing line 2903) is not visible here;
// presumably it selects IsVCMPX = 0 - confirm against the full file.
2904 let ValueCols = [["1"]];
// Searchable table with one entry per record deriving from "VOPD_Component".
// Each entry is emitted as a C++ VOPDComponentInfo holding BaseVOP, VOPDOp and
// CanBeVOPDX; the primary lookup, getVOPDComponentHelper, is keyed on BaseVOP.
2907 def VOPDComponentTable : GenericTable {
2908 let FilterClass = "VOPD_Component";
2909 let CppTypeName = "VOPDComponentInfo";
2910 let Fields = ["BaseVOP", "VOPDOp", "CanBeVOPDX"];
2911 let PrimaryKey = ["BaseVOP"];
2912 let PrimaryKeyName = "getVOPDComponentHelper";
// Secondary lookup into VOPDComponentTable, keyed on VOPDOp instead of the
// table's primary key.
2915 def getVOPDBaseFromComponent : SearchIndex {
2916 let Table = VOPDComponentTable;
2917 let Key = ["VOPDOp"];
// Searchable table with one entry per record deriving from "VOPD_Base". Each
// entry is emitted as a C++ VOPDInfo holding Opcode, OpX, OpY and SubTgt; the
// primary lookup, getVOPDOpcodeHelper, is keyed on Opcode.
2920 def VOPDPairs : GenericTable {
2921 let FilterClass = "VOPD_Base";
2922 let CppTypeName = "VOPDInfo";
2923 let Fields = ["Opcode", "OpX", "OpY", "SubTgt"];
2924 let PrimaryKey = ["Opcode"];
2925 let PrimaryKeyName = "getVOPDOpcodeHelper";
// Secondary lookup into VOPDPairs, keyed on the (OpX, OpY, SubTgt) triple
// instead of the table's primary key.
2928 def getVOPDInfoFromComponentOpcodes : SearchIndex {
2929 let Table = VOPDPairs;
2930 let Key = ["OpX", "OpY", "SubTgt"];
2933 include "SIInstructions.td"
2935 include "DSInstructions.td"
2936 include "MIMGInstructions.td"