1 //===-- SIInstrInfo.td -----------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 def isWave32 : Predicate<"Subtarget->getWavefrontSize() == 32">,
10 AssemblerPredicate <(all_of FeatureWavefrontSize32)>;
11 def isWave64 : Predicate<"Subtarget->getWavefrontSize() == 64">,
12 AssemblerPredicate <(all_of FeatureWavefrontSize64)>;
14 class GCNPredicateControl : PredicateControl {
15 Predicate SIAssemblerPredicate = isGFX6GFX7;
16 Predicate VIAssemblerPredicate = isGFX8GFX9;
19 // Except for the NONE field, this must be kept in sync with the
20 // SIEncodingFamily enum in SIInstrInfo.cpp and the columns of the
21 // getMCOpcodeGen table.
22 def SIEncodingFamily {
37 //===----------------------------------------------------------------------===//
39 //===----------------------------------------------------------------------===//
41 def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;
43 def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD",
44 SDTypeProfile<1, 3, [SDTCisVT<1, v4i32>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>,
45 [SDNPMayLoad, SDNPMemOperand]
48 def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT",
49 SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>]>,
50 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue]
53 def SDTAtomic2_f32 : SDTypeProfile<1, 2, [
54 SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1>
57 def SIatomic_fmin : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMIN", SDTAtomic2_f32,
58 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
61 def SIatomic_fmax : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMAX", SDTAtomic2_f32,
62 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
65 // load_d16_{lo|hi} ptr, tied_input
66 def SIload_d16 : SDTypeProfile<1, 2, [
72 def SDTtbuffer_load : SDTypeProfile<1, 8,
74 SDTCisVT<1, v4i32>, // rsrc
75 SDTCisVT<2, i32>, // vindex(VGPR)
76 SDTCisVT<3, i32>, // voffset(VGPR)
77 SDTCisVT<4, i32>, // soffset(SGPR)
78 SDTCisVT<5, i32>, // offset(imm)
79 SDTCisVT<6, i32>, // format(imm)
80 SDTCisVT<7, i32>, // cachepolicy, swizzled buffer(imm)
81 SDTCisVT<8, i1> // idxen(imm)
84 def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", SDTtbuffer_load,
85 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
86 def SItbuffer_load_d16 : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT_D16",
88 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
90 def SDTtbuffer_store : SDTypeProfile<0, 9,
92 SDTCisVT<1, v4i32>, // rsrc
93 SDTCisVT<2, i32>, // vindex(VGPR)
94 SDTCisVT<3, i32>, // voffset(VGPR)
95 SDTCisVT<4, i32>, // soffset(SGPR)
96 SDTCisVT<5, i32>, // offset(imm)
97 SDTCisVT<6, i32>, // format(imm)
98 SDTCisVT<7, i32>, // cachepolicy, swizzled buffer(imm)
99 SDTCisVT<8, i1> // idxen(imm)
102 def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store,
103 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
104 def SItbuffer_store_d16 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_D16",
106 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
108 def SDTBufferLoad : SDTypeProfile<1, 7,
110 SDTCisVT<1, v4i32>, // rsrc
111 SDTCisVT<2, i32>, // vindex(VGPR)
112 SDTCisVT<3, i32>, // voffset(VGPR)
113 SDTCisVT<4, i32>, // soffset(SGPR)
114 SDTCisVT<5, i32>, // offset(imm)
115 SDTCisVT<6, i32>, // cachepolicy, swizzled buffer(imm)
116 SDTCisVT<7, i1>]>; // idxen(imm)
118 def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
119 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
120 def SIbuffer_load_ubyte : SDNode <"AMDGPUISD::BUFFER_LOAD_UBYTE", SDTBufferLoad,
121 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
122 def SIbuffer_load_ushort : SDNode <"AMDGPUISD::BUFFER_LOAD_USHORT", SDTBufferLoad,
123 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
124 def SIbuffer_load_byte : SDNode <"AMDGPUISD::BUFFER_LOAD_BYTE", SDTBufferLoad,
125 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
126 def SIbuffer_load_short: SDNode <"AMDGPUISD::BUFFER_LOAD_SHORT", SDTBufferLoad,
127 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
128 def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad,
129 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
130 def SIbuffer_load_format_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_TFE", SDTBufferLoad,
131 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
132 def SIbuffer_load_format_d16 : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_D16",
134 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
136 def SDTBufferStore : SDTypeProfile<0, 8,
138 SDTCisVT<1, v4i32>, // rsrc
139 SDTCisVT<2, i32>, // vindex(VGPR)
140 SDTCisVT<3, i32>, // voffset(VGPR)
141 SDTCisVT<4, i32>, // soffset(SGPR)
142 SDTCisVT<5, i32>, // offset(imm)
143 SDTCisVT<6, i32>, // cachepolicy, swizzled buffer(imm)
144 SDTCisVT<7, i1>]>; // idxen(imm)
146 def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore,
147 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
148 def SIbuffer_store_byte: SDNode <"AMDGPUISD::BUFFER_STORE_BYTE",
150 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
151 def SIbuffer_store_short : SDNode <"AMDGPUISD::BUFFER_STORE_SHORT",
153 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
154 def SIbuffer_store_format : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT",
156 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
157 def SIbuffer_store_format_d16 : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT_D16",
159 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
161 class SDBufferAtomic<string opcode> : SDNode <opcode,
163 [SDTCisVT<2, v4i32>, // rsrc
164 SDTCisVT<3, i32>, // vindex(VGPR)
165 SDTCisVT<4, i32>, // voffset(VGPR)
166 SDTCisVT<5, i32>, // soffset(SGPR)
167 SDTCisVT<6, i32>, // offset(imm)
168 SDTCisVT<7, i32>, // cachepolicy(imm)
169 SDTCisVT<8, i1>]>, // idxen(imm)
170 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
173 def SIbuffer_atomic_swap : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SWAP">;
174 def SIbuffer_atomic_add : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_ADD">;
175 def SIbuffer_atomic_sub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SUB">;
176 def SIbuffer_atomic_smin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMIN">;
177 def SIbuffer_atomic_umin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMIN">;
178 def SIbuffer_atomic_smax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMAX">;
179 def SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">;
180 def SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">;
181 def SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">;
182 def SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">;
183 def SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">;
184 def SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">;
185 def SIbuffer_atomic_csub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_CSUB">;
186 def SIbuffer_atomic_fadd : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD">;
187 def SIbuffer_atomic_fmin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMIN">;
188 def SIbuffer_atomic_fmax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMAX">;
190 multiclass SDBufferAtomicNoRet {
191 def "_noret" : PatFrag<
192 (ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
193 node:$offset, node:$cachepolicy, node:$idxen),
194 (!cast<SDNode>(NAME) node:$vdata_in, node:$rsrc, node:$vindex,
195 node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
201 defm SIbuffer_atomic_swap : SDBufferAtomicNoRet;
202 defm SIbuffer_atomic_add : SDBufferAtomicNoRet;
203 defm SIbuffer_atomic_sub : SDBufferAtomicNoRet;
204 defm SIbuffer_atomic_smin : SDBufferAtomicNoRet;
205 defm SIbuffer_atomic_umin : SDBufferAtomicNoRet;
206 defm SIbuffer_atomic_smax : SDBufferAtomicNoRet;
207 defm SIbuffer_atomic_umax : SDBufferAtomicNoRet;
208 defm SIbuffer_atomic_and : SDBufferAtomicNoRet;
209 defm SIbuffer_atomic_or : SDBufferAtomicNoRet;
210 defm SIbuffer_atomic_xor : SDBufferAtomicNoRet;
211 defm SIbuffer_atomic_inc : SDBufferAtomicNoRet;
212 defm SIbuffer_atomic_dec : SDBufferAtomicNoRet;
213 defm SIbuffer_atomic_fadd : SDBufferAtomicNoRet;
214 defm SIbuffer_atomic_fmin : SDBufferAtomicNoRet;
215 defm SIbuffer_atomic_fmax : SDBufferAtomicNoRet;
217 def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
219 [SDTCisVT<0, i32>, // dst
220 SDTCisVT<1, i32>, // src
221 SDTCisVT<2, i32>, // cmp
222 SDTCisVT<3, v4i32>, // rsrc
223 SDTCisVT<4, i32>, // vindex(VGPR)
224 SDTCisVT<5, i32>, // voffset(VGPR)
225 SDTCisVT<6, i32>, // soffset(SGPR)
226 SDTCisVT<7, i32>, // offset(imm)
227 SDTCisVT<8, i32>, // cachepolicy(imm)
228 SDTCisVT<9, i1>]>, // idxen(imm)
229 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
232 def SIbuffer_atomic_cmpswap_noret : PatFrag<
233 (ops node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset,
234 node:$soffset, node:$offset, node:$cachepolicy, node:$idxen),
235 (SIbuffer_atomic_cmpswap node:$src, node:$cmp, node:$rsrc, node:$vindex,
236 node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
241 class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode,
243 [SDTCisPtrTy<0>, // vaddr
244 SDTCisVT<1, ty>]>, // vdata
245 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
248 def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
249 SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>
252 def SIlds : SDNode<"AMDGPUISD::LDS",
253 SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]>
256 def SIload_d16_lo : SDNode<"AMDGPUISD::LOAD_D16_LO",
258 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
261 def SIload_d16_lo_u8 : SDNode<"AMDGPUISD::LOAD_D16_LO_U8",
263 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
266 def SIload_d16_lo_i8 : SDNode<"AMDGPUISD::LOAD_D16_LO_I8",
268 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
271 def SIload_d16_hi : SDNode<"AMDGPUISD::LOAD_D16_HI",
273 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
276 def SIload_d16_hi_u8 : SDNode<"AMDGPUISD::LOAD_D16_HI_U8",
278 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
281 def SIload_d16_hi_i8 : SDNode<"AMDGPUISD::LOAD_D16_HI_I8",
283 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
286 def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
287 SDTypeProfile<0 ,1, [SDTCisInt<0>]>,
288 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
291 def SIfptrunc_round_upward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_UPWARD",
295 def SIfptrunc_round_downward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_DOWNWARD",
299 //===----------------------------------------------------------------------===//
301 //===----------------------------------------------------------------------===//
303 // Returns 1 if the source arguments have modifiers, 0 if they do not.
304 class isFloatType<ValueType SrcVT> {
305 bit ret = !or(!eq(SrcVT.Value, f16.Value),
306 !eq(SrcVT.Value, f32.Value),
307 !eq(SrcVT.Value, f64.Value),
308 !eq(SrcVT.Value, v2f16.Value),
309 !eq(SrcVT.Value, v4f16.Value),
310 !eq(SrcVT.Value, v8f16.Value),
311 !eq(SrcVT.Value, v16f16.Value),
312 !eq(SrcVT.Value, v2f32.Value),
313 !eq(SrcVT.Value, v4f32.Value),
314 !eq(SrcVT.Value, v8f32.Value),
315 !eq(SrcVT.Value, v2f64.Value),
316 !eq(SrcVT.Value, v4f64.Value));
319 // XXX - do v2i16 instructions?
320 class isIntType<ValueType SrcVT> {
321 bit ret = !or(!eq(SrcVT.Value, i8.Value),
322 !eq(SrcVT.Value, i16.Value),
323 !eq(SrcVT.Value, i32.Value),
324 !eq(SrcVT.Value, i64.Value),
325 !eq(SrcVT.Value, v4i16.Value),
326 !eq(SrcVT.Value, v8i16.Value),
327 !eq(SrcVT.Value, v16i16.Value),
328 !eq(SrcVT.Value, v2i32.Value),
329 !eq(SrcVT.Value, v4i32.Value),
330 !eq(SrcVT.Value, v8i32.Value));
333 class isPackedType<ValueType SrcVT> {
334 bit ret = !or(!eq(SrcVT.Value, v2i16.Value),
335 !eq(SrcVT.Value, v2f16.Value),
336 !eq(SrcVT.Value, v4f16.Value),
337 !eq(SrcVT.Value, v2i32.Value),
338 !eq(SrcVT.Value, v2f32.Value),
339 !eq(SrcVT.Value, v4i32.Value),
340 !eq(SrcVT.Value, v4f32.Value),
341 !eq(SrcVT.Value, v8i32.Value),
342 !eq(SrcVT.Value, v8f32.Value));
346 //===----------------------------------------------------------------------===//
347 // PatFrags for global memory operations
348 //===----------------------------------------------------------------------===//
350 defm atomic_load_fmin : binary_atomic_op_all_as<SIatomic_fmin, 0>;
351 defm atomic_load_fmax : binary_atomic_op_all_as<SIatomic_fmax, 0>;
353 //===----------------------------------------------------------------------===//
354 // SDNodes PatFrags for loads/stores with a glue input.
355 // This is for SDNodes and PatFrag for local loads and stores to
356 // enable s_mov_b32 m0, -1 to be glued to the memory instructions.
358 // These mirror the regular load/store PatFrags and rely on special
359 // processing during Select() to add the glued copy.
361 //===----------------------------------------------------------------------===//
363 def AMDGPUld_glue : SDNode <"ISD::LOAD", SDTLoad,
364 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
367 def AMDGPUatomic_ld_glue : SDNode <"ISD::ATOMIC_LOAD", SDTAtomicLoad,
368 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
371 def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr)> {
376 def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> {
378 let IsNonExtLoad = 1;
381 def atomic_load_8_glue : PatFrag<(ops node:$ptr),
382 (AMDGPUatomic_ld_glue node:$ptr)> {
387 def atomic_load_16_glue : PatFrag<(ops node:$ptr),
388 (AMDGPUatomic_ld_glue node:$ptr)> {
393 def atomic_load_32_glue : PatFrag<(ops node:$ptr),
394 (AMDGPUatomic_ld_glue node:$ptr)> {
399 def atomic_load_64_glue : PatFrag<(ops node:$ptr),
400 (AMDGPUatomic_ld_glue node:$ptr)> {
405 def extload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
407 let IsAnyExtLoad = 1;
410 def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
412 let IsSignExtLoad = 1;
415 def zextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
417 let IsZeroExtLoad = 1;
420 def extloadi8_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
425 def zextloadi8_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
430 def extloadi16_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
435 def zextloadi16_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
440 def sextloadi8_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
445 def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
451 let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
452 def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> {
453 let IsNonExtLoad = 1;
456 def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>;
457 def sextloadi8_local_m0 : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>;
458 def zextloadi8_local_m0 : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>;
460 def extloadi16_local_m0 : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>;
461 def sextloadi16_local_m0 : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>;
462 def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>;
463 } // End IsLoad = 1, , AddressSpaces = LoadAddress_local.AddrSpaces
465 def load_align8_local_m0 : PatFrag<(ops node:$ptr),
466 (load_local_m0 node:$ptr)> {
468 int MinAlignment = 8;
471 def load_align16_local_m0 : PatFrag<(ops node:$ptr),
472 (load_local_m0 node:$ptr)> {
474 int MinAlignment = 16;
477 let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
478 def atomic_load_8_local_m0 : PatFrag<(ops node:$ptr),
479 (atomic_load_8_glue node:$ptr)>;
480 def atomic_load_16_local_m0 : PatFrag<(ops node:$ptr),
481 (atomic_load_16_glue node:$ptr)>;
482 def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
483 (atomic_load_32_glue node:$ptr)>;
484 def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr),
485 (atomic_load_64_glue node:$ptr)>;
486 } // End let AddressSpaces = LoadAddress_local.AddrSpaces
489 def AMDGPUst_glue : SDNode <"ISD::STORE", SDTStore,
490 [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
493 def AMDGPUatomic_st_glue : SDNode <"ISD::ATOMIC_STORE", SDTAtomicStore,
494 [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
497 def unindexedstore_glue : PatFrag<(ops node:$val, node:$ptr),
498 (AMDGPUst_glue node:$val, node:$ptr)> {
503 def store_glue : PatFrag<(ops node:$val, node:$ptr),
504 (unindexedstore_glue node:$val, node:$ptr)> {
506 let IsTruncStore = 0;
509 def truncstore_glue : PatFrag<(ops node:$val, node:$ptr),
510 (unindexedstore_glue node:$val, node:$ptr)> {
512 let IsTruncStore = 1;
515 def truncstorei8_glue : PatFrag<(ops node:$val, node:$ptr),
516 (truncstore_glue node:$val, node:$ptr)> {
519 let IsTruncStore = 1;
522 def truncstorei16_glue : PatFrag<(ops node:$val, node:$ptr),
523 (truncstore_glue node:$val, node:$ptr)> {
526 let IsTruncStore = 1;
529 let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
530 def store_local_m0 : PatFrag<(ops node:$val, node:$ptr),
531 (store_glue node:$val, node:$ptr)>;
532 def truncstorei8_local_m0 : PatFrag<(ops node:$val, node:$ptr),
533 (truncstorei8_glue node:$val, node:$ptr)>;
534 def truncstorei16_local_m0 : PatFrag<(ops node:$val, node:$ptr),
535 (truncstorei16_glue node:$val, node:$ptr)>;
538 def store_align8_local_m0 : PatFrag <(ops node:$value, node:$ptr),
539 (store_local_m0 node:$value, node:$ptr)>,
544 def store_align16_local_m0 : PatFrag <(ops node:$value, node:$ptr),
545 (store_local_m0 node:$value, node:$ptr)>,
550 let PredicateCode = [{return cast<MemSDNode>(N)->getAlign() < 4;}],
551 GISelPredicateCode = [{return (*MI.memoperands_begin())->getAlign() < 4;}],
552 AddressSpaces = [ AddrSpaces.Local ] in {
553 def load_align_less_than_4_local : PatFrag<(ops node:$ptr),
554 (load_local node:$ptr)> {
556 let IsNonExtLoad = 1;
559 def load_align_less_than_4_local_m0 : PatFrag<(ops node:$ptr),
560 (load_local_m0 node:$ptr)> {
562 let IsNonExtLoad = 1;
565 def store_align_less_than_4_local : PatFrag <(ops node:$value, node:$ptr),
566 (store_local node:$value, node:$ptr)> {
568 let IsTruncStore = 0;
571 def store_align_less_than_4_local_m0 : PatFrag <(ops node:$value, node:$ptr),
572 (store_local_m0 node:$value, node:$ptr)> {
574 let IsTruncStore = 0;
578 def atomic_store_8_glue : PatFrag <
579 (ops node:$ptr, node:$value),
580 (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
585 def atomic_store_16_glue : PatFrag <
586 (ops node:$ptr, node:$value),
587 (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
592 def atomic_store_32_glue : PatFrag <
593 (ops node:$ptr, node:$value),
594 (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
599 def atomic_store_64_glue : PatFrag <
600 (ops node:$ptr, node:$value),
601 (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
606 let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
607 def atomic_store_8_local_m0 : PatFrag<(ops node:$ptr, node:$val),
608 (atomic_store_8_glue node:$ptr, node:$val)>;
609 def atomic_store_16_local_m0 : PatFrag<(ops node:$ptr, node:$val),
610 (atomic_store_16_glue node:$ptr, node:$val)>;
611 def atomic_store_32_local_m0 : PatFrag<(ops node:$ptr, node:$val),
612 (atomic_store_32_glue node:$ptr, node:$val)>;
613 def atomic_store_64_local_m0 : PatFrag<(ops node:$ptr, node:$val),
614 (atomic_store_64_glue node:$ptr, node:$val)>;
615 } // End let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces
618 //===----------------------------------------------------------------------===//
619 // SDNodes PatFrags for a16 loads and stores with 3 components.
620 // v3f16/v3i16 is widened to v4f16/v4i16, so we need to match on the memory
622 //===----------------------------------------------------------------------===//
624 class mubuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag <
625 (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
626 node:$auxiliary, node:$idxen),
627 (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
628 node:$auxiliary, node:$idxen)> {
633 class mubuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag <
634 (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
635 node:$auxiliary, node:$idxen),
636 (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
637 node:$auxiliary, node:$idxen)> {
642 class mtbuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag <
643 (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
644 node:$format, node:$auxiliary, node:$idxen),
645 (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
646 node:$format, node:$auxiliary, node:$idxen)> {
651 class mtbuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag <
652 (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
653 node:$format, node:$auxiliary, node:$idxen),
654 (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
655 node:$format, node:$auxiliary, node:$idxen)> {
660 //===----------------------------------------------------------------------===//
661 // SDNodes PatFrags for d16 loads
662 //===----------------------------------------------------------------------===//
664 class LoadD16Frag <SDPatternOperator op> : PatFrag<
665 (ops node:$ptr, node:$tied_in),
666 (op node:$ptr, node:$tied_in)> {
670 foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
671 let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
673 def load_d16_hi_#as : LoadD16Frag <SIload_d16_hi>;
675 def az_extloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_u8> {
679 def sextloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_i8> {
683 def load_d16_lo_#as : LoadD16Frag <SIload_d16_lo>;
685 def az_extloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_u8> {
689 def sextloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_i8> {
693 } // End let AddressSpaces = ...
694 } // End foreach AddrSpace
696 def lshr_rev : PatFrag <
697 (ops node:$src1, node:$src0),
701 def ashr_rev : PatFrag <
702 (ops node:$src1, node:$src0),
706 def lshl_rev : PatFrag <
707 (ops node:$src1, node:$src0),
711 def add_ctpop : PatFrag <
712 (ops node:$src0, node:$src1),
713 (add (ctpop $src0), $src1)
717 (ops node:$src0, node:$src1),
718 (not (xor $src0, $src1))
722 def shl#I#_add : PatFrag <
723 (ops node:$src0, node:$src1),
724 (add (shl_oneuse $src0, (i32 I)), $src1)> {
725 // FIXME: Poor substitute for disabling pattern in SelectionDAG
726 let PredicateCode = [{return false;}];
727 let GISelPredicateCode = [{return true;}];
731 multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
732 SDTypeProfile tc = SDTAtomic2,
736 !if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name, tc,
737 [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
740 let AddressSpaces = StoreAddress_local.AddrSpaces in {
741 defm _local_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
742 defm _local_m0 : noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue"),
746 let AddressSpaces = StoreAddress_region.AddrSpaces in {
747 defm _region_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
748 defm _region_m0 : noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue"),
753 defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
754 defm atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
755 defm atomic_load_uinc_wrap : SIAtomicM0Glue2 <"LOAD_UINC_WRAP">;
756 defm atomic_load_udec_wrap : SIAtomicM0Glue2 <"LOAD_UDEC_WRAP">;
757 defm atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">;
758 defm atomic_load_min : SIAtomicM0Glue2 <"LOAD_MIN">;
759 defm atomic_load_max : SIAtomicM0Glue2 <"LOAD_MAX">;
760 defm atomic_load_or : SIAtomicM0Glue2 <"LOAD_OR">;
761 defm atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">;
762 defm atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">;
763 defm atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">;
764 defm atomic_swap : SIAtomicM0Glue2 <"SWAP">;
765 defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32, 0>;
766 defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32, 0>;
767 defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32, 0>;
769 def as_i1timm : SDNodeXForm<timm, [{
770 return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1);
773 def as_i8imm : SDNodeXForm<imm, [{
774 return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i8);
777 def as_i8timm : SDNodeXForm<timm, [{
778 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
781 def as_i16imm : SDNodeXForm<imm, [{
782 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
785 def as_i16timm : SDNodeXForm<timm, [{
786 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
789 def as_i32imm: SDNodeXForm<imm, [{
790 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
793 def as_i32timm: SDNodeXForm<timm, [{
794 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
797 def as_i64imm: SDNodeXForm<imm, [{
798 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
801 def cond_as_i32imm: SDNodeXForm<cond, [{
802 return CurDAG->getTargetConstant(N->get(), SDLoc(N), MVT::i32);
805 // Copied from the AArch64 backend:
806 def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
807 return CurDAG->getTargetConstant(
808 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
811 def frameindex_to_targetframeindex : SDNodeXForm<frameindex, [{
812 auto FI = cast<FrameIndexSDNode>(N);
813 return CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
816 // Copied from the AArch64 backend:
817 def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
818 return CurDAG->getTargetConstant(
819 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
822 class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
823 uint64_t Imm = N->getZExtValue();
824 unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1;
825 return CurDAG->getTargetConstant(Bit, SDLoc(N), MVT::i1);
828 def SIMM16bit : ImmLeaf <i32,
829 [{return isInt<16>(Imm);}]
832 def UIMM16bit : ImmLeaf <i32,
833 [{return isUInt<16>(Imm);}]
836 def i64imm_32bit : ImmLeaf<i64, [{
837 return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
840 def InlineImm16 : ImmLeaf<i16, [{
841 return isInlineImmediate16(Imm);
844 def InlineImm32 : ImmLeaf<i32, [{
845 return isInlineImmediate32(Imm);
848 def InlineImm64 : ImmLeaf<i64, [{
849 return isInlineImmediate64(Imm);
852 def InlineImmFP32 : FPImmLeaf<f32, [{
853 return isInlineImmediate(Imm);
856 def InlineImmFP64 : FPImmLeaf<f64, [{
857 return isInlineImmediate(Imm);
861 class VGPRImm <dag frag> : PatLeaf<frag, [{
865 def NegateImm : SDNodeXForm<imm, [{
866 return CurDAG->getConstant(-N->getSExtValue(), SDLoc(N), MVT::i32);
869 // TODO: When FP inline imm values work?
870 def NegSubInlineConst32 : ImmLeaf<i32, [{
871 return Imm < -16 && Imm >= -64;
874 def NegSubInlineIntConst16 : ImmLeaf<i16, [{
875 return Imm < -16 && Imm >= -64;
878 def ShiftAmt32Imm : ImmLeaf <i32, [{
882 def getNegV2I16Imm : SDNodeXForm<build_vector, [{
883 return SDValue(packNegConstantV2I16(N, *CurDAG), 0);
886 def NegSubInlineConstV216 : PatLeaf<(build_vector), [{
887 assert(N->getNumOperands() == 2);
888 assert(N->getOperand(0).getValueType().getSizeInBits() == 16);
889 SDValue Src0 = N->getOperand(0);
890 SDValue Src1 = N->getOperand(1);
892 return isNegInlineImmediate(Src0.getNode());
894 return (isNullConstantOrUndef(Src0) && isNegInlineImmediate(Src1.getNode())) ||
895 (isNullConstantOrUndef(Src1) && isNegInlineImmediate(Src0.getNode()));
899 def fp16_zeros_high_16bits : PatLeaf<(f16 VGPR_32:$src), [{
900 return fp16SrcZerosHighBits(N->getOpcode());
904 //===----------------------------------------------------------------------===//
905 // MUBUF/SMEM Patterns
906 //===----------------------------------------------------------------------===//
908 def extract_cpol : SDNodeXForm<timm, [{
909 return CurDAG->getTargetConstant(N->getZExtValue() & AMDGPU::CPol::ALL, SDLoc(N), MVT::i8);
912 def extract_swz : SDNodeXForm<timm, [{
913 return CurDAG->getTargetConstant((N->getZExtValue() >> 3) & 1, SDLoc(N), MVT::i8);
916 def set_glc : SDNodeXForm<timm, [{
917 return CurDAG->getTargetConstant(N->getZExtValue() | AMDGPU::CPol::GLC, SDLoc(N), MVT::i8);
920 //===----------------------------------------------------------------------===//
922 //===----------------------------------------------------------------------===//
924 def SOPPBrTarget : CustomOperand<OtherVT> {
925 let PrintMethod = "printOperand";
926 let EncoderMethod = "getSOPPBrEncoding";
927 let DecoderMethod = "decodeSOPPBrTarget";
928 let OperandType = "OPERAND_PCREL";
931 def si_ga : Operand<iPTR>;
933 def InterpSlot : CustomOperand<i32>;
935 // It appears to be necessary to create a separate operand for this to
936 // be able to parse attr<num> with no space.
937 def InterpAttr : CustomOperand<i32>;
939 def InterpAttrChan : ImmOperand<i32>;
941 def VReg32OrOffClass : AsmOperandClass {
942 let Name = "VReg32OrOff";
943 let ParserMethod = "parseVReg32OrOff";
946 def SendMsg : CustomOperand<i32>;
948 def Swizzle : CustomOperand<i16, 1>;
950 def Endpgm : CustomOperand<i16, 1>;
952 def SWaitCnt : CustomOperand<i32>;
954 def DepCtr : CustomOperand<i32>;
956 def SDelayALU : CustomOperand<i32>;
958 include "SIInstrFormats.td"
959 include "VIInstrFormats.td"
961 def BoolReg : AsmOperandClass {
962 let Name = "BoolReg";
963 let ParserMethod = "parseBoolReg";
964 let RenderMethod = "addRegOperands";
967 class BoolRC : RegisterOperand<SReg_1> {
968 let ParserMatchClass = BoolReg;
969 let DecoderMethod = "decodeBoolReg";
972 def SSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
973 let ParserMatchClass = BoolReg;
974 let DecoderMethod = "decodeBoolReg";
977 def VOPDstS64orS32 : BoolRC {
978 let PrintMethod = "printVOPDst";
981 // SCSrc_i1 is the operand for pseudo instructions only.
982 // Boolean immediates shall not be exposed to codegen instructions.
983 def SCSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
984 let OperandNamespace = "AMDGPU";
985 let OperandType = "OPERAND_REG_IMM_INT32";
986 let ParserMatchClass = BoolReg;
987 let DecoderMethod = "decodeBoolReg";
990 // ===----------------------------------------------------------------------===//
991 // ExpSrc* Special cases for exp src operands which are printed as
992 // "off" depending on en operand.
993 // ===----------------------------------------------------------------------===//
995 def ExpSrc0 : RegisterOperand<VGPR_32> {
996 let PrintMethod = "printExpSrc0";
997 let ParserMatchClass = VReg32OrOffClass;
1000 def ExpSrc1 : RegisterOperand<VGPR_32> {
1001 let PrintMethod = "printExpSrc1";
1002 let ParserMatchClass = VReg32OrOffClass;
1005 def ExpSrc2 : RegisterOperand<VGPR_32> {
1006 let PrintMethod = "printExpSrc2";
1007 let ParserMatchClass = VReg32OrOffClass;
1010 def ExpSrc3 : RegisterOperand<VGPR_32> {
1011 let PrintMethod = "printExpSrc3";
1012 let ParserMatchClass = VReg32OrOffClass;
1015 class SDWASrc<ValueType vt> : RegisterOperand<VS_32> {
1016 let OperandNamespace = "AMDGPU";
1017 string Type = !if(isFloatType<vt>.ret, "FP", "INT");
1018 let OperandType = "OPERAND_REG_INLINE_C_"#Type#vt.Size;
1019 let DecoderMethod = "decodeSDWASrc"#vt.Size;
1020 let EncoderMethod = "getSDWASrcEncoding";
1023 def SDWASrc_i32 : SDWASrc<i32>;
1024 def SDWASrc_i16 : SDWASrc<i16>;
1025 def SDWASrc_f32 : SDWASrc<f32>;
1026 def SDWASrc_f16 : SDWASrc<f16>;
1028 def SDWAVopcDst : BoolRC {
1029 let OperandNamespace = "AMDGPU";
1030 let OperandType = "OPERAND_SDWA_VOPC_DST";
1031 let EncoderMethod = "getSDWAVopcDstEncoding";
1032 let DecoderMethod = "decodeSDWAVopcDst";
1033 let PrintMethod = "printVOPDst";
1036 class NamedIntOperand<ValueType Type, string Prefix, string Name = NAME,
1037 string ConvertMethod = "nullptr">
1038 : CustomOperand<Type, 1, Name> {
1040 "[this](OperandVector &Operands) -> ParseStatus { "#
1041 "return parseIntWithPrefix(\""#Prefix#"\", Operands, "#
1042 "AMDGPUOperand::"#ImmTy#", "#ConvertMethod#"); }";
1045 class NamedBitOperand<string Id, string Name = NAME>
1046 : CustomOperand<i1, 1, Name> {
1048 "[this](OperandVector &Operands) -> ParseStatus { "#
1049 "return parseNamedBit(\""#Id#"\", Operands, AMDGPUOperand::"#ImmTy#"); }";
1052 class DefaultOperand<CustomOperand Op, int Value>
1053 : OperandWithDefaultOps<Op.Type, (ops (Op.Type Value))>,
1054 CustomOperandProps<1, Op.ParserMatchClass.Name> {
1055 let ParserMethod = Op.ParserMatchClass.ParserMethod;
1058 class SDWAOperand<string Id, string Name = NAME>
1059 : CustomOperand<i32, 1, Name> {
1061 "[this](OperandVector &Operands) -> ParseStatus { "#
1062 "return parseSDWASel(Operands, \""#Id#"\", AMDGPUOperand::"#ImmTy#"); }";
1065 class ArrayOperand0<string Id, string Name = NAME>
1066 : OperandWithDefaultOps<i32, (ops (i32 0))>,
1067 CustomOperandProps<1, Name> {
1069 "[this](OperandVector &Operands) -> ParseStatus { "#
1070 "return parseOperandArrayWithPrefix(\""#Id#"\", Operands, "#
1071 "AMDGPUOperand::"#ImmTy#"); }";
1074 let ImmTy = "ImmTyOffset" in
1075 def flat_offset : CustomOperand<i32, 1, "FlatOffset">;
1076 def offset : NamedIntOperand<i32, "offset", "Offset">;
1077 def offset0 : NamedIntOperand<i8, "offset0", "Offset0">;
1078 def offset1 : NamedIntOperand<i8, "offset1", "Offset1">;
1080 def gds : NamedBitOperand<"gds", "GDS">;
1082 def omod : CustomOperand<i32, 1, "OModSI">;
1083 def omod0 : DefaultOperand<omod, 0>;
1085 // We need to make the cases with a default of 0 distinct from no
1086 // default to help deal with some cases where the operand appears
1087 // before a mandatory operand.
1088 def clampmod : NamedBitOperand<"clamp", "ClampSI">;
1089 def clampmod0 : DefaultOperand<clampmod, 0>;
1090 def highmod : NamedBitOperand<"high", "High">;
1092 def CPol : CustomOperand<i32, 1>;
1093 def CPol_0 : DefaultOperand<CPol, 0>;
1094 def CPol_GLC1 : DefaultOperand<CPol, 1>;
1096 def TFE : NamedBitOperand<"tfe">;
1097 def UNorm : NamedBitOperand<"unorm">;
1098 def DA : NamedBitOperand<"da">;
1099 def R128A16 : CustomOperand<i1, 1>;
1100 def A16 : NamedBitOperand<"a16">;
1101 def D16 : NamedBitOperand<"d16">;
1102 def LWE : NamedBitOperand<"lwe">;
1103 def exp_compr : NamedBitOperand<"compr", "ExpCompr">;
1104 def exp_vm : NamedBitOperand<"vm", "ExpVM">;
1106 def FORMAT : CustomOperand<i8>;
1108 def DMask : NamedIntOperand<i16, "dmask">;
1109 def Dim : CustomOperand<i8>;
1111 def dst_sel : SDWAOperand<"dst_sel", "SDWADstSel">;
1112 def src0_sel : SDWAOperand<"src0_sel", "SDWASrc0Sel">;
1113 def src1_sel : SDWAOperand<"src1_sel", "SDWASrc1Sel">;
1114 def dst_unused : CustomOperand<i32, 1, "SDWADstUnused">;
1116 def op_sel0 : ArrayOperand0<"op_sel", "OpSel">;
1117 def op_sel_hi0 : ArrayOperand0<"op_sel_hi", "OpSelHi">;
1118 def neg_lo0 : ArrayOperand0<"neg_lo", "NegLo">;
1119 def neg_hi0 : ArrayOperand0<"neg_hi", "NegHi">;
1121 def dpp8 : CustomOperand<i32, 0, "DPP8">;
1122 def dpp_ctrl : CustomOperand<i32, 0, "DPPCtrl">;
1124 let DefaultValue = "0xf" in {
1125 def row_mask : NamedIntOperand<i32, "row_mask", "DppRowMask">;
1126 def bank_mask : NamedIntOperand<i32, "bank_mask", "DppBankMask">;
1128 def bound_ctrl : NamedIntOperand<i1, "bound_ctrl", "DppBoundCtrl",
1129 "[this] (int64_t &BC) -> bool { return convertDppBoundCtrl(BC); }">;
1130 def FI : NamedIntOperand<i32, "fi", "DppFI">;
1132 def blgp : CustomOperand<i32, 1, "BLGP">;
1133 def cbsz : NamedIntOperand<i32, "cbsz", "CBSZ">;
1134 def abid : NamedIntOperand<i32, "abid", "ABID">;
1136 def hwreg : CustomOperand<i32, 0, "Hwreg">;
1138 def exp_tgt : CustomOperand<i32, 0, "ExpTgt">;
1140 def wait_vdst : NamedIntOperand<i8, "wait_vdst", "WaitVDST">;
1141 def wait_exp : NamedIntOperand<i8, "wait_exp", "WaitEXP">;
1143 class KImmFPOperand<ValueType vt> : ImmOperand<vt> {
1144 let OperandNamespace = "AMDGPU";
1145 let OperandType = "OPERAND_KIMM"#vt.Size;
1146 let PrintMethod = "printU"#vt.Size#"ImmOperand";
1147 let DecoderMethod = "decodeOperand_KImmFP";
1150 // 32-bit VALU immediate operand that uses the constant bus.
1151 def KImmFP32 : KImmFPOperand<i32>;
1153 // 32-bit VALU immediate operand with a 16-bit value that uses the
1155 def KImmFP16 : KImmFPOperand<i16>;
1157 class FPInputModsMatchClass <int opSize> : AsmOperandClass {
1158 let Name = "RegOrImmWithFP"#opSize#"InputMods";
1159 let ParserMethod = "parseRegOrImmWithFPInputMods";
1160 let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
1163 class FPVCSrcInputModsMatchClass <int opSize> : FPInputModsMatchClass <opSize> {
1164 let Name = "RegOrInlineImmWithFP"#opSize#"InputMods";
1165 let PredicateMethod = "isRegOrInlineImmWithFP"#opSize#"InputMods";
1168 def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
1169 def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
1170 def FP64InputModsMatchClass : FPInputModsMatchClass<64>;
1172 def FP16VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<16>;
1173 def FP32VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<32>;
1175 class InputMods <AsmOperandClass matchClass> : Operand <i32> {
1176 let OperandNamespace = "AMDGPU";
1177 let OperandType = "OPERAND_INPUT_MODS";
1178 let ParserMatchClass = matchClass;
1181 class FPInputMods <FPInputModsMatchClass matchClass> : InputMods <matchClass> {
1182 let PrintMethod = "printOperandAndFPInputMods";
1185 def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;
1186 def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
1187 def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;
1189 def FP16VCSrcInputMods : FPInputMods<FP16VCSrcInputModsMatchClass>;
1190 def FP32VCSrcInputMods : FPInputMods<FP32VCSrcInputModsMatchClass>;
1192 class IntInputModsMatchClass <int opSize> : AsmOperandClass {
1193 let Name = "RegOrImmWithInt"#opSize#"InputMods";
1194 let ParserMethod = "parseRegOrImmWithIntInputMods";
1195 let PredicateMethod = "isRegOrImmWithInt"#opSize#"InputMods";
1197 class IntVCSrcInputModsMatchClass <int opSize> : IntInputModsMatchClass <opSize> {
1198 let Name = "RegOrInlineImmWithInt"#opSize#"InputMods";
1199 let PredicateMethod = "isRegOrInlineImmWithInt"#opSize#"InputMods";
1201 def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
1202 def Int64InputModsMatchClass : IntInputModsMatchClass<64>;
1203 def Int32VCSrcInputModsMatchClass : IntVCSrcInputModsMatchClass<32>;
1205 class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> {
1206 let PrintMethod = "printOperandAndIntInputMods";
1208 def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
1209 def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
1210 def Int32VCSrcInputMods : IntInputMods<Int32VCSrcInputModsMatchClass>;
1212 class OpSelModsMatchClass : AsmOperandClass {
1213 let Name = "OpSelMods";
1214 let ParserMethod = "parseRegOrImm";
1215 let PredicateMethod = "isRegOrImm";
1218 def IntOpSelModsMatchClass : OpSelModsMatchClass;
1219 def IntOpSelMods : InputMods<IntOpSelModsMatchClass>;
1221 class FPSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
1222 let Name = "SDWAWithFP"#opSize#"InputMods";
1223 let ParserMethod = "parseRegOrImmWithFPInputMods";
1224 let PredicateMethod = "isSDWAFP"#opSize#"Operand";
1227 def FP16SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<16>;
1228 def FP32SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<32>;
1230 class FPSDWAInputMods <FPSDWAInputModsMatchClass matchClass> :
1231 InputMods <matchClass> {
1232 let PrintMethod = "printOperandAndFPInputMods";
1235 def FP16SDWAInputMods : FPSDWAInputMods<FP16SDWAInputModsMatchClass>;
1236 def FP32SDWAInputMods : FPSDWAInputMods<FP32SDWAInputModsMatchClass>;
1238 def FPVRegInputModsMatchClass : AsmOperandClass {
1239 let Name = "VRegWithFPInputMods";
1240 let ParserMethod = "parseRegWithFPInputMods";
1241 let PredicateMethod = "isVRegWithInputMods";
1244 def FPT16VRegInputModsMatchClass : AsmOperandClass {
1245 let Name = "T16VRegWithFPInputMods";
1246 let ParserMethod = "parseRegWithFPInputMods";
1247 let PredicateMethod = "isT16VRegWithInputMods";
1250 def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
1251 let PrintMethod = "printOperandAndFPInputMods";
1254 def FPT16VRegInputMods : InputMods <FPT16VRegInputModsMatchClass> {
1255 let PrintMethod = "printOperandAndFPInputMods";
1258 class IntSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
1259 let Name = "SDWAWithInt"#opSize#"InputMods";
1260 let ParserMethod = "parseRegOrImmWithIntInputMods";
1261 let PredicateMethod = "isSDWAInt"#opSize#"Operand";
1264 def Int16SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<16>;
1265 def Int32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32>;
1266 def Bin32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32> {
1267 let Name = "SDWAWithBin32InputMods";
1268 let ParserMethod = "parseRegOrImm";
1271 class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass> :
1272 InputMods <matchClass> {
1273 let PrintMethod = "printOperandAndIntInputMods";
1276 def Int16SDWAInputMods : IntSDWAInputMods<Int16SDWAInputModsMatchClass>;
1277 def Int32SDWAInputMods : IntSDWAInputMods<Int32SDWAInputModsMatchClass>;
1278 def Bin32SDWAInputMods : IntSDWAInputMods<Bin32SDWAInputModsMatchClass>;
1280 def IntVRegInputModsMatchClass : AsmOperandClass {
1281 let Name = "VRegWithIntInputMods";
1282 let ParserMethod = "parseRegWithIntInputMods";
1283 let PredicateMethod = "isVRegWithInputMods";
1286 def IntT16VRegInputModsMatchClass : AsmOperandClass {
1287 let Name = "T16VRegWithIntInputMods";
1288 let ParserMethod = "parseRegWithIntInputMods";
1289 let PredicateMethod = "isT16VRegWithInputMods";
1292 def IntT16VRegInputMods : InputMods <IntT16VRegInputModsMatchClass> {
1293 let PrintMethod = "printOperandAndIntInputMods";
1296 def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
1297 let PrintMethod = "printOperandAndIntInputMods";
1300 class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
1301 let Name = "PackedFP"#opSize#"InputMods";
1302 let ParserMethod = "parseRegOrImm";
1303 let PredicateMethod = "isRegOrImm";
1304 // let PredicateMethod = "isPackedFP"#opSize#"InputMods";
1307 class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
1308 let Name = "PackedInt"#opSize#"InputMods";
1309 let ParserMethod = "parseRegOrImm";
1310 let PredicateMethod = "isRegOrImm";
1311 // let PredicateMethod = "isPackedInt"#opSize#"InputMods";
1314 def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
1315 def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;
1317 class PackedFPInputMods <PackedFPInputModsMatchClass matchClass> : InputMods <matchClass> {
1318 // let PrintMethod = "printPackedFPInputMods";
1321 class PackedIntInputMods <PackedIntInputModsMatchClass matchClass> : InputMods <matchClass> {
1322 //let PrintMethod = "printPackedIntInputMods";
1325 def PackedF16InputMods : PackedFPInputMods<PackedF16InputModsMatchClass>;
1326 def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>;
1328 //===----------------------------------------------------------------------===//
1330 //===----------------------------------------------------------------------===//
1332 def DS1Addr1Offset : ComplexPattern<iPTR, 2, "SelectDS1Addr1Offset">;
1333 def DS64Bit4ByteAligned : ComplexPattern<iPTR, 3, "SelectDS64Bit4ByteAligned">;
1334 def DS128Bit8ByteAligned : ComplexPattern<iPTR, 3, "SelectDS128Bit8ByteAligned">;
1336 def MOVRELOffset : ComplexPattern<iPTR, 2, "SelectMOVRELOffset">;
1338 def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
1339 def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
1340 def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;
1342 def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
1344 def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;
1346 def VOP3PModsDOT : ComplexPattern<untyped, 2, "SelectVOP3PModsDOT">;
1347 def DotIUVOP3PMods : ComplexPattern<untyped, 1, "SelectDotIUVOP3PMods">;
1348 def WMMAOpSelVOP3PMods : ComplexPattern<untyped, 1, "SelectWMMAOpSelVOP3PMods">;
1350 def VOP3OpSel : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;
1352 def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;
1354 def VOP3PMadMixModsExt : ComplexPattern<untyped, 2, "SelectVOP3PMadMixModsExt">;
1355 def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">;
1357 def VINTERPMods : ComplexPattern<untyped, 2, "SelectVINTERPMods">;
1358 def VINTERPModsHi : ComplexPattern<untyped, 2, "SelectVINTERPModsHi">;
1360 //===----------------------------------------------------------------------===//
1361 // SI assembler operands
1362 //===----------------------------------------------------------------------===//
1367 int FLAT_SCR = 0x68;
1370 // This should be kept in sync with SISrcMods enum
1405 int FLAT_SCR_LO = 20;
1406 int FLAT_SCR_HI = 21;
1407 int XNACK_MASK = 22;
1408 int POPS_PACKER = 25;
1409 int SHADER_CYCLES = 29;
1412 class getHwRegImm<int Reg, int Offset = 0, int Size = 32> {
1413 int ret = !and(!or(Reg,
1415 !shl(!add(Size, -1), 11)), 65535);
1418 //===----------------------------------------------------------------------===//
1420 // SI Instruction multiclass helpers.
1422 // Instructions with _32 take 32-bit operands.
1423 // Instructions with _64 take 64-bit operands.
1425 // VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
1426 // encoding is the standard encoding, but instruction that make use of
1427 // any of the instruction modifiers must use the 64-bit encoding.
1429 // Instructions with _e32 use the 32-bit encoding.
1430 // Instructions with _e64 use the 64-bit encoding.
1432 //===----------------------------------------------------------------------===//
1434 class SIMCInstr <string pseudo, int subtarget> {
1435 string PseudoInstr = pseudo;
1436 int Subtarget = subtarget;
1439 //===----------------------------------------------------------------------===//
1440 // Vector ALU classes
1441 //===----------------------------------------------------------------------===//
1443 class getNumSrcArgs<ValueType Src0, ValueType Src1, ValueType Src2> {
1445 !if (!eq(Src0.Value, untyped.Value), 0,
1446 !if (!eq(Src1.Value, untyped.Value), 1, // VOP1
1447 !if (!eq(Src2.Value, untyped.Value), 2, // VOP2
1451 // Returns the register class to use for the destination of VOP[123C]
1452 // instructions for the given VT.
1453 class getVALUDstForVT<ValueType VT> {
1454 RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
1455 !if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
1456 !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
1457 !if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32>,
1458 VOPDstS64orS32)))); // else VT == i1
1461 class getVALUDstForVT_t16<ValueType VT> {
1462 RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
1463 !if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
1464 !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
1465 !if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32_Lo128>,
1466 VOPDstS64orS32)))); // else VT == i1
1469 // Returns the register class to use for the destination of VOP[12C]
1470 // instructions with SDWA extension
1471 class getSDWADstForVT<ValueType VT> {
1472 RegisterOperand ret = !if(!eq(VT.Size, 1),
1473 SDWAVopcDst, // VOPC
1474 VOPDstOperand<VGPR_32>); // VOP1/2 32-bit dst
1477 // Returns the register class to use for source 0 of VOP[12C]
1478 // instructions for the given VT.
1479 class getVOPSrc0ForVT<ValueType VT, bit IsTrue16> {
1480 bit isFP = isFloatType<VT>.ret;
1482 RegisterOperand ret =
1484 !if(!eq(VT.Size, 64),
1486 !if(!eq(VT.Value, f16.Value),
1491 !if(!eq(VT.Value, v2f16.Value),
1493 !if(!eq(VT.Value, v4f16.Value),
1500 !if(!eq(VT.Size, 64),
1502 !if(!eq(VT.Value, i16.Value),
1507 !if(!eq(VT.Value, v2i16.Value),
1516 class getSOPSrcForVT<ValueType VT> {
1517 RegisterOperand ret = !if(!eq(VT.Size, 64), SSrc_b64, SSrc_b32);
1520 // Returns the vreg register class to use for source operand given VT
1521 class getVregSrcForVT<ValueType VT> {
1522 RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128,
1523 !if(!eq(VT.Size, 96), VReg_96,
1524 !if(!eq(VT.Size, 64), VReg_64,
1525 !if(!eq(VT.Size, 48), VReg_64,
1529 class getVregSrcForVT_t16<ValueType VT> {
1530 RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128,
1531 !if(!eq(VT.Size, 96), VReg_96,
1532 !if(!eq(VT.Size, 64), VReg_64,
1533 !if(!eq(VT.Size, 48), VReg_64,
1534 !if(!eq(VT.Size, 16), VGPR_32_Lo128,
1538 class getSDWASrcForVT <ValueType VT> {
1539 bit isFP = isFloatType<VT>.ret;
1540 RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32);
1541 RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32);
1542 RegisterOperand ret = !if(isFP, retFlt, retInt);
1545 // Returns the register class to use for sources of VOP3 instructions for the
1547 class getVOP3SrcForVT<ValueType VT> {
1548 bit isFP = isFloatType<VT>.ret;
1549 RegisterOperand ret =
1550 !if(!eq(VT.Size, 128),
1552 !if(!eq(VT.Size, 64),
1554 !if(!eq(VT.Value, v2f32.Value),
1557 !if(!eq(VT.Value, v2i32.Value),
1560 !if(!eq(VT.Value, i1.Value),
1563 !if(!eq(VT.Value, f16.Value),
1565 !if(!eq(VT.Value, v2f16.Value),
1567 !if(!eq(VT.Value, v4f16.Value),
1573 !if(!eq(VT.Value, i16.Value),
1575 !if(!eq(VT.Value, v2i16.Value),
1586 // Src2 of VOP3 DPP instructions cannot be a literal
1587 class getVOP3DPPSrcForVT<ValueType VT> {
1588 bit isFP = isFloatType<VT>.ret;
1589 RegisterOperand ret =
1590 !if (!eq(VT.Value, i1.Value), SSrc_i1,
1592 !if (!eq(VT.Value, f16.Value), VCSrc_f16,
1593 !if (!eq(VT.Value, v2f16.Value), VCSrc_v2f16, VCSrc_f32)),
1594 !if (!eq(VT.Value, i16.Value), VCSrc_b16,
1595 !if (!eq(VT.Value, v2i16.Value), VCSrc_v2b16,
1599 // Float or packed int
1600 class isModifierType<ValueType SrcVT> {
1601 bit ret = !or(!eq(SrcVT.Value, f16.Value),
1602 !eq(SrcVT.Value, f32.Value),
1603 !eq(SrcVT.Value, f64.Value),
1604 !eq(SrcVT.Value, v2f16.Value),
1605 !eq(SrcVT.Value, v2i16.Value),
1606 !eq(SrcVT.Value, v2f32.Value),
1607 !eq(SrcVT.Value, v2i32.Value),
1608 !eq(SrcVT.Value, v4f16.Value),
1609 !eq(SrcVT.Value, v4i16.Value),
1610 !eq(SrcVT.Value, v4f32.Value),
1611 !eq(SrcVT.Value, v4i32.Value),
1612 !eq(SrcVT.Value, v8f16.Value),
1613 !eq(SrcVT.Value, v8i16.Value),
1614 !eq(SrcVT.Value, v8f32.Value),
1615 !eq(SrcVT.Value, v8i32.Value),
1616 !eq(SrcVT.Value, v16f16.Value),
1617 !eq(SrcVT.Value, v16i16.Value));
1620 // Return type of input modifiers operand for specified input operand
1621 class getSrcMod <ValueType VT> {
1622 bit isFP = isFloatType<VT>.ret;
1623 bit isPacked = isPackedType<VT>.ret;
1624 Operand ret = !if(!eq(VT.Size, 64),
1625 !if(isFP, FP64InputMods, Int64InputMods),
1627 !if(!eq(VT.Value, f16.Value),
1635 class getOpSelMod <ValueType VT> {
1636 Operand ret = !if(!eq(VT.Value, f16.Value), FP16InputMods, IntOpSelMods);
1639 // Return type of input modifiers operand specified input operand for DPP
1640 class getSrcModDPP <ValueType VT> {
1641 bit isFP = isFloatType<VT>.ret;
1642 Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods);
1645 class getSrcModDPP_t16 <ValueType VT> {
1646 bit isFP = isFloatType<VT>.ret;
1649 !if (!eq(VT.Value, f16.Value), FPT16VRegInputMods,
1651 !if (!eq(VT.Value, i16.Value), IntT16VRegInputMods,
1655 // Return type of input modifiers operand for specified input operand for DPP
1656 class getSrcModVOP3DPP <ValueType VT> {
1657 bit isFP = isFloatType<VT>.ret;
1658 bit isPacked = isPackedType<VT>.ret;
1661 !if (!eq(VT.Value, f16.Value), FP16VCSrcInputMods,
1662 FP32VCSrcInputMods),
1663 Int32VCSrcInputMods);
1666 // Return type of input modifiers operand specified input operand for SDWA
1667 class getSrcModSDWA <ValueType VT> {
1668 Operand ret = !if(!eq(VT.Value, f16.Value), FP16SDWAInputMods,
1669 !if(!eq(VT.Value, f32.Value), FP32SDWAInputMods,
1670 !if(!eq(VT.Value, i16.Value), Int16SDWAInputMods,
1671 Int32SDWAInputMods)));
1674 // Returns the input arguments for VOP[12C] instructions for the given SrcVT.
1675 class getIns32 <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs> {
1676 dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0), // VOP1
1677 !if(!eq(NumSrcArgs, 2), (ins Src0RC:$src0, Src1RC:$src1), // VOP2
1681 // Returns the input arguments for VOP3 instructions for the given SrcVT.
1682 class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
1683 RegisterOperand Src2RC, int NumSrcArgs,
1684 bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
1685 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
1688 !if (!eq(NumSrcArgs, 0),
1689 // VOP1 without input operands (V_NOP, V_CLREXCP)
1692 !if (!eq(NumSrcArgs, 1),
1694 // VOP1 with modifiers
1696 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1697 clampmod0:$clamp, omod0:$omod),
1698 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1701 // VOP1 without modifiers
1703 (ins Src0RC:$src0, clampmod0:$clamp),
1706 !if (!eq(NumSrcArgs, 2),
1708 // VOP 2 with modifiers
1710 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1711 Src1Mod:$src1_modifiers, Src1RC:$src1,
1712 clampmod0:$clamp, omod0:$omod),
1713 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1714 Src1Mod:$src1_modifiers, Src1RC:$src1,
1717 // VOP2 without modifiers
1719 (ins Src0RC:$src0, Src1RC:$src1, clampmod0:$clamp),
1720 (ins Src0RC:$src0, Src1RC:$src1))
1723 /* NumSrcArgs == 3 */,
1726 // VOP3 with modifiers
1728 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1729 Src1Mod:$src1_modifiers, Src1RC:$src1,
1730 Src2Mod:$src2_modifiers, Src2RC:$src2,
1731 clampmod0:$clamp, omod0:$omod),
1733 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1734 Src1Mod:$src1_modifiers, Src1RC:$src1,
1735 Src2Mod:$src2_modifiers, Src2RC:$src2,
1737 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1738 Src1Mod:$src1_modifiers, Src1RC:$src1,
1739 Src2Mod:$src2_modifiers, Src2RC:$src2))),
1740 // VOP3 with modifiers except src2
1742 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1743 Src1Mod:$src1_modifiers, Src1RC:$src1,
1744 Src2RC:$src2, clampmod0:$clamp, omod0:$omod),
1746 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1747 Src1Mod:$src1_modifiers, Src1RC:$src1,
1748 Src2RC:$src2, clampmod0:$clamp),
1749 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1750 Src1Mod:$src1_modifiers, Src1RC:$src1,
1753 // VOP3 without modifiers
1755 (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2, clampmod0:$clamp),
1756 (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2))
1760 class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC,
1761 RegisterOperand Src2RC, int NumSrcArgs,
1762 bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
1763 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel,
1765 // getInst64 handles clamp and omod. implicit mutex between vop3p and omod
1766 dag base = getIns64 <Src0RC, Src1RC, Src2RC, NumSrcArgs,
1767 HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
1768 Src0Mod, Src1Mod, Src2Mod>.ret;
1769 dag opsel = (ins op_sel0:$op_sel);
1770 dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi);
1771 dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi));
1773 dag ret = !con(base,
1774 !if(HasOpSel, opsel,(ins)),
1775 !if(IsVOP3P, vop3pFields,(ins)));
1778 class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
1779 RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasOpSel,
1780 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
1781 dag ret = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs,
1782 HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/,
1783 0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod,
1784 HasOpSel, 1/*IsVOP3P*/>.ret;
1787 class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC,
1788 RegisterOperand Src2RC, int NumSrcArgs,
1789 bit HasClamp, bit HasOMod,
1790 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
1791 dag ret = getInsVOP3Base<Src0RC, Src1RC,
1793 HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/, HasOMod,
1794 Src0Mod, Src1Mod, Src2Mod, 1/*HasOpSel*/, 0>.ret;
1797 class getInsDPPBase <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
1798 RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
1799 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld> {
1801 dag ret = !if(!eq(NumSrcArgs, 0),
1802 // VOP1 without input operands (V_NOP)
1805 !if(HasOld ,(ins OldRC:$old), (ins)),
1806 !if (!eq(NumSrcArgs, 1),
1808 // VOP1_DPP with modifiers
1809 (ins Src0Mod:$src0_modifiers, Src0RC:$src0)
1811 // VOP1_DPP without modifiers
1814 !if (!eq(NumSrcArgs, 2),
1816 // VOP2_DPP with modifiers
1817 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1818 Src1Mod:$src1_modifiers, Src1RC:$src1)
1820 // VOP2_DPP without modifiers
1821 (ins Src0RC:$src0, Src1RC:$src1)
1823 /* NumSrcArgs == 3, VOP3 */,
1825 // VOP3_DPP with modifiers
1826 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1827 Src1Mod:$src1_modifiers, Src1RC:$src1,
1828 Src2Mod:$src2_modifiers, Src2RC:$src2)
1830 // VOP3_DPP without modifiers
1831 (ins Src0RC:$src0, Src1RC:$src1,
1840 class getInsDPP <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
1841 RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
1842 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
1843 dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
1844 HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret,
1845 (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
1846 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl));
1849 class getInsDPP16 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
1850 RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
1851 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
1852 dag ret = !con(getInsDPP<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
1853 HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret,
1857 class getInsDPP8 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
1858 RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
1859 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
1860 dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
1861 HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret,
1862 (ins dpp8:$dpp8, FI:$fi));
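// Illustrative sketch (not part of the original file; operand-class names are
// assumptions): for a single-source f32 profile, getInsDPP yields roughly
//   (ins VGPR_32:$old, FP32InputMods:$src0_modifiers, VGPR_32:$src0,
//        dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask,
//        bound_ctrl:$bound_ctrl)
// while getInsDPP8 replaces the dpp_ctrl/row_mask/bank_mask/bound_ctrl tail
// with dpp8:$dpp8 and FI:$fi, and getInsDPP16 additionally appends FI:$fi.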
1865 class getInsVOP3DPPBase<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld> {
1866 dag old = ( ins OldRC:$old );
1867 dag base = VOP3Base;
1869 !if(!and(HasOld,!ne(NumSrcArgs, 0)), old, (ins)),
1874 class getInsVOP3DPP<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
1875 dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret,
1876 (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
1877 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl));
1880 class getInsVOP3DPP16<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
1881 dag ret = !con(getInsVOP3DPP<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret,
1885 class getInsVOP3DPP8<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
1886 dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret,
1887 (ins dpp8:$dpp8, FI:$fi));
1891 class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs,
1892 bit HasSDWAOMod, Operand Src0Mod, Operand Src1Mod,
1895 dag ret = !if(!eq(NumSrcArgs, 0),
1896 // VOP1 without input operands (V_NOP)
1898 !if(!eq(NumSrcArgs, 1),
1900 !if(!not(HasSDWAOMod),
1901 // VOP1_SDWA without omod
1902 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1903 clampmod:$clamp,
1904 dst_sel:$dst_sel, dst_unused:$dst_unused,
1905 src0_sel:$src0_sel),
1906 // VOP1_SDWA with omod
1907 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1908 clampmod:$clamp, omod:$omod,
1909 dst_sel:$dst_sel, dst_unused:$dst_unused,
1910 src0_sel:$src0_sel)),
1911 !if(!eq(NumSrcArgs, 2),
1912 !if(!eq(DstVT.Size, 1),
1914 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1915 Src1Mod:$src1_modifiers, Src1RC:$src1,
1916 clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
1918 !if(!not(HasSDWAOMod),
1919 // VOP2_SDWA without omod
1920 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1921 Src1Mod:$src1_modifiers, Src1RC:$src1,
1922 clampmod:$clamp,
1923 dst_sel:$dst_sel, dst_unused:$dst_unused,
1924 src0_sel:$src0_sel, src1_sel:$src1_sel),
1925 // VOP2_SDWA with omod
1926 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1927 Src1Mod:$src1_modifiers, Src1RC:$src1,
1928 clampmod:$clamp, omod:$omod,
1929 dst_sel:$dst_sel, dst_unused:$dst_unused,
1930 src0_sel:$src0_sel, src1_sel:$src1_sel))),
1931 (ins)/* endif */)));
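// Illustrative sketch (assumption, not upstream text): for a two-source f32
// VOP2 profile without omod, getInsSDWA expands to roughly
//   (ins FP32SDWAInputMods:$src0_modifiers, Src0RC:$src0,
//        FP32SDWAInputMods:$src1_modifiers, Src1RC:$src1,
//        clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
//        src0_sel:$src0_sel, src1_sel:$src1_sel)
// The exact modifier and operand classes depend on the profile; the names
// above are placeholders for illustration only.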
1935 class getOutsDPP <bit HasDst, ValueType DstVT, RegisterOperand DstRCDPP> {
1936 dag ret = !if(HasDst,
1937 !if(!eq(DstVT.Size, 1),
1938 (outs), // no dst for VOPC; the "vcc" token is used as dst in SDWA VOPC instructions
1939 (outs DstRCDPP:$vdst)),
1944 class getOutsSDWA <bit HasDst, ValueType DstVT, RegisterOperand DstRCSDWA> {
1945 dag ret = !if(HasDst,
1946 !if(!eq(DstVT.Size, 1),
1947 (outs DstRCSDWA:$sdst),
1948 (outs DstRCSDWA:$vdst)),
1952 // Returns the assembly string for the inputs and outputs of a VOP[12C]
1953 // instruction.
1954 class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
1955 string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC
1956 string src0 = ", $src0";
1957 string src1 = ", $src1";
1958 string src2 = ", $src2";
1959 string ret = !if(HasDst, dst, "") #
1960 !if(!eq(NumSrcArgs, 1), src0, "") #
1961 !if(!eq(NumSrcArgs, 2), src0#src1, "") #
1962 !if(!eq(NumSrcArgs, 3), src0#src1#src2, "");
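// Illustrative example (not part of the original source): with HasDst = 1 and
// NumSrcArgs = 2, getAsm32 produces "$vdst, $src0, $src1"; with a 1-bit
// destination type (VOPC) the destination operand becomes "$sdst" instead.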
1965 class getAsmVOPDPart <int NumSrcArgs, string XorY> {
1966 string dst = "$vdst" # XorY;
1967 string src0 = ", $src0" # XorY;
1968 string src1 = ", $vsrc1" # XorY;
1970 !if(!ge(NumSrcArgs, 1), src0, "") #
1971 !if(!ge(NumSrcArgs, 2), src1, "");
1974 // Returns the assembly string for the inputs and outputs of a VOP3P
1975 // instruction.
1976 class getAsmVOP3P <int NumSrcArgs, bit HasModifiers,
1977 bit HasClamp, bit HasOpSel> {
1978 string dst = "$vdst";
1979 string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
1980 string src1 = !if(!eq(NumSrcArgs, 1), "",
1981 !if(!eq(NumSrcArgs, 2), " $src1",
1983 string src2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
1985 string mods = !if(HasModifiers, "$neg_lo$neg_hi", "");
1986 string clamp = !if(HasClamp, "$clamp", "");
1987 string opsel = !if(HasOpSel, "$op_sel$op_sel_hi", "");
1989 // Each modifier is printed as an array of bits for each operand, so
1990 // all operands are printed as part of src0_modifiers.
1991 string ret = dst#", "#src0#src1#src2#opsel#mods#clamp;
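// Illustrative example (assumption): for NumSrcArgs = 2 with modifiers, clamp
// and op_sel, the assembled operand string becomes
//   "$vdst, $src0, $src1$op_sel$op_sel_hi$neg_lo$neg_hi$clamp"
// i.e. the packed modifiers are appended after the source list rather than
// interleaved per operand.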
1994 class getAsmVOP3OpSel <int NumSrcArgs,
2000 string dst = "$vdst";
2002 string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
2003 string isrc1 = !if(!eq(NumSrcArgs, 1), "",
2004 !if(!eq(NumSrcArgs, 2), " $src1",
2006 string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
2008 string fsrc0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
2009 string fsrc1 = !if(!eq(NumSrcArgs, 1), "",
2010 !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
2011 " $src1_modifiers,"));
2012 string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
2014 string src0 = !if(Src0HasMods, fsrc0, isrc0);
2015 string src1 = !if(Src1HasMods, fsrc1, isrc1);
2016 string src2 = !if(Src2HasMods, fsrc2, isrc2);
2018 string clamp = !if(HasClamp, "$clamp", "");
2019 string omod = !if(HasOMod, "$omod", "");
2020 string ret = dst#", "#src0#src1#src2#"$op_sel"#clamp#omod;
2023 class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
2024 string dst = !if(HasDst,
2025 !if(!eq(DstVT.Size, 1),
2026 "$sdst",
2027 "$vdst"),
2028 ""); // use $sdst for VOPC
2029 string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
2030 string src1 = !if(!eq(NumSrcArgs, 1), "",
2031 !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
2032 " $src1_modifiers,"));
2033 string args = !if(!not(HasModifiers),
2034 getAsm32<0, NumSrcArgs, DstVT>.ret,
2036 string ret = dst#args#" $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
2039 class getAsmDPP16 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
2040 string ret = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret#"$fi";
2043 class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32>
2044 : getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>{
2045 let ret = dst#args#" $dpp8$fi";
2048 class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp,
2049 bit HasOpSel, bit HasOMod, bit IsVOP3P,
2050 bit HasModifiers, bit Src0HasMods,
2051 bit Src1HasMods, bit Src2HasMods, ValueType DstVT = i32> {
2052 string dst = !if(HasDst,
2053 !if(!eq(DstVT.Size, 1),
2054 "$sdst",
2055 "$vdst"),
2056 ""); // use $sdst for VOPC
2057 string src0nomods = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
2058 string src1nomods = !if(!eq(NumSrcArgs, 1), "",
2059 !if(!eq(NumSrcArgs, 2), " $src1",
2061 string src2nomods = !if(!eq(NumSrcArgs, 3), " $src2", "");
2063 string src0mods = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
2064 string src1mods = !if(!eq(NumSrcArgs, 1), "",
2065 !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
2066 " $src1_modifiers,"));
2067 string src2mods = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
2069 string src0 = !if(Src0HasMods, src0mods, src0nomods);
2070 string src1 = !if(Src1HasMods, src1mods, src1nomods);
2071 string src2 = !if(Src2HasMods, src2mods, src2nomods);
2072 string opsel = !if(HasOpSel, "$op_sel", "");
2073 string 3PMods = !if(IsVOP3P,
2074 !if(HasOpSel, "$op_sel_hi", "")
2075 #!if(HasModifiers, "$neg_lo$neg_hi", ""),
2077 string clamp = !if(HasClamp, "$clamp", "");
2078 string omod = !if(HasOMod, "$omod", "");
2080 string ret = dst#!if(!gt(NumSrcArgs,0),", "#src0#src1#src2#opsel#3PMods#clamp#omod, "");
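// Illustrative example (assumption): a two-source VOP3 with modifiers, clamp
// and omod but no op_sel resolves to
//   "$vdst, $src0_modifiers, $src1_modifiers$clamp$omod"
// whereas a VOPC-style 1-bit destination uses "$sdst" and a zero-source
// instruction keeps only the destination.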
2084 class getAsmVOP3DPP<string base> {
2085 string ret = base # " $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
2088 class getAsmVOP3DPP16<string base> {
2089 string ret = getAsmVOP3DPP<base>.ret # "$fi";
2092 class getAsmVOP3DPP8<string base> {
2093 string ret = base # " $dpp8$fi";
2097 class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
2098 string dst = !if(HasDst,
2099 !if(!eq(DstVT.Size, 1),
2100 " vcc", // use vcc token as dst for VOPC instructions
2103 string src0 = "$src0_modifiers";
2104 string src1 = "$src1_modifiers";
2105 string args = !if(!eq(NumSrcArgs, 0),
2107 !if(!eq(NumSrcArgs, 1),
2109 ", "#src0#", "#src1#"$clamp"
2112 string sdwa = !if(!eq(NumSrcArgs, 0),
2114 !if(!eq(NumSrcArgs, 1),
2115 " $dst_sel $dst_unused $src0_sel",
2116 !if(!eq(DstVT.Size, 1),
2117 " $src0_sel $src1_sel", // No dst_sel and dst_unused for VOPC
2118 " $dst_sel $dst_unused $src0_sel $src1_sel"
2122 string ret = dst#args#sdwa;
2125 class getAsmSDWA9 <bit HasDst, bit HasOMod, int NumSrcArgs,
2126 ValueType DstVT = i32> {
2127 string dst = !if(HasDst,
2128 !if(!eq(DstVT.Size, 1),
2129 "$sdst", // VOPC
2130 "$vdst"),
2131 "");
2132 string src0 = "$src0_modifiers";
2133 string src1 = "$src1_modifiers";
2134 string out_mods = !if(!not(HasOMod), "$clamp", "$clamp$omod");
2135 string args = !if(!eq(NumSrcArgs, 0), "",
2136 !if(!eq(NumSrcArgs, 1),
2141 string sdwa = !if(!eq(NumSrcArgs, 0), "",
2142 !if(!eq(NumSrcArgs, 1),
2143 out_mods#" $dst_sel $dst_unused $src0_sel",
2144 !if(!eq(DstVT.Size, 1),
2145 " $src0_sel $src1_sel", // No dst_sel, dst_unused and output modifiers for VOPC
2146 out_mods#" $dst_sel $dst_unused $src0_sel $src1_sel"
2150 string ret = dst#args#sdwa;
2153 class getHas64BitOps <int NumSrcArgs, ValueType DstVT, ValueType Src0VT,
2155 bit ret = !if(!eq(NumSrcArgs, 3),
2157 !if(!eq(DstVT.Size, 64),
2159 !if(!eq(Src0VT.Size, 64),
2161 !if(!eq(Src1VT.Size, 64),
2170 class getHasSDWA <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
2171 ValueType Src1VT = i32> {
2172 bit ret = !if(!eq(NumSrcArgs, 3),
2173 0, // NumSrcArgs == 3 - No SDWA for VOP3
2174 !if(!eq(DstVT.Size, 64),
2175 0, // 64-bit dst - No SDWA for 64-bit operands
2176 !if(!eq(Src0VT.Size, 64),
2178 !if(!eq(Src1VT.Size, 64),
2187 class getHasDPP <int NumSrcArgs> {
2188 bit ret = !if(!eq(NumSrcArgs, 3),
2189 0, // NumSrcArgs == 3 - No DPP for VOP3
2193 class getHasExt32BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
2194 ValueType Src1VT = i32> {
2195 bit ret = !and(getHasDPP<NumSrcArgs>.ret,
2196 !not(getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret));
2199 class getHasExt64BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
2200 ValueType Src1VT = i32> {
2201 bit ret = !and(getHasDPP<NumSrcArgs>.ret,
2202 getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
2205 // Checks whether the instruction supports the DPP or SDWA extensions.
2206 class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
2207 ValueType Src1VT = i32> {
2208 bit ret = !or(getHasDPP<NumSrcArgs>.ret,
2209 getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
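// Illustrative example (assumption): for a plain 32-bit two-source profile
// such as [i32, i32, i32, untyped], getHasDPP, getHasSDWA and therefore
// getHasExt all evaluate to 1, while a three-source or 64-bit profile
// disables SDWA and 32-bit DPP through the checks above.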
2212 // Return an AGPR+VGPR operand class for the given VGPR register class.
2213 class getLdStRegisterOperand<RegisterClass RC> {
2214 RegisterOperand ret =
2215 !if(!eq(RC.Size, 32), AVLdSt_32,
2216 !if(!eq(RC.Size, 64), AVLdSt_64,
2217 !if(!eq(RC.Size, 96), AVLdSt_96,
2218 !if(!eq(RC.Size, 128), AVLdSt_128,
2219 !if(!eq(RC.Size, 160), AVLdSt_160,
2220 RegisterOperand<VReg_1> // invalid register
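// Illustrative example (assumption): getLdStRegisterOperand<VReg_64>.ret
// resolves to AVLdSt_64, so a 64-bit load/store pseudo accepts either a VGPR
// or an AGPR pair for its data operand; unsupported sizes fall through to the
// invalid VReg_1 operand.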
2224 class getHasVOP3DPP <ValueType DstVT = i32, ValueType Src0VT = i32,
2225 ValueType Src1VT = i32, ValueType Src2VT = i32> {
2226 bit ret = !if(!eq(DstVT.Size, 64),
2227 0, // 64-bit dst - No DPP for 64-bit operands
2228 !if(!eq(Src0VT.Size, 64),
2230 !if(!eq(Src1VT.Size, 64),
2232 !if(!eq(Src2VT.Size, 64),
2247 class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
2249 field list<ValueType> ArgVT = _ArgVT;
2250 field bit EnableClamp = _EnableClamp;
2251 field bit IsTrue16 = 0;
2253 field ValueType DstVT = ArgVT[0];
2254 field ValueType Src0VT = ArgVT[1];
2255 field ValueType Src1VT = ArgVT[2];
2256 field ValueType Src2VT = ArgVT[3];
2257 field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
2258 field RegisterOperand DstRCDPP = DstRC;
2259 field RegisterOperand DstRC64 = DstRC;
2260 field RegisterOperand DstRCVOP3DPP = DstRC64;
2261 field RegisterOperand DstRCSDWA = getSDWADstForVT<DstVT>.ret;
2262 field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT, IsTrue16>.ret;
2263 field RegisterOperand Src1RC32 = RegisterOperand<getVregSrcForVT<Src1VT>.ret>;
2264 field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
2265 field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
2266 field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
2267 field RegisterClass Src0DPP = getVregSrcForVT<Src0VT>.ret;
2268 field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
2269 field RegisterClass Src2DPP = getVregSrcForVT<Src2VT>.ret;
2270 field RegisterOperand Src0VOP3DPP = VGPRSrc_32;
2271 field RegisterOperand Src1VOP3DPP = VRegSrc_32;
2272 field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret;
2273 field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
2274 field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret;
2275 field Operand Src0Mod = getSrcMod<Src0VT>.ret;
2276 field Operand Src1Mod = getSrcMod<Src1VT>.ret;
2277 field Operand Src2Mod = getSrcMod<Src2VT>.ret;
2278 field Operand Src0ModDPP = getSrcModDPP<Src0VT>.ret;
2279 field Operand Src1ModDPP = getSrcModDPP<Src1VT>.ret;
2280 field Operand Src2ModDPP = getSrcModDPP<Src2VT>.ret;
2281 field Operand Src0ModVOP3DPP = getSrcModDPP<Src0VT>.ret;
2282 field Operand Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret;
2283 field Operand Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT>.ret;
2284 field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
2285 field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret;
2288 field bit IsMAI = 0;
2289 field bit IsVOP3P = 0;
2290 field bit IsDOT = 0;
2291 field bit IsSingle = 0;
2292 field bit IsWMMA = 0;
2294 field bit HasDst = !ne(DstVT.Value, untyped.Value);
2295 field bit HasDst32 = HasDst;
2296 field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case
2297 field bit EmitDstSel = EmitDst;
2298 field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret;
2299 field bit HasSrc0 = !ne(Src0VT.Value, untyped.Value);
2300 field bit HasSrc1 = !ne(Src1VT.Value, untyped.Value);
2301 field bit HasSrc2 = !ne(Src2VT.Value, untyped.Value);
2303 field bit HasSrc0FloatMods = isFloatType<Src0VT>.ret;
2304 field bit HasSrc1FloatMods = isFloatType<Src1VT>.ret;
2305 field bit HasSrc2FloatMods = isFloatType<Src2VT>.ret;
2307 field bit HasSrc0IntMods = isIntType<Src0VT>.ret;
2308 field bit HasSrc1IntMods = isIntType<Src1VT>.ret;
2309 field bit HasSrc2IntMods = isIntType<Src2VT>.ret;
2311 field bit HasClamp = !or(isModifierType<Src0VT>.ret, EnableClamp);
2312 field bit HasSDWAClamp = EmitDst;
2313 field bit HasFPClamp = !and(isFloatType<DstVT>.ret, HasClamp);
2314 field bit HasIntClamp = !if(isFloatType<DstVT>.ret, 0, HasClamp);
2315 field bit HasClampLo = HasClamp;
2316 field bit HasClampHi = !and(isPackedType<DstVT>.ret, HasClamp);
2317 field bit HasHigh = 0;
2319 field bit IsPacked = isPackedType<Src0VT>.ret;
2320 field bit HasOpSel = IsPacked;
2321 field bit HasOMod = !if(IsVOP3P, 0, isFloatType<DstVT>.ret);
2322 field bit HasSDWAOMod = isFloatType<DstVT>.ret;
2324 field bit HasModifiers = !or(isModifierType<Src0VT>.ret,
2325 isModifierType<Src1VT>.ret,
2326 isModifierType<Src2VT>.ret,
2329 field bit HasSrc0Mods = HasModifiers;
2330 field bit HasSrc1Mods = !if(HasModifiers, !or(HasSrc1FloatMods, HasSrc1IntMods), 0);
2331 field bit HasSrc2Mods = !if(HasModifiers, !or(HasSrc2FloatMods, HasSrc2IntMods), 0);
2333 field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
2334 field bit HasExtVOP3DPP = getHasVOP3DPP<DstVT, Src0VT, Src1VT, Src2VT>.ret;
2335 field bit HasExtDPP = !or(getHasDPP<NumSrcArgs>.ret, HasExtVOP3DPP);
2336 field bit HasExt32BitDPP = getHasExt32BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
2337 field bit HasExt64BitDPP = getHasExt64BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
2338 field bit HasExtSDWA = getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
2339 field bit HasExtSDWA9 = HasExtSDWA;
2340 field int NeedPatGen = PatGenMode.NoPattern;
2342 field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
2343 field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
2344 field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods);
2346 field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs));
2348 // VOP3b instructions are a special case with a second explicit
2349 // output. This is manually overridden for them.
2350 field dag Outs32 = Outs;
2351 field dag Outs64 = !if(HasDst,(outs DstRC64:$vdst),(outs));
2352 field dag OutsDPP = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret;
2353 field dag OutsDPP8 = OutsDPP;
2354 field dag OutsVOP3DPP = getOutsDPP<HasDst, DstVT, DstRCVOP3DPP>.ret;
2355 field dag OutsVOP3DPP8 = OutsVOP3DPP;
2356 field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret;
2358 field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
2359 field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
2360 HasIntClamp, HasModifiers, HasSrc2Mods,
2361 HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
2362 field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
2363 NumSrcArgs, HasClamp, HasOpSel,
2364 Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
2365 field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
2366 NumSrcArgs, HasClamp, HasOMod,
2367 getOpSelMod<Src0VT>.ret,
2368 getOpSelMod<Src1VT>.ret,
2369 getOpSelMod<Src2VT>.ret>.ret;
2370 field dag InsDPP = !if(HasExtDPP,
2371 getInsDPP<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs,
2372 HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret,
2374 field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs,
2375 HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
2376 field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, Src2DPP,
2377 NumSrcArgs, HasModifiers,
2378 Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
2379 field dag InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP,
2380 Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
2381 Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel, IsVOP3P>.ret;
2382 field dag InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
2383 field dag InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
2384 field dag InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
2385 field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
2386 HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
2388 field dag InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X);
2389 // It is a slight misnomer to use the deferred f32 operand type for non-float
2390 // operands, but this operand type will only be used if the other dual
2391 // component is FMAAK or FMAMK
2392 field dag InsVOPDXDeferred = (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X, VGPR_32:$vsrc1X);
2393 field dag InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y);
2394 field dag InsVOPDYDeferred = (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y, VGPR_32:$vsrc1Y);
2397 field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
2398 field string AsmDPP = !if(HasExtDPP,
2399 getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret, "");
2400 field string AsmDPP16 = getAsmDPP16<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
2401 // The DPP8 encoding has no fields for modifiers; this is enforced here by
2402 // passing 0 for HasModifiers, which selects the modifier-free asm operand names.
2403 field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0 /*HasModifiers*/, DstVT>.ret;
2404 field string AsmVOP3Base = getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
2405 HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, HasModifiers,
2406 HasModifiers, DstVT>.ret;
2407 field string Asm64 = AsmVOP3Base;
2408 field string AsmVOP3P = getAsmVOP3P<NumSrcArgs, HasModifiers, HasClamp, HasOpSel>.ret;
2409 field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
2414 HasSrc2FloatMods>.ret;
2415 field string AsmVOP3DPP = getAsmVOP3DPP<AsmVOP3Base>.ret;
2416 field string AsmVOP3DPP16 = getAsmVOP3DPP16<AsmVOP3Base>.ret;
2417 field string AsmVOP3DPP8 = getAsmVOP3DPP8<AsmVOP3Base>.ret;
2418 field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
2419 field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
2420 field string AsmVOPDX = getAsmVOPDPart<NumSrcArgs, "X">.ret;
2421 field string AsmVOPDY = getAsmVOPDPart<NumSrcArgs, "Y">.ret;
2422 field string TieRegDPP = "$old";
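// Illustrative sketch (not part of the original file): a profile such as
// VOP_F32_F32_F32 below is simply VOPProfile<[f32, f32, f32, untyped]>, which
// resolves NumSrcArgs to 2, enables float source modifiers and omod, and
// derives Ins32/Ins64/InsDPP/InsSDWA plus the matching Asm* strings from the
// helper classes above. A hypothetical new opcode would normally reuse one of
// these shared profiles rather than instantiate VOPProfile directly.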
2425 class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
2428 let HasExtVOP3DPP = 0;
2429 let HasExt32BitDPP = 0;
2430 let HasExt64BitDPP = 0;
2432 let HasExtSDWA9 = 0;
2435 class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern> : VOPProfile <p.ArgVT> {
2436 let NeedPatGen = mode;
2439 // VOPC_Profile_t16, VOPC_NoSdst_Profile_t16, VOPC_Class_Profile_t16,
2440 // VOPC_Class_NoSdst_Profile_t16, and VOP_MAC_F16_t16 do not inherit from this
2441 // class, so any changes made to this class must also be copied into those profiles.
2442 class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
2444 // Most DstVT are 16-bit, but not all
2445 let DstRC = getVALUDstForVT_t16<DstVT>.ret;
2446 let DstRC64 = getVALUDstForVT<DstVT>.ret;
2447 let Src1RC32 = RegisterOperand<getVregSrcForVT_t16<Src1VT>.ret>;
2448 let Src0DPP = getVregSrcForVT_t16<Src0VT>.ret;
2449 let Src1DPP = getVregSrcForVT_t16<Src1VT>.ret;
2450 let Src2DPP = getVregSrcForVT_t16<Src2VT>.ret;
2451 let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
2452 let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
2453 let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
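// Illustrative sketch (assumption; the name below is hypothetical and the
// actual _t16 profiles are defined elsewhere): a True16 variant of an
// existing profile would be declared as, e.g.,
//   def VOP_F16_F16_t16 : VOPProfile_True16<VOP_F16_F16>;
// reusing the 16-bit register and modifier overrides above.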
2456 def VOP_F16_F16 : VOPProfile<[f16, f16, untyped, untyped]>;
2457 def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>;
2458 def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>;
2459 def VOP_I16_I16 : VOPProfile <[i16, i16, untyped, untyped]>;
2461 def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
2462 def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
2463 def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>;
2464 def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>;
2465 def VOP_I16_I16_I16_ARITH : VOPProfile <[i16, i16, i16, untyped], /*EnableClamp=*/1>;
2467 def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>;
2468 def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;
2470 def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32, untyped]>;
2471 def VOP_I32_I16 : VOPProfile <[i32, i16, untyped, untyped]>;
2472 def VOP_I16_I32 : VOPProfile <[i16, i32, untyped, untyped]>;
2474 def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>;
2475 def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>;
2476 def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;
2478 def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>;
2479 def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
2480 def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>;
2481 def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>;
2483 def VOP_F16_V2F16_V2F16_F16 : VOPProfile <[f16, v2f16, v2f16, f16]>;
2484 def VOP_I16_V2I16_V2I16_I16 : VOPProfile <[i16, v2i16, v2i16, i16]>;
2485 def VOP_F32_V2I16_V2I16_F32 : VOPProfile <[f32, v2i16, v2i16, f32]>;
2487 def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>;
2489 def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;
2491 def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
2492 def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>;
2493 def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>;
2494 def VOP_F64_F32 : VOPProfile <[f64, f32, untyped, untyped]>;
2495 def VOP_F64_F64 : VOPProfile <[f64, f64, untyped, untyped]>;
2496 def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>;
2497 def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>;
2498 def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>;
2499 def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
2500 def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>;
2501 def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>;
2502 def VOP_I64_I64 : VOPProfile <[i64, i64, untyped, untyped]>;
2504 def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>;
2505 def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;
2506 def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>;
2507 def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>;
2508 def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
2509 def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
2510 def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
2511 def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
2512 def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], /*EnableClamp=*/1>;
2513 def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
2514 def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>;
2516 def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
2517 def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
2518 def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
2520 def VOP_F16_F32_F16_F32 : VOPProfile <[f16, f32, f16, f32]>;
2521 def VOP_F32_F32_F16_F16 : VOPProfile <[f32, f32, f16, f16]>;
2522 def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
2523 def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
2524 def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
2525 def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
2526 def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
2527 def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;
2528 def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>;
2530 def VOP_F32_V2F16_V2F16_F32 : VOPProfile <[f32, v2f16, v2f16, f32]>;
2531 def VOP_I32_V2I16_V2I16_I32 : VOPProfile <[i32, v2i16, v2i16, i32]>;
2533 def VOP_V4F32_F32_F32_V4F32 : VOPProfile <[v4f32, f32, f32, v4f32]>;
2534 def VOP_V16F32_F32_F32_V16F32 : VOPProfile <[v16f32, f32, f32, v16f32]>;
2535 def VOP_V32F32_F32_F32_V32F32 : VOPProfile <[v32f32, f32, f32, v32f32]>;
2536 def VOP_V4F32_V4F16_V4F16_V4F32 : VOPProfile <[v4f32, v4f16, v4f16, v4f32]>;
2537 def VOP_V16F32_V4F16_V4F16_V16F32 : VOPProfile <[v16f32, v4f16, v4f16, v16f32]>;
2538 def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile <[v32f32, v4f16, v4f16, v32f32]>;
2539 def VOP_V4F32_V2I16_V2I16_V4F32 : VOPProfile <[v4f32, v2i16, v2i16, v4f32]>;
2540 def VOP_V16F32_V2I16_V2I16_V16F32 : VOPProfile <[v16f32, v2i16, v2i16, v16f32]>;
2541 def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile <[v32f32, v2i16, v2i16, v32f32]>;
2542 def VOP_V4I32_I32_I32_V4I32 : VOPProfile <[v4i32, i32, i32, v4i32]>;
2543 def VOP_V16I32_I32_I32_V16I32 : VOPProfile <[v16i32, i32, i32, v16i32]>;
2544 def VOP_V32I32_I32_I32_V32I32 : VOPProfile <[v32i32, i32, i32, v32i32]>;
2546 def VOP_V4F64_F64_F64_V4F64 : VOPProfile <[v4f64, f64, f64, v4f64]>;
2547 def VOP_V1F64_F64_F64_V1F64 : VOPProfile <[v1f64, f64, f64, v1f64]>;
2549 def VOP_V2F32_V2F32_V2F32_V2F32 : VOPProfile <[v2f32, v2f32, v2f32, v2f32]>;
2550 def VOP_V2F32_V2F32_V2F32 : VOPProfile <[v2f32, v2f32, v2f32, untyped]>;
2551 def VOP_V2I32_V2I32_V2I32 : VOPProfile <[v2i32, v2i32, v2i32, untyped]>;
2552 def VOP_V4F32_V4I16_V4I16_V4F32 : VOPProfile <[v4f32, v4i16, v4i16, v4f32]>;
2553 def VOP_V16F32_V4I16_V4I16_V16F32 : VOPProfile <[v16f32, v4i16, v4i16, v16f32]>;
2554 def VOP_V32F32_V4I16_V4I16_V32F32 : VOPProfile <[v32f32, v4i16, v4i16, v32f32]>;
2556 def VOP_V4I32_I64_I64_V4I32 : VOPProfile <[v4i32, i64, i64, v4i32]>;
2557 def VOP_V16I32_I64_I64_V16I32 : VOPProfile <[v16i32, i64, i64, v16i32]>;
2558 def VOP_V4F32_V2F32_V2F32_V4F32 : VOPProfile <[v4f32, v2f32, v2f32, v4f32]>;
2559 def VOP_V16F32_V2F32_V2F32_V16F32 : VOPProfile <[v16f32, v2f32, v2f32, v16f32]>;
2560 def VOP_V4F32_I64_I64_V4F32 : VOPProfile <[v4f32, i64, i64, v4f32]>;
2561 def VOP_V16F32_I64_I64_V16F32 : VOPProfile <[v16f32, i64, i64, v16f32]>;
2563 def VOP_V4F32_V4F16_V8F16_I32 : VOPProfile <[v4f32, v4f16, v8f16, i32]>;
2564 def VOP_V16F32_V4F16_V8F16_I32 : VOPProfile <[v16f32, v4f16, v8f16, i32]>;
2565 def VOP_V4F32_V4I16_V8I16_I32 : VOPProfile <[v4f32, v4i16, v8i16, i32]>;
2566 def VOP_V16F32_V4I16_V8I16_I32 : VOPProfile <[v16f32, v4i16, v8i16, i32]>;
2567 def VOP_V4I32_V2I32_V4I32_I32 : VOPProfile <[v4i32, v2i32, v4i32, i32]>;
2568 def VOP_V16I32_V2I32_V4I32_I32 : VOPProfile <[v16i32, v2i32, v4i32, i32]>;
2569 def VOP_V4F32_V2I32_V4I32_I32 : VOPProfile <[v4f32, v2i32, v4i32, i32]>;
2570 def VOP_V16F32_V2I32_V4I32_I32 : VOPProfile <[v16f32, v2i32, v4i32, i32]>;
2572 class Commutable_REV <string revOp, bit isOrig> {
2573 string RevOp = revOp;
2574 bit IsOrig = isOrig;
2577 class AtomicNoRet <string noRetOp, bit isRet> {
2578 string NoRetOp = noRetOp;
2579 bit IsRet = isRet;
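// Illustrative sketch (assumption; opcode names are placeholders): an
// instruction definition mixes these in as extra base classes, e.g.
//   def V_SUBREV_EXAMPLE : ..., Commutable_REV<"v_sub_example", 0 /*isOrig*/>;
// so that the getCommuteOrig/getCommuteRev mappings below can pair a reversed
// opcode with its original form via the shared RevOp string.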
2582 //===----------------------------------------------------------------------===//
2583 // Interpolation opcodes
2584 //===----------------------------------------------------------------------===//
2586 class VINTRPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVINTRPDst">;
2588 class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
2589 VINTRPCommon <outs, ins, "", pattern>,
2590 SIMCInstr<opName, SIEncodingFamily.NONE> {
2592 let isCodeGenOnly = 1;
2595 // FIXME-GFX10: WIP.
2596 class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins,
2597 string asm, int encodingFamily> :
2598 VINTRPCommon <outs, ins, asm, []>,
2600 SIMCInstr<opName, encodingFamily> {
2603 class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
2605 VINTRPCommon <outs, ins, asm, []>,
2607 SIMCInstr<opName, SIEncodingFamily.VI> {
2608 let AssemblerPredicate = VIAssemblerPredicate;
2609 let DecoderNamespace = "GFX8";
2612 // FIXME-GFX10: WIP.
2613 multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm,
2614 list<dag> pattern = []> {
2615 def "" : VINTRP_Pseudo <NAME, outs, ins, pattern>;
2617 let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
2618 def _si : VINTRP_Real_si <op, NAME, outs, ins, asm, SIEncodingFamily.SI>;
2619 } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
2621 def _vi : VINTRP_Real_vi <op, NAME, outs, ins, asm>;
2623 let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
2624 def _gfx10 : VINTRP_Real_si<op, NAME, outs, ins, asm, SIEncodingFamily.GFX10>;
2625 } // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
2628 //===----------------------------------------------------------------------===//
2629 // Vector instruction mappings
2630 //===----------------------------------------------------------------------===//
2632 // Maps an opcode in e32 form to its e64 equivalent
2633 def getVOPe64 : InstrMapping {
2634 let FilterClass = "VOP";
2635 let RowFields = ["OpName"];
2636 let ColFields = ["Size", "VOP3"];
2637 let KeyCol = ["4", "0"];
2638 let ValueCols = [["8", "1"]];
2641 // Maps an opcode in e64 form to its e32 equivalent
2642 def getVOPe32 : InstrMapping {
2643 let FilterClass = "VOP";
2644 let RowFields = ["OpName"];
2645 let ColFields = ["Size", "VOP3"];
2646 let KeyCol = ["8", "1"];
2647 let ValueCols = [["4", "0"]];
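// Illustrative example (assumption): v_add_f32_e32 (Size = 4, VOP3 = 0) and
// v_add_f32_e64 (Size = 8, VOP3 = 1) share the same OpName, so getVOPe64 maps
// the 32-bit encoding to its 64-bit form and getVOPe32 inverts that mapping.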
2650 // Maps ordinary instructions to their SDWA counterparts
2651 def getSDWAOp : InstrMapping {
2652 let FilterClass = "VOP";
2653 let RowFields = ["OpName"];
2654 let ColFields = ["AsmVariantName"];
2655 let KeyCol = ["Default"];
2656 let ValueCols = [["SDWA"]];
2659 // Maps SDWA instructions to their ordinary counterparts
2660 def getBasicFromSDWAOp : InstrMapping {
2661 let FilterClass = "VOP";
2662 let RowFields = ["OpName"];
2663 let ColFields = ["AsmVariantName"];
2664 let KeyCol = ["SDWA"];
2665 let ValueCols = [["Default"]];
2668 // Maps ordinary instructions to their DPP counterparts
2669 def getDPPOp32 : InstrMapping {
2670 let FilterClass = "VOP";
2671 let RowFields = ["OpName"];
2672 let ColFields = ["AsmVariantName"];
2673 let KeyCol = ["Default"];
2674 let ValueCols = [["DPP"]];
2677 def getDPPOp64 : InstrMapping {
2678 let FilterClass = "VOP";
2679 let RowFields = ["OpName"];
2680 let ColFields = ["AsmVariantName"];
2681 let KeyCol = ["VOP3"];
2682 let ValueCols = [["VOP3_DPP"]];
2685 // Maps a commuted opcode to its original version
2686 def getCommuteOrig : InstrMapping {
2687 let FilterClass = "Commutable_REV";
2688 let RowFields = ["RevOp"];
2689 let ColFields = ["IsOrig"];
2691 let ValueCols = [["1"]];
2694 // Maps an original opcode to its commuted version
2695 def getCommuteRev : InstrMapping {
2696 let FilterClass = "Commutable_REV";
2697 let RowFields = ["RevOp"];
2698 let ColFields = ["IsOrig"];
2700 let ValueCols = [["0"]];
2703 def getMCOpcodeGen : InstrMapping {
2704 let FilterClass = "SIMCInstr";
2705 let RowFields = ["PseudoInstr"];
2706 let ColFields = ["Subtarget"];
2707 let KeyCol = [!cast<string>(SIEncodingFamily.NONE)];
2708 // These columns must be kept in sync with the SIEncodingFamily enumeration.
2709 let ValueCols = [[!cast<string>(SIEncodingFamily.SI)],
2710 [!cast<string>(SIEncodingFamily.VI)],
2711 [!cast<string>(SIEncodingFamily.SDWA)],
2712 [!cast<string>(SIEncodingFamily.SDWA9)],
2713 // GFX80 encoding is added to work around a multiple matching
2714 // issue for buffer instructions with unpacked d16 data. This
2715 // does not actually change the encoding, and thus may be
2716 // removed later.
2717 [!cast<string>(SIEncodingFamily.GFX80)],
2718 [!cast<string>(SIEncodingFamily.GFX9)],
2719 [!cast<string>(SIEncodingFamily.GFX10)],
2720 [!cast<string>(SIEncodingFamily.SDWA10)],
2721 [!cast<string>(SIEncodingFamily.GFX90A)],
2722 [!cast<string>(SIEncodingFamily.GFX940)],
2723 [!cast<string>(SIEncodingFamily.GFX11)]];
2726 // Get equivalent SOPK instruction.
2727 def getSOPKOp : InstrMapping {
2728 let FilterClass = "SOPKInstTable";
2729 let RowFields = ["BaseCmpOp"];
2730 let ColFields = ["IsSOPK"];
2732 let ValueCols = [["1"]];
2735 def getAddr64Inst : InstrMapping {
2736 let FilterClass = "MUBUFAddr64Table";
2737 let RowFields = ["OpName"];
2738 let ColFields = ["IsAddr64"];
2740 let ValueCols = [["1"]];
2743 def getIfAddr64Inst : InstrMapping {
2744 let FilterClass = "MUBUFAddr64Table";
2745 let RowFields = ["OpName"];
2746 let ColFields = ["IsAddr64"];
2748 let ValueCols = [["1"]];
2751 // Maps an atomic opcode to its returnless version.
2752 def getAtomicNoRetOp : InstrMapping {
2753 let FilterClass = "AtomicNoRet";
2754 let RowFields = ["NoRetOp"];
2755 let ColFields = ["IsRet"];
2757 let ValueCols = [["0"]];
2760 // Maps a GLOBAL to its SADDR form.
2761 def getGlobalSaddrOp : InstrMapping {
2762 let FilterClass = "GlobalSaddrTable";
2763 let RowFields = ["SaddrOp"];
2764 let ColFields = ["IsSaddr"];
2766 let ValueCols = [["1"]];
2769 // Maps a GLOBAL SADDR to its VADDR form.
2770 def getGlobalVaddrOp : InstrMapping {
2771 let FilterClass = "GlobalSaddrTable";
2772 let RowFields = ["SaddrOp"];
2773 let ColFields = ["IsSaddr"];
2775 let ValueCols = [["0"]];
2778 // Maps a v_cmpx opcode with sdst to opcode without sdst.
2779 def getVCMPXNoSDstOp : InstrMapping {
2780 let FilterClass = "VCMPXNoSDstTable";
2781 let RowFields = ["NoSDstOp"];
2782 let ColFields = ["HasSDst"];
2784 let ValueCols = [["0"]];
2787 // Maps a SOPP to a SOPP with S_NOP
2788 def getSOPPWithRelaxation : InstrMapping {
2789 let FilterClass = "SOPPRelaxTable";
2790 let RowFields = ["KeyName"];
2791 let ColFields = ["IsRelaxed"];
2793 let ValueCols = [["1"]];
2796 // Maps flat scratch opcodes by addressing modes
2797 def getFlatScratchInstSTfromSS : InstrMapping {
2798 let FilterClass = "FlatScratchInst";
2799 let RowFields = ["SVOp"];
2800 let ColFields = ["Mode"];
2801 let KeyCol = ["SS"];
2802 let ValueCols = [["ST"]];
2805 def getFlatScratchInstSSfromSV : InstrMapping {
2806 let FilterClass = "FlatScratchInst";
2807 let RowFields = ["SVOp"];
2808 let ColFields = ["Mode"];
2809 let KeyCol = ["SV"];
2810 let ValueCols = [["SS"]];
2813 def getFlatScratchInstSVfromSVS : InstrMapping {
2814 let FilterClass = "FlatScratchInst";
2815 let RowFields = ["SVOp"];
2816 let ColFields = ["Mode"];
2817 let KeyCol = ["SVS"];
2818 let ValueCols = [["SV"]];
2821 def getFlatScratchInstSVfromSS : InstrMapping {
2822 let FilterClass = "FlatScratchInst";
2823 let RowFields = ["SVOp"];
2824 let ColFields = ["Mode"];
2825 let KeyCol = ["SS"];
2826 let ValueCols = [["SV"]];
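// Illustrative sketch (assumption; names and constructor order are
// approximate): flat scratch pseudos register themselves for these mappings
// roughly as
//   def SCRATCH_LOAD_EXAMPLE_SADDR : ..., FlatScratchInst<"scratch_load_example", "SS">;
//   def SCRATCH_LOAD_EXAMPLE       : ..., FlatScratchInst<"scratch_load_example", "SV">;
// so a query keyed on the common SVOp string can move between the SS, SV,
// SVS and ST addressing-mode columns.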
2829 def getMFMAEarlyClobberOp : InstrMapping {
2830 let FilterClass = "MFMATable";
2831 let RowFields = ["FMAOp"];
2832 let ColFields = ["IsMac"];
2834 let ValueCols = [["0"]];
2837 // Maps a v_cmp instruction to its v_cmpx equivalent.
2838 def getVCMPXOpFromVCMP : InstrMapping {
2839 let FilterClass = "VCMPVCMPXTable";
2840 let RowFields = ["VCMPOp"];
2841 let ColFields = ["IsVCMPX"];
2843 let ValueCols = [["1"]];
2846 def VOPDComponentTable : GenericTable {
2847 let FilterClass = "VOPD_Component";
2848 let CppTypeName = "VOPDComponentInfo";
2849 let Fields = ["BaseVOP", "VOPDOp", "CanBeVOPDX"];
2850 let PrimaryKey = ["BaseVOP"];
2851 let PrimaryKeyName = "getVOPDComponentHelper";
2854 def getVOPDBaseFromComponent : SearchIndex {
2855 let Table = VOPDComponentTable;
2856 let Key = ["VOPDOp"];
2859 def VOPDPairs : GenericTable {
2860 let FilterClass = "VOPD_Base";
2861 let CppTypeName = "VOPDInfo";
2862 let Fields = ["Opcode", "OpX", "OpY"];
2863 let PrimaryKey = ["Opcode"];
2864 let PrimaryKeyName = "getVOPDOpcodeHelper";
2867 def getVOPDInfoFromComponentOpcodes : SearchIndex {
2868 let Table = VOPDPairs;
2869 let Key = ["OpX", "OpY"];
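// Illustrative note (assumption): VOPDComponentTable and VOPDPairs are emitted
// as C++ tables of VOPDComponentInfo and VOPDInfo records, and the SearchIndex
// defs above generate the corresponding lookup helpers: getVOPDBaseFromComponent
// keyed on VOPDOp, and getVOPDInfoFromComponentOpcodes keyed on the (OpX, OpY)
// pair used when fusing two single-component ops into one VOPD instruction.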
2872 include "SIInstructions.td"
2874 include "DSInstructions.td"
2875 include "MIMGInstructions.td"