1 //===-- SIInstrInfo.td - SI Instruction Infos -------------*- tablegen -*--===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
// Wavefront-size predicates. Each one pairs a codegen Predicate, which tests
// Subtarget->getWavefrontSize() against 32 or 64, with an AssemblerPredicate
// on the correspondingly named FeatureWavefrontSize32/64 feature string.
9 def isWave32 : Predicate<"Subtarget->getWavefrontSize() == 32">,
10 AssemblerPredicate <"FeatureWavefrontSize32">;
11 def isWave64 : Predicate<"Subtarget->getWavefrontSize() == 64">,
12 AssemblerPredicate <"FeatureWavefrontSize64">;
// Predicate whose codegen condition is the literal "false", so it can never
// hold during selection; the assembler side is keyed on "FeatureDisable".
14 def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
16 class GCNPredicateControl : PredicateControl {
17 Predicate SIAssemblerPredicate = isGFX6GFX7;
18 Predicate VIAssemblerPredicate = isGFX8GFX9;
21 // Except for the NONE field, this must be kept in sync with the
22 // SIEncodingFamily enum in AMDGPUInstrInfo.cpp
23 def SIEncodingFamily {
35 //===----------------------------------------------------------------------===//
37 //===----------------------------------------------------------------------===//
// Selection DAG node for AMDGPUISD::CLAMP, typed with the SDTFPUnaryOp
// profile. No node properties are attached.
39 def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;
41 def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD",
42 SDTypeProfile<1, 4, [SDTCisVT<1, v4i32>, SDTCisVT<2, i32>, SDTCisVT<3, i1>,
44 [SDNPMayLoad, SDNPMemOperand]
47 def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT",
48 SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>]>,
49 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue]
52 def SIatomic_inc : SDNode<"AMDGPUISD::ATOMIC_INC", SDTAtomic2,
53 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
56 def SIatomic_dec : SDNode<"AMDGPUISD::ATOMIC_DEC", SDTAtomic2,
57 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
60 def SDTAtomic2_f32 : SDTypeProfile<1, 2, [
61 SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1>
64 def SIatomic_fmin : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMIN", SDTAtomic2_f32,
65 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
68 def SIatomic_fmax : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMAX", SDTAtomic2_f32,
69 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
72 // load_d16_{lo|hi} ptr, tied_input
73 def SIload_d16 : SDTypeProfile<1, 2, [
79 def SDTtbuffer_load : SDTypeProfile<1, 8,
81 SDTCisVT<1, v4i32>, // rsrc
82 SDTCisVT<2, i32>, // vindex(VGPR)
83 SDTCisVT<3, i32>, // voffset(VGPR)
84 SDTCisVT<4, i32>, // soffset(SGPR)
85 SDTCisVT<5, i32>, // offset(imm)
86 SDTCisVT<6, i32>, // format(imm)
87 SDTCisVT<7, i32>, // cachepolicy, swizzled buffer(imm)
88 SDTCisVT<8, i1> // idxen(imm)
// DAG node for AMDGPUISD::TBUFFER_LOAD_FORMAT using the SDTtbuffer_load type
// profile. Flagged as a chained load that carries a memory operand.
91 def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", SDTtbuffer_load,
92 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
93 def SItbuffer_load_d16 : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT_D16",
95 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
97 def SDTtbuffer_store : SDTypeProfile<0, 9,
99 SDTCisVT<1, v4i32>, // rsrc
100 SDTCisVT<2, i32>, // vindex(VGPR)
101 SDTCisVT<3, i32>, // voffset(VGPR)
102 SDTCisVT<4, i32>, // soffset(SGPR)
103 SDTCisVT<5, i32>, // offset(imm)
104 SDTCisVT<6, i32>, // format(imm)
105 SDTCisVT<7, i32>, // cachepolicy, swizzled buffer(imm)
106 SDTCisVT<8, i1> // idxen(imm)
// DAG node for AMDGPUISD::TBUFFER_STORE_FORMAT using the SDTtbuffer_store type
// profile. Flagged as a chained store that carries a memory operand.
109 def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store,
110 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
111 def SItbuffer_store_d16 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_D16",
113 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
115 def SDTBufferLoad : SDTypeProfile<1, 7,
117 SDTCisVT<1, v4i32>, // rsrc
118 SDTCisVT<2, i32>, // vindex(VGPR)
119 SDTCisVT<3, i32>, // voffset(VGPR)
120 SDTCisVT<4, i32>, // soffset(SGPR)
121 SDTCisVT<5, i32>, // offset(imm)
122 SDTCisVT<6, i32>, // cachepolicy, swizzled buffer(imm)
123 SDTCisVT<7, i1>]>; // idxen(imm)
// Buffer load DAG nodes. Every def below shares the SDTBufferLoad type profile
// and the same property list (memory operand, chain, may-load); they differ
// only in the AMDGPUISD opcode they name (plain, ubyte, ushort, byte, short,
// format).
125 def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
126 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
127 def SIbuffer_load_ubyte : SDNode <"AMDGPUISD::BUFFER_LOAD_UBYTE", SDTBufferLoad,
128 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
129 def SIbuffer_load_ushort : SDNode <"AMDGPUISD::BUFFER_LOAD_USHORT", SDTBufferLoad,
130 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
131 def SIbuffer_load_byte : SDNode <"AMDGPUISD::BUFFER_LOAD_BYTE", SDTBufferLoad,
132 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
133 def SIbuffer_load_short: SDNode <"AMDGPUISD::BUFFER_LOAD_SHORT", SDTBufferLoad,
134 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
135 def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad,
136 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
137 def SIbuffer_load_format_d16 : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_D16",
139 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
141 def SDTBufferStore : SDTypeProfile<0, 8,
143 SDTCisVT<1, v4i32>, // rsrc
144 SDTCisVT<2, i32>, // vindex(VGPR)
145 SDTCisVT<3, i32>, // voffset(VGPR)
146 SDTCisVT<4, i32>, // soffset(SGPR)
147 SDTCisVT<5, i32>, // offset(imm)
148 SDTCisVT<6, i32>, // cachepolicy, swizzled buffer(imm)
149 SDTCisVT<7, i1>]>; // idxen(imm)
// DAG node for AMDGPUISD::BUFFER_STORE using the SDTBufferStore type profile.
// Flagged as a chained store that carries a memory operand.
151 def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore,
152 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
153 def SIbuffer_store_byte: SDNode <"AMDGPUISD::BUFFER_STORE_BYTE",
155 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
156 def SIbuffer_store_short : SDNode <"AMDGPUISD::BUFFER_STORE_SHORT",
158 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
159 def SIbuffer_store_format : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT",
161 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
162 def SIbuffer_store_format_d16 : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT_D16",
164 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
166 class SDBufferAtomic<string opcode> : SDNode <opcode,
168 [SDTCisVT<2, v4i32>, // rsrc
169 SDTCisVT<3, i32>, // vindex(VGPR)
170 SDTCisVT<4, i32>, // voffset(VGPR)
171 SDTCisVT<5, i32>, // soffset(SGPR)
172 SDTCisVT<6, i32>, // offset(imm)
173 SDTCisVT<7, i32>, // cachepolicy(imm)
174 SDTCisVT<8, i1>]>, // idxen(imm)
175 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
178 class SDBufferAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode,
180 [SDTCisVT<0, ty>, // vdata
181 SDTCisVT<1, v4i32>, // rsrc
182 SDTCisVT<2, i32>, // vindex(VGPR)
183 SDTCisVT<3, i32>, // voffset(VGPR)
184 SDTCisVT<4, i32>, // soffset(SGPR)
185 SDTCisVT<5, i32>, // offset(imm)
186 SDTCisVT<6, i32>, // cachepolicy(imm)
187 SDTCisVT<7, i1>]>, // idxen(imm)
188 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
// Buffer atomic DAG nodes, one per AMDGPUISD::BUFFER_ATOMIC_* opcode.
// The swap/add/sub/min/max/and/or/xor/inc/dec nodes instantiate SDBufferAtomic.
// The two FADD nodes instantiate SDBufferAtomicNoRtn, which also takes the
// vdata value type explicitly (f32, and v2f16 for the packed variant).
191 def SIbuffer_atomic_swap : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SWAP">;
192 def SIbuffer_atomic_add : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_ADD">;
193 def SIbuffer_atomic_sub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SUB">;
194 def SIbuffer_atomic_smin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMIN">;
195 def SIbuffer_atomic_umin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMIN">;
196 def SIbuffer_atomic_smax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMAX">;
197 def SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">;
198 def SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">;
199 def SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">;
200 def SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">;
201 def SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">;
202 def SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">;
203 def SIbuffer_atomic_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_FADD", f32>;
204 def SIbuffer_atomic_pk_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_PK_FADD", v2f16>;
206 def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
208 [SDTCisVT<0, i32>, // dst
209 SDTCisVT<1, i32>, // src
210 SDTCisVT<2, i32>, // cmp
211 SDTCisVT<3, v4i32>, // rsrc
212 SDTCisVT<4, i32>, // vindex(VGPR)
213 SDTCisVT<5, i32>, // voffset(VGPR)
214 SDTCisVT<6, i32>, // soffset(SGPR)
215 SDTCisVT<7, i32>, // offset(imm)
216 SDTCisVT<8, i32>, // cachepolicy(imm)
217 SDTCisVT<9, i1>]>, // idxen(imm)
218 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
221 class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode,
223 [SDTCisPtrTy<0>, // vaddr
224 SDTCisVT<1, ty>]>, // vdata
225 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
// Global floating-point atomic add nodes built from SDGlobalAtomicNoRtn:
// a scalar f32 form (ATOMIC_FADD) and a packed v2f16 form (ATOMIC_PK_FADD).
228 def SIglobal_atomic_fadd : SDGlobalAtomicNoRtn <"AMDGPUISD::ATOMIC_FADD", f32>;
229 def SIglobal_atomic_pk_fadd : SDGlobalAtomicNoRtn <"AMDGPUISD::ATOMIC_PK_FADD", v2f16>;
231 def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
232 SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>
235 def SIlds : SDNode<"AMDGPUISD::LDS",
236 SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]>
239 def SIload_d16_lo : SDNode<"AMDGPUISD::LOAD_D16_LO",
241 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
244 def SIload_d16_lo_u8 : SDNode<"AMDGPUISD::LOAD_D16_LO_U8",
246 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
249 def SIload_d16_lo_i8 : SDNode<"AMDGPUISD::LOAD_D16_LO_I8",
251 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
254 def SIload_d16_hi : SDNode<"AMDGPUISD::LOAD_D16_HI",
256 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
259 def SIload_d16_hi_u8 : SDNode<"AMDGPUISD::LOAD_D16_HI_U8",
261 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
264 def SIload_d16_hi_i8 : SDNode<"AMDGPUISD::LOAD_D16_HI_I8",
266 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
269 def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
270 SDTypeProfile<0 ,1, [SDTCisInt<0>]>,
271 [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]
274 //===----------------------------------------------------------------------===//
276 //===----------------------------------------------------------------------===//
278 // Returns 1 if the source arguments have modifiers, 0 if they do not.
279 // XXX - do f16 instructions?
280 class isFloatType<ValueType SrcVT> {
282 !if(!eq(SrcVT.Value, f16.Value), 1,
283 !if(!eq(SrcVT.Value, f32.Value), 1,
284 !if(!eq(SrcVT.Value, f64.Value), 1,
285 !if(!eq(SrcVT.Value, v2f16.Value), 1,
286 !if(!eq(SrcVT.Value, v4f16.Value), 1,
287 !if(!eq(SrcVT.Value, v2f32.Value), 1,
288 !if(!eq(SrcVT.Value, v2f64.Value), 1,
292 class isIntType<ValueType SrcVT> {
294 !if(!eq(SrcVT.Value, i16.Value), 1,
295 !if(!eq(SrcVT.Value, i32.Value), 1,
296 !if(!eq(SrcVT.Value, i64.Value), 1,
300 class isPackedType<ValueType SrcVT> {
302 !if(!eq(SrcVT.Value, v2i16.Value), 1,
303 !if(!eq(SrcVT.Value, v2f16.Value), 1,
304 !if(!eq(SrcVT.Value, v4f16.Value), 1, 0)
308 //===----------------------------------------------------------------------===//
309 // PatFrags for global memory operations
310 //===----------------------------------------------------------------------===//
312 foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
313 let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
316 defm atomic_inc_#as : binary_atomic_op<SIatomic_inc>;
317 defm atomic_dec_#as : binary_atomic_op<SIatomic_dec>;
318 defm atomic_load_fmin_#as : binary_atomic_op<SIatomic_fmin, 0>;
319 defm atomic_load_fmax_#as : binary_atomic_op<SIatomic_fmax, 0>;
322 } // End let AddressSpaces = ...
323 } // End foreach AddrSpace
325 def atomic_fadd_global_noret : PatFrag<
326 (ops node:$ptr, node:$value),
327 (SIglobal_atomic_fadd node:$ptr, node:$value)> {
331 let AddressSpaces = StoreAddress_global.AddrSpaces;
334 def atomic_pk_fadd_global_noret : PatFrag<
335 (ops node:$ptr, node:$value),
336 (SIglobal_atomic_pk_fadd node:$ptr, node:$value)> {
338 let MemoryVT = v2f16;
340 let AddressSpaces = StoreAddress_global.AddrSpaces;
343 //===----------------------------------------------------------------------===//
344 // SDNodes PatFrags for loads/stores with a glue input.
345 // This is for SDNodes and PatFrag for local loads and stores to
346 // enable s_mov_b32 m0, -1 to be glued to the memory instructions.
348 // These mirror the regular load/store PatFrags and rely on special
349 // processing during Select() to add the glued copy.
351 //===----------------------------------------------------------------------===//
353 def AMDGPUld_glue : SDNode <"ISD::LOAD", SDTLoad,
354 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
357 def AMDGPUatomic_ld_glue : SDNode <"ISD::ATOMIC_LOAD", SDTAtomicLoad,
358 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
361 def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr)> {
366 def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> {
368 let IsNonExtLoad = 1;
371 def atomic_load_32_glue : PatFrag<(ops node:$ptr),
372 (AMDGPUatomic_ld_glue node:$ptr)> {
377 def atomic_load_64_glue : PatFrag<(ops node:$ptr),
378 (AMDGPUatomic_ld_glue node:$ptr)> {
383 def extload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
385 let IsAnyExtLoad = 1;
388 def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
390 let IsSignExtLoad = 1;
393 def zextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
395 let IsZeroExtLoad = 1;
398 def extloadi8_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
403 def zextloadi8_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
408 def extloadi16_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
413 def zextloadi16_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
418 def sextloadi8_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
423 def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
429 let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
430 def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> {
431 let IsNonExtLoad = 1;
434 let MemoryVT = i8 in {
435 def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>;
436 def sextloadi8_local_m0 : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>;
437 def zextloadi8_local_m0 : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>;
440 let MemoryVT = i16 in {
441 def extloadi16_local_m0 : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>;
442 def sextloadi16_local_m0 : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>;
443 def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>;
446 def load_align8_local_m0 : PatFrag<(ops node:$ptr),
447 (load_local_m0 node:$ptr)> {
449 let IsNonExtLoad = 1;
450 let MinAlignment = 8;
452 def load_align16_local_m0 : PatFrag<(ops node:$ptr),
453 (load_local_m0 node:$ptr)> {
455 let IsNonExtLoad = 1;
456 let MinAlignment = 16;
461 let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
462 def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
463 (atomic_load_32_glue node:$ptr)> {
466 def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr),
467 (atomic_load_64_glue node:$ptr)> {
471 } // End let AddressSpaces = LoadAddress_local.AddrSpaces
474 def AMDGPUst_glue : SDNode <"ISD::STORE", SDTStore,
475 [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
478 def AMDGPUatomic_st_glue : SDNode <"ISD::ATOMIC_STORE", SDTAtomicStore,
479 [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
482 def unindexedstore_glue : PatFrag<(ops node:$val, node:$ptr),
483 (AMDGPUst_glue node:$val, node:$ptr)> {
488 def store_glue : PatFrag<(ops node:$val, node:$ptr),
489 (unindexedstore_glue node:$val, node:$ptr)> {
491 let IsTruncStore = 0;
494 def truncstore_glue : PatFrag<(ops node:$val, node:$ptr),
495 (unindexedstore_glue node:$val, node:$ptr)> {
497 let IsTruncStore = 1;
500 def truncstorei8_glue : PatFrag<(ops node:$val, node:$ptr),
501 (truncstore_glue node:$val, node:$ptr)> {
506 def truncstorei16_glue : PatFrag<(ops node:$val, node:$ptr),
507 (truncstore_glue node:$val, node:$ptr)> {
512 let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
513 def store_local_m0 : PatFrag<(ops node:$val, node:$ptr),
514 (store_glue node:$val, node:$ptr)> {
516 let IsTruncStore = 0;
519 def truncstorei8_local_m0 : PatFrag<(ops node:$val, node:$ptr),
520 (unindexedstore_glue node:$val, node:$ptr)> {
525 def truncstorei16_local_m0 : PatFrag<(ops node:$val, node:$ptr),
526 (unindexedstore_glue node:$val, node:$ptr)> {
532 def store_align16_local_m0 : PatFrag <
533 (ops node:$value, node:$ptr),
534 (store_local_m0 node:$value, node:$ptr)> {
536 let IsTruncStore = 0;
537 let MinAlignment = 16;
540 def store_align8_local_m0 : PatFrag <
541 (ops node:$value, node:$ptr),
542 (store_local_m0 node:$value, node:$ptr)> {
544 let IsTruncStore = 0;
545 let MinAlignment = 8;
548 let AddressSpaces = StoreAddress_local.AddrSpaces in {
550 def atomic_store_local_32_m0 : PatFrag <
551 (ops node:$value, node:$ptr),
552 (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
556 def atomic_store_local_64_m0 : PatFrag <
557 (ops node:$value, node:$ptr),
558 (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
562 } // End let AddressSpaces = StoreAddress_local.AddrSpaces
565 def si_setcc_uniform : PatFrag <
566 (ops node:$lhs, node:$rhs, node:$cond),
567 (setcc node:$lhs, node:$rhs, node:$cond), [{
568 for (SDNode *Use : N->uses()) {
569 if (Use->isMachineOpcode() || Use->getOpcode() != ISD::CopyToReg)
572 unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
573 if (Reg != AMDGPU::SCC)
579 //===----------------------------------------------------------------------===//
580 // SDNodes PatFrags for d16 loads
581 //===----------------------------------------------------------------------===//
// LoadD16Frag wraps a two-operand node `op` (pointer plus tied input) in a
// PatFrag that forwards both operands unchanged. The four subclasses add one
// address-space mixin each: LocalAddress, GlobalLoadAddress, PrivateAddress
// and FlatLoadAddress.
583 class LoadD16Frag <SDPatternOperator op> : PatFrag<(ops node:$ptr, node:$tied_in), (op node:$ptr, node:$tied_in)>;
584 class LocalLoadD16 <SDPatternOperator op> : LoadD16Frag <op>, LocalAddress;
585 class GlobalLoadD16 <SDPatternOperator op> : LoadD16Frag <op>, GlobalLoadAddress;
586 class PrivateLoadD16 <SDPatternOperator op> : LoadD16Frag <op>, PrivateAddress;
587 class FlatLoadD16 <SDPatternOperator op> : LoadD16Frag <op>, FlatLoadAddress;
// d16 "hi" load fragments. For each address space (local, global, private,
// flat) there are three defs, wrapping SIload_d16_hi, SIload_d16_hi_u8 and
// SIload_d16_hi_i8 respectively.
589 def load_d16_hi_local : LocalLoadD16 <SIload_d16_hi>;
590 def az_extloadi8_d16_hi_local : LocalLoadD16 <SIload_d16_hi_u8>;
591 def sextloadi8_d16_hi_local : LocalLoadD16 <SIload_d16_hi_i8>;
593 def load_d16_hi_global : GlobalLoadD16 <SIload_d16_hi>;
594 def az_extloadi8_d16_hi_global : GlobalLoadD16 <SIload_d16_hi_u8>;
595 def sextloadi8_d16_hi_global : GlobalLoadD16 <SIload_d16_hi_i8>;
597 def load_d16_hi_private : PrivateLoadD16 <SIload_d16_hi>;
598 def az_extloadi8_d16_hi_private : PrivateLoadD16 <SIload_d16_hi_u8>;
599 def sextloadi8_d16_hi_private : PrivateLoadD16 <SIload_d16_hi_i8>;
601 def load_d16_hi_flat : FlatLoadD16 <SIload_d16_hi>;
602 def az_extloadi8_d16_hi_flat : FlatLoadD16 <SIload_d16_hi_u8>;
603 def sextloadi8_d16_hi_flat : FlatLoadD16 <SIload_d16_hi_i8>;
// d16 "lo" load fragments. For each address space (local, global, private,
// flat) there are three defs, wrapping SIload_d16_lo, SIload_d16_lo_u8 and
// SIload_d16_lo_i8 respectively.
606 def load_d16_lo_local : LocalLoadD16 <SIload_d16_lo>;
607 def az_extloadi8_d16_lo_local : LocalLoadD16 <SIload_d16_lo_u8>;
608 def sextloadi8_d16_lo_local : LocalLoadD16 <SIload_d16_lo_i8>;
610 def load_d16_lo_global : GlobalLoadD16 <SIload_d16_lo>;
611 def az_extloadi8_d16_lo_global : GlobalLoadD16 <SIload_d16_lo_u8>;
612 def sextloadi8_d16_lo_global : GlobalLoadD16 <SIload_d16_lo_i8>;
614 def load_d16_lo_private : PrivateLoadD16 <SIload_d16_lo>;
615 def az_extloadi8_d16_lo_private : PrivateLoadD16 <SIload_d16_lo_u8>;
616 def sextloadi8_d16_lo_private : PrivateLoadD16 <SIload_d16_lo_i8>;
618 def load_d16_lo_flat : FlatLoadD16 <SIload_d16_lo>;
619 def az_extloadi8_d16_lo_flat : FlatLoadD16 <SIload_d16_lo_u8>;
620 def sextloadi8_d16_lo_flat : FlatLoadD16 <SIload_d16_lo_i8>;
624 def lshr_rev : PatFrag <
625 (ops node:$src1, node:$src0),
629 def ashr_rev : PatFrag <
630 (ops node:$src1, node:$src0),
634 def lshl_rev : PatFrag <
635 (ops node:$src1, node:$src0),
639 def add_ctpop : PatFrag <
640 (ops node:$src0, node:$src1),
641 (add (ctpop $src0), $src1)
644 multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
645 SDTypeProfile tc = SDTAtomic2,
649 !if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name, tc,
650 [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
653 let AddressSpaces = StoreAddress_local.AddrSpaces in {
654 defm _local_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
657 let AddressSpaces = StoreAddress_region.AddrSpaces in {
658 defm _region_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
// Instantiations of SIAtomicM0Glue2. The first argument is the opcode suffix
// appended after "::ATOMIC_". Passing 1 as the second argument (is_amdgpu)
// selects the "AMDGPUISD" namespace instead of "ISD" (see the !if in the
// multiclass). The floating-point variants pass SDTAtomic2_f32 as the type
// profile and 0 as the fourth argument.
// NOTE(review): the fourth parameter is presumably IsInt, which the
// multiclass forwards to binary_atomic_op; its declaration is not visible here.
662 defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
663 defm atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
664 defm atomic_inc : SIAtomicM0Glue2 <"INC", 1>;
665 defm atomic_dec : SIAtomicM0Glue2 <"DEC", 1>;
666 defm atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">;
667 defm atomic_load_min : SIAtomicM0Glue2 <"LOAD_MIN">;
668 defm atomic_load_max : SIAtomicM0Glue2 <"LOAD_MAX">;
669 defm atomic_load_or : SIAtomicM0Glue2 <"LOAD_OR">;
670 defm atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">;
671 defm atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">;
672 defm atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">;
673 defm atomic_swap : SIAtomicM0Glue2 <"SWAP">;
674 defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32, 0>;
675 defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32, 0>;
676 defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32, 0>;
678 def as_i1imm : SDNodeXForm<imm, [{
679 return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1);
682 def as_i8imm : SDNodeXForm<imm, [{
683 return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i8);
686 def as_i16imm : SDNodeXForm<imm, [{
687 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
690 def as_i32imm: SDNodeXForm<imm, [{
691 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
694 def as_i32timm: SDNodeXForm<timm, [{
695 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
698 def as_i64imm: SDNodeXForm<imm, [{
699 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
702 def cond_as_i32imm: SDNodeXForm<cond, [{
703 return CurDAG->getTargetConstant(N->get(), SDLoc(N), MVT::i32);
706 // Copied from the AArch64 backend:
707 def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
708 return CurDAG->getTargetConstant(
709 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
712 def frameindex_to_targetframeindex : SDNodeXForm<frameindex, [{
713 auto FI = cast<FrameIndexSDNode>(N);
714 return CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
717 // Copied from the AArch64 backend:
718 def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
719 return CurDAG->getTargetConstant(
720 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
723 class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
724 uint64_t Imm = N->getZExtValue();
725 unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1;
726 return CurDAG->getTargetConstant(Bit, SDLoc(N), MVT::i1);
729 def SIMM16bit : ImmLeaf <i32,
730 [{return isInt<16>(Imm);}]
733 def UIMM16bit : ImmLeaf <i32,
734 [{return isUInt<16>(Imm);}]
737 def i64imm_32bit : ImmLeaf<i64, [{
738 return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
741 class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
742 return isInlineImmediate(N);
745 class InlineFPImm <ValueType vt> : PatLeaf <(vt fpimm), [{
746 return isInlineImmediate(N);
749 class VGPRImm <dag frag> : PatLeaf<frag, [{
753 def NegateImm : SDNodeXForm<imm, [{
754 return CurDAG->getConstant(-N->getSExtValue(), SDLoc(N), MVT::i32);
757 // TODO: When FP inline imm values work?
758 def NegSubInlineConst32 : ImmLeaf<i32, [{
759 return Imm < -16 && Imm >= -64;
762 def NegSubInlineConst16 : ImmLeaf<i16, [{
763 return Imm < -16 && Imm >= -64;
766 def ShiftAmt32Imm : PatLeaf <(imm), [{
767 return N->getZExtValue() < 32;
770 def getNegV2I16Imm : SDNodeXForm<build_vector, [{
771 return SDValue(packNegConstantV2I16(N, *CurDAG), 0);
774 def NegSubInlineConstV216 : PatLeaf<(build_vector), [{
775 assert(N->getNumOperands() == 2);
776 assert(N->getOperand(0).getValueType().getSizeInBits() == 16);
777 SDValue Src0 = N->getOperand(0);
778 SDValue Src1 = N->getOperand(1);
780 return isNegInlineImmediate(Src0.getNode());
782 return (isNullConstantOrUndef(Src0) && isNegInlineImmediate(Src1.getNode())) ||
783 (isNullConstantOrUndef(Src1) && isNegInlineImmediate(Src0.getNode()));
786 //===----------------------------------------------------------------------===//
788 //===----------------------------------------------------------------------===//
790 def SoppBrTarget : AsmOperandClass {
791 let Name = "SoppBrTarget";
792 let ParserMethod = "parseSOppBrTarget";
795 def sopp_brtarget : Operand<OtherVT> {
796 let EncoderMethod = "getSOPPBrEncoding";
797 let DecoderMethod = "decodeSoppBrTarget";
798 let OperandType = "OPERAND_PCREL";
799 let ParserMatchClass = SoppBrTarget;
// Pointer-typed (iPTR) operand; no print, parse, encode or decode hooks are
// overridden here.
802 def si_ga : Operand<iPTR>;
804 def InterpSlotMatchClass : AsmOperandClass {
805 let Name = "InterpSlot";
806 let PredicateMethod = "isInterpSlot";
807 let ParserMethod = "parseInterpSlot";
808 let RenderMethod = "addImmOperands";
811 def InterpSlot : Operand<i32> {
812 let PrintMethod = "printInterpSlot";
813 let ParserMatchClass = InterpSlotMatchClass;
814 let OperandType = "OPERAND_IMMEDIATE";
817 def AttrMatchClass : AsmOperandClass {
819 let PredicateMethod = "isInterpAttr";
820 let ParserMethod = "parseInterpAttr";
821 let RenderMethod = "addImmOperands";
824 // It appears to be necessary to create a separate operand for this to
825 // be able to parse attr<num> with no space.
826 def Attr : Operand<i32> {
827 let PrintMethod = "printInterpAttr";
828 let ParserMatchClass = AttrMatchClass;
829 let OperandType = "OPERAND_IMMEDIATE";
832 def AttrChanMatchClass : AsmOperandClass {
833 let Name = "AttrChan";
834 let PredicateMethod = "isAttrChan";
835 let RenderMethod = "addImmOperands";
838 def AttrChan : Operand<i32> {
839 let PrintMethod = "printInterpAttrChan";
840 let ParserMatchClass = AttrChanMatchClass;
841 let OperandType = "OPERAND_IMMEDIATE";
844 def SendMsgMatchClass : AsmOperandClass {
845 let Name = "SendMsg";
846 let PredicateMethod = "isSendMsg";
847 let ParserMethod = "parseSendMsgOp";
848 let RenderMethod = "addImmOperands";
851 def SwizzleMatchClass : AsmOperandClass {
852 let Name = "Swizzle";
853 let PredicateMethod = "isSwizzle";
854 let ParserMethod = "parseSwizzleOp";
855 let RenderMethod = "addImmOperands";
859 def EndpgmMatchClass : AsmOperandClass {
860 let Name = "EndpgmImm";
861 let PredicateMethod = "isEndpgm";
862 let ParserMethod = "parseEndpgmOp";
863 let RenderMethod = "addImmOperands";
867 def ExpTgtMatchClass : AsmOperandClass {
869 let PredicateMethod = "isExpTgt";
870 let ParserMethod = "parseExpTgt";
871 let RenderMethod = "printExpTgt";
874 def SWaitMatchClass : AsmOperandClass {
875 let Name = "SWaitCnt";
876 let RenderMethod = "addImmOperands";
877 let ParserMethod = "parseSWaitCntOps";
880 def VReg32OrOffClass : AsmOperandClass {
881 let Name = "VReg32OrOff";
882 let ParserMethod = "parseVReg32OrOff";
885 let OperandType = "OPERAND_IMMEDIATE" in {
886 def SendMsgImm : Operand<i32> {
887 let PrintMethod = "printSendMsg";
888 let ParserMatchClass = SendMsgMatchClass;
891 def SwizzleImm : Operand<i16> {
892 let PrintMethod = "printSwizzle";
893 let ParserMatchClass = SwizzleMatchClass;
896 def EndpgmImm : Operand<i16> {
897 let PrintMethod = "printEndpgm";
898 let ParserMatchClass = EndpgmMatchClass;
901 def WAIT_FLAG : Operand <i32> {
902 let ParserMatchClass = SWaitMatchClass;
903 let PrintMethod = "printWaitFlag";
905 } // End OperandType = "OPERAND_IMMEDIATE"
907 include "SIInstrFormats.td"
908 include "VIInstrFormats.td"
910 def BoolReg : AsmOperandClass {
911 let Name = "BoolReg";
912 let ParserMethod = "parseBoolReg";
913 let RenderMethod = "addRegOperands";
916 class BoolRC : RegisterOperand<SReg_1> {
917 let ParserMatchClass = BoolReg;
918 let DecoderMethod = "decodeBoolReg";
921 def SSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
922 let ParserMatchClass = BoolReg;
923 let DecoderMethod = "decodeBoolReg";
926 def VOPDstS64orS32 : BoolRC {
927 let PrintMethod = "printVOPDst";
930 // SCSrc_i1 is the operand for pseudo instructions only.
931 // Boolean immediates shall not be exposed to codegen instructions.
932 def SCSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
933 let OperandNamespace = "AMDGPU";
934 let OperandType = "OPERAND_REG_IMM_INT32";
935 let ParserMatchClass = BoolReg;
936 let DecoderMethod = "decodeBoolReg";
939 // ===----------------------------------------------------------------------===//
940 // ExpSrc* Special cases for exp src operands which are printed as
941 // "off" depending on en operand.
942 // ===----------------------------------------------------------------------===//
944 def ExpSrc0 : RegisterOperand<VGPR_32> {
945 let PrintMethod = "printExpSrc0";
946 let ParserMatchClass = VReg32OrOffClass;
949 def ExpSrc1 : RegisterOperand<VGPR_32> {
950 let PrintMethod = "printExpSrc1";
951 let ParserMatchClass = VReg32OrOffClass;
954 def ExpSrc2 : RegisterOperand<VGPR_32> {
955 let PrintMethod = "printExpSrc2";
956 let ParserMatchClass = VReg32OrOffClass;
959 def ExpSrc3 : RegisterOperand<VGPR_32> {
960 let PrintMethod = "printExpSrc3";
961 let ParserMatchClass = VReg32OrOffClass;
964 class SDWASrc<ValueType vt> : RegisterOperand<VS_32> {
965 let OperandNamespace = "AMDGPU";
966 string Type = !if(isFloatType<vt>.ret, "FP", "INT");
967 let OperandType = "OPERAND_REG_INLINE_C_"#Type#vt.Size;
968 let DecoderMethod = "decodeSDWASrc"#vt.Size;
969 let EncoderMethod = "getSDWASrcEncoding";
// Concrete SDWA source operands for i32, i16, f32 and f16. SDWASrc derives the
// operand type ("OPERAND_REG_INLINE_C_" # FP/INT # size) and the decoder method
// name ("decodeSDWASrc" # size) from the value type passed in.
972 def SDWASrc_i32 : SDWASrc<i32>;
973 def SDWASrc_i16 : SDWASrc<i16>;
974 def SDWASrc_f32 : SDWASrc<f32>;
975 def SDWASrc_f16 : SDWASrc<f16>;
977 def SDWAVopcDst : BoolRC {
978 let OperandNamespace = "AMDGPU";
979 let OperandType = "OPERAND_SDWA_VOPC_DST";
980 let EncoderMethod = "getSDWAVopcDstEncoding";
981 let DecoderMethod = "decodeSDWAVopcDst";
982 let PrintMethod = "printVOPDst";
985 class NamedMatchClass<string CName, bit Optional = 1> : AsmOperandClass {
986 let Name = "Imm"#CName;
987 let PredicateMethod = "is"#CName;
988 let ParserMethod = !if(Optional, "parseOptionalOperand", "parse"#CName);
989 let RenderMethod = "addImmOperands";
990 let IsOptional = Optional;
991 let DefaultMethod = !if(Optional, "default"#CName, ?);
994 class NamedOperandBit<string Name, AsmOperandClass MatchClass> : Operand<i1> {
995 let PrintMethod = "print"#Name;
996 let ParserMatchClass = MatchClass;
999 class NamedOperandU8<string Name, AsmOperandClass MatchClass> : Operand<i8> {
1000 let PrintMethod = "print"#Name;
1001 let ParserMatchClass = MatchClass;
1004 class NamedOperandU16<string Name, AsmOperandClass MatchClass> : Operand<i16> {
1005 let PrintMethod = "print"#Name;
1006 let ParserMatchClass = MatchClass;
1009 class NamedOperandU32<string Name, AsmOperandClass MatchClass> : Operand<i32> {
1010 let PrintMethod = "print"#Name;
1011 let ParserMatchClass = MatchClass;
1014 class NamedOperandU32Default0<string Name, AsmOperandClass MatchClass> :
1015 OperandWithDefaultOps<i32, (ops (i32 0))> {
1016 let PrintMethod = "print"#Name;
1017 let ParserMatchClass = MatchClass;
1020 let OperandType = "OPERAND_IMMEDIATE" in {
// Named immediate operands for addressing and modifiers. Each def pairs a
// NamedOperand* class (which fixes the value type and sets PrintMethod to
// "print" # Name) with a NamedMatchClass for parsing. All of these use the
// default Optional = 1, so they are parsed through parseOptionalOperand and
// get a "default" # CName default method.
1022 def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>;
1023 def idxen : NamedOperandBit<"Idxen", NamedMatchClass<"Idxen">>;
1024 def addr64 : NamedOperandBit<"Addr64", NamedMatchClass<"Addr64">>;
// Offsets: 16-bit for flat_offset/offset, 8-bit for offset0/offset1.
1026 def flat_offset : NamedOperandU16<"FlatOffset", NamedMatchClass<"FlatOffset">>;
1027 def offset : NamedOperandU16<"Offset", NamedMatchClass<"Offset">>;
1028 def offset0 : NamedOperandU8<"Offset0", NamedMatchClass<"Offset0">>;
1029 def offset1 : NamedOperandU8<"Offset1", NamedMatchClass<"Offset1">>;
1031 def gds : NamedOperandBit<"GDS", NamedMatchClass<"GDS">>;
// omod is a 32-bit operand; clampmod and highmod are single bits.
1033 def omod : NamedOperandU32<"OModSI", NamedMatchClass<"OModSI">>;
1034 def clampmod : NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>;
1035 def highmod : NamedOperandBit<"High", NamedMatchClass<"High">>;
// Single-bit named flag operands (Operand<i1> via NamedOperandBit), all
// optional in the assembler (NamedMatchClass default Optional = 1).
1037 def DLC : NamedOperandBit<"DLC", NamedMatchClass<"DLC">>;
1038 def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>;
1039 def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
1040 def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
1041 def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>;
1042 def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
1043 def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
1044 def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>;
1045 def D16 : NamedOperandBit<"D16", NamedMatchClass<"D16">>;
1046 def LWE : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
1047 def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
1048 def exp_vm : NamedOperandBit<"ExpVM", NamedMatchClass<"ExpVM">>;
// Multi-bit operands. FORMAT and DMask are optional.
1050 def FORMAT : NamedOperandU8<"FORMAT", NamedMatchClass<"FORMAT">>;
1052 def DMask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
// Dim, dpp8 and dpp_ctrl pass Optional = 0 to NamedMatchClass: IsOptional is
// cleared, the parser method becomes "parse" # CName, and no default method
// is set.
1053 def Dim : NamedOperandU8<"Dim", NamedMatchClass<"Dim", 0>>;
1055 def dpp8 : NamedOperandU32<"DPP8", NamedMatchClass<"DPP8", 0>>;
1057 def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>;
1058 def row_mask : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>;
1059 def bank_mask : NamedOperandU32<"BankMask", NamedMatchClass<"BankMask">>;
1060 def bound_ctrl : NamedOperandBit<"BoundCtrl", NamedMatchClass<"BoundCtrl">>;
1061 def FI : NamedOperandU32<"FI", NamedMatchClass<"FI">>;
1063 def dst_sel : NamedOperandU32<"SDWADstSel", NamedMatchClass<"SDWADstSel">>;
1064 def src0_sel : NamedOperandU32<"SDWASrc0Sel", NamedMatchClass<"SDWASrc0Sel">>;
1065 def src1_sel : NamedOperandU32<"SDWASrc1Sel", NamedMatchClass<"SDWASrc1Sel">>;
1066 def dst_unused : NamedOperandU32<"SDWADstUnused", NamedMatchClass<"SDWADstUnused">>;
1068 def op_sel : NamedOperandU32Default0<"OpSel", NamedMatchClass<"OpSel">>;
1069 def op_sel_hi : NamedOperandU32Default0<"OpSelHi", NamedMatchClass<"OpSelHi">>;
1070 def neg_lo : NamedOperandU32Default0<"NegLo", NamedMatchClass<"NegLo">>;
1071 def neg_hi : NamedOperandU32Default0<"NegHi", NamedMatchClass<"NegHi">>;
1073 def blgp : NamedOperandU32<"BLGP", NamedMatchClass<"BLGP">>;
1074 def cbsz : NamedOperandU32<"CBSZ", NamedMatchClass<"CBSZ">>;
1075 def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>;
1077 def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
1079 def exp_tgt : NamedOperandU8<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
1083 } // End OperandType = "OPERAND_IMMEDIATE"
// Asm operand class for KImm operands of the given bit size. The class name,
// predicate ("isKImmFP<size>") and render method ("addKImmFP<size>Operands")
// are all derived from `size`; parsing always goes through parseImm.
1085 class KImmMatchClass<int size> : AsmOperandClass {
1086 let Name = "KImmFP"#size;
1087 let PredicateMethod = "isKImmFP"#size;
1088 let ParserMethod = "parseImm";
1089 let RenderMethod = "addKImmFP"#size#"Operands";
// KImm operand of value type `vt` in the AMDGPU operand namespace. The
// operand type ("OPERAND_KIMM<bits>"), printer ("printU<bits>ImmOperand") and
// match class are all keyed on vt.Size. The match class is looked up by name
// via !cast, so a def named "KImmFP<bits>MatchClass" must exist for the size.
1092 class kimmOperand<ValueType vt> : Operand<vt> {
1093 let OperandNamespace = "AMDGPU";
1094 let OperandType = "OPERAND_KIMM"#vt.Size;
1095 let PrintMethod = "printU"#vt.Size#"ImmOperand";
1096 let ParserMatchClass = !cast<AsmOperandClass>("KImmFP"#vt.Size#"MatchClass");
1099 // 32-bit VALU immediate operand that uses the constant bus.
1100 def KImmFP32MatchClass : KImmMatchClass<32>;
1101 def f32kimm : kimmOperand<i32>;
1103 // 32-bit VALU immediate operand with a 16-bit value that uses the
// constant bus.
1105 def KImmFP16MatchClass : KImmMatchClass<16>;
1106 def f16kimm : kimmOperand<i16>;
// Asm operand class for a register-or-immediate source with FP input
// modifiers. The name and predicate embed opSize; all sizes share the
// parseRegOrImmWithFPInputMods parser.
1108 class FPInputModsMatchClass <int opSize> : AsmOperandClass {
1109 let Name = "RegOrImmWithFP"#opSize#"InputMods";
1110 let ParserMethod = "parseRegOrImmWithFPInputMods";
1111 let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
// Instantiations for 16-, 32- and 64-bit operands.
1114 def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
1115 def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
1116 def FP64InputModsMatchClass : FPInputModsMatchClass<64>;
// Base class for input-modifier operands: an i32 operand in the AMDGPU
// namespace with operand type OPERAND_INPUT_MODS, matched by `matchClass`.
1118 class InputMods <AsmOperandClass matchClass> : Operand <i32> {
1119 let OperandNamespace = "AMDGPU";
1120 let OperandType = "OPERAND_INPUT_MODS";
1121 let ParserMatchClass = matchClass;
// InputMods specialization for FP sources; printed with
// printOperandAndFPInputMods.
1124 class FPInputMods <FPInputModsMatchClass matchClass> : InputMods <matchClass> {
1125 let PrintMethod = "printOperandAndFPInputMods";
// One FP input-modifier operand per supported size.
1128 def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;
1129 def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
1130 def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;
// Integer counterpart of FPInputModsMatchClass: the name and predicate embed
// opSize, and parsing uses parseRegOrImmWithIntInputMods.
1132 class IntInputModsMatchClass <int opSize> : AsmOperandClass {
1133 let Name = "RegOrImmWithInt"#opSize#"InputMods";
1134 let ParserMethod = "parseRegOrImmWithIntInputMods";
1135 let PredicateMethod = "isRegOrImmWithInt"#opSize#"InputMods";
// Only 32- and 64-bit instantiations are defined here.
1137 def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
1138 def Int64InputModsMatchClass : IntInputModsMatchClass<64>;
// InputMods specialization for integer sources; printed with
// printOperandAndIntInputMods.
1140 class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> {
1141 let PrintMethod = "printOperandAndIntInputMods";
1143 def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
1144 def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
// Match class for op_sel-style modifier operands. It uses the plain
// register-or-immediate parser and predicate (parseRegOrImm / isRegOrImm).
1146 class OpSelModsMatchClass : AsmOperandClass {
1147 let Name = "OpSelMods";
1148 let ParserMethod = "parseRegOrImm";
1149 let PredicateMethod = "isRegOrImm";
// Integer op_sel modifier operand built directly on InputMods (no PrintMethod
// is set on it in the visible lines).
1152 def IntOpSelModsMatchClass : OpSelModsMatchClass;
1153 def IntOpSelMods : InputMods<IntOpSelModsMatchClass>;
// Match class for SDWA sources with FP input modifiers. It shares the
// parseRegOrImmWithFPInputMods parser but uses the SDWA-specific predicate
// "isSDWAFP<opSize>Operand".
1155 class FPSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
1156 let Name = "SDWAWithFP"#opSize#"InputMods";
1157 let ParserMethod = "parseRegOrImmWithFPInputMods";
1158 let PredicateMethod = "isSDWAFP"#opSize#"Operand";
// Only 16- and 32-bit instantiations are defined here.
1161 def FP16SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<16>;
1162 def FP32SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<32>;
// FP input-modifier operand for SDWA sources; printed with the same
// printOperandAndFPInputMods hook as FPInputMods.
1164 class FPSDWAInputMods <FPSDWAInputModsMatchClass matchClass> :
1165 InputMods <matchClass> {
1166 let PrintMethod = "printOperandAndFPInputMods";
1169 def FP16SDWAInputMods : FPSDWAInputMods<FP16SDWAInputModsMatchClass>;
1170 def FP32SDWAInputMods : FPSDWAInputMods<FP32SDWAInputModsMatchClass>;
// Match class for a register-only source with FP input modifiers. It is
// parsed by parseRegWithFPInputMods and accepted when isVReg32 holds.
1172 def FPVRegInputModsMatchClass : AsmOperandClass {
1173 let Name = "VRegWithFPInputMods";
1174 let ParserMethod = "parseRegWithFPInputMods";
1175 let PredicateMethod = "isVReg32";
// Operand using the match class above (selected by getSrcModExt for FP types).
1178 def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
1179 let PrintMethod = "printOperandAndFPInputMods";
// Match class for SDWA sources with integer input modifiers. It shares the
// parseRegOrImmWithIntInputMods parser but uses the SDWA-specific predicate
// "isSDWAInt<opSize>Operand".
1182 class IntSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
1183 let Name = "SDWAWithInt"#opSize#"InputMods";
1184 let ParserMethod = "parseRegOrImmWithIntInputMods";
1185 let PredicateMethod = "isSDWAInt"#opSize#"Operand";
// Only 16- and 32-bit instantiations are defined here.
1188 def Int16SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<16>;
1189 def Int32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32>;
// Integer input-modifier operand for SDWA sources; printed with
// printOperandAndIntInputMods.
1191 class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass> :
1192 InputMods <matchClass> {
1193 let PrintMethod = "printOperandAndIntInputMods";
1196 def Int16SDWAInputMods : IntSDWAInputMods<Int16SDWAInputModsMatchClass>;
1197 def Int32SDWAInputMods : IntSDWAInputMods<Int32SDWAInputModsMatchClass>;
// Match class for a register-only source with integer input modifiers. It is
// parsed by parseRegWithIntInputMods and accepted when isVReg32 holds.
1199 def IntVRegInputModsMatchClass : AsmOperandClass {
1200 let Name = "VRegWithIntInputMods";
1201 let ParserMethod = "parseRegWithIntInputMods";
1202 let PredicateMethod = "isVReg32";
// Operand using the match class above (selected by getSrcModExt for non-FP
// types).
1205 def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
1206 let PrintMethod = "printOperandAndIntInputMods";
// Match class for packed FP sources. It currently uses the generic
// parseRegOrImm / isRegOrImm pair; the size-specific predicate is left
// commented out below.
1209 class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
1210 let Name = "PackedFP"#opSize#"InputMods";
1211 let ParserMethod = "parseRegOrImm";
1212 let PredicateMethod = "isRegOrImm";
1213 // let PredicateMethod = "isPackedFP"#opSize#"InputMods";
// Match class for packed integer sources. It currently uses the generic
// parseRegOrImm / isRegOrImm pair; the size-specific predicate is left
// commented out below.
1216 class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
1217 let Name = "PackedInt"#opSize#"InputMods";
1218 let ParserMethod = "parseRegOrImm";
1219 let PredicateMethod = "isRegOrImm";
1220 // let PredicateMethod = "isPackedInt"#opSize#"InputMods";
// 16-bit packed match classes and the input-modifier operands built on them.
// Neither PackedFPInputMods nor PackedIntInputMods sets a PrintMethod; the
// dedicated printers are commented out.
1223 def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
1224 def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;
1226 class PackedFPInputMods <PackedFPInputModsMatchClass matchClass> : InputMods <matchClass> {
1227 // let PrintMethod = "printPackedFPInputMods";
1230 class PackedIntInputMods <PackedIntInputModsMatchClass matchClass> : InputMods <matchClass> {
1231 //let PrintMethod = "printPackedIntInputMods";
1234 def PackedF16InputMods : PackedFPInputMods<PackedF16InputModsMatchClass>;
1235 def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>;
1237 //===----------------------------------------------------------------------===//
1239 //===----------------------------------------------------------------------===//
// ComplexPattern definitions. Each one gives a root value type, an operand
// count and the name of the selector routine ("Select...") that implements it.
// NOTE(review): the selector routines live outside this file - confirm each
// returns exactly the operand count declared here.
1241 def DS1Addr1Offset : ComplexPattern<i32, 2, "SelectDS1Addr1Offset">;
1242 def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
1244 def MOVRELOffset : ComplexPattern<i32, 2, "SelectMOVRELOffset">;
1246 def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
1247 def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">;
1248 def VOP3Mods0Clamp0OMod : ComplexPattern<untyped, 4, "SelectVOP3Mods0Clamp0OMod">;
1249 def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
1250 def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;
1251 // VOP3Mods, but the input source is known to never be NaN.
1252 def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;
1253 // VOP3Mods, but only allowed for f32 operands.
1254 def VOP3Mods_f32 : ComplexPattern<fAny, 2, "SelectVOP3Mods_f32">;
1256 def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
1258 def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;
1259 def VOP3PMods0 : ComplexPattern<untyped, 3, "SelectVOP3PMods0">;
1261 def VOP3OpSel : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;
1262 def VOP3OpSel0 : ComplexPattern<untyped, 3, "SelectVOP3OpSel0">;
1264 def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;
1265 def VOP3OpSelMods0 : ComplexPattern<untyped, 3, "SelectVOP3OpSelMods0">;
1267 def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">;
1270 def Hi16Elt : ComplexPattern<untyped, 1, "SelectHi16Elt">;
1272 //===----------------------------------------------------------------------===//
1273 // SI assembler operands
1274 //===----------------------------------------------------------------------===//
1279 int FLAT_SCR = 0x68;
1282 // This should be kept in sync with SISrcMods enum
1306 int LLVM_DEBUG_TRAP = 3;
1322 int FLAT_SCR_LO = 20;
1323 int FLAT_SCR_HI = 21;
1324 int XNACK_MASK = 22;
1325 int POPS_PACKER = 25;
// Builds a packed hardware-register immediate from Reg, Offset (default 0)
// and Size (default 32). In the visible lines Offset is shifted left by 6 and
// (Size - 1) is shifted left by 11 before being OR'ed together.
// NOTE(review): the line that combines these with Reg is not visible in this
// excerpt - confirm the field layout against the hwreg encoding.
1328 class getHwRegImm<int Reg, int Offset = 0, int Size = 32> {
1330 !or(!shl(Offset, 6),
1331 !shl(!add(Size, -1), 11)));
1334 //===----------------------------------------------------------------------===//
1336 // SI Instruction multiclass helpers.
1338 // Instructions with _32 take 32-bit operands.
1339 // Instructions with _64 take 64-bit operands.
1341 // VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
1342 // encoding is the standard encoding, but instructions that make use of
1343 // any of the instruction modifiers must use the 64-bit encoding.
1345 // Instructions with _e32 use the 32-bit encoding.
1346 // Instructions with _e64 use the 64-bit encoding.
1348 //===----------------------------------------------------------------------===//
// Mixin that records which pseudo instruction a definition corresponds to
// (PseudoInstr) and for which subtarget/encoding family (Subtarget).
1350 class SIMCInstr <string pseudo, int subtarget> {
1351 string PseudoInstr = pseudo;
1352 int Subtarget = subtarget;
1355 //===----------------------------------------------------------------------===//
1357 //===----------------------------------------------------------------------===//
// Shared EXP instruction definition, parameterized on the `done` bit and an
// optional selection node (null_frag by default, i.e. no pattern).
// The asm string appends " done" when `done` is set, followed by the $compr
// and $vm modifiers. Assembly matching uses the cvtExp converter.
// The pattern passes tgt, en, the four f32 sources, compr and vm to `node`.
1359 class EXP_Helper<bit done, SDPatternOperator node = null_frag> : EXPCommon<
1362 ExpSrc0:$src0, ExpSrc1:$src1, ExpSrc2:$src2, ExpSrc3:$src3,
1363 exp_vm:$vm, exp_compr:$compr, i8imm:$en),
1364 "exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")#"$compr$vm",
1365 [(node (i8 timm:$tgt), (i8 timm:$en),
1366 f32:$src0, f32:$src1, f32:$src2, f32:$src3,
1367 (i1 timm:$compr), (i1 timm:$vm))]> {
1368 let AsmMatchConverter = "cvtExp";
1371 // Split EXP instruction into EXP and EXP_DONE so we can set
1372 // mayLoad for done=1.
// For each value of `done` this emits:
//   - an unsuffixed codegen-only pseudo (SIEncodingFamily.NONE) carrying the
//     selection pattern for `node`;
//   - _si, _vi and _gfx10 variants without a pattern, each with its own
//     assembler predicate and decoder namespace.
// All variants have DisableWQM = 1 and mayLoad = done.
// NOTE(review): _gfx10 sets DisableDecoder = DisableSIDecoder, the same flag
// as _si - confirm this is intentional.
1373 multiclass EXP_m<bit done, SDPatternOperator node> {
1374 let mayLoad = done, DisableWQM = 1 in {
1375 let isPseudo = 1, isCodeGenOnly = 1 in {
1376 def "" : EXP_Helper<done, node>,
1377 SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.NONE>;
1380 let done = done in {
1381 def _si : EXP_Helper<done>,
1382 SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.SI>,
1384 let AssemblerPredicates = [isGFX6GFX7];
1385 let DecoderNamespace = "GFX6GFX7";
1386 let DisableDecoder = DisableSIDecoder;
1389 def _vi : EXP_Helper<done>,
1390 SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.VI>,
1392 let AssemblerPredicates = [isGFX8GFX9];
1393 let DecoderNamespace = "GFX8";
1394 let DisableDecoder = DisableVIDecoder;
1397 def _gfx10 : EXP_Helper<done>,
1398 SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.GFX10>,
1400 let AssemblerPredicates = [isGFX10Plus];
1401 let DecoderNamespace = "GFX10";
1402 let DisableDecoder = DisableSIDecoder;
1408 //===----------------------------------------------------------------------===//
1409 // Vector ALU classes
1410 //===----------------------------------------------------------------------===//
// Counts the source operands of a profile by finding the first untyped
// source: 0 if Src0 is untyped, 1 if Src1 is, 2 if Src2 is.
// NOTE(review): the final fallthrough value is not visible in this excerpt
// (presumably 3 for VOP3) - confirm.
1412 class getNumSrcArgs<ValueType Src0, ValueType Src1, ValueType Src2> {
1414 !if (!eq(Src0.Value, untyped.Value), 0,
1415 !if (!eq(Src1.Value, untyped.Value), 1, // VOP1
1416 !if (!eq(Src2.Value, untyped.Value), 2, // VOP2
1420 // Returns the register class to use for the destination of VOP[123C]
1421 // instructions for the given VT.
// Mapping by VT.Size: 32 and 16 use VGPR_32, 64 uses VReg_64, 128 uses
// VReg_128; any other size falls back to VOPDstS64orS32.
1422 class getVALUDstForVT<ValueType VT> {
1423 RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
1424 !if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
1425 !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
1426 !if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32>,
1427 VOPDstS64orS32)))); // else VT == i1
1430 // Returns the register class to use for the destination of VOP[12C]
1431 // instructions with SDWA extension
// A 1-bit VT selects SDWAVopcDst; every other VT gets a VGPR_32 destination.
1432 class getSDWADstForVT<ValueType VT> {
1433 RegisterOperand ret = !if(!eq(VT.Size, 1),
1434 SDWAVopcDst, // VOPC
1435 VOPDstOperand<VGPR_32>); // VOP1/2 32-bit dst
1438 // Returns the register class to use for source 0 of VOP[12C]
1439 // instructions for the given VT.
// The visible conditions test for a 64-bit size and then for f16, v2f16 and
// v4f16, and again for a 64-bit size followed by i16 and v2i16.
// NOTE(review): the operand chosen in each case, and the use of isFP to split
// the two groups, are on lines not visible in this excerpt.
1440 class getVOPSrc0ForVT<ValueType VT> {
1441 bit isFP = isFloatType<VT>.ret;
1443 RegisterOperand ret =
1445 !if(!eq(VT.Size, 64),
1447 !if(!eq(VT.Value, f16.Value),
1449 !if(!eq(VT.Value, v2f16.Value),
1451 !if(!eq(VT.Value, v4f16.Value),
1458 !if(!eq(VT.Size, 64),
1460 !if(!eq(VT.Value, i16.Value),
1462 !if(!eq(VT.Value, v2i16.Value),
1471 // Returns the vreg register class to use for source operand given VT
// Mapping by VT.Size: 128 uses VReg_128, 96 uses VReg_96, and both 64 and 48
// use VReg_64. NOTE(review): the fallback class for other sizes is on a line
// not visible in this excerpt.
1472 class getVregSrcForVT<ValueType VT> {
1473 RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128,
1474 !if(!eq(VT.Size, 96), VReg_96,
1475 !if(!eq(VT.Size, 64), VReg_64,
1476 !if(!eq(VT.Size, 48), VReg_64,
// Returns the SDWA source operand for VT. FP types pick SDWASrc_f16 or
// SDWASrc_f32, integer types pick SDWASrc_i16 or SDWASrc_i32; the 16-bit
// variant is used only when VT.Size is exactly 16.
1480 class getSDWASrcForVT <ValueType VT> {
1481 bit isFP = isFloatType<VT>.ret;
1482 RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32);
1483 RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32);
1484 RegisterOperand ret = !if(isFP, retFlt, retInt);
1487 // Returns the register class to use for sources of VOP3 instructions for the
// given VT.
// The visible conditions test sizes 128 and 64, then i1, f16, v2f16 and v4f16,
// then i16 and v2i16.
// NOTE(review): the operand chosen in each case is on lines not visible in
// this excerpt.
1489 class getVOP3SrcForVT<ValueType VT> {
1490 bit isFP = isFloatType<VT>.ret;
1491 RegisterOperand ret =
1492 !if(!eq(VT.Size, 128),
1494 !if(!eq(VT.Size, 64),
1498 !if(!eq(VT.Value, i1.Value),
1501 !if(!eq(VT.Value, f16.Value),
1503 !if(!eq(VT.Value, v2f16.Value),
1505 !if(!eq(VT.Value, v4f16.Value),
1511 !if(!eq(VT.Value, i16.Value),
1513 !if(!eq(VT.Value, v2i16.Value),
1524 // Float or packed int
// Yields 1 for f16, f32, f64, v2f16 and v2i16 in the visible cases.
// NOTE(review): the remaining cases and the default are on lines not visible
// in this excerpt.
1525 class isModifierType<ValueType SrcVT> {
1527 !if(!eq(SrcVT.Value, f16.Value), 1,
1528 !if(!eq(SrcVT.Value, f32.Value), 1,
1529 !if(!eq(SrcVT.Value, f64.Value), 1,
1530 !if(!eq(SrcVT.Value, v2f16.Value), 1,
1531 !if(!eq(SrcVT.Value, v2i16.Value), 1,
1535 // Return type of input modifiers operand for specified input operand
// 64-bit types choose between FP64InputMods and Int64InputMods based on isFP.
// The last visible case picks FP32InputMods only when EnableF32SrcMods is set,
// and Int32InputMods otherwise.
// NOTE(review): the f16 branch result and any use of isPacked are on lines
// not visible in this excerpt.
1536 class getSrcMod <ValueType VT, bit EnableF32SrcMods> {
1537 bit isFP = isFloatType<VT>.ret;
1538 bit isPacked = isPackedType<VT>.ret;
1539 Operand ret = !if(!eq(VT.Size, 64),
1540 !if(isFP, FP64InputMods, Int64InputMods),
1542 !if(!eq(VT.Value, f16.Value),
1546 !if(EnableF32SrcMods, FP32InputMods, Int32InputMods))
// Returns the modifier operand used with op_sel: FP16InputMods for f16,
// IntOpSelMods for every other type.
1550 class getOpSelMod <ValueType VT> {
1551 Operand ret = !if(!eq(VT.Value, f16.Value), FP16InputMods, IntOpSelMods);
1554 // Return type of input modifiers operand specified input operand for DPP
// FP types use FPVRegInputMods; all other types use IntVRegInputMods.
1555 class getSrcModExt <ValueType VT> {
1556 bit isFP = isFloatType<VT>.ret;
1557 Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods);
1560 // Return type of input modifiers operand specified input operand for SDWA
// f16, f32 and i16 map to their matching SDWA modifier operands; every other
// type falls back to Int32SDWAInputMods.
1561 class getSrcModSDWA <ValueType VT> {
1562 Operand ret = !if(!eq(VT.Value, f16.Value), FP16SDWAInputMods,
1563 !if(!eq(VT.Value, f32.Value), FP32SDWAInputMods,
1564 !if(!eq(VT.Value, i16.Value), Int16SDWAInputMods,
1565 Int32SDWAInputMods)));
1568 // Returns the input arguments for VOP[12C] instructions for the given SrcVT.
// One source yields (ins $src0); two sources yield (ins $src0, $src1).
// NOTE(review): the fallthrough for other NumSrcArgs values is on a line not
// visible in this excerpt.
1569 class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> {
1570 dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0), // VOP1
1571 !if(!eq(NumSrcArgs, 2), (ins Src0RC:$src0, Src1RC:$src1), // VOP2
1575 // Returns the input arguments for VOP3 instructions for the given SrcVT.
// The operand list is chosen by NumSrcArgs first, then by HasModifiers:
//   - with modifiers, each source is preceded by its $srcN_modifiers operand,
//     and clamp/omod are appended depending on HasOMod and HasIntClamp;
//   - without modifiers, only the plain sources are listed, plus $clamp when
//     HasIntClamp is set.
// For three sources, HasSrc2Mods additionally decides whether src2 gets a
// modifiers operand.
// NOTE(review): several result lines are not visible in this excerpt;
// operand order matters to the encoder and printer, so check them in the
// full file before editing.
1576 class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
1577 RegisterOperand Src2RC, int NumSrcArgs,
1578 bit HasIntClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
1579 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
1582 !if (!eq(NumSrcArgs, 0),
1583 // VOP1 without input operands (V_NOP, V_CLREXCP)
1586 !if (!eq(NumSrcArgs, 1),
1587 !if (!eq(HasModifiers, 1),
1588 // VOP1 with modifiers
1589 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1590 clampmod:$clamp, omod:$omod)
1592 // VOP1 without modifiers
1593 !if (!eq(HasIntClamp, 1),
1594 (ins Src0RC:$src0, clampmod:$clamp),
1597 !if (!eq(NumSrcArgs, 2),
1598 !if (!eq(HasModifiers, 1),
1599 // VOP 2 with modifiers
1600 !if( !eq(HasOMod, 1),
1601 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1602 Src1Mod:$src1_modifiers, Src1RC:$src1,
1603 clampmod:$clamp, omod:$omod),
1604 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1605 Src1Mod:$src1_modifiers, Src1RC:$src1,
1608 // VOP2 without modifiers
1609 !if (!eq(HasIntClamp, 1),
1610 (ins Src0RC:$src0, Src1RC:$src1, clampmod:$clamp),
1611 (ins Src0RC:$src0, Src1RC:$src1))
1614 /* NumSrcArgs == 3 */,
1615 !if (!eq(HasModifiers, 1),
1616 !if (!eq(HasSrc2Mods, 1),
1617 // VOP3 with modifiers
1618 !if (!eq(HasOMod, 1),
1619 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1620 Src1Mod:$src1_modifiers, Src1RC:$src1,
1621 Src2Mod:$src2_modifiers, Src2RC:$src2,
1622 clampmod:$clamp, omod:$omod),
1623 !if (!eq(HasIntClamp, 1),
1624 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1625 Src1Mod:$src1_modifiers, Src1RC:$src1,
1626 Src2Mod:$src2_modifiers, Src2RC:$src2,
1628 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1629 Src1Mod:$src1_modifiers, Src1RC:$src1,
1630 Src2Mod:$src2_modifiers, Src2RC:$src2))),
1631 // VOP3 with modifiers except src2
1632 !if (!eq(HasOMod, 1),
1633 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1634 Src1Mod:$src1_modifiers, Src1RC:$src1,
1635 Src2RC:$src2, clampmod:$clamp, omod:$omod),
1636 !if (!eq(HasIntClamp, 1),
1637 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1638 Src1Mod:$src1_modifiers, Src1RC:$src1,
1639 Src2RC:$src2, clampmod:$clamp),
1640 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1641 Src1Mod:$src1_modifiers, Src1RC:$src1,
1644 // VOP3 without modifiers
1645 !if (!eq(HasIntClamp, 1),
1646 (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2, clampmod:$clamp),
1647 (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2))
1651 /// XXX - src1 may only allow VGPRs?
1653 // The modifiers (except clamp) are dummy operands for the benefit of
1654 // printing and parsing. They defer their values to looking at the
1655 // srcN_modifiers for what to print.
// Returns the VOP3P input list for two or three sources. Every variant ends
// with op_sel, op_sel_hi, neg_lo and neg_hi. Each source count has two
// variants.
// NOTE(review): the condition selecting between the two variants and the
// extra operand of the first one are on lines not visible in this excerpt
// (presumably a clamp flag) - confirm.
1656 class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
1657 RegisterOperand Src2RC, int NumSrcArgs,
1659 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
1660 dag ret = !if (!eq(NumSrcArgs, 2),
1662 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1663 Src1Mod:$src1_modifiers, Src1RC:$src1,
1665 op_sel:$op_sel, op_sel_hi:$op_sel_hi,
1666 neg_lo:$neg_lo, neg_hi:$neg_hi),
1667 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1668 Src1Mod:$src1_modifiers, Src1RC:$src1,
1669 op_sel:$op_sel, op_sel_hi:$op_sel_hi,
1670 neg_lo:$neg_lo, neg_hi:$neg_hi)),
1671 // else NumSrcArgs == 3
1673 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1674 Src1Mod:$src1_modifiers, Src1RC:$src1,
1675 Src2Mod:$src2_modifiers, Src2RC:$src2,
1677 op_sel:$op_sel, op_sel_hi:$op_sel_hi,
1678 neg_lo:$neg_lo, neg_hi:$neg_hi),
1679 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1680 Src1Mod:$src1_modifiers, Src1RC:$src1,
1681 Src2Mod:$src2_modifiers, Src2RC:$src2,
1682 op_sel:$op_sel, op_sel_hi:$op_sel_hi,
1683 neg_lo:$neg_lo, neg_hi:$neg_hi))
// Returns the input list for VOP3 instructions that use op_sel, for two or
// three sources. Each source is preceded by its $srcN_modifiers operand.
// NOTE(review): the remaining template parameters, the condition selecting
// between the two variants of each case, and the trailing operands are on
// lines not visible in this excerpt.
1687 class getInsVOP3OpSel <RegisterOperand Src0RC,
1688 RegisterOperand Src1RC,
1689 RegisterOperand Src2RC,
1695 dag ret = !if (!eq(NumSrcArgs, 2),
1697 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1698 Src1Mod:$src1_modifiers, Src1RC:$src1,
1701 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1702 Src1Mod:$src1_modifiers, Src1RC:$src1,
1704 // else NumSrcArgs == 3
1706 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1707 Src1Mod:$src1_modifiers, Src1RC:$src1,
1708 Src2Mod:$src2_modifiers, Src2RC:$src2,
1711 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1712 Src1Mod:$src1_modifiers, Src1RC:$src1,
1713 Src2Mod:$src2_modifiers, Src2RC:$src2,
// Returns the input list for DPP instructions. Every variant ends with
// dpp_ctrl, row_mask, bank_mask and bound_ctrl. The one-source variants start
// with a DstRC:$old operand, and HasModifiers decides whether
// $srcN_modifiers operands are included.
// NOTE(review): the first operand of the two-source variants is on lines not
// visible in this excerpt (presumably also $old) - confirm.
1718 class getInsDPP <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
1719 int NumSrcArgs, bit HasModifiers,
1720 Operand Src0Mod, Operand Src1Mod> {
1722 dag ret = !if (!eq(NumSrcArgs, 0),
1723 // VOP1 without input operands (V_NOP)
1724 (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
1725 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl),
1726 !if (!eq(NumSrcArgs, 1),
1727 !if (!eq(HasModifiers, 1),
1728 // VOP1_DPP with modifiers
1729 (ins DstRC:$old, Src0Mod:$src0_modifiers,
1730 Src0RC:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
1731 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
1733 // VOP1_DPP without modifiers
1734 (ins DstRC:$old, Src0RC:$src0,
1735 dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
1736 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
1738 /* NumSrcArgs == 2 */,
1739 !if (!eq(HasModifiers, 1),
1740 // VOP2_DPP with modifiers
1742 Src0Mod:$src0_modifiers, Src0RC:$src0,
1743 Src1Mod:$src1_modifiers, Src1RC:$src1,
1744 dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
1745 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
1747 // VOP2_DPP without modifiers
1749 Src0RC:$src0, Src1RC:$src1, dpp_ctrl:$dpp_ctrl,
1750 row_mask:$row_mask, bank_mask:$bank_mask,
1751 bound_ctrl:$bound_ctrl)
// DPP16 input list: the getInsDPP list for the same parameters, concatenated
// via !con with extra operands.
// NOTE(review): the appended dag is on a line not visible in this excerpt;
// getAsmDPP16 appends "$fi", so presumably it is FI:$fi - confirm.
1755 class getInsDPP16 <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
1756 int NumSrcArgs, bit HasModifiers,
1757 Operand Src0Mod, Operand Src1Mod> {
1758 dag ret = !con(getInsDPP<DstRC, Src0RC, Src1RC, NumSrcArgs,
1759 HasModifiers, Src0Mod, Src1Mod>.ret,
// Returns the input list for DPP8 instructions. It has the same structure as
// getInsDPP, but the visible variants end with dpp8 and FI instead of the
// dpp_ctrl/row_mask/bank_mask/bound_ctrl group.
// NOTE(review): the first operand of the two-source variants and the tail of
// the "with modifiers" two-source variant are on lines not visible here.
1763 class getInsDPP8 <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
1764 int NumSrcArgs, bit HasModifiers,
1765 Operand Src0Mod, Operand Src1Mod> {
1766 dag ret = !if (!eq(NumSrcArgs, 0),
1767 // VOP1 without input operands (V_NOP)
1768 (ins dpp8:$dpp8, FI:$fi),
1769 !if (!eq(NumSrcArgs, 1),
1770 !if (!eq(HasModifiers, 1),
1771 // VOP1_DPP with modifiers
1772 (ins DstRC:$old, Src0Mod:$src0_modifiers,
1773 Src0RC:$src0, dpp8:$dpp8, FI:$fi)
1775 // VOP1_DPP without modifiers
1776 (ins DstRC:$old, Src0RC:$src0, dpp8:$dpp8, FI:$fi)
1778 /* NumSrcArgs == 2 */,
1779 !if (!eq(HasModifiers, 1),
1780 // VOP2_DPP with modifiers
1782 Src0Mod:$src0_modifiers, Src0RC:$src0,
1783 Src1Mod:$src1_modifiers, Src1RC:$src1,
1786 // VOP2_DPP without modifiers
1788 Src0RC:$src0, Src1RC:$src1, dpp8:$dpp8, FI:$fi)
// Returns the input list for SDWA instructions.
//   - One source: modifiers + src0, then dst_sel, dst_unused and src0_sel;
//     the omod variant also carries clamp and omod.
//   - Two sources with a 1-bit DstVT: clamp plus src0_sel/src1_sel only, with
//     no dst_sel or dst_unused.
//   - Two sources otherwise: dst_sel, dst_unused, src0_sel and src1_sel; the
//     omod variant also carries clamp and omod.
//   - Any other source count: empty (ins).
// NOTE(review): the DstVT parameter declaration and a few operand lines are
// not visible in this excerpt.
1794 class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs,
1795 bit HasSDWAOMod, Operand Src0Mod, Operand Src1Mod,
1798 dag ret = !if(!eq(NumSrcArgs, 0),
1799 // VOP1 without input operands (V_NOP)
1801 !if(!eq(NumSrcArgs, 1),
1803 !if(!eq(HasSDWAOMod, 0),
1804 // VOP1_SDWA without omod
1805 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1807 dst_sel:$dst_sel, dst_unused:$dst_unused,
1808 src0_sel:$src0_sel),
1809 // VOP1_SDWA with omod
1810 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1811 clampmod:$clamp, omod:$omod,
1812 dst_sel:$dst_sel, dst_unused:$dst_unused,
1813 src0_sel:$src0_sel)),
1814 !if(!eq(NumSrcArgs, 2),
1815 !if(!eq(DstVT.Size, 1),
1817 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1818 Src1Mod:$src1_modifiers, Src1RC:$src1,
1819 clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
1821 !if(!eq(HasSDWAOMod, 0),
1822 // VOP2_SDWA without omod
1823 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1824 Src1Mod:$src1_modifiers, Src1RC:$src1,
1826 dst_sel:$dst_sel, dst_unused:$dst_unused,
1827 src0_sel:$src0_sel, src1_sel:$src1_sel),
1828 // VOP2_SDWA with omod
1829 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1830 Src1Mod:$src1_modifiers, Src1RC:$src1,
1831 clampmod:$clamp, omod:$omod,
1832 dst_sel:$dst_sel, dst_unused:$dst_unused,
1833 src0_sel:$src0_sel, src1_sel:$src1_sel))),
1834 (ins)/* endif */)));
1837 // Outs for DPP and SDWA
// With a destination, a 1-bit DstVT yields an empty (outs) and any other type
// yields (outs DstRCExt:$vdst).
// NOTE(review): the no-destination branch is on a line not visible in this
// excerpt.
1838 class getOutsExt <bit HasDst, ValueType DstVT, RegisterOperand DstRCExt> {
1839 dag ret = !if(HasDst,
1840 !if(!eq(DstVT.Size, 1),
1841 (outs), // no dst for VOPC, we use "vcc"-token as dst in SDWA VOPC instructions
1842 (outs DstRCExt:$vdst)),
// Outs for SDWA. Unlike getOutsExt, a 1-bit DstVT still produces an explicit
// destination, named $sdst; every other type uses $vdst.
// NOTE(review): the no-destination branch is on a line not visible in this
// excerpt.
1847 class getOutsSDWA <bit HasDst, ValueType DstVT, RegisterOperand DstRCSDWA> {
1848 dag ret = !if(HasDst,
1849 !if(!eq(DstVT.Size, 1),
1850 (outs DstRCSDWA:$sdst),
1851 (outs DstRCSDWA:$vdst)),
1855 // Returns the assembly string for the inputs and outputs of a VOP[12C]
1856 // instruction. This does not add the _e32 suffix, so it can be reused
// by other asm-string helpers (getAsm64, getAsmDPP and getAsmDPP8 all
// reference it).
// The result is the destination (if HasDst) followed by ", $srcN" for each of
// the 1, 2 or 3 sources. NumSrcArgs == 0 contributes no source text.
1858 class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
1859 string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC
1860 string src0 = ", $src0";
1861 string src1 = ", $src1";
1862 string src2 = ", $src2";
1863 string ret = !if(HasDst, dst, "") #
1864 !if(!eq(NumSrcArgs, 1), src0, "") #
1865 !if(!eq(NumSrcArgs, 2), src0#src1, "") #
1866 !if(!eq(NumSrcArgs, 3), src0#src1#src2, "");
1869 // Returns the assembly string for the inputs and outputs of a VOP3
// instruction.
// Without modifiers this is the getAsm32 string plus "$clamp" when
// HasIntClamp is set. With modifiers, sources are printed through their
// $srcN_modifiers operands, followed by "$clamp" and, if HasOMod, "$omod".
// The comma placement in src0/src1 depends on NumSrcArgs so that the last
// source printed has no trailing comma.
1871 class getAsm64 <bit HasDst, int NumSrcArgs, bit HasIntClamp, bit HasModifiers,
1872 bit HasOMod, ValueType DstVT = i32> {
1873 string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC
1874 string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
1875 string src1 = !if(!eq(NumSrcArgs, 1), "",
1876 !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
1877 " $src1_modifiers,"));
1878 string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
1879 string iclamp = !if(HasIntClamp, "$clamp", "");
1881 !if(!eq(HasModifiers, 0),
1882 getAsm32<HasDst, NumSrcArgs, DstVT>.ret # iclamp,
1883 dst#", "#src0#src1#src2#"$clamp"#!if(HasOMod, "$omod", ""));
1886 // Returns the assembly string for the inputs and outputs of a VOP3P
// instruction.
// Sources are printed as plain $srcN, followed by "$op_sel$op_sel_hi", then
// "$neg_lo$neg_hi" when HasModifiers is set and "$clamp" when HasClamp is set.
// NOTE(review): `ret` always starts with dst and does not consult HasDst in
// the visible lines.
1888 class getAsmVOP3P <bit HasDst, int NumSrcArgs, bit HasModifiers,
1889 bit HasClamp, ValueType DstVT = i32> {
1890 string dst = " $vdst";
1891 string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
1892 string src1 = !if(!eq(NumSrcArgs, 1), "",
1893 !if(!eq(NumSrcArgs, 2), " $src1",
1895 string src2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
1897 string mods = !if(HasModifiers, "$neg_lo$neg_hi", "");
1898 string clamp = !if(HasClamp, "$clamp", "");
1900 // Each modifier is printed as an array of bits for each operand, so
1901 // all operands are printed as part of src0_modifiers.
1902 string ret = dst#", "#src0#src1#src2#"$op_sel$op_sel_hi"#mods#clamp;
// Returns the assembly string for VOP3 instructions with op_sel.
// Each source is printed either through its $srcN_modifiers operand (fsrcN)
// or as a plain $srcN (isrcN), chosen per source by SrcNHasMods. The string
// ends with "$op_sel" and, when HasClamp is set, "$clamp".
// NOTE(review): the remaining template parameters are declared on lines not
// visible in this excerpt.
1905 class getAsmVOP3OpSel <int NumSrcArgs,
1910 string dst = " $vdst";
1912 string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
1913 string isrc1 = !if(!eq(NumSrcArgs, 1), "",
1914 !if(!eq(NumSrcArgs, 2), " $src1",
1916 string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
1918 string fsrc0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
1919 string fsrc1 = !if(!eq(NumSrcArgs, 1), "",
1920 !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
1921 " $src1_modifiers,"));
1922 string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
1924 string src0 = !if(Src0HasMods, fsrc0, isrc0);
1925 string src1 = !if(Src1HasMods, fsrc1, isrc1);
1926 string src2 = !if(Src2HasMods, fsrc2, isrc2);
1928 string clamp = !if(HasClamp, "$clamp", "");
1930 string ret = dst#", "#src0#src1#src2#"$op_sel"#clamp;
// Returns the assembly string for DPP instructions: destination, source
// arguments, then " $dpp_ctrl$row_mask$bank_mask$bound_ctrl".
// Without modifiers the source text comes from getAsm32 with HasDst forced to
// 0, because the destination is emitted separately here.
// NOTE(review): the destination strings and the "with modifiers" argument
// string are on lines not visible in this excerpt.
1933 class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
1934 string dst = !if(HasDst,
1935 !if(!eq(DstVT.Size, 1),
1938 ""); // use $sdst for VOPC
1939 string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
1940 string src1 = !if(!eq(NumSrcArgs, 1), "",
1941 !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
1942 " $src1_modifiers,"));
1943 string args = !if(!eq(HasModifiers, 0),
1944 getAsm32<0, NumSrcArgs, DstVT>.ret,
1946 string ret = dst#args#" $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
// DPP16 assembly string: the getAsmDPP string with "$fi" appended.
1949 class getAsmDPP16 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
1950 string ret = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret#"$fi";
// Returns the assembly string for DPP8 instructions: destination, source
// arguments, then "$dpp8$fi". Unlike getAsmDPP, no space is inserted before
// the trailing operands.
// Without modifiers the source text comes from getAsm32 with HasDst forced
// to 0.
// NOTE(review): the destination strings and the "with modifiers" argument
// string are on lines not visible in this excerpt.
1953 class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
1954 string dst = !if(HasDst,
1955 !if(!eq(DstVT.Size, 1),
1958 ""); // use $sdst for VOPC
1959 string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
1960 string src1 = !if(!eq(NumSrcArgs, 1), "",
1961 !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
1962 " $src1_modifiers,"));
1963 string args = !if(!eq(HasModifiers, 0),
1964 getAsm32<0, NumSrcArgs, DstVT>.ret,
1966 string ret = dst#args#"$dpp8$fi";
// Returns the assembly string for SDWA instructions as dst # args # sdwa.
// A 1-bit DstVT prints the literal " vcc" token as destination. The trailing
// SDWA selector list omits $dst_sel and $dst_unused for a 1-bit DstVT with
// two sources.
// NOTE(review): several branch results are on lines not visible in this
// excerpt.
1969 class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
1970 string dst = !if(HasDst,
1971 !if(!eq(DstVT.Size, 1),
1972 " vcc", // use vcc token as dst for VOPC instructions
1975 string src0 = "$src0_modifiers";
1976 string src1 = "$src1_modifiers";
1977 string args = !if(!eq(NumSrcArgs, 0),
1979 !if(!eq(NumSrcArgs, 1),
1981 ", "#src0#", "#src1#"$clamp"
1984 string sdwa = !if(!eq(NumSrcArgs, 0),
1986 !if(!eq(NumSrcArgs, 1),
1987 " $dst_sel $dst_unused $src0_sel",
1988 !if(!eq(DstVT.Size, 1),
1989 " $src0_sel $src1_sel", // No dst_sel and dst_unused for VOPC
1990 " $dst_sel $dst_unused $src0_sel $src1_sel"
1994 string ret = dst#args#sdwa;
// Variant of getAsmSDWA that places the output modifiers in the selector
// list. out_mods is "$clamp" without omod and "$clamp$omod" with it, and it
// is prepended to the selector list except for a 1-bit DstVT with two
// sources, which prints only the source selectors.
// NOTE(review): the destination strings and the `args` branch results are on
// lines not visible in this excerpt.
1997 class getAsmSDWA9 <bit HasDst, bit HasOMod, int NumSrcArgs,
1998 ValueType DstVT = i32> {
1999 string dst = !if(HasDst,
2000 !if(!eq(DstVT.Size, 1),
2004 string src0 = "$src0_modifiers";
2005 string src1 = "$src1_modifiers";
2006 string out_mods = !if(!eq(HasOMod, 0), "$clamp", "$clamp$omod");
2007 string args = !if(!eq(NumSrcArgs, 0), "",
2008 !if(!eq(NumSrcArgs, 1),
2013 string sdwa = !if(!eq(NumSrcArgs, 0), "",
2014 !if(!eq(NumSrcArgs, 1),
2015 out_mods#" $dst_sel $dst_unused $src0_sel",
2016 !if(!eq(DstVT.Size, 1),
2017 " $src0_sel $src1_sel", // No dst_sel, dst_unused and output modifiers for VOPC
2018 out_mods#" $dst_sel $dst_unused $src0_sel $src1_sel"
2022 string ret = dst#args#sdwa;
2026 // Function that checks if instruction supports DPP and SDWA
// Yields 0 for three-source instructions and for a 64-bit destination. Src0
// and Src1 are also tested for a 64-bit size.
// NOTE(review): the results of the Src0/Src1 tests and the final value are on
// lines not visible in this excerpt.
2027 class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
2028 ValueType Src1VT = i32> {
2029 bit ret = !if(!eq(NumSrcArgs, 3),
2030 0, // NumSrcArgs == 3 - No DPP or SDWA for VOP3
2031 !if(!eq(DstVT.Size, 64),
2032 0, // 64-bit dst - No DPP or SDWA for 64-bit operands
2033 !if(!eq(Src0VT.Size, 64),
2035 !if(!eq(Src1VT.Size, 64),
// Whether an instruction supports DPP: never for zero-source instructions,
// otherwise whatever getHasExt reports for the same types.
2044 class getHasDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
2045 ValueType Src1VT = i32> {
2046 bit ret = !if(!eq(NumSrcArgs, 0), 0,
2047 getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
// Logical OR of two bits, expressed with nested !if: ret is 1 when either a
// or b is set, and 0 otherwise.
2050 class BitOr<bit a, bit b> {
2051 bit ret = !if(a, 1, !if(b, 1, 0));
// Logical AND of two bits, expressed with nested !if: ret is 1 only when both
// a and b are set.
2054 class BitAnd<bit a, bit b> {
2055 bit ret = !if(a, !if(b, 1, 0), 0);
// Describes one VOP operand-type signature and derives from it everything the
// instruction definitions consume: register operands, modifier operands,
// capability bits, operand dags and assembly strings for each encoding
// variant (32-bit, 64-bit, VOP3P, VOP3 op_sel, DPP/DPP16/DPP8, SDWA/SDWA9).
//
// _ArgVT            - value types in the order [dst, src0, src1, src2];
//                     `untyped` marks an operand as absent (see HasDst and
//                     HasSrc0..HasSrc2 below).
// _EnableF32SrcMods - OR'ed into HasModifiers and forwarded to getSrcMod.
// _EnableClamp      - OR'ed into HasClamp.
2063 class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
2064 bit _EnableClamp = 0> {
2066 field list<ValueType> ArgVT = _ArgVT;
2067 field bit EnableF32SrcMods = _EnableF32SrcMods;
2068 field bit EnableClamp = _EnableClamp;
// Positional view of the signature.
2070 field ValueType DstVT = ArgVT[0];
2071 field ValueType Src0VT = ArgVT[1];
2072 field ValueType Src1VT = ArgVT[2];
2073 field ValueType Src2VT = ArgVT[3];
// Register operands / classes for each encoding variant, each chosen from
// the value type of the operand it describes.
2074 field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
2075 field RegisterOperand DstRCDPP = getVALUDstForVT<DstVT>.ret;
2076 field RegisterOperand DstRCSDWA = getSDWADstForVT<DstVT>.ret;
2077 field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
2078 field RegisterClass Src1RC32 = getVregSrcForVT<Src1VT>.ret;
2079 field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
2080 field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
2081 field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
2082 field RegisterClass Src0DPP = getVregSrcForVT<Src0VT>.ret;
2083 field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
2084 field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
// Derived from Src1VT like every other Src1* field (including Src1ModSDWA).
// It was previously computed from Src0VT, which ignored src1's own type
// whenever the two source types differ (e.g. [f32, f32, i32]).
2085 field RegisterOperand Src1SDWA = getSDWASrcForVT<Src1VT>.ret;
// Source-modifier operands for the normal, DPP and SDWA variants.
2086 field Operand Src0Mod = getSrcMod<Src0VT, EnableF32SrcMods>.ret;
2087 field Operand Src1Mod = getSrcMod<Src1VT, EnableF32SrcMods>.ret;
2088 field Operand Src2Mod = getSrcMod<Src2VT, EnableF32SrcMods>.ret;
2089 field Operand Src0ModDPP = getSrcModExt<Src0VT>.ret;
2090 field Operand Src1ModDPP = getSrcModExt<Src1VT>.ret;
2091 field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
2092 field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret;
// Operand presence: an operand exists unless its type is `untyped`.
2095 field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1);
2096 field bit HasDst32 = HasDst;
2097 field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case
2098 field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret;
2099 field bit HasSrc0 = !if(!eq(Src0VT.Value, untyped.Value), 0, 1);
2100 field bit HasSrc1 = !if(!eq(Src1VT.Value, untyped.Value), 0, 1);
2101 field bit HasSrc2 = !if(!eq(Src2VT.Value, untyped.Value), 0, 1);
2103 // TODO: Modifiers logic is somewhat adhoc here, to be refined later
2104 // HasModifiers affects the normal and DPP encodings. We take note of EnableF32SrcMods, which
2105 // enables modifiers for i32 type.
2106 field bit HasModifiers = BitOr<isModifierType<Src0VT>.ret, EnableF32SrcMods>.ret;
2108 // HasSrc*FloatMods affects the SDWA encoding. We ignore EnableF32SrcMods.
2109 field bit HasSrc0FloatMods = isFloatType<Src0VT>.ret;
2110 field bit HasSrc1FloatMods = isFloatType<Src1VT>.ret;
2111 field bit HasSrc2FloatMods = isFloatType<Src2VT>.ret;
2113 // HasSrc*IntMods affects the SDWA encoding. We ignore EnableF32SrcMods.
2114 field bit HasSrc0IntMods = isIntType<Src0VT>.ret;
2115 field bit HasSrc1IntMods = isIntType<Src1VT>.ret;
2116 field bit HasSrc2IntMods = isIntType<Src2VT>.ret;
// src1/src2 carry modifiers only when HasModifiers is set and the operand is
// a float or int type.
2118 field bit HasSrc0Mods = HasModifiers;
2119 field bit HasSrc1Mods = !if(HasModifiers, BitOr<HasSrc1FloatMods, HasSrc1IntMods>.ret, 0);
2120 field bit HasSrc2Mods = !if(HasModifiers, BitOr<HasSrc2FloatMods, HasSrc2IntMods>.ret, 0);
// Clamp flags: HasClamp is split into FP and integer flavours by the
// destination type; HasClampHi additionally requires a packed destination.
2122 field bit HasClamp = BitOr<isModifierType<Src0VT>.ret, EnableClamp>.ret;
2123 field bit HasSDWAClamp = EmitDst;
2124 field bit HasFPClamp = BitAnd<isFloatType<DstVT>.ret, HasClamp>.ret;
2125 field bit HasIntClamp = !if(isFloatType<DstVT>.ret, 0, HasClamp);
2126 field bit HasClampLo = HasClamp;
2127 field bit HasClampHi = BitAnd<isPackedType<DstVT>.ret, HasClamp>.ret;
2128 field bit HasHigh = 0;
// op_sel follows packedness of src0; omod is available for float
// destinations unless op_sel is in use.
2130 field bit IsPacked = isPackedType<Src0VT>.ret;
2131 field bit HasOpSel = IsPacked;
2132 field bit HasOMod = !if(HasOpSel, 0, isFloatType<DstVT>.ret);
2133 field bit HasSDWAOMod = isFloatType<DstVT>.ret;
// Availability of the DPP / SDWA variants; subclasses may override these.
2135 field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
2136 field bit HasExtDPP = getHasDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
2137 field bit HasExtSDWA = HasExt;
2138 field bit HasExtSDWA9 = HasExt;
2139 field int NeedPatGen = PatGenMode.NoPattern;
2141 field bit IsMAI = 0;
2142 field bit IsDOT = 0;
// Packed-modifier operands: the f16 flavour for float sources, i16 otherwise.
2144 field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
2145 field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
2146 field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods);
// Output operand lists.
2148 field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs));
2150 // VOP3b instructions are a special case with a second explicit
2151 // output. This is manually overridden for them.
2152 field dag Outs32 = Outs;
2153 field dag Outs64 = Outs;
2154 field dag OutsDPP = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
2155 field dag OutsDPP8 = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
2156 field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret;
// Input operand lists, one per encoding variant.
2158 field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
2159 field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
2160 HasIntClamp, HasModifiers, HasSrc2Mods,
2161 HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
2162 field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
2163 NumSrcArgs, HasClamp,
2164 Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
2165 field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
2168 getOpSelMod<Src0VT>.ret,
2169 getOpSelMod<Src1VT>.ret,
2170 getOpSelMod<Src2VT>.ret>.ret;
2171 field dag InsDPP = !if(HasExtDPP,
2172 getInsDPP<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
2173 HasModifiers, Src0ModDPP, Src1ModDPP>.ret,
2175 field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
2176 HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
// DPP8 is built with the modifiers argument fixed to 0.
2177 field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs, 0,
2178 Src0ModDPP, Src1ModDPP>.ret;
2179 field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
2180 HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
// Assembly operand strings, one per encoding variant.
2184 field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
2185 field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasIntClamp, HasModifiers, HasOMod, DstVT>.ret;
2186 field string AsmVOP3P = getAsmVOP3P<HasDst, NumSrcArgs, HasModifiers, HasClamp, DstVT>.ret;
2187 field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
2191 HasSrc2FloatMods>.ret;
// AsmDPP is empty when the profile has no DPP variant.
2192 field string AsmDPP = !if(HasExtDPP,
2193 getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret, "");
2194 field string AsmDPP16 = getAsmDPP16<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
2195 field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0, DstVT>.ret;
2196 field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
2197 field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
2199 field string TieRegDPP = "$old";
// Rebuilds a profile from p's argument types with extension support turned
// off (HasExtSDWA9 is forced to 0 here).
// NOTE(review): only p.ArgVT is forwarded, so EnableF32SrcMods and
// EnableClamp take VOPProfile's defaults (0) rather than p's values, and any
// `let` overrides made on p are not inherited - confirm this is intended.
2202 class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
2206 let HasExtSDWA9 = 0;
// Rebuilds a profile from p's argument types with NeedPatGen set to `mode`
// (PatGenMode.Pattern by default).
// NOTE(review): only p.ArgVT is forwarded, so EnableF32SrcMods and
// EnableClamp take VOPProfile's defaults (0) rather than p's values - confirm
// this is intended.
2209 class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.Pattern> : VOPProfile <p.ArgVT> {
2210 let NeedPatGen = mode;
// Predefined operand-type profiles. Names follow VOP_<Dst>_<Src0>[_<Src1>
// [_<Src2>]] and the list passed to VOPProfile is [dst, src0, src1, src2],
// with `untyped` filling the slots of operands the instruction does not have.
2213 def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>;
2214 def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>;
2215 def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>;
2217 def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
2218 def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
2219 def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>;
2220 def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>;
// The next three lists carry a fifth, trailing `untyped`; VOPProfile only
// indexes ArgVT[0] through ArgVT[3] for its DstVT/Src*VT fields.
2222 def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>;
2223 def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;
2225 def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32, untyped]>;
// Packed 16-bit vector profiles.
2227 def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>;
2228 def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>;
// Named B32 but declared with an i32 destination type.
2229 def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;
2231 def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>;
2232 def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
2233 def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>;
2234 def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>;
2236 def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>;
// Profile with no destination and no sources.
2238 def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;
// One-source profiles.
2240 def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
2241 def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>;
2242 def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>;
2243 def VOP_F64_F32 : VOPProfile <[f64, f32, untyped, untyped]>;
2244 def VOP_F64_F64 : VOPProfile <[f64, f64, untyped, untyped]>;
2245 def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>;
2246 def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>;
2247 def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>;
2248 def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
2249 def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>;
2250 def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>;
// Two-source profiles.
2252 def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>;
2253 def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;
2254 def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>;
2255 def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>;
2256 def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
2257 def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
2258 def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
2259 def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
// Same types as VOP_I32_I32_I32 but with the clamp bit enabled.
2260 def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], 0, /*EnableClamp=*/1>;
2261 def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
2262 def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>;
2264 def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
2265 def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
2266 def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
// Three-source profiles.
2268 def VOP_F16_F32_F16_F32 : VOPProfile <[f16, f32, f16, f32]>;
2269 def VOP_F32_F32_F16_F16 : VOPProfile <[f32, f32, f16, f16]>;
2270 def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
2271 def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
2272 def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
2273 def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
2274 def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
2275 def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;
2276 def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>;
2278 def VOP_F32_V2F16_V2F16_F32 : VOPProfile <[f32, v2f16, v2f16, f32]>;
2279 def VOP_I32_V2I16_V2I16_I32 : VOPProfile <[i32, v2i16, v2i16, i32]>;
// Profiles whose destination and third source are the same wide vector type.
2281 def VOP_V4F32_F32_F32_V4F32 : VOPProfile <[v4f32, f32, f32, v4f32]>;
2282 def VOP_V16F32_F32_F32_V16F32 : VOPProfile <[v16f32, f32, f32, v16f32]>;
2283 def VOP_V32F32_F32_F32_V32F32 : VOPProfile <[v32f32, f32, f32, v32f32]>;
2284 def VOP_V4F32_V4F16_V4F16_V4F32 : VOPProfile <[v4f32, v4f16, v4f16, v4f32]>;
2285 def VOP_V16F32_V4F16_V4F16_V16F32 : VOPProfile <[v16f32, v4f16, v4f16, v16f32]>;
2286 def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile <[v32f32, v4f16, v4f16, v32f32]>;
2287 def VOP_V4F32_V2I16_V2I16_V4F32 : VOPProfile <[v4f32, v2i16, v2i16, v4f32]>;
2288 def VOP_V16F32_V2I16_V2I16_V16F32 : VOPProfile <[v16f32, v2i16, v2i16, v16f32]>;
2289 def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile <[v32f32, v2i16, v2i16, v32f32]>;
2290 def VOP_V4I32_I32_I32_V4I32 : VOPProfile <[v4i32, i32, i32, v4i32]>;
2291 def VOP_V16I32_I32_I32_V16I32 : VOPProfile <[v16i32, i32, i32, v16i32]>;
2292 def VOP_V32I32_I32_I32_V32I32 : VOPProfile <[v32i32, i32, i32, v32i32]>;
// Tags an instruction with the fields used by the getCommuteOrig and
// getCommuteRev mappings: RevOp is the row key, IsOrig is the column value.
2294 class Commutable_REV <string revOp, bit isOrig> {
2295 string RevOp = revOp;
2296 bit IsOrig = isOrig;
// Tags an atomic instruction for the getAtomicRetOp / getAtomicNoRetOp
// mappings, which filter on this class and use NoRetOp as the row key.
2299 class AtomicNoRet <string noRetOp, bit isRet> {
2300 string NoRetOp = noRetOp;
2304 //===----------------------------------------------------------------------===//
2305 // Interpolation opcodes
2306 //===----------------------------------------------------------------------===//
// Register operand over `rc` that passes "printVINTRPDst" as RegisterOperand's
// second argument (presumably the print-method name - confirm in Target.td).
2308 class VINTRPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVINTRPDst">;
// Pseudo form of an interpolation instruction: empty asm string, encoding
// family NONE, and marked code-gen only. It carries the selection `pattern`.
2310 class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
2311 VINTRPCommon <outs, ins, "", pattern>,
2312 SIMCInstr<opName, SIEncodingFamily.NONE> {
2314 let isCodeGenOnly = 1;
2317 // FIXME-GFX10: WIP.
// Real (encoded) interpolation instruction with no selection pattern. The
// encoding family is a parameter: VINTRP_m instantiates this class for both
// SIEncodingFamily.SI and SIEncodingFamily.GFX10.
2318 class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins,
2319 string asm, int encodingFamily> :
2320 VINTRPCommon <outs, ins, asm, []>,
2322 SIMCInstr<opName, encodingFamily> {
2323 let DisableDecoder = DisableSIDecoder;
// Real (encoded) interpolation instruction for the VI encoding family: fixed
// to SIEncodingFamily.VI, the VI assembler predicate and the "GFX8" decoder
// namespace.
2326 class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
2328 VINTRPCommon <outs, ins, asm, []>,
2330 SIMCInstr<opName, SIEncodingFamily.VI> {
2331 let AssemblerPredicate = VIAssemblerPredicate;
2332 let DecoderNamespace = "GFX8";
2333 let DisableDecoder = DisableVIDecoder;
2336 // FIXME-GFX10: WIP.
// Defines one interpolation instruction in all its forms: the pseudo (which
// carries `pattern`) plus real encodings suffixed _si, _vi and _gfx10.
// The _si and _gfx10 records share VINTRP_Real_si and differ only in
// encoding family, assembler predicate and decoder namespace.
2337 multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm,
2338 list<dag> pattern = []> {
2339 def "" : VINTRP_Pseudo <NAME, outs, ins, pattern>;
2341 let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
2342 def _si : VINTRP_Real_si <op, NAME, outs, ins, asm, SIEncodingFamily.SI>;
2343 } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
2345 def _vi : VINTRP_Real_vi <op, NAME, outs, ins, asm>;
2347 let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
2348 def _gfx10 : VINTRP_Real_si<op, NAME, outs, ins, asm, SIEncodingFamily.GFX10>;
2349 } // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
2351 //===----------------------------------------------------------------------===//
2352 // Vector instruction mappings
2353 //===----------------------------------------------------------------------===//
2355 // Maps an opcode in e32 form to its e64 equivalent
// Rows: VOP records sharing an OpName. Columns are (Size, VOP3):
// key ("4", "0") -> value ("8", "1").
2356 def getVOPe64 : InstrMapping {
2357 let FilterClass = "VOP";
2358 let RowFields = ["OpName"];
2359 let ColFields = ["Size", "VOP3"];
2360 let KeyCol = ["4", "0"];
2361 let ValueCols = [["8", "1"]];
2364 // Maps an opcode in e64 form to its e32 equivalent
// Inverse of getVOPe64: same rows and columns, with
// key ("8", "1") -> value ("4", "0").
2365 def getVOPe32 : InstrMapping {
2366 let FilterClass = "VOP";
2367 let RowFields = ["OpName"];
2368 let ColFields = ["Size", "VOP3"];
2369 let KeyCol = ["8", "1"];
2370 let ValueCols = [["4", "0"]];
2373 // Maps ordinary instructions to their SDWA counterparts
// Rows: VOP records sharing an OpName. Column is AsmVariantName:
// key "Default" -> value "SDWA".
2374 def getSDWAOp : InstrMapping {
2375 let FilterClass = "VOP";
2376 let RowFields = ["OpName"];
2377 let ColFields = ["AsmVariantName"];
2378 let KeyCol = ["Default"];
2379 let ValueCols = [["SDWA"]];
2382 // Maps SDWA instructions to their ordinary counterparts
// Inverse of getSDWAOp: key AsmVariantName "SDWA" -> value "Default".
2383 def getBasicFromSDWAOp : InstrMapping {
2384 let FilterClass = "VOP";
2385 let RowFields = ["OpName"];
2386 let ColFields = ["AsmVariantName"];
2387 let KeyCol = ["SDWA"];
2388 let ValueCols = [["Default"]];
2391 // Maps ordinary instructions to their DPP counterparts
// Rows: VOP records sharing an OpName. Column is AsmVariantName:
// key "Default" -> value "DPP".
2392 def getDPPOp32 : InstrMapping {
2393 let FilterClass = "VOP";
2394 let RowFields = ["OpName"];
2395 let ColFields = ["AsmVariantName"];
2396 let KeyCol = ["Default"];
2397 let ValueCols = [["DPP"]];
2400 // Maps a commuted opcode to its original version
// Rows: Commutable_REV records sharing a RevOp. Column is IsOrig; the value
// column selects the record with IsOrig = "1".
2401 def getCommuteOrig : InstrMapping {
2402 let FilterClass = "Commutable_REV";
2403 let RowFields = ["RevOp"];
2404 let ColFields = ["IsOrig"];
2406 let ValueCols = [["1"]];
2409 // Maps an original opcode to its commuted version
// Rows: Commutable_REV records sharing a RevOp. Column is IsOrig; the value
// column selects the record with IsOrig = "0".
2410 def getCommuteRev : InstrMapping {
2411 let FilterClass = "Commutable_REV";
2412 let RowFields = ["RevOp"];
2413 let ColFields = ["IsOrig"];
2415 let ValueCols = [["0"]];
// Maps a pseudo instruction to its real counterpart in each encoding family.
// Rows: SIMCInstr records sharing a PseudoInstr. Column is Subtarget: the key
// is SIEncodingFamily.NONE and there is one value column per encoding family
// listed below, in this order.
2418 def getMCOpcodeGen : InstrMapping {
2419 let FilterClass = "SIMCInstr";
2420 let RowFields = ["PseudoInstr"];
2421 let ColFields = ["Subtarget"];
2422 let KeyCol = [!cast<string>(SIEncodingFamily.NONE)];
2423 let ValueCols = [[!cast<string>(SIEncodingFamily.SI)],
2424 [!cast<string>(SIEncodingFamily.VI)],
2425 [!cast<string>(SIEncodingFamily.SDWA)],
2426 [!cast<string>(SIEncodingFamily.SDWA9)],
2427 // GFX80 encoding is added to work around a multiple matching
2428 // issue for buffer instructions with unpacked d16 data. This
2429 // does not actually change the encoding, and thus may be
2431 [!cast<string>(SIEncodingFamily.GFX80)],
2432 [!cast<string>(SIEncodingFamily.GFX9)],
2433 [!cast<string>(SIEncodingFamily.GFX10)],
2434 [!cast<string>(SIEncodingFamily.SDWA10)]];
2437 // Get equivalent SOPK instruction.
// Rows: SOPKInstTable records sharing a BaseCmpOp. Column is IsSOPK; the
// value column selects the record with IsSOPK = "1".
2438 def getSOPKOp : InstrMapping {
2439 let FilterClass = "SOPKInstTable";
2440 let RowFields = ["BaseCmpOp"];
2441 let ColFields = ["IsSOPK"];
2443 let ValueCols = [["1"]];
// Looks up the addr64 form of a MUBUF instruction.
// Rows: MUBUFAddr64Table records sharing an OpName. Column is IsAddr64; the
// value column selects the record with IsAddr64 = "1".
2446 def getAddr64Inst : InstrMapping {
2447 let FilterClass = "MUBUFAddr64Table";
2448 let RowFields = ["OpName"];
2449 let ColFields = ["IsAddr64"];
2451 let ValueCols = [["1"]];
// Uses the same filter class, row field, column field and value column
// (IsAddr64 = "1") as getAddr64Inst.
// NOTE(review): presumably this differs from getAddr64Inst only in its key
// column, acting as an "is this already addr64?" query - confirm.
2454 def getIfAddr64Inst : InstrMapping {
2455 let FilterClass = "MUBUFAddr64Table";
2456 let RowFields = ["OpName"];
2457 let ColFields = ["IsAddr64"];
2459 let ValueCols = [["1"]];
// Looks up the non-LDS form of a MUBUF instruction.
// Rows: MUBUFLdsTable records sharing an OpName. Column is IsLds; the value
// column selects the record with IsLds = "0".
2462 def getMUBUFNoLdsInst : InstrMapping {
2463 let FilterClass = "MUBUFLdsTable";
2464 let RowFields = ["OpName"];
2465 let ColFields = ["IsLds"];
2467 let ValueCols = [["0"]];
2470 // Maps an atomic opcode to its version with a return value.
// Rows: AtomicNoRet records sharing a NoRetOp. Column is IsRet; the value
// column selects the record with IsRet = "1".
2471 def getAtomicRetOp : InstrMapping {
2472 let FilterClass = "AtomicNoRet";
2473 let RowFields = ["NoRetOp"];
2474 let ColFields = ["IsRet"];
2476 let ValueCols = [["1"]];
2479 // Maps an atomic opcode to its returnless version.
// Rows: AtomicNoRet records sharing a NoRetOp. Column is IsRet; the value
// column selects the record with IsRet = "0".
2480 def getAtomicNoRetOp : InstrMapping {
2481 let FilterClass = "AtomicNoRet";
2482 let RowFields = ["NoRetOp"];
2483 let ColFields = ["IsRet"];
2485 let ValueCols = [["0"]];
2488 // Maps a GLOBAL to its SADDR form.
// Rows: GlobalSaddrTable records sharing a SaddrOp. Column is IsSaddr; the
// value column selects the record with IsSaddr = "1".
2489 def getGlobalSaddrOp : InstrMapping {
2490 let FilterClass = "GlobalSaddrTable";
2491 let RowFields = ["SaddrOp"];
2492 let ColFields = ["IsSaddr"];
2494 let ValueCols = [["1"]];
2497 // Maps a v_cmpx opcode with sdst to opcode without sdst.
// Rows: VCMPXNoSDstTable records sharing a NoSDstOp. Column is HasSDst; the
// value column selects the record with HasSDst = "0".
2498 def getVCMPXNoSDstOp : InstrMapping {
2499 let FilterClass = "VCMPXNoSDstTable";
2500 let RowFields = ["NoSDstOp"];
2501 let ColFields = ["HasSDst"];
2503 let ValueCols = [["0"]];
2506 // Maps a SOPP to a SOPP with S_NOP
// Rows: Base_SOPP records sharing an AsmString. Column is Size; the value
// column selects the record with Size = "8".
2507 def getSOPPWithRelaxation : InstrMapping {
2508 let FilterClass = "Base_SOPP";
2509 let RowFields = ["AsmString"];
2510 let ColFields = ["Size"];
2512 let ValueCols = [["8"]];
2515 include "SIInstructions.td"
2517 include "DSInstructions.td"
2518 include "MIMGInstructions.td"