1 //===-- DSInstructions.td - DS Instruction Definitions --------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 class DS_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> pattern=[]> :
10 InstSI <outs, ins, "", pattern>,
11 SIMCInstr <opName, SIEncodingFamily.NONE> {
17 let UseNamedOperandTable = 1;
19 // Most instructions load and store data, so set this as the default.
23 let hasSideEffects = 0;
24 let SchedRW = [WriteLDS];
27 let isCodeGenOnly = 1;
29 string Mnemonic = opName;
30 string AsmOperands = asmOps;
32 // Well, these bits are a kind of hack, because it would be more natural
33 // to test "outs" and "ins" dags for the presence of particular operands
36 bits<1> has_data0 = 1;
37 bits<1> has_data1 = 1;
39 bits<1> has_gws_data0 = 0; // data0 is encoded as addr
41 bits<1> has_offset = 1; // has "offset" that should be split to offset0,1
42 bits<1> has_offset0 = 1;
43 bits<1> has_offset1 = 1;
46 bits<1> gdsValue = 0; // if has_gds == 0 set gds to this value
48 bits<1> has_m0_read = 1;
50 let Uses = !if(has_m0_read, [M0, EXEC], [EXEC]);
53 class DS_Real <DS_Pseudo ps, string opName = ps.Mnemonic> :
54 InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands>,
58 let isCodeGenOnly = 0;
61 let UseNamedOperandTable = 1;
63 // copy relevant pseudo op flags
65 let SubtargetPredicate = ps.SubtargetPredicate;
66 let WaveSizePredicate = ps.WaveSizePredicate;
67 let OtherPredicates = ps.OtherPredicates;
68 let TSFlags = ps.TSFlags;
69 let SchedRW = ps.SchedRW;
70 let mayLoad = ps.mayLoad;
71 let mayStore = ps.mayStore;
72 let IsAtomicRet = ps.IsAtomicRet;
73 let IsAtomicNoRet = ps.IsAtomicNoRet;
77 let Constraints = ps.Constraints;
78 let DisableEncoding = ps.DisableEncoding;
90 let offset0 = !if(ps.has_offset, offset{7-0}, ?);
91 let offset1 = !if(ps.has_offset, offset{15-8}, ?);
93 bits<1> acc = !if(ps.has_vdst, vdst{9},
94 !if(!or(ps.has_data0, ps.has_gws_data0), data0{9}, 0));
97 // DS Pseudo instructions
99 class DS_0A1D_NORET<string opName, RegisterClass rc = VGPR_32>
102 (ins getLdStRegisterOperand<rc>.ret:$data0, Offset:$offset, gds:$gds),
103 " $data0$offset$gds"> {
110 class DS_1A1D_NORET<string opName, RegisterClass rc = VGPR_32>
113 (ins VGPR_32:$addr, getLdStRegisterOperand<rc>.ret:$data0, Offset:$offset, gds:$gds),
114 " $addr, $data0$offset$gds"> {
118 let IsAtomicNoRet = 1;
121 multiclass DS_1A1D_NORET_mc<string opName, RegisterClass rc = VGPR_32> {
122 def "" : DS_1A1D_NORET<opName, rc>;
124 let has_m0_read = 0 in {
125 def _gfx9 : DS_1A1D_NORET<opName, rc>;
129 multiclass DS_1A1D_NORET_mc_gfx9<string opName, RegisterClass rc = VGPR_32> {
130 let has_m0_read = 0 in {
131 def "" : DS_1A1D_NORET<opName, rc>;
135 class DS_1A2D_NORET<string opName, RegisterClass rc = VGPR_32,
136 RegisterOperand data_op = getLdStRegisterOperand<rc>.ret>
139 (ins VGPR_32:$addr, data_op:$data0, data_op:$data1, Offset:$offset, gds:$gds),
140 " $addr, $data0, $data1$offset$gds"> {
143 let IsAtomicNoRet = 1;
146 multiclass DS_1A2D_NORET_mc<string opName, RegisterClass rc = VGPR_32> {
147 def "" : DS_1A2D_NORET<opName, rc>;
149 let has_m0_read = 0 in {
150 def _gfx9 : DS_1A2D_NORET<opName, rc>;
154 class DS_1A2D_Off8_NORET <string opName, RegisterClass rc = VGPR_32,
155 RegisterOperand data_op = getLdStRegisterOperand<rc>.ret>
158 (ins VGPR_32:$addr, data_op:$data0, data_op:$data1,
159 Offset0:$offset0, Offset1:$offset1, gds:$gds),
160 " $addr, $data0, $data1$offset0$offset1$gds"> {
166 multiclass DS_1A2D_Off8_NORET_mc <string opName, RegisterClass rc = VGPR_32> {
167 def "" : DS_1A2D_Off8_NORET<opName, rc>;
169 let has_m0_read = 0 in {
170 def _gfx9 : DS_1A2D_Off8_NORET<opName, rc>;
174 class DS_0A1D_RET_GDS<string opName, RegisterClass rc = VGPR_32, RegisterClass src = rc,
175 RegisterOperand dst_op = getLdStRegisterOperand<rc>.ret,
176 RegisterOperand src_op = getLdStRegisterOperand<src>.ret>
179 (ins src_op:$data0, Offset:$offset),
180 " $vdst, $data0$offset gds"> {
186 let hasSideEffects = 1;
189 class DS_1A1D_RET <string opName, RegisterClass rc = VGPR_32,
190 RegisterOperand data_op = getLdStRegisterOperand<rc>.ret>
192 (outs data_op:$vdst),
193 (ins VGPR_32:$addr, data_op:$data0, Offset:$offset, gds:$gds),
194 " $vdst, $addr, $data0$offset$gds"> {
196 let hasPostISelHook = 1;
201 multiclass DS_1A1D_RET_mc <string opName, RegisterClass rc = VGPR_32> {
202 def "" : DS_1A1D_RET<opName, rc>;
204 let has_m0_read = 0 in {
205 def _gfx9 : DS_1A1D_RET<opName, rc>;
209 multiclass DS_1A1D_RET_mc_gfx9 <string opName, RegisterClass rc = VGPR_32> {
210 let has_m0_read = 0 in {
211 def "" : DS_1A1D_RET<opName, rc>;
215 class DS_1A2D_RET<string opName,
216 RegisterClass rc = VGPR_32,
217 RegisterClass src = rc,
218 RegisterOperand dst_op = getLdStRegisterOperand<rc>.ret,
219 RegisterOperand src_op = getLdStRegisterOperand<src>.ret>
222 (ins VGPR_32:$addr, src_op:$data0, src_op:$data1, Offset:$offset, gds:$gds),
223 " $vdst, $addr, $data0, $data1$offset$gds"> {
225 let hasPostISelHook = 1;
229 multiclass DS_1A2D_RET_mc<string opName,
230 RegisterClass rc = VGPR_32,
231 RegisterClass src = rc> {
232 def "" : DS_1A2D_RET<opName, rc, src>;
234 let has_m0_read = 0 in {
235 def _gfx9 : DS_1A2D_RET<opName, rc, src>;
239 class DS_1A2D_Off8_RET<string opName,
240 RegisterClass rc = VGPR_32,
241 RegisterClass src = rc,
242 RegisterOperand dst_op = getLdStRegisterOperand<rc>.ret,
243 RegisterOperand src_op = getLdStRegisterOperand<src>.ret>
246 (ins VGPR_32:$addr, src_op:$data0, src_op:$data1, Offset0:$offset0, Offset1:$offset1, gds:$gds),
247 " $vdst, $addr, $data0, $data1$offset0$offset1$gds"> {
250 let hasPostISelHook = 1;
253 multiclass DS_1A2D_Off8_RET_mc<string opName,
254 RegisterClass rc = VGPR_32,
255 RegisterClass src = rc> {
256 def "" : DS_1A2D_Off8_RET<opName, rc, src>;
258 let has_m0_read = 0 in {
259 def _gfx9 : DS_1A2D_Off8_RET<opName, rc, src>;
263 class DS_BVH_STACK<string opName>
265 (outs getLdStRegisterOperand<VGPR_32>.ret:$vdst, VGPR_32:$addr),
266 (ins VGPR_32:$addr_in, getLdStRegisterOperand<VGPR_32>.ret:$data0, VReg_128:$data1, Offset:$offset),
267 " $vdst, $addr, $data0, $data1$offset"> {
268 let Constraints = "$addr = $addr_in";
269 let DisableEncoding = "$addr_in";
272 // TODO: Use MMOs in the LDS address space instead of hasSideEffects = 1.
273 let hasSideEffects = 1;
274 let SchedRW = [WriteLDS, WriteLDS];
277 class DS_1A_RET<string opName, RegisterClass rc = VGPR_32, bit HasTiedOutput = 0, Operand ofs = Offset,
278 RegisterOperand data_op = getLdStRegisterOperand<rc>.ret>
280 (outs data_op:$vdst),
282 (ins VGPR_32:$addr, ofs:$offset, gds:$gds, data_op:$vdst_in),
283 (ins VGPR_32:$addr, ofs:$offset, gds:$gds)),
284 " $vdst, $addr$offset$gds"> {
285 let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
286 let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
291 multiclass DS_1A_RET_mc<string opName, RegisterClass rc = VGPR_32, bit HasTiedOutput = 0, Operand ofs = Offset> {
292 def "" : DS_1A_RET<opName, rc, HasTiedOutput, ofs>;
294 let has_m0_read = 0 in {
295 def _gfx9 : DS_1A_RET<opName, rc, HasTiedOutput, ofs>;
299 class DS_1A_RET_Tied<string opName, RegisterClass rc = VGPR_32> :
300 DS_1A_RET<opName, rc, 1>;
302 class DS_1A_Off8_RET <string opName, RegisterClass rc = VGPR_32>
304 (outs getLdStRegisterOperand<rc>.ret:$vdst),
305 (ins VGPR_32:$addr, Offset0:$offset0, Offset1:$offset1, gds:$gds),
306 " $vdst, $addr$offset0$offset1$gds"> {
313 multiclass DS_1A_Off8_RET_mc <string opName, RegisterClass rc = VGPR_32> {
314 def "" : DS_1A_Off8_RET<opName, rc>;
316 let has_m0_read = 0 in {
317 def _gfx9 : DS_1A_Off8_RET<opName, rc>;
321 class DS_1A_RET_GDS <string opName> : DS_Pseudo<opName,
322 (outs getLdStRegisterOperand<VGPR_32>.ret:$vdst),
323 (ins VGPR_32:$addr, Offset:$offset),
324 " $vdst, $addr$offset gds"> {
332 class DS_0A_RET <string opName> : DS_Pseudo<opName,
333 (outs getLdStRegisterOperand<VGPR_32>.ret:$vdst),
334 (ins Offset:$offset, gds:$gds),
335 " $vdst$offset$gds"> {
345 class DS_1A <string opName> : DS_Pseudo<opName,
347 (ins VGPR_32:$addr, Offset:$offset, gds:$gds),
348 " $addr$offset$gds"> {
358 multiclass DS_1A_mc <string opName> {
359 def "" : DS_1A<opName>;
361 let has_m0_read = 0 in {
362 def _gfx9 : DS_1A<opName>;
367 class DS_GWS <string opName, dag ins, string asmOps>
368 : DS_Pseudo<opName, (outs), ins, asmOps> {
380 class DS_GWS_0D <string opName>
382 (ins Offset:$offset), "$offset gds"> {
383 let hasSideEffects = 1;
386 class DS_GWS_1D <string opName>
388 (ins getLdStRegisterOperand<VGPR_32>.ret:$data0, Offset:$offset),
389 " $data0$offset gds"> {
391 let has_gws_data0 = 1;
392 let hasSideEffects = 1;
395 class DS_VOID <string opName> : DS_Pseudo<opName,
399 let hasSideEffects = 1;
400 let UseNamedOperandTable = 0;
412 class DS_1A1D_PERMUTE <string opName, SDPatternOperator node = null_frag,
413 RegisterOperand data_op = getLdStRegisterOperand<VGPR_32>.ret>
415 (outs data_op:$vdst),
416 (ins VGPR_32:$addr, data_op:$data0, Offset:$offset),
417 " $vdst, $addr, $data0$offset",
419 (node (DS1Addr1Offset i32:$addr, i32:$offset), i32:$data0))] > {
423 let isConvergent = 1;
429 class DSAtomicRetPat<DS_Pseudo inst, ValueType vt, PatFrag frag, int complexity = 0,
430 bit gds=0> : GCNPat <(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
431 (inst $ptr, getVregSrcForVT<vt>.ret:$value, Offset:$offset, (i1 gds))> {
432 let AddedComplexity = complexity;
// No-return DS records for the 32-bit add/sub/rsub/inc/dec/min/max/and/or/xor
// mnemonics. Each defm expands DS_1A1D_NORET_mc, which emits the base record
// plus a "_gfx9" twin defined with has_m0_read = 0. No register class is
// passed, so the multiclass default (VGPR_32) applies.
435 defm DS_ADD_U32 : DS_1A1D_NORET_mc<"ds_add_u32">;
436 defm DS_SUB_U32 : DS_1A1D_NORET_mc<"ds_sub_u32">;
437 defm DS_RSUB_U32 : DS_1A1D_NORET_mc<"ds_rsub_u32">;
438 defm DS_INC_U32 : DS_1A1D_NORET_mc<"ds_inc_u32">;
439 defm DS_DEC_U32 : DS_1A1D_NORET_mc<"ds_dec_u32">;
440 defm DS_MIN_I32 : DS_1A1D_NORET_mc<"ds_min_i32">;
441 defm DS_MAX_I32 : DS_1A1D_NORET_mc<"ds_max_i32">;
442 defm DS_MIN_U32 : DS_1A1D_NORET_mc<"ds_min_u32">;
443 defm DS_MAX_U32 : DS_1A1D_NORET_mc<"ds_max_u32">;
444 defm DS_AND_B32 : DS_1A1D_NORET_mc<"ds_and_b32">;
445 defm DS_OR_B32 : DS_1A1D_NORET_mc<"ds_or_b32">;
446 defm DS_XOR_B32 : DS_1A1D_NORET_mc<"ds_xor_b32">;
448 let SubtargetPredicate = HasLDSFPAtomicAddF32 in {
449 defm DS_ADD_F32 : DS_1A1D_NORET_mc<"ds_add_f32">;
452 defm DS_MIN_F32 : DS_1A1D_NORET_mc<"ds_min_f32">;
453 defm DS_MAX_F32 : DS_1A1D_NORET_mc<"ds_max_f32">;
// ds_write* records (32 bits and narrower). The single-data forms expand
// DS_1A1D_NORET_mc; the write2 forms expand DS_1A2D_Off8_NORET_mc, whose
// operand list has two data operands and separate $offset0/$offset1 operands
// instead of a single $offset. Both multiclasses also emit a "_gfx9" twin
// with has_m0_read = 0.
456 defm DS_WRITE_B8 : DS_1A1D_NORET_mc<"ds_write_b8">;
457 defm DS_WRITE_B16 : DS_1A1D_NORET_mc<"ds_write_b16">;
458 defm DS_WRITE_B32 : DS_1A1D_NORET_mc<"ds_write_b32">;
459 defm DS_WRITE2_B32 : DS_1A2D_Off8_NORET_mc<"ds_write2_b32">;
460 defm DS_WRITE2ST64_B32: DS_1A2D_Off8_NORET_mc<"ds_write2st64_b32">;
463 let has_m0_read = 0 in {
465 let SubtargetPredicate = HasD16LoadStore in {
466 def DS_WRITE_B8_D16_HI : DS_1A1D_NORET<"ds_write_b8_d16_hi">;
467 def DS_WRITE_B16_D16_HI : DS_1A1D_NORET<"ds_write_b16_d16_hi">;
470 } // End has_m0_read = 0
472 let SubtargetPredicate = HasDSAddTid in {
473 def DS_WRITE_ADDTID_B32 : DS_0A1D_NORET<"ds_write_addtid_b32">;
// ds_add_f64 records, gated on HasLdsAtomicAddF64. The "*_mc_gfx9"
// multiclasses emit a single record per defm, defined with has_m0_read = 0;
// unlike the plain "*_mc" multiclasses they create no separate "_gfx9" twin.
478 let SubtargetPredicate = HasLdsAtomicAddF64 in {
479 defm DS_ADD_F64 : DS_1A1D_NORET_mc_gfx9<"ds_add_f64", VReg_64>;
480 defm DS_ADD_RTN_F64 : DS_1A1D_RET_mc_gfx9<"ds_add_rtn_f64", VReg_64>;
481 } // End SubtargetPredicate = HasLdsAtomicAddF64
// ds_pk_add_{f16,bf16} records, gated on HasAtomicDsPkAdd16Insts. Each
// mnemonic gets a no-return record (DS_1A1D_NORET_mc) and a returning "_rtn"
// record (DS_1A1D_RET_mc); both multiclasses also emit a "_gfx9" twin with
// has_m0_read = 0.
483 let SubtargetPredicate = HasAtomicDsPkAdd16Insts in {
484 defm DS_PK_ADD_F16 : DS_1A1D_NORET_mc<"ds_pk_add_f16">;
485 defm DS_PK_ADD_RTN_F16 : DS_1A1D_RET_mc<"ds_pk_add_rtn_f16", VGPR_32>;
486 defm DS_PK_ADD_BF16 : DS_1A1D_NORET_mc<"ds_pk_add_bf16">;
487 defm DS_PK_ADD_RTN_BF16 : DS_1A1D_RET_mc<"ds_pk_add_rtn_bf16", VGPR_32>;
488 } // End SubtargetPredicate = HasAtomicDsPkAdd16Insts
// Two-data-operand records ($data0 and $data1). The no-return forms expand
// DS_1A2D_NORET_mc and the "_rtn" forms expand DS_1A2D_RET_mc; each defm also
// emits a "_gfx9" twin with has_m0_read = 0. The 64-bit variants pass VReg_64
// as the register class; the others use VGPR_32 (explicitly or by default).
490 defm DS_CMPSTORE_B32 : DS_1A2D_NORET_mc<"ds_cmpstore_b32">;
491 defm DS_CMPSTORE_F32 : DS_1A2D_NORET_mc<"ds_cmpstore_f32">;
492 defm DS_CMPSTORE_B64 : DS_1A2D_NORET_mc<"ds_cmpstore_b64", VReg_64>;
493 defm DS_CMPSTORE_F64 : DS_1A2D_NORET_mc<"ds_cmpstore_f64", VReg_64>;
494 defm DS_CMPSTORE_RTN_B32 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_b32", VGPR_32>;
495 defm DS_CMPSTORE_RTN_F32 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_f32", VGPR_32>;
496 defm DS_CMPSTORE_RTN_B64 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_b64", VReg_64>;
497 defm DS_CMPSTORE_RTN_F64 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_f64", VReg_64>;
499 defm DS_MSKOR_B32 : DS_1A2D_NORET_mc<"ds_mskor_b32">;
500 defm DS_CMPST_B32 : DS_1A2D_NORET_mc<"ds_cmpst_b32">;
501 defm DS_CMPST_F32 : DS_1A2D_NORET_mc<"ds_cmpst_f32">;
// 64-bit no-return records. All of them pass VReg_64 as the register class.
// One-data forms expand DS_1A1D_NORET_mc, two-data forms DS_1A2D_NORET_mc, and
// the write2 forms DS_1A2D_Off8_NORET_mc (separate $offset0/$offset1). Every
// defm also emits a "_gfx9" twin with has_m0_read = 0.
503 defm DS_ADD_U64 : DS_1A1D_NORET_mc<"ds_add_u64", VReg_64>;
504 defm DS_SUB_U64 : DS_1A1D_NORET_mc<"ds_sub_u64", VReg_64>;
505 defm DS_RSUB_U64 : DS_1A1D_NORET_mc<"ds_rsub_u64", VReg_64>;
506 defm DS_INC_U64 : DS_1A1D_NORET_mc<"ds_inc_u64", VReg_64>;
507 defm DS_DEC_U64 : DS_1A1D_NORET_mc<"ds_dec_u64", VReg_64>;
508 defm DS_MIN_I64 : DS_1A1D_NORET_mc<"ds_min_i64", VReg_64>;
509 defm DS_MAX_I64 : DS_1A1D_NORET_mc<"ds_max_i64", VReg_64>;
510 defm DS_MIN_U64 : DS_1A1D_NORET_mc<"ds_min_u64", VReg_64>;
511 defm DS_MAX_U64 : DS_1A1D_NORET_mc<"ds_max_u64", VReg_64>;
512 defm DS_AND_B64 : DS_1A1D_NORET_mc<"ds_and_b64", VReg_64>;
513 defm DS_OR_B64 : DS_1A1D_NORET_mc<"ds_or_b64", VReg_64>;
514 defm DS_XOR_B64 : DS_1A1D_NORET_mc<"ds_xor_b64", VReg_64>;
515 defm DS_MSKOR_B64 : DS_1A2D_NORET_mc<"ds_mskor_b64", VReg_64>;
517 defm DS_WRITE_B64 : DS_1A1D_NORET_mc<"ds_write_b64", VReg_64>;
518 defm DS_WRITE2_B64 : DS_1A2D_Off8_NORET_mc<"ds_write2_b64", VReg_64>;
519 defm DS_WRITE2ST64_B64: DS_1A2D_Off8_NORET_mc<"ds_write2st64_b64", VReg_64>;
521 defm DS_CMPST_B64 : DS_1A2D_NORET_mc<"ds_cmpst_b64", VReg_64>;
522 defm DS_CMPST_F64 : DS_1A2D_NORET_mc<"ds_cmpst_f64", VReg_64>;
523 defm DS_MIN_F64 : DS_1A1D_NORET_mc<"ds_min_f64", VReg_64>;
524 defm DS_MAX_F64 : DS_1A1D_NORET_mc<"ds_max_f64", VReg_64>;
526 defm DS_ADD_RTN_U32 : DS_1A1D_RET_mc<"ds_add_rtn_u32", VGPR_32>;
528 let SubtargetPredicate = HasLDSFPAtomicAddF32 in {
529 defm DS_ADD_RTN_F32 : DS_1A1D_RET_mc<"ds_add_rtn_f32", VGPR_32>;
// 32-bit returning ("_rtn") records. One-data forms expand DS_1A1D_RET_mc and
// two-data forms DS_1A2D_RET_mc; every defm also emits a "_gfx9" twin with
// has_m0_read = 0.
// The wrxchg2 forms expand DS_1A2D_Off8_RET_mc<opName, rc, src> with
// rc = VReg_64 and src = VGPR_32, i.e. the result register class is wider
// than the class used for the two data operands.
531 defm DS_SUB_RTN_U32 : DS_1A1D_RET_mc<"ds_sub_rtn_u32", VGPR_32>;
532 defm DS_RSUB_RTN_U32 : DS_1A1D_RET_mc<"ds_rsub_rtn_u32", VGPR_32>;
533 defm DS_INC_RTN_U32 : DS_1A1D_RET_mc<"ds_inc_rtn_u32", VGPR_32>;
534 defm DS_DEC_RTN_U32 : DS_1A1D_RET_mc<"ds_dec_rtn_u32", VGPR_32>;
535 defm DS_MIN_RTN_I32 : DS_1A1D_RET_mc<"ds_min_rtn_i32", VGPR_32>;
536 defm DS_MAX_RTN_I32 : DS_1A1D_RET_mc<"ds_max_rtn_i32", VGPR_32>;
537 defm DS_MIN_RTN_U32 : DS_1A1D_RET_mc<"ds_min_rtn_u32", VGPR_32>;
538 defm DS_MAX_RTN_U32 : DS_1A1D_RET_mc<"ds_max_rtn_u32", VGPR_32>;
539 defm DS_AND_RTN_B32 : DS_1A1D_RET_mc<"ds_and_rtn_b32", VGPR_32>;
540 defm DS_OR_RTN_B32 : DS_1A1D_RET_mc<"ds_or_rtn_b32", VGPR_32>;
541 defm DS_XOR_RTN_B32 : DS_1A1D_RET_mc<"ds_xor_rtn_b32", VGPR_32>;
542 defm DS_MSKOR_RTN_B32 : DS_1A2D_RET_mc<"ds_mskor_rtn_b32", VGPR_32>;
543 defm DS_CMPST_RTN_B32 : DS_1A2D_RET_mc<"ds_cmpst_rtn_b32", VGPR_32>;
544 defm DS_CMPST_RTN_F32 : DS_1A2D_RET_mc<"ds_cmpst_rtn_f32", VGPR_32>;
545 defm DS_MIN_RTN_F32 : DS_1A1D_RET_mc<"ds_min_rtn_f32", VGPR_32>;
546 defm DS_MAX_RTN_F32 : DS_1A1D_RET_mc<"ds_max_rtn_f32", VGPR_32>;
548 defm DS_WRXCHG_RTN_B32 : DS_1A1D_RET_mc<"ds_wrxchg_rtn_b32">;
549 defm DS_WRXCHG2_RTN_B32 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b32", VReg_64, VGPR_32>;
550 defm DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b32", VReg_64, VGPR_32>;
// 64-bit returning ("_rtn") records, all instantiated with VReg_64. One-data
// forms expand DS_1A1D_RET_mc and two-data forms DS_1A2D_RET_mc; every defm
// also emits a "_gfx9" twin with has_m0_read = 0.
// The wrxchg2 forms expand DS_1A2D_Off8_RET_mc<opName, rc, src> with
// rc = VReg_128 and src = VReg_64.
552 defm DS_ADD_RTN_U64 : DS_1A1D_RET_mc<"ds_add_rtn_u64", VReg_64>;
553 defm DS_SUB_RTN_U64 : DS_1A1D_RET_mc<"ds_sub_rtn_u64", VReg_64>;
554 defm DS_RSUB_RTN_U64 : DS_1A1D_RET_mc<"ds_rsub_rtn_u64", VReg_64>;
555 defm DS_INC_RTN_U64 : DS_1A1D_RET_mc<"ds_inc_rtn_u64", VReg_64>;
556 defm DS_DEC_RTN_U64 : DS_1A1D_RET_mc<"ds_dec_rtn_u64", VReg_64>;
557 defm DS_MIN_RTN_I64 : DS_1A1D_RET_mc<"ds_min_rtn_i64", VReg_64>;
558 defm DS_MAX_RTN_I64 : DS_1A1D_RET_mc<"ds_max_rtn_i64", VReg_64>;
559 defm DS_MIN_RTN_U64 : DS_1A1D_RET_mc<"ds_min_rtn_u64", VReg_64>;
560 defm DS_MAX_RTN_U64 : DS_1A1D_RET_mc<"ds_max_rtn_u64", VReg_64>;
561 defm DS_AND_RTN_B64 : DS_1A1D_RET_mc<"ds_and_rtn_b64", VReg_64>;
562 defm DS_OR_RTN_B64 : DS_1A1D_RET_mc<"ds_or_rtn_b64", VReg_64>;
563 defm DS_XOR_RTN_B64 : DS_1A1D_RET_mc<"ds_xor_rtn_b64", VReg_64>;
564 defm DS_MSKOR_RTN_B64 : DS_1A2D_RET_mc<"ds_mskor_rtn_b64", VReg_64>;
565 defm DS_CMPST_RTN_B64 : DS_1A2D_RET_mc<"ds_cmpst_rtn_b64", VReg_64>;
566 defm DS_CMPST_RTN_F64 : DS_1A2D_RET_mc<"ds_cmpst_rtn_f64", VReg_64>;
567 defm DS_MIN_RTN_F64 : DS_1A1D_RET_mc<"ds_min_rtn_f64", VReg_64>;
568 defm DS_MAX_RTN_F64 : DS_1A1D_RET_mc<"ds_max_rtn_f64", VReg_64>;
570 defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET_mc<"ds_wrxchg_rtn_b64", VReg_64>;
571 defm DS_WRXCHG2_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b64", VReg_128, VReg_64>;
572 defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b64", VReg_128, VReg_64>;
574 let isConvergent = 1, usesCustomInserter = 1 in {
575 def DS_GWS_INIT : DS_GWS_1D<"ds_gws_init"> {
578 def DS_GWS_SEMA_V : DS_GWS_0D<"ds_gws_sema_v">;
579 def DS_GWS_SEMA_BR : DS_GWS_1D<"ds_gws_sema_br">;
580 def DS_GWS_SEMA_P : DS_GWS_0D<"ds_gws_sema_p">;
581 def DS_GWS_BARRIER : DS_GWS_1D<"ds_gws_barrier">;
// "_src2" records, gated on HasDsSrc2Insts. All of them use the DS_1A class,
// whose input operands are only $addr, $offset and $gds (no data operand).
// These are plain `def`s rather than `defm`s, so a single record is created
// per mnemonic and no "_gfx9" twin exists for them.
584 let SubtargetPredicate = HasDsSrc2Insts in {
585 def DS_ADD_SRC2_U32 : DS_1A<"ds_add_src2_u32">;
586 def DS_SUB_SRC2_U32 : DS_1A<"ds_sub_src2_u32">;
587 def DS_RSUB_SRC2_U32 : DS_1A<"ds_rsub_src2_u32">;
588 def DS_INC_SRC2_U32 : DS_1A<"ds_inc_src2_u32">;
589 def DS_DEC_SRC2_U32 : DS_1A<"ds_dec_src2_u32">;
590 def DS_MIN_SRC2_I32 : DS_1A<"ds_min_src2_i32">;
591 def DS_MAX_SRC2_I32 : DS_1A<"ds_max_src2_i32">;
592 def DS_MIN_SRC2_U32 : DS_1A<"ds_min_src2_u32">;
593 def DS_MAX_SRC2_U32 : DS_1A<"ds_max_src2_u32">;
594 def DS_AND_SRC2_B32 : DS_1A<"ds_and_src2_b32">;
595 def DS_OR_SRC2_B32 : DS_1A<"ds_or_src2_b32">;
596 def DS_XOR_SRC2_B32 : DS_1A<"ds_xor_src2_b32">;
597 def DS_MIN_SRC2_F32 : DS_1A<"ds_min_src2_f32">;
598 def DS_MAX_SRC2_F32 : DS_1A<"ds_max_src2_f32">;
600 def DS_ADD_SRC2_U64 : DS_1A<"ds_add_src2_u64">;
601 def DS_SUB_SRC2_U64 : DS_1A<"ds_sub_src2_u64">;
602 def DS_RSUB_SRC2_U64 : DS_1A<"ds_rsub_src2_u64">;
603 def DS_INC_SRC2_U64 : DS_1A<"ds_inc_src2_u64">;
604 def DS_DEC_SRC2_U64 : DS_1A<"ds_dec_src2_u64">;
605 def DS_MIN_SRC2_I64 : DS_1A<"ds_min_src2_i64">;
606 def DS_MAX_SRC2_I64 : DS_1A<"ds_max_src2_i64">;
607 def DS_MIN_SRC2_U64 : DS_1A<"ds_min_src2_u64">;
608 def DS_MAX_SRC2_U64 : DS_1A<"ds_max_src2_u64">;
609 def DS_AND_SRC2_B64 : DS_1A<"ds_and_src2_b64">;
610 def DS_OR_SRC2_B64 : DS_1A<"ds_or_src2_b64">;
611 def DS_XOR_SRC2_B64 : DS_1A<"ds_xor_src2_b64">;
612 def DS_MIN_SRC2_F64 : DS_1A<"ds_min_src2_f64">;
613 def DS_MAX_SRC2_F64 : DS_1A<"ds_max_src2_f64">;
615 def DS_WRITE_SRC2_B32 : DS_1A<"ds_write_src2_b32">;
616 def DS_WRITE_SRC2_B64 : DS_1A<"ds_write_src2_b64">;
617 } // End SubtargetPredicate = HasDsSrc2Insts
619 let Uses = [EXEC], mayLoad = 0, mayStore = 0, isConvergent = 1 in {
620 def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32", VGPR_32, 0, Swizzle>;
623 let mayStore = 0 in {
624 defm DS_READ_I8 : DS_1A_RET_mc<"ds_read_i8">;
625 defm DS_READ_U8 : DS_1A_RET_mc<"ds_read_u8">;
626 defm DS_READ_I16 : DS_1A_RET_mc<"ds_read_i16">;
627 defm DS_READ_U16 : DS_1A_RET_mc<"ds_read_u16">;
628 defm DS_READ_B32 : DS_1A_RET_mc<"ds_read_b32">;
629 defm DS_READ_B64 : DS_1A_RET_mc<"ds_read_b64", VReg_64>;
631 defm DS_READ2_B32 : DS_1A_Off8_RET_mc<"ds_read2_b32", VReg_64>;
632 defm DS_READ2ST64_B32: DS_1A_Off8_RET_mc<"ds_read2st64_b32", VReg_64>;
634 defm DS_READ2_B64 : DS_1A_Off8_RET_mc<"ds_read2_b64", VReg_128>;
635 defm DS_READ2ST64_B64: DS_1A_Off8_RET_mc<"ds_read2st64_b64", VReg_128>;
637 let has_m0_read = 0 in {
638 let SubtargetPredicate = HasD16LoadStore, TiedSourceNotRead = 1 in {
639 def DS_READ_U8_D16 : DS_1A_RET_Tied<"ds_read_u8_d16">;
640 def DS_READ_U8_D16_HI : DS_1A_RET_Tied<"ds_read_u8_d16_hi">;
641 def DS_READ_I8_D16 : DS_1A_RET_Tied<"ds_read_i8_d16">;
642 def DS_READ_I8_D16_HI : DS_1A_RET_Tied<"ds_read_i8_d16_hi">;
643 def DS_READ_U16_D16 : DS_1A_RET_Tied<"ds_read_u16_d16">;
644 def DS_READ_U16_D16_HI : DS_1A_RET_Tied<"ds_read_u16_d16_hi">;
646 } // End has_m0_read = 0
648 let SubtargetPredicate = HasDSAddTid in {
649 def DS_READ_ADDTID_B32 : DS_0A_RET<"ds_read_addtid_b32">;
652 } // End mayStore = 0
// DS_CONSUME / DS_APPEND use DS_0A_RET: a $vdst result with only $offset and
// $gds as inputs (no address operand).
654 def DS_CONSUME : DS_0A_RET<"ds_consume">;
655 def DS_APPEND : DS_0A_RET<"ds_append">;
// DS_ORDERED_COUNT is limited to subtargets matching isNotGFX90APlus. Its
// class, DS_1A_RET_GDS, has no $gds operand; " gds" is a literal part of the
// assembly string.
657 let SubtargetPredicate = isNotGFX90APlus in
658 def DS_ORDERED_COUNT : DS_1A_RET_GDS<"ds_ordered_count">;
660 //===----------------------------------------------------------------------===//
661 // Instruction definitions for CI and newer.
662 //===----------------------------------------------------------------------===//
664 let SubtargetPredicate = isGFX7Plus in {
666 defm DS_WRAP_RTN_B32 : DS_1A2D_RET_mc<"ds_wrap_rtn_b32", VGPR_32>;
667 defm DS_CONDXCHG32_RTN_B64 : DS_1A1D_RET_mc<"ds_condxchg32_rtn_b64", VReg_64>;
669 let isConvergent = 1, usesCustomInserter = 1 in {
670 def DS_GWS_SEMA_RELEASE_ALL : DS_GWS_0D<"ds_gws_sema_release_all">;
673 let mayStore = 0 in {
674 defm DS_READ_B96 : DS_1A_RET_mc<"ds_read_b96", VReg_96>;
675 defm DS_READ_B128: DS_1A_RET_mc<"ds_read_b128", VReg_128>;
676 } // End mayStore = 0
679 defm DS_WRITE_B96 : DS_1A1D_NORET_mc<"ds_write_b96", VReg_96>;
680 defm DS_WRITE_B128 : DS_1A1D_NORET_mc<"ds_write_b128", VReg_128>;
683 def DS_NOP : DS_VOID<"ds_nop">;
685 } // let SubtargetPredicate = isGFX7Plus
687 //===----------------------------------------------------------------------===//
688 // Instruction definitions for VI and newer.
689 //===----------------------------------------------------------------------===//
691 let SubtargetPredicate = isGFX8Plus in {
693 let Uses = [EXEC] in {
694 def DS_PERMUTE_B32 : DS_1A1D_PERMUTE <"ds_permute_b32",
695 int_amdgcn_ds_permute>;
696 def DS_BPERMUTE_B32 : DS_1A1D_PERMUTE <"ds_bpermute_b32",
697 int_amdgcn_ds_bpermute>;
700 } // let SubtargetPredicate = isGFX8Plus
702 let SubtargetPredicate = HasLDSFPAtomicAddF32, OtherPredicates = [HasDsSrc2Insts] in {
703 def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
707 //===----------------------------------------------------------------------===//
708 // Instruction definitions for GFX11.
709 //===----------------------------------------------------------------------===//
// GFX11-only GS register add/sub records. DS_0A1D_RET_GDS is instantiated
// with rc = VReg_64 and src = VGPR_32; the class takes no address operand and
// prints a literal " gds" in its assembly string.
711 let SubtargetPredicate = isGFX11Only in {
713 def DS_ADD_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_add_gs_reg_rtn", VReg_64, VGPR_32>;
714 def DS_SUB_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_sub_gs_reg_rtn", VReg_64, VGPR_32>;
716 } // let SubtargetPredicate = isGFX11Only
// BVH stack record for GFX11 and newer; additionally requires HasImageInsts.
// DS_BVH_STACK ties its $addr output to the $addr_in input via Constraints.
718 let SubtargetPredicate = isGFX11Plus in {
720 let OtherPredicates = [HasImageInsts] in
721 def DS_BVH_STACK_RTN_B32 : DS_BVH_STACK<"ds_bvh_stack_rtn_b32">;
723 } // let SubtargetPredicate = isGFX11Plus
725 //===----------------------------------------------------------------------===//
726 // Instruction definitions for GFX12 and newer.
727 //===----------------------------------------------------------------------===//
729 let SubtargetPredicate = isGFX12Plus in {
731 defm DS_COND_SUB_U32 : DS_1A1D_NORET_mc<"ds_cond_sub_u32">;
732 defm DS_COND_SUB_RTN_U32 : DS_1A1D_RET_mc<"ds_cond_sub_rtn_u32", VGPR_32>;
733 defm DS_SUB_CLAMP_U32 : DS_1A1D_NORET_mc<"ds_sub_clamp_u32">;
734 defm DS_SUB_CLAMP_RTN_U32 : DS_1A1D_RET_mc<"ds_sub_clamp_rtn_u32", VGPR_32>;
736 multiclass DSAtomicRetNoRetPatIntrinsic_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
737 ValueType vt, string frag> {
738 def : DSAtomicRetPat<inst, vt,
739 !cast<PatFrag>(frag#"_local_addrspace")>;
741 let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
742 def : DSAtomicRetPat<noRetInst, vt,
743 !cast<PatFrag>(frag#"_noret_local_addrspace"), /* complexity */ 1>;
746 defm : DSAtomicRetNoRetPatIntrinsic_mc<DS_COND_SUB_RTN_U32, DS_COND_SUB_U32, i32, "int_amdgcn_atomic_cond_sub_u32">;
747 } // let SubtargetPredicate = isGFX12Plus
749 //===----------------------------------------------------------------------===//
751 //===----------------------------------------------------------------------===//
754 (int_amdgcn_ds_swizzle i32:$src, timm:$offset16),
755 (DS_SWIZZLE_B32 VGPR_32:$src, (as_i16timm $offset16), (i1 0))
758 class DSReadPat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0> : GCNPat <
759 (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
760 (inst $ptr, Offset:$offset, (i1 gds))
763 multiclass DSReadPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
765 let OtherPredicates = [LDSRequiresM0Init] in {
766 def : DSReadPat<inst, vt, !cast<PatFrag>(frag#"_m0")>;
769 let OtherPredicates = [NotLDSRequiresM0Init] in {
770 def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
774 class DSReadPat_D16 <DS_Pseudo inst, PatFrag frag, ValueType vt> : GCNPat <
775 (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$in),
776 (inst $ptr, Offset:$offset, (i1 0), $in)
// Selection patterns for 8- and 16-bit local loads. Each defm expands
// DSReadPat_mc, which emits two patterns: one under LDSRequiresM0Init that
// matches the "<frag>_m0" fragment with the given instruction, and one under
// NotLDSRequiresM0Init that matches "<frag>" with the instruction's "_gfx9"
// twin.
// The (DS_READ_I16, i32, "sextloadi16_local") entry used to be listed twice;
// the exact duplicate only re-emitted the same anonymous patterns and has
// been dropped.
779 defm : DSReadPat_mc <DS_READ_I8, i32, "sextloadi8_local">;
780 defm : DSReadPat_mc <DS_READ_I8, i16, "sextloadi8_local">;
781 defm : DSReadPat_mc <DS_READ_U8, i32, "extloadi8_local">;
782 defm : DSReadPat_mc <DS_READ_U8, i32, "zextloadi8_local">;
783 defm : DSReadPat_mc <DS_READ_U8, i16, "extloadi8_local">;
784 defm : DSReadPat_mc <DS_READ_U8, i16, "zextloadi8_local">;
785 defm : DSReadPat_mc <DS_READ_I16, i32, "sextloadi16_local">;
787 defm : DSReadPat_mc <DS_READ_U16, i32, "extloadi16_local">;
788 defm : DSReadPat_mc <DS_READ_U16, i32, "zextloadi16_local">;
789 defm : DSReadPat_mc <DS_READ_U16, i16, "load_local">;
791 foreach vt = Reg32Types.types in {
792 defm : DSReadPat_mc <DS_READ_B32, vt, "load_local">;
// Selection patterns for atomic local loads, mapped onto the ordinary ds_read
// records. The 8- and 16-bit fragments are matched for both i16 and i32 result
// types. Each defm expands DSReadPat_mc into an LDSRequiresM0Init pattern
// ("<frag>_m0") and a NotLDSRequiresM0Init pattern (the "_gfx9" record).
795 defm : DSReadPat_mc <DS_READ_U8, i16, "atomic_load_8_local">;
796 defm : DSReadPat_mc <DS_READ_U8, i32, "atomic_load_8_local">;
797 defm : DSReadPat_mc <DS_READ_U16, i16, "atomic_load_16_local">;
798 defm : DSReadPat_mc <DS_READ_U16, i32, "atomic_load_16_local">;
799 defm : DSReadPat_mc <DS_READ_B32, i32, "atomic_load_32_local">;
800 defm : DSReadPat_mc <DS_READ_B64, i64, "atomic_load_64_local">;
802 let OtherPredicates = [D16PreservesUnusedBits] in {
803 def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2i16>;
804 def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2f16>;
805 def : DSReadPat_D16<DS_READ_U8_D16_HI, az_extloadi8_d16_hi_local, v2i16>;
806 def : DSReadPat_D16<DS_READ_U8_D16_HI, az_extloadi8_d16_hi_local, v2f16>;
807 def : DSReadPat_D16<DS_READ_I8_D16_HI, sextloadi8_d16_hi_local, v2i16>;
808 def : DSReadPat_D16<DS_READ_I8_D16_HI, sextloadi8_d16_hi_local, v2f16>;
810 def : DSReadPat_D16<DS_READ_U16_D16, load_d16_lo_local, v2i16>;
811 def : DSReadPat_D16<DS_READ_U16_D16, load_d16_lo_local, v2f16>;
812 def : DSReadPat_D16<DS_READ_U8_D16, az_extloadi8_d16_lo_local, v2i16>;
813 def : DSReadPat_D16<DS_READ_U8_D16, az_extloadi8_d16_lo_local, v2f16>;
814 def : DSReadPat_D16<DS_READ_I8_D16, sextloadi8_d16_lo_local, v2i16>;
815 def : DSReadPat_D16<DS_READ_I8_D16, sextloadi8_d16_lo_local, v2f16>;
818 class DSWritePat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0> : GCNPat <
819 (frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
820 (inst $ptr, getVregSrcForVT<vt>.ret:$value, Offset:$offset, (i1 gds))
823 multiclass DSWritePat_mc <DS_Pseudo inst, ValueType vt, string frag> {
824 let OtherPredicates = [LDSRequiresM0Init] in {
825 def : DSWritePat<inst, vt, !cast<PatFrag>(frag#"_m0")>;
828 let OtherPredicates = [NotLDSRequiresM0Init] in {
829 def : DSWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
// Selection patterns for 8- and 16-bit local stores. Each defm expands
// DSWritePat_mc, which emits an LDSRequiresM0Init pattern matching
// "<frag>_m0" with the given instruction and a NotLDSRequiresM0Init pattern
// matching "<frag>" with the instruction's "_gfx9" twin.
833 defm : DSWritePat_mc <DS_WRITE_B8, i32, "truncstorei8_local">;
834 defm : DSWritePat_mc <DS_WRITE_B16, i32, "truncstorei16_local">;
835 defm : DSWritePat_mc <DS_WRITE_B8, i16, "truncstorei8_local">;
836 defm : DSWritePat_mc <DS_WRITE_B16, i16, "store_local">;
838 foreach vt = Reg32Types.types in {
839 defm : DSWritePat_mc <DS_WRITE_B32, vt, "store_local">;
// Selection patterns for atomic local stores, mapped onto the ordinary
// ds_write records. The 8- and 16-bit fragments are matched for both i16 and
// i32 value types. Each defm expands DSWritePat_mc into an LDSRequiresM0Init
// pattern ("<frag>_m0") and a NotLDSRequiresM0Init pattern ("_gfx9" record).
842 defm : DSWritePat_mc <DS_WRITE_B8, i16, "atomic_store_8_local">;
843 defm : DSWritePat_mc <DS_WRITE_B8, i32, "atomic_store_8_local">;
844 defm : DSWritePat_mc <DS_WRITE_B16, i16, "atomic_store_16_local">;
845 defm : DSWritePat_mc <DS_WRITE_B16, i32, "atomic_store_16_local">;
846 defm : DSWritePat_mc <DS_WRITE_B32, i32, "atomic_store_32_local">;
847 defm : DSWritePat_mc <DS_WRITE_B64, i64, "atomic_store_64_local">;
849 let OtherPredicates = [HasD16LoadStore] in {
850 def : DSWritePat <DS_WRITE_B16_D16_HI, i32, store_hi16_local>;
851 def : DSWritePat <DS_WRITE_B8_D16_HI, i32, truncstorei8_hi16_local>;
854 class DS64Bit4ByteAlignedReadPat<DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
855 (vt:$value (frag (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1))),
856 (inst $ptr, $offset0, $offset1, (i1 0))
859 class DS64Bit4ByteAlignedWritePat<DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat<
860 (frag vt:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1)),
861 (inst $ptr, (i32 (EXTRACT_SUBREG VReg_64:$value, sub0)),
862 (i32 (EXTRACT_SUBREG VReg_64:$value, sub1)), $offset0, $offset1,
866 class DS128Bit8ByteAlignedReadPat<DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
867 (vt:$value (frag (DS128Bit8ByteAligned i32:$ptr, i8:$offset0, i8:$offset1))),
868 (inst $ptr, $offset0, $offset1, (i1 0))
871 class DS128Bit8ByteAlignedWritePat<DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat<
872 (frag vt:$value, (DS128Bit8ByteAligned i32:$ptr, i8:$offset0, i8:$offset1)),
873 (inst $ptr, (i64 (EXTRACT_SUBREG VReg_128:$value, sub0_sub1)),
874 (i64 (EXTRACT_SUBREG VReg_128:$value, sub2_sub3)), $offset0, $offset1,
878 multiclass DS64Bit4ByteAlignedPat_mc<ValueType vt> {
879 let OtherPredicates = [LDSRequiresM0Init, isGFX7Plus] in {
880 def : DS64Bit4ByteAlignedReadPat<DS_READ2_B32, vt, load_local_m0>;
881 def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32, vt, store_local_m0>;
884 let OtherPredicates = [NotLDSRequiresM0Init] in {
885 def : DS64Bit4ByteAlignedReadPat<DS_READ2_B32_gfx9, vt, load_local>;
886 def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32_gfx9, vt, store_local>;
890 multiclass DS128Bit8ByteAlignedPat_mc<ValueType vt> {
891 let OtherPredicates = [LDSRequiresM0Init, isGFX7Plus] in {
892 def : DS128Bit8ByteAlignedReadPat<DS_READ2_B64, vt, load_local_m0>;
893 def : DS128Bit8ByteAlignedWritePat<DS_WRITE2_B64, vt, store_local_m0>;
896 let OtherPredicates = [NotLDSRequiresM0Init] in {
897 def : DS128Bit8ByteAlignedReadPat<DS_READ2_B64_gfx9, vt, load_local>;
898 def : DS128Bit8ByteAlignedWritePat<DS_WRITE2_B64_gfx9, vt, store_local>;
902 // v2i32 loads are split into i32 loads on SI during lowering, due to a bug
903 // related to bounds checking.
904 foreach vt = VReg_64.RegTypes in {
905 defm : DS64Bit4ByteAlignedPat_mc<vt>;
908 foreach vt = VReg_128.RegTypes in {
909 defm : DS128Bit8ByteAlignedPat_mc<vt>;
912 // Prefer ds_read over ds_read2 and ds_write over ds_write2, all other things
913 // being equal, because it has a larger immediate offset range.
// Every pattern in this group is emitted with AddedComplexity = 100.
914 let AddedComplexity = 100 in {
// 64-bit types: single B64 access, matched through the align8 fragments.
916 foreach vt = VReg_64.RegTypes in {
917 defm : DSReadPat_mc <DS_READ_B64, vt, "load_align8_local">;
918 defm : DSWritePat_mc <DS_WRITE_B64, vt, "store_align8_local">;
// The B96/B128 patterns below are only emitted for isGFX7Plus subtargets.
921 let SubtargetPredicate = isGFX7Plus in {
// 96-bit types: single B96 access, matched through the align16 fragments.
923 foreach vt = VReg_96.RegTypes in {
924 defm : DSReadPat_mc <DS_READ_B96, vt, "load_align16_local">;
925 defm : DSWritePat_mc <DS_WRITE_B96, vt, "store_align16_local">;
// 128-bit types: single B128 access, matched through the align16 fragments.
928 foreach vt = VReg_128.RegTypes in {
929 defm : DSReadPat_mc <DS_READ_B128, vt, "load_align16_local">;
930 defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_align16_local">;
// Extra patterns that apply only when HasUnalignedAccessMode holds.
933 let SubtargetPredicate = HasUnalignedAccessMode in {
935 // Select 64 bit loads and stores aligned less than 4 as a single ds_read_b64/
936 // ds_write_b64 instruction as this is faster than ds_read2_b32/ds_write2_b32
937 // which would be used otherwise. In this case a b32 access would still be
938 // misaligned, but we will have 2 of them.
939 foreach vt = VReg_64.RegTypes in {
940 defm : DSReadPat_mc <DS_READ_B64, vt, "load_align_less_than_4_local">;
941 defm : DSWritePat_mc <DS_WRITE_B64, vt, "store_align_less_than_4_local">;
944 // Selection will split most of the unaligned 3 dword accesses due to performance
945 // reasons when beneficial. Keep these two patterns for the rest of the cases.
946 foreach vt = VReg_96.RegTypes in {
947 defm : DSReadPat_mc <DS_READ_B96, vt, "load_local">;
948 defm : DSWritePat_mc <DS_WRITE_B96, vt, "store_local">;
951 // Select 128 bit loads and stores aligned less than 4 as a single ds_read_b128/
952 // ds_write_b128 instruction as this is faster than ds_read2_b64/ds_write2_b64
953 // which would be used otherwise. In this case a b64 access would still be
954 // misaligned, but we will have 2 of them.
955 foreach vt = VReg_128.RegTypes in {
956 defm : DSReadPat_mc <DS_READ_B128, vt, "load_align_less_than_4_local">;
957 defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_align_less_than_4_local">;
960 } // End SubtargetPredicate = HasUnalignedAccessMode
962 } // End SubtargetPredicate = isGFX7Plus
964 } // End AddedComplexity = 100
// Emits the selection patterns for a returning DS atomic `inst` on type vt.
// The PatFrag for each variant is looked up by name, built by concatenating
// `frag`, an address-space/M0 infix, and the value type.
966 multiclass DSAtomicRetPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
// Local variant for targets that must initialize M0.
967 let OtherPredicates = [LDSRequiresM0Init] in {
968 def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_local_m0_"#vt)>;
// Local variant without the M0 requirement: selects the pseudo named
// <inst>_gfx9.
971 let OtherPredicates = [NotLDSRequiresM0Init] in {
972 def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
973 !cast<PatFrag>(frag#"_local_"#vt)>;
// Region variant, only with HasGDS: same pseudo with the gds bit set.
976 let OtherPredicates = [HasGDS] in {
977 def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt),
978 /* complexity */ 0, /* gds */ 1>;
// Emits paired patterns for a DS atomic that has both a returning pseudo
// (`inst`) and a non-returning pseudo (`noRetInst`). In every variant the
// non-returning pattern uses the "_noret_" fragment and complexity 1, while the
// returning pattern uses the default complexity (passed explicitly as 0 in the
// region variant).
982 multiclass DSAtomicRetNoRetPat_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
983 ValueType vt, string frag> {
// Local variants for targets that must initialize M0.
984 let OtherPredicates = [LDSRequiresM0Init] in {
985 def : DSAtomicRetPat<inst, vt,
986 !cast<PatFrag>(frag#"_local_m0_"#vt)>;
987 def : DSAtomicRetPat<noRetInst, vt,
988 !cast<PatFrag>(frag#"_local_m0_noret_"#vt), /* complexity */ 1>;
// Local variants without the M0 requirement: select the *_gfx9 pseudos.
991 let OtherPredicates = [NotLDSRequiresM0Init] in {
992 def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
993 !cast<PatFrag>(frag#"_local_"#vt)>;
994 def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
995 !cast<PatFrag>(frag#"_local_noret_"#vt), /* complexity */ 1>;
// Region variants, only with HasGDS: gds bit set on both patterns.
998 let OtherPredicates = [HasGDS] in {
999 def : DSAtomicRetPat<inst, vt,
1000 !cast<PatFrag>(frag#"_region_m0_"#vt),
1001 /* complexity */ 0, /* gds */ 1>;
1002 def : DSAtomicRetPat<noRetInst, vt,
1003 !cast<PatFrag>(frag#"_region_m0_noret_"#vt),
1004 /* complexity */ 1, /* gds */ 1>;
// Compare-and-swap patterns for GFX6 through GFX10.
1010 let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in {
1011 // Caution, the order of src and cmp is the *opposite* of the BUFFER_ATOMIC_CMPSWAP opcode.
// Matches (frag addr, cmp, swap) and emits `inst` with the data operands in
// the order cmp, swap, followed by the offset and the gds bit.
1012 class DSAtomicCmpXChgSwapped<DS_Pseudo inst, ValueType vt, PatFrag frag,
1013 int complexity = 0, bit gds=0> : GCNPat<
1014 (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
1015 (inst $ptr, getVregSrcForVT<vt>.ret:$cmp, getVregSrcForVT<vt>.ret:$swap, Offset:$offset, (i1 gds))> {
1016 let AddedComplexity = complexity;
// Emits the returning/non-returning pattern pairs for the local-with-M0,
// local-without-M0 (*_gfx9 pseudos) and region (HasGDS, gds = 1) variants.
// NOTE(review): the line declaring the `frag` parameter is not visible in this
// listing; the body uses it as a string prefix for the PatFrag name.
1019 multiclass DSAtomicCmpXChgSwapped_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueType vt,
1021 let OtherPredicates = [LDSRequiresM0Init] in {
1022 def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_local_m0_"#vt)>;
1023 def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_local_m0_noret_"#vt),
1024 /* complexity */ 1>;
1027 let OtherPredicates = [NotLDSRequiresM0Init] in {
1028 def : DSAtomicCmpXChgSwapped<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
1029 !cast<PatFrag>(frag#"_local_"#vt)>;
1030 def : DSAtomicCmpXChgSwapped<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
1031 !cast<PatFrag>(frag#"_local_noret_"#vt),
1032 /* complexity */ 1>;
1035 let OtherPredicates = [HasGDS] in {
1036 def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt),
1037 /* complexity */ 0, /* gds */ 1>;
1038 def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt),
1039 /* complexity */ 1, /* gds */ 1>;
1042 } // End SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10
// Compare-and-swap patterns for GFX11 and later.
1044 let SubtargetPredicate = isGFX11Plus in {
1045 // The order of src and cmp agrees with the BUFFER_ATOMIC_CMPSWAP opcode.
// Matches (frag addr, cmp, swap) and emits `inst` with the data operands in
// the order swap, cmp, followed by the offset and the gds bit.
1046 class DSAtomicCmpXChg<DS_Pseudo inst, ValueType vt, PatFrag frag,
1047 int complexity = 0, bit gds=0> : GCNPat<
1048 (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
1049 (inst $ptr, getVregSrcForVT<vt>.ret:$swap, getVregSrcForVT<vt>.ret:$cmp, Offset:$offset, (i1 gds))> {
1050 let AddedComplexity = complexity;
// Emits the returning/non-returning pattern pairs. The local patterns select
// the *_gfx9 pseudos; the region patterns (HasGDS) select the base pseudos
// with gds = 1. The non-returning pattern of each pair has complexity 1.
1053 multiclass DSAtomicCmpXChg_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueType vt, string frag> {
1055 def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
1056 !cast<PatFrag>(frag#"_local_"#vt)>;
1057 def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
1058 !cast<PatFrag>(frag#"_local_noret_"#vt), /* complexity */ 1>;
1060 let OtherPredicates = [HasGDS] in {
1061 def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt),
1062 /* complexity */ 0, /* gds */ 1>;
1063 def : DSAtomicCmpXChg<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt),
1064 /* complexity */ 1, /* gds */ 1>;
1067 } // End SubtargetPredicate = isGFX11Plus
// 32-bit atomic patterns. Swap only has a returning form; every other
// operation pairs a returning pseudo with its non-returning counterpart.
1070 defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B32, i32, "atomic_swap">;
1071 defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_U32, DS_ADD_U32, i32, "atomic_load_add">;
1072 defm : DSAtomicRetNoRetPat_mc<DS_SUB_RTN_U32, DS_SUB_U32, i32, "atomic_load_sub">;
1073 defm : DSAtomicRetNoRetPat_mc<DS_INC_RTN_U32, DS_INC_U32, i32, "atomic_load_uinc_wrap">;
1074 defm : DSAtomicRetNoRetPat_mc<DS_DEC_RTN_U32, DS_DEC_U32, i32, "atomic_load_udec_wrap">;
1075 defm : DSAtomicRetNoRetPat_mc<DS_AND_RTN_B32, DS_AND_B32, i32, "atomic_load_and">;
1076 defm : DSAtomicRetNoRetPat_mc<DS_OR_RTN_B32, DS_OR_B32, i32, "atomic_load_or">;
1077 defm : DSAtomicRetNoRetPat_mc<DS_XOR_RTN_B32, DS_XOR_B32, i32, "atomic_load_xor">;
1078 defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_I32, DS_MIN_I32, i32, "atomic_load_min">;
1079 defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_I32, DS_MAX_I32, i32, "atomic_load_max">;
1080 defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_U32, DS_MIN_U32, i32, "atomic_load_umin">;
1081 defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_U32, DS_MAX_U32, i32, "atomic_load_umax">;
1082 defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_F32, DS_MIN_F32, f32, "atomic_load_fmin">;
1083 defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_F32, DS_MAX_F32, f32, "atomic_load_fmax">;
// Packed 16-bit float adds (v2f16 and v2bf16), gated on
// HasAtomicDsPkAdd16Insts.
1086 let SubtargetPredicate = HasAtomicDsPkAdd16Insts in {
1087 defm : DSAtomicRetNoRetPat_mc<DS_PK_ADD_RTN_F16, DS_PK_ADD_F16, v2f16, "atomic_load_fadd">;
1088 defm : DSAtomicRetNoRetPat_mc<DS_PK_ADD_RTN_BF16, DS_PK_ADD_BF16, v2bf16, "atomic_load_fadd">;
// 32-bit compare-and-swap: DS_CMPST_* with the swapped-operand patterns up to
// GFX10, DS_CMPSTORE_* from GFX11 on.
1091 let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in {
1092 defm : DSAtomicCmpXChgSwapped_mc<DS_CMPST_RTN_B32, DS_CMPST_B32, i32, "atomic_cmp_swap">;
1095 let SubtargetPredicate = isGFX11Plus in {
1096 defm : DSAtomicCmpXChg_mc<DS_CMPSTORE_RTN_B32, DS_CMPSTORE_B32, i32, "atomic_cmp_swap">;
// f32 atomic add, gated on HasLDSFPAtomicAddF32.
1099 let SubtargetPredicate = HasLDSFPAtomicAddF32 in {
1100 defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_F32, DS_ADD_F32, f32, "atomic_load_fadd">;
// 64-bit atomic patterns, mirroring the 32-bit list.
1104 defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B64, i64, "atomic_swap">;
1105 defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_U64, DS_ADD_U64, i64, "atomic_load_add">;
1106 defm : DSAtomicRetNoRetPat_mc<DS_SUB_RTN_U64, DS_SUB_U64, i64, "atomic_load_sub">;
1107 defm : DSAtomicRetNoRetPat_mc<DS_INC_RTN_U64, DS_INC_U64, i64, "atomic_load_uinc_wrap">;
1108 defm : DSAtomicRetNoRetPat_mc<DS_DEC_RTN_U64, DS_DEC_U64, i64, "atomic_load_udec_wrap">;
1109 defm : DSAtomicRetNoRetPat_mc<DS_AND_RTN_B64, DS_AND_B64, i64, "atomic_load_and">;
1110 defm : DSAtomicRetNoRetPat_mc<DS_OR_RTN_B64, DS_OR_B64, i64, "atomic_load_or">;
1111 defm : DSAtomicRetNoRetPat_mc<DS_XOR_RTN_B64, DS_XOR_B64, i64, "atomic_load_xor">;
1112 defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_I64, DS_MIN_I64, i64, "atomic_load_min">;
1113 defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_I64, DS_MAX_I64, i64, "atomic_load_max">;
1114 defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_U64, DS_MIN_U64, i64, "atomic_load_umin">;
1115 defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_U64, DS_MAX_U64, i64, "atomic_load_umax">;
1116 defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_F64, DS_MIN_F64, f64, "atomic_load_fmin">;
1117 defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_F64, DS_MAX_F64, f64, "atomic_load_fmax">;
// 64-bit compare-and-swap: DS_CMPST_* with the swapped-operand patterns up to
// GFX10, DS_CMPSTORE_* from GFX11 on.
1119 let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in {
1120 defm : DSAtomicCmpXChgSwapped_mc<DS_CMPST_RTN_B64, DS_CMPST_B64, i64, "atomic_cmp_swap">;
1121 } // End SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10
1123 let SubtargetPredicate = isGFX11Plus in {
1124 defm : DSAtomicCmpXChg_mc<DS_CMPSTORE_RTN_B64, DS_CMPSTORE_B64, i64, "atomic_cmp_swap">;
1125 } // End SubtargetPredicate = isGFX11Plus
// f64 atomic add, gated on HasLdsAtomicAddF64. Only local fragments are used
// here; the non-returning pattern gets AddedComplexity = 1.
1127 let SubtargetPredicate = HasLdsAtomicAddF64 in {
1128 def : DSAtomicRetPat<DS_ADD_RTN_F64, f64, atomic_load_fadd_local_f64>;
1129 let AddedComplexity = 1 in
1130 def : DSAtomicRetPat<DS_ADD_F64, f64, atomic_load_fadd_local_noret_f64>;
// Pattern for an intrinsic-style fragment taking (address, value) and
// returning vt; emits `inst` with the value, the offset and the gds bit.
1132 class DSAtomicRetPatIntrinsic<DS_Pseudo inst, ValueType vt, PatFrag frag,
1133 bit gds=0> : GCNPat <
1134 (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value)),
1135 (inst $ptr, getVregSrcForVT<vt>.ret:$value, Offset:$offset, (i1 gds))> {
// Select the flat-atomic-fadd intrinsic fragments restricted to the local
// address space onto the f64 DS add pseudos.
1138 def : DSAtomicRetPatIntrinsic<DS_ADD_RTN_F64, f64, int_amdgcn_flat_atomic_fadd_local_addrspace>;
1139 let AddedComplexity = 1 in
1140 def : DSAtomicRetPatIntrinsic<DS_ADD_F64, f64, int_amdgcn_flat_atomic_fadd_noret_local_addrspace>;
// NOTE(review): this v2f16 DS_PK_ADD instantiation repeats the one in the
// earlier HasAtomicDsPkAdd16Insts block of this file — confirm whether the
// duplicate is intentional.
1143 let SubtargetPredicate = HasAtomicDsPkAdd16Insts in {
1144 defm : DSAtomicRetNoRetPat_mc<DS_PK_ADD_RTN_F16, DS_PK_ADD_F16, v2f16, "atomic_load_fadd">;
1145 } // End SubtargetPredicate = HasAtomicDsPkAdd16Insts
// NOTE(review): the pattern header and closing lines of the definitions below
// are not visible in this listing; the comments describe only the visible
// source/result DAGs.
// SIds_ordered_count (value, i16 offset) selects DS_ORDERED_COUNT; requires
// HasGDS.
1147 let OtherPredicates = [HasGDS] in
1149 (SIds_ordered_count i32:$value, i16:$offset),
1150 (DS_ORDERED_COUNT $value, (as_i16imm $offset))
// i64-returning ds_add_gs_reg_rtn intrinsic selects DS_ADD_GS_REG_RTN directly.
1154 (i64 (int_amdgcn_ds_add_gs_reg_rtn i32:$src, timm:$offset32)),
1155 (DS_ADD_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32))
// i32-returning form of the same intrinsic: built from the i64 result of
// DS_ADD_GS_REG_RTN via COPY_TO_REGCLASS (remaining operands not visible here).
1159 (i32 (int_amdgcn_ds_add_gs_reg_rtn i32:$src, timm:$offset32)),
1161 (i64 (COPY_TO_REGCLASS
1162 (DS_ADD_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32)),
// i64-returning ds_sub_gs_reg_rtn intrinsic selects DS_SUB_GS_REG_RTN directly.
1168 (i64 (int_amdgcn_ds_sub_gs_reg_rtn i32:$src, timm:$offset32)),
1169 (DS_SUB_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32))
// i32-returning form, handled like the add case above.
1173 (i32 (int_amdgcn_ds_sub_gs_reg_rtn i32:$src, timm:$offset32)),
1175 (i64 (COPY_TO_REGCLASS
1176 (DS_SUB_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32)),
1181 //===----------------------------------------------------------------------===//
1182 // Target-specific instruction encodings.
1183 //===----------------------------------------------------------------------===//
1185 //===----------------------------------------------------------------------===//
1186 // Base ENC_DS for GFX6, GFX7, GFX10, GFX11, GFX12.
1187 //===----------------------------------------------------------------------===//
// Shared 64-bit DS encoding for the targets named in the class name.
//   op     - 8-bit opcode, placed in Inst{25-18}.
//   ps     - pseudo instruction whose operands and has_* flags drive the layout.
//   ef     - encoding family forwarded to SIMCInstr.
//   opName - assembly mnemonic, defaulting to the pseudo's mnemonic.
// NOTE(review): the line declaring `hasGDS` is not visible in this listing; it
// is used at the end of the body to decide whether gds stays an operand.
1189 class Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<8> op, DS_Pseudo ps, int ef,
1190 string opName = ps.Mnemonic,
1192 : DS_Real<ps, opName>, SIMCInstr <ps.PseudoInstr, ef> {
// Operand fields the pseudo does not have are encoded as 0.
1194 let Inst{7-0} = !if(ps.has_offset0, offset0, 0);
1195 let Inst{15-8} = !if(ps.has_offset1, offset1, 0);
// Bit 17 carries the gds operand, or the fixed ps.gdsValue when the pseudo has
// no gds operand.
1196 let Inst{17} = !if(ps.has_gds, gds, ps.gdsValue);
1197 let Inst{25-18} = op;
1198 let Inst{31-26} = 0x36;
// The address byte holds addr, or the low 8 bits of data0 for pseudos flagged
// has_gws_data0.
1199 let Inst{39-32} = !if(ps.has_addr, addr, !if(ps.has_gws_data0, data0{7-0}, 0));
1200 let Inst{47-40} = !if(ps.has_data0, data0{7-0}, 0);
1201 let Inst{55-48} = !if(ps.has_data1, data1{7-0}, 0);
1202 let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, 0);
// When hasGDS is false, gds is forced to 0; otherwise it is left unset (?).
1204 let gds = !if(hasGDS, ?, 0);
1207 //===----------------------------------------------------------------------===//
1209 //===----------------------------------------------------------------------===//
// Defines the GFX12 real instruction for the pseudo named NAME.
//   op        - 8-bit opcode.
//   name      - GFX12 mnemonic; defaults to NAME in lower case.
//   needAlias - when true and `name` differs from the pseudo's mnemonic, an
//               AMDGPUMnemonicAlias<ps.Mnemonic, name> is also emitted.
// The real instruction is built with hasGDS = false and SIEncodingFamily.GFX12.
// NOTE(review): the `def` line that names the record is not visible in this
// listing.
1211 multiclass DS_Real_gfx12<bits<8> op, string name = !tolower(NAME), bit needAlias = true> {
1212 defvar ps = !cast<DS_Pseudo>(NAME);
1213 let AssemblerPredicate = isGFX12Plus in {
1214 let DecoderNamespace = "GFX12" in
1216 Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, ps, SIEncodingFamily.GFX12,
1217 name, /*hasGDS=*/false>;
1218 if !and(needAlias, !ne(ps.Mnemonic, name)) then
1219 def : AMDGPUMnemonicAlias<ps.Mnemonic, name>;
1220 } // End AssemblerPredicate
// GFX12-only real instructions. The floating-point min/max pseudos get
// ds_min_num_*/ds_max_num_* mnemonics here; the remaining entries keep the
// default lower-cased pseudo name.
1223 defm DS_MIN_F32 : DS_Real_gfx12<0x012, "ds_min_num_f32">;
1224 defm DS_MAX_F32 : DS_Real_gfx12<0x013, "ds_max_num_f32">;
1225 defm DS_MIN_RTN_F32 : DS_Real_gfx12<0x032, "ds_min_num_rtn_f32">;
1226 defm DS_MAX_RTN_F32 : DS_Real_gfx12<0x033, "ds_max_num_rtn_f32">;
1227 defm DS_MIN_F64 : DS_Real_gfx12<0x052, "ds_min_num_f64">;
1228 defm DS_MAX_F64 : DS_Real_gfx12<0x053, "ds_max_num_f64">;
1229 defm DS_MIN_RTN_F64 : DS_Real_gfx12<0x072, "ds_min_num_rtn_f64">;
1230 defm DS_MAX_RTN_F64 : DS_Real_gfx12<0x073, "ds_max_num_rtn_f64">;
1231 defm DS_COND_SUB_U32 : DS_Real_gfx12<0x098>;
1232 defm DS_SUB_CLAMP_U32 : DS_Real_gfx12<0x099>;
1233 defm DS_COND_SUB_RTN_U32 : DS_Real_gfx12<0x0a8>;
1234 defm DS_SUB_CLAMP_RTN_U32 : DS_Real_gfx12<0x0a9>;
1235 defm DS_PK_ADD_F16 : DS_Real_gfx12<0x09a>;
1236 defm DS_PK_ADD_RTN_F16 : DS_Real_gfx12<0x0aa>;
1237 defm DS_PK_ADD_BF16 : DS_Real_gfx12<0x09b>;
1238 defm DS_PK_ADD_RTN_BF16 : DS_Real_gfx12<0x0ab>;
1240 // New aliases added in GFX12 without renaming the instructions.
// Each alias pairs a ds_subrev_* spelling with the matching ds_rsub_* mnemonic.
1241 let AssemblerPredicate = isGFX12Plus in {
1242 def : AMDGPUMnemonicAlias<"ds_subrev_u32", "ds_rsub_u32">;
1243 def : AMDGPUMnemonicAlias<"ds_subrev_rtn_u32", "ds_rsub_rtn_u32">;
1244 def : AMDGPUMnemonicAlias<"ds_subrev_u64", "ds_rsub_u64">;
1245 def : AMDGPUMnemonicAlias<"ds_subrev_rtn_u64", "ds_rsub_rtn_u64">;
1248 //===----------------------------------------------------------------------===//
1250 //===----------------------------------------------------------------------===//
// Defines the GFX11-only real instruction for the pseudo named NAME.
//   op   - 8-bit opcode.
//   name - GFX11 mnemonic; defaults to NAME in lower case. When it differs from
//          the pseudo's mnemonic, an AMDGPUMnemonicAlias<ps.Mnemonic, name> is
//          also emitted.
// NOTE(review): the `def` line and the trailing arguments of the base class are
// not visible in this listing.
1252 multiclass DS_Real_gfx11<bits<8> op, string name = !tolower(NAME)> {
1253 defvar ps = !cast<DS_Pseudo>(NAME);
1254 let AssemblerPredicate = isGFX11Only in {
1255 let DecoderNamespace = "GFX11" in
1257 Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, ps, SIEncodingFamily.GFX11,
1259 if !ne(ps.Mnemonic, name) then
1260 def : AMDGPUMnemonicAlias<ps.Mnemonic, name>;
1261 } // End AssemblerPredicate
// Convenience multiclass: the same opcode and mnemonic on both GFX11 and GFX12.
1264 multiclass DS_Real_gfx11_gfx12<bits<8> op, string name = !tolower(NAME)>
1265 : DS_Real_gfx11<op, name>, DS_Real_gfx12<op, name>;
// Loads, stores and exchanges that receive new mnemonics on GFX11/GFX12: each
// entry maps an existing pseudo to its ds_load_*/ds_store_*/ds_storexchg_*
// name.
1267 defm DS_WRITE_B32 : DS_Real_gfx11_gfx12<0x00d, "ds_store_b32">;
1268 defm DS_WRITE2_B32 : DS_Real_gfx11_gfx12<0x00e, "ds_store_2addr_b32">;
1269 defm DS_WRITE2ST64_B32 : DS_Real_gfx11_gfx12<0x00f, "ds_store_2addr_stride64_b32">;
1270 defm DS_WRITE_B8 : DS_Real_gfx11_gfx12<0x01e, "ds_store_b8">;
1271 defm DS_WRITE_B16 : DS_Real_gfx11_gfx12<0x01f, "ds_store_b16">;
1272 defm DS_WRXCHG_RTN_B32 : DS_Real_gfx11_gfx12<0x02d, "ds_storexchg_rtn_b32">;
1273 defm DS_WRXCHG2_RTN_B32 : DS_Real_gfx11_gfx12<0x02e, "ds_storexchg_2addr_rtn_b32">;
1274 defm DS_WRXCHG2ST64_RTN_B32 : DS_Real_gfx11_gfx12<0x02f, "ds_storexchg_2addr_stride64_rtn_b32">;
1275 defm DS_READ_B32 : DS_Real_gfx11_gfx12<0x036, "ds_load_b32">;
1276 defm DS_READ2_B32 : DS_Real_gfx11_gfx12<0x037, "ds_load_2addr_b32">;
1277 defm DS_READ2ST64_B32 : DS_Real_gfx11_gfx12<0x038, "ds_load_2addr_stride64_b32">;
1278 defm DS_READ_I8 : DS_Real_gfx11_gfx12<0x039, "ds_load_i8">;
1279 defm DS_READ_U8 : DS_Real_gfx11_gfx12<0x03a, "ds_load_u8">;
1280 defm DS_READ_I16 : DS_Real_gfx11_gfx12<0x03b, "ds_load_i16">;
1281 defm DS_READ_U16 : DS_Real_gfx11_gfx12<0x03c, "ds_load_u16">;
1282 defm DS_WRITE_B64 : DS_Real_gfx11_gfx12<0x04d, "ds_store_b64">;
1283 defm DS_WRITE2_B64 : DS_Real_gfx11_gfx12<0x04e, "ds_store_2addr_b64">;
1284 defm DS_WRITE2ST64_B64 : DS_Real_gfx11_gfx12<0x04f, "ds_store_2addr_stride64_b64">;
1285 defm DS_WRXCHG_RTN_B64 : DS_Real_gfx11_gfx12<0x06d, "ds_storexchg_rtn_b64">;
1286 defm DS_WRXCHG2_RTN_B64 : DS_Real_gfx11_gfx12<0x06e, "ds_storexchg_2addr_rtn_b64">;
1287 defm DS_WRXCHG2ST64_RTN_B64 : DS_Real_gfx11_gfx12<0x06f, "ds_storexchg_2addr_stride64_rtn_b64">;
1288 defm DS_READ_B64 : DS_Real_gfx11_gfx12<0x076, "ds_load_b64">;
1289 defm DS_READ2_B64 : DS_Real_gfx11_gfx12<0x077, "ds_load_2addr_b64">;
1290 defm DS_READ2ST64_B64 : DS_Real_gfx11_gfx12<0x078, "ds_load_2addr_stride64_b64">;
1291 defm DS_WRITE_B8_D16_HI : DS_Real_gfx11_gfx12<0x0a0, "ds_store_b8_d16_hi">;
1292 defm DS_WRITE_B16_D16_HI : DS_Real_gfx11_gfx12<0x0a1, "ds_store_b16_d16_hi">;
1293 defm DS_READ_U8_D16 : DS_Real_gfx11_gfx12<0x0a2, "ds_load_u8_d16">;
1294 defm DS_READ_U8_D16_HI : DS_Real_gfx11_gfx12<0x0a3, "ds_load_u8_d16_hi">;
1295 defm DS_READ_I8_D16 : DS_Real_gfx11_gfx12<0x0a4, "ds_load_i8_d16">;
1296 defm DS_READ_I8_D16_HI : DS_Real_gfx11_gfx12<0x0a5, "ds_load_i8_d16_hi">;
1297 defm DS_READ_U16_D16 : DS_Real_gfx11_gfx12<0x0a6, "ds_load_u16_d16">;
1298 defm DS_READ_U16_D16_HI : DS_Real_gfx11_gfx12<0x0a7, "ds_load_u16_d16_hi">;
1299 defm DS_WRITE_ADDTID_B32 : DS_Real_gfx11_gfx12<0x0b0, "ds_store_addtid_b32">;
1300 defm DS_READ_ADDTID_B32 : DS_Real_gfx11_gfx12<0x0b1, "ds_load_addtid_b32">;
1301 defm DS_WRITE_B96 : DS_Real_gfx11_gfx12<0x0de, "ds_store_b96">;
1302 defm DS_WRITE_B128 : DS_Real_gfx11_gfx12<0x0df, "ds_store_b128">;
1303 defm DS_READ_B96 : DS_Real_gfx11_gfx12<0x0fe, "ds_load_b96">;
1304 defm DS_READ_B128 : DS_Real_gfx11_gfx12<0x0ff, "ds_load_b128">;
1306 // DS_CMPST_* are renamed to DS_CMPSTORE_* in GFX11, and the data operands (src and cmp) are also
1307 // swapped compared to pre-GFX11.
1308 // Note: the mnemonic alias is not generated to avoid a potential ambiguity due to the semantics change.
// The B32/B64 forms are defined for GFX11 and GFX12; the F32/F64 forms are
// defined for GFX11 only.
1310 defm DS_CMPSTORE_B32 : DS_Real_gfx11_gfx12<0x010>;
1311 defm DS_CMPSTORE_F32 : DS_Real_gfx11<0x011>;
1312 defm DS_CMPSTORE_RTN_B32 : DS_Real_gfx11_gfx12<0x030>;
1313 defm DS_CMPSTORE_RTN_F32 : DS_Real_gfx11<0x031>;
1314 defm DS_CMPSTORE_B64 : DS_Real_gfx11_gfx12<0x050>;
1315 defm DS_CMPSTORE_F64 : DS_Real_gfx11<0x051>;
1316 defm DS_CMPSTORE_RTN_B64 : DS_Real_gfx11_gfx12<0x070>;
1317 defm DS_CMPSTORE_RTN_F64 : DS_Real_gfx11<0x071>;
// Remaining GFX11 entries that keep the default lower-cased mnemonic.
1319 defm DS_ADD_RTN_F32 : DS_Real_gfx11_gfx12<0x079>;
1320 defm DS_ADD_GS_REG_RTN : DS_Real_gfx11<0x07a>;
1321 defm DS_SUB_GS_REG_RTN : DS_Real_gfx11<0x07b>;
1322 defm DS_BVH_STACK_RTN_B32 : DS_Real_gfx11<0x0ad>;
1324 //===----------------------------------------------------------------------===//
1326 //===----------------------------------------------------------------------===//
// Defines <NAME>_gfx10: the GFX10-only real instruction for the pseudo named
// NAME, with opcode `op`, the pseudo's own mnemonic, and
// SIEncodingFamily.GFX10.
1328 let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
1329 multiclass DS_Real_gfx10<bits<8> op> {
1330 def _gfx10 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op,
1331 !cast<DS_Pseudo>(NAME), SIEncodingFamily.GFX10>;
1333 } // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
// Instructions whose only definition through this section's multiclasses is
// the GFX10 one.
1335 defm DS_ADD_RTN_F32 : DS_Real_gfx10<0x055>;
1336 defm DS_WRITE_B8_D16_HI : DS_Real_gfx10<0x0a0>;
1337 defm DS_WRITE_B16_D16_HI : DS_Real_gfx10<0x0a1>;
1338 defm DS_READ_U8_D16 : DS_Real_gfx10<0x0a2>;
1339 defm DS_READ_U8_D16_HI : DS_Real_gfx10<0x0a3>;
1340 defm DS_READ_I8_D16 : DS_Real_gfx10<0x0a4>;
1341 defm DS_READ_I8_D16_HI : DS_Real_gfx10<0x0a5>;
1342 defm DS_READ_U16_D16 : DS_Real_gfx10<0x0a6>;
1343 defm DS_READ_U16_D16_HI : DS_Real_gfx10<0x0a7>;
1344 defm DS_WRITE_ADDTID_B32 : DS_Real_gfx10<0x0b0>;
1345 defm DS_READ_ADDTID_B32 : DS_Real_gfx10<0x0b1>;
1347 //===----------------------------------------------------------------------===//
1348 // GFX10, GFX11, GFX12.
1349 //===----------------------------------------------------------------------===//
// Same opcode and default mnemonic on GFX10, GFX11 and GFX12.
1351 multiclass DS_Real_gfx10_gfx11_gfx12<bits<8> op> :
1352 DS_Real_gfx10<op>, DS_Real_gfx11<op>, DS_Real_gfx12<op>;
// Same opcode and default mnemonic on GFX10 and GFX11 only.
1354 multiclass DS_Real_gfx10_gfx11<bits<8> op> :
1355 DS_Real_gfx10<op>, DS_Real_gfx11<op>;
// DS_ADD_SRC2_F32 is defined for GFX10 only; the others span GFX10-GFX12.
1357 defm DS_ADD_F32 : DS_Real_gfx10_gfx11_gfx12<0x015>;
1358 defm DS_ADD_SRC2_F32 : DS_Real_gfx10<0x095>;
1359 defm DS_PERMUTE_B32 : DS_Real_gfx10_gfx11_gfx12<0x0b2>;
1360 defm DS_BPERMUTE_B32 : DS_Real_gfx10_gfx11_gfx12<0x0b3>;
1362 //===----------------------------------------------------------------------===//
1363 // GFX7, GFX10, GFX11, GFX12.
1364 //===----------------------------------------------------------------------===//
// Defines <NAME>_gfx7: the GFX7-only real instruction for the pseudo named
// NAME. It uses SIEncodingFamily.SI and the "GFX7" decoder namespace.
1366 let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
1367 multiclass DS_Real_gfx7<bits<8> op> {
1368 def _gfx7 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op,
1369 !cast<DS_Pseudo>(NAME), SIEncodingFamily.SI>;
1371 } // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"
// Combinations of the GFX7 definition with the newer-target multiclasses, all
// sharing one opcode.
1373 multiclass DS_Real_gfx7_gfx10_gfx11_gfx12<bits<8> op> :
1374 DS_Real_gfx7<op>, DS_Real_gfx10_gfx11_gfx12<op>;
1376 multiclass DS_Real_gfx7_gfx10_gfx11<bits<8> op> :
1377 DS_Real_gfx7<op>, DS_Real_gfx10_gfx11<op>;
1379 multiclass DS_Real_gfx7_gfx10<bits<8> op> :
1380 DS_Real_gfx7<op>, DS_Real_gfx10<op>;
1382 // FIXME-GFX7: Add tests when upstreaming this part.
1383 defm DS_GWS_SEMA_RELEASE_ALL : DS_Real_gfx7_gfx10_gfx11<0x018>;
1384 defm DS_WRAP_RTN_B32 : DS_Real_gfx7_gfx10_gfx11<0x034>;
1385 defm DS_CONDXCHG32_RTN_B64 : DS_Real_gfx7_gfx10_gfx11_gfx12<0x07e>;
// The B96/B128 accesses stop at GFX10 here; their GFX11/GFX12 definitions are
// the renamed ds_store_*/ds_load_* entries earlier in the file.
1386 defm DS_WRITE_B96 : DS_Real_gfx7_gfx10<0x0de>;
1387 defm DS_WRITE_B128 : DS_Real_gfx7_gfx10<0x0df>;
1388 defm DS_READ_B96 : DS_Real_gfx7_gfx10<0x0fe>;
1389 defm DS_READ_B128 : DS_Real_gfx7_gfx10<0x0ff>;
1391 //===----------------------------------------------------------------------===//
1392 // GFX6, GFX7, GFX10, GFX11, GFX12.
1393 //===----------------------------------------------------------------------===//
// Defines <NAME>_gfx6_gfx7: one real instruction shared by GFX6 and GFX7 for
// the pseudo named NAME. It uses SIEncodingFamily.SI and the "GFX6GFX7"
// decoder namespace.
1395 let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
1396 multiclass DS_Real_gfx6_gfx7<bits<8> op> {
1397 def _gfx6_gfx7 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op,
1398 !cast<DS_Pseudo>(NAME), SIEncodingFamily.SI>;
1400 } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
// Combinations of the GFX6/GFX7 definition with the newer-target multiclasses,
// all sharing one opcode.
1402 multiclass DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<8> op> :
1403 DS_Real_gfx6_gfx7<op>, DS_Real_gfx10_gfx11_gfx12<op>;
1405 multiclass DS_Real_gfx6_gfx7_gfx10_gfx11<bits<8> op> :
1406 DS_Real_gfx6_gfx7<op>, DS_Real_gfx10_gfx11<op>;
1408 multiclass DS_Real_gfx6_gfx7_gfx10<bits<8> op> :
1409 DS_Real_gfx6_gfx7<op>, DS_Real_gfx10<op>;
// Opcode table for instructions that exist from GFX6/GFX7 onwards. The
// multiclass chosen per entry determines which targets get a real instruction:
//   *_gfx10_gfx11_gfx12 - same opcode and mnemonic through GFX12.
//   *_gfx10_gfx11       - stops at GFX11 (the f32/f64 min/max entries have a
//                         separate GFX12 definition with ds_*_num_* mnemonics).
//   *_gfx10             - stops at GFX10 (the read/write/wrxchg entries are
//                         defined for GFX11/GFX12 under renamed mnemonics).
1411 defm DS_ADD_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x000>;
1412 defm DS_SUB_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x001>;
1413 defm DS_RSUB_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x002>;
1414 defm DS_INC_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x003>;
1415 defm DS_DEC_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x004>;
1416 defm DS_MIN_I32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x005>;
1417 defm DS_MAX_I32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x006>;
1418 defm DS_MIN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x007>;
1419 defm DS_MAX_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x008>;
1420 defm DS_AND_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x009>;
1421 defm DS_OR_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00a>;
1422 defm DS_XOR_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00b>;
1423 defm DS_MSKOR_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00c>;
1425 defm DS_WRITE_B32 : DS_Real_gfx6_gfx7_gfx10<0x00d>;
1426 defm DS_WRITE2_B32 : DS_Real_gfx6_gfx7_gfx10<0x00e>;
1427 defm DS_WRITE2ST64_B32 : DS_Real_gfx6_gfx7_gfx10<0x00f>;
1428 defm DS_CMPST_B32 : DS_Real_gfx6_gfx7_gfx10<0x010>;
1429 defm DS_CMPST_F32 : DS_Real_gfx6_gfx7_gfx10<0x011>;
1431 defm DS_MIN_F32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x012>;
1432 defm DS_MAX_F32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x013>;
1433 defm DS_NOP : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x014>;
1434 defm DS_GWS_INIT : DS_Real_gfx6_gfx7_gfx10_gfx11<0x019>;
1435 defm DS_GWS_SEMA_V : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01a>;
1436 defm DS_GWS_SEMA_BR : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01b>;
1437 defm DS_GWS_SEMA_P : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01c>;
1438 defm DS_GWS_BARRIER : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01d>;
1440 defm DS_WRITE_B8 : DS_Real_gfx6_gfx7_gfx10<0x01e>;
1441 defm DS_WRITE_B16 : DS_Real_gfx6_gfx7_gfx10<0x01f>;
1443 defm DS_ADD_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x020>;
1444 defm DS_SUB_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x021>;
1445 defm DS_RSUB_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x022>;
1446 defm DS_INC_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x023>;
1447 defm DS_DEC_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x024>;
1448 defm DS_MIN_RTN_I32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x025>;
1449 defm DS_MAX_RTN_I32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x026>;
1450 defm DS_MIN_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x027>;
1451 defm DS_MAX_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x028>;
1452 defm DS_AND_RTN_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x029>;
1453 defm DS_OR_RTN_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02a>;
1454 defm DS_XOR_RTN_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02b>;
1455 defm DS_MSKOR_RTN_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02c>;
1457 defm DS_WRXCHG_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02d>;
1458 defm DS_WRXCHG2_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02e>;
1459 defm DS_WRXCHG2ST64_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02f>;
1460 defm DS_CMPST_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x030>;
1461 defm DS_CMPST_RTN_F32 : DS_Real_gfx6_gfx7_gfx10<0x031>;
1463 defm DS_MIN_RTN_F32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x032>;
1464 defm DS_MAX_RTN_F32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x033>;
1465 defm DS_SWIZZLE_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x035>;
1467 defm DS_READ_B32 : DS_Real_gfx6_gfx7_gfx10<0x036>;
1468 defm DS_READ2_B32 : DS_Real_gfx6_gfx7_gfx10<0x037>;
1469 defm DS_READ2ST64_B32 : DS_Real_gfx6_gfx7_gfx10<0x038>;
1470 defm DS_READ_I8 : DS_Real_gfx6_gfx7_gfx10<0x039>;
1471 defm DS_READ_U8 : DS_Real_gfx6_gfx7_gfx10<0x03a>;
1472 defm DS_READ_I16 : DS_Real_gfx6_gfx7_gfx10<0x03b>;
1473 defm DS_READ_U16 : DS_Real_gfx6_gfx7_gfx10<0x03c>;
1475 defm DS_CONSUME : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x03d>;
1476 defm DS_APPEND : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x03e>;
1477 defm DS_ORDERED_COUNT : DS_Real_gfx6_gfx7_gfx10_gfx11<0x03f>;
// 64-bit counterparts of the entries above.
1478 defm DS_ADD_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x040>;
1479 defm DS_SUB_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x041>;
1480 defm DS_RSUB_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x042>;
1481 defm DS_INC_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x043>;
1482 defm DS_DEC_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x044>;
1483 defm DS_MIN_I64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x045>;
1484 defm DS_MAX_I64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x046>;
1485 defm DS_MIN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x047>;
1486 defm DS_MAX_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x048>;
1487 defm DS_AND_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x049>;
1488 defm DS_OR_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x04a>;
1489 defm DS_XOR_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x04b>;
1490 defm DS_MSKOR_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x04c>;
1492 defm DS_WRITE_B64 : DS_Real_gfx6_gfx7_gfx10<0x04d>;
1493 defm DS_WRITE2_B64 : DS_Real_gfx6_gfx7_gfx10<0x04e>;
1494 defm DS_WRITE2ST64_B64 : DS_Real_gfx6_gfx7_gfx10<0x04f>;
1495 defm DS_CMPST_B64 : DS_Real_gfx6_gfx7_gfx10<0x050>;
1496 defm DS_CMPST_F64 : DS_Real_gfx6_gfx7_gfx10<0x051>;
1498 defm DS_MIN_F64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x052>;
1499 defm DS_MAX_F64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x053>;
1500 defm DS_ADD_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x060>;
1501 defm DS_SUB_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x061>;
1502 defm DS_RSUB_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x062>;
1503 defm DS_INC_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x063>;
1504 defm DS_DEC_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x064>;
1505 defm DS_MIN_RTN_I64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x065>;
1506 defm DS_MAX_RTN_I64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x066>;
1507 defm DS_MIN_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x067>;
1508 defm DS_MAX_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x068>;
1509 defm DS_AND_RTN_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x069>;
1510 defm DS_OR_RTN_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x06a>;
1511 defm DS_XOR_RTN_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x06b>;
1512 defm DS_MSKOR_RTN_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x06c>;
1514 defm DS_WRXCHG_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06d>;
1515 defm DS_WRXCHG2_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06e>;
1516 defm DS_WRXCHG2ST64_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06f>;
1517 defm DS_CMPST_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x070>;
1518 defm DS_CMPST_RTN_F64 : DS_Real_gfx6_gfx7_gfx10<0x071>;
1520 defm DS_MIN_RTN_F64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x072>;
1521 defm DS_MAX_RTN_F64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x073>;
1523 defm DS_READ_B64 : DS_Real_gfx6_gfx7_gfx10<0x076>;
1524 defm DS_READ2_B64 : DS_Real_gfx6_gfx7_gfx10<0x077>;
1525 defm DS_READ2ST64_B64 : DS_Real_gfx6_gfx7_gfx10<0x078>;
// The *_SRC2_* instructions are only defined through GFX10 in this table.
1526 defm DS_ADD_SRC2_U32 : DS_Real_gfx6_gfx7_gfx10<0x080>;
1527 defm DS_SUB_SRC2_U32 : DS_Real_gfx6_gfx7_gfx10<0x081>;
1528 defm DS_RSUB_SRC2_U32 : DS_Real_gfx6_gfx7_gfx10<0x082>;
1529 defm DS_INC_SRC2_U32 : DS_Real_gfx6_gfx7_gfx10<0x083>;
1530 defm DS_DEC_SRC2_U32 : DS_Real_gfx6_gfx7_gfx10<0x084>;
1531 defm DS_MIN_SRC2_I32 : DS_Real_gfx6_gfx7_gfx10<0x085>;
1532 defm DS_MAX_SRC2_I32 : DS_Real_gfx6_gfx7_gfx10<0x086>;
1533 defm DS_MIN_SRC2_U32 : DS_Real_gfx6_gfx7_gfx10<0x087>;
1534 defm DS_MAX_SRC2_U32 : DS_Real_gfx6_gfx7_gfx10<0x088>;
1535 defm DS_AND_SRC2_B32 : DS_Real_gfx6_gfx7_gfx10<0x089>;
1536 defm DS_OR_SRC2_B32 : DS_Real_gfx6_gfx7_gfx10<0x08a>;
1537 defm DS_XOR_SRC2_B32 : DS_Real_gfx6_gfx7_gfx10<0x08b>;
1538 defm DS_WRITE_SRC2_B32 : DS_Real_gfx6_gfx7_gfx10<0x08d>;
1539 defm DS_MIN_SRC2_F32 : DS_Real_gfx6_gfx7_gfx10<0x092>;
1540 defm DS_MAX_SRC2_F32 : DS_Real_gfx6_gfx7_gfx10<0x093>;
1541 defm DS_ADD_SRC2_U64 : DS_Real_gfx6_gfx7_gfx10<0x0c0>;
1542 defm DS_SUB_SRC2_U64 : DS_Real_gfx6_gfx7_gfx10<0x0c1>;
1543 defm DS_RSUB_SRC2_U64 : DS_Real_gfx6_gfx7_gfx10<0x0c2>;
1544 defm DS_INC_SRC2_U64 : DS_Real_gfx6_gfx7_gfx10<0x0c3>;
1545 defm DS_DEC_SRC2_U64 : DS_Real_gfx6_gfx7_gfx10<0x0c4>;
1546 defm DS_MIN_SRC2_I64 : DS_Real_gfx6_gfx7_gfx10<0x0c5>;
1547 defm DS_MAX_SRC2_I64 : DS_Real_gfx6_gfx7_gfx10<0x0c6>;
1548 defm DS_MIN_SRC2_U64 : DS_Real_gfx6_gfx7_gfx10<0x0c7>;
1549 defm DS_MAX_SRC2_U64 : DS_Real_gfx6_gfx7_gfx10<0x0c8>;
1550 defm DS_AND_SRC2_B64 : DS_Real_gfx6_gfx7_gfx10<0x0c9>;
1551 defm DS_OR_SRC2_B64 : DS_Real_gfx6_gfx7_gfx10<0x0ca>;
1552 defm DS_XOR_SRC2_B64 : DS_Real_gfx6_gfx7_gfx10<0x0cb>;
1553 defm DS_WRITE_SRC2_B64 : DS_Real_gfx6_gfx7_gfx10<0x0cd>;
1554 defm DS_MIN_SRC2_F64 : DS_Real_gfx6_gfx7_gfx10<0x0d2>;
1555 defm DS_MAX_SRC2_F64 : DS_Real_gfx6_gfx7_gfx10<0x0d3>;
1557 //===----------------------------------------------------------------------===//
1559 //===----------------------------------------------------------------------===//
// Real DS instruction for the VI encoding family, assembled under the
// isGFX8GFX9 predicate and decoded from the "GFX8" namespace.
//   op - 8-bit opcode, placed in Inst{24-17}.
//   ps - pseudo instruction whose has_* flags drive the field layout.
// NOTE(review): the line naming the DS_Real base class is not visible in this
// listing.
1561 class DS_Real_vi <bits<8> op, DS_Pseudo ps> :
1563 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
1564 let AssemblerPredicate = isGFX8GFX9;
1565 let DecoderNamespace = "GFX8";
// Operand fields the pseudo does not have are encoded as 0.
1568 let Inst{7-0} = !if(ps.has_offset0, offset0, 0);
1569 let Inst{15-8} = !if(ps.has_offset1, offset1, 0);
// Here the gds bit is bit 16 and the opcode occupies bits 24-17.
1570 let Inst{16} = !if(ps.has_gds, gds, ps.gdsValue);
1571 let Inst{24-17} = op;
1573 let Inst{31-26} = 0x36; // ds prefix
// The address byte holds addr, or the low 8 bits of data0 for pseudos flagged
// has_gws_data0.
1574 let Inst{39-32} = !if(ps.has_addr, addr, !if(ps.has_gws_data0, data0{7-0}, 0));
1575 let Inst{47-40} = !if(ps.has_data0, data0{7-0}, 0);
1576 let Inst{55-48} = !if(ps.has_data1, data1{7-0}, 0);
1577 let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, 0);
// VI records: each def binds one DS pseudo to its 8-bit opcode through
// DS_Real_vi<opcode, pseudo>. Entries are grouped by pseudo family and are not
// strictly sorted by opcode.
//
// 32-bit-suffixed pseudos (U32/I32/B32/F32) and DS_NOP, opcodes 0x0-0x15.
1580 def DS_ADD_U32_vi : DS_Real_vi<0x0, DS_ADD_U32>;
1581 def DS_SUB_U32_vi : DS_Real_vi<0x1, DS_SUB_U32>;
1582 def DS_RSUB_U32_vi : DS_Real_vi<0x2, DS_RSUB_U32>;
1583 def DS_INC_U32_vi : DS_Real_vi<0x3, DS_INC_U32>;
1584 def DS_DEC_U32_vi : DS_Real_vi<0x4, DS_DEC_U32>;
1585 def DS_MIN_I32_vi : DS_Real_vi<0x5, DS_MIN_I32>;
1586 def DS_MAX_I32_vi : DS_Real_vi<0x6, DS_MAX_I32>;
1587 def DS_MIN_U32_vi : DS_Real_vi<0x7, DS_MIN_U32>;
1588 def DS_MAX_U32_vi : DS_Real_vi<0x8, DS_MAX_U32>;
1589 def DS_AND_B32_vi : DS_Real_vi<0x9, DS_AND_B32>;
1590 def DS_OR_B32_vi : DS_Real_vi<0xa, DS_OR_B32>;
1591 def DS_XOR_B32_vi : DS_Real_vi<0xb, DS_XOR_B32>;
1592 def DS_MSKOR_B32_vi : DS_Real_vi<0xc, DS_MSKOR_B32>;
1593 def DS_WRITE_B32_vi : DS_Real_vi<0xd, DS_WRITE_B32>;
1594 def DS_WRITE2_B32_vi : DS_Real_vi<0xe, DS_WRITE2_B32>;
1595 def DS_WRITE2ST64_B32_vi : DS_Real_vi<0xf, DS_WRITE2ST64_B32>;
1596 def DS_CMPST_B32_vi : DS_Real_vi<0x10, DS_CMPST_B32>;
1597 def DS_CMPST_F32_vi : DS_Real_vi<0x11, DS_CMPST_F32>;
1598 def DS_MIN_F32_vi : DS_Real_vi<0x12, DS_MIN_F32>;
1599 def DS_MAX_F32_vi : DS_Real_vi<0x13, DS_MAX_F32>;
1600 def DS_NOP_vi : DS_Real_vi<0x14, DS_NOP>;
1601 def DS_ADD_F32_vi : DS_Real_vi<0x15, DS_ADD_F32>;
// DS_GWS_* pseudos, opcodes 0x99-0x9d.
1602 def DS_GWS_INIT_vi : DS_Real_vi<0x99, DS_GWS_INIT>;
1603 def DS_GWS_SEMA_V_vi : DS_Real_vi<0x9a, DS_GWS_SEMA_V>;
1604 def DS_GWS_SEMA_BR_vi : DS_Real_vi<0x9b, DS_GWS_SEMA_BR>;
1605 def DS_GWS_SEMA_P_vi : DS_Real_vi<0x9c, DS_GWS_SEMA_P>;
1606 def DS_GWS_BARRIER_vi : DS_Real_vi<0x9d, DS_GWS_BARRIER>;
// DS_WRITE_ADDTID_B32 / DS_WRITE_B8 / DS_WRITE_B16, opcodes 0x1d-0x1f.
1607 def DS_WRITE_ADDTID_B32_vi : DS_Real_vi<0x1d, DS_WRITE_ADDTID_B32>;
1608 def DS_WRITE_B8_vi : DS_Real_vi<0x1e, DS_WRITE_B8>;
1609 def DS_WRITE_B16_vi : DS_Real_vi<0x1f, DS_WRITE_B16>;
// 32-bit-suffixed *_RTN_* pseudos, opcodes 0x20-0x35.
1610 def DS_ADD_RTN_U32_vi : DS_Real_vi<0x20, DS_ADD_RTN_U32>;
1611 def DS_SUB_RTN_U32_vi : DS_Real_vi<0x21, DS_SUB_RTN_U32>;
1612 def DS_RSUB_RTN_U32_vi : DS_Real_vi<0x22, DS_RSUB_RTN_U32>;
1613 def DS_INC_RTN_U32_vi : DS_Real_vi<0x23, DS_INC_RTN_U32>;
1614 def DS_DEC_RTN_U32_vi : DS_Real_vi<0x24, DS_DEC_RTN_U32>;
1615 def DS_MIN_RTN_I32_vi : DS_Real_vi<0x25, DS_MIN_RTN_I32>;
1616 def DS_MAX_RTN_I32_vi : DS_Real_vi<0x26, DS_MAX_RTN_I32>;
1617 def DS_MIN_RTN_U32_vi : DS_Real_vi<0x27, DS_MIN_RTN_U32>;
1618 def DS_MAX_RTN_U32_vi : DS_Real_vi<0x28, DS_MAX_RTN_U32>;
1619 def DS_AND_RTN_B32_vi : DS_Real_vi<0x29, DS_AND_RTN_B32>;
1620 def DS_OR_RTN_B32_vi : DS_Real_vi<0x2a, DS_OR_RTN_B32>;
1621 def DS_XOR_RTN_B32_vi : DS_Real_vi<0x2b, DS_XOR_RTN_B32>;
1622 def DS_MSKOR_RTN_B32_vi : DS_Real_vi<0x2c, DS_MSKOR_RTN_B32>;
1623 def DS_WRXCHG_RTN_B32_vi : DS_Real_vi<0x2d, DS_WRXCHG_RTN_B32>;
1624 def DS_WRXCHG2_RTN_B32_vi : DS_Real_vi<0x2e, DS_WRXCHG2_RTN_B32>;
1625 def DS_WRXCHG2ST64_RTN_B32_vi : DS_Real_vi<0x2f, DS_WRXCHG2ST64_RTN_B32>;
1626 def DS_CMPST_RTN_B32_vi : DS_Real_vi<0x30, DS_CMPST_RTN_B32>;
1627 def DS_CMPST_RTN_F32_vi : DS_Real_vi<0x31, DS_CMPST_RTN_F32>;
1628 def DS_MIN_RTN_F32_vi : DS_Real_vi<0x32, DS_MIN_RTN_F32>;
1629 def DS_MAX_RTN_F32_vi : DS_Real_vi<0x33, DS_MAX_RTN_F32>;
1630 def DS_WRAP_RTN_B32_vi : DS_Real_vi<0x34, DS_WRAP_RTN_B32>;
1631 def DS_ADD_RTN_F32_vi : DS_Real_vi<0x35, DS_ADD_RTN_F32>;
// DS_READ_* pseudos, opcodes 0x36-0x3c, plus DS_READ_ADDTID_B32 at 0xb6.
1632 def DS_READ_B32_vi : DS_Real_vi<0x36, DS_READ_B32>;
1633 def DS_READ2_B32_vi : DS_Real_vi<0x37, DS_READ2_B32>;
1634 def DS_READ2ST64_B32_vi : DS_Real_vi<0x38, DS_READ2ST64_B32>;
1635 def DS_READ_I8_vi : DS_Real_vi<0x39, DS_READ_I8>;
1636 def DS_READ_U8_vi : DS_Real_vi<0x3a, DS_READ_U8>;
1637 def DS_READ_I16_vi : DS_Real_vi<0x3b, DS_READ_I16>;
1638 def DS_READ_U16_vi : DS_Real_vi<0x3c, DS_READ_U16>;
1639 def DS_READ_ADDTID_B32_vi : DS_Real_vi<0xb6, DS_READ_ADDTID_B32>;
// DS_CONSUME / DS_APPEND / DS_ORDERED_COUNT, opcodes 0xbd-0xbf.
1640 def DS_CONSUME_vi : DS_Real_vi<0xbd, DS_CONSUME>;
1641 def DS_APPEND_vi : DS_Real_vi<0xbe, DS_APPEND>;
1642 def DS_ORDERED_COUNT_vi : DS_Real_vi<0xbf, DS_ORDERED_COUNT>;
// DS_SWIZZLE_B32 and the two permute pseudos, opcodes 0x3d-0x3f.
1643 def DS_SWIZZLE_B32_vi : DS_Real_vi<0x3d, DS_SWIZZLE_B32>;
1644 def DS_PERMUTE_B32_vi : DS_Real_vi<0x3e, DS_PERMUTE_B32>;
1645 def DS_BPERMUTE_B32_vi : DS_Real_vi<0x3f, DS_BPERMUTE_B32>;
// 64-bit-suffixed pseudos (U64/I64/B64) bound to VI opcodes 0x40-0x4d via
// DS_Real_vi<opcode, pseudo>.
1647 def DS_ADD_U64_vi : DS_Real_vi<0x40, DS_ADD_U64>;
1648 def DS_SUB_U64_vi : DS_Real_vi<0x41, DS_SUB_U64>;
1649 def DS_RSUB_U64_vi : DS_Real_vi<0x42, DS_RSUB_U64>;
1650 def DS_INC_U64_vi : DS_Real_vi<0x43, DS_INC_U64>;
1651 def DS_DEC_U64_vi : DS_Real_vi<0x44, DS_DEC_U64>;
1652 def DS_MIN_I64_vi : DS_Real_vi<0x45, DS_MIN_I64>;
1653 def DS_MAX_I64_vi : DS_Real_vi<0x46, DS_MAX_I64>;
1654 def DS_MIN_U64_vi : DS_Real_vi<0x47, DS_MIN_U64>;
1655 def DS_MAX_U64_vi : DS_Real_vi<0x48, DS_MAX_U64>;
1656 def DS_AND_B64_vi : DS_Real_vi<0x49, DS_AND_B64>;
1657 def DS_OR_B64_vi : DS_Real_vi<0x4a, DS_OR_B64>;
1658 def DS_XOR_B64_vi : DS_Real_vi<0x4b, DS_XOR_B64>;
1659 def DS_MSKOR_B64_vi : DS_Real_vi<0x4c, DS_MSKOR_B64>;
1660 def DS_WRITE_B64_vi : DS_Real_vi<0x4d, DS_WRITE_B64>;
// DS_WRITE2_B64 bound to VI opcode 0x4e via DS_Real_vi<opcode, pseudo>.
1661 def DS_WRITE2_B64_vi : DS_Real_vi<0x4e, DS_WRITE2_B64>;
// DS_WRITE2ST64_B64, the 64-bit CMPST pseudos and the F64 min/max pseudos,
// bound to VI opcodes 0x4f-0x53 via DS_Real_vi<opcode, pseudo>.
1662 def DS_WRITE2ST64_B64_vi : DS_Real_vi<0x4f, DS_WRITE2ST64_B64>;
1663 def DS_CMPST_B64_vi : DS_Real_vi<0x50, DS_CMPST_B64>;
1664 def DS_CMPST_F64_vi : DS_Real_vi<0x51, DS_CMPST_F64>;
1665 def DS_MIN_F64_vi : DS_Real_vi<0x52, DS_MIN_F64>;
1666 def DS_MAX_F64_vi : DS_Real_vi<0x53, DS_MAX_F64>;
// *_D16_HI write pseudos, VI opcodes 0x54-0x55.
1668 def DS_WRITE_B8_D16_HI_vi : DS_Real_vi<0x54, DS_WRITE_B8_D16_HI>;
1669 def DS_WRITE_B16_D16_HI_vi : DS_Real_vi<0x55, DS_WRITE_B16_D16_HI>;
// *_D16 and *_D16_HI read pseudos, VI opcodes 0x56-0x5b.
1671 def DS_READ_U8_D16_vi : DS_Real_vi<0x56, DS_READ_U8_D16>;
1672 def DS_READ_U8_D16_HI_vi : DS_Real_vi<0x57, DS_READ_U8_D16_HI>;
1673 def DS_READ_I8_D16_vi : DS_Real_vi<0x58, DS_READ_I8_D16>;
1674 def DS_READ_I8_D16_HI_vi : DS_Real_vi<0x59, DS_READ_I8_D16_HI>;
1675 def DS_READ_U16_D16_vi : DS_Real_vi<0x5a, DS_READ_U16_D16>;
1676 def DS_READ_U16_D16_HI_vi : DS_Real_vi<0x5b, DS_READ_U16_D16_HI>;
// 64-bit-suffixed *_RTN_* pseudos, VI opcodes 0x60-0x6f.
1678 def DS_ADD_RTN_U64_vi : DS_Real_vi<0x60, DS_ADD_RTN_U64>;
1679 def DS_SUB_RTN_U64_vi : DS_Real_vi<0x61, DS_SUB_RTN_U64>;
1680 def DS_RSUB_RTN_U64_vi : DS_Real_vi<0x62, DS_RSUB_RTN_U64>;
1681 def DS_INC_RTN_U64_vi : DS_Real_vi<0x63, DS_INC_RTN_U64>;
1682 def DS_DEC_RTN_U64_vi : DS_Real_vi<0x64, DS_DEC_RTN_U64>;
1683 def DS_MIN_RTN_I64_vi : DS_Real_vi<0x65, DS_MIN_RTN_I64>;
1684 def DS_MAX_RTN_I64_vi : DS_Real_vi<0x66, DS_MAX_RTN_I64>;
1685 def DS_MIN_RTN_U64_vi : DS_Real_vi<0x67, DS_MIN_RTN_U64>;
1686 def DS_MAX_RTN_U64_vi : DS_Real_vi<0x68, DS_MAX_RTN_U64>;
1687 def DS_AND_RTN_B64_vi : DS_Real_vi<0x69, DS_AND_RTN_B64>;
1688 def DS_OR_RTN_B64_vi : DS_Real_vi<0x6a, DS_OR_RTN_B64>;
1689 def DS_XOR_RTN_B64_vi : DS_Real_vi<0x6b, DS_XOR_RTN_B64>;
1690 def DS_MSKOR_RTN_B64_vi : DS_Real_vi<0x6c, DS_MSKOR_RTN_B64>;
1691 def DS_WRXCHG_RTN_B64_vi : DS_Real_vi<0x6d, DS_WRXCHG_RTN_B64>;
1692 def DS_WRXCHG2_RTN_B64_vi : DS_Real_vi<0x6e, DS_WRXCHG2_RTN_B64>;
1693 def DS_WRXCHG2ST64_RTN_B64_vi : DS_Real_vi<0x6f, DS_WRXCHG2ST64_RTN_B64>;
// Two entries listed out of opcode order: 0x7e and 0x98.
1694 def DS_CONDXCHG32_RTN_B64_vi : DS_Real_vi<0x7e, DS_CONDXCHG32_RTN_B64>;
1695 def DS_GWS_SEMA_RELEASE_ALL_vi : DS_Real_vi<0x98, DS_GWS_SEMA_RELEASE_ALL>;
// 64-bit CMPST_RTN and F64 min/max RTN pseudos, VI opcodes 0x70-0x73.
1696 def DS_CMPST_RTN_B64_vi : DS_Real_vi<0x70, DS_CMPST_RTN_B64>;
1697 def DS_CMPST_RTN_F64_vi : DS_Real_vi<0x71, DS_CMPST_RTN_F64>;
1698 def DS_MIN_RTN_F64_vi : DS_Real_vi<0x72, DS_MIN_RTN_F64>;
1699 def DS_MAX_RTN_F64_vi : DS_Real_vi<0x73, DS_MAX_RTN_F64>;
// 64-bit DS_READ* pseudos, VI opcodes 0x76-0x78.
1701 def DS_READ_B64_vi : DS_Real_vi<0x76, DS_READ_B64>;
1702 def DS_READ2_B64_vi : DS_Real_vi<0x77, DS_READ2_B64>;
1703 def DS_READ2ST64_B64_vi : DS_Real_vi<0x78, DS_READ2ST64_B64>;
// 32-bit-suffixed *_SRC2_* pseudos, VI opcodes 0x80-0x95 (range is sparse:
// only the opcodes listed here are bound).
1705 def DS_ADD_SRC2_U32_vi : DS_Real_vi<0x80, DS_ADD_SRC2_U32>;
1706 def DS_SUB_SRC2_U32_vi : DS_Real_vi<0x81, DS_SUB_SRC2_U32>;
1707 def DS_RSUB_SRC2_U32_vi : DS_Real_vi<0x82, DS_RSUB_SRC2_U32>;
1708 def DS_INC_SRC2_U32_vi : DS_Real_vi<0x83, DS_INC_SRC2_U32>;
1709 def DS_DEC_SRC2_U32_vi : DS_Real_vi<0x84, DS_DEC_SRC2_U32>;
1710 def DS_MIN_SRC2_I32_vi : DS_Real_vi<0x85, DS_MIN_SRC2_I32>;
1711 def DS_MAX_SRC2_I32_vi : DS_Real_vi<0x86, DS_MAX_SRC2_I32>;
1712 def DS_MIN_SRC2_U32_vi : DS_Real_vi<0x87, DS_MIN_SRC2_U32>;
1713 def DS_MAX_SRC2_U32_vi : DS_Real_vi<0x88, DS_MAX_SRC2_U32>;
1714 def DS_AND_SRC2_B32_vi : DS_Real_vi<0x89, DS_AND_SRC2_B32>;
1715 def DS_OR_SRC2_B32_vi : DS_Real_vi<0x8a, DS_OR_SRC2_B32>;
1716 def DS_XOR_SRC2_B32_vi : DS_Real_vi<0x8b, DS_XOR_SRC2_B32>;
1717 def DS_WRITE_SRC2_B32_vi : DS_Real_vi<0x8d, DS_WRITE_SRC2_B32>;
1718 def DS_MIN_SRC2_F32_vi : DS_Real_vi<0x92, DS_MIN_SRC2_F32>;
1719 def DS_MAX_SRC2_F32_vi : DS_Real_vi<0x93, DS_MAX_SRC2_F32>;
1720 def DS_ADD_SRC2_F32_vi : DS_Real_vi<0x95, DS_ADD_SRC2_F32>;
// 64-bit-suffixed *_SRC2_* pseudos, VI opcodes 0xc0-0xd3 (also sparse).
1721 def DS_ADD_SRC2_U64_vi : DS_Real_vi<0xc0, DS_ADD_SRC2_U64>;
1722 def DS_SUB_SRC2_U64_vi : DS_Real_vi<0xc1, DS_SUB_SRC2_U64>;
1723 def DS_RSUB_SRC2_U64_vi : DS_Real_vi<0xc2, DS_RSUB_SRC2_U64>;
1724 def DS_INC_SRC2_U64_vi : DS_Real_vi<0xc3, DS_INC_SRC2_U64>;
1725 def DS_DEC_SRC2_U64_vi : DS_Real_vi<0xc4, DS_DEC_SRC2_U64>;
1726 def DS_MIN_SRC2_I64_vi : DS_Real_vi<0xc5, DS_MIN_SRC2_I64>;
1727 def DS_MAX_SRC2_I64_vi : DS_Real_vi<0xc6, DS_MAX_SRC2_I64>;
1728 def DS_MIN_SRC2_U64_vi : DS_Real_vi<0xc7, DS_MIN_SRC2_U64>;
1729 def DS_MAX_SRC2_U64_vi : DS_Real_vi<0xc8, DS_MAX_SRC2_U64>;
1730 def DS_AND_SRC2_B64_vi : DS_Real_vi<0xc9, DS_AND_SRC2_B64>;
1731 def DS_OR_SRC2_B64_vi : DS_Real_vi<0xca, DS_OR_SRC2_B64>;
1732 def DS_XOR_SRC2_B64_vi : DS_Real_vi<0xcb, DS_XOR_SRC2_B64>;
1733 def DS_WRITE_SRC2_B64_vi : DS_Real_vi<0xcd, DS_WRITE_SRC2_B64>;
1734 def DS_MIN_SRC2_F64_vi : DS_Real_vi<0xd2, DS_MIN_SRC2_F64>;
1735 def DS_MAX_SRC2_F64_vi : DS_Real_vi<0xd3, DS_MAX_SRC2_F64>;
// B96 / B128 write pseudos (VI opcodes 0xde-0xdf) and read pseudos
// (VI opcodes 0xfe-0xff).
1736 def DS_WRITE_B96_vi : DS_Real_vi<0xde, DS_WRITE_B96>;
1737 def DS_WRITE_B128_vi : DS_Real_vi<0xdf, DS_WRITE_B128>;
1738 def DS_READ_B96_vi : DS_Real_vi<0xfe, DS_READ_B96>;
1739 def DS_READ_B128_vi : DS_Real_vi<0xff, DS_READ_B128>;
// DS_ADD_F64 and DS_ADD_RTN_F64 bound to VI opcodes 0x5c and 0x7c.
// NOTE(review): any enclosing predicate scope for these defs is not visible in
// this excerpt -- confirm against the full file.
1742 def DS_ADD_F64_vi : DS_Real_vi<0x5c, DS_ADD_F64>;
1743 def DS_ADD_RTN_F64_vi : DS_Real_vi<0x7c, DS_ADD_RTN_F64>;
// DS_PK_ADD_* pseudos (F16 and BF16): the plain forms take VI opcodes
// 0x17-0x18, the *_RTN_* forms 0xb7-0xb8.
// NOTE(review): any enclosing predicate scope for these defs is not visible in
// this excerpt -- confirm against the full file.
1746 def DS_PK_ADD_F16_vi : DS_Real_vi<0x17, DS_PK_ADD_F16>;
1747 def DS_PK_ADD_RTN_F16_vi : DS_Real_vi<0xb7, DS_PK_ADD_RTN_F16>;
1748 def DS_PK_ADD_BF16_vi : DS_Real_vi<0x18, DS_PK_ADD_BF16>;
1749 def DS_PK_ADD_RTN_BF16_vi : DS_Real_vi<0xb8, DS_PK_ADD_RTN_BF16>;