Revert " [LoongArch][ISel] Check the number of sign bits in `PatGprGpr_32` (#107432)"
[llvm-project.git] / llvm / lib / Target / AMDGPU / DSInstructions.td
blob219246b71fe80b96d3ddde32a7ff796008471442
1 //===-- DSInstructions.td - DS Instruction Definitions --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
// Common base class for the DS pseudo instructions defined in this file.
// It derives from InstSI with an empty asm string and from SIMCInstr with
// SIEncodingFamily.NONE; the mnemonic and operand syntax are kept in the
// Mnemonic / AsmOperands fields. The has_* bits below describe which operands
// are present; subclasses clear the ones they do not have.
9 class DS_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> pattern=[]> :
10   InstSI <outs, ins, "", pattern>,
11   SIMCInstr <opName, SIEncodingFamily.NONE> {
13   let LGKM_CNT = 1;
14   let DS = 1;
15   let GWS = 0;
16   let Size = 8;
17   let UseNamedOperandTable = 1;
19   // Most instructions load and store data, so set this as the default.
20   let mayLoad = 1;
21   let mayStore = 1;
23   let hasSideEffects = 0;
24   let SchedRW = [WriteLDS];
26   let isPseudo = 1;
27   let isCodeGenOnly = 1;
29   string Mnemonic = opName;
30   string AsmOperands = asmOps;
32   // These bits are a kind of hack: it would be more natural
33   // to test the "outs" and "ins" dags for the presence of particular operands.
34   bits<1> has_vdst = 1;
35   bits<1> has_addr = 1;
36   bits<1> has_data0 = 1;
37   bits<1> has_data1 = 1;
39   bits<1> has_gws_data0 = 0; // data0 is encoded as addr
41   bits<1> has_offset  = 1; // has "offset" that should be split to offset0,1
42   bits<1> has_offset0 = 1;
43   bits<1> has_offset1 = 1;
45   bits<1> has_gds = 1;
46   bits<1> gdsValue = 0; // if has_gds == 0 set gds to this value
48   bits<1> has_m0_read = 1;
  // M0 is only added to the implicit uses when has_m0_read is set.
50   let Uses = !if(has_m0_read, [M0, EXEC], [EXEC]);
// Real counterpart of a DS_Pseudo. It reuses the pseudo's operand lists,
// builds its asm string as opName # ps.AsmOperands, mixes in Enc64, and
// copies the predicate / scheduling / memory flags from the pseudo `ps`.
53 class DS_Real <DS_Pseudo ps, string opName = ps.Mnemonic> :
54   InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands>,
55   Enc64 {
57   let isPseudo = 0;
58   let isCodeGenOnly = 0;
59   let LGKM_CNT = 1;
60   let DS = 1;
61   let UseNamedOperandTable = 1;
63   // copy relevant pseudo op flags
64   let GWS                = ps.GWS;
65   let SubtargetPredicate = ps.SubtargetPredicate;
66   let WaveSizePredicate  = ps.WaveSizePredicate;
67   let OtherPredicates    = ps.OtherPredicates;
68   let TSFlags            = ps.TSFlags;
69   let SchedRW            = ps.SchedRW;
70   let mayLoad            = ps.mayLoad;
71   let mayStore           = ps.mayStore;
72   let IsAtomicRet        = ps.IsAtomicRet;
73   let IsAtomicNoRet      = ps.IsAtomicNoRet;
74   let Uses               = ps.Uses;
75   let Defs               = ps.Defs;
77   let Constraints = ps.Constraints;
78   let DisableEncoding = ps.DisableEncoding;
80   // encoding fields
81   bits<10> vdst;
82   bits<1> gds;
83   bits<8> addr;
84   bits<10> data0;
85   bits<10> data1;
86   bits<8> offset0;
87   bits<8> offset1;
  // When the pseudo has a combined 16-bit "offset", split it into the two
  // 8-bit fields (low byte -> offset0, high byte -> offset1); otherwise the
  // fields are left unset (?).
89   bits<16> offset;
90   let offset0 = !if(ps.has_offset, offset{7-0}, ?);
91   let offset1 = !if(ps.has_offset, offset{15-8}, ?);
  // acc is bit 9 of vdst when the pseudo has a vdst; otherwise bit 9 of data0
  // when it has data0 or GWS data0; otherwise 0.
93   bits<1> acc = !if(ps.has_vdst, vdst{9},
94                     !if(!or(ps.has_data0, ps.has_gws_data0), data0{9}, 0));
97 // DS Pseudo instructions
// No address, one data operand, no result: inputs are $data0, $offset, $gds.
99 class DS_0A1D_NORET<string opName, RegisterClass rc = VGPR_32>
100 : DS_Pseudo<opName,
101   (outs),
102   (ins getLdStRegisterOperand<rc>.ret:$data0, Offset:$offset, gds:$gds),
103   " $data0$offset$gds"> {
105   let has_addr = 0;
106   let has_data1 = 0;
107   let has_vdst = 0;
// One address, one data operand, no result. Marked IsAtomicNoRet.
110 class DS_1A1D_NORET<string opName, RegisterClass rc = VGPR_32>
111 : DS_Pseudo<opName,
112   (outs),
113   (ins VGPR_32:$addr, getLdStRegisterOperand<rc>.ret:$data0, Offset:$offset, gds:$gds),
114   " $addr, $data0$offset$gds"> {
116   let has_data1 = 0;
117   let has_vdst = 0;
118   let IsAtomicNoRet = 1;
// Defines the DS_1A1D_NORET pseudo under the base name, plus a "_gfx9"
// variant with has_m0_read cleared.
121 multiclass DS_1A1D_NORET_mc<string opName, RegisterClass rc = VGPR_32> {
122   def "" : DS_1A1D_NORET<opName, rc>;
124   let has_m0_read = 0 in {
125     def _gfx9 : DS_1A1D_NORET<opName, rc>;
126   }
// Defines only one DS_1A1D_NORET pseudo, under the base name, with
// has_m0_read cleared (no separate "_gfx9" def).
129 multiclass DS_1A1D_NORET_mc_gfx9<string opName, RegisterClass rc = VGPR_32> {
130   let has_m0_read = 0 in {
131     def "" : DS_1A1D_NORET<opName, rc>;
132   }
// One address, two data operands ($data0, $data1), no result.
// Marked IsAtomicNoRet.
135 class DS_1A2D_NORET<string opName, RegisterClass rc = VGPR_32,
136                     RegisterOperand data_op = getLdStRegisterOperand<rc>.ret>
137 : DS_Pseudo<opName,
138   (outs),
139   (ins VGPR_32:$addr, data_op:$data0, data_op:$data1, Offset:$offset, gds:$gds),
140   " $addr, $data0, $data1$offset$gds"> {
142   let has_vdst = 0;
143   let IsAtomicNoRet = 1;
// Defines the DS_1A2D_NORET pseudo under the base name, plus a "_gfx9"
// variant with has_m0_read cleared.
146 multiclass DS_1A2D_NORET_mc<string opName, RegisterClass rc = VGPR_32> {
147   def "" : DS_1A2D_NORET<opName, rc>;
149   let has_m0_read = 0 in {
150     def _gfx9 : DS_1A2D_NORET<opName, rc>;
151   }
// One address, two data operands, no result, with two separate offset
// operands ($offset0, $offset1) instead of a combined $offset.
154 class DS_1A2D_Off8_NORET <string opName, RegisterClass rc = VGPR_32,
155                           RegisterOperand data_op = getLdStRegisterOperand<rc>.ret>
156 : DS_Pseudo<opName,
157   (outs),
158   (ins VGPR_32:$addr, data_op:$data0, data_op:$data1,
159        Offset0:$offset0, Offset1:$offset1, gds:$gds),
160   " $addr, $data0, $data1$offset0$offset1$gds"> {
162   let has_vdst = 0;
163   let has_offset = 0;
// Defines the DS_1A2D_Off8_NORET pseudo under the base name, plus a "_gfx9"
// variant with has_m0_read cleared.
166 multiclass DS_1A2D_Off8_NORET_mc <string opName, RegisterClass rc = VGPR_32> {
167   def "" : DS_1A2D_Off8_NORET<opName, rc>;
169   let has_m0_read = 0 in {
170     def _gfx9 : DS_1A2D_Off8_NORET<opName, rc>;
171   }
// No address, one data operand, returns $vdst. There is no gds operand:
// "gds" is a literal part of the asm string and gdsValue is fixed to 1.
// The result (rc) and source (src) register classes may differ.
174 class DS_0A1D_RET_GDS<string opName, RegisterClass rc = VGPR_32, RegisterClass src = rc,
175                   RegisterOperand dst_op = getLdStRegisterOperand<rc>.ret,
176                   RegisterOperand src_op = getLdStRegisterOperand<src>.ret>
177 : DS_Pseudo<opName,
178   (outs dst_op:$vdst),
179   (ins src_op:$data0, Offset:$offset),
180   " $vdst, $data0$offset gds"> {
182   let has_addr = 0;
183   let has_data1 = 0;
184   let has_gds = 0;
185   let gdsValue = 1;
186   let hasSideEffects = 1;
// One address, one data operand, returns $vdst (same operand type as
// $data0). Marked IsAtomicRet and hasPostISelHook.
189 class DS_1A1D_RET <string opName, RegisterClass rc = VGPR_32,
190                   RegisterOperand data_op = getLdStRegisterOperand<rc>.ret>
191 : DS_Pseudo<opName,
192   (outs data_op:$vdst),
193   (ins VGPR_32:$addr, data_op:$data0, Offset:$offset, gds:$gds),
194   " $vdst, $addr, $data0$offset$gds"> {
196   let hasPostISelHook = 1;
197   let has_data1 = 0;
198   let IsAtomicRet = 1;
// Defines the DS_1A1D_RET pseudo under the base name, plus a "_gfx9"
// variant with has_m0_read cleared.
201 multiclass DS_1A1D_RET_mc <string opName, RegisterClass rc = VGPR_32> {
202   def "" : DS_1A1D_RET<opName, rc>;
204   let has_m0_read = 0 in {
205     def _gfx9 : DS_1A1D_RET<opName, rc>;
206   }
// Defines only one DS_1A1D_RET pseudo, under the base name, with
// has_m0_read cleared (no separate "_gfx9" def).
209 multiclass DS_1A1D_RET_mc_gfx9 <string opName, RegisterClass rc = VGPR_32> {
210   let has_m0_read = 0 in {
211     def "" : DS_1A1D_RET<opName, rc>;
212   }
// One address, two data operands, returns $vdst. The result (rc) and source
// (src) register classes may differ. Marked IsAtomicRet and hasPostISelHook.
215 class DS_1A2D_RET<string opName,
216                   RegisterClass rc = VGPR_32,
217                   RegisterClass src = rc,
218                   RegisterOperand dst_op = getLdStRegisterOperand<rc>.ret,
219                   RegisterOperand src_op = getLdStRegisterOperand<src>.ret>
220 : DS_Pseudo<opName,
221   (outs dst_op:$vdst),
222   (ins VGPR_32:$addr, src_op:$data0, src_op:$data1, Offset:$offset, gds:$gds),
223   " $vdst, $addr, $data0, $data1$offset$gds"> {
225   let hasPostISelHook = 1;
226   let IsAtomicRet = 1;
// Defines the DS_1A2D_RET pseudo under the base name, plus a "_gfx9"
// variant with has_m0_read cleared.
229 multiclass DS_1A2D_RET_mc<string opName,
230                           RegisterClass rc = VGPR_32,
231                           RegisterClass src = rc> {
232   def "" : DS_1A2D_RET<opName, rc, src>;
234   let has_m0_read = 0 in {
235     def _gfx9 : DS_1A2D_RET<opName, rc, src>;
236   }
// One address, two data operands, returns $vdst, with two separate offset
// operands ($offset0, $offset1) instead of a combined $offset.
// Unlike DS_1A2D_RET, this class does not set IsAtomicRet.
239 class DS_1A2D_Off8_RET<string opName,
240                        RegisterClass rc = VGPR_32,
241                        RegisterClass src = rc,
242                        RegisterOperand dst_op = getLdStRegisterOperand<rc>.ret,
243                        RegisterOperand src_op = getLdStRegisterOperand<src>.ret>
244 : DS_Pseudo<opName,
245   (outs dst_op:$vdst),
246   (ins VGPR_32:$addr, src_op:$data0, src_op:$data1, Offset0:$offset0, Offset1:$offset1, gds:$gds),
247   " $vdst, $addr, $data0, $data1$offset0$offset1$gds"> {
249   let has_offset = 0;
250   let hasPostISelHook = 1;
// Defines the DS_1A2D_Off8_RET pseudo under the base name, plus a "_gfx9"
// variant with has_m0_read cleared.
253 multiclass DS_1A2D_Off8_RET_mc<string opName,
254                                RegisterClass rc = VGPR_32,
255                                RegisterClass src = rc> {
256   def "" : DS_1A2D_Off8_RET<opName, rc, src>;
258   let has_m0_read = 0 in {
259     def _gfx9 : DS_1A2D_Off8_RET<opName, rc, src>;
260   }
// Produces two outputs, $vdst and $addr. The $addr output is tied to the
// $addr_in input, and $addr_in is excluded from the encoding. There is no
// gds operand (gdsValue is 0). Scheduled as two WriteLDS entries.
263 class DS_BVH_STACK<string opName>
264 : DS_Pseudo<opName,
265   (outs getLdStRegisterOperand<VGPR_32>.ret:$vdst, VGPR_32:$addr),
266   (ins VGPR_32:$addr_in, getLdStRegisterOperand<VGPR_32>.ret:$data0, VReg_128:$data1, Offset:$offset),
267   " $vdst, $addr, $data0, $data1$offset"> {
268   let Constraints = "$addr = $addr_in";
269   let DisableEncoding = "$addr_in";
270   let has_gds = 0;
271   let gdsValue = 0;
272   // TODO: Use MMOs in the LDS address space instead of hasSideEffects = 1.
273   let hasSideEffects = 1;
274   let SchedRW = [WriteLDS, WriteLDS];
// One address, no data operands, returns $vdst. The offset operand type is
// selectable through `ofs`. When HasTiedOutput is set, an extra $vdst_in
// input is appended, tied to $vdst and excluded from the encoding.
277 class DS_1A_RET<string opName, RegisterClass rc = VGPR_32, bit HasTiedOutput = 0, Operand ofs = Offset,
278                 RegisterOperand data_op = getLdStRegisterOperand<rc>.ret>
279 : DS_Pseudo<opName,
280   (outs data_op:$vdst),
281   !if(HasTiedOutput,
282     (ins VGPR_32:$addr, ofs:$offset, gds:$gds, data_op:$vdst_in),
283     (ins VGPR_32:$addr, ofs:$offset, gds:$gds)),
284   " $vdst, $addr$offset$gds"> {
285   let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
286   let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
287   let has_data0 = 0;
288   let has_data1 = 0;
// Defines the DS_1A_RET pseudo under the base name, plus a "_gfx9"
// variant with has_m0_read cleared.
291 multiclass DS_1A_RET_mc<string opName, RegisterClass rc = VGPR_32, bit HasTiedOutput = 0, Operand ofs = Offset> {
292   def "" : DS_1A_RET<opName, rc, HasTiedOutput, ofs>;
294   let has_m0_read = 0 in {
295     def _gfx9 : DS_1A_RET<opName, rc, HasTiedOutput, ofs>;
296   }
// Shorthand for DS_1A_RET with HasTiedOutput = 1.
299 class DS_1A_RET_Tied<string opName, RegisterClass rc = VGPR_32> :
300   DS_1A_RET<opName, rc, 1>;
// One address, no data operands, returns $vdst, with two separate offset
// operands ($offset0, $offset1) instead of a combined $offset.
302 class DS_1A_Off8_RET <string opName, RegisterClass rc = VGPR_32>
303 : DS_Pseudo<opName,
304   (outs getLdStRegisterOperand<rc>.ret:$vdst),
305   (ins VGPR_32:$addr, Offset0:$offset0, Offset1:$offset1, gds:$gds),
306   " $vdst, $addr$offset0$offset1$gds"> {
308   let has_offset = 0;
309   let has_data0 = 0;
310   let has_data1 = 0;
// Defines the DS_1A_Off8_RET pseudo under the base name, plus a "_gfx9"
// variant with has_m0_read cleared.
313 multiclass DS_1A_Off8_RET_mc <string opName, RegisterClass rc = VGPR_32> {
314   def "" : DS_1A_Off8_RET<opName, rc>;
316   let has_m0_read = 0 in {
317     def _gfx9 : DS_1A_Off8_RET<opName, rc>;
318   }
// One address, no data operands, returns $vdst. There is no gds operand:
// "gds" is a literal part of the asm string and gdsValue is fixed to 1.
321 class DS_1A_RET_GDS <string opName> : DS_Pseudo<opName,
322   (outs getLdStRegisterOperand<VGPR_32>.ret:$vdst),
323   (ins VGPR_32:$addr, Offset:$offset),
324   " $vdst, $addr$offset gds"> {
326   let has_data0 = 0;
327   let has_data1 = 0;
328   let has_gds = 0;
329   let gdsValue = 1;
// No address and no data operands; returns $vdst. Inputs are only $offset
// and $gds. mayLoad / mayStore are set explicitly here (to the same values
// DS_Pseudo already defaults to).
332 class DS_0A_RET <string opName> : DS_Pseudo<opName,
333   (outs getLdStRegisterOperand<VGPR_32>.ret:$vdst),
334   (ins Offset:$offset, gds:$gds),
335   " $vdst$offset$gds"> {
337   let mayLoad = 1;
338   let mayStore = 1;
340   let has_addr = 0;
341   let has_data0 = 0;
342   let has_data1 = 0;
// One address, no data operands, no result. mayLoad / mayStore are set
// explicitly here (to the same values DS_Pseudo already defaults to).
345 class DS_1A <string opName> : DS_Pseudo<opName,
346   (outs),
347   (ins VGPR_32:$addr, Offset:$offset, gds:$gds),
348   " $addr$offset$gds"> {
350   let mayLoad = 1;
351   let mayStore = 1;
353   let has_vdst = 0;
354   let has_data0 = 0;
355   let has_data1 = 0;
// Defines the DS_1A pseudo under the base name, plus a "_gfx9" variant with
// has_m0_read cleared.
358 multiclass DS_1A_mc <string opName> {
359   def "" : DS_1A<opName>;
361   let has_m0_read = 0 in {
362     def _gfx9 : DS_1A<opName>;
363   }
// Base class for the GWS instructions: sets GWS, takes the input dag and asm
// operand string from the subclass, has no outputs, clears the vdst / addr /
// data0 / data1 bits, and has no gds operand (gdsValue is fixed to 1).
367 class DS_GWS <string opName, dag ins, string asmOps>
368 : DS_Pseudo<opName, (outs), ins, asmOps> {
369   let GWS = 1;
371   let has_vdst  = 0;
372   let has_addr  = 0;
373   let has_data0 = 0;
374   let has_data1 = 0;
376   let has_gds   = 0;
377   let gdsValue  = 1;
// GWS instruction with no data operand: the only input is $offset.
380 class DS_GWS_0D <string opName>
381 : DS_GWS<opName,
382   (ins Offset:$offset), "$offset gds"> {
383   let hasSideEffects = 1;
// GWS instruction with one data operand ($data0) plus $offset. Sets
// has_gws_data0, which DS_Pseudo documents as "data0 is encoded as addr".
386 class DS_GWS_1D <string opName>
387 : DS_GWS<opName,
388   (ins getLdStRegisterOperand<VGPR_32>.ret:$data0, Offset:$offset),
389   " $data0$offset gds"> {
391   let has_gws_data0 = 1;
392   let hasSideEffects = 1;
// Instruction with no operands and an empty asm operand string. It neither
// loads nor stores, has side effects, opts out of the named operand table,
// and clears the vdst / addr / data / offset / gds presence bits.
395 class DS_VOID <string opName> : DS_Pseudo<opName,
396   (outs), (ins), ""> {
397   let mayLoad = 0;
398   let mayStore = 0;
399   let hasSideEffects = 1;
400   let UseNamedOperandTable = 0;
402   let has_vdst = 0;
403   let has_addr = 0;
404   let has_data0 = 0;
405   let has_data1 = 0;
406   let has_offset = 0;
407   let has_offset0 = 0;
408   let has_offset1 = 0;
409   let has_gds = 0;
// One address, one data operand, returns $vdst, with no gds operand. Carries
// its own selection pattern: `node` applied to a (DS1Addr1Offset addr,
// offset) address and $data0 yields $vdst. With the default null_frag the
// pattern uses null_frag as its operator. Neither loads nor stores; marked
// convergent.
412 class DS_1A1D_PERMUTE <string opName, SDPatternOperator node = null_frag,
413                        RegisterOperand data_op = getLdStRegisterOperand<VGPR_32>.ret>
414 : DS_Pseudo<opName,
415   (outs data_op:$vdst),
416   (ins VGPR_32:$addr, data_op:$data0, Offset:$offset),
417   " $vdst, $addr, $data0$offset",
418   [(set i32:$vdst,
419    (node (DS1Addr1Offset i32:$addr, i32:$offset), i32:$data0))] > {
421   let mayLoad = 0;
422   let mayStore = 0;
423   let isConvergent = 1;
425   let has_data1 = 0;
426   let has_gds = 0;
// Selection pattern: `frag` applied to a (DS1Addr1Offset ptr, offset)
// address and a value of type `vt` is selected to `inst`, with `gds` passed
// as an i1 immediate. `complexity` is forwarded to AddedComplexity.
429 class DSAtomicRetPat<DS_Pseudo inst, ValueType vt, PatFrag frag, int complexity = 0,
430   bit gds=0> : GCNPat <(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
431   (inst $ptr, getVregSrcForVT<vt>.ret:$value, Offset:$offset, (i1 gds))> {
432   let AddedComplexity = complexity;
// 32-bit no-return instructions (one address, one data operand), followed by
// the store (write) definitions under mayLoad = 0 and the predicate-gated
// F64 / packed 16-bit add definitions.
435 defm DS_ADD_U32       : DS_1A1D_NORET_mc<"ds_add_u32">;
436 defm DS_SUB_U32       : DS_1A1D_NORET_mc<"ds_sub_u32">;
437 defm DS_RSUB_U32      : DS_1A1D_NORET_mc<"ds_rsub_u32">;
438 defm DS_INC_U32       : DS_1A1D_NORET_mc<"ds_inc_u32">;
439 defm DS_DEC_U32       : DS_1A1D_NORET_mc<"ds_dec_u32">;
440 defm DS_MIN_I32       : DS_1A1D_NORET_mc<"ds_min_i32">;
441 defm DS_MAX_I32       : DS_1A1D_NORET_mc<"ds_max_i32">;
442 defm DS_MIN_U32       : DS_1A1D_NORET_mc<"ds_min_u32">;
443 defm DS_MAX_U32       : DS_1A1D_NORET_mc<"ds_max_u32">;
444 defm DS_AND_B32       : DS_1A1D_NORET_mc<"ds_and_b32">;
445 defm DS_OR_B32        : DS_1A1D_NORET_mc<"ds_or_b32">;
446 defm DS_XOR_B32       : DS_1A1D_NORET_mc<"ds_xor_b32">;
448 let SubtargetPredicate = HasLDSFPAtomicAddF32 in {
449 defm DS_ADD_F32       : DS_1A1D_NORET_mc<"ds_add_f32">;
452 defm DS_MIN_F32       : DS_1A1D_NORET_mc<"ds_min_f32">;
453 defm DS_MAX_F32       : DS_1A1D_NORET_mc<"ds_max_f32">;
455 let mayLoad = 0 in {
456 defm DS_WRITE_B8      : DS_1A1D_NORET_mc<"ds_write_b8">;
457 defm DS_WRITE_B16     : DS_1A1D_NORET_mc<"ds_write_b16">;
458 defm DS_WRITE_B32     : DS_1A1D_NORET_mc<"ds_write_b32">;
459 defm DS_WRITE2_B32    : DS_1A2D_Off8_NORET_mc<"ds_write2_b32">;
460 defm DS_WRITE2ST64_B32: DS_1A2D_Off8_NORET_mc<"ds_write2st64_b32">;
463 let has_m0_read = 0 in {
465 let SubtargetPredicate = HasD16LoadStore in {
466 def DS_WRITE_B8_D16_HI  : DS_1A1D_NORET<"ds_write_b8_d16_hi">;
467 def DS_WRITE_B16_D16_HI : DS_1A1D_NORET<"ds_write_b16_d16_hi">;
470 } // End has_m0_read = 0
472 let SubtargetPredicate = HasDSAddTid in {
473 def DS_WRITE_ADDTID_B32 : DS_0A1D_NORET<"ds_write_addtid_b32">;
476 } // End mayLoad = 0
478 let SubtargetPredicate = HasLdsAtomicAddF64 in {
479   defm DS_ADD_F64     : DS_1A1D_NORET_mc_gfx9<"ds_add_f64", VReg_64>;
480   defm DS_ADD_RTN_F64 : DS_1A1D_RET_mc_gfx9<"ds_add_rtn_f64", VReg_64>;
481 } // End SubtargetPredicate = HasLdsAtomicAddF64
483 let SubtargetPredicate = HasAtomicDsPkAdd16Insts in {
484   defm DS_PK_ADD_F16      : DS_1A1D_NORET_mc<"ds_pk_add_f16">;
485   defm DS_PK_ADD_RTN_F16  : DS_1A1D_RET_mc<"ds_pk_add_rtn_f16", VGPR_32>;
486   defm DS_PK_ADD_BF16     : DS_1A1D_NORET_mc<"ds_pk_add_bf16">;
487   defm DS_PK_ADD_RTN_BF16 : DS_1A1D_RET_mc<"ds_pk_add_rtn_bf16", VGPR_32>;
488 } // End SubtargetPredicate = HasAtomicDsPkAdd16Insts
// Compare-store / masked-or definitions (two data operands) and the 64-bit
// no-return definitions, which pass VReg_64 as the data register class.
490 defm DS_CMPSTORE_B32     : DS_1A2D_NORET_mc<"ds_cmpstore_b32">;
491 defm DS_CMPSTORE_F32     : DS_1A2D_NORET_mc<"ds_cmpstore_f32">;
492 defm DS_CMPSTORE_B64     : DS_1A2D_NORET_mc<"ds_cmpstore_b64", VReg_64>;
493 defm DS_CMPSTORE_F64     : DS_1A2D_NORET_mc<"ds_cmpstore_f64", VReg_64>;
494 defm DS_CMPSTORE_RTN_B32 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_b32", VGPR_32>;
495 defm DS_CMPSTORE_RTN_F32 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_f32", VGPR_32>;
496 defm DS_CMPSTORE_RTN_B64  : DS_1A2D_RET_mc<"ds_cmpstore_rtn_b64", VReg_64>;
497 defm DS_CMPSTORE_RTN_F64  : DS_1A2D_RET_mc<"ds_cmpstore_rtn_f64", VReg_64>;
499 defm DS_MSKOR_B32     : DS_1A2D_NORET_mc<"ds_mskor_b32">;
500 defm DS_CMPST_B32     : DS_1A2D_NORET_mc<"ds_cmpst_b32">;
501 defm DS_CMPST_F32     : DS_1A2D_NORET_mc<"ds_cmpst_f32">;
503 defm DS_ADD_U64       : DS_1A1D_NORET_mc<"ds_add_u64", VReg_64>;
504 defm DS_SUB_U64       : DS_1A1D_NORET_mc<"ds_sub_u64", VReg_64>;
505 defm DS_RSUB_U64      : DS_1A1D_NORET_mc<"ds_rsub_u64", VReg_64>;
506 defm DS_INC_U64       : DS_1A1D_NORET_mc<"ds_inc_u64", VReg_64>;
507 defm DS_DEC_U64       : DS_1A1D_NORET_mc<"ds_dec_u64", VReg_64>;
508 defm DS_MIN_I64       : DS_1A1D_NORET_mc<"ds_min_i64", VReg_64>;
509 defm DS_MAX_I64       : DS_1A1D_NORET_mc<"ds_max_i64", VReg_64>;
510 defm DS_MIN_U64       : DS_1A1D_NORET_mc<"ds_min_u64", VReg_64>;
511 defm DS_MAX_U64       : DS_1A1D_NORET_mc<"ds_max_u64", VReg_64>;
512 defm DS_AND_B64       : DS_1A1D_NORET_mc<"ds_and_b64", VReg_64>;
513 defm DS_OR_B64        : DS_1A1D_NORET_mc<"ds_or_b64", VReg_64>;
514 defm DS_XOR_B64       : DS_1A1D_NORET_mc<"ds_xor_b64", VReg_64>;
515 defm DS_MSKOR_B64     : DS_1A2D_NORET_mc<"ds_mskor_b64", VReg_64>;
516 let mayLoad = 0 in {
517 defm DS_WRITE_B64     : DS_1A1D_NORET_mc<"ds_write_b64", VReg_64>;
518 defm DS_WRITE2_B64    : DS_1A2D_Off8_NORET_mc<"ds_write2_b64", VReg_64>;
519 defm DS_WRITE2ST64_B64: DS_1A2D_Off8_NORET_mc<"ds_write2st64_b64", VReg_64>;
521 defm DS_CMPST_B64     : DS_1A2D_NORET_mc<"ds_cmpst_b64", VReg_64>;
522 defm DS_CMPST_F64     : DS_1A2D_NORET_mc<"ds_cmpst_f64", VReg_64>;
523 defm DS_MIN_F64       : DS_1A1D_NORET_mc<"ds_min_f64", VReg_64>;
524 defm DS_MAX_F64       : DS_1A1D_NORET_mc<"ds_max_f64", VReg_64>;
// Returning ("_rtn") instruction definitions, 32-bit then 64-bit. The
// wrxchg2 variants return a register class twice as wide as their source
// class (VReg_64 from VGPR_32, VReg_128 from VReg_64).
526 defm DS_ADD_RTN_U32   : DS_1A1D_RET_mc<"ds_add_rtn_u32", VGPR_32>;
528 let SubtargetPredicate = HasLDSFPAtomicAddF32 in {
529 defm DS_ADD_RTN_F32   : DS_1A1D_RET_mc<"ds_add_rtn_f32", VGPR_32>;
531 defm DS_SUB_RTN_U32   : DS_1A1D_RET_mc<"ds_sub_rtn_u32", VGPR_32>;
532 defm DS_RSUB_RTN_U32  : DS_1A1D_RET_mc<"ds_rsub_rtn_u32", VGPR_32>;
533 defm DS_INC_RTN_U32   : DS_1A1D_RET_mc<"ds_inc_rtn_u32", VGPR_32>;
534 defm DS_DEC_RTN_U32   : DS_1A1D_RET_mc<"ds_dec_rtn_u32", VGPR_32>;
535 defm DS_MIN_RTN_I32   : DS_1A1D_RET_mc<"ds_min_rtn_i32", VGPR_32>;
536 defm DS_MAX_RTN_I32   : DS_1A1D_RET_mc<"ds_max_rtn_i32", VGPR_32>;
537 defm DS_MIN_RTN_U32   : DS_1A1D_RET_mc<"ds_min_rtn_u32", VGPR_32>;
538 defm DS_MAX_RTN_U32   : DS_1A1D_RET_mc<"ds_max_rtn_u32", VGPR_32>;
539 defm DS_AND_RTN_B32   : DS_1A1D_RET_mc<"ds_and_rtn_b32", VGPR_32>;
540 defm DS_OR_RTN_B32    : DS_1A1D_RET_mc<"ds_or_rtn_b32", VGPR_32>;
541 defm DS_XOR_RTN_B32   : DS_1A1D_RET_mc<"ds_xor_rtn_b32", VGPR_32>;
542 defm DS_MSKOR_RTN_B32 : DS_1A2D_RET_mc<"ds_mskor_rtn_b32", VGPR_32>;
543 defm DS_CMPST_RTN_B32 : DS_1A2D_RET_mc<"ds_cmpst_rtn_b32", VGPR_32>;
544 defm DS_CMPST_RTN_F32 : DS_1A2D_RET_mc<"ds_cmpst_rtn_f32", VGPR_32>;
545 defm DS_MIN_RTN_F32   : DS_1A1D_RET_mc<"ds_min_rtn_f32", VGPR_32>;
546 defm DS_MAX_RTN_F32   : DS_1A1D_RET_mc<"ds_max_rtn_f32", VGPR_32>;
548 defm DS_WRXCHG_RTN_B32 : DS_1A1D_RET_mc<"ds_wrxchg_rtn_b32">;
549 defm DS_WRXCHG2_RTN_B32 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b32", VReg_64, VGPR_32>;
550 defm DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b32", VReg_64, VGPR_32>;
552 defm DS_ADD_RTN_U64  : DS_1A1D_RET_mc<"ds_add_rtn_u64", VReg_64>;
553 defm DS_SUB_RTN_U64  : DS_1A1D_RET_mc<"ds_sub_rtn_u64", VReg_64>;
554 defm DS_RSUB_RTN_U64  : DS_1A1D_RET_mc<"ds_rsub_rtn_u64", VReg_64>;
555 defm DS_INC_RTN_U64   : DS_1A1D_RET_mc<"ds_inc_rtn_u64", VReg_64>;
556 defm DS_DEC_RTN_U64   : DS_1A1D_RET_mc<"ds_dec_rtn_u64", VReg_64>;
557 defm DS_MIN_RTN_I64    : DS_1A1D_RET_mc<"ds_min_rtn_i64", VReg_64>;
558 defm DS_MAX_RTN_I64    : DS_1A1D_RET_mc<"ds_max_rtn_i64", VReg_64>;
559 defm DS_MIN_RTN_U64   : DS_1A1D_RET_mc<"ds_min_rtn_u64", VReg_64>;
560 defm DS_MAX_RTN_U64   : DS_1A1D_RET_mc<"ds_max_rtn_u64", VReg_64>;
561 defm DS_AND_RTN_B64    : DS_1A1D_RET_mc<"ds_and_rtn_b64", VReg_64>;
562 defm DS_OR_RTN_B64     : DS_1A1D_RET_mc<"ds_or_rtn_b64", VReg_64>;
563 defm DS_XOR_RTN_B64    : DS_1A1D_RET_mc<"ds_xor_rtn_b64", VReg_64>;
564 defm DS_MSKOR_RTN_B64  : DS_1A2D_RET_mc<"ds_mskor_rtn_b64", VReg_64>;
565 defm DS_CMPST_RTN_B64  : DS_1A2D_RET_mc<"ds_cmpst_rtn_b64", VReg_64>;
566 defm DS_CMPST_RTN_F64  : DS_1A2D_RET_mc<"ds_cmpst_rtn_f64", VReg_64>;
567 defm DS_MIN_RTN_F64    : DS_1A1D_RET_mc<"ds_min_rtn_f64", VReg_64>;
568 defm DS_MAX_RTN_F64    : DS_1A1D_RET_mc<"ds_max_rtn_f64", VReg_64>;
570 defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET_mc<"ds_wrxchg_rtn_b64", VReg_64>;
571 defm DS_WRXCHG2_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b64", VReg_128, VReg_64>;
572 defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b64", VReg_128, VReg_64>;
// GWS instruction definitions. All are marked convergent and use a custom
// inserter; DS_GWS_INIT additionally clears mayLoad.
574 let isConvergent = 1, usesCustomInserter = 1 in {
575 def DS_GWS_INIT       : DS_GWS_1D<"ds_gws_init"> {
576   let mayLoad = 0;
578 def DS_GWS_SEMA_V     : DS_GWS_0D<"ds_gws_sema_v">;
579 def DS_GWS_SEMA_BR    : DS_GWS_1D<"ds_gws_sema_br">;
580 def DS_GWS_SEMA_P     : DS_GWS_0D<"ds_gws_sema_p">;
581 def DS_GWS_BARRIER    : DS_GWS_1D<"ds_gws_barrier">;
// "src2" instruction definitions, all address-only (DS_1A) and gated on
// HasDsSrc2Insts.
584 let SubtargetPredicate = HasDsSrc2Insts in {
585 def DS_ADD_SRC2_U32   : DS_1A<"ds_add_src2_u32">;
586 def DS_SUB_SRC2_U32   : DS_1A<"ds_sub_src2_u32">;
587 def DS_RSUB_SRC2_U32  : DS_1A<"ds_rsub_src2_u32">;
588 def DS_INC_SRC2_U32   : DS_1A<"ds_inc_src2_u32">;
589 def DS_DEC_SRC2_U32   : DS_1A<"ds_dec_src2_u32">;
590 def DS_MIN_SRC2_I32   : DS_1A<"ds_min_src2_i32">;
591 def DS_MAX_SRC2_I32   : DS_1A<"ds_max_src2_i32">;
592 def DS_MIN_SRC2_U32   : DS_1A<"ds_min_src2_u32">;
593 def DS_MAX_SRC2_U32   : DS_1A<"ds_max_src2_u32">;
594 def DS_AND_SRC2_B32   : DS_1A<"ds_and_src2_b32">;
595 def DS_OR_SRC2_B32    : DS_1A<"ds_or_src2_b32">;
596 def DS_XOR_SRC2_B32   : DS_1A<"ds_xor_src2_b32">;
597 def DS_MIN_SRC2_F32   : DS_1A<"ds_min_src2_f32">;
598 def DS_MAX_SRC2_F32   : DS_1A<"ds_max_src2_f32">;
600 def DS_ADD_SRC2_U64   : DS_1A<"ds_add_src2_u64">;
601 def DS_SUB_SRC2_U64   : DS_1A<"ds_sub_src2_u64">;
602 def DS_RSUB_SRC2_U64  : DS_1A<"ds_rsub_src2_u64">;
603 def DS_INC_SRC2_U64   : DS_1A<"ds_inc_src2_u64">;
604 def DS_DEC_SRC2_U64   : DS_1A<"ds_dec_src2_u64">;
605 def DS_MIN_SRC2_I64   : DS_1A<"ds_min_src2_i64">;
606 def DS_MAX_SRC2_I64   : DS_1A<"ds_max_src2_i64">;
607 def DS_MIN_SRC2_U64   : DS_1A<"ds_min_src2_u64">;
608 def DS_MAX_SRC2_U64   : DS_1A<"ds_max_src2_u64">;
609 def DS_AND_SRC2_B64   : DS_1A<"ds_and_src2_b64">;
610 def DS_OR_SRC2_B64    : DS_1A<"ds_or_src2_b64">;
611 def DS_XOR_SRC2_B64   : DS_1A<"ds_xor_src2_b64">;
612 def DS_MIN_SRC2_F64   : DS_1A<"ds_min_src2_f64">;
613 def DS_MAX_SRC2_F64   : DS_1A<"ds_max_src2_f64">;
615 def DS_WRITE_SRC2_B32 : DS_1A<"ds_write_src2_b32">;
616 def DS_WRITE_SRC2_B64 : DS_1A<"ds_write_src2_b64">;
617 } // End SubtargetPredicate = HasDsSrc2Insts
// Swizzle (no load / store, convergent, uses only EXEC, Swizzle offset
// operand), the read (load) definitions under mayStore = 0, and the
// consume / append / ordered-count definitions.
619 let Uses = [EXEC], mayLoad = 0, mayStore = 0, isConvergent = 1 in {
620 def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32", VGPR_32, 0, Swizzle>;
623 let mayStore = 0 in {
624 defm DS_READ_I8      : DS_1A_RET_mc<"ds_read_i8">;
625 defm DS_READ_U8      : DS_1A_RET_mc<"ds_read_u8">;
626 defm DS_READ_I16     : DS_1A_RET_mc<"ds_read_i16">;
627 defm DS_READ_U16     : DS_1A_RET_mc<"ds_read_u16">;
628 defm DS_READ_B32     : DS_1A_RET_mc<"ds_read_b32">;
629 defm DS_READ_B64     : DS_1A_RET_mc<"ds_read_b64", VReg_64>;
631 defm DS_READ2_B32    : DS_1A_Off8_RET_mc<"ds_read2_b32", VReg_64>;
632 defm DS_READ2ST64_B32: DS_1A_Off8_RET_mc<"ds_read2st64_b32", VReg_64>;
634 defm DS_READ2_B64    : DS_1A_Off8_RET_mc<"ds_read2_b64", VReg_128>;
635 defm DS_READ2ST64_B64: DS_1A_Off8_RET_mc<"ds_read2st64_b64", VReg_128>;
637 let has_m0_read = 0 in {
638 let SubtargetPredicate = HasD16LoadStore, TiedSourceNotRead = 1 in {
639 def DS_READ_U8_D16     : DS_1A_RET_Tied<"ds_read_u8_d16">;
640 def DS_READ_U8_D16_HI  : DS_1A_RET_Tied<"ds_read_u8_d16_hi">;
641 def DS_READ_I8_D16     : DS_1A_RET_Tied<"ds_read_i8_d16">;
642 def DS_READ_I8_D16_HI  : DS_1A_RET_Tied<"ds_read_i8_d16_hi">;
643 def DS_READ_U16_D16    : DS_1A_RET_Tied<"ds_read_u16_d16">;
644 def DS_READ_U16_D16_HI : DS_1A_RET_Tied<"ds_read_u16_d16_hi">;
646 } // End has_m0_read = 0
648 let SubtargetPredicate = HasDSAddTid in {
649 def DS_READ_ADDTID_B32 : DS_0A_RET<"ds_read_addtid_b32">;
652 } // End mayStore = 0
654 def DS_CONSUME       : DS_0A_RET<"ds_consume">;
655 def DS_APPEND        : DS_0A_RET<"ds_append">;
657 let SubtargetPredicate = isNotGFX90APlus in
658 def DS_ORDERED_COUNT : DS_1A_RET_GDS<"ds_ordered_count">;
660 //===----------------------------------------------------------------------===//
661 // Instruction definitions for CI and newer.
662 //===----------------------------------------------------------------------===//
// Everything in this section is gated on isGFX7Plus: wrap / condxchg, one more
// GWS instruction, the 96- and 128-bit reads and writes, and ds_nop.
664 let SubtargetPredicate = isGFX7Plus in {
666 defm DS_WRAP_RTN_B32 : DS_1A2D_RET_mc<"ds_wrap_rtn_b32", VGPR_32>;
667 defm DS_CONDXCHG32_RTN_B64 : DS_1A1D_RET_mc<"ds_condxchg32_rtn_b64", VReg_64>;
669 let isConvergent = 1, usesCustomInserter = 1 in {
670 def DS_GWS_SEMA_RELEASE_ALL : DS_GWS_0D<"ds_gws_sema_release_all">;
673 let mayStore = 0 in {
674 defm DS_READ_B96 : DS_1A_RET_mc<"ds_read_b96", VReg_96>;
675 defm DS_READ_B128: DS_1A_RET_mc<"ds_read_b128", VReg_128>;
676 } // End mayStore = 0
678 let mayLoad = 0 in {
679 defm DS_WRITE_B96 : DS_1A1D_NORET_mc<"ds_write_b96", VReg_96>;
680 defm DS_WRITE_B128 : DS_1A1D_NORET_mc<"ds_write_b128", VReg_128>;
681 } // End mayLoad = 0
683 def DS_NOP : DS_VOID<"ds_nop">;
685 } // let SubtargetPredicate = isGFX7Plus
687 //===----------------------------------------------------------------------===//
688 // Instruction definitions for VI and newer.
689 //===----------------------------------------------------------------------===//
// The permute instructions are gated on isGFX8Plus, use only EXEC, and are
// selected from the int_amdgcn_ds_permute / int_amdgcn_ds_bpermute intrinsics.
691 let SubtargetPredicate = isGFX8Plus in {
693 let Uses = [EXEC] in {
694 def DS_PERMUTE_B32  : DS_1A1D_PERMUTE <"ds_permute_b32",
695                                        int_amdgcn_ds_permute>;
696 def DS_BPERMUTE_B32 : DS_1A1D_PERMUTE <"ds_bpermute_b32",
697                                        int_amdgcn_ds_bpermute>;
700 } // let SubtargetPredicate = isGFX8Plus
// DS_ADD_SRC2_F32 requires both HasLDSFPAtomicAddF32 and HasDsSrc2Insts.
702 let SubtargetPredicate = HasLDSFPAtomicAddF32, OtherPredicates = [HasDsSrc2Insts] in {
703 def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
707 //===----------------------------------------------------------------------===//
708 // Instruction definitions for GFX11.
709 //===----------------------------------------------------------------------===//
// The GS-register add / sub instructions are GFX11-only; they return a VReg_64
// result from a VGPR_32 source.
711 let SubtargetPredicate = isGFX11Only in {
713 def DS_ADD_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_add_gs_reg_rtn", VReg_64, VGPR_32>;
714 def DS_SUB_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_sub_gs_reg_rtn", VReg_64, VGPR_32>;
716 } // let SubtargetPredicate = isGFX11Only
// The BVH stack instruction is GFX11+ and additionally requires HasImageInsts.
718 let SubtargetPredicate = isGFX11Plus in {
720 let OtherPredicates = [HasImageInsts] in
721 def DS_BVH_STACK_RTN_B32 : DS_BVH_STACK<"ds_bvh_stack_rtn_b32">;
723 } // let SubtargetPredicate = isGFX11Plus
725 //===----------------------------------------------------------------------===//
726 // Instruction definitions for GFX12 and newer.
727 //===----------------------------------------------------------------------===//
// Conditional-subtract and clamped-subtract instructions, each in a no-return
// and a returning form, gated on isGFX12Plus.
729 let SubtargetPredicate = isGFX12Plus in {
731 defm DS_COND_SUB_U32      : DS_1A1D_NORET_mc<"ds_cond_sub_u32">;
732 defm DS_COND_SUB_RTN_U32  : DS_1A1D_RET_mc<"ds_cond_sub_rtn_u32", VGPR_32>;
733 defm DS_SUB_CLAMP_U32     : DS_1A1D_NORET_mc<"ds_sub_clamp_u32">;
734 defm DS_SUB_CLAMP_RTN_U32 : DS_1A1D_RET_mc<"ds_sub_clamp_rtn_u32", VGPR_32>;
// Emits two DSAtomicRetPat patterns for an intrinsic named by `frag`:
//  - <frag>_local_addrspace       -> the returning instruction `inst`
//  - <frag>_noret_local_addrspace -> `noRetInst`, only under
//    HasAtomicCSubNoRtnInsts and with AddedComplexity 1.
736 multiclass DSAtomicRetNoRetPatIntrinsic_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
737                                   ValueType vt, string frag> {
738   def : DSAtomicRetPat<inst, vt,
739                         !cast<PatFrag>(frag#"_local_addrspace")>;
741   let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
742     def : DSAtomicRetPat<noRetInst, vt,
743                           !cast<PatFrag>(frag#"_noret_local_addrspace"), /* complexity */ 1>;
// Instantiate the patterns for the conditional-subtract intrinsic.
746 defm : DSAtomicRetNoRetPatIntrinsic_mc<DS_COND_SUB_RTN_U32, DS_COND_SUB_U32, i32, "int_amdgcn_atomic_cond_sub_u32">;
747 } // let SubtargetPredicate = isGFX12Plus
749 //===----------------------------------------------------------------------===//
750 // DS Patterns
751 //===----------------------------------------------------------------------===//
// Select the ds_swizzle intrinsic to DS_SWIZZLE_B32, passing the immediate
// through as_i16timm and a constant 0 for gds.
753 def : GCNPat <
754   (int_amdgcn_ds_swizzle i32:$src, timm:$offset16),
755   (DS_SWIZZLE_B32 VGPR_32:$src, (as_i16timm $offset16), (i1 0))
// Selection pattern: `frag` applied to a (DS1Addr1Offset ptr, offset)
// address, producing type `vt`, is selected to `inst` with `gds` passed as an
// i1 immediate.
758 class DSReadPat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0> : GCNPat <
759   (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
760   (inst $ptr, Offset:$offset, (i1 gds))
// Emits two DSReadPat patterns for the fragment named by `frag`:
//  - under LDSRequiresM0Init: the "<frag>_m0" fragment -> `inst`
//  - under NotLDSRequiresM0Init: `frag` itself -> the "<inst>_gfx9" record,
//    looked up by name.
763 multiclass DSReadPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
765   let OtherPredicates = [LDSRequiresM0Init] in {
766     def : DSReadPat<inst, vt, !cast<PatFrag>(frag#"_m0")>;
767   }
769   let OtherPredicates = [NotLDSRequiresM0Init] in {
770     def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
771   }
// Selection pattern for loads that take an extra input value `$in` of type
// `vt`: `frag` applied to a (DS1Addr1Offset ptr, offset) address and $in is
// selected to `inst`, with gds fixed to 0 and $in as the last operand.
774 class DSReadPat_D16 <DS_Pseudo inst, PatFrag frag, ValueType vt> : GCNPat <
775   (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$in),
776   (inst $ptr, Offset:$offset, (i1 0), $in)
// Patterns for the 8- and 16-bit local-memory loads. Sign-extending
// fragments select the _I8 / _I16 reads; any-extending and zero-extending
// fragments select the _U8 / _U16 reads. The plain i16 load_local also
// selects DS_READ_U16.
// Each (instruction, type, fragment) combination is instantiated once.
779 defm : DSReadPat_mc <DS_READ_I8,  i32, "sextloadi8_local">;
780 defm : DSReadPat_mc <DS_READ_I8,  i16, "sextloadi8_local">;
781 defm : DSReadPat_mc <DS_READ_U8,  i32, "extloadi8_local">;
782 defm : DSReadPat_mc <DS_READ_U8,  i32, "zextloadi8_local">;
783 defm : DSReadPat_mc <DS_READ_U8,  i16, "extloadi8_local">;
784 defm : DSReadPat_mc <DS_READ_U8,  i16, "zextloadi8_local">;
785 defm : DSReadPat_mc <DS_READ_I16, i32, "sextloadi16_local">;
787 defm : DSReadPat_mc <DS_READ_U16, i32, "extloadi16_local">;
788 defm : DSReadPat_mc <DS_READ_U16, i32, "zextloadi16_local">;
789 defm : DSReadPat_mc <DS_READ_U16, i16, "load_local">;
// load_local -> DS_READ_B32 for every type in Reg32Types.types.
791 foreach vt = Reg32Types.types in {
792 defm : DSReadPat_mc <DS_READ_B32, vt, "load_local">;
// Atomic local loads select the same read instructions as the plain loads.
795 defm : DSReadPat_mc <DS_READ_U8, i16, "atomic_load_8_local">;
796 defm : DSReadPat_mc <DS_READ_U8, i32, "atomic_load_8_local">;
797 defm : DSReadPat_mc <DS_READ_U16, i16, "atomic_load_16_local">;
798 defm : DSReadPat_mc <DS_READ_U16, i32, "atomic_load_16_local">;
799 defm : DSReadPat_mc <DS_READ_B32, i32, "atomic_load_32_local">;
800 defm : DSReadPat_mc <DS_READ_B64, i64, "atomic_load_64_local">;
// D16 load patterns (hi half first, then lo half), for both v2i16 and v2f16,
// gated on D16PreservesUnusedBits.
802 let OtherPredicates = [D16PreservesUnusedBits] in {
803 def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2i16>;
804 def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2f16>;
805 def : DSReadPat_D16<DS_READ_U8_D16_HI, az_extloadi8_d16_hi_local, v2i16>;
806 def : DSReadPat_D16<DS_READ_U8_D16_HI, az_extloadi8_d16_hi_local, v2f16>;
807 def : DSReadPat_D16<DS_READ_I8_D16_HI, sextloadi8_d16_hi_local, v2i16>;
808 def : DSReadPat_D16<DS_READ_I8_D16_HI, sextloadi8_d16_hi_local, v2f16>;
810 def : DSReadPat_D16<DS_READ_U16_D16, load_d16_lo_local, v2i16>;
811 def : DSReadPat_D16<DS_READ_U16_D16, load_d16_lo_local, v2f16>;
812 def : DSReadPat_D16<DS_READ_U8_D16, az_extloadi8_d16_lo_local, v2i16>;
813 def : DSReadPat_D16<DS_READ_U8_D16, az_extloadi8_d16_lo_local, v2f16>;
814 def : DSReadPat_D16<DS_READ_I8_D16, sextloadi8_d16_lo_local, v2i16>;
815 def : DSReadPat_D16<DS_READ_I8_D16, sextloadi8_d16_lo_local, v2f16>;
// Selection pattern: `frag` applied to a value of type `vt` and a
// (DS1Addr1Offset ptr, offset) address is selected to `inst`, with the value
// placed in the register class given by getVregSrcForVT<vt> and `gds` passed
// as an i1 immediate.
818 class DSWritePat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0> : GCNPat <
819   (frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
820   (inst $ptr, getVregSrcForVT<vt>.ret:$value, Offset:$offset, (i1 gds))
// Emits two DSWritePat patterns for the fragment named by `frag`:
//  - under LDSRequiresM0Init: the "<frag>_m0" fragment -> `inst`
//  - under NotLDSRequiresM0Init: `frag` itself -> the "<inst>_gfx9" record,
//    looked up by name.
823 multiclass DSWritePat_mc <DS_Pseudo inst, ValueType vt, string frag> {
824   let OtherPredicates = [LDSRequiresM0Init] in {
825     def : DSWritePat<inst, vt, !cast<PatFrag>(frag#"_m0")>;
826   }
828   let OtherPredicates = [NotLDSRequiresM0Init] in {
829     def : DSWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
830   }
// Local-memory store selection patterns: one DSWritePat_mc instantiation per
// (DS write pseudo, value type, store fragment name).
// Truncating 8/16-bit stores and the plain i16 store.
833 defm : DSWritePat_mc <DS_WRITE_B8, i32, "truncstorei8_local">;
834 defm : DSWritePat_mc <DS_WRITE_B16, i32, "truncstorei16_local">;
835 defm : DSWritePat_mc <DS_WRITE_B8, i16, "truncstorei8_local">;
836 defm : DSWritePat_mc <DS_WRITE_B16, i16, "store_local">;
// Plain 32-bit stores: one instantiation per type in Reg32Types.types.
838 foreach vt = Reg32Types.types in {
839 defm : DSWritePat_mc <DS_WRITE_B32, vt, "store_local">;
// Atomic stores select the regular DS_WRITE_* pseudo of the matching width.
842 defm : DSWritePat_mc <DS_WRITE_B8, i16, "atomic_store_8_local">;
843 defm : DSWritePat_mc <DS_WRITE_B8, i32, "atomic_store_8_local">;
844 defm : DSWritePat_mc <DS_WRITE_B16, i16, "atomic_store_16_local">;
845 defm : DSWritePat_mc <DS_WRITE_B16, i32, "atomic_store_16_local">;
846 defm : DSWritePat_mc <DS_WRITE_B32, i32, "atomic_store_32_local">;
847 defm : DSWritePat_mc <DS_WRITE_B64, i64, "atomic_store_64_local">;
// D16_HI store pseudos, matched through the *_hi16_local fragments and guarded
// by HasD16LoadStore. These use DSWritePat directly, so no _m0/_gfx9 split is
// generated here.
849 let OtherPredicates = [HasD16LoadStore] in {
850 def : DSWritePat <DS_WRITE_B16_D16_HI, i32, store_hi16_local>;
851 def : DSWritePat <DS_WRITE_B8_D16_HI, i32, truncstorei8_hi16_local>;
// Load pattern for addresses matched by DS64Bit4ByteAligned, which yields a
// pointer and two i8 offsets. Emits `inst` with the pointer, both offsets and
// a constant (i1 0) final operand (presumably the gds bit, by analogy with
// DSWritePat - TODO confirm against the pseudo's operand list).
854 class DS64Bit4ByteAlignedReadPat<DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
855   (vt:$value (frag (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1))),
856   (inst $ptr, $offset0, $offset1, (i1 0))
// Store counterpart of DS64Bit4ByteAlignedReadPat. The value is taken as a
// VReg_64 and split with EXTRACT_SUBREG into its sub0 and sub1 halves, which
// are passed as two separate i32 data operands, followed by the two offsets
// and a constant (i1 0).
859 class DS64Bit4ByteAlignedWritePat<DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat<
860   (frag vt:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1)),
861   (inst $ptr, (i32 (EXTRACT_SUBREG VReg_64:$value, sub0)),
862               (i32 (EXTRACT_SUBREG VReg_64:$value, sub1)), $offset0, $offset1,
863               (i1 0))
// Load pattern for addresses matched by DS128Bit8ByteAligned (pointer plus two
// i8 offsets). Emits `inst` with the pointer, both offsets and a constant
// (i1 0) final operand.
866 class DS128Bit8ByteAlignedReadPat<DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
867   (vt:$value (frag (DS128Bit8ByteAligned i32:$ptr, i8:$offset0, i8:$offset1))),
868   (inst $ptr, $offset0, $offset1, (i1 0))
// Store counterpart of DS128Bit8ByteAlignedReadPat. The value is taken as a
// VReg_128 and split with EXTRACT_SUBREG into sub0_sub1 and sub2_sub3, passed
// as two i64 data operands, followed by the two offsets and a constant (i1 0).
871 class DS128Bit8ByteAlignedWritePat<DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat<
872   (frag vt:$value, (DS128Bit8ByteAligned i32:$ptr, i8:$offset0, i8:$offset1)),
873   (inst $ptr, (i64 (EXTRACT_SUBREG VReg_128:$value, sub0_sub1)),
874               (i64 (EXTRACT_SUBREG VReg_128:$value, sub2_sub3)), $offset0, $offset1,
875               (i1 0))
// Instantiates the DS_READ2_B32 / DS_WRITE2_B32 load and store patterns for
// value type `vt`:
//  - LDSRequiresM0Init and isGFX7Plus: base pseudos with the *_local_m0
//    fragments;
//  - NotLDSRequiresM0Init: the _gfx9 pseudos with the plain *_local fragments.
878 multiclass DS64Bit4ByteAlignedPat_mc<ValueType vt> {
879   let OtherPredicates = [LDSRequiresM0Init, isGFX7Plus] in {
880     def : DS64Bit4ByteAlignedReadPat<DS_READ2_B32, vt, load_local_m0>;
881     def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32, vt, store_local_m0>;
882   }
884   let OtherPredicates = [NotLDSRequiresM0Init] in {
885     def : DS64Bit4ByteAlignedReadPat<DS_READ2_B32_gfx9, vt, load_local>;
886     def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32_gfx9, vt, store_local>;
887   }
// Instantiates the DS_READ2_B64 / DS_WRITE2_B64 load and store patterns for
// value type `vt`, with the same predicate split as DS64Bit4ByteAlignedPat_mc:
// base pseudos plus *_local_m0 fragments under LDSRequiresM0Init/isGFX7Plus,
// _gfx9 pseudos plus plain fragments under NotLDSRequiresM0Init.
890 multiclass DS128Bit8ByteAlignedPat_mc<ValueType vt> {
891   let OtherPredicates = [LDSRequiresM0Init, isGFX7Plus] in {
892     def : DS128Bit8ByteAlignedReadPat<DS_READ2_B64, vt, load_local_m0>;
893     def : DS128Bit8ByteAlignedWritePat<DS_WRITE2_B64, vt, store_local_m0>;
894   }
896   let OtherPredicates = [NotLDSRequiresM0Init] in {
897     def : DS128Bit8ByteAlignedReadPat<DS_READ2_B64_gfx9, vt, load_local>;
898     def : DS128Bit8ByteAlignedWritePat<DS_WRITE2_B64_gfx9, vt, store_local>;
899   }
902 // v2i32 loads are split into i32 loads on SI during lowering, due to a bug
903 // related to bounds checking.
// One read2/write2_b32 pattern set per type in VReg_64.RegTypes.
904 foreach vt = VReg_64.RegTypes in {
905 defm : DS64Bit4ByteAlignedPat_mc<vt>;
// One read2/write2_b64 pattern set per type in VReg_128.RegTypes.
908 foreach vt = VReg_128.RegTypes in {
909 defm : DS128Bit8ByteAlignedPat_mc<vt>;
912 // Prefer ds_read over ds_read2 and ds_write over ds_write2, all other things
913 // being equal, because it has a larger immediate offset range.
// Everything down to the "End AddedComplexity = 100" marker is given
// AddedComplexity 100 to express that preference.
914 let AddedComplexity = 100 in {
// 8-byte-aligned 64-bit accesses, one pattern pair per type in
// VReg_64.RegTypes.
916 foreach vt = VReg_64.RegTypes in {
917 defm : DSReadPat_mc <DS_READ_B64, vt, "load_align8_local">;
918 defm : DSWritePat_mc <DS_WRITE_B64, vt, "store_align8_local">;
// The 96- and 128-bit single-instruction accesses additionally require
// isGFX7Plus.
921 let SubtargetPredicate = isGFX7Plus in {
// 16-byte-aligned 96-bit accesses.
923 foreach vt = VReg_96.RegTypes in {
924 defm : DSReadPat_mc <DS_READ_B96, vt, "load_align16_local">;
925 defm : DSWritePat_mc <DS_WRITE_B96, vt, "store_align16_local">;
// 16-byte-aligned 128-bit accesses.
928 foreach vt = VReg_128.RegTypes in {
929 defm : DSReadPat_mc <DS_READ_B128, vt, "load_align16_local">;
930 defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_align16_local">;
// Patterns below are further restricted to HasUnalignedAccessMode.
933 let SubtargetPredicate = HasUnalignedAccessMode in {
935 // Select 64 bit loads and stores aligned less than 4 as a single ds_read_b64/
936 // ds_write_b64 instruction as this is faster than ds_read2_b32/ds_write2_b32
937 // which would be used otherwise. In this case a b32 access would still be
938 // misaligned, but we will have 2 of them.
939 foreach vt = VReg_64.RegTypes in {
940 defm : DSReadPat_mc <DS_READ_B64, vt, "load_align_less_than_4_local">;
941 defm : DSWritePat_mc <DS_WRITE_B64, vt, "store_align_less_than_4_local">;
944 // Selection will split most of the unaligned 3 dword accesses due to performance
945 // reasons when beneficial. Keep these two patterns for the rest of the cases.
946 foreach vt = VReg_96.RegTypes in {
947 defm : DSReadPat_mc <DS_READ_B96, vt, "load_local">;
948 defm : DSWritePat_mc <DS_WRITE_B96, vt, "store_local">;
951 // Select 128 bit loads and stores aligned less than 4 as a single ds_read_b128/
952 // ds_write_b128 instruction as this is faster than ds_read2_b64/ds_write2_b64
953 // which would be used otherwise. In this case a b64 access would still be
954 // misaligned, but we will have 2 of them.
955 foreach vt = VReg_128.RegTypes in {
956 defm : DSReadPat_mc <DS_READ_B128, vt, "load_align_less_than_4_local">;
957 defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_align_less_than_4_local">;
960 } // End SubtargetPredicate = HasUnalignedAccessMode
962 } // End SubtargetPredicate = isGFX7Plus
964 } // End AddedComplexity = 100
// Emits the selection patterns for one returning DS atomic. Fragment names
// are assembled from `frag`, an address-space/M0 infix and the value type `vt`:
//  - LDSRequiresM0Init:    `inst`      with frag#"_local_m0_"#vt
//  - NotLDSRequiresM0Init: <inst>_gfx9 with frag#"_local_"#vt
//  - HasGDS:               `inst`      with frag#"_region_m0_"#vt, passing
//                          complexity 0 and gds 1 to DSAtomicRetPat.
// DSAtomicRetPat itself is defined outside this excerpt.
966 multiclass DSAtomicRetPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
967   let OtherPredicates = [LDSRequiresM0Init] in {
968     def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_local_m0_"#vt)>;
969   }
971   let OtherPredicates = [NotLDSRequiresM0Init] in {
972     def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
973                          !cast<PatFrag>(frag#"_local_"#vt)>;
974   }
976   let OtherPredicates = [HasGDS] in {
977     def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt),
978                          /* complexity */ 0, /* gds */ 1>;
979   }
// Like DSAtomicRetPat_mc, but emits a pattern pair for each predicate: one
// selecting the returning pseudo `inst` and one selecting the non-returning
// pseudo `noRetInst` through the corresponding "_noret_" fragment. The
// no-return patterns are given complexity 1 (the returning ones use the
// default, or an explicit 0 in the HasGDS case).
//  - LDSRequiresM0Init:    base pseudos,  "_local_m0_" fragments
//  - NotLDSRequiresM0Init: _gfx9 pseudos, "_local_" fragments
//  - HasGDS:               base pseudos,  "_region_m0_" fragments, gds = 1
982 multiclass DSAtomicRetNoRetPat_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
983                                   ValueType vt, string frag> {
984   let OtherPredicates = [LDSRequiresM0Init] in {
985     def : DSAtomicRetPat<inst, vt,
986                          !cast<PatFrag>(frag#"_local_m0_"#vt)>;
987     def : DSAtomicRetPat<noRetInst, vt,
988                          !cast<PatFrag>(frag#"_local_m0_noret_"#vt), /* complexity */ 1>;
989   }
991   let OtherPredicates = [NotLDSRequiresM0Init] in {
992     def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
993                          !cast<PatFrag>(frag#"_local_"#vt)>;
994     def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
995                          !cast<PatFrag>(frag#"_local_noret_"#vt), /* complexity */ 1>;
996   }
998   let OtherPredicates = [HasGDS] in {
999     def : DSAtomicRetPat<inst, vt,
1000                          !cast<PatFrag>(frag#"_region_m0_"#vt),
1001                          /* complexity */ 0, /* gds */ 1>;
1002     def : DSAtomicRetPat<noRetInst, vt,
1003                          !cast<PatFrag>(frag#"_region_m0_noret_"#vt),
1004                          /* complexity */ 1, /* gds */ 1>;
1005   }
// Compare-and-swap selection for subtargets matching isGFX6GFX7GFX8GFX9GFX10.
1010 let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in {
1011 // Caution, the order of src and cmp is the *opposite* of the BUFFER_ATOMIC_CMPSWAP opcode.
// Matches `frag` on (address, cmp, swap) and emits `inst` with the data
// operands in the order $cmp, $swap, followed by the offset and the gds
// immediate. `complexity` becomes the pattern's AddedComplexity.
1012 class DSAtomicCmpXChgSwapped<DS_Pseudo inst, ValueType vt, PatFrag frag,
1013   int complexity = 0, bit gds=0> : GCNPat<
1014   (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
1015   (inst $ptr, getVregSrcForVT<vt>.ret:$cmp, getVregSrcForVT<vt>.ret:$swap, Offset:$offset, (i1 gds))> {
1016   let AddedComplexity = complexity;
// Emits returning (`inst`) and non-returning (`noRetInst`) compare-and-swap
// patterns built on DSAtomicCmpXChgSwapped, using the same predicate and
// fragment-name scheme as DSAtomicRetNoRetPat_mc:
//  - LDSRequiresM0Init:    base pseudos,  "_local_m0_" fragments
//  - NotLDSRequiresM0Init: _gfx9 pseudos, "_local_" fragments
//  - HasGDS:               base pseudos,  "_region_m0_" fragments, gds = 1
// No-return patterns get complexity 1.
1019 multiclass DSAtomicCmpXChgSwapped_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueType vt,
1020                                      string frag> {
1021   let OtherPredicates = [LDSRequiresM0Init] in {
1022     def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_local_m0_"#vt)>;
1023     def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_local_m0_noret_"#vt),
1024                                  /* complexity */ 1>;
1025   }
1027   let OtherPredicates = [NotLDSRequiresM0Init] in {
1028     def : DSAtomicCmpXChgSwapped<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
1029                                  !cast<PatFrag>(frag#"_local_"#vt)>;
1030     def : DSAtomicCmpXChgSwapped<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
1031                                  !cast<PatFrag>(frag#"_local_noret_"#vt),
1032                                  /* complexity */ 1>;
1033   }
1035   let OtherPredicates = [HasGDS] in {
1036     def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt),
1037                                  /* complexity */ 0, /* gds */ 1>;
1038     def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt),
1039                                  /* complexity */ 1, /* gds */ 1>;
1040   }
1042 } // End SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10
// Compare-and-swap selection for subtargets matching isGFX11Plus.
1044 let SubtargetPredicate = isGFX11Plus in {
1045 // The order of src and cmp agrees with the BUFFER_ATOMIC_CMPSWAP opcode.
// Matches the same source dag as DSAtomicCmpXChgSwapped, but emits the data
// operands in the order $swap, $cmp. `complexity` becomes the pattern's
// AddedComplexity.
1046 class DSAtomicCmpXChg<DS_Pseudo inst, ValueType vt, PatFrag frag,
1047   int complexity = 0, bit gds=0> : GCNPat<
1048   (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
1049   (inst $ptr, getVregSrcForVT<vt>.ret:$swap, getVregSrcForVT<vt>.ret:$cmp, Offset:$offset, (i1 gds))> {
1050   let AddedComplexity = complexity;
// Emits returning and non-returning compare-and-swap patterns built on
// DSAtomicCmpXChg. Unlike DSAtomicCmpXChgSwapped_mc there is no
// LDSRequiresM0Init variant: the local patterns always use the _gfx9 pseudos
// with the "_local_" fragments and carry no extra predicate here. The HasGDS
// patterns use the base pseudos with the "_region_m0_" fragments and gds = 1.
// No-return patterns get complexity 1.
1053 multiclass DSAtomicCmpXChg_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueType vt, string frag> {
1055   def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
1056                         !cast<PatFrag>(frag#"_local_"#vt)>;
1057   def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
1058                         !cast<PatFrag>(frag#"_local_noret_"#vt), /* complexity */ 1>;
1060   let OtherPredicates = [HasGDS] in {
1061     def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt),
1062                           /* complexity */ 0, /* gds */ 1>;
1063     def : DSAtomicCmpXChg<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt),
1064                           /* complexity */ 1, /* gds */ 1>;
1065   }
1067 } // End SubtargetPredicate = isGFX11Plus
1069 // 32-bit atomics.
// atomic_swap has only a returning pseudo here; every other operation pairs a
// *_RTN_* pseudo with its non-returning counterpart.
1070 defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B32, i32, "atomic_swap">;
1071 defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_U32, DS_ADD_U32, i32, "atomic_load_add">;
1072 defm : DSAtomicRetNoRetPat_mc<DS_SUB_RTN_U32, DS_SUB_U32, i32, "atomic_load_sub">;
1073 defm : DSAtomicRetNoRetPat_mc<DS_INC_RTN_U32, DS_INC_U32, i32, "atomic_load_uinc_wrap">;
1074 defm : DSAtomicRetNoRetPat_mc<DS_DEC_RTN_U32, DS_DEC_U32, i32, "atomic_load_udec_wrap">;
1075 defm : DSAtomicRetNoRetPat_mc<DS_AND_RTN_B32, DS_AND_B32, i32, "atomic_load_and">;
1076 defm : DSAtomicRetNoRetPat_mc<DS_OR_RTN_B32, DS_OR_B32, i32, "atomic_load_or">;
1077 defm : DSAtomicRetNoRetPat_mc<DS_XOR_RTN_B32, DS_XOR_B32, i32, "atomic_load_xor">;
1078 defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_I32, DS_MIN_I32, i32, "atomic_load_min">;
1079 defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_I32, DS_MAX_I32, i32, "atomic_load_max">;
1080 defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_U32, DS_MIN_U32, i32, "atomic_load_umin">;
1081 defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_U32, DS_MAX_U32, i32, "atomic_load_umax">;
1082 defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_F32, DS_MIN_F32, f32, "atomic_load_fmin">;
1083 defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_F32, DS_MAX_F32, f32, "atomic_load_fmax">;
// Packed 16-bit floating-point add (v2f16 and v2bf16).
1086 let SubtargetPredicate = HasAtomicDsPkAdd16Insts in {
1087 defm : DSAtomicRetNoRetPat_mc<DS_PK_ADD_RTN_F16, DS_PK_ADD_F16, v2f16, "atomic_load_fadd">;
1088 defm : DSAtomicRetNoRetPat_mc<DS_PK_ADD_RTN_BF16, DS_PK_ADD_BF16, v2bf16, "atomic_load_fadd">;
// 32-bit compare-and-swap: DS_CMPST_* with the swapped operand order before
// GFX11, DS_CMPSTORE_* from GFX11 on.
1091 let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in {
1092 defm : DSAtomicCmpXChgSwapped_mc<DS_CMPST_RTN_B32, DS_CMPST_B32, i32, "atomic_cmp_swap">;
1095 let SubtargetPredicate = isGFX11Plus in {
1096 defm : DSAtomicCmpXChg_mc<DS_CMPSTORE_RTN_B32, DS_CMPSTORE_B32, i32, "atomic_cmp_swap">;
// f32 atomic add, gated on HasLDSFPAtomicAddF32.
1099 let SubtargetPredicate = HasLDSFPAtomicAddF32 in {
1100 defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_F32, DS_ADD_F32, f32, "atomic_load_fadd">;
1103 // 64-bit atomics.
// Mirrors the 32-bit list with the *_B64/*_U64/*_I64/*_F64 pseudos and the
// i64/f64 value types.
1104 defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B64, i64, "atomic_swap">;
1105 defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_U64, DS_ADD_U64, i64, "atomic_load_add">;
1106 defm : DSAtomicRetNoRetPat_mc<DS_SUB_RTN_U64, DS_SUB_U64, i64, "atomic_load_sub">;
1107 defm : DSAtomicRetNoRetPat_mc<DS_INC_RTN_U64, DS_INC_U64, i64, "atomic_load_uinc_wrap">;
1108 defm : DSAtomicRetNoRetPat_mc<DS_DEC_RTN_U64, DS_DEC_U64, i64, "atomic_load_udec_wrap">;
1109 defm : DSAtomicRetNoRetPat_mc<DS_AND_RTN_B64, DS_AND_B64, i64, "atomic_load_and">;
1110 defm : DSAtomicRetNoRetPat_mc<DS_OR_RTN_B64, DS_OR_B64, i64, "atomic_load_or">;
1111 defm : DSAtomicRetNoRetPat_mc<DS_XOR_RTN_B64, DS_XOR_B64, i64, "atomic_load_xor">;
1112 defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_I64, DS_MIN_I64, i64, "atomic_load_min">;
1113 defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_I64, DS_MAX_I64, i64, "atomic_load_max">;
1114 defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_U64, DS_MIN_U64, i64, "atomic_load_umin">;
1115 defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_U64, DS_MAX_U64, i64, "atomic_load_umax">;
1116 defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_F64, DS_MIN_F64, f64, "atomic_load_fmin">;
1117 defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_F64, DS_MAX_F64, f64, "atomic_load_fmax">;
// 64-bit compare-and-swap, split by generation like the 32-bit case.
1119 let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in {
1120 defm : DSAtomicCmpXChgSwapped_mc<DS_CMPST_RTN_B64, DS_CMPST_B64, i64, "atomic_cmp_swap">;
1121 } // End SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10
1123 let SubtargetPredicate = isGFX11Plus in {
1124 defm : DSAtomicCmpXChg_mc<DS_CMPSTORE_RTN_B64, DS_CMPSTORE_B64, i64, "atomic_cmp_swap">;
1125 } // End SubtargetPredicate = isGFX11Plus
// f64 atomic add patterns, gated on HasLdsAtomicAddF64.
// NOTE(review): the closing brace of this `let` is not visible in this
// excerpt; it presumably also covers the intrinsic-based patterns below -
// confirm against the full file.
1127 let SubtargetPredicate = HasLdsAtomicAddF64 in {
// These use DSAtomicRetPat directly with the *_local_* fragments, so no
// _m0/_gfx9/GDS variants are generated. The no-return form gets
// AddedComplexity 1.
1128 def : DSAtomicRetPat<DS_ADD_RTN_F64, f64, atomic_load_fadd_local_f64>;
1129 let AddedComplexity = 1 in
1130 def : DSAtomicRetPat<DS_ADD_F64, f64, atomic_load_fadd_local_noret_f64>;
// Pattern for an atomic expressed as an intrinsic-style fragment: matches
// `frag` on (address, value) producing a `vt` result and emits `inst` with the
// pointer, the value, the offset and the gds immediate.
1132 class DSAtomicRetPatIntrinsic<DS_Pseudo inst, ValueType vt, PatFrag frag,
1133   bit gds=0> : GCNPat <
1134   (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value)),
1135   (inst $ptr, getVregSrcForVT<vt>.ret:$value, Offset:$offset, (i1 gds))> {
// Returning and non-returning f64 add selected from the
// int_amdgcn_flat_atomic_fadd*_local_addrspace fragments; the no-return form
// gets AddedComplexity 1.
1138 def : DSAtomicRetPatIntrinsic<DS_ADD_RTN_F64, f64, int_amdgcn_flat_atomic_fadd_local_addrspace>;
1139 let AddedComplexity = 1 in
1140 def : DSAtomicRetPatIntrinsic<DS_ADD_F64, f64, int_amdgcn_flat_atomic_fadd_noret_local_addrspace>;
// Packed f16 atomic add patterns under HasAtomicDsPkAdd16Insts.
// NOTE(review): this instantiation has the same arguments and predicate as the
// DS_PK_ADD_RTN_F16 line in the 32-bit atomics section above, so it looks
// redundant - confirm whether it can be removed.
1143 let SubtargetPredicate = HasAtomicDsPkAdd16Insts in {
1144 defm : DSAtomicRetNoRetPat_mc<DS_PK_ADD_RTN_F16, DS_PK_ADD_F16, v2f16, "atomic_load_fadd">;
1145 } // End SubtargetPredicate = HasAtomicDsPkAdd16Insts
// Selects DS_ORDERED_COUNT for the SIds_ordered_count node (HasGDS only). The
// i16 offset is passed through the as_i16imm transform.
1147 let OtherPredicates = [HasGDS] in
1148 def : GCNPat <
1149   (SIds_ordered_count i32:$value, i16:$offset),
1150   (DS_ORDERED_COUNT $value, (as_i16imm $offset))
// Selection for the int_amdgcn_ds_add_gs_reg_rtn / int_amdgcn_ds_sub_gs_reg_rtn
// intrinsics. Each intrinsic has two patterns:
//  - i64 result: the DS_*_GS_REG_RTN pseudo is used directly;
//  - i32 result: the pseudo's result is copied to VReg_64 and sub0 is
//    extracted.
// The offset is a target immediate (timm) passed through as_i32timm.
1153 def : GCNPat <
1154   (i64 (int_amdgcn_ds_add_gs_reg_rtn i32:$src, timm:$offset32)),
1155   (DS_ADD_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32))
1158 def : GCNPat <
1159   (i32 (int_amdgcn_ds_add_gs_reg_rtn i32:$src, timm:$offset32)),
1160   (EXTRACT_SUBREG
1161     (i64 (COPY_TO_REGCLASS
1162       (DS_ADD_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32)),
1163       VReg_64)),
1164     sub0)
1167 def : GCNPat <
1168   (i64 (int_amdgcn_ds_sub_gs_reg_rtn i32:$src, timm:$offset32)),
1169   (DS_SUB_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32))
1172 def : GCNPat <
1173   (i32 (int_amdgcn_ds_sub_gs_reg_rtn i32:$src, timm:$offset32)),
1174   (EXTRACT_SUBREG
1175     (i64 (COPY_TO_REGCLASS
1176       (DS_SUB_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32)),
1177       VReg_64)),
1178     sub0)
1181 //===----------------------------------------------------------------------===//
1182 // Target-specific instruction encodings.
1183 //===----------------------------------------------------------------------===//
1185 //===----------------------------------------------------------------------===//
1186 // Base ENC_DS for GFX6, GFX7, GFX10, GFX11, GFX12.
1187 //===----------------------------------------------------------------------===//
1189 class Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<8> op, DS_Pseudo ps, int ef,
1190                                                string opName = ps.Mnemonic,
1191                                                bit hasGDS = true>
1192     : DS_Real<ps, opName>, SIMCInstr <ps.PseudoInstr, ef> {
1194   let Inst{7-0}   = !if(ps.has_offset0, offset0, 0);
1195   let Inst{15-8}  = !if(ps.has_offset1, offset1, 0);
1196   let Inst{17}    = !if(ps.has_gds, gds, ps.gdsValue);
1197   let Inst{25-18} = op;
1198   let Inst{31-26} = 0x36;
1199   let Inst{39-32} = !if(ps.has_addr, addr, !if(ps.has_gws_data0, data0{7-0}, 0));
1200   let Inst{47-40} = !if(ps.has_data0, data0{7-0}, 0);
1201   let Inst{55-48} = !if(ps.has_data1, data1{7-0}, 0);
1202   let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, 0);
1204   let gds = !if(hasGDS, ?, 0);
1207 //===----------------------------------------------------------------------===//
1208 // GFX12.
1209 //===----------------------------------------------------------------------===//
1211 multiclass DS_Real_gfx12<bits<8> op, string name = !tolower(NAME), bit needAlias = true> {
1212   defvar ps = !cast<DS_Pseudo>(NAME);
1213   let AssemblerPredicate = isGFX12Plus in {
1214     let DecoderNamespace = "GFX12" in
1215       def _gfx12 :
1216         Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, ps, SIEncodingFamily.GFX12,
1217                                                name, /*hasGDS=*/false>;
1218     if !and(needAlias, !ne(ps.Mnemonic, name)) then
1219       def : AMDGPUMnemonicAlias<ps.Mnemonic, name>;
1220   } // End AssemblerPredicate
// GFX12-only reals. The floating-point min/max instructions are given new
// "*_num_*" mnemonics; the remaining entries keep the default lower-cased
// name.
1223 defm DS_MIN_F32           : DS_Real_gfx12<0x012, "ds_min_num_f32">;
1224 defm DS_MAX_F32           : DS_Real_gfx12<0x013, "ds_max_num_f32">;
1225 defm DS_MIN_RTN_F32       : DS_Real_gfx12<0x032, "ds_min_num_rtn_f32">;
1226 defm DS_MAX_RTN_F32       : DS_Real_gfx12<0x033, "ds_max_num_rtn_f32">;
1227 defm DS_MIN_F64           : DS_Real_gfx12<0x052, "ds_min_num_f64">;
1228 defm DS_MAX_F64           : DS_Real_gfx12<0x053, "ds_max_num_f64">;
1229 defm DS_MIN_RTN_F64       : DS_Real_gfx12<0x072, "ds_min_num_rtn_f64">;
1230 defm DS_MAX_RTN_F64       : DS_Real_gfx12<0x073, "ds_max_num_rtn_f64">;
1231 defm DS_COND_SUB_U32      : DS_Real_gfx12<0x098>;
1232 defm DS_SUB_CLAMP_U32     : DS_Real_gfx12<0x099>;
1233 defm DS_COND_SUB_RTN_U32  : DS_Real_gfx12<0x0a8>;
1234 defm DS_SUB_CLAMP_RTN_U32 : DS_Real_gfx12<0x0a9>;
1235 defm DS_PK_ADD_F16        : DS_Real_gfx12<0x09a>;
1236 defm DS_PK_ADD_RTN_F16    : DS_Real_gfx12<0x0aa>;
1237 defm DS_PK_ADD_BF16       : DS_Real_gfx12<0x09b>;
1238 defm DS_PK_ADD_RTN_BF16   : DS_Real_gfx12<0x0ab>;
1240 // New aliases added in GFX12 without renaming the instructions.
// Each record pairs a ds_subrev_* spelling with the corresponding ds_rsub_*
// mnemonic; the aliases are only active under isGFX12Plus.
1241 let AssemblerPredicate = isGFX12Plus in {
1242   def : AMDGPUMnemonicAlias<"ds_subrev_u32", "ds_rsub_u32">;
1243   def : AMDGPUMnemonicAlias<"ds_subrev_rtn_u32", "ds_rsub_rtn_u32">;
1244   def : AMDGPUMnemonicAlias<"ds_subrev_u64", "ds_rsub_u64">;
1245   def : AMDGPUMnemonicAlias<"ds_subrev_rtn_u64", "ds_rsub_rtn_u64">;
1248 //===----------------------------------------------------------------------===//
1249 // GFX11.
1250 //===----------------------------------------------------------------------===//
1252 multiclass DS_Real_gfx11<bits<8> op, string name = !tolower(NAME)> {
1253   defvar ps = !cast<DS_Pseudo>(NAME);
1254   let AssemblerPredicate = isGFX11Only in {
1255     let DecoderNamespace = "GFX11" in
1256       def _gfx11 :
1257         Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, ps, SIEncodingFamily.GFX11,
1258                                                name>;
1259     if !ne(ps.Mnemonic, name) then
1260       def : AMDGPUMnemonicAlias<ps.Mnemonic, name>;
1261   } // End AssemblerPredicate
// Convenience multiclass: defines both the GFX11 and the GFX12 real with the
// same opcode and mnemonic.
1264 multiclass DS_Real_gfx11_gfx12<bits<8> op, string name = !tolower(NAME)>
1265   : DS_Real_gfx11<op, name>, DS_Real_gfx12<op, name>;
// GFX11/GFX12 reals whose mnemonics differ from the pseudo names: the
// DS_WRITE* pseudos are spelled "ds_store*", DS_WRXCHG* "ds_storexchg*", and
// DS_READ* "ds_load*".
1267 defm DS_WRITE_B32           : DS_Real_gfx11_gfx12<0x00d, "ds_store_b32">;
1268 defm DS_WRITE2_B32          : DS_Real_gfx11_gfx12<0x00e, "ds_store_2addr_b32">;
1269 defm DS_WRITE2ST64_B32      : DS_Real_gfx11_gfx12<0x00f, "ds_store_2addr_stride64_b32">;
1270 defm DS_WRITE_B8            : DS_Real_gfx11_gfx12<0x01e, "ds_store_b8">;
1271 defm DS_WRITE_B16           : DS_Real_gfx11_gfx12<0x01f, "ds_store_b16">;
1272 defm DS_WRXCHG_RTN_B32      : DS_Real_gfx11_gfx12<0x02d, "ds_storexchg_rtn_b32">;
1273 defm DS_WRXCHG2_RTN_B32     : DS_Real_gfx11_gfx12<0x02e, "ds_storexchg_2addr_rtn_b32">;
1274 defm DS_WRXCHG2ST64_RTN_B32 : DS_Real_gfx11_gfx12<0x02f, "ds_storexchg_2addr_stride64_rtn_b32">;
1275 defm DS_READ_B32            : DS_Real_gfx11_gfx12<0x036, "ds_load_b32">;
1276 defm DS_READ2_B32           : DS_Real_gfx11_gfx12<0x037, "ds_load_2addr_b32">;
1277 defm DS_READ2ST64_B32       : DS_Real_gfx11_gfx12<0x038, "ds_load_2addr_stride64_b32">;
1278 defm DS_READ_I8             : DS_Real_gfx11_gfx12<0x039, "ds_load_i8">;
1279 defm DS_READ_U8             : DS_Real_gfx11_gfx12<0x03a, "ds_load_u8">;
1280 defm DS_READ_I16            : DS_Real_gfx11_gfx12<0x03b, "ds_load_i16">;
1281 defm DS_READ_U16            : DS_Real_gfx11_gfx12<0x03c, "ds_load_u16">;
1282 defm DS_WRITE_B64           : DS_Real_gfx11_gfx12<0x04d, "ds_store_b64">;
1283 defm DS_WRITE2_B64          : DS_Real_gfx11_gfx12<0x04e, "ds_store_2addr_b64">;
1284 defm DS_WRITE2ST64_B64      : DS_Real_gfx11_gfx12<0x04f, "ds_store_2addr_stride64_b64">;
1285 defm DS_WRXCHG_RTN_B64      : DS_Real_gfx11_gfx12<0x06d, "ds_storexchg_rtn_b64">;
1286 defm DS_WRXCHG2_RTN_B64     : DS_Real_gfx11_gfx12<0x06e, "ds_storexchg_2addr_rtn_b64">;
1287 defm DS_WRXCHG2ST64_RTN_B64 : DS_Real_gfx11_gfx12<0x06f, "ds_storexchg_2addr_stride64_rtn_b64">;
1288 defm DS_READ_B64            : DS_Real_gfx11_gfx12<0x076, "ds_load_b64">;
1289 defm DS_READ2_B64           : DS_Real_gfx11_gfx12<0x077, "ds_load_2addr_b64">;
1290 defm DS_READ2ST64_B64       : DS_Real_gfx11_gfx12<0x078, "ds_load_2addr_stride64_b64">;
1291 defm DS_WRITE_B8_D16_HI     : DS_Real_gfx11_gfx12<0x0a0, "ds_store_b8_d16_hi">;
1292 defm DS_WRITE_B16_D16_HI    : DS_Real_gfx11_gfx12<0x0a1, "ds_store_b16_d16_hi">;
1293 defm DS_READ_U8_D16         : DS_Real_gfx11_gfx12<0x0a2, "ds_load_u8_d16">;
1294 defm DS_READ_U8_D16_HI      : DS_Real_gfx11_gfx12<0x0a3, "ds_load_u8_d16_hi">;
1295 defm DS_READ_I8_D16         : DS_Real_gfx11_gfx12<0x0a4, "ds_load_i8_d16">;
1296 defm DS_READ_I8_D16_HI      : DS_Real_gfx11_gfx12<0x0a5, "ds_load_i8_d16_hi">;
1297 defm DS_READ_U16_D16        : DS_Real_gfx11_gfx12<0x0a6, "ds_load_u16_d16">;
1298 defm DS_READ_U16_D16_HI     : DS_Real_gfx11_gfx12<0x0a7, "ds_load_u16_d16_hi">;
1299 defm DS_WRITE_ADDTID_B32    : DS_Real_gfx11_gfx12<0x0b0, "ds_store_addtid_b32">;
1300 defm DS_READ_ADDTID_B32     : DS_Real_gfx11_gfx12<0x0b1, "ds_load_addtid_b32">;
1301 defm DS_WRITE_B96           : DS_Real_gfx11_gfx12<0x0de, "ds_store_b96">;
1302 defm DS_WRITE_B128          : DS_Real_gfx11_gfx12<0x0df, "ds_store_b128">;
1303 defm DS_READ_B96            : DS_Real_gfx11_gfx12<0x0fe, "ds_load_b96">;
1304 defm DS_READ_B128           : DS_Real_gfx11_gfx12<0x0ff, "ds_load_b128">;
1306 // DS_CMPST_* are renamed to DS_CMPSTORE_* in GFX11, and the data operands (src and cmp) are also swapped
1307 // compared to pre-GFX11.
1308 // Note: the mnemonic alias is not generated to avoid a potential ambiguity due to the semantics change.
// The *_B32/*_B64 forms get both GFX11 and GFX12 reals; the *_F32/*_F64 forms
// are defined for GFX11 only.
1310 defm DS_CMPSTORE_B32                     : DS_Real_gfx11_gfx12<0x010>;
1311 defm DS_CMPSTORE_F32                     : DS_Real_gfx11<0x011>;
1312 defm DS_CMPSTORE_RTN_B32                 : DS_Real_gfx11_gfx12<0x030>;
1313 defm DS_CMPSTORE_RTN_F32                 : DS_Real_gfx11<0x031>;
1314 defm DS_CMPSTORE_B64                     : DS_Real_gfx11_gfx12<0x050>;
1315 defm DS_CMPSTORE_F64                     : DS_Real_gfx11<0x051>;
1316 defm DS_CMPSTORE_RTN_B64                 : DS_Real_gfx11_gfx12<0x070>;
1317 defm DS_CMPSTORE_RTN_F64                 : DS_Real_gfx11<0x071>;
// Remaining GFX11 (and, for DS_ADD_RTN_F32, GFX12) reals that keep their
// default mnemonics.
1319 defm DS_ADD_RTN_F32                      : DS_Real_gfx11_gfx12<0x079>;
1320 defm DS_ADD_GS_REG_RTN                   : DS_Real_gfx11<0x07a>;
1321 defm DS_SUB_GS_REG_RTN                   : DS_Real_gfx11<0x07b>;
1322 defm DS_BVH_STACK_RTN_B32                : DS_Real_gfx11<0x0ad>;
1324 //===----------------------------------------------------------------------===//
1325 // GFX10.
1326 //===----------------------------------------------------------------------===//
1328 let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
1329   multiclass DS_Real_gfx10<bits<8> op>  {
1330     def _gfx10 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op,
1331       !cast<DS_Pseudo>(NAME), SIEncodingFamily.GFX10>;
1332   }
1333 } // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
// Reals defined for GFX10 only in this section. Note that DS_ADD_RTN_F32 uses
// opcode 0x055 here, whereas its GFX11/GFX12 real above uses 0x079.
1335 defm DS_ADD_RTN_F32      : DS_Real_gfx10<0x055>;
1336 defm DS_WRITE_B8_D16_HI  : DS_Real_gfx10<0x0a0>;
1337 defm DS_WRITE_B16_D16_HI : DS_Real_gfx10<0x0a1>;
1338 defm DS_READ_U8_D16      : DS_Real_gfx10<0x0a2>;
1339 defm DS_READ_U8_D16_HI   : DS_Real_gfx10<0x0a3>;
1340 defm DS_READ_I8_D16      : DS_Real_gfx10<0x0a4>;
1341 defm DS_READ_I8_D16_HI   : DS_Real_gfx10<0x0a5>;
1342 defm DS_READ_U16_D16     : DS_Real_gfx10<0x0a6>;
1343 defm DS_READ_U16_D16_HI  : DS_Real_gfx10<0x0a7>;
1344 defm DS_WRITE_ADDTID_B32 : DS_Real_gfx10<0x0b0>;
1345 defm DS_READ_ADDTID_B32  : DS_Real_gfx10<0x0b1>;
1347 //===----------------------------------------------------------------------===//
1348 // GFX10, GFX11, GFX12.
1349 //===----------------------------------------------------------------------===//
1351 multiclass DS_Real_gfx10_gfx11_gfx12<bits<8> op> :
1352   DS_Real_gfx10<op>, DS_Real_gfx11<op>, DS_Real_gfx12<op>;
1354 multiclass DS_Real_gfx10_gfx11<bits<8> op> :
1355   DS_Real_gfx10<op>, DS_Real_gfx11<op>;
// Instructions first encoded for GFX10. DS_ADD_SRC2_F32 is defined for GFX10
// only; the others also get GFX11 and GFX12 reals.
1357 defm DS_ADD_F32          : DS_Real_gfx10_gfx11_gfx12<0x015>;
1358 defm DS_ADD_SRC2_F32     : DS_Real_gfx10<0x095>;
1359 defm DS_PERMUTE_B32      : DS_Real_gfx10_gfx11_gfx12<0x0b2>;
1360 defm DS_BPERMUTE_B32     : DS_Real_gfx10_gfx11_gfx12<0x0b3>;
1362 //===----------------------------------------------------------------------===//
1363 // GFX7, GFX10, GFX11, GFX12.
1364 //===----------------------------------------------------------------------===//
1366 let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
1367   multiclass DS_Real_gfx7<bits<8> op> {
1368     def _gfx7 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op,
1369       !cast<DS_Pseudo>(NAME), SIEncodingFamily.SI>;
1370   }
1371 } // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"
// Convenience multiclasses combining the GFX7 real with the later-generation
// reals that share the same opcode.
1373 multiclass DS_Real_gfx7_gfx10_gfx11_gfx12<bits<8> op> :
1374   DS_Real_gfx7<op>, DS_Real_gfx10_gfx11_gfx12<op>;
1376 multiclass DS_Real_gfx7_gfx10_gfx11<bits<8> op> :
1377   DS_Real_gfx7<op>, DS_Real_gfx10_gfx11<op>;
1379 multiclass DS_Real_gfx7_gfx10<bits<8> op> :
1380   DS_Real_gfx7<op>, DS_Real_gfx10<op>;
1382 // FIXME-GFX7: Add tests when upstreaming this part.
// Instructions whose earliest real in this file is GFX7. The 96/128-bit
// read/write entries stop at GFX10 here because their GFX11/GFX12 reals are
// defined above under the ds_load_*/ds_store_* mnemonics.
1383 defm DS_GWS_SEMA_RELEASE_ALL : DS_Real_gfx7_gfx10_gfx11<0x018>;
1384 defm DS_WRAP_RTN_B32         : DS_Real_gfx7_gfx10_gfx11<0x034>;
1385 defm DS_CONDXCHG32_RTN_B64   : DS_Real_gfx7_gfx10_gfx11_gfx12<0x07e>;
1386 defm DS_WRITE_B96            : DS_Real_gfx7_gfx10<0x0de>;
1387 defm DS_WRITE_B128           : DS_Real_gfx7_gfx10<0x0df>;
1388 defm DS_READ_B96             : DS_Real_gfx7_gfx10<0x0fe>;
1389 defm DS_READ_B128            : DS_Real_gfx7_gfx10<0x0ff>;
1391 //===----------------------------------------------------------------------===//
1392 // GFX6, GFX7, GFX10, GFX11.
1393 //===----------------------------------------------------------------------===//
1395 let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
1396   multiclass DS_Real_gfx6_gfx7<bits<8> op> {
1397     def _gfx6_gfx7 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op,
1398       !cast<DS_Pseudo>(NAME), SIEncodingFamily.SI>;
1399   }
1400 } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
// Convenience multiclasses combining the shared GFX6/GFX7 real with the
// later-generation reals that use the same opcode.
1402 multiclass DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<8> op> :
1403   DS_Real_gfx6_gfx7<op>, DS_Real_gfx10_gfx11_gfx12<op>;
1405 multiclass DS_Real_gfx6_gfx7_gfx10_gfx11<bits<8> op> :
1406   DS_Real_gfx6_gfx7<op>, DS_Real_gfx10_gfx11<op>;
1408 multiclass DS_Real_gfx6_gfx7_gfx10<bits<8> op> :
1409   DS_Real_gfx6_gfx7<op>, DS_Real_gfx10<op>;
1411 defm DS_ADD_U32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x000>;
1412 defm DS_SUB_U32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x001>;
1413 defm DS_RSUB_U32            : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x002>;
1414 defm DS_INC_U32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x003>;
1415 defm DS_DEC_U32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x004>;
1416 defm DS_MIN_I32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x005>;
1417 defm DS_MAX_I32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x006>;
1418 defm DS_MIN_U32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x007>;
1419 defm DS_MAX_U32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x008>;
1420 defm DS_AND_B32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x009>;
1421 defm DS_OR_B32              : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00a>;
1422 defm DS_XOR_B32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00b>;
1423 defm DS_MSKOR_B32           : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00c>;
1425 defm DS_WRITE_B32           : DS_Real_gfx6_gfx7_gfx10<0x00d>;
1426 defm DS_WRITE2_B32          : DS_Real_gfx6_gfx7_gfx10<0x00e>;
1427 defm DS_WRITE2ST64_B32      : DS_Real_gfx6_gfx7_gfx10<0x00f>;
1428 defm DS_CMPST_B32           : DS_Real_gfx6_gfx7_gfx10<0x010>;
1429 defm DS_CMPST_F32           : DS_Real_gfx6_gfx7_gfx10<0x011>;
1431 defm DS_MIN_F32             : DS_Real_gfx6_gfx7_gfx10_gfx11<0x012>;
1432 defm DS_MAX_F32             : DS_Real_gfx6_gfx7_gfx10_gfx11<0x013>;
1433 defm DS_NOP                 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x014>;
1434 defm DS_GWS_INIT            : DS_Real_gfx6_gfx7_gfx10_gfx11<0x019>;
1435 defm DS_GWS_SEMA_V          : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01a>;
1436 defm DS_GWS_SEMA_BR         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01b>;
1437 defm DS_GWS_SEMA_P          : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01c>;
1438 defm DS_GWS_BARRIER         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01d>;
1440 defm DS_WRITE_B8            : DS_Real_gfx6_gfx7_gfx10<0x01e>;
1441 defm DS_WRITE_B16           : DS_Real_gfx6_gfx7_gfx10<0x01f>;
// Real-encoding instantiations for the 32-bit *_RTN_* entries, DS_SWIZZLE_B32,
// the DS_READ* entries of 32 bits and narrower, and DS_CONSUME / DS_APPEND /
// DS_ORDERED_COUNT. Arguments run from 0x020 to 0x03f; 0x034 does not appear.
// The DS_Real_gfx6_gfx7_gfx10* multiclasses are defined outside this excerpt.
// Presumably the hex argument is the opcode and the suffix lists the generations
// covered (TODO confirm against the multiclass definitions).
1443 defm DS_ADD_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x020>;
1444 defm DS_SUB_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x021>;
1445 defm DS_RSUB_RTN_U32        : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x022>;
1446 defm DS_INC_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x023>;
1447 defm DS_DEC_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x024>;
1448 defm DS_MIN_RTN_I32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x025>;
1449 defm DS_MAX_RTN_I32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x026>;
1450 defm DS_MIN_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x027>;
1451 defm DS_MAX_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x028>;
1452 defm DS_AND_RTN_B32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x029>;
1453 defm DS_OR_RTN_B32          : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02a>;
1454 defm DS_XOR_RTN_B32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02b>;
1455 defm DS_MSKOR_RTN_B32       : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02c>;
1457 defm DS_WRXCHG_RTN_B32      : DS_Real_gfx6_gfx7_gfx10<0x02d>;
1458 defm DS_WRXCHG2_RTN_B32     : DS_Real_gfx6_gfx7_gfx10<0x02e>;
1459 defm DS_WRXCHG2ST64_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02f>;
1460 defm DS_CMPST_RTN_B32       : DS_Real_gfx6_gfx7_gfx10<0x030>;
1461 defm DS_CMPST_RTN_F32       : DS_Real_gfx6_gfx7_gfx10<0x031>;
1463 defm DS_MIN_RTN_F32         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x032>;
1464 defm DS_MAX_RTN_F32         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x033>;
1465 defm DS_SWIZZLE_B32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x035>;
1467 defm DS_READ_B32            : DS_Real_gfx6_gfx7_gfx10<0x036>;
1468 defm DS_READ2_B32           : DS_Real_gfx6_gfx7_gfx10<0x037>;
1469 defm DS_READ2ST64_B32       : DS_Real_gfx6_gfx7_gfx10<0x038>;
1470 defm DS_READ_I8             : DS_Real_gfx6_gfx7_gfx10<0x039>;
1471 defm DS_READ_U8             : DS_Real_gfx6_gfx7_gfx10<0x03a>;
1472 defm DS_READ_I16            : DS_Real_gfx6_gfx7_gfx10<0x03b>;
1473 defm DS_READ_U16            : DS_Real_gfx6_gfx7_gfx10<0x03c>;
1475 defm DS_CONSUME             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x03d>;
1476 defm DS_APPEND              : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x03e>;
1477 defm DS_ORDERED_COUNT       : DS_Real_gfx6_gfx7_gfx10_gfx11<0x03f>;
// Real-encoding instantiations for the 64-bit entries without the _RTN suffix.
// Arguments run contiguously from 0x040 to 0x053 and mirror the 32-bit *_RTN_*
// block's 0x020-0x033 numbering with an offset of 0x20.
// The DS_Real_gfx6_gfx7_gfx10* multiclasses are defined outside this excerpt.
// Presumably the hex argument is the opcode and the suffix lists the generations
// covered (TODO confirm against the multiclass definitions).
1478 defm DS_ADD_U64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x040>;
1479 defm DS_SUB_U64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x041>;
1480 defm DS_RSUB_U64            : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x042>;
1481 defm DS_INC_U64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x043>;
1482 defm DS_DEC_U64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x044>;
1483 defm DS_MIN_I64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x045>;
1484 defm DS_MAX_I64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x046>;
1485 defm DS_MIN_U64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x047>;
1486 defm DS_MAX_U64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x048>;
1487 defm DS_AND_B64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x049>;
1488 defm DS_OR_B64              : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x04a>;
1489 defm DS_XOR_B64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x04b>;
1490 defm DS_MSKOR_B64           : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x04c>;
1492 defm DS_WRITE_B64           : DS_Real_gfx6_gfx7_gfx10<0x04d>;
1493 defm DS_WRITE2_B64          : DS_Real_gfx6_gfx7_gfx10<0x04e>;
1494 defm DS_WRITE2ST64_B64      : DS_Real_gfx6_gfx7_gfx10<0x04f>;
1495 defm DS_CMPST_B64           : DS_Real_gfx6_gfx7_gfx10<0x050>;
1496 defm DS_CMPST_F64           : DS_Real_gfx6_gfx7_gfx10<0x051>;
1498 defm DS_MIN_F64             : DS_Real_gfx6_gfx7_gfx10_gfx11<0x052>;
1499 defm DS_MAX_F64             : DS_Real_gfx6_gfx7_gfx10_gfx11<0x053>;
// Real-encoding instantiations for the 64-bit *_RTN_* entries (0x060-0x073)
// followed by the 64-bit DS_READ* entries (0x076-0x078). The values 0x074 and
// 0x075 do not appear in this run.
// The DS_Real_gfx6_gfx7_gfx10* multiclasses are defined outside this excerpt.
// Presumably the hex argument is the opcode and the suffix lists the generations
// covered (TODO confirm against the multiclass definitions).
1500 defm DS_ADD_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x060>;
1501 defm DS_SUB_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x061>;
1502 defm DS_RSUB_RTN_U64        : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x062>;
1503 defm DS_INC_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x063>;
1504 defm DS_DEC_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x064>;
1505 defm DS_MIN_RTN_I64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x065>;
1506 defm DS_MAX_RTN_I64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x066>;
1507 defm DS_MIN_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x067>;
1508 defm DS_MAX_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x068>;
1509 defm DS_AND_RTN_B64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x069>;
1510 defm DS_OR_RTN_B64          : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x06a>;
1511 defm DS_XOR_RTN_B64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x06b>;
1512 defm DS_MSKOR_RTN_B64       : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x06c>;
1514 defm DS_WRXCHG_RTN_B64      : DS_Real_gfx6_gfx7_gfx10<0x06d>;
1515 defm DS_WRXCHG2_RTN_B64     : DS_Real_gfx6_gfx7_gfx10<0x06e>;
1516 defm DS_WRXCHG2ST64_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06f>;
1517 defm DS_CMPST_RTN_B64       : DS_Real_gfx6_gfx7_gfx10<0x070>;
1518 defm DS_CMPST_RTN_F64       : DS_Real_gfx6_gfx7_gfx10<0x071>;
1520 defm DS_MIN_RTN_F64         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x072>;
1521 defm DS_MAX_RTN_F64         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x073>;
1523 defm DS_READ_B64            : DS_Real_gfx6_gfx7_gfx10<0x076>;
1524 defm DS_READ2_B64           : DS_Real_gfx6_gfx7_gfx10<0x077>;
1525 defm DS_READ2ST64_B64       : DS_Real_gfx6_gfx7_gfx10<0x078>;
// Real-encoding instantiations for the *_SRC2_* entries: the 32-bit group uses
// 0x080-0x093 and the 64-bit group uses 0x0c0-0x0d3 (the same low bits with an
// offset of 0x40). All of them use the plain DS_Real_gfx6_gfx7_gfx10 multiclass;
// none uses a _gfx11 or _gfx12 variant.
// The multiclass is defined outside this excerpt. Presumably the hex argument
// is the opcode (TODO confirm against the multiclass definition).
1526 defm DS_ADD_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x080>;
1527 defm DS_SUB_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x081>;
1528 defm DS_RSUB_SRC2_U32       : DS_Real_gfx6_gfx7_gfx10<0x082>;
1529 defm DS_INC_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x083>;
1530 defm DS_DEC_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x084>;
1531 defm DS_MIN_SRC2_I32        : DS_Real_gfx6_gfx7_gfx10<0x085>;
1532 defm DS_MAX_SRC2_I32        : DS_Real_gfx6_gfx7_gfx10<0x086>;
1533 defm DS_MIN_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x087>;
1534 defm DS_MAX_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x088>;
1535 defm DS_AND_SRC2_B32        : DS_Real_gfx6_gfx7_gfx10<0x089>;
1536 defm DS_OR_SRC2_B32         : DS_Real_gfx6_gfx7_gfx10<0x08a>;
1537 defm DS_XOR_SRC2_B32        : DS_Real_gfx6_gfx7_gfx10<0x08b>;
1538 defm DS_WRITE_SRC2_B32      : DS_Real_gfx6_gfx7_gfx10<0x08d>;
1539 defm DS_MIN_SRC2_F32        : DS_Real_gfx6_gfx7_gfx10<0x092>;
1540 defm DS_MAX_SRC2_F32        : DS_Real_gfx6_gfx7_gfx10<0x093>;
1541 defm DS_ADD_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c0>;
1542 defm DS_SUB_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c1>;
1543 defm DS_RSUB_SRC2_U64       : DS_Real_gfx6_gfx7_gfx10<0x0c2>;
1544 defm DS_INC_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c3>;
1545 defm DS_DEC_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c4>;
1546 defm DS_MIN_SRC2_I64        : DS_Real_gfx6_gfx7_gfx10<0x0c5>;
1547 defm DS_MAX_SRC2_I64        : DS_Real_gfx6_gfx7_gfx10<0x0c6>;
1548 defm DS_MIN_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c7>;
1549 defm DS_MAX_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c8>;
1550 defm DS_AND_SRC2_B64        : DS_Real_gfx6_gfx7_gfx10<0x0c9>;
1551 defm DS_OR_SRC2_B64         : DS_Real_gfx6_gfx7_gfx10<0x0ca>;
1552 defm DS_XOR_SRC2_B64        : DS_Real_gfx6_gfx7_gfx10<0x0cb>;
1553 defm DS_WRITE_SRC2_B64      : DS_Real_gfx6_gfx7_gfx10<0x0cd>;
1554 defm DS_MIN_SRC2_F64        : DS_Real_gfx6_gfx7_gfx10<0x0d2>;
1555 defm DS_MAX_SRC2_F64        : DS_Real_gfx6_gfx7_gfx10<0x0d3>;
1557 //===----------------------------------------------------------------------===//
1558 // GFX8, GFX9 (VI).
1559 //===----------------------------------------------------------------------===//
// Real (encoded) form of a DS pseudo-instruction for the VI encoding family.
//   op - 8-bit opcode; stored in Inst{24-17}.
//   ps - the DS_Pseudo being encoded. Its has_* bits decide, field by field,
//        whether the operand is encoded or the field is forced to 0.
// Derives from DS_Real<ps> and registers itself through SIMCInstr under
// ps.PseudoInstr with SIEncodingFamily.VI.
// The operand names used below (offset0, offset1, gds, acc, addr, data0, data1,
// vdst) are not declared in this class; presumably they are fields inherited
// from DS_Real (TODO confirm there).
// NOTE(review): the closing brace of this class body is not present in this
// extract (the embedded numbering jumps from 1577 to 1580). Restore it if it
// was lost during extraction.
1561 class DS_Real_vi <bits<8> op, DS_Pseudo ps> :
1562   DS_Real <ps>,
1563   SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
// Assembler acceptance is gated on isGFX8GFX9; decoding uses the "GFX8" namespace.
1564   let AssemblerPredicate = isGFX8GFX9;
1565   let DecoderNamespace = "GFX8";
// 64-bit instruction word layout, low bits first.
1567   // encoding
// Bits 7-0 and 15-8: the two 8-bit offset fields, zeroed when the pseudo lacks them.
1568   let Inst{7-0}   = !if(ps.has_offset0, offset0, 0);
1569   let Inst{15-8}  = !if(ps.has_offset1, offset1, 0);
// Bit 16: the gds operand, or the fixed ps.gdsValue when the pseudo has no gds operand.
1570   let Inst{16}    = !if(ps.has_gds, gds, ps.gdsValue);
// Bits 24-17: the opcode passed as the first template argument.
1571   let Inst{24-17} = op;
// Bit 25: always taken from acc; there is no has_* guard on this field.
1572   let Inst{25}    = acc;
1573   let Inst{31-26} = 0x36; // ds prefix
// Bits 39-32: addr when present. Otherwise, if has_gws_data0 is set, the low
// 8 bits of data0 are placed in this field; if neither applies the field is 0.
1574   let Inst{39-32} = !if(ps.has_addr, addr, !if(ps.has_gws_data0, data0{7-0}, 0));
// Bits 47-40, 55-48, 63-56: low 8 bits of data0, data1 and vdst, each zeroed
// when the corresponding has_* bit of the pseudo is clear.
1575   let Inst{47-40} = !if(ps.has_data0, data0{7-0}, 0);
1576   let Inst{55-48} = !if(ps.has_data1, data1{7-0}, 0);
1577   let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, 0);
// VI real encodings for the 32-bit entries without the _RTN suffix, DS_NOP,
// the DS_GWS_* entries, DS_WRITE_ADDTID_B32 and the byte/short writes.
// In every line the first template argument is the 8-bit opcode (DS_Real_vi
// stores it in Inst{24-17}) and the second is the pseudo being encoded.
// The list is not in ascending opcode order: the DS_GWS_* entries use 0x99-0x9d
// and sit between 0x15 and 0x1d. The same DS_GWS_* names are given 0x019-0x01d
// in the gfx6/gfx7/gfx10 defm list earlier in the file, so the two tables do
// not share values for these entries.
1580 def DS_ADD_U32_vi         : DS_Real_vi<0x0,  DS_ADD_U32>;
1581 def DS_SUB_U32_vi         : DS_Real_vi<0x1,  DS_SUB_U32>;
1582 def DS_RSUB_U32_vi        : DS_Real_vi<0x2,  DS_RSUB_U32>;
1583 def DS_INC_U32_vi         : DS_Real_vi<0x3,  DS_INC_U32>;
1584 def DS_DEC_U32_vi         : DS_Real_vi<0x4,  DS_DEC_U32>;
1585 def DS_MIN_I32_vi         : DS_Real_vi<0x5,  DS_MIN_I32>;
1586 def DS_MAX_I32_vi         : DS_Real_vi<0x6,  DS_MAX_I32>;
1587 def DS_MIN_U32_vi         : DS_Real_vi<0x7,  DS_MIN_U32>;
1588 def DS_MAX_U32_vi         : DS_Real_vi<0x8,  DS_MAX_U32>;
1589 def DS_AND_B32_vi         : DS_Real_vi<0x9,  DS_AND_B32>;
1590 def DS_OR_B32_vi          : DS_Real_vi<0xa,  DS_OR_B32>;
1591 def DS_XOR_B32_vi         : DS_Real_vi<0xb,  DS_XOR_B32>;
1592 def DS_MSKOR_B32_vi       : DS_Real_vi<0xc,  DS_MSKOR_B32>;
1593 def DS_WRITE_B32_vi       : DS_Real_vi<0xd,  DS_WRITE_B32>;
1594 def DS_WRITE2_B32_vi      : DS_Real_vi<0xe,  DS_WRITE2_B32>;
1595 def DS_WRITE2ST64_B32_vi  : DS_Real_vi<0xf,  DS_WRITE2ST64_B32>;
1596 def DS_CMPST_B32_vi       : DS_Real_vi<0x10, DS_CMPST_B32>;
1597 def DS_CMPST_F32_vi       : DS_Real_vi<0x11, DS_CMPST_F32>;
1598 def DS_MIN_F32_vi         : DS_Real_vi<0x12, DS_MIN_F32>;
1599 def DS_MAX_F32_vi         : DS_Real_vi<0x13, DS_MAX_F32>;
1600 def DS_NOP_vi             : DS_Real_vi<0x14, DS_NOP>;
1601 def DS_ADD_F32_vi         : DS_Real_vi<0x15, DS_ADD_F32>;
1602 def DS_GWS_INIT_vi        : DS_Real_vi<0x99, DS_GWS_INIT>;
1603 def DS_GWS_SEMA_V_vi      : DS_Real_vi<0x9a, DS_GWS_SEMA_V>;
1604 def DS_GWS_SEMA_BR_vi     : DS_Real_vi<0x9b, DS_GWS_SEMA_BR>;
1605 def DS_GWS_SEMA_P_vi      : DS_Real_vi<0x9c, DS_GWS_SEMA_P>;
1606 def DS_GWS_BARRIER_vi     : DS_Real_vi<0x9d, DS_GWS_BARRIER>;
1607 def DS_WRITE_ADDTID_B32_vi : DS_Real_vi<0x1d, DS_WRITE_ADDTID_B32>;
1608 def DS_WRITE_B8_vi        : DS_Real_vi<0x1e, DS_WRITE_B8>;
1609 def DS_WRITE_B16_vi       : DS_Real_vi<0x1f, DS_WRITE_B16>;
// VI real encodings for the 32-bit *_RTN_* entries, the DS_READ* entries of
// 32 bits and narrower, DS_CONSUME / DS_APPEND / DS_ORDERED_COUNT, and the
// swizzle / permute entries. First template argument: 8-bit opcode placed in
// Inst{24-17} by DS_Real_vi; second: the pseudo being encoded.
// Several values here differ from the gfx6/gfx7/gfx10 defm list earlier in the
// file: DS_SWIZZLE_B32 is 0x3d here (0x035 there), and DS_CONSUME / DS_APPEND /
// DS_ORDERED_COUNT are 0xbd-0xbf here (0x03d-0x03f there). As a result the
// list is not in ascending opcode order (0x3c, 0xb6, 0xbd-0xbf, then 0x3d-0x3f).
1610 def DS_ADD_RTN_U32_vi     : DS_Real_vi<0x20, DS_ADD_RTN_U32>;
1611 def DS_SUB_RTN_U32_vi     : DS_Real_vi<0x21, DS_SUB_RTN_U32>;
1612 def DS_RSUB_RTN_U32_vi    : DS_Real_vi<0x22, DS_RSUB_RTN_U32>;
1613 def DS_INC_RTN_U32_vi     : DS_Real_vi<0x23, DS_INC_RTN_U32>;
1614 def DS_DEC_RTN_U32_vi     : DS_Real_vi<0x24, DS_DEC_RTN_U32>;
1615 def DS_MIN_RTN_I32_vi     : DS_Real_vi<0x25, DS_MIN_RTN_I32>;
1616 def DS_MAX_RTN_I32_vi     : DS_Real_vi<0x26, DS_MAX_RTN_I32>;
1617 def DS_MIN_RTN_U32_vi     : DS_Real_vi<0x27, DS_MIN_RTN_U32>;
1618 def DS_MAX_RTN_U32_vi     : DS_Real_vi<0x28, DS_MAX_RTN_U32>;
1619 def DS_AND_RTN_B32_vi     : DS_Real_vi<0x29, DS_AND_RTN_B32>;
1620 def DS_OR_RTN_B32_vi      : DS_Real_vi<0x2a, DS_OR_RTN_B32>;
1621 def DS_XOR_RTN_B32_vi     : DS_Real_vi<0x2b, DS_XOR_RTN_B32>;
1622 def DS_MSKOR_RTN_B32_vi   : DS_Real_vi<0x2c, DS_MSKOR_RTN_B32>;
1623 def DS_WRXCHG_RTN_B32_vi  : DS_Real_vi<0x2d, DS_WRXCHG_RTN_B32>;
1624 def DS_WRXCHG2_RTN_B32_vi : DS_Real_vi<0x2e, DS_WRXCHG2_RTN_B32>;
1625 def DS_WRXCHG2ST64_RTN_B32_vi : DS_Real_vi<0x2f, DS_WRXCHG2ST64_RTN_B32>;
1626 def DS_CMPST_RTN_B32_vi   : DS_Real_vi<0x30, DS_CMPST_RTN_B32>;
1627 def DS_CMPST_RTN_F32_vi   : DS_Real_vi<0x31, DS_CMPST_RTN_F32>;
1628 def DS_MIN_RTN_F32_vi     : DS_Real_vi<0x32, DS_MIN_RTN_F32>;
1629 def DS_MAX_RTN_F32_vi     : DS_Real_vi<0x33, DS_MAX_RTN_F32>;
1630 def DS_WRAP_RTN_B32_vi    : DS_Real_vi<0x34, DS_WRAP_RTN_B32>;
1631 def DS_ADD_RTN_F32_vi     : DS_Real_vi<0x35, DS_ADD_RTN_F32>;
1632 def DS_READ_B32_vi        : DS_Real_vi<0x36, DS_READ_B32>;
1633 def DS_READ2_B32_vi       : DS_Real_vi<0x37, DS_READ2_B32>;
1634 def DS_READ2ST64_B32_vi   : DS_Real_vi<0x38, DS_READ2ST64_B32>;
1635 def DS_READ_I8_vi         : DS_Real_vi<0x39, DS_READ_I8>;
1636 def DS_READ_U8_vi         : DS_Real_vi<0x3a, DS_READ_U8>;
1637 def DS_READ_I16_vi        : DS_Real_vi<0x3b, DS_READ_I16>;
1638 def DS_READ_U16_vi        : DS_Real_vi<0x3c, DS_READ_U16>;
1639 def DS_READ_ADDTID_B32_vi : DS_Real_vi<0xb6, DS_READ_ADDTID_B32>;
1640 def DS_CONSUME_vi         : DS_Real_vi<0xbd, DS_CONSUME>;
1641 def DS_APPEND_vi          : DS_Real_vi<0xbe, DS_APPEND>;
1642 def DS_ORDERED_COUNT_vi   : DS_Real_vi<0xbf, DS_ORDERED_COUNT>;
1643 def DS_SWIZZLE_B32_vi     : DS_Real_vi<0x3d, DS_SWIZZLE_B32>;
1644 def DS_PERMUTE_B32_vi     : DS_Real_vi<0x3e, DS_PERMUTE_B32>;
1645 def DS_BPERMUTE_B32_vi    : DS_Real_vi<0x3f, DS_BPERMUTE_B32>;
// VI real encodings for the 64-bit entries without the _RTN suffix (0x40-0x53),
// followed by the *_D16 / *_D16_HI write and read entries (0x54-0x5b).
// First template argument: 8-bit opcode placed in Inst{24-17} by DS_Real_vi;
// second: the pseudo being encoded. Values in this run are contiguous and in
// ascending order.
1647 def DS_ADD_U64_vi         : DS_Real_vi<0x40, DS_ADD_U64>;
1648 def DS_SUB_U64_vi         : DS_Real_vi<0x41, DS_SUB_U64>;
1649 def DS_RSUB_U64_vi        : DS_Real_vi<0x42, DS_RSUB_U64>;
1650 def DS_INC_U64_vi         : DS_Real_vi<0x43, DS_INC_U64>;
1651 def DS_DEC_U64_vi         : DS_Real_vi<0x44, DS_DEC_U64>;
1652 def DS_MIN_I64_vi         : DS_Real_vi<0x45, DS_MIN_I64>;
1653 def DS_MAX_I64_vi         : DS_Real_vi<0x46, DS_MAX_I64>;
1654 def DS_MIN_U64_vi         : DS_Real_vi<0x47, DS_MIN_U64>;
1655 def DS_MAX_U64_vi         : DS_Real_vi<0x48, DS_MAX_U64>;
1656 def DS_AND_B64_vi         : DS_Real_vi<0x49, DS_AND_B64>;
1657 def DS_OR_B64_vi          : DS_Real_vi<0x4a, DS_OR_B64>;
1658 def DS_XOR_B64_vi         : DS_Real_vi<0x4b, DS_XOR_B64>;
1659 def DS_MSKOR_B64_vi       : DS_Real_vi<0x4c, DS_MSKOR_B64>;
1660 def DS_WRITE_B64_vi       : DS_Real_vi<0x4d, DS_WRITE_B64>;
1661 def DS_WRITE2_B64_vi      : DS_Real_vi<0x4E, DS_WRITE2_B64>;
1662 def DS_WRITE2ST64_B64_vi  : DS_Real_vi<0x4f, DS_WRITE2ST64_B64>;
1663 def DS_CMPST_B64_vi       : DS_Real_vi<0x50, DS_CMPST_B64>;
1664 def DS_CMPST_F64_vi       : DS_Real_vi<0x51, DS_CMPST_F64>;
1665 def DS_MIN_F64_vi         : DS_Real_vi<0x52, DS_MIN_F64>;
1666 def DS_MAX_F64_vi         : DS_Real_vi<0x53, DS_MAX_F64>;
1668 def DS_WRITE_B8_D16_HI_vi  : DS_Real_vi<0x54, DS_WRITE_B8_D16_HI>;
1669 def DS_WRITE_B16_D16_HI_vi : DS_Real_vi<0x55, DS_WRITE_B16_D16_HI>;
1671 def DS_READ_U8_D16_vi     : DS_Real_vi<0x56, DS_READ_U8_D16>;
1672 def DS_READ_U8_D16_HI_vi  : DS_Real_vi<0x57, DS_READ_U8_D16_HI>;
1673 def DS_READ_I8_D16_vi     : DS_Real_vi<0x58, DS_READ_I8_D16>;
1674 def DS_READ_I8_D16_HI_vi  : DS_Real_vi<0x59, DS_READ_I8_D16_HI>;
1675 def DS_READ_U16_D16_vi    : DS_Real_vi<0x5a, DS_READ_U16_D16>;
1676 def DS_READ_U16_D16_HI_vi : DS_Real_vi<0x5b, DS_READ_U16_D16_HI>;
// VI real encodings for the 64-bit *_RTN_* entries and the 64-bit DS_READ*
// entries. First template argument: 8-bit opcode placed in Inst{24-17} by
// DS_Real_vi; second: the pseudo being encoded.
// The list is not in ascending opcode order: DS_CONDXCHG32_RTN_B64 (0x7e) and
// DS_GWS_SEMA_RELEASE_ALL (0x98) are listed between 0x6f and 0x70.
1678 def DS_ADD_RTN_U64_vi     : DS_Real_vi<0x60, DS_ADD_RTN_U64>;
1679 def DS_SUB_RTN_U64_vi     : DS_Real_vi<0x61, DS_SUB_RTN_U64>;
1680 def DS_RSUB_RTN_U64_vi    : DS_Real_vi<0x62, DS_RSUB_RTN_U64>;
1681 def DS_INC_RTN_U64_vi     : DS_Real_vi<0x63, DS_INC_RTN_U64>;
1682 def DS_DEC_RTN_U64_vi     : DS_Real_vi<0x64, DS_DEC_RTN_U64>;
1683 def DS_MIN_RTN_I64_vi     : DS_Real_vi<0x65, DS_MIN_RTN_I64>;
1684 def DS_MAX_RTN_I64_vi     : DS_Real_vi<0x66, DS_MAX_RTN_I64>;
1685 def DS_MIN_RTN_U64_vi     : DS_Real_vi<0x67, DS_MIN_RTN_U64>;
1686 def DS_MAX_RTN_U64_vi     : DS_Real_vi<0x68, DS_MAX_RTN_U64>;
1687 def DS_AND_RTN_B64_vi     : DS_Real_vi<0x69, DS_AND_RTN_B64>;
1688 def DS_OR_RTN_B64_vi      : DS_Real_vi<0x6a, DS_OR_RTN_B64>;
1689 def DS_XOR_RTN_B64_vi     : DS_Real_vi<0x6b, DS_XOR_RTN_B64>;
1690 def DS_MSKOR_RTN_B64_vi   : DS_Real_vi<0x6c, DS_MSKOR_RTN_B64>;
1691 def DS_WRXCHG_RTN_B64_vi  : DS_Real_vi<0x6d, DS_WRXCHG_RTN_B64>;
1692 def DS_WRXCHG2_RTN_B64_vi : DS_Real_vi<0x6e, DS_WRXCHG2_RTN_B64>;
1693 def DS_WRXCHG2ST64_RTN_B64_vi : DS_Real_vi<0x6f, DS_WRXCHG2ST64_RTN_B64>;
1694 def DS_CONDXCHG32_RTN_B64_vi   : DS_Real_vi<0x7e, DS_CONDXCHG32_RTN_B64>;
1695 def DS_GWS_SEMA_RELEASE_ALL_vi : DS_Real_vi<0x98, DS_GWS_SEMA_RELEASE_ALL>;
1696 def DS_CMPST_RTN_B64_vi   : DS_Real_vi<0x70, DS_CMPST_RTN_B64>;
1697 def DS_CMPST_RTN_F64_vi   : DS_Real_vi<0x71, DS_CMPST_RTN_F64>;
1698 def DS_MIN_RTN_F64_vi     : DS_Real_vi<0x72, DS_MIN_RTN_F64>;
1699 def DS_MAX_RTN_F64_vi     : DS_Real_vi<0x73, DS_MAX_RTN_F64>;
1701 def DS_READ_B64_vi        : DS_Real_vi<0x76, DS_READ_B64>;
1702 def DS_READ2_B64_vi       : DS_Real_vi<0x77, DS_READ2_B64>;
1703 def DS_READ2ST64_B64_vi   : DS_Real_vi<0x78, DS_READ2ST64_B64>;
// VI real encodings for the *_SRC2_* entries (32-bit group 0x80-0x95, 64-bit
// group 0xc0-0xd3) and the 96/128-bit write and read entries (0xde, 0xdf,
// 0xfe, 0xff). First template argument: 8-bit opcode placed in Inst{24-17} by
// DS_Real_vi; second: the pseudo being encoded.
// The *_SRC2_* values match those passed to the gfx6/gfx7/gfx10 defm list
// earlier in the file; DS_ADD_SRC2_F32 (0x95) has no counterpart in that list.
1705 def DS_ADD_SRC2_U32_vi    : DS_Real_vi<0x80, DS_ADD_SRC2_U32>;
1706 def DS_SUB_SRC2_U32_vi    : DS_Real_vi<0x81, DS_SUB_SRC2_U32>;
1707 def DS_RSUB_SRC2_U32_vi   : DS_Real_vi<0x82, DS_RSUB_SRC2_U32>;
1708 def DS_INC_SRC2_U32_vi    : DS_Real_vi<0x83, DS_INC_SRC2_U32>;
1709 def DS_DEC_SRC2_U32_vi    : DS_Real_vi<0x84, DS_DEC_SRC2_U32>;
1710 def DS_MIN_SRC2_I32_vi    : DS_Real_vi<0x85, DS_MIN_SRC2_I32>;
1711 def DS_MAX_SRC2_I32_vi    : DS_Real_vi<0x86, DS_MAX_SRC2_I32>;
1712 def DS_MIN_SRC2_U32_vi    : DS_Real_vi<0x87, DS_MIN_SRC2_U32>;
1713 def DS_MAX_SRC2_U32_vi    : DS_Real_vi<0x88, DS_MAX_SRC2_U32>;
1714 def DS_AND_SRC2_B32_vi    : DS_Real_vi<0x89, DS_AND_SRC2_B32>;
1715 def DS_OR_SRC2_B32_vi     : DS_Real_vi<0x8a, DS_OR_SRC2_B32>;
1716 def DS_XOR_SRC2_B32_vi    : DS_Real_vi<0x8b, DS_XOR_SRC2_B32>;
1717 def DS_WRITE_SRC2_B32_vi  : DS_Real_vi<0x8d, DS_WRITE_SRC2_B32>;
1718 def DS_MIN_SRC2_F32_vi    : DS_Real_vi<0x92, DS_MIN_SRC2_F32>;
1719 def DS_MAX_SRC2_F32_vi    : DS_Real_vi<0x93, DS_MAX_SRC2_F32>;
1720 def DS_ADD_SRC2_F32_vi    : DS_Real_vi<0x95, DS_ADD_SRC2_F32>;
1721 def DS_ADD_SRC2_U64_vi    : DS_Real_vi<0xc0, DS_ADD_SRC2_U64>;
1722 def DS_SUB_SRC2_U64_vi    : DS_Real_vi<0xc1, DS_SUB_SRC2_U64>;
1723 def DS_RSUB_SRC2_U64_vi   : DS_Real_vi<0xc2, DS_RSUB_SRC2_U64>;
1724 def DS_INC_SRC2_U64_vi    : DS_Real_vi<0xc3, DS_INC_SRC2_U64>;
1725 def DS_DEC_SRC2_U64_vi    : DS_Real_vi<0xc4, DS_DEC_SRC2_U64>;
1726 def DS_MIN_SRC2_I64_vi    : DS_Real_vi<0xc5, DS_MIN_SRC2_I64>;
1727 def DS_MAX_SRC2_I64_vi    : DS_Real_vi<0xc6, DS_MAX_SRC2_I64>;
1728 def DS_MIN_SRC2_U64_vi    : DS_Real_vi<0xc7, DS_MIN_SRC2_U64>;
1729 def DS_MAX_SRC2_U64_vi    : DS_Real_vi<0xc8, DS_MAX_SRC2_U64>;
1730 def DS_AND_SRC2_B64_vi    : DS_Real_vi<0xc9, DS_AND_SRC2_B64>;
1731 def DS_OR_SRC2_B64_vi     : DS_Real_vi<0xca, DS_OR_SRC2_B64>;
1732 def DS_XOR_SRC2_B64_vi    : DS_Real_vi<0xcb, DS_XOR_SRC2_B64>;
1733 def DS_WRITE_SRC2_B64_vi  : DS_Real_vi<0xcd, DS_WRITE_SRC2_B64>;
1734 def DS_MIN_SRC2_F64_vi    : DS_Real_vi<0xd2, DS_MIN_SRC2_F64>;
1735 def DS_MAX_SRC2_F64_vi    : DS_Real_vi<0xd3, DS_MAX_SRC2_F64>;
1736 def DS_WRITE_B96_vi       : DS_Real_vi<0xde, DS_WRITE_B96>;
1737 def DS_WRITE_B128_vi      : DS_Real_vi<0xdf, DS_WRITE_B128>;
1738 def DS_READ_B96_vi        : DS_Real_vi<0xfe, DS_READ_B96>;
1739 def DS_READ_B128_vi       : DS_Real_vi<0xff, DS_READ_B128>;
// Additional VI-family real encodings that the existing comments label as
// belonging to newer sub-targets. They are still built from DS_Real_vi, so at
// this level they get the isGFX8GFX9 assembler predicate and the "GFX8" decoder
// namespace like every other *_vi def. Any narrower sub-target restriction is
// not visible here; presumably it is attached to the pseudos (TODO confirm).
// First template argument: 8-bit opcode placed in Inst{24-17}; second: the pseudo.
1741 // GFX90A+.
1742 def DS_ADD_F64_vi     : DS_Real_vi<0x5c, DS_ADD_F64>;
1743 def DS_ADD_RTN_F64_vi : DS_Real_vi<0x7c, DS_ADD_RTN_F64>;
1745 // GFX940+.
1746 def DS_PK_ADD_F16_vi     : DS_Real_vi<0x17, DS_PK_ADD_F16>;
1747 def DS_PK_ADD_RTN_F16_vi : DS_Real_vi<0xb7, DS_PK_ADD_RTN_F16>;
1748 def DS_PK_ADD_BF16_vi     : DS_Real_vi<0x18, DS_PK_ADD_BF16>;
1749 def DS_PK_ADD_RTN_BF16_vi : DS_Real_vi<0xb8, DS_PK_ADD_RTN_BF16>;