1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
3 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s
4 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
; f32 raw-ptr-buffer atomic fadd whose result is captured in %ret.
;   %val     - float value operand (not marked inreg)
;   %rsrc    - ptr addrspace(8) buffer resource, passed inreg
;   %voffset - i32 offset operand (not marked inreg)
;   %soffset - i32 offset operand, passed inreg
; The trailing immediate operand of the intrinsic call is 24.
; NOTE(review): 24 presumably selects the cache-policy bits that appear below
; as `scc` (GFX90A), `sc1` (GFX940) and `scope:SCOPE_SYS` (GFX12) - confirm
; against the intrinsic's documentation.
; Expected code per prefix:
;   GFX90A - four s_mov_b32 into s8..s11 (from s6, s7, s16, s17), then
;            buffer_atomic_add_f32 using s[8:11], s18 with `offen glc scc`.
;   GFX940 - buffer_atomic_add_f32 using s[0:3], s6 with `offen sc0 sc1`.
;   GFX12  - buffer_atomic_add_f32 using s[0:3], s6 with
;            `offen th:TH_ATOMIC_RETURN scope:SCOPE_SYS`.
; Every prefix expects a wait on the load/vm counter after the atomic, before
; s_setpc_b64.
6 define float @raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
7 ; GFX90A-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
9 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10 ; GFX90A-NEXT: s_mov_b32 s11, s17
11 ; GFX90A-NEXT: s_mov_b32 s10, s16
12 ; GFX90A-NEXT: s_mov_b32 s9, s7
13 ; GFX90A-NEXT: s_mov_b32 s8, s6
14 ; GFX90A-NEXT: buffer_atomic_add_f32 v0, v1, s[8:11], s18 offen glc scc
15 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
16 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
18 ; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
20 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21 ; GFX940-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s6 offen sc0 sc1
22 ; GFX940-NEXT: s_waitcnt vmcnt(0)
23 ; GFX940-NEXT: s_setpc_b64 s[30:31]
25 ; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
27 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
28 ; GFX12-NEXT: s_wait_expcnt 0x0
29 ; GFX12-NEXT: s_wait_samplecnt 0x0
30 ; GFX12-NEXT: s_wait_bvhcnt 0x0
31 ; GFX12-NEXT: s_wait_kmcnt 0x0
32 ; GFX12-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s6 offen th:TH_ATOMIC_RETURN scope:SCOPE_SYS
33 ; GFX12-NEXT: s_wait_loadcnt 0x0
34 ; GFX12-NEXT: s_setpc_b64 s[30:31]
35 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 24)
; f32 raw-ptr-buffer atomic fadd with a constant-zero voffset operand; the
; call's result is captured in %ret.
;   %val     - float value operand (not marked inreg)
;   %rsrc    - ptr addrspace(8) buffer resource, passed inreg
;   %soffset - i32 offset operand, passed inreg
; There is no voffset parameter: the call passes the literal `i32 0` in that
; position, and the trailing immediate operand is 0.
; Expected code per prefix (all use `off` in place of an offset VGPR):
;   GFX90A - four s_mov_b32 into s8..s11 (from s6, s7, s16, s17), then
;            buffer_atomic_add_f32 v0, off, s[8:11], s18 with `glc`.
;   GFX940 - buffer_atomic_add_f32 v0, off, s[0:3], s6 with `sc0`.
;   GFX12  - buffer_atomic_add_f32 v0, off, s[0:3], s6 with
;            `th:TH_ATOMIC_RETURN`.
39 define float @raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) #0 {
40 ; GFX90A-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
42 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43 ; GFX90A-NEXT: s_mov_b32 s11, s17
44 ; GFX90A-NEXT: s_mov_b32 s10, s16
45 ; GFX90A-NEXT: s_mov_b32 s9, s7
46 ; GFX90A-NEXT: s_mov_b32 s8, s6
47 ; GFX90A-NEXT: buffer_atomic_add_f32 v0, off, s[8:11], s18 glc
48 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
49 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
51 ; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
53 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
54 ; GFX940-NEXT: buffer_atomic_add_f32 v0, off, s[0:3], s6 sc0
55 ; GFX940-NEXT: s_waitcnt vmcnt(0)
56 ; GFX940-NEXT: s_setpc_b64 s[30:31]
58 ; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
60 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
61 ; GFX12-NEXT: s_wait_expcnt 0x0
62 ; GFX12-NEXT: s_wait_samplecnt 0x0
63 ; GFX12-NEXT: s_wait_bvhcnt 0x0
64 ; GFX12-NEXT: s_wait_kmcnt 0x0
65 ; GFX12-NEXT: buffer_atomic_add_f32 v0, off, s[0:3], s6 th:TH_ATOMIC_RETURN
66 ; GFX12-NEXT: s_wait_loadcnt 0x0
67 ; GFX12-NEXT: s_setpc_b64 s[30:31]
68 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
; <2 x half> raw-ptr-buffer atomic fadd whose result is captured in %ret.
;   %val     - <2 x half> value operand (not marked inreg)
;   %rsrc    - ptr addrspace(8) buffer resource, passed inreg
;   %voffset - i32 offset operand (not marked inreg)
;   %soffset - i32 offset operand, passed inreg
; The trailing immediate operand of the intrinsic call is 0.
; Expected code per prefix (packed-f16 form of the instruction):
;   GFX90A - four s_mov_b32 into s8..s11 (from s6, s7, s16, s17), then
;            buffer_atomic_pk_add_f16 v0, v1, s[8:11], s18 with `offen glc`.
;   GFX940 - buffer_atomic_pk_add_f16 v0, v1, s[0:3], s6 with `offen sc0`.
;   GFX12  - buffer_atomic_pk_add_f16 v0, v1, s[0:3], s6 with
;            `offen th:TH_ATOMIC_RETURN`.
72 define <2 x half> @raw_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
73 ; GFX90A-LABEL: raw_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
75 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76 ; GFX90A-NEXT: s_mov_b32 s11, s17
77 ; GFX90A-NEXT: s_mov_b32 s10, s16
78 ; GFX90A-NEXT: s_mov_b32 s9, s7
79 ; GFX90A-NEXT: s_mov_b32 s8, s6
80 ; GFX90A-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[8:11], s18 offen glc
81 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
82 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
84 ; GFX940-LABEL: raw_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
86 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
87 ; GFX940-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], s6 offen sc0
88 ; GFX940-NEXT: s_waitcnt vmcnt(0)
89 ; GFX940-NEXT: s_setpc_b64 s[30:31]
91 ; GFX12-LABEL: raw_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
93 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
94 ; GFX12-NEXT: s_wait_expcnt 0x0
95 ; GFX12-NEXT: s_wait_samplecnt 0x0
96 ; GFX12-NEXT: s_wait_bvhcnt 0x0
97 ; GFX12-NEXT: s_wait_kmcnt 0x0
98 ; GFX12-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], s6 offen th:TH_ATOMIC_RETURN
99 ; GFX12-NEXT: s_wait_loadcnt 0x0
100 ; GFX12-NEXT: s_setpc_b64 s[30:31]
101 %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
; <2 x half> raw-ptr-buffer atomic fadd with a constant voffset operand; the
; call's result is captured in %ret.
;   %val     - <2 x half> value operand (not marked inreg)
;   %rsrc    - ptr addrspace(8) buffer resource, passed inreg
;   %voffset - declared, but not referenced by the call below
;   %soffset - i32 offset operand, passed inreg
; The call passes the literal `i32 92` in the voffset position and 0 as the
; trailing immediate operand.
; NOTE(review): the function name says "0_voffset", yet the constant used is
; 92 and the %voffset parameter is unused in the visible body. The name is
; referenced by every -LABEL line, so renaming means regenerating the checks.
; Expected code per prefix (constant folded into `offset:92`, `off` in place
; of an offset VGPR):
;   GFX90A - four s_mov_b32 into s8..s11 (from s6, s7, s16, s17), then
;            buffer_atomic_pk_add_f16 v0, off, s[8:11], s18 offset:92 glc.
;   GFX940 - buffer_atomic_pk_add_f16 v0, off, s[0:3], s6 offset:92 sc0.
;   GFX12  - buffer_atomic_pk_add_f16 v0, off, s[0:3], s6 offset:92
;            th:TH_ATOMIC_RETURN.
105 define <2 x half> @raw_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
106 ; GFX90A-LABEL: raw_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
108 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109 ; GFX90A-NEXT: s_mov_b32 s11, s17
110 ; GFX90A-NEXT: s_mov_b32 s10, s16
111 ; GFX90A-NEXT: s_mov_b32 s9, s7
112 ; GFX90A-NEXT: s_mov_b32 s8, s6
113 ; GFX90A-NEXT: buffer_atomic_pk_add_f16 v0, off, s[8:11], s18 offset:92 glc
114 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
115 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
117 ; GFX940-LABEL: raw_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
119 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120 ; GFX940-NEXT: buffer_atomic_pk_add_f16 v0, off, s[0:3], s6 offset:92 sc0
121 ; GFX940-NEXT: s_waitcnt vmcnt(0)
122 ; GFX940-NEXT: s_setpc_b64 s[30:31]
124 ; GFX12-LABEL: raw_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
126 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
127 ; GFX12-NEXT: s_wait_expcnt 0x0
128 ; GFX12-NEXT: s_wait_samplecnt 0x0
129 ; GFX12-NEXT: s_wait_bvhcnt 0x0
130 ; GFX12-NEXT: s_wait_kmcnt 0x0
131 ; GFX12-NEXT: buffer_atomic_pk_add_f16 v0, off, s[0:3], s6 offset:92 th:TH_ATOMIC_RETURN
132 ; GFX12-NEXT: s_wait_loadcnt 0x0
133 ; GFX12-NEXT: s_setpc_b64 s[30:31]
134 %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 92, i32 %soffset, i32 0)
; f32 raw-ptr-buffer atomic fadd with the trailing immediate operand set to 2;
; the call's result is captured in %ret.
;   %val     - float value operand (not marked inreg)
;   %rsrc    - ptr addrspace(8) buffer resource, passed inreg
;   %voffset - i32 offset operand (not marked inreg)
;   %soffset - i32 offset operand, passed inreg
; NOTE(review): 2 is presumably the cache-policy bit that appears below as
; `slc` (GFX90A), `nt` (GFX940) and the NT temporal hint (GFX12) - confirm
; against the intrinsic's documentation.
; Expected code per prefix:
;   GFX90A - four s_mov_b32 into s8..s11 (from s6, s7, s16, s17), then
;            buffer_atomic_add_f32 v0, v1, s[8:11], s18 with `offen glc slc`.
;   GFX940 - buffer_atomic_add_f32 v0, v1, s[0:3], s6 with `offen sc0 nt`.
;   GFX12  - buffer_atomic_add_f32 v0, v1, s[0:3], s6 with
;            `offen th:TH_ATOMIC_NT_RETURN`.
138 define float @raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
139 ; GFX90A-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
141 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
142 ; GFX90A-NEXT: s_mov_b32 s11, s17
143 ; GFX90A-NEXT: s_mov_b32 s10, s16
144 ; GFX90A-NEXT: s_mov_b32 s9, s7
145 ; GFX90A-NEXT: s_mov_b32 s8, s6
146 ; GFX90A-NEXT: buffer_atomic_add_f32 v0, v1, s[8:11], s18 offen glc slc
147 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
148 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
150 ; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
152 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
153 ; GFX940-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s6 offen sc0 nt
154 ; GFX940-NEXT: s_waitcnt vmcnt(0)
155 ; GFX940-NEXT: s_setpc_b64 s[30:31]
157 ; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
159 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
160 ; GFX12-NEXT: s_wait_expcnt 0x0
161 ; GFX12-NEXT: s_wait_samplecnt 0x0
162 ; GFX12-NEXT: s_wait_bvhcnt 0x0
163 ; GFX12-NEXT: s_wait_kmcnt 0x0
164 ; GFX12-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN
165 ; GFX12-NEXT: s_wait_loadcnt 0x0
166 ; GFX12-NEXT: s_setpc_b64 s[30:31]
167 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 2)
; Declarations of the two buffer-atomic fadd intrinsics called by the test
; functions in this file: one overload operating on float, one on <2 x half>.
; Both take (value, ptr addrspace(8) resource, i32, i32, i32) and the final
; i32 is an `immarg`, so it must be a compile-time constant at every call.
171 declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
172 declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg)
; Attribute group #0 is attached to each visible `define` in this file and
; marks those functions `nounwind`.
174 attributes #0 = { nounwind }