1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 < %s | FileCheck -check-prefix=GFX908 %s
3 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
4 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s
5 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
; f32 buffer atomic fadd through the raw ptr-buffer intrinsic. %val and %voffset
; are ordinary arguments, %rsrc and %soffset are inreg. The function is declared
; void and %ret is not referenced again in the lines shown here.
; The trailing immarg of the call is 24. In the expectations below this appears
; as `scc` for gfx90a, `sc1` for gfx940 and `scope:SCOPE_SYS` for gfx1200, while
; the gfx908 instruction carries no extra modifier.
; For gfx908 and gfx90a the expected code first copies s6, s7, s16 and s17 into
; s[8:11] and passes s18 as soffset; gfx940 and gfx1200 use s[0:3] and s6 as is.
7 define void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
8 ; GFX908-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
10 ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; GFX908-NEXT: s_mov_b32 s11, s17
12 ; GFX908-NEXT: s_mov_b32 s10, s16
13 ; GFX908-NEXT: s_mov_b32 s9, s7
14 ; GFX908-NEXT: s_mov_b32 s8, s6
15 ; GFX908-NEXT: buffer_atomic_add_f32 v0, v1, s[8:11], s18 offen
16 ; GFX908-NEXT: s_waitcnt vmcnt(0)
17 ; GFX908-NEXT: s_setpc_b64 s[30:31]
19 ; GFX90A-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
21 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22 ; GFX90A-NEXT: s_mov_b32 s11, s17
23 ; GFX90A-NEXT: s_mov_b32 s10, s16
24 ; GFX90A-NEXT: s_mov_b32 s9, s7
25 ; GFX90A-NEXT: s_mov_b32 s8, s6
26 ; GFX90A-NEXT: buffer_atomic_add_f32 v0, v1, s[8:11], s18 offen scc
27 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
28 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
30 ; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
32 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33 ; GFX940-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s6 offen sc1
34 ; GFX940-NEXT: s_waitcnt vmcnt(0)
35 ; GFX940-NEXT: s_setpc_b64 s[30:31]
37 ; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
39 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
40 ; GFX12-NEXT: s_wait_expcnt 0x0
41 ; GFX12-NEXT: s_wait_samplecnt 0x0
42 ; GFX12-NEXT: s_wait_bvhcnt 0x0
43 ; GFX12-NEXT: s_wait_kmcnt 0x0
44 ; GFX12-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s6 offen scope:SCOPE_SYS
45 ; GFX12-NEXT: s_setpc_b64 s[30:31]
46 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 24)
; f32 buffer atomic fadd where the voffset operand of the intrinsic is the
; constant 0 and the function has no %voffset parameter. %rsrc and %soffset are
; inreg; the function is declared void and %ret is not referenced again in the
; lines shown here.
; Every expected instruction below uses `off` in the voffset position, without
; `offen`. The trailing immarg is 0 and no cache-policy modifier is expected for
; any of the four targets.
50 define void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) #0 {
51 ; GFX908-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
53 ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
54 ; GFX908-NEXT: s_mov_b32 s11, s17
55 ; GFX908-NEXT: s_mov_b32 s10, s16
56 ; GFX908-NEXT: s_mov_b32 s9, s7
57 ; GFX908-NEXT: s_mov_b32 s8, s6
58 ; GFX908-NEXT: buffer_atomic_add_f32 v0, off, s[8:11], s18
59 ; GFX908-NEXT: s_waitcnt vmcnt(0)
60 ; GFX908-NEXT: s_setpc_b64 s[30:31]
62 ; GFX90A-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
64 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
65 ; GFX90A-NEXT: s_mov_b32 s11, s17
66 ; GFX90A-NEXT: s_mov_b32 s10, s16
67 ; GFX90A-NEXT: s_mov_b32 s9, s7
68 ; GFX90A-NEXT: s_mov_b32 s8, s6
69 ; GFX90A-NEXT: buffer_atomic_add_f32 v0, off, s[8:11], s18
70 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
71 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
73 ; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
75 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76 ; GFX940-NEXT: buffer_atomic_add_f32 v0, off, s[0:3], s6
77 ; GFX940-NEXT: s_waitcnt vmcnt(0)
78 ; GFX940-NEXT: s_setpc_b64 s[30:31]
80 ; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
82 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
83 ; GFX12-NEXT: s_wait_expcnt 0x0
84 ; GFX12-NEXT: s_wait_samplecnt 0x0
85 ; GFX12-NEXT: s_wait_bvhcnt 0x0
86 ; GFX12-NEXT: s_wait_kmcnt 0x0
87 ; GFX12-NEXT: buffer_atomic_add_f32 v0, off, s[0:3], s6
88 ; GFX12-NEXT: s_setpc_b64 s[30:31]
89 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
; <2 x half> variant of the buffer atomic fadd, calling the .v2f16 overload of
; the raw ptr-buffer intrinsic. %val and %voffset are ordinary arguments, %rsrc
; and %soffset are inreg; the function is declared void and %ret is not
; referenced again in the lines shown here.
; All four targets are expected to emit buffer_atomic_pk_add_f16 with `offen`.
; The trailing immarg is 0 and no cache-policy modifier is expected.
93 define void @raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
94 ; GFX908-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
96 ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
97 ; GFX908-NEXT: s_mov_b32 s11, s17
98 ; GFX908-NEXT: s_mov_b32 s10, s16
99 ; GFX908-NEXT: s_mov_b32 s9, s7
100 ; GFX908-NEXT: s_mov_b32 s8, s6
101 ; GFX908-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[8:11], s18 offen
102 ; GFX908-NEXT: s_waitcnt vmcnt(0)
103 ; GFX908-NEXT: s_setpc_b64 s[30:31]
105 ; GFX90A-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
107 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108 ; GFX90A-NEXT: s_mov_b32 s11, s17
109 ; GFX90A-NEXT: s_mov_b32 s10, s16
110 ; GFX90A-NEXT: s_mov_b32 s9, s7
111 ; GFX90A-NEXT: s_mov_b32 s8, s6
112 ; GFX90A-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[8:11], s18 offen
113 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
114 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
116 ; GFX940-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
118 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
119 ; GFX940-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], s6 offen
120 ; GFX940-NEXT: s_waitcnt vmcnt(0)
121 ; GFX940-NEXT: s_setpc_b64 s[30:31]
123 ; GFX12-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
125 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
126 ; GFX12-NEXT: s_wait_expcnt 0x0
127 ; GFX12-NEXT: s_wait_samplecnt 0x0
128 ; GFX12-NEXT: s_wait_bvhcnt 0x0
129 ; GFX12-NEXT: s_wait_kmcnt 0x0
130 ; GFX12-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], s6 offen
131 ; GFX12-NEXT: s_setpc_b64 s[30:31]
132 %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
; <2 x half> buffer atomic fadd where the voffset operand of the intrinsic is a
; constant rather than a function argument. The call passes the literal 92, and
; every expected instruction below uses `off` in the voffset position together
; with `offset:92`. The trailing immarg is 0 and no cache-policy modifier is
; expected.
; NOTE(review): the function name says 0_voffset although the constant passed is
; 92, and the %voffset parameter is not referenced in the lines shown here. The
; expectations agree with the IR as written, so confirm whether the name or the
; constant reflects the intent before changing either.
136 define void @raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
137 ; GFX908-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
139 ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
140 ; GFX908-NEXT: s_mov_b32 s11, s17
141 ; GFX908-NEXT: s_mov_b32 s10, s16
142 ; GFX908-NEXT: s_mov_b32 s9, s7
143 ; GFX908-NEXT: s_mov_b32 s8, s6
144 ; GFX908-NEXT: buffer_atomic_pk_add_f16 v0, off, s[8:11], s18 offset:92
145 ; GFX908-NEXT: s_waitcnt vmcnt(0)
146 ; GFX908-NEXT: s_setpc_b64 s[30:31]
148 ; GFX90A-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
150 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
151 ; GFX90A-NEXT: s_mov_b32 s11, s17
152 ; GFX90A-NEXT: s_mov_b32 s10, s16
153 ; GFX90A-NEXT: s_mov_b32 s9, s7
154 ; GFX90A-NEXT: s_mov_b32 s8, s6
155 ; GFX90A-NEXT: buffer_atomic_pk_add_f16 v0, off, s[8:11], s18 offset:92
156 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
157 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
159 ; GFX940-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
161 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
162 ; GFX940-NEXT: buffer_atomic_pk_add_f16 v0, off, s[0:3], s6 offset:92
163 ; GFX940-NEXT: s_waitcnt vmcnt(0)
164 ; GFX940-NEXT: s_setpc_b64 s[30:31]
166 ; GFX12-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
168 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
169 ; GFX12-NEXT: s_wait_expcnt 0x0
170 ; GFX12-NEXT: s_wait_samplecnt 0x0
171 ; GFX12-NEXT: s_wait_bvhcnt 0x0
172 ; GFX12-NEXT: s_wait_kmcnt 0x0
173 ; GFX12-NEXT: buffer_atomic_pk_add_f16 v0, off, s[0:3], s6 offset:92
174 ; GFX12-NEXT: s_setpc_b64 s[30:31]
175 %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 92, i32 %soffset, i32 0)
; f32 buffer atomic fadd with the same argument layout as the first f32 test in
; this file, but with a trailing immarg of 2 (the `_slc` suffix in the name).
; In the expectations below the immarg appears as `slc` for gfx908 and gfx90a,
; as `nt` for gfx940 and as `th:TH_ATOMIC_NT` for gfx1200.
; The function is declared void and %ret is not referenced again in the lines
; shown here.
179 define void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
180 ; GFX908-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
182 ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
183 ; GFX908-NEXT: s_mov_b32 s11, s17
184 ; GFX908-NEXT: s_mov_b32 s10, s16
185 ; GFX908-NEXT: s_mov_b32 s9, s7
186 ; GFX908-NEXT: s_mov_b32 s8, s6
187 ; GFX908-NEXT: buffer_atomic_add_f32 v0, v1, s[8:11], s18 offen slc
188 ; GFX908-NEXT: s_waitcnt vmcnt(0)
189 ; GFX908-NEXT: s_setpc_b64 s[30:31]
191 ; GFX90A-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
193 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
194 ; GFX90A-NEXT: s_mov_b32 s11, s17
195 ; GFX90A-NEXT: s_mov_b32 s10, s16
196 ; GFX90A-NEXT: s_mov_b32 s9, s7
197 ; GFX90A-NEXT: s_mov_b32 s8, s6
198 ; GFX90A-NEXT: buffer_atomic_add_f32 v0, v1, s[8:11], s18 offen slc
199 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
200 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
202 ; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
204 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
205 ; GFX940-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s6 offen nt
206 ; GFX940-NEXT: s_waitcnt vmcnt(0)
207 ; GFX940-NEXT: s_setpc_b64 s[30:31]
209 ; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
211 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
212 ; GFX12-NEXT: s_wait_expcnt 0x0
213 ; GFX12-NEXT: s_wait_samplecnt 0x0
214 ; GFX12-NEXT: s_wait_bvhcnt 0x0
215 ; GFX12-NEXT: s_wait_kmcnt 0x0
216 ; GFX12-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s6 offen th:TH_ATOMIC_NT
217 ; GFX12-NEXT: s_setpc_b64 s[30:31]
218 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 2)
; Intrinsic declarations used by the tests in this file. Both overloads take the
; value, the addrspace(8) buffer resource pointer, two i32 operands (passed as
; voffset and soffset by the callers here) and a final i32 that must be an
; immediate (immarg).
222 declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
223 declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg)
; Attribute group referenced as #0 by the test functions visible in this file.
225 attributes #0 = { nounwind }