1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
3 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
4 ; Not supported in gfx8 or gfx9
5 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
6 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
7 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
; Returning f32 fmin buffer atomic, baseline case.
; %rsrc and %soffset are marked inreg and show up in scalar registers in the
; checks; %val, %vindex and %voffset are plain arguments and show up in v0 and
; v[1:2]. The trailing immediate operand of the intrinsic call is 0.
; The checks expect buffer_atomic_fmin with "idxen offen glc" on the three
; oldest targets, buffer_atomic_min_f32 on gfx1100, and
; buffer_atomic_min_num_f32 with th:TH_ATOMIC_RETURN on gfx1200.
9 define float @struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
10 ; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
12 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13 ; GFX6-NEXT: s_mov_b32 s11, s17
14 ; GFX6-NEXT: s_mov_b32 s10, s16
15 ; GFX6-NEXT: s_mov_b32 s9, s7
16 ; GFX6-NEXT: s_mov_b32 s8, s6
17 ; GFX6-NEXT: buffer_atomic_fmin v0, v[1:2], s[8:11], s18 idxen offen glc
18 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
19 ; GFX6-NEXT: s_setpc_b64 s[30:31]
21 ; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
23 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
24 ; GFX7-NEXT: s_mov_b32 s11, s17
25 ; GFX7-NEXT: s_mov_b32 s10, s16
26 ; GFX7-NEXT: s_mov_b32 s9, s7
27 ; GFX7-NEXT: s_mov_b32 s8, s6
28 ; GFX7-NEXT: buffer_atomic_fmin v0, v[1:2], s[8:11], s18 idxen offen glc
29 ; GFX7-NEXT: s_waitcnt vmcnt(0)
30 ; GFX7-NEXT: s_setpc_b64 s[30:31]
32 ; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
34 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35 ; GFX10-NEXT: s_mov_b32 s11, s17
36 ; GFX10-NEXT: s_mov_b32 s10, s16
37 ; GFX10-NEXT: s_mov_b32 s9, s7
38 ; GFX10-NEXT: s_mov_b32 s8, s6
39 ; GFX10-NEXT: buffer_atomic_fmin v0, v[1:2], s[8:11], s18 idxen offen glc
40 ; GFX10-NEXT: s_waitcnt vmcnt(0)
41 ; GFX10-NEXT: s_setpc_b64 s[30:31]
43 ; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
45 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46 ; GFX11-NEXT: buffer_atomic_min_f32 v0, v[1:2], s[0:3], s6 idxen offen glc
47 ; GFX11-NEXT: s_waitcnt vmcnt(0)
48 ; GFX11-NEXT: s_setpc_b64 s[30:31]
50 ; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
52 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
53 ; GFX12-NEXT: s_wait_expcnt 0x0
54 ; GFX12-NEXT: s_wait_samplecnt 0x0
55 ; GFX12-NEXT: s_wait_bvhcnt 0x0
56 ; GFX12-NEXT: s_wait_kmcnt 0x0
57 ; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v[1:2], s[0:3], s6 idxen offen th:TH_ATOMIC_RETURN
58 ; GFX12-NEXT: s_wait_loadcnt 0x0
59 ; GFX12-NEXT: s_setpc_b64 s[30:31]
60 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
; Returning f32 fmin buffer atomic whose voffset operand is %voffset + 256.
; The checks contain no separate add instruction on any target; the constant
; appears as the "offset:256" immediate on the atomic instruction instead.
; %rsrc and %soffset are inreg; the trailing immediate operand is 0.
64 define float @struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
65 ; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
67 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68 ; GFX6-NEXT: s_mov_b32 s11, s17
69 ; GFX6-NEXT: s_mov_b32 s10, s16
70 ; GFX6-NEXT: s_mov_b32 s9, s7
71 ; GFX6-NEXT: s_mov_b32 s8, s6
72 ; GFX6-NEXT: buffer_atomic_fmin v0, v[1:2], s[8:11], s18 idxen offen offset:256 glc
73 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
74 ; GFX6-NEXT: s_setpc_b64 s[30:31]
76 ; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
78 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79 ; GFX7-NEXT: s_mov_b32 s11, s17
80 ; GFX7-NEXT: s_mov_b32 s10, s16
81 ; GFX7-NEXT: s_mov_b32 s9, s7
82 ; GFX7-NEXT: s_mov_b32 s8, s6
83 ; GFX7-NEXT: buffer_atomic_fmin v0, v[1:2], s[8:11], s18 idxen offen offset:256 glc
84 ; GFX7-NEXT: s_waitcnt vmcnt(0)
85 ; GFX7-NEXT: s_setpc_b64 s[30:31]
87 ; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
89 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
90 ; GFX10-NEXT: s_mov_b32 s11, s17
91 ; GFX10-NEXT: s_mov_b32 s10, s16
92 ; GFX10-NEXT: s_mov_b32 s9, s7
93 ; GFX10-NEXT: s_mov_b32 s8, s6
94 ; GFX10-NEXT: buffer_atomic_fmin v0, v[1:2], s[8:11], s18 idxen offen offset:256 glc
95 ; GFX10-NEXT: s_waitcnt vmcnt(0)
96 ; GFX10-NEXT: s_setpc_b64 s[30:31]
98 ; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
100 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
101 ; GFX11-NEXT: buffer_atomic_min_f32 v0, v[1:2], s[0:3], s6 idxen offen offset:256 glc
102 ; GFX11-NEXT: s_waitcnt vmcnt(0)
103 ; GFX11-NEXT: s_setpc_b64 s[30:31]
105 ; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
107 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
108 ; GFX12-NEXT: s_wait_expcnt 0x0
109 ; GFX12-NEXT: s_wait_samplecnt 0x0
110 ; GFX12-NEXT: s_wait_bvhcnt 0x0
111 ; GFX12-NEXT: s_wait_kmcnt 0x0
112 ; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v[1:2], s[0:3], s6 idxen offen offset:256 th:TH_ATOMIC_RETURN
113 ; GFX12-NEXT: s_wait_loadcnt 0x0
114 ; GFX12-NEXT: s_setpc_b64 s[30:31]
115 %voffset.add = add i32 %voffset, 256
116 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0)
; Returning f32 fmin buffer atomic with a constant 0 passed as voffset (the
; function takes no %voffset argument). In the checks the address operand is
; the single register v1 and only "idxen" is set; "offen" is absent.
; %rsrc and %soffset are inreg; the trailing immediate operand is 0.
120 define float @struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
121 ; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
123 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
124 ; GFX6-NEXT: s_mov_b32 s11, s17
125 ; GFX6-NEXT: s_mov_b32 s10, s16
126 ; GFX6-NEXT: s_mov_b32 s9, s7
127 ; GFX6-NEXT: s_mov_b32 s8, s6
128 ; GFX6-NEXT: buffer_atomic_fmin v0, v1, s[8:11], s18 idxen glc
129 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
130 ; GFX6-NEXT: s_setpc_b64 s[30:31]
132 ; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
134 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
135 ; GFX7-NEXT: s_mov_b32 s11, s17
136 ; GFX7-NEXT: s_mov_b32 s10, s16
137 ; GFX7-NEXT: s_mov_b32 s9, s7
138 ; GFX7-NEXT: s_mov_b32 s8, s6
139 ; GFX7-NEXT: buffer_atomic_fmin v0, v1, s[8:11], s18 idxen glc
140 ; GFX7-NEXT: s_waitcnt vmcnt(0)
141 ; GFX7-NEXT: s_setpc_b64 s[30:31]
143 ; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
145 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
146 ; GFX10-NEXT: s_mov_b32 s11, s17
147 ; GFX10-NEXT: s_mov_b32 s10, s16
148 ; GFX10-NEXT: s_mov_b32 s9, s7
149 ; GFX10-NEXT: s_mov_b32 s8, s6
150 ; GFX10-NEXT: buffer_atomic_fmin v0, v1, s[8:11], s18 idxen glc
151 ; GFX10-NEXT: s_waitcnt vmcnt(0)
152 ; GFX10-NEXT: s_setpc_b64 s[30:31]
154 ; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
156 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157 ; GFX11-NEXT: buffer_atomic_min_f32 v0, v1, s[0:3], s6 idxen glc
158 ; GFX11-NEXT: s_waitcnt vmcnt(0)
159 ; GFX11-NEXT: s_setpc_b64 s[30:31]
161 ; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
163 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
164 ; GFX12-NEXT: s_wait_expcnt 0x0
165 ; GFX12-NEXT: s_wait_samplecnt 0x0
166 ; GFX12-NEXT: s_wait_bvhcnt 0x0
167 ; GFX12-NEXT: s_wait_kmcnt 0x0
168 ; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], s6 idxen th:TH_ATOMIC_RETURN
169 ; GFX12-NEXT: s_wait_loadcnt 0x0
170 ; GFX12-NEXT: s_setpc_b64 s[30:31]
171 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
; Returning f32 fmin buffer atomic with 2 as the trailing immediate operand.
; Compared with the variant that passes 0, the checks additionally expect the
; "slc" modifier on the pre-gfx1200 targets, and th:TH_ATOMIC_NT_RETURN
; (instead of th:TH_ATOMIC_RETURN) on gfx1200.
; %rsrc and %soffset are inreg.
175 define float @struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
176 ; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
178 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
179 ; GFX6-NEXT: s_mov_b32 s11, s17
180 ; GFX6-NEXT: s_mov_b32 s10, s16
181 ; GFX6-NEXT: s_mov_b32 s9, s7
182 ; GFX6-NEXT: s_mov_b32 s8, s6
183 ; GFX6-NEXT: buffer_atomic_fmin v0, v[1:2], s[8:11], s18 idxen offen glc slc
184 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
185 ; GFX6-NEXT: s_setpc_b64 s[30:31]
187 ; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
189 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
190 ; GFX7-NEXT: s_mov_b32 s11, s17
191 ; GFX7-NEXT: s_mov_b32 s10, s16
192 ; GFX7-NEXT: s_mov_b32 s9, s7
193 ; GFX7-NEXT: s_mov_b32 s8, s6
194 ; GFX7-NEXT: buffer_atomic_fmin v0, v[1:2], s[8:11], s18 idxen offen glc slc
195 ; GFX7-NEXT: s_waitcnt vmcnt(0)
196 ; GFX7-NEXT: s_setpc_b64 s[30:31]
198 ; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
200 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
201 ; GFX10-NEXT: s_mov_b32 s11, s17
202 ; GFX10-NEXT: s_mov_b32 s10, s16
203 ; GFX10-NEXT: s_mov_b32 s9, s7
204 ; GFX10-NEXT: s_mov_b32 s8, s6
205 ; GFX10-NEXT: buffer_atomic_fmin v0, v[1:2], s[8:11], s18 idxen offen glc slc
206 ; GFX10-NEXT: s_waitcnt vmcnt(0)
207 ; GFX10-NEXT: s_setpc_b64 s[30:31]
209 ; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
211 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
212 ; GFX11-NEXT: buffer_atomic_min_f32 v0, v[1:2], s[0:3], s6 idxen offen glc slc
213 ; GFX11-NEXT: s_waitcnt vmcnt(0)
214 ; GFX11-NEXT: s_setpc_b64 s[30:31]
216 ; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
218 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
219 ; GFX12-NEXT: s_wait_expcnt 0x0
220 ; GFX12-NEXT: s_wait_samplecnt 0x0
221 ; GFX12-NEXT: s_wait_bvhcnt 0x0
222 ; GFX12-NEXT: s_wait_kmcnt 0x0
223 ; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v[1:2], s[0:3], s6 idxen offen th:TH_ATOMIC_NT_RETURN
224 ; GFX12-NEXT: s_wait_loadcnt 0x0
225 ; GFX12-NEXT: s_setpc_b64 s[30:31]
226 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
; Non-returning form: the function is void and %ret is never used.
; The checks expect the same instructions as the returning variant but
; without "glc" (and without any th: modifier on gfx1200). On gfx1010,
; gfx1100 and gfx1200 no wait follows the atomic before s_setpc_b64.
; %rsrc and %soffset are inreg; the trailing immediate operand is 0.
230 define void @struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
231 ; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
233 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
234 ; GFX6-NEXT: s_mov_b32 s11, s17
235 ; GFX6-NEXT: s_mov_b32 s10, s16
236 ; GFX6-NEXT: s_mov_b32 s9, s7
237 ; GFX6-NEXT: s_mov_b32 s8, s6
238 ; GFX6-NEXT: buffer_atomic_fmin v0, v[1:2], s[8:11], s18 idxen offen
239 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
240 ; GFX6-NEXT: s_setpc_b64 s[30:31]
242 ; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
244 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
245 ; GFX7-NEXT: s_mov_b32 s11, s17
246 ; GFX7-NEXT: s_mov_b32 s10, s16
247 ; GFX7-NEXT: s_mov_b32 s9, s7
248 ; GFX7-NEXT: s_mov_b32 s8, s6
249 ; GFX7-NEXT: buffer_atomic_fmin v0, v[1:2], s[8:11], s18 idxen offen
250 ; GFX7-NEXT: s_waitcnt vmcnt(0)
251 ; GFX7-NEXT: s_setpc_b64 s[30:31]
253 ; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
255 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
256 ; GFX10-NEXT: s_mov_b32 s11, s17
257 ; GFX10-NEXT: s_mov_b32 s10, s16
258 ; GFX10-NEXT: s_mov_b32 s9, s7
259 ; GFX10-NEXT: s_mov_b32 s8, s6
260 ; GFX10-NEXT: buffer_atomic_fmin v0, v[1:2], s[8:11], s18 idxen offen
261 ; GFX10-NEXT: s_setpc_b64 s[30:31]
263 ; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
265 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266 ; GFX11-NEXT: buffer_atomic_min_f32 v0, v[1:2], s[0:3], s6 idxen offen
267 ; GFX11-NEXT: s_setpc_b64 s[30:31]
269 ; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
271 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
272 ; GFX12-NEXT: s_wait_expcnt 0x0
273 ; GFX12-NEXT: s_wait_samplecnt 0x0
274 ; GFX12-NEXT: s_wait_bvhcnt 0x0
275 ; GFX12-NEXT: s_wait_kmcnt 0x0
276 ; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v[1:2], s[0:3], s6 idxen offen
277 ; GFX12-NEXT: s_setpc_b64 s[30:31]
278 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
; Non-returning form (void function, %ret unused) with %voffset + 256 as the
; voffset operand. The checks expect the constant to appear as "offset:256"
; on the atomic, with no "glc" / th: modifier and no separate add.
; %rsrc and %soffset are inreg; the trailing immediate operand is 0.
282 define void @struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
283 ; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
285 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
286 ; GFX6-NEXT: s_mov_b32 s11, s17
287 ; GFX6-NEXT: s_mov_b32 s10, s16
288 ; GFX6-NEXT: s_mov_b32 s9, s7
289 ; GFX6-NEXT: s_mov_b32 s8, s6
290 ; GFX6-NEXT: buffer_atomic_fmin v0, v[1:2], s[8:11], s18 idxen offen offset:256
291 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
292 ; GFX6-NEXT: s_setpc_b64 s[30:31]
294 ; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
296 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
297 ; GFX7-NEXT: s_mov_b32 s11, s17
298 ; GFX7-NEXT: s_mov_b32 s10, s16
299 ; GFX7-NEXT: s_mov_b32 s9, s7
300 ; GFX7-NEXT: s_mov_b32 s8, s6
301 ; GFX7-NEXT: buffer_atomic_fmin v0, v[1:2], s[8:11], s18 idxen offen offset:256
302 ; GFX7-NEXT: s_waitcnt vmcnt(0)
303 ; GFX7-NEXT: s_setpc_b64 s[30:31]
305 ; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
307 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
308 ; GFX10-NEXT: s_mov_b32 s11, s17
309 ; GFX10-NEXT: s_mov_b32 s10, s16
310 ; GFX10-NEXT: s_mov_b32 s9, s7
311 ; GFX10-NEXT: s_mov_b32 s8, s6
312 ; GFX10-NEXT: buffer_atomic_fmin v0, v[1:2], s[8:11], s18 idxen offen offset:256
313 ; GFX10-NEXT: s_setpc_b64 s[30:31]
315 ; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
317 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
318 ; GFX11-NEXT: buffer_atomic_min_f32 v0, v[1:2], s[0:3], s6 idxen offen offset:256
319 ; GFX11-NEXT: s_setpc_b64 s[30:31]
321 ; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
323 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
324 ; GFX12-NEXT: s_wait_expcnt 0x0
325 ; GFX12-NEXT: s_wait_samplecnt 0x0
326 ; GFX12-NEXT: s_wait_bvhcnt 0x0
327 ; GFX12-NEXT: s_wait_kmcnt 0x0
328 ; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v[1:2], s[0:3], s6 idxen offen offset:256
329 ; GFX12-NEXT: s_setpc_b64 s[30:31]
330 %voffset.add = add i32 %voffset, 256
331 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0)
335 ; Natural mapping, no voffset: the voffset operand is the constant 0.
; Non-returning form (void function, %ret unused). The checks expect a single
; address register v1 with only "idxen" set, and no "glc" / th: modifier.
; %rsrc and %soffset are inreg; the trailing immediate operand is 0.
336 define void @struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
337 ; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
339 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
340 ; GFX6-NEXT: s_mov_b32 s11, s17
341 ; GFX6-NEXT: s_mov_b32 s10, s16
342 ; GFX6-NEXT: s_mov_b32 s9, s7
343 ; GFX6-NEXT: s_mov_b32 s8, s6
344 ; GFX6-NEXT: buffer_atomic_fmin v0, v1, s[8:11], s18 idxen
345 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
346 ; GFX6-NEXT: s_setpc_b64 s[30:31]
348 ; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
350 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
351 ; GFX7-NEXT: s_mov_b32 s11, s17
352 ; GFX7-NEXT: s_mov_b32 s10, s16
353 ; GFX7-NEXT: s_mov_b32 s9, s7
354 ; GFX7-NEXT: s_mov_b32 s8, s6
355 ; GFX7-NEXT: buffer_atomic_fmin v0, v1, s[8:11], s18 idxen
356 ; GFX7-NEXT: s_waitcnt vmcnt(0)
357 ; GFX7-NEXT: s_setpc_b64 s[30:31]
359 ; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
361 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
362 ; GFX10-NEXT: s_mov_b32 s11, s17
363 ; GFX10-NEXT: s_mov_b32 s10, s16
364 ; GFX10-NEXT: s_mov_b32 s9, s7
365 ; GFX10-NEXT: s_mov_b32 s8, s6
366 ; GFX10-NEXT: buffer_atomic_fmin v0, v1, s[8:11], s18 idxen
367 ; GFX10-NEXT: s_setpc_b64 s[30:31]
369 ; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
371 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
372 ; GFX11-NEXT: buffer_atomic_min_f32 v0, v1, s[0:3], s6 idxen
373 ; GFX11-NEXT: s_setpc_b64 s[30:31]
375 ; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
377 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
378 ; GFX12-NEXT: s_wait_expcnt 0x0
379 ; GFX12-NEXT: s_wait_samplecnt 0x0
380 ; GFX12-NEXT: s_wait_bvhcnt 0x0
381 ; GFX12-NEXT: s_wait_kmcnt 0x0
382 ; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], s6 idxen
383 ; GFX12-NEXT: s_setpc_b64 s[30:31]
384 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
; Non-returning form (void function, %ret unused) with 2 as the trailing
; immediate operand. The checks expect "slc" without "glc" on the pre-gfx1200
; targets, and th:TH_ATOMIC_NT (no _RETURN suffix) on gfx1200.
; %rsrc and %soffset are inreg.
388 define void @struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
389 ; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
391 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
392 ; GFX6-NEXT: s_mov_b32 s11, s17
393 ; GFX6-NEXT: s_mov_b32 s10, s16
394 ; GFX6-NEXT: s_mov_b32 s9, s7
395 ; GFX6-NEXT: s_mov_b32 s8, s6
396 ; GFX6-NEXT: buffer_atomic_fmin v0, v[1:2], s[8:11], s18 idxen offen slc
397 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
398 ; GFX6-NEXT: s_setpc_b64 s[30:31]
400 ; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
402 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
403 ; GFX7-NEXT: s_mov_b32 s11, s17
404 ; GFX7-NEXT: s_mov_b32 s10, s16
405 ; GFX7-NEXT: s_mov_b32 s9, s7
406 ; GFX7-NEXT: s_mov_b32 s8, s6
407 ; GFX7-NEXT: buffer_atomic_fmin v0, v[1:2], s[8:11], s18 idxen offen slc
408 ; GFX7-NEXT: s_waitcnt vmcnt(0)
409 ; GFX7-NEXT: s_setpc_b64 s[30:31]
411 ; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
413 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
414 ; GFX10-NEXT: s_mov_b32 s11, s17
415 ; GFX10-NEXT: s_mov_b32 s10, s16
416 ; GFX10-NEXT: s_mov_b32 s9, s7
417 ; GFX10-NEXT: s_mov_b32 s8, s6
418 ; GFX10-NEXT: buffer_atomic_fmin v0, v[1:2], s[8:11], s18 idxen offen slc
419 ; GFX10-NEXT: s_setpc_b64 s[30:31]
421 ; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
423 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
424 ; GFX11-NEXT: buffer_atomic_min_f32 v0, v[1:2], s[0:3], s6 idxen offen slc
425 ; GFX11-NEXT: s_setpc_b64 s[30:31]
427 ; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
429 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
430 ; GFX12-NEXT: s_wait_expcnt 0x0
431 ; GFX12-NEXT: s_wait_samplecnt 0x0
432 ; GFX12-NEXT: s_wait_bvhcnt 0x0
433 ; GFX12-NEXT: s_wait_kmcnt 0x0
434 ; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v[1:2], s[0:3], s6 idxen offen th:TH_ATOMIC_NT
435 ; GFX12-NEXT: s_setpc_b64 s[30:31]
436 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
440 ; Test waterfall loop on resource
; Here %rsrc is NOT inreg (only %soffset is), so the descriptor arrives in
; v[1:4]. The checks expect a loop (.LBB8_1) that:
;   - saves exec, then reads v1..v4 with v_readfirstlane_b32 into scalar regs,
;   - compares those scalars against v[1:2] / v[3:4] and ANDs the results,
;   - runs the atomic under s_and_saveexec with the scalar descriptor,
;   - XORs the handled lanes out of exec and branches back while exec != 0,
; and finally restores the saved exec mask.
; The voffset operand is %voffset + 256, which appears as "offset:256".
441 define float @struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
442 ; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
444 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
445 ; GFX6-NEXT: s_mov_b64 s[12:13], exec
446 ; GFX6-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
447 ; GFX6-NEXT: v_readfirstlane_b32 s8, v1
448 ; GFX6-NEXT: v_readfirstlane_b32 s9, v2
449 ; GFX6-NEXT: v_readfirstlane_b32 s10, v3
450 ; GFX6-NEXT: v_readfirstlane_b32 s11, v4
451 ; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
452 ; GFX6-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[3:4]
453 ; GFX6-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
454 ; GFX6-NEXT: s_and_saveexec_b64 s[4:5], s[4:5]
455 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
456 ; GFX6-NEXT: buffer_atomic_fmin v0, v[5:6], s[8:11], s6 idxen offen offset:256 glc
457 ; GFX6-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
458 ; GFX6-NEXT: ; implicit-def: $vgpr5_vgpr6
459 ; GFX6-NEXT: s_xor_b64 exec, exec, s[4:5]
460 ; GFX6-NEXT: s_cbranch_execnz .LBB8_1
461 ; GFX6-NEXT: ; %bb.2:
462 ; GFX6-NEXT: s_mov_b64 exec, s[12:13]
463 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
464 ; GFX6-NEXT: s_setpc_b64 s[30:31]
466 ; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
468 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
469 ; GFX7-NEXT: s_mov_b64 s[12:13], exec
470 ; GFX7-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
471 ; GFX7-NEXT: v_readfirstlane_b32 s8, v1
472 ; GFX7-NEXT: v_readfirstlane_b32 s9, v2
473 ; GFX7-NEXT: v_readfirstlane_b32 s10, v3
474 ; GFX7-NEXT: v_readfirstlane_b32 s11, v4
475 ; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
476 ; GFX7-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[3:4]
477 ; GFX7-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
478 ; GFX7-NEXT: s_and_saveexec_b64 s[4:5], s[4:5]
479 ; GFX7-NEXT: s_waitcnt vmcnt(0)
480 ; GFX7-NEXT: buffer_atomic_fmin v0, v[5:6], s[8:11], s6 idxen offen offset:256 glc
481 ; GFX7-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
482 ; GFX7-NEXT: ; implicit-def: $vgpr5_vgpr6
483 ; GFX7-NEXT: s_xor_b64 exec, exec, s[4:5]
484 ; GFX7-NEXT: s_cbranch_execnz .LBB8_1
485 ; GFX7-NEXT: ; %bb.2:
486 ; GFX7-NEXT: s_mov_b64 exec, s[12:13]
487 ; GFX7-NEXT: s_waitcnt vmcnt(0)
488 ; GFX7-NEXT: s_setpc_b64 s[30:31]
490 ; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
492 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
493 ; GFX10-NEXT: s_mov_b32 s5, exec_lo
494 ; GFX10-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
495 ; GFX10-NEXT: v_readfirstlane_b32 s8, v1
496 ; GFX10-NEXT: v_readfirstlane_b32 s9, v2
497 ; GFX10-NEXT: v_readfirstlane_b32 s10, v3
498 ; GFX10-NEXT: v_readfirstlane_b32 s11, v4
499 ; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[1:2]
500 ; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[3:4]
501 ; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4
502 ; GFX10-NEXT: s_and_saveexec_b32 s4, s4
503 ; GFX10-NEXT: s_waitcnt vmcnt(0)
504 ; GFX10-NEXT: buffer_atomic_fmin v0, v[5:6], s[8:11], s6 idxen offen offset:256 glc
505 ; GFX10-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
506 ; GFX10-NEXT: ; implicit-def: $vgpr5_vgpr6
507 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
508 ; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4
509 ; GFX10-NEXT: s_cbranch_execnz .LBB8_1
510 ; GFX10-NEXT: ; %bb.2:
511 ; GFX10-NEXT: s_mov_b32 exec_lo, s5
512 ; GFX10-NEXT: s_waitcnt vmcnt(0)
513 ; GFX10-NEXT: s_setpc_b64 s[30:31]
515 ; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
517 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
518 ; GFX11-NEXT: s_mov_b32 s2, exec_lo
519 ; GFX11-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
520 ; GFX11-NEXT: v_readfirstlane_b32 s4, v1
521 ; GFX11-NEXT: v_readfirstlane_b32 s5, v2
522 ; GFX11-NEXT: v_readfirstlane_b32 s6, v3
523 ; GFX11-NEXT: v_readfirstlane_b32 s7, v4
524 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
525 ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2]
526 ; GFX11-NEXT: v_cmp_eq_u64_e64 s1, s[6:7], v[3:4]
527 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
528 ; GFX11-NEXT: s_and_b32 s1, vcc_lo, s1
529 ; GFX11-NEXT: s_and_saveexec_b32 s1, s1
530 ; GFX11-NEXT: s_waitcnt vmcnt(0)
531 ; GFX11-NEXT: buffer_atomic_min_f32 v0, v[5:6], s[4:7], s0 idxen offen offset:256 glc
532 ; GFX11-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
533 ; GFX11-NEXT: ; implicit-def: $vgpr5_vgpr6
534 ; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s1
535 ; GFX11-NEXT: s_cbranch_execnz .LBB8_1
536 ; GFX11-NEXT: ; %bb.2:
537 ; GFX11-NEXT: s_mov_b32 exec_lo, s2
538 ; GFX11-NEXT: s_waitcnt vmcnt(0)
539 ; GFX11-NEXT: s_setpc_b64 s[30:31]
541 ; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
543 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
544 ; GFX12-NEXT: s_wait_expcnt 0x0
545 ; GFX12-NEXT: s_wait_samplecnt 0x0
546 ; GFX12-NEXT: s_wait_bvhcnt 0x0
547 ; GFX12-NEXT: s_wait_kmcnt 0x0
548 ; GFX12-NEXT: s_mov_b32 s2, exec_lo
549 ; GFX12-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
550 ; GFX12-NEXT: v_readfirstlane_b32 s4, v1
551 ; GFX12-NEXT: v_readfirstlane_b32 s5, v2
552 ; GFX12-NEXT: v_readfirstlane_b32 s6, v3
553 ; GFX12-NEXT: v_readfirstlane_b32 s7, v4
554 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
555 ; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2]
556 ; GFX12-NEXT: v_cmp_eq_u64_e64 s1, s[6:7], v[3:4]
557 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
558 ; GFX12-NEXT: s_and_b32 s1, vcc_lo, s1
559 ; GFX12-NEXT: s_and_saveexec_b32 s1, s1
560 ; GFX12-NEXT: s_wait_loadcnt 0x0
561 ; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v[5:6], s[4:7], s0 idxen offen offset:256 th:TH_ATOMIC_RETURN
562 ; GFX12-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
563 ; GFX12-NEXT: ; implicit-def: $vgpr5_vgpr6
564 ; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s1
565 ; GFX12-NEXT: s_cbranch_execnz .LBB8_1
566 ; GFX12-NEXT: ; %bb.2:
567 ; GFX12-NEXT: s_mov_b32 exec_lo, s2
568 ; GFX12-NEXT: s_wait_loadcnt 0x0
569 ; GFX12-NEXT: s_setpc_b64 s[30:31]
570 %voffset.add = add i32 %voffset, 256
571 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0)
575 ; Test waterfall loop on resource and soffset
; Neither %rsrc nor %soffset is inreg here, so both arrive in vector
; registers (v[1:4] and v7 in the checks). The expected loop (.LBB9_1) reads
; all five registers with v_readfirstlane_b32, compares each scalar copy with
; its vector source, ANDs the three compare results, and executes the atomic
; only for the matching lanes before looping on the remaining ones.
; NOTE(review): the function name says "sgpr_rsrc", but %rsrc carries no inreg
; attribute and the checks readfirstlane v1..v4; the name is kept as-is so the
; LABEL checks stay valid.
; The voffset operand is %voffset + 256, which appears as "offset:256".
576 define float @struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__vgpr_soffset(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset) {
577 ; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__vgpr_soffset:
579 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
580 ; GFX6-NEXT: s_mov_b64 s[6:7], exec
581 ; GFX6-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
582 ; GFX6-NEXT: v_readfirstlane_b32 s8, v1
583 ; GFX6-NEXT: v_readfirstlane_b32 s9, v2
584 ; GFX6-NEXT: v_readfirstlane_b32 s10, v3
585 ; GFX6-NEXT: v_readfirstlane_b32 s11, v4
586 ; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
587 ; GFX6-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[3:4]
588 ; GFX6-NEXT: v_readfirstlane_b32 s12, v7
589 ; GFX6-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
590 ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, s12, v7
591 ; GFX6-NEXT: s_and_b64 s[4:5], s[4:5], vcc
592 ; GFX6-NEXT: s_and_saveexec_b64 s[4:5], s[4:5]
593 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
594 ; GFX6-NEXT: buffer_atomic_fmin v0, v[5:6], s[8:11], s12 idxen offen offset:256 glc
595 ; GFX6-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
596 ; GFX6-NEXT: ; implicit-def: $vgpr7
597 ; GFX6-NEXT: ; implicit-def: $vgpr5_vgpr6
598 ; GFX6-NEXT: s_xor_b64 exec, exec, s[4:5]
599 ; GFX6-NEXT: s_cbranch_execnz .LBB9_1
600 ; GFX6-NEXT: ; %bb.2:
601 ; GFX6-NEXT: s_mov_b64 exec, s[6:7]
602 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
603 ; GFX6-NEXT: s_setpc_b64 s[30:31]
605 ; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__vgpr_soffset:
607 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
608 ; GFX7-NEXT: s_mov_b64 s[6:7], exec
609 ; GFX7-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
610 ; GFX7-NEXT: v_readfirstlane_b32 s8, v1
611 ; GFX7-NEXT: v_readfirstlane_b32 s9, v2
612 ; GFX7-NEXT: v_readfirstlane_b32 s10, v3
613 ; GFX7-NEXT: v_readfirstlane_b32 s11, v4
614 ; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
615 ; GFX7-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[3:4]
616 ; GFX7-NEXT: v_readfirstlane_b32 s12, v7
617 ; GFX7-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
618 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, s12, v7
619 ; GFX7-NEXT: s_and_b64 s[4:5], s[4:5], vcc
620 ; GFX7-NEXT: s_and_saveexec_b64 s[4:5], s[4:5]
621 ; GFX7-NEXT: s_waitcnt vmcnt(0)
622 ; GFX7-NEXT: buffer_atomic_fmin v0, v[5:6], s[8:11], s12 idxen offen offset:256 glc
623 ; GFX7-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
624 ; GFX7-NEXT: ; implicit-def: $vgpr7
625 ; GFX7-NEXT: ; implicit-def: $vgpr5_vgpr6
626 ; GFX7-NEXT: s_xor_b64 exec, exec, s[4:5]
627 ; GFX7-NEXT: s_cbranch_execnz .LBB9_1
628 ; GFX7-NEXT: ; %bb.2:
629 ; GFX7-NEXT: s_mov_b64 exec, s[6:7]
630 ; GFX7-NEXT: s_waitcnt vmcnt(0)
631 ; GFX7-NEXT: s_setpc_b64 s[30:31]
633 ; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__vgpr_soffset:
635 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
636 ; GFX10-NEXT: s_mov_b32 s6, exec_lo
637 ; GFX10-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
638 ; GFX10-NEXT: v_readfirstlane_b32 s8, v1
639 ; GFX10-NEXT: v_readfirstlane_b32 s9, v2
640 ; GFX10-NEXT: v_readfirstlane_b32 s10, v3
641 ; GFX10-NEXT: v_readfirstlane_b32 s11, v4
642 ; GFX10-NEXT: v_readfirstlane_b32 s7, v7
643 ; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[1:2]
644 ; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[3:4]
645 ; GFX10-NEXT: v_cmp_eq_u32_e64 s5, s7, v7
646 ; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4
647 ; GFX10-NEXT: s_and_b32 s4, s4, s5
648 ; GFX10-NEXT: s_and_saveexec_b32 s4, s4
649 ; GFX10-NEXT: s_waitcnt vmcnt(0)
650 ; GFX10-NEXT: buffer_atomic_fmin v0, v[5:6], s[8:11], s7 idxen offen offset:256 glc
651 ; GFX10-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
652 ; GFX10-NEXT: ; implicit-def: $vgpr7
653 ; GFX10-NEXT: ; implicit-def: $vgpr5_vgpr6
654 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
655 ; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4
656 ; GFX10-NEXT: s_cbranch_execnz .LBB9_1
657 ; GFX10-NEXT: ; %bb.2:
658 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
659 ; GFX10-NEXT: s_waitcnt vmcnt(0)
660 ; GFX10-NEXT: s_setpc_b64 s[30:31]
662 ; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__vgpr_soffset:
664 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
665 ; GFX11-NEXT: s_mov_b32 s2, exec_lo
666 ; GFX11-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
667 ; GFX11-NEXT: v_readfirstlane_b32 s4, v1
668 ; GFX11-NEXT: v_readfirstlane_b32 s5, v2
669 ; GFX11-NEXT: v_readfirstlane_b32 s6, v3
670 ; GFX11-NEXT: v_readfirstlane_b32 s7, v4
671 ; GFX11-NEXT: v_readfirstlane_b32 s3, v7
672 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
673 ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2]
674 ; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[3:4]
675 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
676 ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, s3, v7
677 ; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0
678 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
679 ; GFX11-NEXT: s_and_b32 s0, s0, s1
680 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
681 ; GFX11-NEXT: s_and_saveexec_b32 s0, s0
682 ; GFX11-NEXT: s_waitcnt vmcnt(0)
683 ; GFX11-NEXT: buffer_atomic_min_f32 v0, v[5:6], s[4:7], s3 idxen offen offset:256 glc
684 ; GFX11-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
685 ; GFX11-NEXT: ; implicit-def: $vgpr7
686 ; GFX11-NEXT: ; implicit-def: $vgpr5_vgpr6
687 ; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0
688 ; GFX11-NEXT: s_cbranch_execnz .LBB9_1
689 ; GFX11-NEXT: ; %bb.2:
690 ; GFX11-NEXT: s_mov_b32 exec_lo, s2
691 ; GFX11-NEXT: s_waitcnt vmcnt(0)
692 ; GFX11-NEXT: s_setpc_b64 s[30:31]
694 ; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__vgpr_soffset:
696 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
697 ; GFX12-NEXT: s_wait_expcnt 0x0
698 ; GFX12-NEXT: s_wait_samplecnt 0x0
699 ; GFX12-NEXT: s_wait_bvhcnt 0x0
700 ; GFX12-NEXT: s_wait_kmcnt 0x0
701 ; GFX12-NEXT: s_mov_b32 s2, exec_lo
702 ; GFX12-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
703 ; GFX12-NEXT: v_readfirstlane_b32 s4, v1
704 ; GFX12-NEXT: v_readfirstlane_b32 s5, v2
705 ; GFX12-NEXT: v_readfirstlane_b32 s6, v3
706 ; GFX12-NEXT: v_readfirstlane_b32 s7, v4
707 ; GFX12-NEXT: v_readfirstlane_b32 s3, v7
708 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
709 ; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2]
710 ; GFX12-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[3:4]
711 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
712 ; GFX12-NEXT: v_cmp_eq_u32_e64 s1, s3, v7
713 ; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0
714 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
715 ; GFX12-NEXT: s_and_b32 s0, s0, s1
716 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
717 ; GFX12-NEXT: s_and_saveexec_b32 s0, s0
718 ; GFX12-NEXT: s_wait_loadcnt 0x0
719 ; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v[5:6], s[4:7], s3 idxen offen offset:256 th:TH_ATOMIC_RETURN
720 ; GFX12-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
721 ; GFX12-NEXT: ; implicit-def: $vgpr7
722 ; GFX12-NEXT: ; implicit-def: $vgpr5_vgpr6
723 ; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0
724 ; GFX12-NEXT: s_cbranch_execnz .LBB9_1
725 ; GFX12-NEXT: ; %bb.2:
726 ; GFX12-NEXT: s_mov_b32 exec_lo, s2
727 ; GFX12-NEXT: s_wait_loadcnt 0x0
728 ; GFX12-NEXT: s_setpc_b64 s[30:31]
729 %voffset.add = add i32 %voffset, 256
730 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0)
; Intrinsic under test. As used by the call sites in this file, the operands
; are, in order: value, buffer resource (ptr addrspace(8)), vindex, voffset,
; soffset, and a trailing immediate (immarg). The tests pass 0 or 2 for the
; immediate; 2 is what yields "slc" / TH_ATOMIC_NT in the checks.
; NOTE(review): the immediate is presumably the cache-policy operand; confirm
; against the AMDGPU intrinsic definitions.
734 declare float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float, ptr addrspace(8), i32, i32, i32, i32 immarg)