1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - %s | FileCheck -check-prefix=GFX8 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX900 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -o - %s | FileCheck -check-prefix=GFX90A %s
6 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10PLUS %s
7 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - %s | FileCheck -check-prefix=GFX10PLUS %s
8 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GFX12 %s
; 1D image atomic swap on i32 data, compiled as an amdgpu_ps shader.
; The body calls llvm.amdgcn.image.atomic.swap.1d.i32.i32 with %data, %s
; (presumably the 1D coordinate; confirm against the intrinsic definition) and
; the inreg resource %rsrc, then bitcasts the i32 result %v to float (%out).
; Expected output per the checks below: s2-s9 are copied into s0-s7, followed by
; one image_atomic_swap and a wait. The gfx90a run first copies v1 into v2 and
; uses v2 as the address operand. The gfx1010/gfx1100 runs add
; dim:SQ_RSRC_IMG_1D. The gfx1200 run uses th:TH_ATOMIC_RETURN and
; s_wait_loadcnt in place of glc and s_waitcnt.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the script rather than editing them by hand.
10 define amdgpu_ps float @atomic_swap_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
11 ; GFX6-LABEL: atomic_swap_i32_1d:
12 ; GFX6: ; %bb.0: ; %main_body
13 ; GFX6-NEXT: s_mov_b32 s0, s2
14 ; GFX6-NEXT: s_mov_b32 s1, s3
15 ; GFX6-NEXT: s_mov_b32 s2, s4
16 ; GFX6-NEXT: s_mov_b32 s3, s5
17 ; GFX6-NEXT: s_mov_b32 s4, s6
18 ; GFX6-NEXT: s_mov_b32 s5, s7
19 ; GFX6-NEXT: s_mov_b32 s6, s8
20 ; GFX6-NEXT: s_mov_b32 s7, s9
21 ; GFX6-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc
22 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
23 ; GFX6-NEXT: ; return to shader part epilog
25 ; GFX8-LABEL: atomic_swap_i32_1d:
26 ; GFX8: ; %bb.0: ; %main_body
27 ; GFX8-NEXT: s_mov_b32 s0, s2
28 ; GFX8-NEXT: s_mov_b32 s1, s3
29 ; GFX8-NEXT: s_mov_b32 s2, s4
30 ; GFX8-NEXT: s_mov_b32 s3, s5
31 ; GFX8-NEXT: s_mov_b32 s4, s6
32 ; GFX8-NEXT: s_mov_b32 s5, s7
33 ; GFX8-NEXT: s_mov_b32 s6, s8
34 ; GFX8-NEXT: s_mov_b32 s7, s9
35 ; GFX8-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc
36 ; GFX8-NEXT: s_waitcnt vmcnt(0)
37 ; GFX8-NEXT: ; return to shader part epilog
39 ; GFX900-LABEL: atomic_swap_i32_1d:
40 ; GFX900: ; %bb.0: ; %main_body
41 ; GFX900-NEXT: s_mov_b32 s0, s2
42 ; GFX900-NEXT: s_mov_b32 s1, s3
43 ; GFX900-NEXT: s_mov_b32 s2, s4
44 ; GFX900-NEXT: s_mov_b32 s3, s5
45 ; GFX900-NEXT: s_mov_b32 s4, s6
46 ; GFX900-NEXT: s_mov_b32 s5, s7
47 ; GFX900-NEXT: s_mov_b32 s6, s8
48 ; GFX900-NEXT: s_mov_b32 s7, s9
49 ; GFX900-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc
50 ; GFX900-NEXT: s_waitcnt vmcnt(0)
51 ; GFX900-NEXT: ; return to shader part epilog
53 ; GFX90A-LABEL: atomic_swap_i32_1d:
54 ; GFX90A: ; %bb.0: ; %main_body
55 ; GFX90A-NEXT: s_mov_b32 s0, s2
56 ; GFX90A-NEXT: s_mov_b32 s1, s3
57 ; GFX90A-NEXT: s_mov_b32 s2, s4
58 ; GFX90A-NEXT: s_mov_b32 s3, s5
59 ; GFX90A-NEXT: s_mov_b32 s4, s6
60 ; GFX90A-NEXT: s_mov_b32 s5, s7
61 ; GFX90A-NEXT: s_mov_b32 s6, s8
62 ; GFX90A-NEXT: s_mov_b32 s7, s9
63 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
64 ; GFX90A-NEXT: image_atomic_swap v0, v2, s[0:7] dmask:0x1 unorm glc
65 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
66 ; GFX90A-NEXT: ; return to shader part epilog
68 ; GFX10PLUS-LABEL: atomic_swap_i32_1d:
69 ; GFX10PLUS: ; %bb.0: ; %main_body
70 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
71 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
72 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
73 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
74 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
75 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
76 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
77 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
78 ; GFX10PLUS-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
79 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
80 ; GFX10PLUS-NEXT: ; return to shader part epilog
82 ; GFX12-LABEL: atomic_swap_i32_1d:
83 ; GFX12: ; %bb.0: ; %main_body
84 ; GFX12-NEXT: s_mov_b32 s0, s2
85 ; GFX12-NEXT: s_mov_b32 s1, s3
86 ; GFX12-NEXT: s_mov_b32 s2, s4
87 ; GFX12-NEXT: s_mov_b32 s3, s5
88 ; GFX12-NEXT: s_mov_b32 s4, s6
89 ; GFX12-NEXT: s_mov_b32 s5, s7
90 ; GFX12-NEXT: s_mov_b32 s6, s8
91 ; GFX12-NEXT: s_mov_b32 s7, s9
92 ; GFX12-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
93 ; GFX12-NEXT: s_wait_loadcnt 0x0
94 ; GFX12-NEXT: ; return to shader part epilog
96 %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
97 %out = bitcast i32 %v to float
; 1D image atomic add on i32 data, compiled as an amdgpu_ps shader.
; The body calls llvm.amdgcn.image.atomic.add.1d.i32.i32 with %data, %s
; (presumably the 1D coordinate; confirm against the intrinsic definition) and
; the inreg resource %rsrc, then bitcasts the i32 result %v to float (%out).
; Expected output per the checks below: s2-s9 are copied into s0-s7, followed by
; one image_atomic_add and a wait. The gfx90a run first copies v1 into v2 and
; uses v2 as the address operand. The gfx1200 run expects the mnemonic
; image_atomic_add_uint with th:TH_ATOMIC_RETURN and s_wait_loadcnt.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the script rather than editing them by hand.
101 define amdgpu_ps float @atomic_add_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
102 ; GFX6-LABEL: atomic_add_i32_1d:
103 ; GFX6: ; %bb.0: ; %main_body
104 ; GFX6-NEXT: s_mov_b32 s0, s2
105 ; GFX6-NEXT: s_mov_b32 s1, s3
106 ; GFX6-NEXT: s_mov_b32 s2, s4
107 ; GFX6-NEXT: s_mov_b32 s3, s5
108 ; GFX6-NEXT: s_mov_b32 s4, s6
109 ; GFX6-NEXT: s_mov_b32 s5, s7
110 ; GFX6-NEXT: s_mov_b32 s6, s8
111 ; GFX6-NEXT: s_mov_b32 s7, s9
112 ; GFX6-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc
113 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
114 ; GFX6-NEXT: ; return to shader part epilog
116 ; GFX8-LABEL: atomic_add_i32_1d:
117 ; GFX8: ; %bb.0: ; %main_body
118 ; GFX8-NEXT: s_mov_b32 s0, s2
119 ; GFX8-NEXT: s_mov_b32 s1, s3
120 ; GFX8-NEXT: s_mov_b32 s2, s4
121 ; GFX8-NEXT: s_mov_b32 s3, s5
122 ; GFX8-NEXT: s_mov_b32 s4, s6
123 ; GFX8-NEXT: s_mov_b32 s5, s7
124 ; GFX8-NEXT: s_mov_b32 s6, s8
125 ; GFX8-NEXT: s_mov_b32 s7, s9
126 ; GFX8-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc
127 ; GFX8-NEXT: s_waitcnt vmcnt(0)
128 ; GFX8-NEXT: ; return to shader part epilog
130 ; GFX900-LABEL: atomic_add_i32_1d:
131 ; GFX900: ; %bb.0: ; %main_body
132 ; GFX900-NEXT: s_mov_b32 s0, s2
133 ; GFX900-NEXT: s_mov_b32 s1, s3
134 ; GFX900-NEXT: s_mov_b32 s2, s4
135 ; GFX900-NEXT: s_mov_b32 s3, s5
136 ; GFX900-NEXT: s_mov_b32 s4, s6
137 ; GFX900-NEXT: s_mov_b32 s5, s7
138 ; GFX900-NEXT: s_mov_b32 s6, s8
139 ; GFX900-NEXT: s_mov_b32 s7, s9
140 ; GFX900-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc
141 ; GFX900-NEXT: s_waitcnt vmcnt(0)
142 ; GFX900-NEXT: ; return to shader part epilog
144 ; GFX90A-LABEL: atomic_add_i32_1d:
145 ; GFX90A: ; %bb.0: ; %main_body
146 ; GFX90A-NEXT: s_mov_b32 s0, s2
147 ; GFX90A-NEXT: s_mov_b32 s1, s3
148 ; GFX90A-NEXT: s_mov_b32 s2, s4
149 ; GFX90A-NEXT: s_mov_b32 s3, s5
150 ; GFX90A-NEXT: s_mov_b32 s4, s6
151 ; GFX90A-NEXT: s_mov_b32 s5, s7
152 ; GFX90A-NEXT: s_mov_b32 s6, s8
153 ; GFX90A-NEXT: s_mov_b32 s7, s9
154 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
155 ; GFX90A-NEXT: image_atomic_add v0, v2, s[0:7] dmask:0x1 unorm glc
156 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
157 ; GFX90A-NEXT: ; return to shader part epilog
159 ; GFX10PLUS-LABEL: atomic_add_i32_1d:
160 ; GFX10PLUS: ; %bb.0: ; %main_body
161 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
162 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
163 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
164 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
165 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
166 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
167 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
168 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
169 ; GFX10PLUS-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
170 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
171 ; GFX10PLUS-NEXT: ; return to shader part epilog
173 ; GFX12-LABEL: atomic_add_i32_1d:
174 ; GFX12: ; %bb.0: ; %main_body
175 ; GFX12-NEXT: s_mov_b32 s0, s2
176 ; GFX12-NEXT: s_mov_b32 s1, s3
177 ; GFX12-NEXT: s_mov_b32 s2, s4
178 ; GFX12-NEXT: s_mov_b32 s3, s5
179 ; GFX12-NEXT: s_mov_b32 s4, s6
180 ; GFX12-NEXT: s_mov_b32 s5, s7
181 ; GFX12-NEXT: s_mov_b32 s6, s8
182 ; GFX12-NEXT: s_mov_b32 s7, s9
183 ; GFX12-NEXT: image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
184 ; GFX12-NEXT: s_wait_loadcnt 0x0
185 ; GFX12-NEXT: ; return to shader part epilog
187 %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
188 %out = bitcast i32 %v to float
; 1D image atomic subtract on i32 data, compiled as an amdgpu_ps shader.
; The body calls llvm.amdgcn.image.atomic.sub.1d.i32.i32 with %data, %s
; (presumably the 1D coordinate; confirm against the intrinsic definition) and
; the inreg resource %rsrc, then bitcasts the i32 result %v to float (%out).
; Expected output per the checks below: s2-s9 are copied into s0-s7, followed by
; one image_atomic_sub and a wait. The gfx90a run first copies v1 into v2 and
; uses v2 as the address operand. The gfx1200 run expects the mnemonic
; image_atomic_sub_uint with th:TH_ATOMIC_RETURN and s_wait_loadcnt.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the script rather than editing them by hand.
192 define amdgpu_ps float @atomic_sub_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
193 ; GFX6-LABEL: atomic_sub_i32_1d:
194 ; GFX6: ; %bb.0: ; %main_body
195 ; GFX6-NEXT: s_mov_b32 s0, s2
196 ; GFX6-NEXT: s_mov_b32 s1, s3
197 ; GFX6-NEXT: s_mov_b32 s2, s4
198 ; GFX6-NEXT: s_mov_b32 s3, s5
199 ; GFX6-NEXT: s_mov_b32 s4, s6
200 ; GFX6-NEXT: s_mov_b32 s5, s7
201 ; GFX6-NEXT: s_mov_b32 s6, s8
202 ; GFX6-NEXT: s_mov_b32 s7, s9
203 ; GFX6-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc
204 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
205 ; GFX6-NEXT: ; return to shader part epilog
207 ; GFX8-LABEL: atomic_sub_i32_1d:
208 ; GFX8: ; %bb.0: ; %main_body
209 ; GFX8-NEXT: s_mov_b32 s0, s2
210 ; GFX8-NEXT: s_mov_b32 s1, s3
211 ; GFX8-NEXT: s_mov_b32 s2, s4
212 ; GFX8-NEXT: s_mov_b32 s3, s5
213 ; GFX8-NEXT: s_mov_b32 s4, s6
214 ; GFX8-NEXT: s_mov_b32 s5, s7
215 ; GFX8-NEXT: s_mov_b32 s6, s8
216 ; GFX8-NEXT: s_mov_b32 s7, s9
217 ; GFX8-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc
218 ; GFX8-NEXT: s_waitcnt vmcnt(0)
219 ; GFX8-NEXT: ; return to shader part epilog
221 ; GFX900-LABEL: atomic_sub_i32_1d:
222 ; GFX900: ; %bb.0: ; %main_body
223 ; GFX900-NEXT: s_mov_b32 s0, s2
224 ; GFX900-NEXT: s_mov_b32 s1, s3
225 ; GFX900-NEXT: s_mov_b32 s2, s4
226 ; GFX900-NEXT: s_mov_b32 s3, s5
227 ; GFX900-NEXT: s_mov_b32 s4, s6
228 ; GFX900-NEXT: s_mov_b32 s5, s7
229 ; GFX900-NEXT: s_mov_b32 s6, s8
230 ; GFX900-NEXT: s_mov_b32 s7, s9
231 ; GFX900-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc
232 ; GFX900-NEXT: s_waitcnt vmcnt(0)
233 ; GFX900-NEXT: ; return to shader part epilog
235 ; GFX90A-LABEL: atomic_sub_i32_1d:
236 ; GFX90A: ; %bb.0: ; %main_body
237 ; GFX90A-NEXT: s_mov_b32 s0, s2
238 ; GFX90A-NEXT: s_mov_b32 s1, s3
239 ; GFX90A-NEXT: s_mov_b32 s2, s4
240 ; GFX90A-NEXT: s_mov_b32 s3, s5
241 ; GFX90A-NEXT: s_mov_b32 s4, s6
242 ; GFX90A-NEXT: s_mov_b32 s5, s7
243 ; GFX90A-NEXT: s_mov_b32 s6, s8
244 ; GFX90A-NEXT: s_mov_b32 s7, s9
245 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
246 ; GFX90A-NEXT: image_atomic_sub v0, v2, s[0:7] dmask:0x1 unorm glc
247 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
248 ; GFX90A-NEXT: ; return to shader part epilog
250 ; GFX10PLUS-LABEL: atomic_sub_i32_1d:
251 ; GFX10PLUS: ; %bb.0: ; %main_body
252 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
253 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
254 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
255 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
256 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
257 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
258 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
259 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
260 ; GFX10PLUS-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
261 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
262 ; GFX10PLUS-NEXT: ; return to shader part epilog
264 ; GFX12-LABEL: atomic_sub_i32_1d:
265 ; GFX12: ; %bb.0: ; %main_body
266 ; GFX12-NEXT: s_mov_b32 s0, s2
267 ; GFX12-NEXT: s_mov_b32 s1, s3
268 ; GFX12-NEXT: s_mov_b32 s2, s4
269 ; GFX12-NEXT: s_mov_b32 s3, s5
270 ; GFX12-NEXT: s_mov_b32 s4, s6
271 ; GFX12-NEXT: s_mov_b32 s5, s7
272 ; GFX12-NEXT: s_mov_b32 s6, s8
273 ; GFX12-NEXT: s_mov_b32 s7, s9
274 ; GFX12-NEXT: image_atomic_sub_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
275 ; GFX12-NEXT: s_wait_loadcnt 0x0
276 ; GFX12-NEXT: ; return to shader part epilog
278 %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
279 %out = bitcast i32 %v to float
; 1D image atomic signed-min on i32 data, compiled as an amdgpu_ps shader.
; The body calls llvm.amdgcn.image.atomic.smin.1d.i32.i32 with %data, %s
; (presumably the 1D coordinate; confirm against the intrinsic definition) and
; the inreg resource %rsrc, then bitcasts the i32 result %v to float (%out).
; Expected output per the checks below: s2-s9 are copied into s0-s7, followed by
; one image_atomic_smin and a wait. The gfx90a run first copies v1 into v2 and
; uses v2 as the address operand. The gfx1200 run expects the mnemonic
; image_atomic_min_int with th:TH_ATOMIC_RETURN and s_wait_loadcnt.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the script rather than editing them by hand.
283 define amdgpu_ps float @atomic_smin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
284 ; GFX6-LABEL: atomic_smin_i32_1d:
285 ; GFX6: ; %bb.0: ; %main_body
286 ; GFX6-NEXT: s_mov_b32 s0, s2
287 ; GFX6-NEXT: s_mov_b32 s1, s3
288 ; GFX6-NEXT: s_mov_b32 s2, s4
289 ; GFX6-NEXT: s_mov_b32 s3, s5
290 ; GFX6-NEXT: s_mov_b32 s4, s6
291 ; GFX6-NEXT: s_mov_b32 s5, s7
292 ; GFX6-NEXT: s_mov_b32 s6, s8
293 ; GFX6-NEXT: s_mov_b32 s7, s9
294 ; GFX6-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc
295 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
296 ; GFX6-NEXT: ; return to shader part epilog
298 ; GFX8-LABEL: atomic_smin_i32_1d:
299 ; GFX8: ; %bb.0: ; %main_body
300 ; GFX8-NEXT: s_mov_b32 s0, s2
301 ; GFX8-NEXT: s_mov_b32 s1, s3
302 ; GFX8-NEXT: s_mov_b32 s2, s4
303 ; GFX8-NEXT: s_mov_b32 s3, s5
304 ; GFX8-NEXT: s_mov_b32 s4, s6
305 ; GFX8-NEXT: s_mov_b32 s5, s7
306 ; GFX8-NEXT: s_mov_b32 s6, s8
307 ; GFX8-NEXT: s_mov_b32 s7, s9
308 ; GFX8-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc
309 ; GFX8-NEXT: s_waitcnt vmcnt(0)
310 ; GFX8-NEXT: ; return to shader part epilog
312 ; GFX900-LABEL: atomic_smin_i32_1d:
313 ; GFX900: ; %bb.0: ; %main_body
314 ; GFX900-NEXT: s_mov_b32 s0, s2
315 ; GFX900-NEXT: s_mov_b32 s1, s3
316 ; GFX900-NEXT: s_mov_b32 s2, s4
317 ; GFX900-NEXT: s_mov_b32 s3, s5
318 ; GFX900-NEXT: s_mov_b32 s4, s6
319 ; GFX900-NEXT: s_mov_b32 s5, s7
320 ; GFX900-NEXT: s_mov_b32 s6, s8
321 ; GFX900-NEXT: s_mov_b32 s7, s9
322 ; GFX900-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc
323 ; GFX900-NEXT: s_waitcnt vmcnt(0)
324 ; GFX900-NEXT: ; return to shader part epilog
326 ; GFX90A-LABEL: atomic_smin_i32_1d:
327 ; GFX90A: ; %bb.0: ; %main_body
328 ; GFX90A-NEXT: s_mov_b32 s0, s2
329 ; GFX90A-NEXT: s_mov_b32 s1, s3
330 ; GFX90A-NEXT: s_mov_b32 s2, s4
331 ; GFX90A-NEXT: s_mov_b32 s3, s5
332 ; GFX90A-NEXT: s_mov_b32 s4, s6
333 ; GFX90A-NEXT: s_mov_b32 s5, s7
334 ; GFX90A-NEXT: s_mov_b32 s6, s8
335 ; GFX90A-NEXT: s_mov_b32 s7, s9
336 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
337 ; GFX90A-NEXT: image_atomic_smin v0, v2, s[0:7] dmask:0x1 unorm glc
338 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
339 ; GFX90A-NEXT: ; return to shader part epilog
341 ; GFX10PLUS-LABEL: atomic_smin_i32_1d:
342 ; GFX10PLUS: ; %bb.0: ; %main_body
343 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
344 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
345 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
346 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
347 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
348 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
349 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
350 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
351 ; GFX10PLUS-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
352 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
353 ; GFX10PLUS-NEXT: ; return to shader part epilog
355 ; GFX12-LABEL: atomic_smin_i32_1d:
356 ; GFX12: ; %bb.0: ; %main_body
357 ; GFX12-NEXT: s_mov_b32 s0, s2
358 ; GFX12-NEXT: s_mov_b32 s1, s3
359 ; GFX12-NEXT: s_mov_b32 s2, s4
360 ; GFX12-NEXT: s_mov_b32 s3, s5
361 ; GFX12-NEXT: s_mov_b32 s4, s6
362 ; GFX12-NEXT: s_mov_b32 s5, s7
363 ; GFX12-NEXT: s_mov_b32 s6, s8
364 ; GFX12-NEXT: s_mov_b32 s7, s9
365 ; GFX12-NEXT: image_atomic_min_int v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
366 ; GFX12-NEXT: s_wait_loadcnt 0x0
367 ; GFX12-NEXT: ; return to shader part epilog
369 %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
370 %out = bitcast i32 %v to float
; 1D image atomic unsigned-min on i32 data, compiled as an amdgpu_ps shader.
; The body calls llvm.amdgcn.image.atomic.umin.1d.i32.i32 with %data, %s
; (presumably the 1D coordinate; confirm against the intrinsic definition) and
; the inreg resource %rsrc, then bitcasts the i32 result %v to float (%out).
; Expected output per the checks below: s2-s9 are copied into s0-s7, followed by
; one image_atomic_umin and a wait. The gfx90a run first copies v1 into v2 and
; uses v2 as the address operand. The gfx1200 run expects the mnemonic
; image_atomic_min_uint with th:TH_ATOMIC_RETURN and s_wait_loadcnt.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the script rather than editing them by hand.
374 define amdgpu_ps float @atomic_umin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
375 ; GFX6-LABEL: atomic_umin_i32_1d:
376 ; GFX6: ; %bb.0: ; %main_body
377 ; GFX6-NEXT: s_mov_b32 s0, s2
378 ; GFX6-NEXT: s_mov_b32 s1, s3
379 ; GFX6-NEXT: s_mov_b32 s2, s4
380 ; GFX6-NEXT: s_mov_b32 s3, s5
381 ; GFX6-NEXT: s_mov_b32 s4, s6
382 ; GFX6-NEXT: s_mov_b32 s5, s7
383 ; GFX6-NEXT: s_mov_b32 s6, s8
384 ; GFX6-NEXT: s_mov_b32 s7, s9
385 ; GFX6-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc
386 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
387 ; GFX6-NEXT: ; return to shader part epilog
389 ; GFX8-LABEL: atomic_umin_i32_1d:
390 ; GFX8: ; %bb.0: ; %main_body
391 ; GFX8-NEXT: s_mov_b32 s0, s2
392 ; GFX8-NEXT: s_mov_b32 s1, s3
393 ; GFX8-NEXT: s_mov_b32 s2, s4
394 ; GFX8-NEXT: s_mov_b32 s3, s5
395 ; GFX8-NEXT: s_mov_b32 s4, s6
396 ; GFX8-NEXT: s_mov_b32 s5, s7
397 ; GFX8-NEXT: s_mov_b32 s6, s8
398 ; GFX8-NEXT: s_mov_b32 s7, s9
399 ; GFX8-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc
400 ; GFX8-NEXT: s_waitcnt vmcnt(0)
401 ; GFX8-NEXT: ; return to shader part epilog
403 ; GFX900-LABEL: atomic_umin_i32_1d:
404 ; GFX900: ; %bb.0: ; %main_body
405 ; GFX900-NEXT: s_mov_b32 s0, s2
406 ; GFX900-NEXT: s_mov_b32 s1, s3
407 ; GFX900-NEXT: s_mov_b32 s2, s4
408 ; GFX900-NEXT: s_mov_b32 s3, s5
409 ; GFX900-NEXT: s_mov_b32 s4, s6
410 ; GFX900-NEXT: s_mov_b32 s5, s7
411 ; GFX900-NEXT: s_mov_b32 s6, s8
412 ; GFX900-NEXT: s_mov_b32 s7, s9
413 ; GFX900-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc
414 ; GFX900-NEXT: s_waitcnt vmcnt(0)
415 ; GFX900-NEXT: ; return to shader part epilog
417 ; GFX90A-LABEL: atomic_umin_i32_1d:
418 ; GFX90A: ; %bb.0: ; %main_body
419 ; GFX90A-NEXT: s_mov_b32 s0, s2
420 ; GFX90A-NEXT: s_mov_b32 s1, s3
421 ; GFX90A-NEXT: s_mov_b32 s2, s4
422 ; GFX90A-NEXT: s_mov_b32 s3, s5
423 ; GFX90A-NEXT: s_mov_b32 s4, s6
424 ; GFX90A-NEXT: s_mov_b32 s5, s7
425 ; GFX90A-NEXT: s_mov_b32 s6, s8
426 ; GFX90A-NEXT: s_mov_b32 s7, s9
427 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
428 ; GFX90A-NEXT: image_atomic_umin v0, v2, s[0:7] dmask:0x1 unorm glc
429 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
430 ; GFX90A-NEXT: ; return to shader part epilog
432 ; GFX10PLUS-LABEL: atomic_umin_i32_1d:
433 ; GFX10PLUS: ; %bb.0: ; %main_body
434 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
435 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
436 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
437 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
438 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
439 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
440 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
441 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
442 ; GFX10PLUS-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
443 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
444 ; GFX10PLUS-NEXT: ; return to shader part epilog
446 ; GFX12-LABEL: atomic_umin_i32_1d:
447 ; GFX12: ; %bb.0: ; %main_body
448 ; GFX12-NEXT: s_mov_b32 s0, s2
449 ; GFX12-NEXT: s_mov_b32 s1, s3
450 ; GFX12-NEXT: s_mov_b32 s2, s4
451 ; GFX12-NEXT: s_mov_b32 s3, s5
452 ; GFX12-NEXT: s_mov_b32 s4, s6
453 ; GFX12-NEXT: s_mov_b32 s5, s7
454 ; GFX12-NEXT: s_mov_b32 s6, s8
455 ; GFX12-NEXT: s_mov_b32 s7, s9
456 ; GFX12-NEXT: image_atomic_min_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
457 ; GFX12-NEXT: s_wait_loadcnt 0x0
458 ; GFX12-NEXT: ; return to shader part epilog
460 %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
461 %out = bitcast i32 %v to float
; 1D image atomic signed-max on i32 data, compiled as an amdgpu_ps shader.
; The body calls llvm.amdgcn.image.atomic.smax.1d.i32.i32 with %data, %s
; (presumably the 1D coordinate; confirm against the intrinsic definition) and
; the inreg resource %rsrc, then bitcasts the i32 result %v to float (%out).
; Expected output per the checks below: s2-s9 are copied into s0-s7, followed by
; one image_atomic_smax and a wait. The gfx90a run first copies v1 into v2 and
; uses v2 as the address operand. The gfx1200 run expects the mnemonic
; image_atomic_max_int with th:TH_ATOMIC_RETURN and s_wait_loadcnt.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the script rather than editing them by hand.
465 define amdgpu_ps float @atomic_smax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
466 ; GFX6-LABEL: atomic_smax_i32_1d:
467 ; GFX6: ; %bb.0: ; %main_body
468 ; GFX6-NEXT: s_mov_b32 s0, s2
469 ; GFX6-NEXT: s_mov_b32 s1, s3
470 ; GFX6-NEXT: s_mov_b32 s2, s4
471 ; GFX6-NEXT: s_mov_b32 s3, s5
472 ; GFX6-NEXT: s_mov_b32 s4, s6
473 ; GFX6-NEXT: s_mov_b32 s5, s7
474 ; GFX6-NEXT: s_mov_b32 s6, s8
475 ; GFX6-NEXT: s_mov_b32 s7, s9
476 ; GFX6-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc
477 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
478 ; GFX6-NEXT: ; return to shader part epilog
480 ; GFX8-LABEL: atomic_smax_i32_1d:
481 ; GFX8: ; %bb.0: ; %main_body
482 ; GFX8-NEXT: s_mov_b32 s0, s2
483 ; GFX8-NEXT: s_mov_b32 s1, s3
484 ; GFX8-NEXT: s_mov_b32 s2, s4
485 ; GFX8-NEXT: s_mov_b32 s3, s5
486 ; GFX8-NEXT: s_mov_b32 s4, s6
487 ; GFX8-NEXT: s_mov_b32 s5, s7
488 ; GFX8-NEXT: s_mov_b32 s6, s8
489 ; GFX8-NEXT: s_mov_b32 s7, s9
490 ; GFX8-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc
491 ; GFX8-NEXT: s_waitcnt vmcnt(0)
492 ; GFX8-NEXT: ; return to shader part epilog
494 ; GFX900-LABEL: atomic_smax_i32_1d:
495 ; GFX900: ; %bb.0: ; %main_body
496 ; GFX900-NEXT: s_mov_b32 s0, s2
497 ; GFX900-NEXT: s_mov_b32 s1, s3
498 ; GFX900-NEXT: s_mov_b32 s2, s4
499 ; GFX900-NEXT: s_mov_b32 s3, s5
500 ; GFX900-NEXT: s_mov_b32 s4, s6
501 ; GFX900-NEXT: s_mov_b32 s5, s7
502 ; GFX900-NEXT: s_mov_b32 s6, s8
503 ; GFX900-NEXT: s_mov_b32 s7, s9
504 ; GFX900-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc
505 ; GFX900-NEXT: s_waitcnt vmcnt(0)
506 ; GFX900-NEXT: ; return to shader part epilog
508 ; GFX90A-LABEL: atomic_smax_i32_1d:
509 ; GFX90A: ; %bb.0: ; %main_body
510 ; GFX90A-NEXT: s_mov_b32 s0, s2
511 ; GFX90A-NEXT: s_mov_b32 s1, s3
512 ; GFX90A-NEXT: s_mov_b32 s2, s4
513 ; GFX90A-NEXT: s_mov_b32 s3, s5
514 ; GFX90A-NEXT: s_mov_b32 s4, s6
515 ; GFX90A-NEXT: s_mov_b32 s5, s7
516 ; GFX90A-NEXT: s_mov_b32 s6, s8
517 ; GFX90A-NEXT: s_mov_b32 s7, s9
518 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
519 ; GFX90A-NEXT: image_atomic_smax v0, v2, s[0:7] dmask:0x1 unorm glc
520 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
521 ; GFX90A-NEXT: ; return to shader part epilog
523 ; GFX10PLUS-LABEL: atomic_smax_i32_1d:
524 ; GFX10PLUS: ; %bb.0: ; %main_body
525 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
526 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
527 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
528 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
529 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
530 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
531 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
532 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
533 ; GFX10PLUS-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
534 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
535 ; GFX10PLUS-NEXT: ; return to shader part epilog
537 ; GFX12-LABEL: atomic_smax_i32_1d:
538 ; GFX12: ; %bb.0: ; %main_body
539 ; GFX12-NEXT: s_mov_b32 s0, s2
540 ; GFX12-NEXT: s_mov_b32 s1, s3
541 ; GFX12-NEXT: s_mov_b32 s2, s4
542 ; GFX12-NEXT: s_mov_b32 s3, s5
543 ; GFX12-NEXT: s_mov_b32 s4, s6
544 ; GFX12-NEXT: s_mov_b32 s5, s7
545 ; GFX12-NEXT: s_mov_b32 s6, s8
546 ; GFX12-NEXT: s_mov_b32 s7, s9
547 ; GFX12-NEXT: image_atomic_max_int v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
548 ; GFX12-NEXT: s_wait_loadcnt 0x0
549 ; GFX12-NEXT: ; return to shader part epilog
551 %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
552 %out = bitcast i32 %v to float
; 1D image atomic unsigned-max on i32 data, compiled as an amdgpu_ps shader.
; The body calls llvm.amdgcn.image.atomic.umax.1d.i32.i32 with %data, %s
; (presumably the 1D coordinate; confirm against the intrinsic definition) and
; the inreg resource %rsrc, then bitcasts the i32 result %v to float (%out).
; Expected output per the checks below: s2-s9 are copied into s0-s7, followed by
; one image_atomic_umax and a wait. The gfx90a run first copies v1 into v2 and
; uses v2 as the address operand. The gfx1200 run expects the mnemonic
; image_atomic_max_uint with th:TH_ATOMIC_RETURN and s_wait_loadcnt.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the script rather than editing them by hand.
556 define amdgpu_ps float @atomic_umax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
557 ; GFX6-LABEL: atomic_umax_i32_1d:
558 ; GFX6: ; %bb.0: ; %main_body
559 ; GFX6-NEXT: s_mov_b32 s0, s2
560 ; GFX6-NEXT: s_mov_b32 s1, s3
561 ; GFX6-NEXT: s_mov_b32 s2, s4
562 ; GFX6-NEXT: s_mov_b32 s3, s5
563 ; GFX6-NEXT: s_mov_b32 s4, s6
564 ; GFX6-NEXT: s_mov_b32 s5, s7
565 ; GFX6-NEXT: s_mov_b32 s6, s8
566 ; GFX6-NEXT: s_mov_b32 s7, s9
567 ; GFX6-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc
568 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
569 ; GFX6-NEXT: ; return to shader part epilog
571 ; GFX8-LABEL: atomic_umax_i32_1d:
572 ; GFX8: ; %bb.0: ; %main_body
573 ; GFX8-NEXT: s_mov_b32 s0, s2
574 ; GFX8-NEXT: s_mov_b32 s1, s3
575 ; GFX8-NEXT: s_mov_b32 s2, s4
576 ; GFX8-NEXT: s_mov_b32 s3, s5
577 ; GFX8-NEXT: s_mov_b32 s4, s6
578 ; GFX8-NEXT: s_mov_b32 s5, s7
579 ; GFX8-NEXT: s_mov_b32 s6, s8
580 ; GFX8-NEXT: s_mov_b32 s7, s9
581 ; GFX8-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc
582 ; GFX8-NEXT: s_waitcnt vmcnt(0)
583 ; GFX8-NEXT: ; return to shader part epilog
585 ; GFX900-LABEL: atomic_umax_i32_1d:
586 ; GFX900: ; %bb.0: ; %main_body
587 ; GFX900-NEXT: s_mov_b32 s0, s2
588 ; GFX900-NEXT: s_mov_b32 s1, s3
589 ; GFX900-NEXT: s_mov_b32 s2, s4
590 ; GFX900-NEXT: s_mov_b32 s3, s5
591 ; GFX900-NEXT: s_mov_b32 s4, s6
592 ; GFX900-NEXT: s_mov_b32 s5, s7
593 ; GFX900-NEXT: s_mov_b32 s6, s8
594 ; GFX900-NEXT: s_mov_b32 s7, s9
595 ; GFX900-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc
596 ; GFX900-NEXT: s_waitcnt vmcnt(0)
597 ; GFX900-NEXT: ; return to shader part epilog
599 ; GFX90A-LABEL: atomic_umax_i32_1d:
600 ; GFX90A: ; %bb.0: ; %main_body
601 ; GFX90A-NEXT: s_mov_b32 s0, s2
602 ; GFX90A-NEXT: s_mov_b32 s1, s3
603 ; GFX90A-NEXT: s_mov_b32 s2, s4
604 ; GFX90A-NEXT: s_mov_b32 s3, s5
605 ; GFX90A-NEXT: s_mov_b32 s4, s6
606 ; GFX90A-NEXT: s_mov_b32 s5, s7
607 ; GFX90A-NEXT: s_mov_b32 s6, s8
608 ; GFX90A-NEXT: s_mov_b32 s7, s9
609 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
610 ; GFX90A-NEXT: image_atomic_umax v0, v2, s[0:7] dmask:0x1 unorm glc
611 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
612 ; GFX90A-NEXT: ; return to shader part epilog
614 ; GFX10PLUS-LABEL: atomic_umax_i32_1d:
615 ; GFX10PLUS: ; %bb.0: ; %main_body
616 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
617 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
618 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
619 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
620 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
621 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
622 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
623 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
624 ; GFX10PLUS-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
625 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
626 ; GFX10PLUS-NEXT: ; return to shader part epilog
628 ; GFX12-LABEL: atomic_umax_i32_1d:
629 ; GFX12: ; %bb.0: ; %main_body
630 ; GFX12-NEXT: s_mov_b32 s0, s2
631 ; GFX12-NEXT: s_mov_b32 s1, s3
632 ; GFX12-NEXT: s_mov_b32 s2, s4
633 ; GFX12-NEXT: s_mov_b32 s3, s5
634 ; GFX12-NEXT: s_mov_b32 s4, s6
635 ; GFX12-NEXT: s_mov_b32 s5, s7
636 ; GFX12-NEXT: s_mov_b32 s6, s8
637 ; GFX12-NEXT: s_mov_b32 s7, s9
638 ; GFX12-NEXT: image_atomic_max_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
639 ; GFX12-NEXT: s_wait_loadcnt 0x0
640 ; GFX12-NEXT: ; return to shader part epilog
642 %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
643 %out = bitcast i32 %v to float
; 1D image atomic bitwise-and on i32 data, compiled as an amdgpu_ps shader.
; The body calls llvm.amdgcn.image.atomic.and.1d.i32.i32 with %data, %s
; (presumably the 1D coordinate; confirm against the intrinsic definition) and
; the inreg resource %rsrc, then bitcasts the i32 result %v to float (%out).
; Expected output per the checks below: s2-s9 are copied into s0-s7, followed by
; one image_atomic_and and a wait. The gfx90a run first copies v1 into v2 and
; uses v2 as the address operand. The gfx1200 run keeps the image_atomic_and
; mnemonic but uses th:TH_ATOMIC_RETURN and s_wait_loadcnt.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the script rather than editing them by hand.
647 define amdgpu_ps float @atomic_and_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
648 ; GFX6-LABEL: atomic_and_i32_1d:
649 ; GFX6: ; %bb.0: ; %main_body
650 ; GFX6-NEXT: s_mov_b32 s0, s2
651 ; GFX6-NEXT: s_mov_b32 s1, s3
652 ; GFX6-NEXT: s_mov_b32 s2, s4
653 ; GFX6-NEXT: s_mov_b32 s3, s5
654 ; GFX6-NEXT: s_mov_b32 s4, s6
655 ; GFX6-NEXT: s_mov_b32 s5, s7
656 ; GFX6-NEXT: s_mov_b32 s6, s8
657 ; GFX6-NEXT: s_mov_b32 s7, s9
658 ; GFX6-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc
659 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
660 ; GFX6-NEXT: ; return to shader part epilog
662 ; GFX8-LABEL: atomic_and_i32_1d:
663 ; GFX8: ; %bb.0: ; %main_body
664 ; GFX8-NEXT: s_mov_b32 s0, s2
665 ; GFX8-NEXT: s_mov_b32 s1, s3
666 ; GFX8-NEXT: s_mov_b32 s2, s4
667 ; GFX8-NEXT: s_mov_b32 s3, s5
668 ; GFX8-NEXT: s_mov_b32 s4, s6
669 ; GFX8-NEXT: s_mov_b32 s5, s7
670 ; GFX8-NEXT: s_mov_b32 s6, s8
671 ; GFX8-NEXT: s_mov_b32 s7, s9
672 ; GFX8-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc
673 ; GFX8-NEXT: s_waitcnt vmcnt(0)
674 ; GFX8-NEXT: ; return to shader part epilog
676 ; GFX900-LABEL: atomic_and_i32_1d:
677 ; GFX900: ; %bb.0: ; %main_body
678 ; GFX900-NEXT: s_mov_b32 s0, s2
679 ; GFX900-NEXT: s_mov_b32 s1, s3
680 ; GFX900-NEXT: s_mov_b32 s2, s4
681 ; GFX900-NEXT: s_mov_b32 s3, s5
682 ; GFX900-NEXT: s_mov_b32 s4, s6
683 ; GFX900-NEXT: s_mov_b32 s5, s7
684 ; GFX900-NEXT: s_mov_b32 s6, s8
685 ; GFX900-NEXT: s_mov_b32 s7, s9
686 ; GFX900-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc
687 ; GFX900-NEXT: s_waitcnt vmcnt(0)
688 ; GFX900-NEXT: ; return to shader part epilog
690 ; GFX90A-LABEL: atomic_and_i32_1d:
691 ; GFX90A: ; %bb.0: ; %main_body
692 ; GFX90A-NEXT: s_mov_b32 s0, s2
693 ; GFX90A-NEXT: s_mov_b32 s1, s3
694 ; GFX90A-NEXT: s_mov_b32 s2, s4
695 ; GFX90A-NEXT: s_mov_b32 s3, s5
696 ; GFX90A-NEXT: s_mov_b32 s4, s6
697 ; GFX90A-NEXT: s_mov_b32 s5, s7
698 ; GFX90A-NEXT: s_mov_b32 s6, s8
699 ; GFX90A-NEXT: s_mov_b32 s7, s9
700 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
701 ; GFX90A-NEXT: image_atomic_and v0, v2, s[0:7] dmask:0x1 unorm glc
702 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
703 ; GFX90A-NEXT: ; return to shader part epilog
705 ; GFX10PLUS-LABEL: atomic_and_i32_1d:
706 ; GFX10PLUS: ; %bb.0: ; %main_body
707 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
708 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
709 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
710 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
711 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
712 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
713 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
714 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
715 ; GFX10PLUS-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
716 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
717 ; GFX10PLUS-NEXT: ; return to shader part epilog
719 ; GFX12-LABEL: atomic_and_i32_1d:
720 ; GFX12: ; %bb.0: ; %main_body
721 ; GFX12-NEXT: s_mov_b32 s0, s2
722 ; GFX12-NEXT: s_mov_b32 s1, s3
723 ; GFX12-NEXT: s_mov_b32 s2, s4
724 ; GFX12-NEXT: s_mov_b32 s3, s5
725 ; GFX12-NEXT: s_mov_b32 s4, s6
726 ; GFX12-NEXT: s_mov_b32 s5, s7
727 ; GFX12-NEXT: s_mov_b32 s6, s8
728 ; GFX12-NEXT: s_mov_b32 s7, s9
729 ; GFX12-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
730 ; GFX12-NEXT: s_wait_loadcnt 0x0
731 ; GFX12-NEXT: ; return to shader part epilog
733 %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
734 %out = bitcast i32 %v to float
; 1D image atomic bitwise-or on i32 data, compiled as an amdgpu_ps shader.
; The body calls llvm.amdgcn.image.atomic.or.1d.i32.i32 with %data, %s
; (presumably the 1D coordinate; confirm against the intrinsic definition) and
; the inreg resource %rsrc, then bitcasts the i32 result %v to float (%out).
; Expected output per the checks below: s2-s9 are copied into s0-s7, followed by
; one image_atomic_or and a wait. The gfx90a run first copies v1 into v2 and
; uses v2 as the address operand. The gfx1200 run keeps the image_atomic_or
; mnemonic but uses th:TH_ATOMIC_RETURN and s_wait_loadcnt.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the script rather than editing them by hand.
738 define amdgpu_ps float @atomic_or_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
739 ; GFX6-LABEL: atomic_or_i32_1d:
740 ; GFX6: ; %bb.0: ; %main_body
741 ; GFX6-NEXT: s_mov_b32 s0, s2
742 ; GFX6-NEXT: s_mov_b32 s1, s3
743 ; GFX6-NEXT: s_mov_b32 s2, s4
744 ; GFX6-NEXT: s_mov_b32 s3, s5
745 ; GFX6-NEXT: s_mov_b32 s4, s6
746 ; GFX6-NEXT: s_mov_b32 s5, s7
747 ; GFX6-NEXT: s_mov_b32 s6, s8
748 ; GFX6-NEXT: s_mov_b32 s7, s9
749 ; GFX6-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc
750 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
751 ; GFX6-NEXT: ; return to shader part epilog
753 ; GFX8-LABEL: atomic_or_i32_1d:
754 ; GFX8: ; %bb.0: ; %main_body
755 ; GFX8-NEXT: s_mov_b32 s0, s2
756 ; GFX8-NEXT: s_mov_b32 s1, s3
757 ; GFX8-NEXT: s_mov_b32 s2, s4
758 ; GFX8-NEXT: s_mov_b32 s3, s5
759 ; GFX8-NEXT: s_mov_b32 s4, s6
760 ; GFX8-NEXT: s_mov_b32 s5, s7
761 ; GFX8-NEXT: s_mov_b32 s6, s8
762 ; GFX8-NEXT: s_mov_b32 s7, s9
763 ; GFX8-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc
764 ; GFX8-NEXT: s_waitcnt vmcnt(0)
765 ; GFX8-NEXT: ; return to shader part epilog
767 ; GFX900-LABEL: atomic_or_i32_1d:
768 ; GFX900: ; %bb.0: ; %main_body
769 ; GFX900-NEXT: s_mov_b32 s0, s2
770 ; GFX900-NEXT: s_mov_b32 s1, s3
771 ; GFX900-NEXT: s_mov_b32 s2, s4
772 ; GFX900-NEXT: s_mov_b32 s3, s5
773 ; GFX900-NEXT: s_mov_b32 s4, s6
774 ; GFX900-NEXT: s_mov_b32 s5, s7
775 ; GFX900-NEXT: s_mov_b32 s6, s8
776 ; GFX900-NEXT: s_mov_b32 s7, s9
777 ; GFX900-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc
778 ; GFX900-NEXT: s_waitcnt vmcnt(0)
779 ; GFX900-NEXT: ; return to shader part epilog
781 ; GFX90A-LABEL: atomic_or_i32_1d:
782 ; GFX90A: ; %bb.0: ; %main_body
783 ; GFX90A-NEXT: s_mov_b32 s0, s2
784 ; GFX90A-NEXT: s_mov_b32 s1, s3
785 ; GFX90A-NEXT: s_mov_b32 s2, s4
786 ; GFX90A-NEXT: s_mov_b32 s3, s5
787 ; GFX90A-NEXT: s_mov_b32 s4, s6
788 ; GFX90A-NEXT: s_mov_b32 s5, s7
789 ; GFX90A-NEXT: s_mov_b32 s6, s8
790 ; GFX90A-NEXT: s_mov_b32 s7, s9
791 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
792 ; GFX90A-NEXT: image_atomic_or v0, v2, s[0:7] dmask:0x1 unorm glc
793 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
794 ; GFX90A-NEXT: ; return to shader part epilog
796 ; GFX10PLUS-LABEL: atomic_or_i32_1d:
797 ; GFX10PLUS: ; %bb.0: ; %main_body
798 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
799 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
800 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
801 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
802 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
803 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
804 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
805 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
806 ; GFX10PLUS-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
807 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
808 ; GFX10PLUS-NEXT: ; return to shader part epilog
810 ; GFX12-LABEL: atomic_or_i32_1d:
811 ; GFX12: ; %bb.0: ; %main_body
812 ; GFX12-NEXT: s_mov_b32 s0, s2
813 ; GFX12-NEXT: s_mov_b32 s1, s3
814 ; GFX12-NEXT: s_mov_b32 s2, s4
815 ; GFX12-NEXT: s_mov_b32 s3, s5
816 ; GFX12-NEXT: s_mov_b32 s4, s6
817 ; GFX12-NEXT: s_mov_b32 s5, s7
818 ; GFX12-NEXT: s_mov_b32 s6, s8
819 ; GFX12-NEXT: s_mov_b32 s7, s9
820 ; GFX12-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
821 ; GFX12-NEXT: s_wait_loadcnt 0x0
822 ; GFX12-NEXT: ; return to shader part epilog
824 %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
825 %out = bitcast i32 %v to float
; atomic_xor_i32_1d: amdgpu_ps function that passes %data and %s, then the
; inreg <8 x i32> %rsrc and two zero i32 operands, to
; llvm.amdgcn.image.atomic.xor.1d and bitcasts the returned i32 to float.
; Every expectation copies s2..s9 into s0..s7 and then issues one
; image_atomic_xor on s[0:7] with glc set (th:TH_ATOMIC_RETURN on GFX12),
; followed by a wait. The GFX90A expectation first moves v1 into v2 and uses
; v2 in the instruction instead of v1.
; The check lines are autogenerated (see the first line of this file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; (review) the two zero operands are presumably texfailctrl and cachepolicy -
; confirm against the intrinsic definition.
829 define amdgpu_ps float @atomic_xor_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
830 ; GFX6-LABEL: atomic_xor_i32_1d:
831 ; GFX6: ; %bb.0: ; %main_body
832 ; GFX6-NEXT: s_mov_b32 s0, s2
833 ; GFX6-NEXT: s_mov_b32 s1, s3
834 ; GFX6-NEXT: s_mov_b32 s2, s4
835 ; GFX6-NEXT: s_mov_b32 s3, s5
836 ; GFX6-NEXT: s_mov_b32 s4, s6
837 ; GFX6-NEXT: s_mov_b32 s5, s7
838 ; GFX6-NEXT: s_mov_b32 s6, s8
839 ; GFX6-NEXT: s_mov_b32 s7, s9
840 ; GFX6-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc
841 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
842 ; GFX6-NEXT: ; return to shader part epilog
844 ; GFX8-LABEL: atomic_xor_i32_1d:
845 ; GFX8: ; %bb.0: ; %main_body
846 ; GFX8-NEXT: s_mov_b32 s0, s2
847 ; GFX8-NEXT: s_mov_b32 s1, s3
848 ; GFX8-NEXT: s_mov_b32 s2, s4
849 ; GFX8-NEXT: s_mov_b32 s3, s5
850 ; GFX8-NEXT: s_mov_b32 s4, s6
851 ; GFX8-NEXT: s_mov_b32 s5, s7
852 ; GFX8-NEXT: s_mov_b32 s6, s8
853 ; GFX8-NEXT: s_mov_b32 s7, s9
854 ; GFX8-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc
855 ; GFX8-NEXT: s_waitcnt vmcnt(0)
856 ; GFX8-NEXT: ; return to shader part epilog
858 ; GFX900-LABEL: atomic_xor_i32_1d:
859 ; GFX900: ; %bb.0: ; %main_body
860 ; GFX900-NEXT: s_mov_b32 s0, s2
861 ; GFX900-NEXT: s_mov_b32 s1, s3
862 ; GFX900-NEXT: s_mov_b32 s2, s4
863 ; GFX900-NEXT: s_mov_b32 s3, s5
864 ; GFX900-NEXT: s_mov_b32 s4, s6
865 ; GFX900-NEXT: s_mov_b32 s5, s7
866 ; GFX900-NEXT: s_mov_b32 s6, s8
867 ; GFX900-NEXT: s_mov_b32 s7, s9
868 ; GFX900-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc
869 ; GFX900-NEXT: s_waitcnt vmcnt(0)
870 ; GFX900-NEXT: ; return to shader part epilog
872 ; GFX90A-LABEL: atomic_xor_i32_1d:
873 ; GFX90A: ; %bb.0: ; %main_body
874 ; GFX90A-NEXT: s_mov_b32 s0, s2
875 ; GFX90A-NEXT: s_mov_b32 s1, s3
876 ; GFX90A-NEXT: s_mov_b32 s2, s4
877 ; GFX90A-NEXT: s_mov_b32 s3, s5
878 ; GFX90A-NEXT: s_mov_b32 s4, s6
879 ; GFX90A-NEXT: s_mov_b32 s5, s7
880 ; GFX90A-NEXT: s_mov_b32 s6, s8
881 ; GFX90A-NEXT: s_mov_b32 s7, s9
882 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
883 ; GFX90A-NEXT: image_atomic_xor v0, v2, s[0:7] dmask:0x1 unorm glc
884 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
885 ; GFX90A-NEXT: ; return to shader part epilog
887 ; GFX10PLUS-LABEL: atomic_xor_i32_1d:
888 ; GFX10PLUS: ; %bb.0: ; %main_body
889 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
890 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
891 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
892 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
893 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
894 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
895 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
896 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
897 ; GFX10PLUS-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
898 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
899 ; GFX10PLUS-NEXT: ; return to shader part epilog
901 ; GFX12-LABEL: atomic_xor_i32_1d:
902 ; GFX12: ; %bb.0: ; %main_body
903 ; GFX12-NEXT: s_mov_b32 s0, s2
904 ; GFX12-NEXT: s_mov_b32 s1, s3
905 ; GFX12-NEXT: s_mov_b32 s2, s4
906 ; GFX12-NEXT: s_mov_b32 s3, s5
907 ; GFX12-NEXT: s_mov_b32 s4, s6
908 ; GFX12-NEXT: s_mov_b32 s5, s7
909 ; GFX12-NEXT: s_mov_b32 s6, s8
910 ; GFX12-NEXT: s_mov_b32 s7, s9
911 ; GFX12-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
912 ; GFX12-NEXT: s_wait_loadcnt 0x0
913 ; GFX12-NEXT: ; return to shader part epilog
915 %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
916 %out = bitcast i32 %v to float
; atomic_inc_i32_1d: amdgpu_ps function that passes %data and %s, then the
; inreg <8 x i32> %rsrc and two zero i32 operands, to
; llvm.amdgcn.image.atomic.inc.1d and bitcasts the returned i32 to float.
; Every expectation copies s2..s9 into s0..s7 and then issues one atomic
; increment on s[0:7] with glc set (th:TH_ATOMIC_RETURN on GFX12), followed by
; a wait. The expected mnemonic is image_atomic_inc everywhere except GFX12,
; where it is image_atomic_inc_uint. The GFX90A expectation first moves v1 into
; v2 and uses v2 in the instruction instead of v1.
; The check lines are autogenerated (see the first line of this file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; (review) the two zero operands are presumably texfailctrl and cachepolicy -
; confirm against the intrinsic definition.
920 define amdgpu_ps float @atomic_inc_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
921 ; GFX6-LABEL: atomic_inc_i32_1d:
922 ; GFX6: ; %bb.0: ; %main_body
923 ; GFX6-NEXT: s_mov_b32 s0, s2
924 ; GFX6-NEXT: s_mov_b32 s1, s3
925 ; GFX6-NEXT: s_mov_b32 s2, s4
926 ; GFX6-NEXT: s_mov_b32 s3, s5
927 ; GFX6-NEXT: s_mov_b32 s4, s6
928 ; GFX6-NEXT: s_mov_b32 s5, s7
929 ; GFX6-NEXT: s_mov_b32 s6, s8
930 ; GFX6-NEXT: s_mov_b32 s7, s9
931 ; GFX6-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc
932 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
933 ; GFX6-NEXT: ; return to shader part epilog
935 ; GFX8-LABEL: atomic_inc_i32_1d:
936 ; GFX8: ; %bb.0: ; %main_body
937 ; GFX8-NEXT: s_mov_b32 s0, s2
938 ; GFX8-NEXT: s_mov_b32 s1, s3
939 ; GFX8-NEXT: s_mov_b32 s2, s4
940 ; GFX8-NEXT: s_mov_b32 s3, s5
941 ; GFX8-NEXT: s_mov_b32 s4, s6
942 ; GFX8-NEXT: s_mov_b32 s5, s7
943 ; GFX8-NEXT: s_mov_b32 s6, s8
944 ; GFX8-NEXT: s_mov_b32 s7, s9
945 ; GFX8-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc
946 ; GFX8-NEXT: s_waitcnt vmcnt(0)
947 ; GFX8-NEXT: ; return to shader part epilog
949 ; GFX900-LABEL: atomic_inc_i32_1d:
950 ; GFX900: ; %bb.0: ; %main_body
951 ; GFX900-NEXT: s_mov_b32 s0, s2
952 ; GFX900-NEXT: s_mov_b32 s1, s3
953 ; GFX900-NEXT: s_mov_b32 s2, s4
954 ; GFX900-NEXT: s_mov_b32 s3, s5
955 ; GFX900-NEXT: s_mov_b32 s4, s6
956 ; GFX900-NEXT: s_mov_b32 s5, s7
957 ; GFX900-NEXT: s_mov_b32 s6, s8
958 ; GFX900-NEXT: s_mov_b32 s7, s9
959 ; GFX900-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc
960 ; GFX900-NEXT: s_waitcnt vmcnt(0)
961 ; GFX900-NEXT: ; return to shader part epilog
963 ; GFX90A-LABEL: atomic_inc_i32_1d:
964 ; GFX90A: ; %bb.0: ; %main_body
965 ; GFX90A-NEXT: s_mov_b32 s0, s2
966 ; GFX90A-NEXT: s_mov_b32 s1, s3
967 ; GFX90A-NEXT: s_mov_b32 s2, s4
968 ; GFX90A-NEXT: s_mov_b32 s3, s5
969 ; GFX90A-NEXT: s_mov_b32 s4, s6
970 ; GFX90A-NEXT: s_mov_b32 s5, s7
971 ; GFX90A-NEXT: s_mov_b32 s6, s8
972 ; GFX90A-NEXT: s_mov_b32 s7, s9
973 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
974 ; GFX90A-NEXT: image_atomic_inc v0, v2, s[0:7] dmask:0x1 unorm glc
975 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
976 ; GFX90A-NEXT: ; return to shader part epilog
978 ; GFX10PLUS-LABEL: atomic_inc_i32_1d:
979 ; GFX10PLUS: ; %bb.0: ; %main_body
980 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
981 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
982 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
983 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
984 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
985 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
986 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
987 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
988 ; GFX10PLUS-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
989 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
990 ; GFX10PLUS-NEXT: ; return to shader part epilog
992 ; GFX12-LABEL: atomic_inc_i32_1d:
993 ; GFX12: ; %bb.0: ; %main_body
994 ; GFX12-NEXT: s_mov_b32 s0, s2
995 ; GFX12-NEXT: s_mov_b32 s1, s3
996 ; GFX12-NEXT: s_mov_b32 s2, s4
997 ; GFX12-NEXT: s_mov_b32 s3, s5
998 ; GFX12-NEXT: s_mov_b32 s4, s6
999 ; GFX12-NEXT: s_mov_b32 s5, s7
1000 ; GFX12-NEXT: s_mov_b32 s6, s8
1001 ; GFX12-NEXT: s_mov_b32 s7, s9
1002 ; GFX12-NEXT: image_atomic_inc_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
1003 ; GFX12-NEXT: s_wait_loadcnt 0x0
1004 ; GFX12-NEXT: ; return to shader part epilog
1006 %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1007 %out = bitcast i32 %v to float
; atomic_dec_i32_1d: amdgpu_ps function that passes %data and %s, then the
; inreg <8 x i32> %rsrc and two zero i32 operands, to
; llvm.amdgcn.image.atomic.dec.1d and bitcasts the returned i32 to float.
; Every expectation copies s2..s9 into s0..s7 and then issues one atomic
; decrement on s[0:7] with glc set (th:TH_ATOMIC_RETURN on GFX12), followed by
; a wait. The expected mnemonic is image_atomic_dec everywhere except GFX12,
; where it is image_atomic_dec_uint. The GFX90A expectation first moves v1 into
; v2 and uses v2 in the instruction instead of v1.
; The check lines are autogenerated (see the first line of this file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; (review) the two zero operands are presumably texfailctrl and cachepolicy -
; confirm against the intrinsic definition.
1011 define amdgpu_ps float @atomic_dec_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
1012 ; GFX6-LABEL: atomic_dec_i32_1d:
1013 ; GFX6: ; %bb.0: ; %main_body
1014 ; GFX6-NEXT: s_mov_b32 s0, s2
1015 ; GFX6-NEXT: s_mov_b32 s1, s3
1016 ; GFX6-NEXT: s_mov_b32 s2, s4
1017 ; GFX6-NEXT: s_mov_b32 s3, s5
1018 ; GFX6-NEXT: s_mov_b32 s4, s6
1019 ; GFX6-NEXT: s_mov_b32 s5, s7
1020 ; GFX6-NEXT: s_mov_b32 s6, s8
1021 ; GFX6-NEXT: s_mov_b32 s7, s9
1022 ; GFX6-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc
1023 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1024 ; GFX6-NEXT: ; return to shader part epilog
1026 ; GFX8-LABEL: atomic_dec_i32_1d:
1027 ; GFX8: ; %bb.0: ; %main_body
1028 ; GFX8-NEXT: s_mov_b32 s0, s2
1029 ; GFX8-NEXT: s_mov_b32 s1, s3
1030 ; GFX8-NEXT: s_mov_b32 s2, s4
1031 ; GFX8-NEXT: s_mov_b32 s3, s5
1032 ; GFX8-NEXT: s_mov_b32 s4, s6
1033 ; GFX8-NEXT: s_mov_b32 s5, s7
1034 ; GFX8-NEXT: s_mov_b32 s6, s8
1035 ; GFX8-NEXT: s_mov_b32 s7, s9
1036 ; GFX8-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc
1037 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1038 ; GFX8-NEXT: ; return to shader part epilog
1040 ; GFX900-LABEL: atomic_dec_i32_1d:
1041 ; GFX900: ; %bb.0: ; %main_body
1042 ; GFX900-NEXT: s_mov_b32 s0, s2
1043 ; GFX900-NEXT: s_mov_b32 s1, s3
1044 ; GFX900-NEXT: s_mov_b32 s2, s4
1045 ; GFX900-NEXT: s_mov_b32 s3, s5
1046 ; GFX900-NEXT: s_mov_b32 s4, s6
1047 ; GFX900-NEXT: s_mov_b32 s5, s7
1048 ; GFX900-NEXT: s_mov_b32 s6, s8
1049 ; GFX900-NEXT: s_mov_b32 s7, s9
1050 ; GFX900-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc
1051 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1052 ; GFX900-NEXT: ; return to shader part epilog
1054 ; GFX90A-LABEL: atomic_dec_i32_1d:
1055 ; GFX90A: ; %bb.0: ; %main_body
1056 ; GFX90A-NEXT: s_mov_b32 s0, s2
1057 ; GFX90A-NEXT: s_mov_b32 s1, s3
1058 ; GFX90A-NEXT: s_mov_b32 s2, s4
1059 ; GFX90A-NEXT: s_mov_b32 s3, s5
1060 ; GFX90A-NEXT: s_mov_b32 s4, s6
1061 ; GFX90A-NEXT: s_mov_b32 s5, s7
1062 ; GFX90A-NEXT: s_mov_b32 s6, s8
1063 ; GFX90A-NEXT: s_mov_b32 s7, s9
1064 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
1065 ; GFX90A-NEXT: image_atomic_dec v0, v2, s[0:7] dmask:0x1 unorm glc
1066 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1067 ; GFX90A-NEXT: ; return to shader part epilog
1069 ; GFX10PLUS-LABEL: atomic_dec_i32_1d:
1070 ; GFX10PLUS: ; %bb.0: ; %main_body
1071 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1072 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1073 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1074 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1075 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1076 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1077 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1078 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1079 ; GFX10PLUS-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
1080 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1081 ; GFX10PLUS-NEXT: ; return to shader part epilog
1083 ; GFX12-LABEL: atomic_dec_i32_1d:
1084 ; GFX12: ; %bb.0: ; %main_body
1085 ; GFX12-NEXT: s_mov_b32 s0, s2
1086 ; GFX12-NEXT: s_mov_b32 s1, s3
1087 ; GFX12-NEXT: s_mov_b32 s2, s4
1088 ; GFX12-NEXT: s_mov_b32 s3, s5
1089 ; GFX12-NEXT: s_mov_b32 s4, s6
1090 ; GFX12-NEXT: s_mov_b32 s5, s7
1091 ; GFX12-NEXT: s_mov_b32 s6, s8
1092 ; GFX12-NEXT: s_mov_b32 s7, s9
1093 ; GFX12-NEXT: image_atomic_dec_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
1094 ; GFX12-NEXT: s_wait_loadcnt 0x0
1095 ; GFX12-NEXT: ; return to shader part epilog
1097 %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1098 %out = bitcast i32 %v to float
; atomic_cmpswap_i32_1d: amdgpu_ps function that passes %cmp, %swap and %s,
; then the inreg <8 x i32> %rsrc and two zero i32 operands, to
; llvm.amdgcn.image.atomic.cmpswap.1d and bitcasts the returned i32 to float.
; Every expectation copies s2..s9 into s0..s7 and then issues one
; image_atomic_cmpswap on s[0:7] that names the register pair v[0:1] plus v2,
; uses dmask:0x3, and has glc set (th:TH_ATOMIC_RETURN on GFX12), followed by a
; wait. No v_mov copy is expected for any target in this function.
; The check lines are autogenerated (see the first line of this file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; (review) the two zero operands are presumably texfailctrl and cachepolicy -
; confirm against the intrinsic definition.
1102 define amdgpu_ps float @atomic_cmpswap_i32_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) {
1103 ; GFX6-LABEL: atomic_cmpswap_i32_1d:
1104 ; GFX6: ; %bb.0: ; %main_body
1105 ; GFX6-NEXT: s_mov_b32 s0, s2
1106 ; GFX6-NEXT: s_mov_b32 s1, s3
1107 ; GFX6-NEXT: s_mov_b32 s2, s4
1108 ; GFX6-NEXT: s_mov_b32 s3, s5
1109 ; GFX6-NEXT: s_mov_b32 s4, s6
1110 ; GFX6-NEXT: s_mov_b32 s5, s7
1111 ; GFX6-NEXT: s_mov_b32 s6, s8
1112 ; GFX6-NEXT: s_mov_b32 s7, s9
1113 ; GFX6-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1114 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1115 ; GFX6-NEXT: ; return to shader part epilog
1117 ; GFX8-LABEL: atomic_cmpswap_i32_1d:
1118 ; GFX8: ; %bb.0: ; %main_body
1119 ; GFX8-NEXT: s_mov_b32 s0, s2
1120 ; GFX8-NEXT: s_mov_b32 s1, s3
1121 ; GFX8-NEXT: s_mov_b32 s2, s4
1122 ; GFX8-NEXT: s_mov_b32 s3, s5
1123 ; GFX8-NEXT: s_mov_b32 s4, s6
1124 ; GFX8-NEXT: s_mov_b32 s5, s7
1125 ; GFX8-NEXT: s_mov_b32 s6, s8
1126 ; GFX8-NEXT: s_mov_b32 s7, s9
1127 ; GFX8-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1128 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1129 ; GFX8-NEXT: ; return to shader part epilog
1131 ; GFX900-LABEL: atomic_cmpswap_i32_1d:
1132 ; GFX900: ; %bb.0: ; %main_body
1133 ; GFX900-NEXT: s_mov_b32 s0, s2
1134 ; GFX900-NEXT: s_mov_b32 s1, s3
1135 ; GFX900-NEXT: s_mov_b32 s2, s4
1136 ; GFX900-NEXT: s_mov_b32 s3, s5
1137 ; GFX900-NEXT: s_mov_b32 s4, s6
1138 ; GFX900-NEXT: s_mov_b32 s5, s7
1139 ; GFX900-NEXT: s_mov_b32 s6, s8
1140 ; GFX900-NEXT: s_mov_b32 s7, s9
1141 ; GFX900-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1142 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1143 ; GFX900-NEXT: ; return to shader part epilog
1145 ; GFX90A-LABEL: atomic_cmpswap_i32_1d:
1146 ; GFX90A: ; %bb.0: ; %main_body
1147 ; GFX90A-NEXT: s_mov_b32 s0, s2
1148 ; GFX90A-NEXT: s_mov_b32 s1, s3
1149 ; GFX90A-NEXT: s_mov_b32 s2, s4
1150 ; GFX90A-NEXT: s_mov_b32 s3, s5
1151 ; GFX90A-NEXT: s_mov_b32 s4, s6
1152 ; GFX90A-NEXT: s_mov_b32 s5, s7
1153 ; GFX90A-NEXT: s_mov_b32 s6, s8
1154 ; GFX90A-NEXT: s_mov_b32 s7, s9
1155 ; GFX90A-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1156 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1157 ; GFX90A-NEXT: ; return to shader part epilog
1159 ; GFX10PLUS-LABEL: atomic_cmpswap_i32_1d:
1160 ; GFX10PLUS: ; %bb.0: ; %main_body
1161 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1162 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1163 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1164 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1165 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1166 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1167 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1168 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1169 ; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
1170 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1171 ; GFX10PLUS-NEXT: ; return to shader part epilog
1173 ; GFX12-LABEL: atomic_cmpswap_i32_1d:
1174 ; GFX12: ; %bb.0: ; %main_body
1175 ; GFX12-NEXT: s_mov_b32 s0, s2
1176 ; GFX12-NEXT: s_mov_b32 s1, s3
1177 ; GFX12-NEXT: s_mov_b32 s2, s4
1178 ; GFX12-NEXT: s_mov_b32 s3, s5
1179 ; GFX12-NEXT: s_mov_b32 s4, s6
1180 ; GFX12-NEXT: s_mov_b32 s5, s7
1181 ; GFX12-NEXT: s_mov_b32 s6, s8
1182 ; GFX12-NEXT: s_mov_b32 s7, s9
1183 ; GFX12-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
1184 ; GFX12-NEXT: s_wait_loadcnt 0x0
1185 ; GFX12-NEXT: ; return to shader part epilog
1187 %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1188 %out = bitcast i32 %v to float
; atomic_cmpswap_i32_1d_no_return: void amdgpu_ps function that makes the same
; llvm.amdgcn.image.atomic.cmpswap.1d call (%cmp, %swap, %s, the inreg
; <8 x i32> %rsrc and two zero i32 operands) but has no visible use of the
; result %v.
; Every expectation copies s2..s9 into s0..s7, issues one image_atomic_cmpswap
; on s[0:7] with v[0:1], v2 and dmask:0x3, and then ends with s_endpgm; no wait
; instruction is expected.
; (review) the expected instruction still has glc set (th:TH_ATOMIC_RETURN on
; GFX12) although the result is unused - presumably no separate no-return form
; is selected here; confirm that this is intended before relying on it.
; The check lines are autogenerated (see the first line of this file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
1192 define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) {
1193 ; GFX6-LABEL: atomic_cmpswap_i32_1d_no_return:
1194 ; GFX6: ; %bb.0: ; %main_body
1195 ; GFX6-NEXT: s_mov_b32 s0, s2
1196 ; GFX6-NEXT: s_mov_b32 s1, s3
1197 ; GFX6-NEXT: s_mov_b32 s2, s4
1198 ; GFX6-NEXT: s_mov_b32 s3, s5
1199 ; GFX6-NEXT: s_mov_b32 s4, s6
1200 ; GFX6-NEXT: s_mov_b32 s5, s7
1201 ; GFX6-NEXT: s_mov_b32 s6, s8
1202 ; GFX6-NEXT: s_mov_b32 s7, s9
1203 ; GFX6-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1204 ; GFX6-NEXT: s_endpgm
1206 ; GFX8-LABEL: atomic_cmpswap_i32_1d_no_return:
1207 ; GFX8: ; %bb.0: ; %main_body
1208 ; GFX8-NEXT: s_mov_b32 s0, s2
1209 ; GFX8-NEXT: s_mov_b32 s1, s3
1210 ; GFX8-NEXT: s_mov_b32 s2, s4
1211 ; GFX8-NEXT: s_mov_b32 s3, s5
1212 ; GFX8-NEXT: s_mov_b32 s4, s6
1213 ; GFX8-NEXT: s_mov_b32 s5, s7
1214 ; GFX8-NEXT: s_mov_b32 s6, s8
1215 ; GFX8-NEXT: s_mov_b32 s7, s9
1216 ; GFX8-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1217 ; GFX8-NEXT: s_endpgm
1219 ; GFX900-LABEL: atomic_cmpswap_i32_1d_no_return:
1220 ; GFX900: ; %bb.0: ; %main_body
1221 ; GFX900-NEXT: s_mov_b32 s0, s2
1222 ; GFX900-NEXT: s_mov_b32 s1, s3
1223 ; GFX900-NEXT: s_mov_b32 s2, s4
1224 ; GFX900-NEXT: s_mov_b32 s3, s5
1225 ; GFX900-NEXT: s_mov_b32 s4, s6
1226 ; GFX900-NEXT: s_mov_b32 s5, s7
1227 ; GFX900-NEXT: s_mov_b32 s6, s8
1228 ; GFX900-NEXT: s_mov_b32 s7, s9
1229 ; GFX900-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1230 ; GFX900-NEXT: s_endpgm
1232 ; GFX90A-LABEL: atomic_cmpswap_i32_1d_no_return:
1233 ; GFX90A: ; %bb.0: ; %main_body
1234 ; GFX90A-NEXT: s_mov_b32 s0, s2
1235 ; GFX90A-NEXT: s_mov_b32 s1, s3
1236 ; GFX90A-NEXT: s_mov_b32 s2, s4
1237 ; GFX90A-NEXT: s_mov_b32 s3, s5
1238 ; GFX90A-NEXT: s_mov_b32 s4, s6
1239 ; GFX90A-NEXT: s_mov_b32 s5, s7
1240 ; GFX90A-NEXT: s_mov_b32 s6, s8
1241 ; GFX90A-NEXT: s_mov_b32 s7, s9
1242 ; GFX90A-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1243 ; GFX90A-NEXT: s_endpgm
1245 ; GFX10PLUS-LABEL: atomic_cmpswap_i32_1d_no_return:
1246 ; GFX10PLUS: ; %bb.0: ; %main_body
1247 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1248 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1249 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1250 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1251 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1252 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1253 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1254 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1255 ; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
1256 ; GFX10PLUS-NEXT: s_endpgm
1258 ; GFX12-LABEL: atomic_cmpswap_i32_1d_no_return:
1259 ; GFX12: ; %bb.0: ; %main_body
1260 ; GFX12-NEXT: s_mov_b32 s0, s2
1261 ; GFX12-NEXT: s_mov_b32 s1, s3
1262 ; GFX12-NEXT: s_mov_b32 s2, s4
1263 ; GFX12-NEXT: s_mov_b32 s3, s5
1264 ; GFX12-NEXT: s_mov_b32 s4, s6
1265 ; GFX12-NEXT: s_mov_b32 s5, s7
1266 ; GFX12-NEXT: s_mov_b32 s6, s8
1267 ; GFX12-NEXT: s_mov_b32 s7, s9
1268 ; GFX12-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
1269 ; GFX12-NEXT: s_endpgm
1271 %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; atomic_add_i32_2d: amdgpu_ps function that passes %data, %s and %t, then the
; inreg <8 x i32> %rsrc and two zero i32 operands, to
; llvm.amdgcn.image.atomic.add.2d and bitcasts the returned i32 to float.
; Every expectation copies s2..s9 into s0..s7 and then issues one atomic add on
; s[0:7] with glc set (th:TH_ATOMIC_RETURN on GFX12), followed by a wait.
; Differences between the expectations: GFX90A first moves v1/v2 into v4/v5 and
; uses v[4:5]; GFX10PLUS and GFX12 carry dim:SQ_RSRC_IMG_2D; GFX12 spells the
; mnemonic image_atomic_add_uint and lists the registers as [v1, v2] rather
; than the range v[1:2].
; The check lines are autogenerated (see the first line of this file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; (review) the two zero operands are presumably texfailctrl and cachepolicy -
; confirm against the intrinsic definition.
1275 define amdgpu_ps float @atomic_add_i32_2d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t) {
1276 ; GFX6-LABEL: atomic_add_i32_2d:
1277 ; GFX6: ; %bb.0: ; %main_body
1278 ; GFX6-NEXT: s_mov_b32 s0, s2
1279 ; GFX6-NEXT: s_mov_b32 s1, s3
1280 ; GFX6-NEXT: s_mov_b32 s2, s4
1281 ; GFX6-NEXT: s_mov_b32 s3, s5
1282 ; GFX6-NEXT: s_mov_b32 s4, s6
1283 ; GFX6-NEXT: s_mov_b32 s5, s7
1284 ; GFX6-NEXT: s_mov_b32 s6, s8
1285 ; GFX6-NEXT: s_mov_b32 s7, s9
1286 ; GFX6-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc
1287 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1288 ; GFX6-NEXT: ; return to shader part epilog
1290 ; GFX8-LABEL: atomic_add_i32_2d:
1291 ; GFX8: ; %bb.0: ; %main_body
1292 ; GFX8-NEXT: s_mov_b32 s0, s2
1293 ; GFX8-NEXT: s_mov_b32 s1, s3
1294 ; GFX8-NEXT: s_mov_b32 s2, s4
1295 ; GFX8-NEXT: s_mov_b32 s3, s5
1296 ; GFX8-NEXT: s_mov_b32 s4, s6
1297 ; GFX8-NEXT: s_mov_b32 s5, s7
1298 ; GFX8-NEXT: s_mov_b32 s6, s8
1299 ; GFX8-NEXT: s_mov_b32 s7, s9
1300 ; GFX8-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc
1301 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1302 ; GFX8-NEXT: ; return to shader part epilog
1304 ; GFX900-LABEL: atomic_add_i32_2d:
1305 ; GFX900: ; %bb.0: ; %main_body
1306 ; GFX900-NEXT: s_mov_b32 s0, s2
1307 ; GFX900-NEXT: s_mov_b32 s1, s3
1308 ; GFX900-NEXT: s_mov_b32 s2, s4
1309 ; GFX900-NEXT: s_mov_b32 s3, s5
1310 ; GFX900-NEXT: s_mov_b32 s4, s6
1311 ; GFX900-NEXT: s_mov_b32 s5, s7
1312 ; GFX900-NEXT: s_mov_b32 s6, s8
1313 ; GFX900-NEXT: s_mov_b32 s7, s9
1314 ; GFX900-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc
1315 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1316 ; GFX900-NEXT: ; return to shader part epilog
1318 ; GFX90A-LABEL: atomic_add_i32_2d:
1319 ; GFX90A: ; %bb.0: ; %main_body
1320 ; GFX90A-NEXT: s_mov_b32 s0, s2
1321 ; GFX90A-NEXT: s_mov_b32 s1, s3
1322 ; GFX90A-NEXT: s_mov_b32 s2, s4
1323 ; GFX90A-NEXT: s_mov_b32 s3, s5
1324 ; GFX90A-NEXT: s_mov_b32 s4, s6
1325 ; GFX90A-NEXT: s_mov_b32 s5, s7
1326 ; GFX90A-NEXT: s_mov_b32 s6, s8
1327 ; GFX90A-NEXT: s_mov_b32 s7, s9
1328 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
1329 ; GFX90A-NEXT: v_mov_b32_e32 v5, v2
1330 ; GFX90A-NEXT: image_atomic_add v0, v[4:5], s[0:7] dmask:0x1 unorm glc
1331 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1332 ; GFX90A-NEXT: ; return to shader part epilog
1334 ; GFX10PLUS-LABEL: atomic_add_i32_2d:
1335 ; GFX10PLUS: ; %bb.0: ; %main_body
1336 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1337 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1338 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1339 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1340 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1341 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1342 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1343 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1344 ; GFX10PLUS-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm glc
1345 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1346 ; GFX10PLUS-NEXT: ; return to shader part epilog
1348 ; GFX12-LABEL: atomic_add_i32_2d:
1349 ; GFX12: ; %bb.0: ; %main_body
1350 ; GFX12-NEXT: s_mov_b32 s0, s2
1351 ; GFX12-NEXT: s_mov_b32 s1, s3
1352 ; GFX12-NEXT: s_mov_b32 s2, s4
1353 ; GFX12-NEXT: s_mov_b32 s3, s5
1354 ; GFX12-NEXT: s_mov_b32 s4, s6
1355 ; GFX12-NEXT: s_mov_b32 s5, s7
1356 ; GFX12-NEXT: s_mov_b32 s6, s8
1357 ; GFX12-NEXT: s_mov_b32 s7, s9
1358 ; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D th:TH_ATOMIC_RETURN
1359 ; GFX12-NEXT: s_wait_loadcnt 0x0
1360 ; GFX12-NEXT: ; return to shader part epilog
1362 %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
1363 %out = bitcast i32 %v to float
; atomic_add_i32_3d: amdgpu_ps function that passes %data, %s, %t and %r, then
; the inreg <8 x i32> %rsrc and two zero i32 operands, to
; llvm.amdgcn.image.atomic.add.3d and bitcasts the returned i32 to float.
; Every expectation copies s2..s9 into s0..s7 and then issues one atomic add on
; s[0:7] with glc set (th:TH_ATOMIC_RETURN on GFX12), followed by a wait.
; Differences between the expectations: GFX90A first moves v1/v2/v3 into
; v4/v5/v6 and uses v[4:6]; GFX10PLUS and GFX12 carry dim:SQ_RSRC_IMG_3D; GFX12
; spells the mnemonic image_atomic_add_uint and lists the registers as
; [v1, v2, v3] rather than the range v[1:3].
; The check lines are autogenerated (see the first line of this file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; (review) the two zero operands are presumably texfailctrl and cachepolicy -
; confirm against the intrinsic definition.
1367 define amdgpu_ps float @atomic_add_i32_3d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %r) {
1368 ; GFX6-LABEL: atomic_add_i32_3d:
1369 ; GFX6: ; %bb.0: ; %main_body
1370 ; GFX6-NEXT: s_mov_b32 s0, s2
1371 ; GFX6-NEXT: s_mov_b32 s1, s3
1372 ; GFX6-NEXT: s_mov_b32 s2, s4
1373 ; GFX6-NEXT: s_mov_b32 s3, s5
1374 ; GFX6-NEXT: s_mov_b32 s4, s6
1375 ; GFX6-NEXT: s_mov_b32 s5, s7
1376 ; GFX6-NEXT: s_mov_b32 s6, s8
1377 ; GFX6-NEXT: s_mov_b32 s7, s9
1378 ; GFX6-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
1379 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1380 ; GFX6-NEXT: ; return to shader part epilog
1382 ; GFX8-LABEL: atomic_add_i32_3d:
1383 ; GFX8: ; %bb.0: ; %main_body
1384 ; GFX8-NEXT: s_mov_b32 s0, s2
1385 ; GFX8-NEXT: s_mov_b32 s1, s3
1386 ; GFX8-NEXT: s_mov_b32 s2, s4
1387 ; GFX8-NEXT: s_mov_b32 s3, s5
1388 ; GFX8-NEXT: s_mov_b32 s4, s6
1389 ; GFX8-NEXT: s_mov_b32 s5, s7
1390 ; GFX8-NEXT: s_mov_b32 s6, s8
1391 ; GFX8-NEXT: s_mov_b32 s7, s9
1392 ; GFX8-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
1393 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1394 ; GFX8-NEXT: ; return to shader part epilog
1396 ; GFX900-LABEL: atomic_add_i32_3d:
1397 ; GFX900: ; %bb.0: ; %main_body
1398 ; GFX900-NEXT: s_mov_b32 s0, s2
1399 ; GFX900-NEXT: s_mov_b32 s1, s3
1400 ; GFX900-NEXT: s_mov_b32 s2, s4
1401 ; GFX900-NEXT: s_mov_b32 s3, s5
1402 ; GFX900-NEXT: s_mov_b32 s4, s6
1403 ; GFX900-NEXT: s_mov_b32 s5, s7
1404 ; GFX900-NEXT: s_mov_b32 s6, s8
1405 ; GFX900-NEXT: s_mov_b32 s7, s9
1406 ; GFX900-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
1407 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1408 ; GFX900-NEXT: ; return to shader part epilog
1410 ; GFX90A-LABEL: atomic_add_i32_3d:
1411 ; GFX90A: ; %bb.0: ; %main_body
1412 ; GFX90A-NEXT: s_mov_b32 s0, s2
1413 ; GFX90A-NEXT: s_mov_b32 s1, s3
1414 ; GFX90A-NEXT: s_mov_b32 s2, s4
1415 ; GFX90A-NEXT: s_mov_b32 s3, s5
1416 ; GFX90A-NEXT: s_mov_b32 s4, s6
1417 ; GFX90A-NEXT: s_mov_b32 s5, s7
1418 ; GFX90A-NEXT: s_mov_b32 s6, s8
1419 ; GFX90A-NEXT: s_mov_b32 s7, s9
1420 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
1421 ; GFX90A-NEXT: v_mov_b32_e32 v5, v2
1422 ; GFX90A-NEXT: v_mov_b32_e32 v6, v3
1423 ; GFX90A-NEXT: image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc
1424 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1425 ; GFX90A-NEXT: ; return to shader part epilog
1427 ; GFX10PLUS-LABEL: atomic_add_i32_3d:
1428 ; GFX10PLUS: ; %bb.0: ; %main_body
1429 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1430 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1431 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1432 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1433 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1434 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1435 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1436 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1437 ; GFX10PLUS-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm glc
1438 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1439 ; GFX10PLUS-NEXT: ; return to shader part epilog
1441 ; GFX12-LABEL: atomic_add_i32_3d:
1442 ; GFX12: ; %bb.0: ; %main_body
1443 ; GFX12-NEXT: s_mov_b32 s0, s2
1444 ; GFX12-NEXT: s_mov_b32 s1, s3
1445 ; GFX12-NEXT: s_mov_b32 s2, s4
1446 ; GFX12-NEXT: s_mov_b32 s3, s5
1447 ; GFX12-NEXT: s_mov_b32 s4, s6
1448 ; GFX12-NEXT: s_mov_b32 s5, s7
1449 ; GFX12-NEXT: s_mov_b32 s6, s8
1450 ; GFX12-NEXT: s_mov_b32 s7, s9
1451 ; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D th:TH_ATOMIC_RETURN
1452 ; GFX12-NEXT: s_wait_loadcnt 0x0
1453 ; GFX12-NEXT: ; return to shader part epilog
1455 %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32 %data, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
1456 %out = bitcast i32 %v to float
; atomic_add_i32_cube: amdgpu_ps function that passes %data, %s, %t and %face,
; then the inreg <8 x i32> %rsrc and two zero i32 operands, to
; llvm.amdgcn.image.atomic.add.cube and bitcasts the returned i32 to float.
; Every expectation copies s2..s9 into s0..s7 and then issues one atomic add on
; s[0:7] with glc set (th:TH_ATOMIC_RETURN on GFX12), followed by a wait.
; Differences between the expectations: GFX6, GFX8, GFX900 and GFX90A carry the
; da modifier, while GFX10PLUS and GFX12 carry dim:SQ_RSRC_IMG_CUBE instead;
; GFX90A first moves v1/v2/v3 into v4/v5/v6 and uses v[4:6]; GFX12 spells the
; mnemonic image_atomic_add_uint and lists the registers as [v1, v2, v3] rather
; than the range v[1:3].
; The check lines are autogenerated (see the first line of this file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; (review) the two zero operands are presumably texfailctrl and cachepolicy -
; confirm against the intrinsic definition.
1460 define amdgpu_ps float @atomic_add_i32_cube(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %face) {
1461 ; GFX6-LABEL: atomic_add_i32_cube:
1462 ; GFX6: ; %bb.0: ; %main_body
1463 ; GFX6-NEXT: s_mov_b32 s0, s2
1464 ; GFX6-NEXT: s_mov_b32 s1, s3
1465 ; GFX6-NEXT: s_mov_b32 s2, s4
1466 ; GFX6-NEXT: s_mov_b32 s3, s5
1467 ; GFX6-NEXT: s_mov_b32 s4, s6
1468 ; GFX6-NEXT: s_mov_b32 s5, s7
1469 ; GFX6-NEXT: s_mov_b32 s6, s8
1470 ; GFX6-NEXT: s_mov_b32 s7, s9
1471 ; GFX6-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
1472 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1473 ; GFX6-NEXT: ; return to shader part epilog
1475 ; GFX8-LABEL: atomic_add_i32_cube:
1476 ; GFX8: ; %bb.0: ; %main_body
1477 ; GFX8-NEXT: s_mov_b32 s0, s2
1478 ; GFX8-NEXT: s_mov_b32 s1, s3
1479 ; GFX8-NEXT: s_mov_b32 s2, s4
1480 ; GFX8-NEXT: s_mov_b32 s3, s5
1481 ; GFX8-NEXT: s_mov_b32 s4, s6
1482 ; GFX8-NEXT: s_mov_b32 s5, s7
1483 ; GFX8-NEXT: s_mov_b32 s6, s8
1484 ; GFX8-NEXT: s_mov_b32 s7, s9
1485 ; GFX8-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
1486 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1487 ; GFX8-NEXT: ; return to shader part epilog
1489 ; GFX900-LABEL: atomic_add_i32_cube:
1490 ; GFX900: ; %bb.0: ; %main_body
1491 ; GFX900-NEXT: s_mov_b32 s0, s2
1492 ; GFX900-NEXT: s_mov_b32 s1, s3
1493 ; GFX900-NEXT: s_mov_b32 s2, s4
1494 ; GFX900-NEXT: s_mov_b32 s3, s5
1495 ; GFX900-NEXT: s_mov_b32 s4, s6
1496 ; GFX900-NEXT: s_mov_b32 s5, s7
1497 ; GFX900-NEXT: s_mov_b32 s6, s8
1498 ; GFX900-NEXT: s_mov_b32 s7, s9
1499 ; GFX900-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
1500 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1501 ; GFX900-NEXT: ; return to shader part epilog
1503 ; GFX90A-LABEL: atomic_add_i32_cube:
1504 ; GFX90A: ; %bb.0: ; %main_body
1505 ; GFX90A-NEXT: s_mov_b32 s0, s2
1506 ; GFX90A-NEXT: s_mov_b32 s1, s3
1507 ; GFX90A-NEXT: s_mov_b32 s2, s4
1508 ; GFX90A-NEXT: s_mov_b32 s3, s5
1509 ; GFX90A-NEXT: s_mov_b32 s4, s6
1510 ; GFX90A-NEXT: s_mov_b32 s5, s7
1511 ; GFX90A-NEXT: s_mov_b32 s6, s8
1512 ; GFX90A-NEXT: s_mov_b32 s7, s9
1513 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
1514 ; GFX90A-NEXT: v_mov_b32_e32 v5, v2
1515 ; GFX90A-NEXT: v_mov_b32_e32 v6, v3
1516 ; GFX90A-NEXT: image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc da
1517 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1518 ; GFX90A-NEXT: ; return to shader part epilog
1520 ; GFX10PLUS-LABEL: atomic_add_i32_cube:
1521 ; GFX10PLUS: ; %bb.0: ; %main_body
1522 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1523 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1524 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1525 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1526 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1527 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1528 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1529 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1530 ; GFX10PLUS-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm glc
1531 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1532 ; GFX10PLUS-NEXT: ; return to shader part epilog
1534 ; GFX12-LABEL: atomic_add_i32_cube:
1535 ; GFX12: ; %bb.0: ; %main_body
1536 ; GFX12-NEXT: s_mov_b32 s0, s2
1537 ; GFX12-NEXT: s_mov_b32 s1, s3
1538 ; GFX12-NEXT: s_mov_b32 s2, s4
1539 ; GFX12-NEXT: s_mov_b32 s3, s5
1540 ; GFX12-NEXT: s_mov_b32 s4, s6
1541 ; GFX12-NEXT: s_mov_b32 s5, s7
1542 ; GFX12-NEXT: s_mov_b32 s6, s8
1543 ; GFX12-NEXT: s_mov_b32 s7, s9
1544 ; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE th:TH_ATOMIC_RETURN
1545 ; GFX12-NEXT: s_wait_loadcnt 0x0
1546 ; GFX12-NEXT: ; return to shader part epilog
1548 %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32 %data, i32 %s, i32 %t, i32 %face, <8 x i32> %rsrc, i32 0, i32 0)
1549 %out = bitcast i32 %v to float
; atomic_add_i32_1darray: amdgpu_ps function that passes %data, %s and %slice,
; then the inreg <8 x i32> %rsrc and two zero i32 operands, to
; llvm.amdgcn.image.atomic.add.1darray and bitcasts the returned i32 to float.
; Every expectation copies s2..s9 into s0..s7 and then issues one atomic add on
; s[0:7] with glc set (th:TH_ATOMIC_RETURN on GFX12), followed by a wait.
; Differences between the expectations: GFX6, GFX8, GFX900 and GFX90A carry the
; da modifier, while GFX10PLUS and GFX12 carry dim:SQ_RSRC_IMG_1D_ARRAY
; instead; GFX90A first moves v1/v2 into v4/v5 and uses v[4:5]; GFX12 spells
; the mnemonic image_atomic_add_uint and lists the registers as [v1, v2] rather
; than the range v[1:2].
; The check lines are autogenerated (see the first line of this file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; (review) the two zero operands are presumably texfailctrl and cachepolicy -
; confirm against the intrinsic definition.
1553 define amdgpu_ps float @atomic_add_i32_1darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %slice) {
1554 ; GFX6-LABEL: atomic_add_i32_1darray:
1555 ; GFX6: ; %bb.0: ; %main_body
1556 ; GFX6-NEXT: s_mov_b32 s0, s2
1557 ; GFX6-NEXT: s_mov_b32 s1, s3
1558 ; GFX6-NEXT: s_mov_b32 s2, s4
1559 ; GFX6-NEXT: s_mov_b32 s3, s5
1560 ; GFX6-NEXT: s_mov_b32 s4, s6
1561 ; GFX6-NEXT: s_mov_b32 s5, s7
1562 ; GFX6-NEXT: s_mov_b32 s6, s8
1563 ; GFX6-NEXT: s_mov_b32 s7, s9
1564 ; GFX6-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da
1565 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1566 ; GFX6-NEXT: ; return to shader part epilog
1568 ; GFX8-LABEL: atomic_add_i32_1darray:
1569 ; GFX8: ; %bb.0: ; %main_body
1570 ; GFX8-NEXT: s_mov_b32 s0, s2
1571 ; GFX8-NEXT: s_mov_b32 s1, s3
1572 ; GFX8-NEXT: s_mov_b32 s2, s4
1573 ; GFX8-NEXT: s_mov_b32 s3, s5
1574 ; GFX8-NEXT: s_mov_b32 s4, s6
1575 ; GFX8-NEXT: s_mov_b32 s5, s7
1576 ; GFX8-NEXT: s_mov_b32 s6, s8
1577 ; GFX8-NEXT: s_mov_b32 s7, s9
1578 ; GFX8-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da
1579 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1580 ; GFX8-NEXT: ; return to shader part epilog
1582 ; GFX900-LABEL: atomic_add_i32_1darray:
1583 ; GFX900: ; %bb.0: ; %main_body
1584 ; GFX900-NEXT: s_mov_b32 s0, s2
1585 ; GFX900-NEXT: s_mov_b32 s1, s3
1586 ; GFX900-NEXT: s_mov_b32 s2, s4
1587 ; GFX900-NEXT: s_mov_b32 s3, s5
1588 ; GFX900-NEXT: s_mov_b32 s4, s6
1589 ; GFX900-NEXT: s_mov_b32 s5, s7
1590 ; GFX900-NEXT: s_mov_b32 s6, s8
1591 ; GFX900-NEXT: s_mov_b32 s7, s9
1592 ; GFX900-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da
1593 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1594 ; GFX900-NEXT: ; return to shader part epilog
1596 ; GFX90A-LABEL: atomic_add_i32_1darray:
1597 ; GFX90A: ; %bb.0: ; %main_body
1598 ; GFX90A-NEXT: s_mov_b32 s0, s2
1599 ; GFX90A-NEXT: s_mov_b32 s1, s3
1600 ; GFX90A-NEXT: s_mov_b32 s2, s4
1601 ; GFX90A-NEXT: s_mov_b32 s3, s5
1602 ; GFX90A-NEXT: s_mov_b32 s4, s6
1603 ; GFX90A-NEXT: s_mov_b32 s5, s7
1604 ; GFX90A-NEXT: s_mov_b32 s6, s8
1605 ; GFX90A-NEXT: s_mov_b32 s7, s9
1606 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
1607 ; GFX90A-NEXT: v_mov_b32_e32 v5, v2
1608 ; GFX90A-NEXT: image_atomic_add v0, v[4:5], s[0:7] dmask:0x1 unorm glc da
1609 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1610 ; GFX90A-NEXT: ; return to shader part epilog
1612 ; GFX10PLUS-LABEL: atomic_add_i32_1darray:
1613 ; GFX10PLUS: ; %bb.0: ; %main_body
1614 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1615 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1616 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1617 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1618 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1619 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1620 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1621 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1622 ; GFX10PLUS-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc
1623 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1624 ; GFX10PLUS-NEXT: ; return to shader part epilog
1626 ; GFX12-LABEL: atomic_add_i32_1darray:
1627 ; GFX12: ; %bb.0: ; %main_body
1628 ; GFX12-NEXT: s_mov_b32 s0, s2
1629 ; GFX12-NEXT: s_mov_b32 s1, s3
1630 ; GFX12-NEXT: s_mov_b32 s2, s4
1631 ; GFX12-NEXT: s_mov_b32 s3, s5
1632 ; GFX12-NEXT: s_mov_b32 s4, s6
1633 ; GFX12-NEXT: s_mov_b32 s5, s7
1634 ; GFX12-NEXT: s_mov_b32 s6, s8
1635 ; GFX12-NEXT: s_mov_b32 s7, s9
1636 ; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY th:TH_ATOMIC_RETURN
1637 ; GFX12-NEXT: s_wait_loadcnt 0x0
1638 ; GFX12-NEXT: ; return to shader part epilog
1640 %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32 %data, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
1641 %out = bitcast i32 %v to float
; Test: i32 image atomic add addressed with three coordinates (%s, %t, %slice)
; through the 2darray intrinsic; both trailing i32 operands of the call are 0
; and the i32 result is bitcast to float in %out.
; Expected output per the checks below: the targets before GFX10 append the
; `da` modifier, GFX90A first copies v1-v3 into v4-v6 and addresses with
; v[4:6], the GFX10PLUS runs use dim:SQ_RSRC_IMG_2D_ARRAY, and GFX12 emits
; image_atomic_add_uint with th:TH_ATOMIC_RETURN.
1645 define amdgpu_ps float @atomic_add_i32_2darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice) {
1646 ; GFX6-LABEL: atomic_add_i32_2darray:
1647 ; GFX6: ; %bb.0: ; %main_body
1648 ; GFX6-NEXT: s_mov_b32 s0, s2
1649 ; GFX6-NEXT: s_mov_b32 s1, s3
1650 ; GFX6-NEXT: s_mov_b32 s2, s4
1651 ; GFX6-NEXT: s_mov_b32 s3, s5
1652 ; GFX6-NEXT: s_mov_b32 s4, s6
1653 ; GFX6-NEXT: s_mov_b32 s5, s7
1654 ; GFX6-NEXT: s_mov_b32 s6, s8
1655 ; GFX6-NEXT: s_mov_b32 s7, s9
1656 ; GFX6-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
1657 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1658 ; GFX6-NEXT: ; return to shader part epilog
1660 ; GFX8-LABEL: atomic_add_i32_2darray:
1661 ; GFX8: ; %bb.0: ; %main_body
1662 ; GFX8-NEXT: s_mov_b32 s0, s2
1663 ; GFX8-NEXT: s_mov_b32 s1, s3
1664 ; GFX8-NEXT: s_mov_b32 s2, s4
1665 ; GFX8-NEXT: s_mov_b32 s3, s5
1666 ; GFX8-NEXT: s_mov_b32 s4, s6
1667 ; GFX8-NEXT: s_mov_b32 s5, s7
1668 ; GFX8-NEXT: s_mov_b32 s6, s8
1669 ; GFX8-NEXT: s_mov_b32 s7, s9
1670 ; GFX8-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
1671 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1672 ; GFX8-NEXT: ; return to shader part epilog
1674 ; GFX900-LABEL: atomic_add_i32_2darray:
1675 ; GFX900: ; %bb.0: ; %main_body
1676 ; GFX900-NEXT: s_mov_b32 s0, s2
1677 ; GFX900-NEXT: s_mov_b32 s1, s3
1678 ; GFX900-NEXT: s_mov_b32 s2, s4
1679 ; GFX900-NEXT: s_mov_b32 s3, s5
1680 ; GFX900-NEXT: s_mov_b32 s4, s6
1681 ; GFX900-NEXT: s_mov_b32 s5, s7
1682 ; GFX900-NEXT: s_mov_b32 s6, s8
1683 ; GFX900-NEXT: s_mov_b32 s7, s9
1684 ; GFX900-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
1685 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1686 ; GFX900-NEXT: ; return to shader part epilog
1688 ; GFX90A-LABEL: atomic_add_i32_2darray:
1689 ; GFX90A: ; %bb.0: ; %main_body
1690 ; GFX90A-NEXT: s_mov_b32 s0, s2
1691 ; GFX90A-NEXT: s_mov_b32 s1, s3
1692 ; GFX90A-NEXT: s_mov_b32 s2, s4
1693 ; GFX90A-NEXT: s_mov_b32 s3, s5
1694 ; GFX90A-NEXT: s_mov_b32 s4, s6
1695 ; GFX90A-NEXT: s_mov_b32 s5, s7
1696 ; GFX90A-NEXT: s_mov_b32 s6, s8
1697 ; GFX90A-NEXT: s_mov_b32 s7, s9
1698 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
1699 ; GFX90A-NEXT: v_mov_b32_e32 v5, v2
1700 ; GFX90A-NEXT: v_mov_b32_e32 v6, v3
1701 ; GFX90A-NEXT: image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc da
1702 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1703 ; GFX90A-NEXT: ; return to shader part epilog
1705 ; GFX10PLUS-LABEL: atomic_add_i32_2darray:
1706 ; GFX10PLUS: ; %bb.0: ; %main_body
1707 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1708 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1709 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1710 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1711 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1712 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1713 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1714 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1715 ; GFX10PLUS-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc
1716 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1717 ; GFX10PLUS-NEXT: ; return to shader part epilog
1719 ; GFX12-LABEL: atomic_add_i32_2darray:
1720 ; GFX12: ; %bb.0: ; %main_body
1721 ; GFX12-NEXT: s_mov_b32 s0, s2
1722 ; GFX12-NEXT: s_mov_b32 s1, s3
1723 ; GFX12-NEXT: s_mov_b32 s2, s4
1724 ; GFX12-NEXT: s_mov_b32 s3, s5
1725 ; GFX12-NEXT: s_mov_b32 s4, s6
1726 ; GFX12-NEXT: s_mov_b32 s5, s7
1727 ; GFX12-NEXT: s_mov_b32 s6, s8
1728 ; GFX12-NEXT: s_mov_b32 s7, s9
1729 ; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY th:TH_ATOMIC_RETURN
1730 ; GFX12-NEXT: s_wait_loadcnt 0x0
1731 ; GFX12-NEXT: ; return to shader part epilog
1733 %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
1734 %out = bitcast i32 %v to float
; Test: i32 image atomic add addressed with (%s, %t, %fragid) through the
; 2dmsaa intrinsic; both trailing i32 operands of the call are 0 and the i32
; result is bitcast to float in %out.
; Expected output per the checks below: no `da` modifier on any target, GFX90A
; first copies v1-v3 into v4-v6 and addresses with v[4:6], the GFX10PLUS runs
; use dim:SQ_RSRC_IMG_2D_MSAA, and GFX12 emits image_atomic_add_uint with
; th:TH_ATOMIC_RETURN.
1738 define amdgpu_ps float @atomic_add_i32_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %fragid) {
1739 ; GFX6-LABEL: atomic_add_i32_2dmsaa:
1740 ; GFX6: ; %bb.0: ; %main_body
1741 ; GFX6-NEXT: s_mov_b32 s0, s2
1742 ; GFX6-NEXT: s_mov_b32 s1, s3
1743 ; GFX6-NEXT: s_mov_b32 s2, s4
1744 ; GFX6-NEXT: s_mov_b32 s3, s5
1745 ; GFX6-NEXT: s_mov_b32 s4, s6
1746 ; GFX6-NEXT: s_mov_b32 s5, s7
1747 ; GFX6-NEXT: s_mov_b32 s6, s8
1748 ; GFX6-NEXT: s_mov_b32 s7, s9
1749 ; GFX6-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
1750 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1751 ; GFX6-NEXT: ; return to shader part epilog
1753 ; GFX8-LABEL: atomic_add_i32_2dmsaa:
1754 ; GFX8: ; %bb.0: ; %main_body
1755 ; GFX8-NEXT: s_mov_b32 s0, s2
1756 ; GFX8-NEXT: s_mov_b32 s1, s3
1757 ; GFX8-NEXT: s_mov_b32 s2, s4
1758 ; GFX8-NEXT: s_mov_b32 s3, s5
1759 ; GFX8-NEXT: s_mov_b32 s4, s6
1760 ; GFX8-NEXT: s_mov_b32 s5, s7
1761 ; GFX8-NEXT: s_mov_b32 s6, s8
1762 ; GFX8-NEXT: s_mov_b32 s7, s9
1763 ; GFX8-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
1764 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1765 ; GFX8-NEXT: ; return to shader part epilog
1767 ; GFX900-LABEL: atomic_add_i32_2dmsaa:
1768 ; GFX900: ; %bb.0: ; %main_body
1769 ; GFX900-NEXT: s_mov_b32 s0, s2
1770 ; GFX900-NEXT: s_mov_b32 s1, s3
1771 ; GFX900-NEXT: s_mov_b32 s2, s4
1772 ; GFX900-NEXT: s_mov_b32 s3, s5
1773 ; GFX900-NEXT: s_mov_b32 s4, s6
1774 ; GFX900-NEXT: s_mov_b32 s5, s7
1775 ; GFX900-NEXT: s_mov_b32 s6, s8
1776 ; GFX900-NEXT: s_mov_b32 s7, s9
1777 ; GFX900-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
1778 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1779 ; GFX900-NEXT: ; return to shader part epilog
1781 ; GFX90A-LABEL: atomic_add_i32_2dmsaa:
1782 ; GFX90A: ; %bb.0: ; %main_body
1783 ; GFX90A-NEXT: s_mov_b32 s0, s2
1784 ; GFX90A-NEXT: s_mov_b32 s1, s3
1785 ; GFX90A-NEXT: s_mov_b32 s2, s4
1786 ; GFX90A-NEXT: s_mov_b32 s3, s5
1787 ; GFX90A-NEXT: s_mov_b32 s4, s6
1788 ; GFX90A-NEXT: s_mov_b32 s5, s7
1789 ; GFX90A-NEXT: s_mov_b32 s6, s8
1790 ; GFX90A-NEXT: s_mov_b32 s7, s9
1791 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
1792 ; GFX90A-NEXT: v_mov_b32_e32 v5, v2
1793 ; GFX90A-NEXT: v_mov_b32_e32 v6, v3
1794 ; GFX90A-NEXT: image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc
1795 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1796 ; GFX90A-NEXT: ; return to shader part epilog
1798 ; GFX10PLUS-LABEL: atomic_add_i32_2dmsaa:
1799 ; GFX10PLUS: ; %bb.0: ; %main_body
1800 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1801 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1802 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1803 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1804 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1805 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1806 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1807 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1808 ; GFX10PLUS-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc
1809 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1810 ; GFX10PLUS-NEXT: ; return to shader part epilog
1812 ; GFX12-LABEL: atomic_add_i32_2dmsaa:
1813 ; GFX12: ; %bb.0: ; %main_body
1814 ; GFX12-NEXT: s_mov_b32 s0, s2
1815 ; GFX12-NEXT: s_mov_b32 s1, s3
1816 ; GFX12-NEXT: s_mov_b32 s2, s4
1817 ; GFX12-NEXT: s_mov_b32 s3, s5
1818 ; GFX12-NEXT: s_mov_b32 s4, s6
1819 ; GFX12-NEXT: s_mov_b32 s5, s7
1820 ; GFX12-NEXT: s_mov_b32 s6, s8
1821 ; GFX12-NEXT: s_mov_b32 s7, s9
1822 ; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA th:TH_ATOMIC_RETURN
1823 ; GFX12-NEXT: s_wait_loadcnt 0x0
1824 ; GFX12-NEXT: ; return to shader part epilog
1826 %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
1827 %out = bitcast i32 %v to float
; Test: i32 image atomic add addressed with four coordinates
; (%s, %t, %slice, %fragid) through the 2darraymsaa intrinsic; both trailing
; i32 operands of the call are 0 and the i32 result is bitcast to float in
; %out.
; Expected output per the checks below: the targets before GFX10 address with
; a four-register tuple and append `da`, GFX90A first copies v1-v4 into v6-v9
; and addresses with v[6:9], the GFX10PLUS runs use
; dim:SQ_RSRC_IMG_2D_MSAA_ARRAY, and GFX12 emits image_atomic_add_uint with
; the address list [v1, v2, v3, v4] and th:TH_ATOMIC_RETURN.
1831 define amdgpu_ps float @atomic_add_i32_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
1832 ; GFX6-LABEL: atomic_add_i32_2darraymsaa:
1833 ; GFX6: ; %bb.0: ; %main_body
1834 ; GFX6-NEXT: s_mov_b32 s0, s2
1835 ; GFX6-NEXT: s_mov_b32 s1, s3
1836 ; GFX6-NEXT: s_mov_b32 s2, s4
1837 ; GFX6-NEXT: s_mov_b32 s3, s5
1838 ; GFX6-NEXT: s_mov_b32 s4, s6
1839 ; GFX6-NEXT: s_mov_b32 s5, s7
1840 ; GFX6-NEXT: s_mov_b32 s6, s8
1841 ; GFX6-NEXT: s_mov_b32 s7, s9
1842 ; GFX6-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da
1843 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1844 ; GFX6-NEXT: ; return to shader part epilog
1846 ; GFX8-LABEL: atomic_add_i32_2darraymsaa:
1847 ; GFX8: ; %bb.0: ; %main_body
1848 ; GFX8-NEXT: s_mov_b32 s0, s2
1849 ; GFX8-NEXT: s_mov_b32 s1, s3
1850 ; GFX8-NEXT: s_mov_b32 s2, s4
1851 ; GFX8-NEXT: s_mov_b32 s3, s5
1852 ; GFX8-NEXT: s_mov_b32 s4, s6
1853 ; GFX8-NEXT: s_mov_b32 s5, s7
1854 ; GFX8-NEXT: s_mov_b32 s6, s8
1855 ; GFX8-NEXT: s_mov_b32 s7, s9
1856 ; GFX8-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da
1857 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1858 ; GFX8-NEXT: ; return to shader part epilog
1860 ; GFX900-LABEL: atomic_add_i32_2darraymsaa:
1861 ; GFX900: ; %bb.0: ; %main_body
1862 ; GFX900-NEXT: s_mov_b32 s0, s2
1863 ; GFX900-NEXT: s_mov_b32 s1, s3
1864 ; GFX900-NEXT: s_mov_b32 s2, s4
1865 ; GFX900-NEXT: s_mov_b32 s3, s5
1866 ; GFX900-NEXT: s_mov_b32 s4, s6
1867 ; GFX900-NEXT: s_mov_b32 s5, s7
1868 ; GFX900-NEXT: s_mov_b32 s6, s8
1869 ; GFX900-NEXT: s_mov_b32 s7, s9
1870 ; GFX900-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da
1871 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1872 ; GFX900-NEXT: ; return to shader part epilog
1874 ; GFX90A-LABEL: atomic_add_i32_2darraymsaa:
1875 ; GFX90A: ; %bb.0: ; %main_body
1876 ; GFX90A-NEXT: s_mov_b32 s0, s2
1877 ; GFX90A-NEXT: s_mov_b32 s1, s3
1878 ; GFX90A-NEXT: s_mov_b32 s2, s4
1879 ; GFX90A-NEXT: s_mov_b32 s3, s5
1880 ; GFX90A-NEXT: s_mov_b32 s4, s6
1881 ; GFX90A-NEXT: s_mov_b32 s5, s7
1882 ; GFX90A-NEXT: s_mov_b32 s6, s8
1883 ; GFX90A-NEXT: s_mov_b32 s7, s9
1884 ; GFX90A-NEXT: v_mov_b32_e32 v6, v1
1885 ; GFX90A-NEXT: v_mov_b32_e32 v7, v2
1886 ; GFX90A-NEXT: v_mov_b32_e32 v8, v3
1887 ; GFX90A-NEXT: v_mov_b32_e32 v9, v4
1888 ; GFX90A-NEXT: image_atomic_add v0, v[6:9], s[0:7] dmask:0x1 unorm glc da
1889 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1890 ; GFX90A-NEXT: ; return to shader part epilog
1892 ; GFX10PLUS-LABEL: atomic_add_i32_2darraymsaa:
1893 ; GFX10PLUS: ; %bb.0: ; %main_body
1894 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1895 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1896 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1897 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1898 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1899 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1900 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1901 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1902 ; GFX10PLUS-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc
1903 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1904 ; GFX10PLUS-NEXT: ; return to shader part epilog
1906 ; GFX12-LABEL: atomic_add_i32_2darraymsaa:
1907 ; GFX12: ; %bb.0: ; %main_body
1908 ; GFX12-NEXT: s_mov_b32 s0, s2
1909 ; GFX12-NEXT: s_mov_b32 s1, s3
1910 ; GFX12-NEXT: s_mov_b32 s2, s4
1911 ; GFX12-NEXT: s_mov_b32 s3, s5
1912 ; GFX12-NEXT: s_mov_b32 s4, s6
1913 ; GFX12-NEXT: s_mov_b32 s5, s7
1914 ; GFX12-NEXT: s_mov_b32 s6, s8
1915 ; GFX12-NEXT: s_mov_b32 s7, s9
1916 ; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2, v3, v4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY th:TH_ATOMIC_RETURN
1917 ; GFX12-NEXT: s_wait_loadcnt 0x0
1918 ; GFX12-NEXT: ; return to shader part epilog
1920 %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
1921 %out = bitcast i32 %v to float
; Test: i32 image atomic add on a 1d image where the final i32 operand of the
; call is 2 rather than the 0 used by the neighbouring tests (presumably the
; cache-policy operand; confirm against the intrinsic definition). The i32
; result is bitcast to float in %out.
; Expected output per the checks below: every target before GFX12 prints
; `glc slc` on the instruction, GFX90A first copies the coordinate from v1
; into v2, and GFX12 emits image_atomic_add_uint with th:TH_ATOMIC_NT_RETURN.
1925 define amdgpu_ps float @atomic_add_i32_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
1926 ; GFX6-LABEL: atomic_add_i32_1d_slc:
1927 ; GFX6: ; %bb.0: ; %main_body
1928 ; GFX6-NEXT: s_mov_b32 s0, s2
1929 ; GFX6-NEXT: s_mov_b32 s1, s3
1930 ; GFX6-NEXT: s_mov_b32 s2, s4
1931 ; GFX6-NEXT: s_mov_b32 s3, s5
1932 ; GFX6-NEXT: s_mov_b32 s4, s6
1933 ; GFX6-NEXT: s_mov_b32 s5, s7
1934 ; GFX6-NEXT: s_mov_b32 s6, s8
1935 ; GFX6-NEXT: s_mov_b32 s7, s9
1936 ; GFX6-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc
1937 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1938 ; GFX6-NEXT: ; return to shader part epilog
1940 ; GFX8-LABEL: atomic_add_i32_1d_slc:
1941 ; GFX8: ; %bb.0: ; %main_body
1942 ; GFX8-NEXT: s_mov_b32 s0, s2
1943 ; GFX8-NEXT: s_mov_b32 s1, s3
1944 ; GFX8-NEXT: s_mov_b32 s2, s4
1945 ; GFX8-NEXT: s_mov_b32 s3, s5
1946 ; GFX8-NEXT: s_mov_b32 s4, s6
1947 ; GFX8-NEXT: s_mov_b32 s5, s7
1948 ; GFX8-NEXT: s_mov_b32 s6, s8
1949 ; GFX8-NEXT: s_mov_b32 s7, s9
1950 ; GFX8-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc
1951 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1952 ; GFX8-NEXT: ; return to shader part epilog
1954 ; GFX900-LABEL: atomic_add_i32_1d_slc:
1955 ; GFX900: ; %bb.0: ; %main_body
1956 ; GFX900-NEXT: s_mov_b32 s0, s2
1957 ; GFX900-NEXT: s_mov_b32 s1, s3
1958 ; GFX900-NEXT: s_mov_b32 s2, s4
1959 ; GFX900-NEXT: s_mov_b32 s3, s5
1960 ; GFX900-NEXT: s_mov_b32 s4, s6
1961 ; GFX900-NEXT: s_mov_b32 s5, s7
1962 ; GFX900-NEXT: s_mov_b32 s6, s8
1963 ; GFX900-NEXT: s_mov_b32 s7, s9
1964 ; GFX900-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc
1965 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1966 ; GFX900-NEXT: ; return to shader part epilog
1968 ; GFX90A-LABEL: atomic_add_i32_1d_slc:
1969 ; GFX90A: ; %bb.0: ; %main_body
1970 ; GFX90A-NEXT: s_mov_b32 s0, s2
1971 ; GFX90A-NEXT: s_mov_b32 s1, s3
1972 ; GFX90A-NEXT: s_mov_b32 s2, s4
1973 ; GFX90A-NEXT: s_mov_b32 s3, s5
1974 ; GFX90A-NEXT: s_mov_b32 s4, s6
1975 ; GFX90A-NEXT: s_mov_b32 s5, s7
1976 ; GFX90A-NEXT: s_mov_b32 s6, s8
1977 ; GFX90A-NEXT: s_mov_b32 s7, s9
1978 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
1979 ; GFX90A-NEXT: image_atomic_add v0, v2, s[0:7] dmask:0x1 unorm glc slc
1980 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1981 ; GFX90A-NEXT: ; return to shader part epilog
1983 ; GFX10PLUS-LABEL: atomic_add_i32_1d_slc:
1984 ; GFX10PLUS: ; %bb.0: ; %main_body
1985 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1986 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1987 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1988 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1989 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1990 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1991 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1992 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1993 ; GFX10PLUS-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc slc
1994 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1995 ; GFX10PLUS-NEXT: ; return to shader part epilog
1997 ; GFX12-LABEL: atomic_add_i32_1d_slc:
1998 ; GFX12: ; %bb.0: ; %main_body
1999 ; GFX12-NEXT: s_mov_b32 s0, s2
2000 ; GFX12-NEXT: s_mov_b32 s1, s3
2001 ; GFX12-NEXT: s_mov_b32 s2, s4
2002 ; GFX12-NEXT: s_mov_b32 s3, s5
2003 ; GFX12-NEXT: s_mov_b32 s4, s6
2004 ; GFX12-NEXT: s_mov_b32 s5, s7
2005 ; GFX12-NEXT: s_mov_b32 s6, s8
2006 ; GFX12-NEXT: s_mov_b32 s7, s9
2007 ; GFX12-NEXT: image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT_RETURN
2008 ; GFX12-NEXT: s_wait_loadcnt 0x0
2009 ; GFX12-NEXT: ; return to shader part epilog
2011 %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
2012 %out = bitcast i32 %v to float
; Test: i64 image atomic swap on a 1d image; both trailing i32 operands of the
; call are 0 and the i64 result is bitcast to <2 x float> and returned.
; Expected output per the checks below: every target uses the register pair
; v[0:1] as the first instruction operand with dmask:0x3 and takes the
; coordinate from v2; the GFX10PLUS runs add dim:SQ_RSRC_IMG_1D, and GFX12
; keeps the image_atomic_swap mnemonic with th:TH_ATOMIC_RETURN.
2016 define amdgpu_ps <2 x float> @atomic_swap_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2017 ; GFX6-LABEL: atomic_swap_i64_1d:
2018 ; GFX6: ; %bb.0: ; %main_body
2019 ; GFX6-NEXT: s_mov_b32 s0, s2
2020 ; GFX6-NEXT: s_mov_b32 s1, s3
2021 ; GFX6-NEXT: s_mov_b32 s2, s4
2022 ; GFX6-NEXT: s_mov_b32 s3, s5
2023 ; GFX6-NEXT: s_mov_b32 s4, s6
2024 ; GFX6-NEXT: s_mov_b32 s5, s7
2025 ; GFX6-NEXT: s_mov_b32 s6, s8
2026 ; GFX6-NEXT: s_mov_b32 s7, s9
2027 ; GFX6-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2028 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2029 ; GFX6-NEXT: ; return to shader part epilog
2031 ; GFX8-LABEL: atomic_swap_i64_1d:
2032 ; GFX8: ; %bb.0: ; %main_body
2033 ; GFX8-NEXT: s_mov_b32 s0, s2
2034 ; GFX8-NEXT: s_mov_b32 s1, s3
2035 ; GFX8-NEXT: s_mov_b32 s2, s4
2036 ; GFX8-NEXT: s_mov_b32 s3, s5
2037 ; GFX8-NEXT: s_mov_b32 s4, s6
2038 ; GFX8-NEXT: s_mov_b32 s5, s7
2039 ; GFX8-NEXT: s_mov_b32 s6, s8
2040 ; GFX8-NEXT: s_mov_b32 s7, s9
2041 ; GFX8-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2042 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2043 ; GFX8-NEXT: ; return to shader part epilog
2045 ; GFX900-LABEL: atomic_swap_i64_1d:
2046 ; GFX900: ; %bb.0: ; %main_body
2047 ; GFX900-NEXT: s_mov_b32 s0, s2
2048 ; GFX900-NEXT: s_mov_b32 s1, s3
2049 ; GFX900-NEXT: s_mov_b32 s2, s4
2050 ; GFX900-NEXT: s_mov_b32 s3, s5
2051 ; GFX900-NEXT: s_mov_b32 s4, s6
2052 ; GFX900-NEXT: s_mov_b32 s5, s7
2053 ; GFX900-NEXT: s_mov_b32 s6, s8
2054 ; GFX900-NEXT: s_mov_b32 s7, s9
2055 ; GFX900-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2056 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2057 ; GFX900-NEXT: ; return to shader part epilog
2059 ; GFX90A-LABEL: atomic_swap_i64_1d:
2060 ; GFX90A: ; %bb.0: ; %main_body
2061 ; GFX90A-NEXT: s_mov_b32 s0, s2
2062 ; GFX90A-NEXT: s_mov_b32 s1, s3
2063 ; GFX90A-NEXT: s_mov_b32 s2, s4
2064 ; GFX90A-NEXT: s_mov_b32 s3, s5
2065 ; GFX90A-NEXT: s_mov_b32 s4, s6
2066 ; GFX90A-NEXT: s_mov_b32 s5, s7
2067 ; GFX90A-NEXT: s_mov_b32 s6, s8
2068 ; GFX90A-NEXT: s_mov_b32 s7, s9
2069 ; GFX90A-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2070 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2071 ; GFX90A-NEXT: ; return to shader part epilog
2073 ; GFX10PLUS-LABEL: atomic_swap_i64_1d:
2074 ; GFX10PLUS: ; %bb.0: ; %main_body
2075 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2076 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2077 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2078 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2079 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2080 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2081 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2082 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2083 ; GFX10PLUS-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2084 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2085 ; GFX10PLUS-NEXT: ; return to shader part epilog
2087 ; GFX12-LABEL: atomic_swap_i64_1d:
2088 ; GFX12: ; %bb.0: ; %main_body
2089 ; GFX12-NEXT: s_mov_b32 s0, s2
2090 ; GFX12-NEXT: s_mov_b32 s1, s3
2091 ; GFX12-NEXT: s_mov_b32 s2, s4
2092 ; GFX12-NEXT: s_mov_b32 s3, s5
2093 ; GFX12-NEXT: s_mov_b32 s4, s6
2094 ; GFX12-NEXT: s_mov_b32 s5, s7
2095 ; GFX12-NEXT: s_mov_b32 s6, s8
2096 ; GFX12-NEXT: s_mov_b32 s7, s9
2097 ; GFX12-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
2098 ; GFX12-NEXT: s_wait_loadcnt 0x0
2099 ; GFX12-NEXT: ; return to shader part epilog
2101 %v = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2102 %out = bitcast i64 %v to <2 x float>
2103 ret <2 x float> %out
; Test: i64 image atomic add on a 1d image; both trailing i32 operands of the
; call are 0 and the i64 result is bitcast to <2 x float> and returned.
; Expected output per the checks below: every target uses the register pair
; v[0:1] as the first instruction operand with dmask:0x3 and takes the
; coordinate from v2; the GFX10PLUS runs add dim:SQ_RSRC_IMG_1D, and GFX12
; emits image_atomic_add_uint with th:TH_ATOMIC_RETURN.
2106 define amdgpu_ps <2 x float> @atomic_add_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2107 ; GFX6-LABEL: atomic_add_i64_1d:
2108 ; GFX6: ; %bb.0: ; %main_body
2109 ; GFX6-NEXT: s_mov_b32 s0, s2
2110 ; GFX6-NEXT: s_mov_b32 s1, s3
2111 ; GFX6-NEXT: s_mov_b32 s2, s4
2112 ; GFX6-NEXT: s_mov_b32 s3, s5
2113 ; GFX6-NEXT: s_mov_b32 s4, s6
2114 ; GFX6-NEXT: s_mov_b32 s5, s7
2115 ; GFX6-NEXT: s_mov_b32 s6, s8
2116 ; GFX6-NEXT: s_mov_b32 s7, s9
2117 ; GFX6-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2118 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2119 ; GFX6-NEXT: ; return to shader part epilog
2121 ; GFX8-LABEL: atomic_add_i64_1d:
2122 ; GFX8: ; %bb.0: ; %main_body
2123 ; GFX8-NEXT: s_mov_b32 s0, s2
2124 ; GFX8-NEXT: s_mov_b32 s1, s3
2125 ; GFX8-NEXT: s_mov_b32 s2, s4
2126 ; GFX8-NEXT: s_mov_b32 s3, s5
2127 ; GFX8-NEXT: s_mov_b32 s4, s6
2128 ; GFX8-NEXT: s_mov_b32 s5, s7
2129 ; GFX8-NEXT: s_mov_b32 s6, s8
2130 ; GFX8-NEXT: s_mov_b32 s7, s9
2131 ; GFX8-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2132 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2133 ; GFX8-NEXT: ; return to shader part epilog
2135 ; GFX900-LABEL: atomic_add_i64_1d:
2136 ; GFX900: ; %bb.0: ; %main_body
2137 ; GFX900-NEXT: s_mov_b32 s0, s2
2138 ; GFX900-NEXT: s_mov_b32 s1, s3
2139 ; GFX900-NEXT: s_mov_b32 s2, s4
2140 ; GFX900-NEXT: s_mov_b32 s3, s5
2141 ; GFX900-NEXT: s_mov_b32 s4, s6
2142 ; GFX900-NEXT: s_mov_b32 s5, s7
2143 ; GFX900-NEXT: s_mov_b32 s6, s8
2144 ; GFX900-NEXT: s_mov_b32 s7, s9
2145 ; GFX900-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2146 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2147 ; GFX900-NEXT: ; return to shader part epilog
2149 ; GFX90A-LABEL: atomic_add_i64_1d:
2150 ; GFX90A: ; %bb.0: ; %main_body
2151 ; GFX90A-NEXT: s_mov_b32 s0, s2
2152 ; GFX90A-NEXT: s_mov_b32 s1, s3
2153 ; GFX90A-NEXT: s_mov_b32 s2, s4
2154 ; GFX90A-NEXT: s_mov_b32 s3, s5
2155 ; GFX90A-NEXT: s_mov_b32 s4, s6
2156 ; GFX90A-NEXT: s_mov_b32 s5, s7
2157 ; GFX90A-NEXT: s_mov_b32 s6, s8
2158 ; GFX90A-NEXT: s_mov_b32 s7, s9
2159 ; GFX90A-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2160 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2161 ; GFX90A-NEXT: ; return to shader part epilog
2163 ; GFX10PLUS-LABEL: atomic_add_i64_1d:
2164 ; GFX10PLUS: ; %bb.0: ; %main_body
2165 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2166 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2167 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2168 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2169 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2170 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2171 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2172 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2173 ; GFX10PLUS-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2174 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2175 ; GFX10PLUS-NEXT: ; return to shader part epilog
2177 ; GFX12-LABEL: atomic_add_i64_1d:
2178 ; GFX12: ; %bb.0: ; %main_body
2179 ; GFX12-NEXT: s_mov_b32 s0, s2
2180 ; GFX12-NEXT: s_mov_b32 s1, s3
2181 ; GFX12-NEXT: s_mov_b32 s2, s4
2182 ; GFX12-NEXT: s_mov_b32 s3, s5
2183 ; GFX12-NEXT: s_mov_b32 s4, s6
2184 ; GFX12-NEXT: s_mov_b32 s5, s7
2185 ; GFX12-NEXT: s_mov_b32 s6, s8
2186 ; GFX12-NEXT: s_mov_b32 s7, s9
2187 ; GFX12-NEXT: image_atomic_add_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
2188 ; GFX12-NEXT: s_wait_loadcnt 0x0
2189 ; GFX12-NEXT: ; return to shader part epilog
2191 %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2192 %out = bitcast i64 %v to <2 x float>
2193 ret <2 x float> %out
; Test: i64 image atomic sub on a 1d image; both trailing i32 operands of the
; call are 0 and the i64 result is bitcast to <2 x float> and returned.
; Expected output per the checks below: every target uses the register pair
; v[0:1] as the first instruction operand with dmask:0x3 and takes the
; coordinate from v2; the GFX10PLUS runs add dim:SQ_RSRC_IMG_1D, and GFX12
; emits image_atomic_sub_uint with th:TH_ATOMIC_RETURN.
2196 define amdgpu_ps <2 x float> @atomic_sub_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2197 ; GFX6-LABEL: atomic_sub_i64_1d:
2198 ; GFX6: ; %bb.0: ; %main_body
2199 ; GFX6-NEXT: s_mov_b32 s0, s2
2200 ; GFX6-NEXT: s_mov_b32 s1, s3
2201 ; GFX6-NEXT: s_mov_b32 s2, s4
2202 ; GFX6-NEXT: s_mov_b32 s3, s5
2203 ; GFX6-NEXT: s_mov_b32 s4, s6
2204 ; GFX6-NEXT: s_mov_b32 s5, s7
2205 ; GFX6-NEXT: s_mov_b32 s6, s8
2206 ; GFX6-NEXT: s_mov_b32 s7, s9
2207 ; GFX6-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2208 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2209 ; GFX6-NEXT: ; return to shader part epilog
2211 ; GFX8-LABEL: atomic_sub_i64_1d:
2212 ; GFX8: ; %bb.0: ; %main_body
2213 ; GFX8-NEXT: s_mov_b32 s0, s2
2214 ; GFX8-NEXT: s_mov_b32 s1, s3
2215 ; GFX8-NEXT: s_mov_b32 s2, s4
2216 ; GFX8-NEXT: s_mov_b32 s3, s5
2217 ; GFX8-NEXT: s_mov_b32 s4, s6
2218 ; GFX8-NEXT: s_mov_b32 s5, s7
2219 ; GFX8-NEXT: s_mov_b32 s6, s8
2220 ; GFX8-NEXT: s_mov_b32 s7, s9
2221 ; GFX8-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2222 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2223 ; GFX8-NEXT: ; return to shader part epilog
2225 ; GFX900-LABEL: atomic_sub_i64_1d:
2226 ; GFX900: ; %bb.0: ; %main_body
2227 ; GFX900-NEXT: s_mov_b32 s0, s2
2228 ; GFX900-NEXT: s_mov_b32 s1, s3
2229 ; GFX900-NEXT: s_mov_b32 s2, s4
2230 ; GFX900-NEXT: s_mov_b32 s3, s5
2231 ; GFX900-NEXT: s_mov_b32 s4, s6
2232 ; GFX900-NEXT: s_mov_b32 s5, s7
2233 ; GFX900-NEXT: s_mov_b32 s6, s8
2234 ; GFX900-NEXT: s_mov_b32 s7, s9
2235 ; GFX900-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2236 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2237 ; GFX900-NEXT: ; return to shader part epilog
2239 ; GFX90A-LABEL: atomic_sub_i64_1d:
2240 ; GFX90A: ; %bb.0: ; %main_body
2241 ; GFX90A-NEXT: s_mov_b32 s0, s2
2242 ; GFX90A-NEXT: s_mov_b32 s1, s3
2243 ; GFX90A-NEXT: s_mov_b32 s2, s4
2244 ; GFX90A-NEXT: s_mov_b32 s3, s5
2245 ; GFX90A-NEXT: s_mov_b32 s4, s6
2246 ; GFX90A-NEXT: s_mov_b32 s5, s7
2247 ; GFX90A-NEXT: s_mov_b32 s6, s8
2248 ; GFX90A-NEXT: s_mov_b32 s7, s9
2249 ; GFX90A-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2250 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2251 ; GFX90A-NEXT: ; return to shader part epilog
2253 ; GFX10PLUS-LABEL: atomic_sub_i64_1d:
2254 ; GFX10PLUS: ; %bb.0: ; %main_body
2255 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2256 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2257 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2258 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2259 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2260 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2261 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2262 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2263 ; GFX10PLUS-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2264 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2265 ; GFX10PLUS-NEXT: ; return to shader part epilog
2267 ; GFX12-LABEL: atomic_sub_i64_1d:
2268 ; GFX12: ; %bb.0: ; %main_body
2269 ; GFX12-NEXT: s_mov_b32 s0, s2
2270 ; GFX12-NEXT: s_mov_b32 s1, s3
2271 ; GFX12-NEXT: s_mov_b32 s2, s4
2272 ; GFX12-NEXT: s_mov_b32 s3, s5
2273 ; GFX12-NEXT: s_mov_b32 s4, s6
2274 ; GFX12-NEXT: s_mov_b32 s5, s7
2275 ; GFX12-NEXT: s_mov_b32 s6, s8
2276 ; GFX12-NEXT: s_mov_b32 s7, s9
2277 ; GFX12-NEXT: image_atomic_sub_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
2278 ; GFX12-NEXT: s_wait_loadcnt 0x0
2279 ; GFX12-NEXT: ; return to shader part epilog
2281 %v = call i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2282 %out = bitcast i64 %v to <2 x float>
2283 ret <2 x float> %out
; Test: i64 image atomic smin on a 1d image; both trailing i32 operands of the
; call are 0 and the i64 result is bitcast to <2 x float> and returned.
; Expected output per the checks below: every target uses the register pair
; v[0:1] as the first instruction operand with dmask:0x3 and takes the
; coordinate from v2; the GFX10PLUS runs add dim:SQ_RSRC_IMG_1D, and GFX12
; spells the instruction image_atomic_min_int with th:TH_ATOMIC_RETURN.
2286 define amdgpu_ps <2 x float> @atomic_smin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2287 ; GFX6-LABEL: atomic_smin_i64_1d:
2288 ; GFX6: ; %bb.0: ; %main_body
2289 ; GFX6-NEXT: s_mov_b32 s0, s2
2290 ; GFX6-NEXT: s_mov_b32 s1, s3
2291 ; GFX6-NEXT: s_mov_b32 s2, s4
2292 ; GFX6-NEXT: s_mov_b32 s3, s5
2293 ; GFX6-NEXT: s_mov_b32 s4, s6
2294 ; GFX6-NEXT: s_mov_b32 s5, s7
2295 ; GFX6-NEXT: s_mov_b32 s6, s8
2296 ; GFX6-NEXT: s_mov_b32 s7, s9
2297 ; GFX6-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2298 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2299 ; GFX6-NEXT: ; return to shader part epilog
2301 ; GFX8-LABEL: atomic_smin_i64_1d:
2302 ; GFX8: ; %bb.0: ; %main_body
2303 ; GFX8-NEXT: s_mov_b32 s0, s2
2304 ; GFX8-NEXT: s_mov_b32 s1, s3
2305 ; GFX8-NEXT: s_mov_b32 s2, s4
2306 ; GFX8-NEXT: s_mov_b32 s3, s5
2307 ; GFX8-NEXT: s_mov_b32 s4, s6
2308 ; GFX8-NEXT: s_mov_b32 s5, s7
2309 ; GFX8-NEXT: s_mov_b32 s6, s8
2310 ; GFX8-NEXT: s_mov_b32 s7, s9
2311 ; GFX8-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2312 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2313 ; GFX8-NEXT: ; return to shader part epilog
2315 ; GFX900-LABEL: atomic_smin_i64_1d:
2316 ; GFX900: ; %bb.0: ; %main_body
2317 ; GFX900-NEXT: s_mov_b32 s0, s2
2318 ; GFX900-NEXT: s_mov_b32 s1, s3
2319 ; GFX900-NEXT: s_mov_b32 s2, s4
2320 ; GFX900-NEXT: s_mov_b32 s3, s5
2321 ; GFX900-NEXT: s_mov_b32 s4, s6
2322 ; GFX900-NEXT: s_mov_b32 s5, s7
2323 ; GFX900-NEXT: s_mov_b32 s6, s8
2324 ; GFX900-NEXT: s_mov_b32 s7, s9
2325 ; GFX900-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2326 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2327 ; GFX900-NEXT: ; return to shader part epilog
2329 ; GFX90A-LABEL: atomic_smin_i64_1d:
2330 ; GFX90A: ; %bb.0: ; %main_body
2331 ; GFX90A-NEXT: s_mov_b32 s0, s2
2332 ; GFX90A-NEXT: s_mov_b32 s1, s3
2333 ; GFX90A-NEXT: s_mov_b32 s2, s4
2334 ; GFX90A-NEXT: s_mov_b32 s3, s5
2335 ; GFX90A-NEXT: s_mov_b32 s4, s6
2336 ; GFX90A-NEXT: s_mov_b32 s5, s7
2337 ; GFX90A-NEXT: s_mov_b32 s6, s8
2338 ; GFX90A-NEXT: s_mov_b32 s7, s9
2339 ; GFX90A-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2340 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2341 ; GFX90A-NEXT: ; return to shader part epilog
2343 ; GFX10PLUS-LABEL: atomic_smin_i64_1d:
2344 ; GFX10PLUS: ; %bb.0: ; %main_body
2345 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2346 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2347 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2348 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2349 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2350 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2351 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2352 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2353 ; GFX10PLUS-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2354 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2355 ; GFX10PLUS-NEXT: ; return to shader part epilog
2357 ; GFX12-LABEL: atomic_smin_i64_1d:
2358 ; GFX12: ; %bb.0: ; %main_body
2359 ; GFX12-NEXT: s_mov_b32 s0, s2
2360 ; GFX12-NEXT: s_mov_b32 s1, s3
2361 ; GFX12-NEXT: s_mov_b32 s2, s4
2362 ; GFX12-NEXT: s_mov_b32 s3, s5
2363 ; GFX12-NEXT: s_mov_b32 s4, s6
2364 ; GFX12-NEXT: s_mov_b32 s5, s7
2365 ; GFX12-NEXT: s_mov_b32 s6, s8
2366 ; GFX12-NEXT: s_mov_b32 s7, s9
2367 ; GFX12-NEXT: image_atomic_min_int v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
2368 ; GFX12-NEXT: s_wait_loadcnt 0x0
2369 ; GFX12-NEXT: ; return to shader part epilog
2371 %v = call i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2372 %out = bitcast i64 %v to <2 x float>
2373 ret <2 x float> %out
; Test: i64 image atomic umin on a 1d image; both trailing i32 operands of the
; call are 0 and the i64 result is bitcast to <2 x float> and returned.
; Expected output per the checks below: every target uses the register pair
; v[0:1] as the first instruction operand with dmask:0x3 and takes the
; coordinate from v2; the GFX10PLUS runs add dim:SQ_RSRC_IMG_1D, and GFX12
; spells the instruction image_atomic_min_uint with th:TH_ATOMIC_RETURN.
2376 define amdgpu_ps <2 x float> @atomic_umin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2377 ; GFX6-LABEL: atomic_umin_i64_1d:
2378 ; GFX6: ; %bb.0: ; %main_body
2379 ; GFX6-NEXT: s_mov_b32 s0, s2
2380 ; GFX6-NEXT: s_mov_b32 s1, s3
2381 ; GFX6-NEXT: s_mov_b32 s2, s4
2382 ; GFX6-NEXT: s_mov_b32 s3, s5
2383 ; GFX6-NEXT: s_mov_b32 s4, s6
2384 ; GFX6-NEXT: s_mov_b32 s5, s7
2385 ; GFX6-NEXT: s_mov_b32 s6, s8
2386 ; GFX6-NEXT: s_mov_b32 s7, s9
2387 ; GFX6-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2388 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2389 ; GFX6-NEXT: ; return to shader part epilog
2391 ; GFX8-LABEL: atomic_umin_i64_1d:
2392 ; GFX8: ; %bb.0: ; %main_body
2393 ; GFX8-NEXT: s_mov_b32 s0, s2
2394 ; GFX8-NEXT: s_mov_b32 s1, s3
2395 ; GFX8-NEXT: s_mov_b32 s2, s4
2396 ; GFX8-NEXT: s_mov_b32 s3, s5
2397 ; GFX8-NEXT: s_mov_b32 s4, s6
2398 ; GFX8-NEXT: s_mov_b32 s5, s7
2399 ; GFX8-NEXT: s_mov_b32 s6, s8
2400 ; GFX8-NEXT: s_mov_b32 s7, s9
2401 ; GFX8-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2402 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2403 ; GFX8-NEXT: ; return to shader part epilog
2405 ; GFX900-LABEL: atomic_umin_i64_1d:
2406 ; GFX900: ; %bb.0: ; %main_body
2407 ; GFX900-NEXT: s_mov_b32 s0, s2
2408 ; GFX900-NEXT: s_mov_b32 s1, s3
2409 ; GFX900-NEXT: s_mov_b32 s2, s4
2410 ; GFX900-NEXT: s_mov_b32 s3, s5
2411 ; GFX900-NEXT: s_mov_b32 s4, s6
2412 ; GFX900-NEXT: s_mov_b32 s5, s7
2413 ; GFX900-NEXT: s_mov_b32 s6, s8
2414 ; GFX900-NEXT: s_mov_b32 s7, s9
2415 ; GFX900-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2416 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2417 ; GFX900-NEXT: ; return to shader part epilog
2419 ; GFX90A-LABEL: atomic_umin_i64_1d:
2420 ; GFX90A: ; %bb.0: ; %main_body
2421 ; GFX90A-NEXT: s_mov_b32 s0, s2
2422 ; GFX90A-NEXT: s_mov_b32 s1, s3
2423 ; GFX90A-NEXT: s_mov_b32 s2, s4
2424 ; GFX90A-NEXT: s_mov_b32 s3, s5
2425 ; GFX90A-NEXT: s_mov_b32 s4, s6
2426 ; GFX90A-NEXT: s_mov_b32 s5, s7
2427 ; GFX90A-NEXT: s_mov_b32 s6, s8
2428 ; GFX90A-NEXT: s_mov_b32 s7, s9
2429 ; GFX90A-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2430 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2431 ; GFX90A-NEXT: ; return to shader part epilog
2433 ; GFX10PLUS-LABEL: atomic_umin_i64_1d:
2434 ; GFX10PLUS: ; %bb.0: ; %main_body
2435 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2436 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2437 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2438 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2439 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2440 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2441 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2442 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2443 ; GFX10PLUS-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2444 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2445 ; GFX10PLUS-NEXT: ; return to shader part epilog
2447 ; GFX12-LABEL: atomic_umin_i64_1d:
2448 ; GFX12: ; %bb.0: ; %main_body
2449 ; GFX12-NEXT: s_mov_b32 s0, s2
2450 ; GFX12-NEXT: s_mov_b32 s1, s3
2451 ; GFX12-NEXT: s_mov_b32 s2, s4
2452 ; GFX12-NEXT: s_mov_b32 s3, s5
2453 ; GFX12-NEXT: s_mov_b32 s4, s6
2454 ; GFX12-NEXT: s_mov_b32 s5, s7
2455 ; GFX12-NEXT: s_mov_b32 s6, s8
2456 ; GFX12-NEXT: s_mov_b32 s7, s9
2457 ; GFX12-NEXT: image_atomic_min_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
2458 ; GFX12-NEXT: s_wait_loadcnt 0x0
2459 ; GFX12-NEXT: ; return to shader part epilog
2461 %v = call i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2462 %out = bitcast i64 %v to <2 x float>
2463 ret <2 x float> %out
; atomic_smax_i64_1d: calls @llvm.amdgcn.image.atomic.smax.1d.i64.i32 with an
; i64 %data operand and an i32 %s operand, and returns the i64 result bitcast
; to <2 x float>.
; Expected output on every run line: eight s_mov_b32 copies move s2-s9 into
; s0-s7 (the s[0:7] operand), followed by image_atomic_smax on v[0:1], v2 with
; dmask:0x3 (spelled image_atomic_max_int on the GFX12 run), then
; s_waitcnt vmcnt(0) (plus expcnt(0) on the GFX6 run, s_wait_loadcnt 0x0 on the
; GFX12 run) before the shader part epilog.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
2466 define amdgpu_ps <2 x float> @atomic_smax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2467 ; GFX6-LABEL: atomic_smax_i64_1d:
2468 ; GFX6: ; %bb.0: ; %main_body
2469 ; GFX6-NEXT: s_mov_b32 s0, s2
2470 ; GFX6-NEXT: s_mov_b32 s1, s3
2471 ; GFX6-NEXT: s_mov_b32 s2, s4
2472 ; GFX6-NEXT: s_mov_b32 s3, s5
2473 ; GFX6-NEXT: s_mov_b32 s4, s6
2474 ; GFX6-NEXT: s_mov_b32 s5, s7
2475 ; GFX6-NEXT: s_mov_b32 s6, s8
2476 ; GFX6-NEXT: s_mov_b32 s7, s9
2477 ; GFX6-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2478 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2479 ; GFX6-NEXT: ; return to shader part epilog
2481 ; GFX8-LABEL: atomic_smax_i64_1d:
2482 ; GFX8: ; %bb.0: ; %main_body
2483 ; GFX8-NEXT: s_mov_b32 s0, s2
2484 ; GFX8-NEXT: s_mov_b32 s1, s3
2485 ; GFX8-NEXT: s_mov_b32 s2, s4
2486 ; GFX8-NEXT: s_mov_b32 s3, s5
2487 ; GFX8-NEXT: s_mov_b32 s4, s6
2488 ; GFX8-NEXT: s_mov_b32 s5, s7
2489 ; GFX8-NEXT: s_mov_b32 s6, s8
2490 ; GFX8-NEXT: s_mov_b32 s7, s9
2491 ; GFX8-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2492 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2493 ; GFX8-NEXT: ; return to shader part epilog
2495 ; GFX900-LABEL: atomic_smax_i64_1d:
2496 ; GFX900: ; %bb.0: ; %main_body
2497 ; GFX900-NEXT: s_mov_b32 s0, s2
2498 ; GFX900-NEXT: s_mov_b32 s1, s3
2499 ; GFX900-NEXT: s_mov_b32 s2, s4
2500 ; GFX900-NEXT: s_mov_b32 s3, s5
2501 ; GFX900-NEXT: s_mov_b32 s4, s6
2502 ; GFX900-NEXT: s_mov_b32 s5, s7
2503 ; GFX900-NEXT: s_mov_b32 s6, s8
2504 ; GFX900-NEXT: s_mov_b32 s7, s9
2505 ; GFX900-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2506 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2507 ; GFX900-NEXT: ; return to shader part epilog
2509 ; GFX90A-LABEL: atomic_smax_i64_1d:
2510 ; GFX90A: ; %bb.0: ; %main_body
2511 ; GFX90A-NEXT: s_mov_b32 s0, s2
2512 ; GFX90A-NEXT: s_mov_b32 s1, s3
2513 ; GFX90A-NEXT: s_mov_b32 s2, s4
2514 ; GFX90A-NEXT: s_mov_b32 s3, s5
2515 ; GFX90A-NEXT: s_mov_b32 s4, s6
2516 ; GFX90A-NEXT: s_mov_b32 s5, s7
2517 ; GFX90A-NEXT: s_mov_b32 s6, s8
2518 ; GFX90A-NEXT: s_mov_b32 s7, s9
2519 ; GFX90A-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2520 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2521 ; GFX90A-NEXT: ; return to shader part epilog
2523 ; GFX10PLUS-LABEL: atomic_smax_i64_1d:
2524 ; GFX10PLUS: ; %bb.0: ; %main_body
2525 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2526 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2527 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2528 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2529 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2530 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2531 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2532 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2533 ; GFX10PLUS-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2534 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2535 ; GFX10PLUS-NEXT: ; return to shader part epilog
2537 ; GFX12-LABEL: atomic_smax_i64_1d:
2538 ; GFX12: ; %bb.0: ; %main_body
2539 ; GFX12-NEXT: s_mov_b32 s0, s2
2540 ; GFX12-NEXT: s_mov_b32 s1, s3
2541 ; GFX12-NEXT: s_mov_b32 s2, s4
2542 ; GFX12-NEXT: s_mov_b32 s3, s5
2543 ; GFX12-NEXT: s_mov_b32 s4, s6
2544 ; GFX12-NEXT: s_mov_b32 s5, s7
2545 ; GFX12-NEXT: s_mov_b32 s6, s8
2546 ; GFX12-NEXT: s_mov_b32 s7, s9
2547 ; GFX12-NEXT: image_atomic_max_int v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
2548 ; GFX12-NEXT: s_wait_loadcnt 0x0
2549 ; GFX12-NEXT: ; return to shader part epilog
2551 %v = call i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2552 %out = bitcast i64 %v to <2 x float>
2553 ret <2 x float> %out
; atomic_umax_i64_1d: calls @llvm.amdgcn.image.atomic.umax.1d.i64.i32 with an
; i64 %data operand and an i32 %s operand, and returns the i64 result bitcast
; to <2 x float>.
; Expected output on every run line: eight s_mov_b32 copies move s2-s9 into
; s0-s7 (the s[0:7] operand), followed by image_atomic_umax on v[0:1], v2 with
; dmask:0x3 (spelled image_atomic_max_uint on the GFX12 run), then
; s_waitcnt vmcnt(0) (plus expcnt(0) on the GFX6 run, s_wait_loadcnt 0x0 on the
; GFX12 run) before the shader part epilog.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
2556 define amdgpu_ps <2 x float> @atomic_umax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2557 ; GFX6-LABEL: atomic_umax_i64_1d:
2558 ; GFX6: ; %bb.0: ; %main_body
2559 ; GFX6-NEXT: s_mov_b32 s0, s2
2560 ; GFX6-NEXT: s_mov_b32 s1, s3
2561 ; GFX6-NEXT: s_mov_b32 s2, s4
2562 ; GFX6-NEXT: s_mov_b32 s3, s5
2563 ; GFX6-NEXT: s_mov_b32 s4, s6
2564 ; GFX6-NEXT: s_mov_b32 s5, s7
2565 ; GFX6-NEXT: s_mov_b32 s6, s8
2566 ; GFX6-NEXT: s_mov_b32 s7, s9
2567 ; GFX6-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2568 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2569 ; GFX6-NEXT: ; return to shader part epilog
2571 ; GFX8-LABEL: atomic_umax_i64_1d:
2572 ; GFX8: ; %bb.0: ; %main_body
2573 ; GFX8-NEXT: s_mov_b32 s0, s2
2574 ; GFX8-NEXT: s_mov_b32 s1, s3
2575 ; GFX8-NEXT: s_mov_b32 s2, s4
2576 ; GFX8-NEXT: s_mov_b32 s3, s5
2577 ; GFX8-NEXT: s_mov_b32 s4, s6
2578 ; GFX8-NEXT: s_mov_b32 s5, s7
2579 ; GFX8-NEXT: s_mov_b32 s6, s8
2580 ; GFX8-NEXT: s_mov_b32 s7, s9
2581 ; GFX8-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2582 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2583 ; GFX8-NEXT: ; return to shader part epilog
2585 ; GFX900-LABEL: atomic_umax_i64_1d:
2586 ; GFX900: ; %bb.0: ; %main_body
2587 ; GFX900-NEXT: s_mov_b32 s0, s2
2588 ; GFX900-NEXT: s_mov_b32 s1, s3
2589 ; GFX900-NEXT: s_mov_b32 s2, s4
2590 ; GFX900-NEXT: s_mov_b32 s3, s5
2591 ; GFX900-NEXT: s_mov_b32 s4, s6
2592 ; GFX900-NEXT: s_mov_b32 s5, s7
2593 ; GFX900-NEXT: s_mov_b32 s6, s8
2594 ; GFX900-NEXT: s_mov_b32 s7, s9
2595 ; GFX900-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2596 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2597 ; GFX900-NEXT: ; return to shader part epilog
2599 ; GFX90A-LABEL: atomic_umax_i64_1d:
2600 ; GFX90A: ; %bb.0: ; %main_body
2601 ; GFX90A-NEXT: s_mov_b32 s0, s2
2602 ; GFX90A-NEXT: s_mov_b32 s1, s3
2603 ; GFX90A-NEXT: s_mov_b32 s2, s4
2604 ; GFX90A-NEXT: s_mov_b32 s3, s5
2605 ; GFX90A-NEXT: s_mov_b32 s4, s6
2606 ; GFX90A-NEXT: s_mov_b32 s5, s7
2607 ; GFX90A-NEXT: s_mov_b32 s6, s8
2608 ; GFX90A-NEXT: s_mov_b32 s7, s9
2609 ; GFX90A-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2610 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2611 ; GFX90A-NEXT: ; return to shader part epilog
2613 ; GFX10PLUS-LABEL: atomic_umax_i64_1d:
2614 ; GFX10PLUS: ; %bb.0: ; %main_body
2615 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2616 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2617 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2618 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2619 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2620 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2621 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2622 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2623 ; GFX10PLUS-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2624 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2625 ; GFX10PLUS-NEXT: ; return to shader part epilog
2627 ; GFX12-LABEL: atomic_umax_i64_1d:
2628 ; GFX12: ; %bb.0: ; %main_body
2629 ; GFX12-NEXT: s_mov_b32 s0, s2
2630 ; GFX12-NEXT: s_mov_b32 s1, s3
2631 ; GFX12-NEXT: s_mov_b32 s2, s4
2632 ; GFX12-NEXT: s_mov_b32 s3, s5
2633 ; GFX12-NEXT: s_mov_b32 s4, s6
2634 ; GFX12-NEXT: s_mov_b32 s5, s7
2635 ; GFX12-NEXT: s_mov_b32 s6, s8
2636 ; GFX12-NEXT: s_mov_b32 s7, s9
2637 ; GFX12-NEXT: image_atomic_max_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
2638 ; GFX12-NEXT: s_wait_loadcnt 0x0
2639 ; GFX12-NEXT: ; return to shader part epilog
2641 %v = call i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2642 %out = bitcast i64 %v to <2 x float>
2643 ret <2 x float> %out
; atomic_and_i64_1d: calls @llvm.amdgcn.image.atomic.and.1d.i64.i32 with an
; i64 %data operand and an i32 %s operand, and returns the i64 result bitcast
; to <2 x float>.
; Expected output on every run line: eight s_mov_b32 copies move s2-s9 into
; s0-s7 (the s[0:7] operand), followed by image_atomic_and on v[0:1], v2 with
; dmask:0x3, then s_waitcnt vmcnt(0) (plus expcnt(0) on the GFX6 run,
; s_wait_loadcnt 0x0 on the GFX12 run) before the shader part epilog.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
2646 define amdgpu_ps <2 x float> @atomic_and_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2647 ; GFX6-LABEL: atomic_and_i64_1d:
2648 ; GFX6: ; %bb.0: ; %main_body
2649 ; GFX6-NEXT: s_mov_b32 s0, s2
2650 ; GFX6-NEXT: s_mov_b32 s1, s3
2651 ; GFX6-NEXT: s_mov_b32 s2, s4
2652 ; GFX6-NEXT: s_mov_b32 s3, s5
2653 ; GFX6-NEXT: s_mov_b32 s4, s6
2654 ; GFX6-NEXT: s_mov_b32 s5, s7
2655 ; GFX6-NEXT: s_mov_b32 s6, s8
2656 ; GFX6-NEXT: s_mov_b32 s7, s9
2657 ; GFX6-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2658 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2659 ; GFX6-NEXT: ; return to shader part epilog
2661 ; GFX8-LABEL: atomic_and_i64_1d:
2662 ; GFX8: ; %bb.0: ; %main_body
2663 ; GFX8-NEXT: s_mov_b32 s0, s2
2664 ; GFX8-NEXT: s_mov_b32 s1, s3
2665 ; GFX8-NEXT: s_mov_b32 s2, s4
2666 ; GFX8-NEXT: s_mov_b32 s3, s5
2667 ; GFX8-NEXT: s_mov_b32 s4, s6
2668 ; GFX8-NEXT: s_mov_b32 s5, s7
2669 ; GFX8-NEXT: s_mov_b32 s6, s8
2670 ; GFX8-NEXT: s_mov_b32 s7, s9
2671 ; GFX8-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2672 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2673 ; GFX8-NEXT: ; return to shader part epilog
2675 ; GFX900-LABEL: atomic_and_i64_1d:
2676 ; GFX900: ; %bb.0: ; %main_body
2677 ; GFX900-NEXT: s_mov_b32 s0, s2
2678 ; GFX900-NEXT: s_mov_b32 s1, s3
2679 ; GFX900-NEXT: s_mov_b32 s2, s4
2680 ; GFX900-NEXT: s_mov_b32 s3, s5
2681 ; GFX900-NEXT: s_mov_b32 s4, s6
2682 ; GFX900-NEXT: s_mov_b32 s5, s7
2683 ; GFX900-NEXT: s_mov_b32 s6, s8
2684 ; GFX900-NEXT: s_mov_b32 s7, s9
2685 ; GFX900-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2686 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2687 ; GFX900-NEXT: ; return to shader part epilog
2689 ; GFX90A-LABEL: atomic_and_i64_1d:
2690 ; GFX90A: ; %bb.0: ; %main_body
2691 ; GFX90A-NEXT: s_mov_b32 s0, s2
2692 ; GFX90A-NEXT: s_mov_b32 s1, s3
2693 ; GFX90A-NEXT: s_mov_b32 s2, s4
2694 ; GFX90A-NEXT: s_mov_b32 s3, s5
2695 ; GFX90A-NEXT: s_mov_b32 s4, s6
2696 ; GFX90A-NEXT: s_mov_b32 s5, s7
2697 ; GFX90A-NEXT: s_mov_b32 s6, s8
2698 ; GFX90A-NEXT: s_mov_b32 s7, s9
2699 ; GFX90A-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2700 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2701 ; GFX90A-NEXT: ; return to shader part epilog
2703 ; GFX10PLUS-LABEL: atomic_and_i64_1d:
2704 ; GFX10PLUS: ; %bb.0: ; %main_body
2705 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2706 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2707 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2708 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2709 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2710 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2711 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2712 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2713 ; GFX10PLUS-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2714 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2715 ; GFX10PLUS-NEXT: ; return to shader part epilog
2717 ; GFX12-LABEL: atomic_and_i64_1d:
2718 ; GFX12: ; %bb.0: ; %main_body
2719 ; GFX12-NEXT: s_mov_b32 s0, s2
2720 ; GFX12-NEXT: s_mov_b32 s1, s3
2721 ; GFX12-NEXT: s_mov_b32 s2, s4
2722 ; GFX12-NEXT: s_mov_b32 s3, s5
2723 ; GFX12-NEXT: s_mov_b32 s4, s6
2724 ; GFX12-NEXT: s_mov_b32 s5, s7
2725 ; GFX12-NEXT: s_mov_b32 s6, s8
2726 ; GFX12-NEXT: s_mov_b32 s7, s9
2727 ; GFX12-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
2728 ; GFX12-NEXT: s_wait_loadcnt 0x0
2729 ; GFX12-NEXT: ; return to shader part epilog
2731 %v = call i64 @llvm.amdgcn.image.atomic.and.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2732 %out = bitcast i64 %v to <2 x float>
2733 ret <2 x float> %out
; atomic_or_i64_1d: calls @llvm.amdgcn.image.atomic.or.1d.i64.i32 with an
; i64 %data operand and an i32 %s operand, and returns the i64 result bitcast
; to <2 x float>.
; Expected output on every run line: eight s_mov_b32 copies move s2-s9 into
; s0-s7 (the s[0:7] operand), followed by image_atomic_or on v[0:1], v2 with
; dmask:0x3, then s_waitcnt vmcnt(0) (plus expcnt(0) on the GFX6 run,
; s_wait_loadcnt 0x0 on the GFX12 run) before the shader part epilog.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
2736 define amdgpu_ps <2 x float> @atomic_or_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2737 ; GFX6-LABEL: atomic_or_i64_1d:
2738 ; GFX6: ; %bb.0: ; %main_body
2739 ; GFX6-NEXT: s_mov_b32 s0, s2
2740 ; GFX6-NEXT: s_mov_b32 s1, s3
2741 ; GFX6-NEXT: s_mov_b32 s2, s4
2742 ; GFX6-NEXT: s_mov_b32 s3, s5
2743 ; GFX6-NEXT: s_mov_b32 s4, s6
2744 ; GFX6-NEXT: s_mov_b32 s5, s7
2745 ; GFX6-NEXT: s_mov_b32 s6, s8
2746 ; GFX6-NEXT: s_mov_b32 s7, s9
2747 ; GFX6-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2748 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2749 ; GFX6-NEXT: ; return to shader part epilog
2751 ; GFX8-LABEL: atomic_or_i64_1d:
2752 ; GFX8: ; %bb.0: ; %main_body
2753 ; GFX8-NEXT: s_mov_b32 s0, s2
2754 ; GFX8-NEXT: s_mov_b32 s1, s3
2755 ; GFX8-NEXT: s_mov_b32 s2, s4
2756 ; GFX8-NEXT: s_mov_b32 s3, s5
2757 ; GFX8-NEXT: s_mov_b32 s4, s6
2758 ; GFX8-NEXT: s_mov_b32 s5, s7
2759 ; GFX8-NEXT: s_mov_b32 s6, s8
2760 ; GFX8-NEXT: s_mov_b32 s7, s9
2761 ; GFX8-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2762 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2763 ; GFX8-NEXT: ; return to shader part epilog
2765 ; GFX900-LABEL: atomic_or_i64_1d:
2766 ; GFX900: ; %bb.0: ; %main_body
2767 ; GFX900-NEXT: s_mov_b32 s0, s2
2768 ; GFX900-NEXT: s_mov_b32 s1, s3
2769 ; GFX900-NEXT: s_mov_b32 s2, s4
2770 ; GFX900-NEXT: s_mov_b32 s3, s5
2771 ; GFX900-NEXT: s_mov_b32 s4, s6
2772 ; GFX900-NEXT: s_mov_b32 s5, s7
2773 ; GFX900-NEXT: s_mov_b32 s6, s8
2774 ; GFX900-NEXT: s_mov_b32 s7, s9
2775 ; GFX900-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2776 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2777 ; GFX900-NEXT: ; return to shader part epilog
2779 ; GFX90A-LABEL: atomic_or_i64_1d:
2780 ; GFX90A: ; %bb.0: ; %main_body
2781 ; GFX90A-NEXT: s_mov_b32 s0, s2
2782 ; GFX90A-NEXT: s_mov_b32 s1, s3
2783 ; GFX90A-NEXT: s_mov_b32 s2, s4
2784 ; GFX90A-NEXT: s_mov_b32 s3, s5
2785 ; GFX90A-NEXT: s_mov_b32 s4, s6
2786 ; GFX90A-NEXT: s_mov_b32 s5, s7
2787 ; GFX90A-NEXT: s_mov_b32 s6, s8
2788 ; GFX90A-NEXT: s_mov_b32 s7, s9
2789 ; GFX90A-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2790 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2791 ; GFX90A-NEXT: ; return to shader part epilog
2793 ; GFX10PLUS-LABEL: atomic_or_i64_1d:
2794 ; GFX10PLUS: ; %bb.0: ; %main_body
2795 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2796 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2797 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2798 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2799 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2800 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2801 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2802 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2803 ; GFX10PLUS-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2804 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2805 ; GFX10PLUS-NEXT: ; return to shader part epilog
2807 ; GFX12-LABEL: atomic_or_i64_1d:
2808 ; GFX12: ; %bb.0: ; %main_body
2809 ; GFX12-NEXT: s_mov_b32 s0, s2
2810 ; GFX12-NEXT: s_mov_b32 s1, s3
2811 ; GFX12-NEXT: s_mov_b32 s2, s4
2812 ; GFX12-NEXT: s_mov_b32 s3, s5
2813 ; GFX12-NEXT: s_mov_b32 s4, s6
2814 ; GFX12-NEXT: s_mov_b32 s5, s7
2815 ; GFX12-NEXT: s_mov_b32 s6, s8
2816 ; GFX12-NEXT: s_mov_b32 s7, s9
2817 ; GFX12-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
2818 ; GFX12-NEXT: s_wait_loadcnt 0x0
2819 ; GFX12-NEXT: ; return to shader part epilog
2821 %v = call i64 @llvm.amdgcn.image.atomic.or.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2822 %out = bitcast i64 %v to <2 x float>
2823 ret <2 x float> %out
; atomic_xor_i64_1d: calls @llvm.amdgcn.image.atomic.xor.1d.i64.i32 with an
; i64 %data operand and an i32 %s operand, and returns the i64 result bitcast
; to <2 x float>.
; Expected output on every run line: eight s_mov_b32 copies move s2-s9 into
; s0-s7 (the s[0:7] operand), followed by image_atomic_xor on v[0:1], v2 with
; dmask:0x3, then s_waitcnt vmcnt(0) (plus expcnt(0) on the GFX6 run,
; s_wait_loadcnt 0x0 on the GFX12 run) before the shader part epilog.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
2826 define amdgpu_ps <2 x float> @atomic_xor_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2827 ; GFX6-LABEL: atomic_xor_i64_1d:
2828 ; GFX6: ; %bb.0: ; %main_body
2829 ; GFX6-NEXT: s_mov_b32 s0, s2
2830 ; GFX6-NEXT: s_mov_b32 s1, s3
2831 ; GFX6-NEXT: s_mov_b32 s2, s4
2832 ; GFX6-NEXT: s_mov_b32 s3, s5
2833 ; GFX6-NEXT: s_mov_b32 s4, s6
2834 ; GFX6-NEXT: s_mov_b32 s5, s7
2835 ; GFX6-NEXT: s_mov_b32 s6, s8
2836 ; GFX6-NEXT: s_mov_b32 s7, s9
2837 ; GFX6-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2838 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2839 ; GFX6-NEXT: ; return to shader part epilog
2841 ; GFX8-LABEL: atomic_xor_i64_1d:
2842 ; GFX8: ; %bb.0: ; %main_body
2843 ; GFX8-NEXT: s_mov_b32 s0, s2
2844 ; GFX8-NEXT: s_mov_b32 s1, s3
2845 ; GFX8-NEXT: s_mov_b32 s2, s4
2846 ; GFX8-NEXT: s_mov_b32 s3, s5
2847 ; GFX8-NEXT: s_mov_b32 s4, s6
2848 ; GFX8-NEXT: s_mov_b32 s5, s7
2849 ; GFX8-NEXT: s_mov_b32 s6, s8
2850 ; GFX8-NEXT: s_mov_b32 s7, s9
2851 ; GFX8-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2852 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2853 ; GFX8-NEXT: ; return to shader part epilog
2855 ; GFX900-LABEL: atomic_xor_i64_1d:
2856 ; GFX900: ; %bb.0: ; %main_body
2857 ; GFX900-NEXT: s_mov_b32 s0, s2
2858 ; GFX900-NEXT: s_mov_b32 s1, s3
2859 ; GFX900-NEXT: s_mov_b32 s2, s4
2860 ; GFX900-NEXT: s_mov_b32 s3, s5
2861 ; GFX900-NEXT: s_mov_b32 s4, s6
2862 ; GFX900-NEXT: s_mov_b32 s5, s7
2863 ; GFX900-NEXT: s_mov_b32 s6, s8
2864 ; GFX900-NEXT: s_mov_b32 s7, s9
2865 ; GFX900-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2866 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2867 ; GFX900-NEXT: ; return to shader part epilog
2869 ; GFX90A-LABEL: atomic_xor_i64_1d:
2870 ; GFX90A: ; %bb.0: ; %main_body
2871 ; GFX90A-NEXT: s_mov_b32 s0, s2
2872 ; GFX90A-NEXT: s_mov_b32 s1, s3
2873 ; GFX90A-NEXT: s_mov_b32 s2, s4
2874 ; GFX90A-NEXT: s_mov_b32 s3, s5
2875 ; GFX90A-NEXT: s_mov_b32 s4, s6
2876 ; GFX90A-NEXT: s_mov_b32 s5, s7
2877 ; GFX90A-NEXT: s_mov_b32 s6, s8
2878 ; GFX90A-NEXT: s_mov_b32 s7, s9
2879 ; GFX90A-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2880 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2881 ; GFX90A-NEXT: ; return to shader part epilog
2883 ; GFX10PLUS-LABEL: atomic_xor_i64_1d:
2884 ; GFX10PLUS: ; %bb.0: ; %main_body
2885 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2886 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2887 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2888 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2889 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2890 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2891 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2892 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2893 ; GFX10PLUS-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2894 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2895 ; GFX10PLUS-NEXT: ; return to shader part epilog
2897 ; GFX12-LABEL: atomic_xor_i64_1d:
2898 ; GFX12: ; %bb.0: ; %main_body
2899 ; GFX12-NEXT: s_mov_b32 s0, s2
2900 ; GFX12-NEXT: s_mov_b32 s1, s3
2901 ; GFX12-NEXT: s_mov_b32 s2, s4
2902 ; GFX12-NEXT: s_mov_b32 s3, s5
2903 ; GFX12-NEXT: s_mov_b32 s4, s6
2904 ; GFX12-NEXT: s_mov_b32 s5, s7
2905 ; GFX12-NEXT: s_mov_b32 s6, s8
2906 ; GFX12-NEXT: s_mov_b32 s7, s9
2907 ; GFX12-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
2908 ; GFX12-NEXT: s_wait_loadcnt 0x0
2909 ; GFX12-NEXT: ; return to shader part epilog
2911 %v = call i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2912 %out = bitcast i64 %v to <2 x float>
2913 ret <2 x float> %out
; atomic_inc_i64_1d: calls @llvm.amdgcn.image.atomic.inc.1d.i64.i32 with an
; i64 %data operand and an i32 %s operand, and returns the i64 result bitcast
; to <2 x float>.
; Expected output on every run line: eight s_mov_b32 copies move s2-s9 into
; s0-s7 (the s[0:7] operand), followed by image_atomic_inc on v[0:1], v2 with
; dmask:0x3 (spelled image_atomic_inc_uint on the GFX12 run), then
; s_waitcnt vmcnt(0) (plus expcnt(0) on the GFX6 run, s_wait_loadcnt 0x0 on the
; GFX12 run) before the shader part epilog.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
2916 define amdgpu_ps <2 x float> @atomic_inc_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2917 ; GFX6-LABEL: atomic_inc_i64_1d:
2918 ; GFX6: ; %bb.0: ; %main_body
2919 ; GFX6-NEXT: s_mov_b32 s0, s2
2920 ; GFX6-NEXT: s_mov_b32 s1, s3
2921 ; GFX6-NEXT: s_mov_b32 s2, s4
2922 ; GFX6-NEXT: s_mov_b32 s3, s5
2923 ; GFX6-NEXT: s_mov_b32 s4, s6
2924 ; GFX6-NEXT: s_mov_b32 s5, s7
2925 ; GFX6-NEXT: s_mov_b32 s6, s8
2926 ; GFX6-NEXT: s_mov_b32 s7, s9
2927 ; GFX6-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2928 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2929 ; GFX6-NEXT: ; return to shader part epilog
2931 ; GFX8-LABEL: atomic_inc_i64_1d:
2932 ; GFX8: ; %bb.0: ; %main_body
2933 ; GFX8-NEXT: s_mov_b32 s0, s2
2934 ; GFX8-NEXT: s_mov_b32 s1, s3
2935 ; GFX8-NEXT: s_mov_b32 s2, s4
2936 ; GFX8-NEXT: s_mov_b32 s3, s5
2937 ; GFX8-NEXT: s_mov_b32 s4, s6
2938 ; GFX8-NEXT: s_mov_b32 s5, s7
2939 ; GFX8-NEXT: s_mov_b32 s6, s8
2940 ; GFX8-NEXT: s_mov_b32 s7, s9
2941 ; GFX8-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2942 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2943 ; GFX8-NEXT: ; return to shader part epilog
2945 ; GFX900-LABEL: atomic_inc_i64_1d:
2946 ; GFX900: ; %bb.0: ; %main_body
2947 ; GFX900-NEXT: s_mov_b32 s0, s2
2948 ; GFX900-NEXT: s_mov_b32 s1, s3
2949 ; GFX900-NEXT: s_mov_b32 s2, s4
2950 ; GFX900-NEXT: s_mov_b32 s3, s5
2951 ; GFX900-NEXT: s_mov_b32 s4, s6
2952 ; GFX900-NEXT: s_mov_b32 s5, s7
2953 ; GFX900-NEXT: s_mov_b32 s6, s8
2954 ; GFX900-NEXT: s_mov_b32 s7, s9
2955 ; GFX900-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2956 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2957 ; GFX900-NEXT: ; return to shader part epilog
2959 ; GFX90A-LABEL: atomic_inc_i64_1d:
2960 ; GFX90A: ; %bb.0: ; %main_body
2961 ; GFX90A-NEXT: s_mov_b32 s0, s2
2962 ; GFX90A-NEXT: s_mov_b32 s1, s3
2963 ; GFX90A-NEXT: s_mov_b32 s2, s4
2964 ; GFX90A-NEXT: s_mov_b32 s3, s5
2965 ; GFX90A-NEXT: s_mov_b32 s4, s6
2966 ; GFX90A-NEXT: s_mov_b32 s5, s7
2967 ; GFX90A-NEXT: s_mov_b32 s6, s8
2968 ; GFX90A-NEXT: s_mov_b32 s7, s9
2969 ; GFX90A-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2970 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2971 ; GFX90A-NEXT: ; return to shader part epilog
2973 ; GFX10PLUS-LABEL: atomic_inc_i64_1d:
2974 ; GFX10PLUS: ; %bb.0: ; %main_body
2975 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2976 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2977 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2978 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2979 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2980 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2981 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2982 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2983 ; GFX10PLUS-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2984 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2985 ; GFX10PLUS-NEXT: ; return to shader part epilog
2987 ; GFX12-LABEL: atomic_inc_i64_1d:
2988 ; GFX12: ; %bb.0: ; %main_body
2989 ; GFX12-NEXT: s_mov_b32 s0, s2
2990 ; GFX12-NEXT: s_mov_b32 s1, s3
2991 ; GFX12-NEXT: s_mov_b32 s2, s4
2992 ; GFX12-NEXT: s_mov_b32 s3, s5
2993 ; GFX12-NEXT: s_mov_b32 s4, s6
2994 ; GFX12-NEXT: s_mov_b32 s5, s7
2995 ; GFX12-NEXT: s_mov_b32 s6, s8
2996 ; GFX12-NEXT: s_mov_b32 s7, s9
2997 ; GFX12-NEXT: image_atomic_inc_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
2998 ; GFX12-NEXT: s_wait_loadcnt 0x0
2999 ; GFX12-NEXT: ; return to shader part epilog
3001 %v = call i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
3002 %out = bitcast i64 %v to <2 x float>
3003 ret <2 x float> %out
; atomic_dec_i64_1d: calls @llvm.amdgcn.image.atomic.dec.1d.i64.i32 with an
; i64 %data operand and an i32 %s operand, and returns the i64 result bitcast
; to <2 x float>.
; Expected output on every run line: eight s_mov_b32 copies move s2-s9 into
; s0-s7 (the s[0:7] operand), followed by image_atomic_dec on v[0:1], v2 with
; dmask:0x3 (spelled image_atomic_dec_uint on the GFX12 run), then
; s_waitcnt vmcnt(0) (plus expcnt(0) on the GFX6 run, s_wait_loadcnt 0x0 on the
; GFX12 run) before the shader part epilog.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
3006 define amdgpu_ps <2 x float> @atomic_dec_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
3007 ; GFX6-LABEL: atomic_dec_i64_1d:
3008 ; GFX6: ; %bb.0: ; %main_body
3009 ; GFX6-NEXT: s_mov_b32 s0, s2
3010 ; GFX6-NEXT: s_mov_b32 s1, s3
3011 ; GFX6-NEXT: s_mov_b32 s2, s4
3012 ; GFX6-NEXT: s_mov_b32 s3, s5
3013 ; GFX6-NEXT: s_mov_b32 s4, s6
3014 ; GFX6-NEXT: s_mov_b32 s5, s7
3015 ; GFX6-NEXT: s_mov_b32 s6, s8
3016 ; GFX6-NEXT: s_mov_b32 s7, s9
3017 ; GFX6-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc
3018 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
3019 ; GFX6-NEXT: ; return to shader part epilog
3021 ; GFX8-LABEL: atomic_dec_i64_1d:
3022 ; GFX8: ; %bb.0: ; %main_body
3023 ; GFX8-NEXT: s_mov_b32 s0, s2
3024 ; GFX8-NEXT: s_mov_b32 s1, s3
3025 ; GFX8-NEXT: s_mov_b32 s2, s4
3026 ; GFX8-NEXT: s_mov_b32 s3, s5
3027 ; GFX8-NEXT: s_mov_b32 s4, s6
3028 ; GFX8-NEXT: s_mov_b32 s5, s7
3029 ; GFX8-NEXT: s_mov_b32 s6, s8
3030 ; GFX8-NEXT: s_mov_b32 s7, s9
3031 ; GFX8-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc
3032 ; GFX8-NEXT: s_waitcnt vmcnt(0)
3033 ; GFX8-NEXT: ; return to shader part epilog
3035 ; GFX900-LABEL: atomic_dec_i64_1d:
3036 ; GFX900: ; %bb.0: ; %main_body
3037 ; GFX900-NEXT: s_mov_b32 s0, s2
3038 ; GFX900-NEXT: s_mov_b32 s1, s3
3039 ; GFX900-NEXT: s_mov_b32 s2, s4
3040 ; GFX900-NEXT: s_mov_b32 s3, s5
3041 ; GFX900-NEXT: s_mov_b32 s4, s6
3042 ; GFX900-NEXT: s_mov_b32 s5, s7
3043 ; GFX900-NEXT: s_mov_b32 s6, s8
3044 ; GFX900-NEXT: s_mov_b32 s7, s9
3045 ; GFX900-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc
3046 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3047 ; GFX900-NEXT: ; return to shader part epilog
3049 ; GFX90A-LABEL: atomic_dec_i64_1d:
3050 ; GFX90A: ; %bb.0: ; %main_body
3051 ; GFX90A-NEXT: s_mov_b32 s0, s2
3052 ; GFX90A-NEXT: s_mov_b32 s1, s3
3053 ; GFX90A-NEXT: s_mov_b32 s2, s4
3054 ; GFX90A-NEXT: s_mov_b32 s3, s5
3055 ; GFX90A-NEXT: s_mov_b32 s4, s6
3056 ; GFX90A-NEXT: s_mov_b32 s5, s7
3057 ; GFX90A-NEXT: s_mov_b32 s6, s8
3058 ; GFX90A-NEXT: s_mov_b32 s7, s9
3059 ; GFX90A-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc
3060 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3061 ; GFX90A-NEXT: ; return to shader part epilog
3063 ; GFX10PLUS-LABEL: atomic_dec_i64_1d:
3064 ; GFX10PLUS: ; %bb.0: ; %main_body
3065 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
3066 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
3067 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
3068 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
3069 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
3070 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
3071 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
3072 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
3073 ; GFX10PLUS-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
3074 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
3075 ; GFX10PLUS-NEXT: ; return to shader part epilog
3077 ; GFX12-LABEL: atomic_dec_i64_1d:
3078 ; GFX12: ; %bb.0: ; %main_body
3079 ; GFX12-NEXT: s_mov_b32 s0, s2
3080 ; GFX12-NEXT: s_mov_b32 s1, s3
3081 ; GFX12-NEXT: s_mov_b32 s2, s4
3082 ; GFX12-NEXT: s_mov_b32 s3, s5
3083 ; GFX12-NEXT: s_mov_b32 s4, s6
3084 ; GFX12-NEXT: s_mov_b32 s5, s7
3085 ; GFX12-NEXT: s_mov_b32 s6, s8
3086 ; GFX12-NEXT: s_mov_b32 s7, s9
3087 ; GFX12-NEXT: image_atomic_dec_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
3088 ; GFX12-NEXT: s_wait_loadcnt 0x0
3089 ; GFX12-NEXT: ; return to shader part epilog
3091 %v = call i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
3092 %out = bitcast i64 %v to <2 x float>
3093 ret <2 x float> %out
; atomic_cmpswap_i64_1d: calls @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32
; with two i64 operands (%cmp, %swap) and an i32 %s operand, and returns the
; i64 result bitcast to <2 x float>.
; Expected output on every run line: eight s_mov_b32 copies move s2-s9 into
; s0-s7 (the s[0:7] operand), followed by image_atomic_cmpswap on v[0:3], v4
; with dmask:0xf, then s_waitcnt vmcnt(0) (plus expcnt(0) on the GFX6 run,
; s_wait_loadcnt 0x0 on the GFX12 run) before the shader part epilog.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
3096 define amdgpu_ps <2 x float> @atomic_cmpswap_i64_1d(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i32 %s) {
3097 ; GFX6-LABEL: atomic_cmpswap_i64_1d:
3098 ; GFX6: ; %bb.0: ; %main_body
3099 ; GFX6-NEXT: s_mov_b32 s0, s2
3100 ; GFX6-NEXT: s_mov_b32 s1, s3
3101 ; GFX6-NEXT: s_mov_b32 s2, s4
3102 ; GFX6-NEXT: s_mov_b32 s3, s5
3103 ; GFX6-NEXT: s_mov_b32 s4, s6
3104 ; GFX6-NEXT: s_mov_b32 s5, s7
3105 ; GFX6-NEXT: s_mov_b32 s6, s8
3106 ; GFX6-NEXT: s_mov_b32 s7, s9
3107 ; GFX6-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
3108 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
3109 ; GFX6-NEXT: ; return to shader part epilog
3111 ; GFX8-LABEL: atomic_cmpswap_i64_1d:
3112 ; GFX8: ; %bb.0: ; %main_body
3113 ; GFX8-NEXT: s_mov_b32 s0, s2
3114 ; GFX8-NEXT: s_mov_b32 s1, s3
3115 ; GFX8-NEXT: s_mov_b32 s2, s4
3116 ; GFX8-NEXT: s_mov_b32 s3, s5
3117 ; GFX8-NEXT: s_mov_b32 s4, s6
3118 ; GFX8-NEXT: s_mov_b32 s5, s7
3119 ; GFX8-NEXT: s_mov_b32 s6, s8
3120 ; GFX8-NEXT: s_mov_b32 s7, s9
3121 ; GFX8-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
3122 ; GFX8-NEXT: s_waitcnt vmcnt(0)
3123 ; GFX8-NEXT: ; return to shader part epilog
3125 ; GFX900-LABEL: atomic_cmpswap_i64_1d:
3126 ; GFX900: ; %bb.0: ; %main_body
3127 ; GFX900-NEXT: s_mov_b32 s0, s2
3128 ; GFX900-NEXT: s_mov_b32 s1, s3
3129 ; GFX900-NEXT: s_mov_b32 s2, s4
3130 ; GFX900-NEXT: s_mov_b32 s3, s5
3131 ; GFX900-NEXT: s_mov_b32 s4, s6
3132 ; GFX900-NEXT: s_mov_b32 s5, s7
3133 ; GFX900-NEXT: s_mov_b32 s6, s8
3134 ; GFX900-NEXT: s_mov_b32 s7, s9
3135 ; GFX900-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
3136 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3137 ; GFX900-NEXT: ; return to shader part epilog
3139 ; GFX90A-LABEL: atomic_cmpswap_i64_1d:
3140 ; GFX90A: ; %bb.0: ; %main_body
3141 ; GFX90A-NEXT: s_mov_b32 s0, s2
3142 ; GFX90A-NEXT: s_mov_b32 s1, s3
3143 ; GFX90A-NEXT: s_mov_b32 s2, s4
3144 ; GFX90A-NEXT: s_mov_b32 s3, s5
3145 ; GFX90A-NEXT: s_mov_b32 s4, s6
3146 ; GFX90A-NEXT: s_mov_b32 s5, s7
3147 ; GFX90A-NEXT: s_mov_b32 s6, s8
3148 ; GFX90A-NEXT: s_mov_b32 s7, s9
3149 ; GFX90A-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
3150 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3151 ; GFX90A-NEXT: ; return to shader part epilog
3153 ; GFX10PLUS-LABEL: atomic_cmpswap_i64_1d:
3154 ; GFX10PLUS: ; %bb.0: ; %main_body
3155 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
3156 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
3157 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
3158 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
3159 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
3160 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
3161 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
3162 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
3163 ; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc
3164 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
3165 ; GFX10PLUS-NEXT: ; return to shader part epilog
3167 ; GFX12-LABEL: atomic_cmpswap_i64_1d:
3168 ; GFX12: ; %bb.0: ; %main_body
3169 ; GFX12-NEXT: s_mov_b32 s0, s2
3170 ; GFX12-NEXT: s_mov_b32 s1, s3
3171 ; GFX12-NEXT: s_mov_b32 s2, s4
3172 ; GFX12-NEXT: s_mov_b32 s3, s5
3173 ; GFX12-NEXT: s_mov_b32 s4, s6
3174 ; GFX12-NEXT: s_mov_b32 s5, s7
3175 ; GFX12-NEXT: s_mov_b32 s6, s8
3176 ; GFX12-NEXT: s_mov_b32 s7, s9
3177 ; GFX12-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
3178 ; GFX12-NEXT: s_wait_loadcnt 0x0
3179 ; GFX12-NEXT: ; return to shader part epilog
3181 %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
3182 %out = bitcast i64 %v to <2 x float>
3183 ret <2 x float> %out
; atomic_cmpswap_i64_1d_no_return: same call to
; @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32 as atomic_cmpswap_i64_1d, but
; the function is declared void, so the i64 result is not returned.
; Expected output on every run line: eight s_mov_b32 copies move s2-s9 into
; s0-s7 (the s[0:7] operand), followed by image_atomic_cmpswap on v[0:3], v4
; with dmask:0xf, then s_endpgm directly, with no wait instruction in between.
; NOTE(review): the expected instruction still carries glc (th:TH_ATOMIC_RETURN
; on the GFX12 run), exactly as in the returning variant; presumably no
; separate no-return form is selected for image atomics here - confirm that
; this is intended before relying on it.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
3186 define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i32 %s) {
3187 ; GFX6-LABEL: atomic_cmpswap_i64_1d_no_return:
3188 ; GFX6: ; %bb.0: ; %main_body
3189 ; GFX6-NEXT: s_mov_b32 s0, s2
3190 ; GFX6-NEXT: s_mov_b32 s1, s3
3191 ; GFX6-NEXT: s_mov_b32 s2, s4
3192 ; GFX6-NEXT: s_mov_b32 s3, s5
3193 ; GFX6-NEXT: s_mov_b32 s4, s6
3194 ; GFX6-NEXT: s_mov_b32 s5, s7
3195 ; GFX6-NEXT: s_mov_b32 s6, s8
3196 ; GFX6-NEXT: s_mov_b32 s7, s9
3197 ; GFX6-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
3198 ; GFX6-NEXT: s_endpgm
3200 ; GFX8-LABEL: atomic_cmpswap_i64_1d_no_return:
3201 ; GFX8: ; %bb.0: ; %main_body
3202 ; GFX8-NEXT: s_mov_b32 s0, s2
3203 ; GFX8-NEXT: s_mov_b32 s1, s3
3204 ; GFX8-NEXT: s_mov_b32 s2, s4
3205 ; GFX8-NEXT: s_mov_b32 s3, s5
3206 ; GFX8-NEXT: s_mov_b32 s4, s6
3207 ; GFX8-NEXT: s_mov_b32 s5, s7
3208 ; GFX8-NEXT: s_mov_b32 s6, s8
3209 ; GFX8-NEXT: s_mov_b32 s7, s9
3210 ; GFX8-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
3211 ; GFX8-NEXT: s_endpgm
3213 ; GFX900-LABEL: atomic_cmpswap_i64_1d_no_return:
3214 ; GFX900: ; %bb.0: ; %main_body
3215 ; GFX900-NEXT: s_mov_b32 s0, s2
3216 ; GFX900-NEXT: s_mov_b32 s1, s3
3217 ; GFX900-NEXT: s_mov_b32 s2, s4
3218 ; GFX900-NEXT: s_mov_b32 s3, s5
3219 ; GFX900-NEXT: s_mov_b32 s4, s6
3220 ; GFX900-NEXT: s_mov_b32 s5, s7
3221 ; GFX900-NEXT: s_mov_b32 s6, s8
3222 ; GFX900-NEXT: s_mov_b32 s7, s9
3223 ; GFX900-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
3224 ; GFX900-NEXT: s_endpgm
3226 ; GFX90A-LABEL: atomic_cmpswap_i64_1d_no_return:
3227 ; GFX90A: ; %bb.0: ; %main_body
3228 ; GFX90A-NEXT: s_mov_b32 s0, s2
3229 ; GFX90A-NEXT: s_mov_b32 s1, s3
3230 ; GFX90A-NEXT: s_mov_b32 s2, s4
3231 ; GFX90A-NEXT: s_mov_b32 s3, s5
3232 ; GFX90A-NEXT: s_mov_b32 s4, s6
3233 ; GFX90A-NEXT: s_mov_b32 s5, s7
3234 ; GFX90A-NEXT: s_mov_b32 s6, s8
3235 ; GFX90A-NEXT: s_mov_b32 s7, s9
3236 ; GFX90A-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
3237 ; GFX90A-NEXT: s_endpgm
3239 ; GFX10PLUS-LABEL: atomic_cmpswap_i64_1d_no_return:
3240 ; GFX10PLUS: ; %bb.0: ; %main_body
3241 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
3242 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
3243 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
3244 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
3245 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
3246 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
3247 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
3248 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
3249 ; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc
3250 ; GFX10PLUS-NEXT: s_endpgm
3252 ; GFX12-LABEL: atomic_cmpswap_i64_1d_no_return:
3253 ; GFX12: ; %bb.0: ; %main_body
3254 ; GFX12-NEXT: s_mov_b32 s0, s2
3255 ; GFX12-NEXT: s_mov_b32 s1, s3
3256 ; GFX12-NEXT: s_mov_b32 s2, s4
3257 ; GFX12-NEXT: s_mov_b32 s3, s5
3258 ; GFX12-NEXT: s_mov_b32 s4, s6
3259 ; GFX12-NEXT: s_mov_b32 s5, s7
3260 ; GFX12-NEXT: s_mov_b32 s6, s8
3261 ; GFX12-NEXT: s_mov_b32 s7, s9
3262 ; GFX12-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
3263 ; GFX12-NEXT: s_endpgm
3265 %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; 64-bit image atomic add, 2d variant.
; Passes the i64 %data, the i32 arguments %s and %t, the inreg <8 x i32> %rsrc
; and two zero immediates to llvm.amdgcn.image.atomic.add.2d.i64.i32, then
; returns the i64 result bitcast to <2 x float>.
; Every expected sequence below copies s2..s9 down to s0..s7 and issues one
; image atomic add with dmask:0x3, data in v[0:1] and v2/v3 as the second
; operand. The assertions are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py, never by hand.
3269 define amdgpu_ps <2 x float> @atomic_add_i64_2d(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t) {
3270 ; GFX6-LABEL: atomic_add_i64_2d:
3271 ; GFX6: ; %bb.0: ; %main_body
3272 ; GFX6-NEXT: s_mov_b32 s0, s2
3273 ; GFX6-NEXT: s_mov_b32 s1, s3
3274 ; GFX6-NEXT: s_mov_b32 s2, s4
3275 ; GFX6-NEXT: s_mov_b32 s3, s5
3276 ; GFX6-NEXT: s_mov_b32 s4, s6
3277 ; GFX6-NEXT: s_mov_b32 s5, s7
3278 ; GFX6-NEXT: s_mov_b32 s6, s8
3279 ; GFX6-NEXT: s_mov_b32 s7, s9
3280 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc
3281 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
3282 ; GFX6-NEXT: ; return to shader part epilog
3284 ; GFX8-LABEL: atomic_add_i64_2d:
3285 ; GFX8: ; %bb.0: ; %main_body
3286 ; GFX8-NEXT: s_mov_b32 s0, s2
3287 ; GFX8-NEXT: s_mov_b32 s1, s3
3288 ; GFX8-NEXT: s_mov_b32 s2, s4
3289 ; GFX8-NEXT: s_mov_b32 s3, s5
3290 ; GFX8-NEXT: s_mov_b32 s4, s6
3291 ; GFX8-NEXT: s_mov_b32 s5, s7
3292 ; GFX8-NEXT: s_mov_b32 s6, s8
3293 ; GFX8-NEXT: s_mov_b32 s7, s9
3294 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc
3295 ; GFX8-NEXT: s_waitcnt vmcnt(0)
3296 ; GFX8-NEXT: ; return to shader part epilog
3298 ; GFX900-LABEL: atomic_add_i64_2d:
3299 ; GFX900: ; %bb.0: ; %main_body
3300 ; GFX900-NEXT: s_mov_b32 s0, s2
3301 ; GFX900-NEXT: s_mov_b32 s1, s3
3302 ; GFX900-NEXT: s_mov_b32 s2, s4
3303 ; GFX900-NEXT: s_mov_b32 s3, s5
3304 ; GFX900-NEXT: s_mov_b32 s4, s6
3305 ; GFX900-NEXT: s_mov_b32 s5, s7
3306 ; GFX900-NEXT: s_mov_b32 s6, s8
3307 ; GFX900-NEXT: s_mov_b32 s7, s9
3308 ; GFX900-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc
3309 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3310 ; GFX900-NEXT: ; return to shader part epilog
3312 ; GFX90A-LABEL: atomic_add_i64_2d:
3313 ; GFX90A: ; %bb.0: ; %main_body
3314 ; GFX90A-NEXT: s_mov_b32 s0, s2
3315 ; GFX90A-NEXT: s_mov_b32 s1, s3
3316 ; GFX90A-NEXT: s_mov_b32 s2, s4
3317 ; GFX90A-NEXT: s_mov_b32 s3, s5
3318 ; GFX90A-NEXT: s_mov_b32 s4, s6
3319 ; GFX90A-NEXT: s_mov_b32 s5, s7
3320 ; GFX90A-NEXT: s_mov_b32 s6, s8
3321 ; GFX90A-NEXT: s_mov_b32 s7, s9
3322 ; GFX90A-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc
3323 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3324 ; GFX90A-NEXT: ; return to shader part epilog
3326 ; GFX10PLUS-LABEL: atomic_add_i64_2d:
3327 ; GFX10PLUS: ; %bb.0: ; %main_body
3328 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
3329 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
3330 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
3331 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
3332 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
3333 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
3334 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
3335 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
3336 ; GFX10PLUS-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm glc
3337 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
3338 ; GFX10PLUS-NEXT: ; return to shader part epilog
3340 ; GFX12-LABEL: atomic_add_i64_2d:
3341 ; GFX12: ; %bb.0: ; %main_body
3342 ; GFX12-NEXT: s_mov_b32 s0, s2
3343 ; GFX12-NEXT: s_mov_b32 s1, s3
3344 ; GFX12-NEXT: s_mov_b32 s2, s4
3345 ; GFX12-NEXT: s_mov_b32 s3, s5
3346 ; GFX12-NEXT: s_mov_b32 s4, s6
3347 ; GFX12-NEXT: s_mov_b32 s5, s7
3348 ; GFX12-NEXT: s_mov_b32 s6, s8
3349 ; GFX12-NEXT: s_mov_b32 s7, s9
3350 ; GFX12-NEXT: image_atomic_add_uint v[0:1], [v2, v3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D th:TH_ATOMIC_RETURN
3351 ; GFX12-NEXT: s_wait_loadcnt 0x0
3352 ; GFX12-NEXT: ; return to shader part epilog
3354 %v = call i64 @llvm.amdgcn.image.atomic.add.2d.i64.i32(i64 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
3355 %out = bitcast i64 %v to <2 x float>
3356 ret <2 x float> %out
; 64-bit image atomic add, 3d variant.
; Passes the i64 %data, the i32 arguments %s, %t and %r, the inreg
; <8 x i32> %rsrc and two zero immediates to
; llvm.amdgcn.image.atomic.add.3d.i64.i32, then returns the i64 result bitcast
; to <2 x float>.
; The expected image instruction takes v2..v4 as its second operand; where a
; dim field is printed it is SQ_RSRC_IMG_3D. Assertions are autogenerated;
; regenerate with utils/update_llc_test_checks.py instead of editing them.
3359 define amdgpu_ps <2 x float> @atomic_add_i64_3d(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %r) {
3360 ; GFX6-LABEL: atomic_add_i64_3d:
3361 ; GFX6: ; %bb.0: ; %main_body
3362 ; GFX6-NEXT: s_mov_b32 s0, s2
3363 ; GFX6-NEXT: s_mov_b32 s1, s3
3364 ; GFX6-NEXT: s_mov_b32 s2, s4
3365 ; GFX6-NEXT: s_mov_b32 s3, s5
3366 ; GFX6-NEXT: s_mov_b32 s4, s6
3367 ; GFX6-NEXT: s_mov_b32 s5, s7
3368 ; GFX6-NEXT: s_mov_b32 s6, s8
3369 ; GFX6-NEXT: s_mov_b32 s7, s9
3370 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
3371 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
3372 ; GFX6-NEXT: ; return to shader part epilog
3374 ; GFX8-LABEL: atomic_add_i64_3d:
3375 ; GFX8: ; %bb.0: ; %main_body
3376 ; GFX8-NEXT: s_mov_b32 s0, s2
3377 ; GFX8-NEXT: s_mov_b32 s1, s3
3378 ; GFX8-NEXT: s_mov_b32 s2, s4
3379 ; GFX8-NEXT: s_mov_b32 s3, s5
3380 ; GFX8-NEXT: s_mov_b32 s4, s6
3381 ; GFX8-NEXT: s_mov_b32 s5, s7
3382 ; GFX8-NEXT: s_mov_b32 s6, s8
3383 ; GFX8-NEXT: s_mov_b32 s7, s9
3384 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
3385 ; GFX8-NEXT: s_waitcnt vmcnt(0)
3386 ; GFX8-NEXT: ; return to shader part epilog
3388 ; GFX900-LABEL: atomic_add_i64_3d:
3389 ; GFX900: ; %bb.0: ; %main_body
3390 ; GFX900-NEXT: s_mov_b32 s0, s2
3391 ; GFX900-NEXT: s_mov_b32 s1, s3
3392 ; GFX900-NEXT: s_mov_b32 s2, s4
3393 ; GFX900-NEXT: s_mov_b32 s3, s5
3394 ; GFX900-NEXT: s_mov_b32 s4, s6
3395 ; GFX900-NEXT: s_mov_b32 s5, s7
3396 ; GFX900-NEXT: s_mov_b32 s6, s8
3397 ; GFX900-NEXT: s_mov_b32 s7, s9
3398 ; GFX900-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
3399 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3400 ; GFX900-NEXT: ; return to shader part epilog
3402 ; GFX90A-LABEL: atomic_add_i64_3d:
3403 ; GFX90A: ; %bb.0: ; %main_body
3404 ; GFX90A-NEXT: s_mov_b32 s0, s2
3405 ; GFX90A-NEXT: s_mov_b32 s1, s3
3406 ; GFX90A-NEXT: s_mov_b32 s2, s4
3407 ; GFX90A-NEXT: s_mov_b32 s3, s5
3408 ; GFX90A-NEXT: s_mov_b32 s4, s6
3409 ; GFX90A-NEXT: s_mov_b32 s5, s7
3410 ; GFX90A-NEXT: s_mov_b32 s6, s8
3411 ; GFX90A-NEXT: s_mov_b32 s7, s9
3412 ; GFX90A-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
3413 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3414 ; GFX90A-NEXT: ; return to shader part epilog
3416 ; GFX10PLUS-LABEL: atomic_add_i64_3d:
3417 ; GFX10PLUS: ; %bb.0: ; %main_body
3418 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
3419 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
3420 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
3421 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
3422 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
3423 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
3424 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
3425 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
3426 ; GFX10PLUS-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm glc
3427 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
3428 ; GFX10PLUS-NEXT: ; return to shader part epilog
3430 ; GFX12-LABEL: atomic_add_i64_3d:
3431 ; GFX12: ; %bb.0: ; %main_body
3432 ; GFX12-NEXT: s_mov_b32 s0, s2
3433 ; GFX12-NEXT: s_mov_b32 s1, s3
3434 ; GFX12-NEXT: s_mov_b32 s2, s4
3435 ; GFX12-NEXT: s_mov_b32 s3, s5
3436 ; GFX12-NEXT: s_mov_b32 s4, s6
3437 ; GFX12-NEXT: s_mov_b32 s5, s7
3438 ; GFX12-NEXT: s_mov_b32 s6, s8
3439 ; GFX12-NEXT: s_mov_b32 s7, s9
3440 ; GFX12-NEXT: image_atomic_add_uint v[0:1], [v2, v3, v4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D th:TH_ATOMIC_RETURN
3441 ; GFX12-NEXT: s_wait_loadcnt 0x0
3442 ; GFX12-NEXT: ; return to shader part epilog
3444 %v = call i64 @llvm.amdgcn.image.atomic.add.3d.i64.i32(i64 %data, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
3445 %out = bitcast i64 %v to <2 x float>
3446 ret <2 x float> %out
; 64-bit image atomic add, cube variant.
; Passes the i64 %data, the i32 arguments %s, %t and %face, the inreg
; <8 x i32> %rsrc and two zero immediates to
; llvm.amdgcn.image.atomic.add.cube.i64.i32, then returns the i64 result
; bitcast to <2 x float>.
; Unlike the 2d/3d tests, the expected instruction carries the `da` modifier
; on the tahiti/fiji/gfx900/gfx90a runs; the newer runs print
; dim:SQ_RSRC_IMG_CUBE instead. Assertions are autogenerated; regenerate with
; utils/update_llc_test_checks.py instead of editing them.
3449 define amdgpu_ps <2 x float> @atomic_add_i64_cube(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %face) {
3450 ; GFX6-LABEL: atomic_add_i64_cube:
3451 ; GFX6: ; %bb.0: ; %main_body
3452 ; GFX6-NEXT: s_mov_b32 s0, s2
3453 ; GFX6-NEXT: s_mov_b32 s1, s3
3454 ; GFX6-NEXT: s_mov_b32 s2, s4
3455 ; GFX6-NEXT: s_mov_b32 s3, s5
3456 ; GFX6-NEXT: s_mov_b32 s4, s6
3457 ; GFX6-NEXT: s_mov_b32 s5, s7
3458 ; GFX6-NEXT: s_mov_b32 s6, s8
3459 ; GFX6-NEXT: s_mov_b32 s7, s9
3460 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
3461 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
3462 ; GFX6-NEXT: ; return to shader part epilog
3464 ; GFX8-LABEL: atomic_add_i64_cube:
3465 ; GFX8: ; %bb.0: ; %main_body
3466 ; GFX8-NEXT: s_mov_b32 s0, s2
3467 ; GFX8-NEXT: s_mov_b32 s1, s3
3468 ; GFX8-NEXT: s_mov_b32 s2, s4
3469 ; GFX8-NEXT: s_mov_b32 s3, s5
3470 ; GFX8-NEXT: s_mov_b32 s4, s6
3471 ; GFX8-NEXT: s_mov_b32 s5, s7
3472 ; GFX8-NEXT: s_mov_b32 s6, s8
3473 ; GFX8-NEXT: s_mov_b32 s7, s9
3474 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
3475 ; GFX8-NEXT: s_waitcnt vmcnt(0)
3476 ; GFX8-NEXT: ; return to shader part epilog
3478 ; GFX900-LABEL: atomic_add_i64_cube:
3479 ; GFX900: ; %bb.0: ; %main_body
3480 ; GFX900-NEXT: s_mov_b32 s0, s2
3481 ; GFX900-NEXT: s_mov_b32 s1, s3
3482 ; GFX900-NEXT: s_mov_b32 s2, s4
3483 ; GFX900-NEXT: s_mov_b32 s3, s5
3484 ; GFX900-NEXT: s_mov_b32 s4, s6
3485 ; GFX900-NEXT: s_mov_b32 s5, s7
3486 ; GFX900-NEXT: s_mov_b32 s6, s8
3487 ; GFX900-NEXT: s_mov_b32 s7, s9
3488 ; GFX900-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
3489 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3490 ; GFX900-NEXT: ; return to shader part epilog
3492 ; GFX90A-LABEL: atomic_add_i64_cube:
3493 ; GFX90A: ; %bb.0: ; %main_body
3494 ; GFX90A-NEXT: s_mov_b32 s0, s2
3495 ; GFX90A-NEXT: s_mov_b32 s1, s3
3496 ; GFX90A-NEXT: s_mov_b32 s2, s4
3497 ; GFX90A-NEXT: s_mov_b32 s3, s5
3498 ; GFX90A-NEXT: s_mov_b32 s4, s6
3499 ; GFX90A-NEXT: s_mov_b32 s5, s7
3500 ; GFX90A-NEXT: s_mov_b32 s6, s8
3501 ; GFX90A-NEXT: s_mov_b32 s7, s9
3502 ; GFX90A-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
3503 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3504 ; GFX90A-NEXT: ; return to shader part epilog
3506 ; GFX10PLUS-LABEL: atomic_add_i64_cube:
3507 ; GFX10PLUS: ; %bb.0: ; %main_body
3508 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
3509 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
3510 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
3511 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
3512 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
3513 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
3514 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
3515 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
3516 ; GFX10PLUS-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_CUBE unorm glc
3517 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
3518 ; GFX10PLUS-NEXT: ; return to shader part epilog
3520 ; GFX12-LABEL: atomic_add_i64_cube:
3521 ; GFX12: ; %bb.0: ; %main_body
3522 ; GFX12-NEXT: s_mov_b32 s0, s2
3523 ; GFX12-NEXT: s_mov_b32 s1, s3
3524 ; GFX12-NEXT: s_mov_b32 s2, s4
3525 ; GFX12-NEXT: s_mov_b32 s3, s5
3526 ; GFX12-NEXT: s_mov_b32 s4, s6
3527 ; GFX12-NEXT: s_mov_b32 s5, s7
3528 ; GFX12-NEXT: s_mov_b32 s6, s8
3529 ; GFX12-NEXT: s_mov_b32 s7, s9
3530 ; GFX12-NEXT: image_atomic_add_uint v[0:1], [v2, v3, v4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_CUBE th:TH_ATOMIC_RETURN
3531 ; GFX12-NEXT: s_wait_loadcnt 0x0
3532 ; GFX12-NEXT: ; return to shader part epilog
3534 %v = call i64 @llvm.amdgcn.image.atomic.add.cube.i64.i32(i64 %data, i32 %s, i32 %t, i32 %face, <8 x i32> %rsrc, i32 0, i32 0)
3535 %out = bitcast i64 %v to <2 x float>
3536 ret <2 x float> %out
; 64-bit image atomic add, 1darray variant.
; Passes the i64 %data, the i32 arguments %s and %slice, the inreg
; <8 x i32> %rsrc and two zero immediates to
; llvm.amdgcn.image.atomic.add.1darray.i64.i32, then returns the i64 result
; bitcast to <2 x float>.
; The expected instruction takes v2/v3 as its second operand and carries the
; `da` modifier on the tahiti/fiji/gfx900/gfx90a runs; the newer runs print
; dim:SQ_RSRC_IMG_1D_ARRAY instead. Assertions are autogenerated; regenerate
; with utils/update_llc_test_checks.py instead of editing them.
3539 define amdgpu_ps <2 x float> @atomic_add_i64_1darray(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %slice) {
3540 ; GFX6-LABEL: atomic_add_i64_1darray:
3541 ; GFX6: ; %bb.0: ; %main_body
3542 ; GFX6-NEXT: s_mov_b32 s0, s2
3543 ; GFX6-NEXT: s_mov_b32 s1, s3
3544 ; GFX6-NEXT: s_mov_b32 s2, s4
3545 ; GFX6-NEXT: s_mov_b32 s3, s5
3546 ; GFX6-NEXT: s_mov_b32 s4, s6
3547 ; GFX6-NEXT: s_mov_b32 s5, s7
3548 ; GFX6-NEXT: s_mov_b32 s6, s8
3549 ; GFX6-NEXT: s_mov_b32 s7, s9
3550 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da
3551 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
3552 ; GFX6-NEXT: ; return to shader part epilog
3554 ; GFX8-LABEL: atomic_add_i64_1darray:
3555 ; GFX8: ; %bb.0: ; %main_body
3556 ; GFX8-NEXT: s_mov_b32 s0, s2
3557 ; GFX8-NEXT: s_mov_b32 s1, s3
3558 ; GFX8-NEXT: s_mov_b32 s2, s4
3559 ; GFX8-NEXT: s_mov_b32 s3, s5
3560 ; GFX8-NEXT: s_mov_b32 s4, s6
3561 ; GFX8-NEXT: s_mov_b32 s5, s7
3562 ; GFX8-NEXT: s_mov_b32 s6, s8
3563 ; GFX8-NEXT: s_mov_b32 s7, s9
3564 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da
3565 ; GFX8-NEXT: s_waitcnt vmcnt(0)
3566 ; GFX8-NEXT: ; return to shader part epilog
3568 ; GFX900-LABEL: atomic_add_i64_1darray:
3569 ; GFX900: ; %bb.0: ; %main_body
3570 ; GFX900-NEXT: s_mov_b32 s0, s2
3571 ; GFX900-NEXT: s_mov_b32 s1, s3
3572 ; GFX900-NEXT: s_mov_b32 s2, s4
3573 ; GFX900-NEXT: s_mov_b32 s3, s5
3574 ; GFX900-NEXT: s_mov_b32 s4, s6
3575 ; GFX900-NEXT: s_mov_b32 s5, s7
3576 ; GFX900-NEXT: s_mov_b32 s6, s8
3577 ; GFX900-NEXT: s_mov_b32 s7, s9
3578 ; GFX900-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da
3579 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3580 ; GFX900-NEXT: ; return to shader part epilog
3582 ; GFX90A-LABEL: atomic_add_i64_1darray:
3583 ; GFX90A: ; %bb.0: ; %main_body
3584 ; GFX90A-NEXT: s_mov_b32 s0, s2
3585 ; GFX90A-NEXT: s_mov_b32 s1, s3
3586 ; GFX90A-NEXT: s_mov_b32 s2, s4
3587 ; GFX90A-NEXT: s_mov_b32 s3, s5
3588 ; GFX90A-NEXT: s_mov_b32 s4, s6
3589 ; GFX90A-NEXT: s_mov_b32 s5, s7
3590 ; GFX90A-NEXT: s_mov_b32 s6, s8
3591 ; GFX90A-NEXT: s_mov_b32 s7, s9
3592 ; GFX90A-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da
3593 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3594 ; GFX90A-NEXT: ; return to shader part epilog
3596 ; GFX10PLUS-LABEL: atomic_add_i64_1darray:
3597 ; GFX10PLUS: ; %bb.0: ; %main_body
3598 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
3599 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
3600 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
3601 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
3602 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
3603 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
3604 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
3605 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
3606 ; GFX10PLUS-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc
3607 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
3608 ; GFX10PLUS-NEXT: ; return to shader part epilog
3610 ; GFX12-LABEL: atomic_add_i64_1darray:
3611 ; GFX12: ; %bb.0: ; %main_body
3612 ; GFX12-NEXT: s_mov_b32 s0, s2
3613 ; GFX12-NEXT: s_mov_b32 s1, s3
3614 ; GFX12-NEXT: s_mov_b32 s2, s4
3615 ; GFX12-NEXT: s_mov_b32 s3, s5
3616 ; GFX12-NEXT: s_mov_b32 s4, s6
3617 ; GFX12-NEXT: s_mov_b32 s5, s7
3618 ; GFX12-NEXT: s_mov_b32 s6, s8
3619 ; GFX12-NEXT: s_mov_b32 s7, s9
3620 ; GFX12-NEXT: image_atomic_add_uint v[0:1], [v2, v3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D_ARRAY th:TH_ATOMIC_RETURN
3621 ; GFX12-NEXT: s_wait_loadcnt 0x0
3622 ; GFX12-NEXT: ; return to shader part epilog
3624 %v = call i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i32(i64 %data, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
3625 %out = bitcast i64 %v to <2 x float>
3626 ret <2 x float> %out
; 64-bit image atomic add, 2darray variant.
; Passes the i64 %data, the i32 arguments %s, %t and %slice, the inreg
; <8 x i32> %rsrc and two zero immediates to
; llvm.amdgcn.image.atomic.add.2darray.i64.i32, then returns the i64 result
; bitcast to <2 x float>.
; The expected instruction takes v2..v4 as its second operand and carries the
; `da` modifier on the tahiti/fiji/gfx900/gfx90a runs; the newer runs print
; dim:SQ_RSRC_IMG_2D_ARRAY instead. Assertions are autogenerated; regenerate
; with utils/update_llc_test_checks.py instead of editing them.
3629 define amdgpu_ps <2 x float> @atomic_add_i64_2darray(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %slice) {
3630 ; GFX6-LABEL: atomic_add_i64_2darray:
3631 ; GFX6: ; %bb.0: ; %main_body
3632 ; GFX6-NEXT: s_mov_b32 s0, s2
3633 ; GFX6-NEXT: s_mov_b32 s1, s3
3634 ; GFX6-NEXT: s_mov_b32 s2, s4
3635 ; GFX6-NEXT: s_mov_b32 s3, s5
3636 ; GFX6-NEXT: s_mov_b32 s4, s6
3637 ; GFX6-NEXT: s_mov_b32 s5, s7
3638 ; GFX6-NEXT: s_mov_b32 s6, s8
3639 ; GFX6-NEXT: s_mov_b32 s7, s9
3640 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
3641 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
3642 ; GFX6-NEXT: ; return to shader part epilog
3644 ; GFX8-LABEL: atomic_add_i64_2darray:
3645 ; GFX8: ; %bb.0: ; %main_body
3646 ; GFX8-NEXT: s_mov_b32 s0, s2
3647 ; GFX8-NEXT: s_mov_b32 s1, s3
3648 ; GFX8-NEXT: s_mov_b32 s2, s4
3649 ; GFX8-NEXT: s_mov_b32 s3, s5
3650 ; GFX8-NEXT: s_mov_b32 s4, s6
3651 ; GFX8-NEXT: s_mov_b32 s5, s7
3652 ; GFX8-NEXT: s_mov_b32 s6, s8
3653 ; GFX8-NEXT: s_mov_b32 s7, s9
3654 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
3655 ; GFX8-NEXT: s_waitcnt vmcnt(0)
3656 ; GFX8-NEXT: ; return to shader part epilog
3658 ; GFX900-LABEL: atomic_add_i64_2darray:
3659 ; GFX900: ; %bb.0: ; %main_body
3660 ; GFX900-NEXT: s_mov_b32 s0, s2
3661 ; GFX900-NEXT: s_mov_b32 s1, s3
3662 ; GFX900-NEXT: s_mov_b32 s2, s4
3663 ; GFX900-NEXT: s_mov_b32 s3, s5
3664 ; GFX900-NEXT: s_mov_b32 s4, s6
3665 ; GFX900-NEXT: s_mov_b32 s5, s7
3666 ; GFX900-NEXT: s_mov_b32 s6, s8
3667 ; GFX900-NEXT: s_mov_b32 s7, s9
3668 ; GFX900-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
3669 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3670 ; GFX900-NEXT: ; return to shader part epilog
3672 ; GFX90A-LABEL: atomic_add_i64_2darray:
3673 ; GFX90A: ; %bb.0: ; %main_body
3674 ; GFX90A-NEXT: s_mov_b32 s0, s2
3675 ; GFX90A-NEXT: s_mov_b32 s1, s3
3676 ; GFX90A-NEXT: s_mov_b32 s2, s4
3677 ; GFX90A-NEXT: s_mov_b32 s3, s5
3678 ; GFX90A-NEXT: s_mov_b32 s4, s6
3679 ; GFX90A-NEXT: s_mov_b32 s5, s7
3680 ; GFX90A-NEXT: s_mov_b32 s6, s8
3681 ; GFX90A-NEXT: s_mov_b32 s7, s9
3682 ; GFX90A-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
3683 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3684 ; GFX90A-NEXT: ; return to shader part epilog
3686 ; GFX10PLUS-LABEL: atomic_add_i64_2darray:
3687 ; GFX10PLUS: ; %bb.0: ; %main_body
3688 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
3689 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
3690 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
3691 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
3692 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
3693 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
3694 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
3695 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
3696 ; GFX10PLUS-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc
3697 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
3698 ; GFX10PLUS-NEXT: ; return to shader part epilog
3700 ; GFX12-LABEL: atomic_add_i64_2darray:
3701 ; GFX12: ; %bb.0: ; %main_body
3702 ; GFX12-NEXT: s_mov_b32 s0, s2
3703 ; GFX12-NEXT: s_mov_b32 s1, s3
3704 ; GFX12-NEXT: s_mov_b32 s2, s4
3705 ; GFX12-NEXT: s_mov_b32 s3, s5
3706 ; GFX12-NEXT: s_mov_b32 s4, s6
3707 ; GFX12-NEXT: s_mov_b32 s5, s7
3708 ; GFX12-NEXT: s_mov_b32 s6, s8
3709 ; GFX12-NEXT: s_mov_b32 s7, s9
3710 ; GFX12-NEXT: image_atomic_add_uint v[0:1], [v2, v3, v4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_ARRAY th:TH_ATOMIC_RETURN
3711 ; GFX12-NEXT: s_wait_loadcnt 0x0
3712 ; GFX12-NEXT: ; return to shader part epilog
3714 %v = call i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i32(i64 %data, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
3715 %out = bitcast i64 %v to <2 x float>
3716 ret <2 x float> %out
; 64-bit image atomic add, 2dmsaa variant.
; Passes the i64 %data, the i32 arguments %s, %t and %fragid, the inreg
; <8 x i32> %rsrc and two zero immediates to
; llvm.amdgcn.image.atomic.add.2dmsaa.i64.i32, then returns the i64 result
; bitcast to <2 x float>.
; The expected instruction takes v2..v4 as its second operand and has no `da`
; modifier; where a dim field is printed it is SQ_RSRC_IMG_2D_MSAA.
; Assertions are autogenerated; regenerate with
; utils/update_llc_test_checks.py instead of editing them.
3719 define amdgpu_ps <2 x float> @atomic_add_i64_2dmsaa(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %fragid) {
3720 ; GFX6-LABEL: atomic_add_i64_2dmsaa:
3721 ; GFX6: ; %bb.0: ; %main_body
3722 ; GFX6-NEXT: s_mov_b32 s0, s2
3723 ; GFX6-NEXT: s_mov_b32 s1, s3
3724 ; GFX6-NEXT: s_mov_b32 s2, s4
3725 ; GFX6-NEXT: s_mov_b32 s3, s5
3726 ; GFX6-NEXT: s_mov_b32 s4, s6
3727 ; GFX6-NEXT: s_mov_b32 s5, s7
3728 ; GFX6-NEXT: s_mov_b32 s6, s8
3729 ; GFX6-NEXT: s_mov_b32 s7, s9
3730 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
3731 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
3732 ; GFX6-NEXT: ; return to shader part epilog
3734 ; GFX8-LABEL: atomic_add_i64_2dmsaa:
3735 ; GFX8: ; %bb.0: ; %main_body
3736 ; GFX8-NEXT: s_mov_b32 s0, s2
3737 ; GFX8-NEXT: s_mov_b32 s1, s3
3738 ; GFX8-NEXT: s_mov_b32 s2, s4
3739 ; GFX8-NEXT: s_mov_b32 s3, s5
3740 ; GFX8-NEXT: s_mov_b32 s4, s6
3741 ; GFX8-NEXT: s_mov_b32 s5, s7
3742 ; GFX8-NEXT: s_mov_b32 s6, s8
3743 ; GFX8-NEXT: s_mov_b32 s7, s9
3744 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
3745 ; GFX8-NEXT: s_waitcnt vmcnt(0)
3746 ; GFX8-NEXT: ; return to shader part epilog
3748 ; GFX900-LABEL: atomic_add_i64_2dmsaa:
3749 ; GFX900: ; %bb.0: ; %main_body
3750 ; GFX900-NEXT: s_mov_b32 s0, s2
3751 ; GFX900-NEXT: s_mov_b32 s1, s3
3752 ; GFX900-NEXT: s_mov_b32 s2, s4
3753 ; GFX900-NEXT: s_mov_b32 s3, s5
3754 ; GFX900-NEXT: s_mov_b32 s4, s6
3755 ; GFX900-NEXT: s_mov_b32 s5, s7
3756 ; GFX900-NEXT: s_mov_b32 s6, s8
3757 ; GFX900-NEXT: s_mov_b32 s7, s9
3758 ; GFX900-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
3759 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3760 ; GFX900-NEXT: ; return to shader part epilog
3762 ; GFX90A-LABEL: atomic_add_i64_2dmsaa:
3763 ; GFX90A: ; %bb.0: ; %main_body
3764 ; GFX90A-NEXT: s_mov_b32 s0, s2
3765 ; GFX90A-NEXT: s_mov_b32 s1, s3
3766 ; GFX90A-NEXT: s_mov_b32 s2, s4
3767 ; GFX90A-NEXT: s_mov_b32 s3, s5
3768 ; GFX90A-NEXT: s_mov_b32 s4, s6
3769 ; GFX90A-NEXT: s_mov_b32 s5, s7
3770 ; GFX90A-NEXT: s_mov_b32 s6, s8
3771 ; GFX90A-NEXT: s_mov_b32 s7, s9
3772 ; GFX90A-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
3773 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3774 ; GFX90A-NEXT: ; return to shader part epilog
3776 ; GFX10PLUS-LABEL: atomic_add_i64_2dmsaa:
3777 ; GFX10PLUS: ; %bb.0: ; %main_body
3778 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
3779 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
3780 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
3781 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
3782 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
3783 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
3784 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
3785 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
3786 ; GFX10PLUS-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA unorm glc
3787 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
3788 ; GFX10PLUS-NEXT: ; return to shader part epilog
3790 ; GFX12-LABEL: atomic_add_i64_2dmsaa:
3791 ; GFX12: ; %bb.0: ; %main_body
3792 ; GFX12-NEXT: s_mov_b32 s0, s2
3793 ; GFX12-NEXT: s_mov_b32 s1, s3
3794 ; GFX12-NEXT: s_mov_b32 s2, s4
3795 ; GFX12-NEXT: s_mov_b32 s3, s5
3796 ; GFX12-NEXT: s_mov_b32 s4, s6
3797 ; GFX12-NEXT: s_mov_b32 s5, s7
3798 ; GFX12-NEXT: s_mov_b32 s6, s8
3799 ; GFX12-NEXT: s_mov_b32 s7, s9
3800 ; GFX12-NEXT: image_atomic_add_uint v[0:1], [v2, v3, v4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA th:TH_ATOMIC_RETURN
3801 ; GFX12-NEXT: s_wait_loadcnt 0x0
3802 ; GFX12-NEXT: ; return to shader part epilog
3804 %v = call i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i32(i64 %data, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
3805 %out = bitcast i64 %v to <2 x float>
3806 ret <2 x float> %out
; 64-bit image atomic add, 2darraymsaa variant.
; Passes the i64 %data, the i32 arguments %s, %t, %slice and %fragid, the
; inreg <8 x i32> %rsrc and two zero immediates to
; llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i32, then returns the i64
; result bitcast to <2 x float>.
; The expected instruction takes v2..v5 as its second operand and carries the
; `da` modifier on the tahiti/fiji/gfx900/gfx90a runs; the newer runs print
; dim:SQ_RSRC_IMG_2D_MSAA_ARRAY instead. Assertions are autogenerated;
; regenerate with utils/update_llc_test_checks.py instead of editing them.
3809 define amdgpu_ps <2 x float> @atomic_add_i64_2darraymsaa(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
3810 ; GFX6-LABEL: atomic_add_i64_2darraymsaa:
3811 ; GFX6: ; %bb.0: ; %main_body
3812 ; GFX6-NEXT: s_mov_b32 s0, s2
3813 ; GFX6-NEXT: s_mov_b32 s1, s3
3814 ; GFX6-NEXT: s_mov_b32 s2, s4
3815 ; GFX6-NEXT: s_mov_b32 s3, s5
3816 ; GFX6-NEXT: s_mov_b32 s4, s6
3817 ; GFX6-NEXT: s_mov_b32 s5, s7
3818 ; GFX6-NEXT: s_mov_b32 s6, s8
3819 ; GFX6-NEXT: s_mov_b32 s7, s9
3820 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da
3821 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
3822 ; GFX6-NEXT: ; return to shader part epilog
3824 ; GFX8-LABEL: atomic_add_i64_2darraymsaa:
3825 ; GFX8: ; %bb.0: ; %main_body
3826 ; GFX8-NEXT: s_mov_b32 s0, s2
3827 ; GFX8-NEXT: s_mov_b32 s1, s3
3828 ; GFX8-NEXT: s_mov_b32 s2, s4
3829 ; GFX8-NEXT: s_mov_b32 s3, s5
3830 ; GFX8-NEXT: s_mov_b32 s4, s6
3831 ; GFX8-NEXT: s_mov_b32 s5, s7
3832 ; GFX8-NEXT: s_mov_b32 s6, s8
3833 ; GFX8-NEXT: s_mov_b32 s7, s9
3834 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da
3835 ; GFX8-NEXT: s_waitcnt vmcnt(0)
3836 ; GFX8-NEXT: ; return to shader part epilog
3838 ; GFX900-LABEL: atomic_add_i64_2darraymsaa:
3839 ; GFX900: ; %bb.0: ; %main_body
3840 ; GFX900-NEXT: s_mov_b32 s0, s2
3841 ; GFX900-NEXT: s_mov_b32 s1, s3
3842 ; GFX900-NEXT: s_mov_b32 s2, s4
3843 ; GFX900-NEXT: s_mov_b32 s3, s5
3844 ; GFX900-NEXT: s_mov_b32 s4, s6
3845 ; GFX900-NEXT: s_mov_b32 s5, s7
3846 ; GFX900-NEXT: s_mov_b32 s6, s8
3847 ; GFX900-NEXT: s_mov_b32 s7, s9
3848 ; GFX900-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da
3849 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3850 ; GFX900-NEXT: ; return to shader part epilog
3852 ; GFX90A-LABEL: atomic_add_i64_2darraymsaa:
3853 ; GFX90A: ; %bb.0: ; %main_body
3854 ; GFX90A-NEXT: s_mov_b32 s0, s2
3855 ; GFX90A-NEXT: s_mov_b32 s1, s3
3856 ; GFX90A-NEXT: s_mov_b32 s2, s4
3857 ; GFX90A-NEXT: s_mov_b32 s3, s5
3858 ; GFX90A-NEXT: s_mov_b32 s4, s6
3859 ; GFX90A-NEXT: s_mov_b32 s5, s7
3860 ; GFX90A-NEXT: s_mov_b32 s6, s8
3861 ; GFX90A-NEXT: s_mov_b32 s7, s9
3862 ; GFX90A-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da
3863 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3864 ; GFX90A-NEXT: ; return to shader part epilog
3866 ; GFX10PLUS-LABEL: atomic_add_i64_2darraymsaa:
3867 ; GFX10PLUS: ; %bb.0: ; %main_body
3868 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
3869 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
3870 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
3871 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
3872 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
3873 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
3874 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
3875 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
3876 ; GFX10PLUS-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc
3877 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
3878 ; GFX10PLUS-NEXT: ; return to shader part epilog
3880 ; GFX12-LABEL: atomic_add_i64_2darraymsaa:
3881 ; GFX12: ; %bb.0: ; %main_body
3882 ; GFX12-NEXT: s_mov_b32 s0, s2
3883 ; GFX12-NEXT: s_mov_b32 s1, s3
3884 ; GFX12-NEXT: s_mov_b32 s2, s4
3885 ; GFX12-NEXT: s_mov_b32 s3, s5
3886 ; GFX12-NEXT: s_mov_b32 s4, s6
3887 ; GFX12-NEXT: s_mov_b32 s5, s7
3888 ; GFX12-NEXT: s_mov_b32 s6, s8
3889 ; GFX12-NEXT: s_mov_b32 s7, s9
3890 ; GFX12-NEXT: image_atomic_add_uint v[0:1], [v2, v3, v4, v5], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY th:TH_ATOMIC_RETURN
3891 ; GFX12-NEXT: s_wait_loadcnt 0x0
3892 ; GFX12-NEXT: ; return to shader part epilog
3894 %v = call i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i32(i64 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
3895 %out = bitcast i64 %v to <2 x float>
3896 ret <2 x float> %out
; 64-bit image atomic add, 1d variant, with a non-zero final immediate.
; Passes the i64 %data, the i32 argument %s and the inreg <8 x i32> %rsrc to
; llvm.amdgcn.image.atomic.add.1d.i64.i32, then returns the i64 result bitcast
; to <2 x float>. The last immediate operand of the call is 2, where the other
; add tests in this part of the file pass 0.
; Matching that, the expected instruction gains `slc` next to `glc` on every
; run up to gfx11, and prints th:TH_ATOMIC_NT_RETURN (rather than
; th:TH_ATOMIC_RETURN) on the gfx1200 run.
; Assertions are autogenerated; regenerate with
; utils/update_llc_test_checks.py instead of editing them.
3899 define amdgpu_ps <2 x float> @atomic_add_i64_1d_slc(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
3900 ; GFX6-LABEL: atomic_add_i64_1d_slc:
3901 ; GFX6: ; %bb.0: ; %main_body
3902 ; GFX6-NEXT: s_mov_b32 s0, s2
3903 ; GFX6-NEXT: s_mov_b32 s1, s3
3904 ; GFX6-NEXT: s_mov_b32 s2, s4
3905 ; GFX6-NEXT: s_mov_b32 s3, s5
3906 ; GFX6-NEXT: s_mov_b32 s4, s6
3907 ; GFX6-NEXT: s_mov_b32 s5, s7
3908 ; GFX6-NEXT: s_mov_b32 s6, s8
3909 ; GFX6-NEXT: s_mov_b32 s7, s9
3910 ; GFX6-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc
3911 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
3912 ; GFX6-NEXT: ; return to shader part epilog
3914 ; GFX8-LABEL: atomic_add_i64_1d_slc:
3915 ; GFX8: ; %bb.0: ; %main_body
3916 ; GFX8-NEXT: s_mov_b32 s0, s2
3917 ; GFX8-NEXT: s_mov_b32 s1, s3
3918 ; GFX8-NEXT: s_mov_b32 s2, s4
3919 ; GFX8-NEXT: s_mov_b32 s3, s5
3920 ; GFX8-NEXT: s_mov_b32 s4, s6
3921 ; GFX8-NEXT: s_mov_b32 s5, s7
3922 ; GFX8-NEXT: s_mov_b32 s6, s8
3923 ; GFX8-NEXT: s_mov_b32 s7, s9
3924 ; GFX8-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc
3925 ; GFX8-NEXT: s_waitcnt vmcnt(0)
3926 ; GFX8-NEXT: ; return to shader part epilog
3928 ; GFX900-LABEL: atomic_add_i64_1d_slc:
3929 ; GFX900: ; %bb.0: ; %main_body
3930 ; GFX900-NEXT: s_mov_b32 s0, s2
3931 ; GFX900-NEXT: s_mov_b32 s1, s3
3932 ; GFX900-NEXT: s_mov_b32 s2, s4
3933 ; GFX900-NEXT: s_mov_b32 s3, s5
3934 ; GFX900-NEXT: s_mov_b32 s4, s6
3935 ; GFX900-NEXT: s_mov_b32 s5, s7
3936 ; GFX900-NEXT: s_mov_b32 s6, s8
3937 ; GFX900-NEXT: s_mov_b32 s7, s9
3938 ; GFX900-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc
3939 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3940 ; GFX900-NEXT: ; return to shader part epilog
3942 ; GFX90A-LABEL: atomic_add_i64_1d_slc:
3943 ; GFX90A: ; %bb.0: ; %main_body
3944 ; GFX90A-NEXT: s_mov_b32 s0, s2
3945 ; GFX90A-NEXT: s_mov_b32 s1, s3
3946 ; GFX90A-NEXT: s_mov_b32 s2, s4
3947 ; GFX90A-NEXT: s_mov_b32 s3, s5
3948 ; GFX90A-NEXT: s_mov_b32 s4, s6
3949 ; GFX90A-NEXT: s_mov_b32 s5, s7
3950 ; GFX90A-NEXT: s_mov_b32 s6, s8
3951 ; GFX90A-NEXT: s_mov_b32 s7, s9
3952 ; GFX90A-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc
3953 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3954 ; GFX90A-NEXT: ; return to shader part epilog
3956 ; GFX10PLUS-LABEL: atomic_add_i64_1d_slc:
3957 ; GFX10PLUS: ; %bb.0: ; %main_body
3958 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
3959 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
3960 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
3961 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
3962 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
3963 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
3964 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
3965 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
3966 ; GFX10PLUS-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc slc
3967 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
3968 ; GFX10PLUS-NEXT: ; return to shader part epilog
3970 ; GFX12-LABEL: atomic_add_i64_1d_slc:
3971 ; GFX12: ; %bb.0: ; %main_body
3972 ; GFX12-NEXT: s_mov_b32 s0, s2
3973 ; GFX12-NEXT: s_mov_b32 s1, s3
3974 ; GFX12-NEXT: s_mov_b32 s2, s4
3975 ; GFX12-NEXT: s_mov_b32 s3, s5
3976 ; GFX12-NEXT: s_mov_b32 s4, s6
3977 ; GFX12-NEXT: s_mov_b32 s5, s7
3978 ; GFX12-NEXT: s_mov_b32 s6, s8
3979 ; GFX12-NEXT: s_mov_b32 s7, s9
3980 ; GFX12-NEXT: image_atomic_add_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT_RETURN
3981 ; GFX12-NEXT: s_wait_loadcnt 0x0
3982 ; GFX12-NEXT: ; return to shader part epilog
3984 %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
3985 %out = bitcast i64 %v to <2 x float>
3986 ret <2 x float> %out
; Intrinsic declarations used by the tests in this file.
; First the i32-data forms: the 1d variants of swap, add, sub, smin, umin,
; smax, umax, and, or, xor, inc, dec and cmpswap, followed by `add` for the
; 2d, 3d, cube, 1darray, 2darray, 2dmsaa and 2darraymsaa variants. The same
; list is then repeated with i64 data.
; Each declaration takes the data operand(s) (cmpswap takes two), one i32 per
; address component, the <8 x i32> resource and two trailing `i32 immarg`
; operands. All of them reference attribute group #0, defined on the last
; line as { nounwind }.
3989 declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3990 declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3991 declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3992 declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3993 declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3994 declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3995 declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3996 declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3997 declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3998 declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3999 declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4000 declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4001 declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4002 declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4003 declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4004 declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4005 declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4006 declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4007 declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4008 declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4010 declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4011 declare i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4012 declare i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4013 declare i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4014 declare i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4015 declare i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4016 declare i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4017 declare i64 @llvm.amdgcn.image.atomic.and.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4018 declare i64 @llvm.amdgcn.image.atomic.or.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4019 declare i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4020 declare i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4021 declare i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4022 declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64, i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4023 declare i64 @llvm.amdgcn.image.atomic.add.2d.i64.i32(i64, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4024 declare i64 @llvm.amdgcn.image.atomic.add.3d.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4025 declare i64 @llvm.amdgcn.image.atomic.add.cube.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4026 declare i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i32(i64, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4027 declare i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4028 declare i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4029 declare i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i32(i64, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
4031 attributes #0 = { nounwind }