1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - %s | FileCheck -check-prefix=GFX8 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s
; Test: GlobalISel selection of @llvm.amdgcn.image.atomic.swap.1d.i32.i32 in an
; amdgpu_ps function. %data and %s are plain i32 arguments, %rsrc is the inreg
; <8 x i32> argument, and the i32 result of the call is bitcast to float.
; Every check prefix expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_swap v0, v1, s[0:7] with dmask:0x1 unorm glc, then an s_waitcnt
; on vmcnt(0). The GFX6 checks also list expcnt(0) on the wait; the GFX10
; checks also list dim:SQ_RSRC_IMG_1D on the image instruction.
7 define amdgpu_ps float @atomic_swap_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
8 ; GFX6-LABEL: atomic_swap_i32_1d:
9 ; GFX6: ; %bb.0: ; %main_body
10 ; GFX6-NEXT: s_mov_b32 s0, s2
11 ; GFX6-NEXT: s_mov_b32 s1, s3
12 ; GFX6-NEXT: s_mov_b32 s2, s4
13 ; GFX6-NEXT: s_mov_b32 s3, s5
14 ; GFX6-NEXT: s_mov_b32 s4, s6
15 ; GFX6-NEXT: s_mov_b32 s5, s7
16 ; GFX6-NEXT: s_mov_b32 s6, s8
17 ; GFX6-NEXT: s_mov_b32 s7, s9
18 ; GFX6-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc
19 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
20 ; GFX6-NEXT: ; return to shader part epilog
22 ; GFX8-LABEL: atomic_swap_i32_1d:
23 ; GFX8: ; %bb.0: ; %main_body
24 ; GFX8-NEXT: s_mov_b32 s0, s2
25 ; GFX8-NEXT: s_mov_b32 s1, s3
26 ; GFX8-NEXT: s_mov_b32 s2, s4
27 ; GFX8-NEXT: s_mov_b32 s3, s5
28 ; GFX8-NEXT: s_mov_b32 s4, s6
29 ; GFX8-NEXT: s_mov_b32 s5, s7
30 ; GFX8-NEXT: s_mov_b32 s6, s8
31 ; GFX8-NEXT: s_mov_b32 s7, s9
32 ; GFX8-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc
33 ; GFX8-NEXT: s_waitcnt vmcnt(0)
34 ; GFX8-NEXT: ; return to shader part epilog
36 ; GFX10-LABEL: atomic_swap_i32_1d:
37 ; GFX10: ; %bb.0: ; %main_body
38 ; GFX10-NEXT: s_mov_b32 s0, s2
39 ; GFX10-NEXT: s_mov_b32 s1, s3
40 ; GFX10-NEXT: s_mov_b32 s2, s4
41 ; GFX10-NEXT: s_mov_b32 s3, s5
42 ; GFX10-NEXT: s_mov_b32 s4, s6
43 ; GFX10-NEXT: s_mov_b32 s5, s7
44 ; GFX10-NEXT: s_mov_b32 s6, s8
45 ; GFX10-NEXT: s_mov_b32 s7, s9
46 ; GFX10-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
47 ; GFX10-NEXT: s_waitcnt vmcnt(0)
48 ; GFX10-NEXT: ; return to shader part epilog
50 %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
51 %out = bitcast i32 %v to float
; Test: GlobalISel selection of @llvm.amdgcn.image.atomic.add.1d.i32.i32 in an
; amdgpu_ps function. %data and %s are plain i32 arguments, %rsrc is the inreg
; <8 x i32> argument, and the i32 result of the call is bitcast to float.
; Every check prefix expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_add v0, v1, s[0:7] with dmask:0x1 unorm glc, then an s_waitcnt
; on vmcnt(0). The GFX6 checks also list expcnt(0) on the wait; the GFX10
; checks also list dim:SQ_RSRC_IMG_1D on the image instruction.
55 define amdgpu_ps float @atomic_add_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
56 ; GFX6-LABEL: atomic_add_i32_1d:
57 ; GFX6: ; %bb.0: ; %main_body
58 ; GFX6-NEXT: s_mov_b32 s0, s2
59 ; GFX6-NEXT: s_mov_b32 s1, s3
60 ; GFX6-NEXT: s_mov_b32 s2, s4
61 ; GFX6-NEXT: s_mov_b32 s3, s5
62 ; GFX6-NEXT: s_mov_b32 s4, s6
63 ; GFX6-NEXT: s_mov_b32 s5, s7
64 ; GFX6-NEXT: s_mov_b32 s6, s8
65 ; GFX6-NEXT: s_mov_b32 s7, s9
66 ; GFX6-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc
67 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
68 ; GFX6-NEXT: ; return to shader part epilog
70 ; GFX8-LABEL: atomic_add_i32_1d:
71 ; GFX8: ; %bb.0: ; %main_body
72 ; GFX8-NEXT: s_mov_b32 s0, s2
73 ; GFX8-NEXT: s_mov_b32 s1, s3
74 ; GFX8-NEXT: s_mov_b32 s2, s4
75 ; GFX8-NEXT: s_mov_b32 s3, s5
76 ; GFX8-NEXT: s_mov_b32 s4, s6
77 ; GFX8-NEXT: s_mov_b32 s5, s7
78 ; GFX8-NEXT: s_mov_b32 s6, s8
79 ; GFX8-NEXT: s_mov_b32 s7, s9
80 ; GFX8-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc
81 ; GFX8-NEXT: s_waitcnt vmcnt(0)
82 ; GFX8-NEXT: ; return to shader part epilog
84 ; GFX10-LABEL: atomic_add_i32_1d:
85 ; GFX10: ; %bb.0: ; %main_body
86 ; GFX10-NEXT: s_mov_b32 s0, s2
87 ; GFX10-NEXT: s_mov_b32 s1, s3
88 ; GFX10-NEXT: s_mov_b32 s2, s4
89 ; GFX10-NEXT: s_mov_b32 s3, s5
90 ; GFX10-NEXT: s_mov_b32 s4, s6
91 ; GFX10-NEXT: s_mov_b32 s5, s7
92 ; GFX10-NEXT: s_mov_b32 s6, s8
93 ; GFX10-NEXT: s_mov_b32 s7, s9
94 ; GFX10-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
95 ; GFX10-NEXT: s_waitcnt vmcnt(0)
96 ; GFX10-NEXT: ; return to shader part epilog
98 %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
99 %out = bitcast i32 %v to float
; Test: GlobalISel selection of @llvm.amdgcn.image.atomic.sub.1d.i32.i32 in an
; amdgpu_ps function. %data and %s are plain i32 arguments, %rsrc is the inreg
; <8 x i32> argument, and the i32 result of the call is bitcast to float.
; Every check prefix expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_sub v0, v1, s[0:7] with dmask:0x1 unorm glc, then an s_waitcnt
; on vmcnt(0). The GFX6 checks also list expcnt(0) on the wait; the GFX10
; checks also list dim:SQ_RSRC_IMG_1D on the image instruction.
103 define amdgpu_ps float @atomic_sub_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
104 ; GFX6-LABEL: atomic_sub_i32_1d:
105 ; GFX6: ; %bb.0: ; %main_body
106 ; GFX6-NEXT: s_mov_b32 s0, s2
107 ; GFX6-NEXT: s_mov_b32 s1, s3
108 ; GFX6-NEXT: s_mov_b32 s2, s4
109 ; GFX6-NEXT: s_mov_b32 s3, s5
110 ; GFX6-NEXT: s_mov_b32 s4, s6
111 ; GFX6-NEXT: s_mov_b32 s5, s7
112 ; GFX6-NEXT: s_mov_b32 s6, s8
113 ; GFX6-NEXT: s_mov_b32 s7, s9
114 ; GFX6-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc
115 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
116 ; GFX6-NEXT: ; return to shader part epilog
118 ; GFX8-LABEL: atomic_sub_i32_1d:
119 ; GFX8: ; %bb.0: ; %main_body
120 ; GFX8-NEXT: s_mov_b32 s0, s2
121 ; GFX8-NEXT: s_mov_b32 s1, s3
122 ; GFX8-NEXT: s_mov_b32 s2, s4
123 ; GFX8-NEXT: s_mov_b32 s3, s5
124 ; GFX8-NEXT: s_mov_b32 s4, s6
125 ; GFX8-NEXT: s_mov_b32 s5, s7
126 ; GFX8-NEXT: s_mov_b32 s6, s8
127 ; GFX8-NEXT: s_mov_b32 s7, s9
128 ; GFX8-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc
129 ; GFX8-NEXT: s_waitcnt vmcnt(0)
130 ; GFX8-NEXT: ; return to shader part epilog
132 ; GFX10-LABEL: atomic_sub_i32_1d:
133 ; GFX10: ; %bb.0: ; %main_body
134 ; GFX10-NEXT: s_mov_b32 s0, s2
135 ; GFX10-NEXT: s_mov_b32 s1, s3
136 ; GFX10-NEXT: s_mov_b32 s2, s4
137 ; GFX10-NEXT: s_mov_b32 s3, s5
138 ; GFX10-NEXT: s_mov_b32 s4, s6
139 ; GFX10-NEXT: s_mov_b32 s5, s7
140 ; GFX10-NEXT: s_mov_b32 s6, s8
141 ; GFX10-NEXT: s_mov_b32 s7, s9
142 ; GFX10-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
143 ; GFX10-NEXT: s_waitcnt vmcnt(0)
144 ; GFX10-NEXT: ; return to shader part epilog
146 %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
147 %out = bitcast i32 %v to float
; Test: GlobalISel selection of @llvm.amdgcn.image.atomic.smin.1d.i32.i32 in an
; amdgpu_ps function. %data and %s are plain i32 arguments, %rsrc is the inreg
; <8 x i32> argument, and the i32 result of the call is bitcast to float.
; Every check prefix expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_smin v0, v1, s[0:7] with dmask:0x1 unorm glc, then an s_waitcnt
; on vmcnt(0). The GFX6 checks also list expcnt(0) on the wait; the GFX10
; checks also list dim:SQ_RSRC_IMG_1D on the image instruction.
151 define amdgpu_ps float @atomic_smin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
152 ; GFX6-LABEL: atomic_smin_i32_1d:
153 ; GFX6: ; %bb.0: ; %main_body
154 ; GFX6-NEXT: s_mov_b32 s0, s2
155 ; GFX6-NEXT: s_mov_b32 s1, s3
156 ; GFX6-NEXT: s_mov_b32 s2, s4
157 ; GFX6-NEXT: s_mov_b32 s3, s5
158 ; GFX6-NEXT: s_mov_b32 s4, s6
159 ; GFX6-NEXT: s_mov_b32 s5, s7
160 ; GFX6-NEXT: s_mov_b32 s6, s8
161 ; GFX6-NEXT: s_mov_b32 s7, s9
162 ; GFX6-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc
163 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
164 ; GFX6-NEXT: ; return to shader part epilog
166 ; GFX8-LABEL: atomic_smin_i32_1d:
167 ; GFX8: ; %bb.0: ; %main_body
168 ; GFX8-NEXT: s_mov_b32 s0, s2
169 ; GFX8-NEXT: s_mov_b32 s1, s3
170 ; GFX8-NEXT: s_mov_b32 s2, s4
171 ; GFX8-NEXT: s_mov_b32 s3, s5
172 ; GFX8-NEXT: s_mov_b32 s4, s6
173 ; GFX8-NEXT: s_mov_b32 s5, s7
174 ; GFX8-NEXT: s_mov_b32 s6, s8
175 ; GFX8-NEXT: s_mov_b32 s7, s9
176 ; GFX8-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc
177 ; GFX8-NEXT: s_waitcnt vmcnt(0)
178 ; GFX8-NEXT: ; return to shader part epilog
180 ; GFX10-LABEL: atomic_smin_i32_1d:
181 ; GFX10: ; %bb.0: ; %main_body
182 ; GFX10-NEXT: s_mov_b32 s0, s2
183 ; GFX10-NEXT: s_mov_b32 s1, s3
184 ; GFX10-NEXT: s_mov_b32 s2, s4
185 ; GFX10-NEXT: s_mov_b32 s3, s5
186 ; GFX10-NEXT: s_mov_b32 s4, s6
187 ; GFX10-NEXT: s_mov_b32 s5, s7
188 ; GFX10-NEXT: s_mov_b32 s6, s8
189 ; GFX10-NEXT: s_mov_b32 s7, s9
190 ; GFX10-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
191 ; GFX10-NEXT: s_waitcnt vmcnt(0)
192 ; GFX10-NEXT: ; return to shader part epilog
194 %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
195 %out = bitcast i32 %v to float
; Test: GlobalISel selection of @llvm.amdgcn.image.atomic.umin.1d.i32.i32 in an
; amdgpu_ps function. %data and %s are plain i32 arguments, %rsrc is the inreg
; <8 x i32> argument, and the i32 result of the call is bitcast to float.
; Every check prefix expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_umin v0, v1, s[0:7] with dmask:0x1 unorm glc, then an s_waitcnt
; on vmcnt(0). The GFX6 checks also list expcnt(0) on the wait; the GFX10
; checks also list dim:SQ_RSRC_IMG_1D on the image instruction.
199 define amdgpu_ps float @atomic_umin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
200 ; GFX6-LABEL: atomic_umin_i32_1d:
201 ; GFX6: ; %bb.0: ; %main_body
202 ; GFX6-NEXT: s_mov_b32 s0, s2
203 ; GFX6-NEXT: s_mov_b32 s1, s3
204 ; GFX6-NEXT: s_mov_b32 s2, s4
205 ; GFX6-NEXT: s_mov_b32 s3, s5
206 ; GFX6-NEXT: s_mov_b32 s4, s6
207 ; GFX6-NEXT: s_mov_b32 s5, s7
208 ; GFX6-NEXT: s_mov_b32 s6, s8
209 ; GFX6-NEXT: s_mov_b32 s7, s9
210 ; GFX6-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc
211 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
212 ; GFX6-NEXT: ; return to shader part epilog
214 ; GFX8-LABEL: atomic_umin_i32_1d:
215 ; GFX8: ; %bb.0: ; %main_body
216 ; GFX8-NEXT: s_mov_b32 s0, s2
217 ; GFX8-NEXT: s_mov_b32 s1, s3
218 ; GFX8-NEXT: s_mov_b32 s2, s4
219 ; GFX8-NEXT: s_mov_b32 s3, s5
220 ; GFX8-NEXT: s_mov_b32 s4, s6
221 ; GFX8-NEXT: s_mov_b32 s5, s7
222 ; GFX8-NEXT: s_mov_b32 s6, s8
223 ; GFX8-NEXT: s_mov_b32 s7, s9
224 ; GFX8-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc
225 ; GFX8-NEXT: s_waitcnt vmcnt(0)
226 ; GFX8-NEXT: ; return to shader part epilog
228 ; GFX10-LABEL: atomic_umin_i32_1d:
229 ; GFX10: ; %bb.0: ; %main_body
230 ; GFX10-NEXT: s_mov_b32 s0, s2
231 ; GFX10-NEXT: s_mov_b32 s1, s3
232 ; GFX10-NEXT: s_mov_b32 s2, s4
233 ; GFX10-NEXT: s_mov_b32 s3, s5
234 ; GFX10-NEXT: s_mov_b32 s4, s6
235 ; GFX10-NEXT: s_mov_b32 s5, s7
236 ; GFX10-NEXT: s_mov_b32 s6, s8
237 ; GFX10-NEXT: s_mov_b32 s7, s9
238 ; GFX10-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
239 ; GFX10-NEXT: s_waitcnt vmcnt(0)
240 ; GFX10-NEXT: ; return to shader part epilog
242 %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
243 %out = bitcast i32 %v to float
; Test: GlobalISel selection of @llvm.amdgcn.image.atomic.smax.1d.i32.i32 in an
; amdgpu_ps function. %data and %s are plain i32 arguments, %rsrc is the inreg
; <8 x i32> argument, and the i32 result of the call is bitcast to float.
; Every check prefix expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_smax v0, v1, s[0:7] with dmask:0x1 unorm glc, then an s_waitcnt
; on vmcnt(0). The GFX6 checks also list expcnt(0) on the wait; the GFX10
; checks also list dim:SQ_RSRC_IMG_1D on the image instruction.
247 define amdgpu_ps float @atomic_smax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
248 ; GFX6-LABEL: atomic_smax_i32_1d:
249 ; GFX6: ; %bb.0: ; %main_body
250 ; GFX6-NEXT: s_mov_b32 s0, s2
251 ; GFX6-NEXT: s_mov_b32 s1, s3
252 ; GFX6-NEXT: s_mov_b32 s2, s4
253 ; GFX6-NEXT: s_mov_b32 s3, s5
254 ; GFX6-NEXT: s_mov_b32 s4, s6
255 ; GFX6-NEXT: s_mov_b32 s5, s7
256 ; GFX6-NEXT: s_mov_b32 s6, s8
257 ; GFX6-NEXT: s_mov_b32 s7, s9
258 ; GFX6-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc
259 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
260 ; GFX6-NEXT: ; return to shader part epilog
262 ; GFX8-LABEL: atomic_smax_i32_1d:
263 ; GFX8: ; %bb.0: ; %main_body
264 ; GFX8-NEXT: s_mov_b32 s0, s2
265 ; GFX8-NEXT: s_mov_b32 s1, s3
266 ; GFX8-NEXT: s_mov_b32 s2, s4
267 ; GFX8-NEXT: s_mov_b32 s3, s5
268 ; GFX8-NEXT: s_mov_b32 s4, s6
269 ; GFX8-NEXT: s_mov_b32 s5, s7
270 ; GFX8-NEXT: s_mov_b32 s6, s8
271 ; GFX8-NEXT: s_mov_b32 s7, s9
272 ; GFX8-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc
273 ; GFX8-NEXT: s_waitcnt vmcnt(0)
274 ; GFX8-NEXT: ; return to shader part epilog
276 ; GFX10-LABEL: atomic_smax_i32_1d:
277 ; GFX10: ; %bb.0: ; %main_body
278 ; GFX10-NEXT: s_mov_b32 s0, s2
279 ; GFX10-NEXT: s_mov_b32 s1, s3
280 ; GFX10-NEXT: s_mov_b32 s2, s4
281 ; GFX10-NEXT: s_mov_b32 s3, s5
282 ; GFX10-NEXT: s_mov_b32 s4, s6
283 ; GFX10-NEXT: s_mov_b32 s5, s7
284 ; GFX10-NEXT: s_mov_b32 s6, s8
285 ; GFX10-NEXT: s_mov_b32 s7, s9
286 ; GFX10-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
287 ; GFX10-NEXT: s_waitcnt vmcnt(0)
288 ; GFX10-NEXT: ; return to shader part epilog
290 %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
291 %out = bitcast i32 %v to float
; Test: GlobalISel selection of @llvm.amdgcn.image.atomic.umax.1d.i32.i32 in an
; amdgpu_ps function. %data and %s are plain i32 arguments, %rsrc is the inreg
; <8 x i32> argument, and the i32 result of the call is bitcast to float.
; Every check prefix expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_umax v0, v1, s[0:7] with dmask:0x1 unorm glc, then an s_waitcnt
; on vmcnt(0). The GFX6 checks also list expcnt(0) on the wait; the GFX10
; checks also list dim:SQ_RSRC_IMG_1D on the image instruction.
295 define amdgpu_ps float @atomic_umax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
296 ; GFX6-LABEL: atomic_umax_i32_1d:
297 ; GFX6: ; %bb.0: ; %main_body
298 ; GFX6-NEXT: s_mov_b32 s0, s2
299 ; GFX6-NEXT: s_mov_b32 s1, s3
300 ; GFX6-NEXT: s_mov_b32 s2, s4
301 ; GFX6-NEXT: s_mov_b32 s3, s5
302 ; GFX6-NEXT: s_mov_b32 s4, s6
303 ; GFX6-NEXT: s_mov_b32 s5, s7
304 ; GFX6-NEXT: s_mov_b32 s6, s8
305 ; GFX6-NEXT: s_mov_b32 s7, s9
306 ; GFX6-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc
307 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
308 ; GFX6-NEXT: ; return to shader part epilog
310 ; GFX8-LABEL: atomic_umax_i32_1d:
311 ; GFX8: ; %bb.0: ; %main_body
312 ; GFX8-NEXT: s_mov_b32 s0, s2
313 ; GFX8-NEXT: s_mov_b32 s1, s3
314 ; GFX8-NEXT: s_mov_b32 s2, s4
315 ; GFX8-NEXT: s_mov_b32 s3, s5
316 ; GFX8-NEXT: s_mov_b32 s4, s6
317 ; GFX8-NEXT: s_mov_b32 s5, s7
318 ; GFX8-NEXT: s_mov_b32 s6, s8
319 ; GFX8-NEXT: s_mov_b32 s7, s9
320 ; GFX8-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc
321 ; GFX8-NEXT: s_waitcnt vmcnt(0)
322 ; GFX8-NEXT: ; return to shader part epilog
324 ; GFX10-LABEL: atomic_umax_i32_1d:
325 ; GFX10: ; %bb.0: ; %main_body
326 ; GFX10-NEXT: s_mov_b32 s0, s2
327 ; GFX10-NEXT: s_mov_b32 s1, s3
328 ; GFX10-NEXT: s_mov_b32 s2, s4
329 ; GFX10-NEXT: s_mov_b32 s3, s5
330 ; GFX10-NEXT: s_mov_b32 s4, s6
331 ; GFX10-NEXT: s_mov_b32 s5, s7
332 ; GFX10-NEXT: s_mov_b32 s6, s8
333 ; GFX10-NEXT: s_mov_b32 s7, s9
334 ; GFX10-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
335 ; GFX10-NEXT: s_waitcnt vmcnt(0)
336 ; GFX10-NEXT: ; return to shader part epilog
338 %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
339 %out = bitcast i32 %v to float
; Test: GlobalISel selection of @llvm.amdgcn.image.atomic.and.1d.i32.i32 in an
; amdgpu_ps function. %data and %s are plain i32 arguments, %rsrc is the inreg
; <8 x i32> argument, and the i32 result of the call is bitcast to float.
; Every check prefix expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_and v0, v1, s[0:7] with dmask:0x1 unorm glc, then an s_waitcnt
; on vmcnt(0). The GFX6 checks also list expcnt(0) on the wait; the GFX10
; checks also list dim:SQ_RSRC_IMG_1D on the image instruction.
343 define amdgpu_ps float @atomic_and_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
344 ; GFX6-LABEL: atomic_and_i32_1d:
345 ; GFX6: ; %bb.0: ; %main_body
346 ; GFX6-NEXT: s_mov_b32 s0, s2
347 ; GFX6-NEXT: s_mov_b32 s1, s3
348 ; GFX6-NEXT: s_mov_b32 s2, s4
349 ; GFX6-NEXT: s_mov_b32 s3, s5
350 ; GFX6-NEXT: s_mov_b32 s4, s6
351 ; GFX6-NEXT: s_mov_b32 s5, s7
352 ; GFX6-NEXT: s_mov_b32 s6, s8
353 ; GFX6-NEXT: s_mov_b32 s7, s9
354 ; GFX6-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc
355 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
356 ; GFX6-NEXT: ; return to shader part epilog
358 ; GFX8-LABEL: atomic_and_i32_1d:
359 ; GFX8: ; %bb.0: ; %main_body
360 ; GFX8-NEXT: s_mov_b32 s0, s2
361 ; GFX8-NEXT: s_mov_b32 s1, s3
362 ; GFX8-NEXT: s_mov_b32 s2, s4
363 ; GFX8-NEXT: s_mov_b32 s3, s5
364 ; GFX8-NEXT: s_mov_b32 s4, s6
365 ; GFX8-NEXT: s_mov_b32 s5, s7
366 ; GFX8-NEXT: s_mov_b32 s6, s8
367 ; GFX8-NEXT: s_mov_b32 s7, s9
368 ; GFX8-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc
369 ; GFX8-NEXT: s_waitcnt vmcnt(0)
370 ; GFX8-NEXT: ; return to shader part epilog
372 ; GFX10-LABEL: atomic_and_i32_1d:
373 ; GFX10: ; %bb.0: ; %main_body
374 ; GFX10-NEXT: s_mov_b32 s0, s2
375 ; GFX10-NEXT: s_mov_b32 s1, s3
376 ; GFX10-NEXT: s_mov_b32 s2, s4
377 ; GFX10-NEXT: s_mov_b32 s3, s5
378 ; GFX10-NEXT: s_mov_b32 s4, s6
379 ; GFX10-NEXT: s_mov_b32 s5, s7
380 ; GFX10-NEXT: s_mov_b32 s6, s8
381 ; GFX10-NEXT: s_mov_b32 s7, s9
382 ; GFX10-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
383 ; GFX10-NEXT: s_waitcnt vmcnt(0)
384 ; GFX10-NEXT: ; return to shader part epilog
386 %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
387 %out = bitcast i32 %v to float
; Test: GlobalISel selection of @llvm.amdgcn.image.atomic.or.1d.i32.i32 in an
; amdgpu_ps function. %data and %s are plain i32 arguments, %rsrc is the inreg
; <8 x i32> argument, and the i32 result of the call is bitcast to float.
; Every check prefix expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_or v0, v1, s[0:7] with dmask:0x1 unorm glc, then an s_waitcnt
; on vmcnt(0). The GFX6 checks also list expcnt(0) on the wait; the GFX10
; checks also list dim:SQ_RSRC_IMG_1D on the image instruction.
391 define amdgpu_ps float @atomic_or_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
392 ; GFX6-LABEL: atomic_or_i32_1d:
393 ; GFX6: ; %bb.0: ; %main_body
394 ; GFX6-NEXT: s_mov_b32 s0, s2
395 ; GFX6-NEXT: s_mov_b32 s1, s3
396 ; GFX6-NEXT: s_mov_b32 s2, s4
397 ; GFX6-NEXT: s_mov_b32 s3, s5
398 ; GFX6-NEXT: s_mov_b32 s4, s6
399 ; GFX6-NEXT: s_mov_b32 s5, s7
400 ; GFX6-NEXT: s_mov_b32 s6, s8
401 ; GFX6-NEXT: s_mov_b32 s7, s9
402 ; GFX6-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc
403 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
404 ; GFX6-NEXT: ; return to shader part epilog
406 ; GFX8-LABEL: atomic_or_i32_1d:
407 ; GFX8: ; %bb.0: ; %main_body
408 ; GFX8-NEXT: s_mov_b32 s0, s2
409 ; GFX8-NEXT: s_mov_b32 s1, s3
410 ; GFX8-NEXT: s_mov_b32 s2, s4
411 ; GFX8-NEXT: s_mov_b32 s3, s5
412 ; GFX8-NEXT: s_mov_b32 s4, s6
413 ; GFX8-NEXT: s_mov_b32 s5, s7
414 ; GFX8-NEXT: s_mov_b32 s6, s8
415 ; GFX8-NEXT: s_mov_b32 s7, s9
416 ; GFX8-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc
417 ; GFX8-NEXT: s_waitcnt vmcnt(0)
418 ; GFX8-NEXT: ; return to shader part epilog
420 ; GFX10-LABEL: atomic_or_i32_1d:
421 ; GFX10: ; %bb.0: ; %main_body
422 ; GFX10-NEXT: s_mov_b32 s0, s2
423 ; GFX10-NEXT: s_mov_b32 s1, s3
424 ; GFX10-NEXT: s_mov_b32 s2, s4
425 ; GFX10-NEXT: s_mov_b32 s3, s5
426 ; GFX10-NEXT: s_mov_b32 s4, s6
427 ; GFX10-NEXT: s_mov_b32 s5, s7
428 ; GFX10-NEXT: s_mov_b32 s6, s8
429 ; GFX10-NEXT: s_mov_b32 s7, s9
430 ; GFX10-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
431 ; GFX10-NEXT: s_waitcnt vmcnt(0)
432 ; GFX10-NEXT: ; return to shader part epilog
434 %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
435 %out = bitcast i32 %v to float
; Test: GlobalISel selection of @llvm.amdgcn.image.atomic.xor.1d.i32.i32 in an
; amdgpu_ps function. %data and %s are plain i32 arguments, %rsrc is the inreg
; <8 x i32> argument, and the i32 result of the call is bitcast to float.
; Every check prefix expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_xor v0, v1, s[0:7] with dmask:0x1 unorm glc, then an s_waitcnt
; on vmcnt(0). The GFX6 checks also list expcnt(0) on the wait; the GFX10
; checks also list dim:SQ_RSRC_IMG_1D on the image instruction.
439 define amdgpu_ps float @atomic_xor_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
440 ; GFX6-LABEL: atomic_xor_i32_1d:
441 ; GFX6: ; %bb.0: ; %main_body
442 ; GFX6-NEXT: s_mov_b32 s0, s2
443 ; GFX6-NEXT: s_mov_b32 s1, s3
444 ; GFX6-NEXT: s_mov_b32 s2, s4
445 ; GFX6-NEXT: s_mov_b32 s3, s5
446 ; GFX6-NEXT: s_mov_b32 s4, s6
447 ; GFX6-NEXT: s_mov_b32 s5, s7
448 ; GFX6-NEXT: s_mov_b32 s6, s8
449 ; GFX6-NEXT: s_mov_b32 s7, s9
450 ; GFX6-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc
451 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
452 ; GFX6-NEXT: ; return to shader part epilog
454 ; GFX8-LABEL: atomic_xor_i32_1d:
455 ; GFX8: ; %bb.0: ; %main_body
456 ; GFX8-NEXT: s_mov_b32 s0, s2
457 ; GFX8-NEXT: s_mov_b32 s1, s3
458 ; GFX8-NEXT: s_mov_b32 s2, s4
459 ; GFX8-NEXT: s_mov_b32 s3, s5
460 ; GFX8-NEXT: s_mov_b32 s4, s6
461 ; GFX8-NEXT: s_mov_b32 s5, s7
462 ; GFX8-NEXT: s_mov_b32 s6, s8
463 ; GFX8-NEXT: s_mov_b32 s7, s9
464 ; GFX8-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc
465 ; GFX8-NEXT: s_waitcnt vmcnt(0)
466 ; GFX8-NEXT: ; return to shader part epilog
468 ; GFX10-LABEL: atomic_xor_i32_1d:
469 ; GFX10: ; %bb.0: ; %main_body
470 ; GFX10-NEXT: s_mov_b32 s0, s2
471 ; GFX10-NEXT: s_mov_b32 s1, s3
472 ; GFX10-NEXT: s_mov_b32 s2, s4
473 ; GFX10-NEXT: s_mov_b32 s3, s5
474 ; GFX10-NEXT: s_mov_b32 s4, s6
475 ; GFX10-NEXT: s_mov_b32 s5, s7
476 ; GFX10-NEXT: s_mov_b32 s6, s8
477 ; GFX10-NEXT: s_mov_b32 s7, s9
478 ; GFX10-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
479 ; GFX10-NEXT: s_waitcnt vmcnt(0)
480 ; GFX10-NEXT: ; return to shader part epilog
482 %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
483 %out = bitcast i32 %v to float
; Test: GlobalISel selection of @llvm.amdgcn.image.atomic.inc.1d.i32.i32 in an
; amdgpu_ps function. %data and %s are plain i32 arguments, %rsrc is the inreg
; <8 x i32> argument, and the i32 result of the call is bitcast to float.
; Every check prefix expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_inc v0, v1, s[0:7] with dmask:0x1 unorm glc, then an s_waitcnt
; on vmcnt(0). The GFX6 checks also list expcnt(0) on the wait; the GFX10
; checks also list dim:SQ_RSRC_IMG_1D on the image instruction.
487 define amdgpu_ps float @atomic_inc_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
488 ; GFX6-LABEL: atomic_inc_i32_1d:
489 ; GFX6: ; %bb.0: ; %main_body
490 ; GFX6-NEXT: s_mov_b32 s0, s2
491 ; GFX6-NEXT: s_mov_b32 s1, s3
492 ; GFX6-NEXT: s_mov_b32 s2, s4
493 ; GFX6-NEXT: s_mov_b32 s3, s5
494 ; GFX6-NEXT: s_mov_b32 s4, s6
495 ; GFX6-NEXT: s_mov_b32 s5, s7
496 ; GFX6-NEXT: s_mov_b32 s6, s8
497 ; GFX6-NEXT: s_mov_b32 s7, s9
498 ; GFX6-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc
499 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
500 ; GFX6-NEXT: ; return to shader part epilog
502 ; GFX8-LABEL: atomic_inc_i32_1d:
503 ; GFX8: ; %bb.0: ; %main_body
504 ; GFX8-NEXT: s_mov_b32 s0, s2
505 ; GFX8-NEXT: s_mov_b32 s1, s3
506 ; GFX8-NEXT: s_mov_b32 s2, s4
507 ; GFX8-NEXT: s_mov_b32 s3, s5
508 ; GFX8-NEXT: s_mov_b32 s4, s6
509 ; GFX8-NEXT: s_mov_b32 s5, s7
510 ; GFX8-NEXT: s_mov_b32 s6, s8
511 ; GFX8-NEXT: s_mov_b32 s7, s9
512 ; GFX8-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc
513 ; GFX8-NEXT: s_waitcnt vmcnt(0)
514 ; GFX8-NEXT: ; return to shader part epilog
516 ; GFX10-LABEL: atomic_inc_i32_1d:
517 ; GFX10: ; %bb.0: ; %main_body
518 ; GFX10-NEXT: s_mov_b32 s0, s2
519 ; GFX10-NEXT: s_mov_b32 s1, s3
520 ; GFX10-NEXT: s_mov_b32 s2, s4
521 ; GFX10-NEXT: s_mov_b32 s3, s5
522 ; GFX10-NEXT: s_mov_b32 s4, s6
523 ; GFX10-NEXT: s_mov_b32 s5, s7
524 ; GFX10-NEXT: s_mov_b32 s6, s8
525 ; GFX10-NEXT: s_mov_b32 s7, s9
526 ; GFX10-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
527 ; GFX10-NEXT: s_waitcnt vmcnt(0)
528 ; GFX10-NEXT: ; return to shader part epilog
530 %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
531 %out = bitcast i32 %v to float
; Test: GlobalISel selection of @llvm.amdgcn.image.atomic.dec.1d.i32.i32 in an
; amdgpu_ps function. %data and %s are plain i32 arguments, %rsrc is the inreg
; <8 x i32> argument, and the i32 result of the call is bitcast to float.
; Every check prefix expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_dec v0, v1, s[0:7] with dmask:0x1 unorm glc, then an s_waitcnt
; on vmcnt(0). The GFX6 checks also list expcnt(0) on the wait; the GFX10
; checks also list dim:SQ_RSRC_IMG_1D on the image instruction.
535 define amdgpu_ps float @atomic_dec_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
536 ; GFX6-LABEL: atomic_dec_i32_1d:
537 ; GFX6: ; %bb.0: ; %main_body
538 ; GFX6-NEXT: s_mov_b32 s0, s2
539 ; GFX6-NEXT: s_mov_b32 s1, s3
540 ; GFX6-NEXT: s_mov_b32 s2, s4
541 ; GFX6-NEXT: s_mov_b32 s3, s5
542 ; GFX6-NEXT: s_mov_b32 s4, s6
543 ; GFX6-NEXT: s_mov_b32 s5, s7
544 ; GFX6-NEXT: s_mov_b32 s6, s8
545 ; GFX6-NEXT: s_mov_b32 s7, s9
546 ; GFX6-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc
547 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
548 ; GFX6-NEXT: ; return to shader part epilog
550 ; GFX8-LABEL: atomic_dec_i32_1d:
551 ; GFX8: ; %bb.0: ; %main_body
552 ; GFX8-NEXT: s_mov_b32 s0, s2
553 ; GFX8-NEXT: s_mov_b32 s1, s3
554 ; GFX8-NEXT: s_mov_b32 s2, s4
555 ; GFX8-NEXT: s_mov_b32 s3, s5
556 ; GFX8-NEXT: s_mov_b32 s4, s6
557 ; GFX8-NEXT: s_mov_b32 s5, s7
558 ; GFX8-NEXT: s_mov_b32 s6, s8
559 ; GFX8-NEXT: s_mov_b32 s7, s9
560 ; GFX8-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc
561 ; GFX8-NEXT: s_waitcnt vmcnt(0)
562 ; GFX8-NEXT: ; return to shader part epilog
564 ; GFX10-LABEL: atomic_dec_i32_1d:
565 ; GFX10: ; %bb.0: ; %main_body
566 ; GFX10-NEXT: s_mov_b32 s0, s2
567 ; GFX10-NEXT: s_mov_b32 s1, s3
568 ; GFX10-NEXT: s_mov_b32 s2, s4
569 ; GFX10-NEXT: s_mov_b32 s3, s5
570 ; GFX10-NEXT: s_mov_b32 s4, s6
571 ; GFX10-NEXT: s_mov_b32 s5, s7
572 ; GFX10-NEXT: s_mov_b32 s6, s8
573 ; GFX10-NEXT: s_mov_b32 s7, s9
574 ; GFX10-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
575 ; GFX10-NEXT: s_waitcnt vmcnt(0)
576 ; GFX10-NEXT: ; return to shader part epilog
578 %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
579 %out = bitcast i32 %v to float
; Test: GlobalISel selection of @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32 in
; an amdgpu_ps function. The call passes %cmp, %swap and %s (in that order)
; ahead of the inreg <8 x i32> %rsrc, and the i32 result is bitcast to float.
; Every check prefix expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_cmpswap v[0:1], v2, s[0:7] with dmask:0x3 unorm glc, then an
; s_waitcnt on vmcnt(0). The GFX6 checks also list expcnt(0) on the wait; the
; GFX10 checks also list dim:SQ_RSRC_IMG_1D on the image instruction.
583 define amdgpu_ps float @atomic_cmpswap_i32_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) {
584 ; GFX6-LABEL: atomic_cmpswap_i32_1d:
585 ; GFX6: ; %bb.0: ; %main_body
586 ; GFX6-NEXT: s_mov_b32 s0, s2
587 ; GFX6-NEXT: s_mov_b32 s1, s3
588 ; GFX6-NEXT: s_mov_b32 s2, s4
589 ; GFX6-NEXT: s_mov_b32 s3, s5
590 ; GFX6-NEXT: s_mov_b32 s4, s6
591 ; GFX6-NEXT: s_mov_b32 s5, s7
592 ; GFX6-NEXT: s_mov_b32 s6, s8
593 ; GFX6-NEXT: s_mov_b32 s7, s9
594 ; GFX6-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
595 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
596 ; GFX6-NEXT: ; return to shader part epilog
598 ; GFX8-LABEL: atomic_cmpswap_i32_1d:
599 ; GFX8: ; %bb.0: ; %main_body
600 ; GFX8-NEXT: s_mov_b32 s0, s2
601 ; GFX8-NEXT: s_mov_b32 s1, s3
602 ; GFX8-NEXT: s_mov_b32 s2, s4
603 ; GFX8-NEXT: s_mov_b32 s3, s5
604 ; GFX8-NEXT: s_mov_b32 s4, s6
605 ; GFX8-NEXT: s_mov_b32 s5, s7
606 ; GFX8-NEXT: s_mov_b32 s6, s8
607 ; GFX8-NEXT: s_mov_b32 s7, s9
608 ; GFX8-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
609 ; GFX8-NEXT: s_waitcnt vmcnt(0)
610 ; GFX8-NEXT: ; return to shader part epilog
612 ; GFX10-LABEL: atomic_cmpswap_i32_1d:
613 ; GFX10: ; %bb.0: ; %main_body
614 ; GFX10-NEXT: s_mov_b32 s0, s2
615 ; GFX10-NEXT: s_mov_b32 s1, s3
616 ; GFX10-NEXT: s_mov_b32 s2, s4
617 ; GFX10-NEXT: s_mov_b32 s3, s5
618 ; GFX10-NEXT: s_mov_b32 s4, s6
619 ; GFX10-NEXT: s_mov_b32 s5, s7
620 ; GFX10-NEXT: s_mov_b32 s6, s8
621 ; GFX10-NEXT: s_mov_b32 s7, s9
622 ; GFX10-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
623 ; GFX10-NEXT: s_waitcnt vmcnt(0)
624 ; GFX10-NEXT: ; return to shader part epilog
626 %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
627 %out = bitcast i32 %v to float
; Test: same @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32 call as the returning
; cmpswap test, but in a void amdgpu_ps function, so the i32 result %v cannot
; be returned to the caller.
; Every check prefix expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_cmpswap v[0:1], v2, s[0:7] with dmask:0x3 unorm glc, and then
; s_endpgm directly (no s_waitcnt is listed). The GFX10 checks also list
; dim:SQ_RSRC_IMG_1D on the image instruction.
; NOTE(review): glc is still expected even though the result is not returned;
; this records current autogenerated llc output -- confirm it is intended.
631 define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) {
632 ; GFX6-LABEL: atomic_cmpswap_i32_1d_no_return:
633 ; GFX6: ; %bb.0: ; %main_body
634 ; GFX6-NEXT: s_mov_b32 s0, s2
635 ; GFX6-NEXT: s_mov_b32 s1, s3
636 ; GFX6-NEXT: s_mov_b32 s2, s4
637 ; GFX6-NEXT: s_mov_b32 s3, s5
638 ; GFX6-NEXT: s_mov_b32 s4, s6
639 ; GFX6-NEXT: s_mov_b32 s5, s7
640 ; GFX6-NEXT: s_mov_b32 s6, s8
641 ; GFX6-NEXT: s_mov_b32 s7, s9
642 ; GFX6-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
643 ; GFX6-NEXT: s_endpgm
645 ; GFX8-LABEL: atomic_cmpswap_i32_1d_no_return:
646 ; GFX8: ; %bb.0: ; %main_body
647 ; GFX8-NEXT: s_mov_b32 s0, s2
648 ; GFX8-NEXT: s_mov_b32 s1, s3
649 ; GFX8-NEXT: s_mov_b32 s2, s4
650 ; GFX8-NEXT: s_mov_b32 s3, s5
651 ; GFX8-NEXT: s_mov_b32 s4, s6
652 ; GFX8-NEXT: s_mov_b32 s5, s7
653 ; GFX8-NEXT: s_mov_b32 s6, s8
654 ; GFX8-NEXT: s_mov_b32 s7, s9
655 ; GFX8-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
656 ; GFX8-NEXT: s_endpgm
658 ; GFX10-LABEL: atomic_cmpswap_i32_1d_no_return:
659 ; GFX10: ; %bb.0: ; %main_body
660 ; GFX10-NEXT: s_mov_b32 s0, s2
661 ; GFX10-NEXT: s_mov_b32 s1, s3
662 ; GFX10-NEXT: s_mov_b32 s2, s4
663 ; GFX10-NEXT: s_mov_b32 s3, s5
664 ; GFX10-NEXT: s_mov_b32 s4, s6
665 ; GFX10-NEXT: s_mov_b32 s5, s7
666 ; GFX10-NEXT: s_mov_b32 s6, s8
667 ; GFX10-NEXT: s_mov_b32 s7, s9
668 ; GFX10-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
669 ; GFX10-NEXT: s_endpgm
671 %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; Test: GlobalISel selection of @llvm.amdgcn.image.atomic.add.2d.i32.i32 in an
; amdgpu_ps function. The call passes %data, then %s and %t, ahead of the
; inreg <8 x i32> %rsrc, and the i32 result is bitcast to float.
; Every check prefix expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_add v0, v[1:2], s[0:7] with dmask:0x1 unorm glc, then an
; s_waitcnt on vmcnt(0). The GFX6 checks also list expcnt(0) on the wait; the
; GFX10 checks also list dim:SQ_RSRC_IMG_2D on the image instruction.
675 define amdgpu_ps float @atomic_add_i32_2d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t) {
676 ; GFX6-LABEL: atomic_add_i32_2d:
677 ; GFX6: ; %bb.0: ; %main_body
678 ; GFX6-NEXT: s_mov_b32 s0, s2
679 ; GFX6-NEXT: s_mov_b32 s1, s3
680 ; GFX6-NEXT: s_mov_b32 s2, s4
681 ; GFX6-NEXT: s_mov_b32 s3, s5
682 ; GFX6-NEXT: s_mov_b32 s4, s6
683 ; GFX6-NEXT: s_mov_b32 s5, s7
684 ; GFX6-NEXT: s_mov_b32 s6, s8
685 ; GFX6-NEXT: s_mov_b32 s7, s9
686 ; GFX6-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc
687 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
688 ; GFX6-NEXT: ; return to shader part epilog
690 ; GFX8-LABEL: atomic_add_i32_2d:
691 ; GFX8: ; %bb.0: ; %main_body
692 ; GFX8-NEXT: s_mov_b32 s0, s2
693 ; GFX8-NEXT: s_mov_b32 s1, s3
694 ; GFX8-NEXT: s_mov_b32 s2, s4
695 ; GFX8-NEXT: s_mov_b32 s3, s5
696 ; GFX8-NEXT: s_mov_b32 s4, s6
697 ; GFX8-NEXT: s_mov_b32 s5, s7
698 ; GFX8-NEXT: s_mov_b32 s6, s8
699 ; GFX8-NEXT: s_mov_b32 s7, s9
700 ; GFX8-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc
701 ; GFX8-NEXT: s_waitcnt vmcnt(0)
702 ; GFX8-NEXT: ; return to shader part epilog
704 ; GFX10-LABEL: atomic_add_i32_2d:
705 ; GFX10: ; %bb.0: ; %main_body
706 ; GFX10-NEXT: s_mov_b32 s0, s2
707 ; GFX10-NEXT: s_mov_b32 s1, s3
708 ; GFX10-NEXT: s_mov_b32 s2, s4
709 ; GFX10-NEXT: s_mov_b32 s3, s5
710 ; GFX10-NEXT: s_mov_b32 s4, s6
711 ; GFX10-NEXT: s_mov_b32 s5, s7
712 ; GFX10-NEXT: s_mov_b32 s6, s8
713 ; GFX10-NEXT: s_mov_b32 s7, s9
714 ; GFX10-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm glc
715 ; GFX10-NEXT: s_waitcnt vmcnt(0)
716 ; GFX10-NEXT: ; return to shader part epilog
718 %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
719 %out = bitcast i32 %v to float
; Test: GlobalISel selection of @llvm.amdgcn.image.atomic.add.3d.i32.i32 in an
; amdgpu_ps function. The call passes %data, then %s, %t and %r, ahead of the
; inreg <8 x i32> %rsrc, and the i32 result is bitcast to float.
; Every check prefix expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_add v0, v[1:3], s[0:7] with dmask:0x1 unorm glc, then an
; s_waitcnt on vmcnt(0). The GFX6 checks also list expcnt(0) on the wait; the
; GFX10 checks also list dim:SQ_RSRC_IMG_3D on the image instruction.
723 define amdgpu_ps float @atomic_add_i32_3d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %r) {
724 ; GFX6-LABEL: atomic_add_i32_3d:
725 ; GFX6: ; %bb.0: ; %main_body
726 ; GFX6-NEXT: s_mov_b32 s0, s2
727 ; GFX6-NEXT: s_mov_b32 s1, s3
728 ; GFX6-NEXT: s_mov_b32 s2, s4
729 ; GFX6-NEXT: s_mov_b32 s3, s5
730 ; GFX6-NEXT: s_mov_b32 s4, s6
731 ; GFX6-NEXT: s_mov_b32 s5, s7
732 ; GFX6-NEXT: s_mov_b32 s6, s8
733 ; GFX6-NEXT: s_mov_b32 s7, s9
734 ; GFX6-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
735 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
736 ; GFX6-NEXT: ; return to shader part epilog
738 ; GFX8-LABEL: atomic_add_i32_3d:
739 ; GFX8: ; %bb.0: ; %main_body
740 ; GFX8-NEXT: s_mov_b32 s0, s2
741 ; GFX8-NEXT: s_mov_b32 s1, s3
742 ; GFX8-NEXT: s_mov_b32 s2, s4
743 ; GFX8-NEXT: s_mov_b32 s3, s5
744 ; GFX8-NEXT: s_mov_b32 s4, s6
745 ; GFX8-NEXT: s_mov_b32 s5, s7
746 ; GFX8-NEXT: s_mov_b32 s6, s8
747 ; GFX8-NEXT: s_mov_b32 s7, s9
748 ; GFX8-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
749 ; GFX8-NEXT: s_waitcnt vmcnt(0)
750 ; GFX8-NEXT: ; return to shader part epilog
752 ; GFX10-LABEL: atomic_add_i32_3d:
753 ; GFX10: ; %bb.0: ; %main_body
754 ; GFX10-NEXT: s_mov_b32 s0, s2
755 ; GFX10-NEXT: s_mov_b32 s1, s3
756 ; GFX10-NEXT: s_mov_b32 s2, s4
757 ; GFX10-NEXT: s_mov_b32 s3, s5
758 ; GFX10-NEXT: s_mov_b32 s4, s6
759 ; GFX10-NEXT: s_mov_b32 s5, s7
760 ; GFX10-NEXT: s_mov_b32 s6, s8
761 ; GFX10-NEXT: s_mov_b32 s7, s9
762 ; GFX10-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm glc
763 ; GFX10-NEXT: s_waitcnt vmcnt(0)
764 ; GFX10-NEXT: ; return to shader part epilog
766 %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32 %data, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
767 %out = bitcast i32 %v to float
; Test: GlobalISel selection of @llvm.amdgcn.image.atomic.add.cube.i32.i32 in
; an amdgpu_ps function. The call passes %data, then %s, %t and %face, ahead of
; the inreg <8 x i32> %rsrc, and the i32 result is bitcast to float.
; Every check prefix expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_add v0, v[1:3], s[0:7] with dmask:0x1 unorm glc, then an
; s_waitcnt on vmcnt(0). The GFX6 and GFX8 checks expect a trailing "da"
; modifier on the image instruction; the GFX10 checks expect
; dim:SQ_RSRC_IMG_CUBE instead and no "da". The GFX6 checks also list
; expcnt(0) on the wait.
771 define amdgpu_ps float @atomic_add_i32_cube(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %face) {
772 ; GFX6-LABEL: atomic_add_i32_cube:
773 ; GFX6: ; %bb.0: ; %main_body
774 ; GFX6-NEXT: s_mov_b32 s0, s2
775 ; GFX6-NEXT: s_mov_b32 s1, s3
776 ; GFX6-NEXT: s_mov_b32 s2, s4
777 ; GFX6-NEXT: s_mov_b32 s3, s5
778 ; GFX6-NEXT: s_mov_b32 s4, s6
779 ; GFX6-NEXT: s_mov_b32 s5, s7
780 ; GFX6-NEXT: s_mov_b32 s6, s8
781 ; GFX6-NEXT: s_mov_b32 s7, s9
782 ; GFX6-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
783 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
784 ; GFX6-NEXT: ; return to shader part epilog
786 ; GFX8-LABEL: atomic_add_i32_cube:
787 ; GFX8: ; %bb.0: ; %main_body
788 ; GFX8-NEXT: s_mov_b32 s0, s2
789 ; GFX8-NEXT: s_mov_b32 s1, s3
790 ; GFX8-NEXT: s_mov_b32 s2, s4
791 ; GFX8-NEXT: s_mov_b32 s3, s5
792 ; GFX8-NEXT: s_mov_b32 s4, s6
793 ; GFX8-NEXT: s_mov_b32 s5, s7
794 ; GFX8-NEXT: s_mov_b32 s6, s8
795 ; GFX8-NEXT: s_mov_b32 s7, s9
796 ; GFX8-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
797 ; GFX8-NEXT: s_waitcnt vmcnt(0)
798 ; GFX8-NEXT: ; return to shader part epilog
800 ; GFX10-LABEL: atomic_add_i32_cube:
801 ; GFX10: ; %bb.0: ; %main_body
802 ; GFX10-NEXT: s_mov_b32 s0, s2
803 ; GFX10-NEXT: s_mov_b32 s1, s3
804 ; GFX10-NEXT: s_mov_b32 s2, s4
805 ; GFX10-NEXT: s_mov_b32 s3, s5
806 ; GFX10-NEXT: s_mov_b32 s4, s6
807 ; GFX10-NEXT: s_mov_b32 s5, s7
808 ; GFX10-NEXT: s_mov_b32 s6, s8
809 ; GFX10-NEXT: s_mov_b32 s7, s9
810 ; GFX10-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm glc
811 ; GFX10-NEXT: s_waitcnt vmcnt(0)
812 ; GFX10-NEXT: ; return to shader part epilog
814 %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32 %data, i32 %s, i32 %t, i32 %face, <8 x i32> %rsrc, i32 0, i32 0)
815 %out = bitcast i32 %v to float
; Test: GlobalISel selection of @llvm.amdgcn.image.atomic.add.1darray.i32.i32
; in an amdgpu_ps function. The call passes %data, then %s and %slice, ahead of
; the inreg <8 x i32> %rsrc, and the i32 result is bitcast to float.
; Every check prefix expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_add v0, v[1:2], s[0:7] with dmask:0x1 unorm glc, then an
; s_waitcnt on vmcnt(0). The GFX6 and GFX8 checks expect a trailing "da"
; modifier on the image instruction; the GFX10 checks expect
; dim:SQ_RSRC_IMG_1D_ARRAY instead and no "da". The GFX6 checks also list
; expcnt(0) on the wait.
819 define amdgpu_ps float @atomic_add_i32_1darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %slice) {
820 ; GFX6-LABEL: atomic_add_i32_1darray:
821 ; GFX6: ; %bb.0: ; %main_body
822 ; GFX6-NEXT: s_mov_b32 s0, s2
823 ; GFX6-NEXT: s_mov_b32 s1, s3
824 ; GFX6-NEXT: s_mov_b32 s2, s4
825 ; GFX6-NEXT: s_mov_b32 s3, s5
826 ; GFX6-NEXT: s_mov_b32 s4, s6
827 ; GFX6-NEXT: s_mov_b32 s5, s7
828 ; GFX6-NEXT: s_mov_b32 s6, s8
829 ; GFX6-NEXT: s_mov_b32 s7, s9
830 ; GFX6-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da
831 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
832 ; GFX6-NEXT: ; return to shader part epilog
834 ; GFX8-LABEL: atomic_add_i32_1darray:
835 ; GFX8: ; %bb.0: ; %main_body
836 ; GFX8-NEXT: s_mov_b32 s0, s2
837 ; GFX8-NEXT: s_mov_b32 s1, s3
838 ; GFX8-NEXT: s_mov_b32 s2, s4
839 ; GFX8-NEXT: s_mov_b32 s3, s5
840 ; GFX8-NEXT: s_mov_b32 s4, s6
841 ; GFX8-NEXT: s_mov_b32 s5, s7
842 ; GFX8-NEXT: s_mov_b32 s6, s8
843 ; GFX8-NEXT: s_mov_b32 s7, s9
844 ; GFX8-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da
845 ; GFX8-NEXT: s_waitcnt vmcnt(0)
846 ; GFX8-NEXT: ; return to shader part epilog
848 ; GFX10-LABEL: atomic_add_i32_1darray:
849 ; GFX10: ; %bb.0: ; %main_body
850 ; GFX10-NEXT: s_mov_b32 s0, s2
851 ; GFX10-NEXT: s_mov_b32 s1, s3
852 ; GFX10-NEXT: s_mov_b32 s2, s4
853 ; GFX10-NEXT: s_mov_b32 s3, s5
854 ; GFX10-NEXT: s_mov_b32 s4, s6
855 ; GFX10-NEXT: s_mov_b32 s5, s7
856 ; GFX10-NEXT: s_mov_b32 s6, s8
857 ; GFX10-NEXT: s_mov_b32 s7, s9
858 ; GFX10-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc
859 ; GFX10-NEXT: s_waitcnt vmcnt(0)
860 ; GFX10-NEXT: ; return to shader part epilog
862 %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32 %data, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
863 %out = bitcast i32 %v to float
; Image atomic add on a 2D-array image: i32 data, i32 coordinates
; (%s, %t, %slice). Every run line expects s2-s9 copied into s0-s7 and then
; image_atomic_add with v0 as the data operand and the three-register range
; v[1:3] as the address operand. The tahiti and fiji runs expect the `da`
; modifier; the gfx1010 run expects dim:SQ_RSRC_IMG_2D_ARRAY and no `da`.
; The i32 result is bitcast to float to match the function's return type.
867 define amdgpu_ps float @atomic_add_i32_2darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice) {
868 ; GFX6-LABEL: atomic_add_i32_2darray:
869 ; GFX6: ; %bb.0: ; %main_body
870 ; GFX6-NEXT: s_mov_b32 s0, s2
871 ; GFX6-NEXT: s_mov_b32 s1, s3
872 ; GFX6-NEXT: s_mov_b32 s2, s4
873 ; GFX6-NEXT: s_mov_b32 s3, s5
874 ; GFX6-NEXT: s_mov_b32 s4, s6
875 ; GFX6-NEXT: s_mov_b32 s5, s7
876 ; GFX6-NEXT: s_mov_b32 s6, s8
877 ; GFX6-NEXT: s_mov_b32 s7, s9
878 ; GFX6-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
879 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
880 ; GFX6-NEXT: ; return to shader part epilog
882 ; GFX8-LABEL: atomic_add_i32_2darray:
883 ; GFX8: ; %bb.0: ; %main_body
884 ; GFX8-NEXT: s_mov_b32 s0, s2
885 ; GFX8-NEXT: s_mov_b32 s1, s3
886 ; GFX8-NEXT: s_mov_b32 s2, s4
887 ; GFX8-NEXT: s_mov_b32 s3, s5
888 ; GFX8-NEXT: s_mov_b32 s4, s6
889 ; GFX8-NEXT: s_mov_b32 s5, s7
890 ; GFX8-NEXT: s_mov_b32 s6, s8
891 ; GFX8-NEXT: s_mov_b32 s7, s9
892 ; GFX8-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
893 ; GFX8-NEXT: s_waitcnt vmcnt(0)
894 ; GFX8-NEXT: ; return to shader part epilog
896 ; GFX10-LABEL: atomic_add_i32_2darray:
897 ; GFX10: ; %bb.0: ; %main_body
898 ; GFX10-NEXT: s_mov_b32 s0, s2
899 ; GFX10-NEXT: s_mov_b32 s1, s3
900 ; GFX10-NEXT: s_mov_b32 s2, s4
901 ; GFX10-NEXT: s_mov_b32 s3, s5
902 ; GFX10-NEXT: s_mov_b32 s4, s6
903 ; GFX10-NEXT: s_mov_b32 s5, s7
904 ; GFX10-NEXT: s_mov_b32 s6, s8
905 ; GFX10-NEXT: s_mov_b32 s7, s9
906 ; GFX10-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc
907 ; GFX10-NEXT: s_waitcnt vmcnt(0)
908 ; GFX10-NEXT: ; return to shader part epilog
910 %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
911 %out = bitcast i32 %v to float
; Image atomic add on a 2D multisample image: i32 data, i32 coordinates
; (%s, %t, %fragid). Every run line expects s2-s9 copied into s0-s7 and then
; image_atomic_add with v0 as the data operand and v[1:3] as the address
; operand. Unlike the array variants in this file, the tahiti and fiji runs
; expect no `da` modifier here; the gfx1010 run expects
; dim:SQ_RSRC_IMG_2D_MSAA. The i32 result is bitcast to float to match the
; function's return type.
915 define amdgpu_ps float @atomic_add_i32_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %fragid) {
916 ; GFX6-LABEL: atomic_add_i32_2dmsaa:
917 ; GFX6: ; %bb.0: ; %main_body
918 ; GFX6-NEXT: s_mov_b32 s0, s2
919 ; GFX6-NEXT: s_mov_b32 s1, s3
920 ; GFX6-NEXT: s_mov_b32 s2, s4
921 ; GFX6-NEXT: s_mov_b32 s3, s5
922 ; GFX6-NEXT: s_mov_b32 s4, s6
923 ; GFX6-NEXT: s_mov_b32 s5, s7
924 ; GFX6-NEXT: s_mov_b32 s6, s8
925 ; GFX6-NEXT: s_mov_b32 s7, s9
926 ; GFX6-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
927 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
928 ; GFX6-NEXT: ; return to shader part epilog
930 ; GFX8-LABEL: atomic_add_i32_2dmsaa:
931 ; GFX8: ; %bb.0: ; %main_body
932 ; GFX8-NEXT: s_mov_b32 s0, s2
933 ; GFX8-NEXT: s_mov_b32 s1, s3
934 ; GFX8-NEXT: s_mov_b32 s2, s4
935 ; GFX8-NEXT: s_mov_b32 s3, s5
936 ; GFX8-NEXT: s_mov_b32 s4, s6
937 ; GFX8-NEXT: s_mov_b32 s5, s7
938 ; GFX8-NEXT: s_mov_b32 s6, s8
939 ; GFX8-NEXT: s_mov_b32 s7, s9
940 ; GFX8-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
941 ; GFX8-NEXT: s_waitcnt vmcnt(0)
942 ; GFX8-NEXT: ; return to shader part epilog
944 ; GFX10-LABEL: atomic_add_i32_2dmsaa:
945 ; GFX10: ; %bb.0: ; %main_body
946 ; GFX10-NEXT: s_mov_b32 s0, s2
947 ; GFX10-NEXT: s_mov_b32 s1, s3
948 ; GFX10-NEXT: s_mov_b32 s2, s4
949 ; GFX10-NEXT: s_mov_b32 s3, s5
950 ; GFX10-NEXT: s_mov_b32 s4, s6
951 ; GFX10-NEXT: s_mov_b32 s5, s7
952 ; GFX10-NEXT: s_mov_b32 s6, s8
953 ; GFX10-NEXT: s_mov_b32 s7, s9
954 ; GFX10-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc
955 ; GFX10-NEXT: s_waitcnt vmcnt(0)
956 ; GFX10-NEXT: ; return to shader part epilog
958 %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
959 %out = bitcast i32 %v to float
; Image atomic add on a 2D multisample array image: i32 data, i32 coordinates
; (%s, %t, %slice, %fragid). Every run line expects s2-s9 copied into s0-s7
; and then image_atomic_add with v0 as the data operand and the four-register
; range v[1:4] as the address operand. The tahiti and fiji runs expect the
; `da` modifier; the gfx1010 run expects dim:SQ_RSRC_IMG_2D_MSAA_ARRAY and no
; `da`. The i32 result is bitcast to float to match the function's return type.
963 define amdgpu_ps float @atomic_add_i32_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
964 ; GFX6-LABEL: atomic_add_i32_2darraymsaa:
965 ; GFX6: ; %bb.0: ; %main_body
966 ; GFX6-NEXT: s_mov_b32 s0, s2
967 ; GFX6-NEXT: s_mov_b32 s1, s3
968 ; GFX6-NEXT: s_mov_b32 s2, s4
969 ; GFX6-NEXT: s_mov_b32 s3, s5
970 ; GFX6-NEXT: s_mov_b32 s4, s6
971 ; GFX6-NEXT: s_mov_b32 s5, s7
972 ; GFX6-NEXT: s_mov_b32 s6, s8
973 ; GFX6-NEXT: s_mov_b32 s7, s9
974 ; GFX6-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da
975 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
976 ; GFX6-NEXT: ; return to shader part epilog
978 ; GFX8-LABEL: atomic_add_i32_2darraymsaa:
979 ; GFX8: ; %bb.0: ; %main_body
980 ; GFX8-NEXT: s_mov_b32 s0, s2
981 ; GFX8-NEXT: s_mov_b32 s1, s3
982 ; GFX8-NEXT: s_mov_b32 s2, s4
983 ; GFX8-NEXT: s_mov_b32 s3, s5
984 ; GFX8-NEXT: s_mov_b32 s4, s6
985 ; GFX8-NEXT: s_mov_b32 s5, s7
986 ; GFX8-NEXT: s_mov_b32 s6, s8
987 ; GFX8-NEXT: s_mov_b32 s7, s9
988 ; GFX8-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da
989 ; GFX8-NEXT: s_waitcnt vmcnt(0)
990 ; GFX8-NEXT: ; return to shader part epilog
992 ; GFX10-LABEL: atomic_add_i32_2darraymsaa:
993 ; GFX10: ; %bb.0: ; %main_body
994 ; GFX10-NEXT: s_mov_b32 s0, s2
995 ; GFX10-NEXT: s_mov_b32 s1, s3
996 ; GFX10-NEXT: s_mov_b32 s2, s4
997 ; GFX10-NEXT: s_mov_b32 s3, s5
998 ; GFX10-NEXT: s_mov_b32 s4, s6
999 ; GFX10-NEXT: s_mov_b32 s5, s7
1000 ; GFX10-NEXT: s_mov_b32 s6, s8
1001 ; GFX10-NEXT: s_mov_b32 s7, s9
1002 ; GFX10-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc
1003 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1004 ; GFX10-NEXT: ; return to shader part epilog
1006 %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
1007 %out = bitcast i32 %v to float
; Image atomic add on a 1D image with a non-zero final intrinsic operand.
; The call passes `i32 2` as its last argument, where the other tests in this
; part of the file pass `i32 0`, and every run line expects the extra `slc`
; modifier after `glc` on the image_atomic_add.
; NOTE(review): the last operand is presumably the cache-policy immediate with
; bit 1 selecting slc; confirm against the AMDGPU intrinsic documentation.
; Data is expected in v0 and the single coordinate in v1; the gfx1010 run
; additionally expects dim:SQ_RSRC_IMG_1D.
1011 define amdgpu_ps float @atomic_add_i32_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
1012 ; GFX6-LABEL: atomic_add_i32_1d_slc:
1013 ; GFX6: ; %bb.0: ; %main_body
1014 ; GFX6-NEXT: s_mov_b32 s0, s2
1015 ; GFX6-NEXT: s_mov_b32 s1, s3
1016 ; GFX6-NEXT: s_mov_b32 s2, s4
1017 ; GFX6-NEXT: s_mov_b32 s3, s5
1018 ; GFX6-NEXT: s_mov_b32 s4, s6
1019 ; GFX6-NEXT: s_mov_b32 s5, s7
1020 ; GFX6-NEXT: s_mov_b32 s6, s8
1021 ; GFX6-NEXT: s_mov_b32 s7, s9
1022 ; GFX6-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc
1023 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1024 ; GFX6-NEXT: ; return to shader part epilog
1026 ; GFX8-LABEL: atomic_add_i32_1d_slc:
1027 ; GFX8: ; %bb.0: ; %main_body
1028 ; GFX8-NEXT: s_mov_b32 s0, s2
1029 ; GFX8-NEXT: s_mov_b32 s1, s3
1030 ; GFX8-NEXT: s_mov_b32 s2, s4
1031 ; GFX8-NEXT: s_mov_b32 s3, s5
1032 ; GFX8-NEXT: s_mov_b32 s4, s6
1033 ; GFX8-NEXT: s_mov_b32 s5, s7
1034 ; GFX8-NEXT: s_mov_b32 s6, s8
1035 ; GFX8-NEXT: s_mov_b32 s7, s9
1036 ; GFX8-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc
1037 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1038 ; GFX8-NEXT: ; return to shader part epilog
1040 ; GFX10-LABEL: atomic_add_i32_1d_slc:
1041 ; GFX10: ; %bb.0: ; %main_body
1042 ; GFX10-NEXT: s_mov_b32 s0, s2
1043 ; GFX10-NEXT: s_mov_b32 s1, s3
1044 ; GFX10-NEXT: s_mov_b32 s2, s4
1045 ; GFX10-NEXT: s_mov_b32 s3, s5
1046 ; GFX10-NEXT: s_mov_b32 s4, s6
1047 ; GFX10-NEXT: s_mov_b32 s5, s7
1048 ; GFX10-NEXT: s_mov_b32 s6, s8
1049 ; GFX10-NEXT: s_mov_b32 s7, s9
1050 ; GFX10-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc slc
1051 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1052 ; GFX10-NEXT: ; return to shader part epilog
1054 %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
1055 %out = bitcast i32 %v to float
; 64-bit image atomic swap on a 1D image: i64 data, one i32 coordinate (%s).
; Every run line expects s2-s9 copied into s0-s7 and then image_atomic_swap
; with the register pair v[0:1] as the data operand, v2 as the address operand
; and dmask:0x3 (the i32 variant at the top of the file expects v0, v1 and
; dmask:0x1). Only the gfx1010 run expects dim:SQ_RSRC_IMG_1D.
; The i64 result is bitcast to <2 x float> and returned.
1059 define amdgpu_ps <2 x float> @atomic_swap_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
1060 ; GFX6-LABEL: atomic_swap_i64_1d:
1061 ; GFX6: ; %bb.0: ; %main_body
1062 ; GFX6-NEXT: s_mov_b32 s0, s2
1063 ; GFX6-NEXT: s_mov_b32 s1, s3
1064 ; GFX6-NEXT: s_mov_b32 s2, s4
1065 ; GFX6-NEXT: s_mov_b32 s3, s5
1066 ; GFX6-NEXT: s_mov_b32 s4, s6
1067 ; GFX6-NEXT: s_mov_b32 s5, s7
1068 ; GFX6-NEXT: s_mov_b32 s6, s8
1069 ; GFX6-NEXT: s_mov_b32 s7, s9
1070 ; GFX6-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1071 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1072 ; GFX6-NEXT: ; return to shader part epilog
1074 ; GFX8-LABEL: atomic_swap_i64_1d:
1075 ; GFX8: ; %bb.0: ; %main_body
1076 ; GFX8-NEXT: s_mov_b32 s0, s2
1077 ; GFX8-NEXT: s_mov_b32 s1, s3
1078 ; GFX8-NEXT: s_mov_b32 s2, s4
1079 ; GFX8-NEXT: s_mov_b32 s3, s5
1080 ; GFX8-NEXT: s_mov_b32 s4, s6
1081 ; GFX8-NEXT: s_mov_b32 s5, s7
1082 ; GFX8-NEXT: s_mov_b32 s6, s8
1083 ; GFX8-NEXT: s_mov_b32 s7, s9
1084 ; GFX8-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1085 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1086 ; GFX8-NEXT: ; return to shader part epilog
1088 ; GFX10-LABEL: atomic_swap_i64_1d:
1089 ; GFX10: ; %bb.0: ; %main_body
1090 ; GFX10-NEXT: s_mov_b32 s0, s2
1091 ; GFX10-NEXT: s_mov_b32 s1, s3
1092 ; GFX10-NEXT: s_mov_b32 s2, s4
1093 ; GFX10-NEXT: s_mov_b32 s3, s5
1094 ; GFX10-NEXT: s_mov_b32 s4, s6
1095 ; GFX10-NEXT: s_mov_b32 s5, s7
1096 ; GFX10-NEXT: s_mov_b32 s6, s8
1097 ; GFX10-NEXT: s_mov_b32 s7, s9
1098 ; GFX10-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
1099 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1100 ; GFX10-NEXT: ; return to shader part epilog
1102 %v = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1103 %out = bitcast i64 %v to <2 x float>
1104 ret <2 x float> %out
; 64-bit image atomic add on a 1D image: i64 data, one i32 coordinate (%s).
; Every run line expects s2-s9 copied into s0-s7 and then image_atomic_add
; with v[0:1] as the data operand, v2 as the address operand and dmask:0x3.
; Only the gfx1010 run expects dim:SQ_RSRC_IMG_1D.
; The i64 result is bitcast to <2 x float> and returned.
1107 define amdgpu_ps <2 x float> @atomic_add_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
1108 ; GFX6-LABEL: atomic_add_i64_1d:
1109 ; GFX6: ; %bb.0: ; %main_body
1110 ; GFX6-NEXT: s_mov_b32 s0, s2
1111 ; GFX6-NEXT: s_mov_b32 s1, s3
1112 ; GFX6-NEXT: s_mov_b32 s2, s4
1113 ; GFX6-NEXT: s_mov_b32 s3, s5
1114 ; GFX6-NEXT: s_mov_b32 s4, s6
1115 ; GFX6-NEXT: s_mov_b32 s5, s7
1116 ; GFX6-NEXT: s_mov_b32 s6, s8
1117 ; GFX6-NEXT: s_mov_b32 s7, s9
1118 ; GFX6-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1119 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1120 ; GFX6-NEXT: ; return to shader part epilog
1122 ; GFX8-LABEL: atomic_add_i64_1d:
1123 ; GFX8: ; %bb.0: ; %main_body
1124 ; GFX8-NEXT: s_mov_b32 s0, s2
1125 ; GFX8-NEXT: s_mov_b32 s1, s3
1126 ; GFX8-NEXT: s_mov_b32 s2, s4
1127 ; GFX8-NEXT: s_mov_b32 s3, s5
1128 ; GFX8-NEXT: s_mov_b32 s4, s6
1129 ; GFX8-NEXT: s_mov_b32 s5, s7
1130 ; GFX8-NEXT: s_mov_b32 s6, s8
1131 ; GFX8-NEXT: s_mov_b32 s7, s9
1132 ; GFX8-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1133 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1134 ; GFX8-NEXT: ; return to shader part epilog
1136 ; GFX10-LABEL: atomic_add_i64_1d:
1137 ; GFX10: ; %bb.0: ; %main_body
1138 ; GFX10-NEXT: s_mov_b32 s0, s2
1139 ; GFX10-NEXT: s_mov_b32 s1, s3
1140 ; GFX10-NEXT: s_mov_b32 s2, s4
1141 ; GFX10-NEXT: s_mov_b32 s3, s5
1142 ; GFX10-NEXT: s_mov_b32 s4, s6
1143 ; GFX10-NEXT: s_mov_b32 s5, s7
1144 ; GFX10-NEXT: s_mov_b32 s6, s8
1145 ; GFX10-NEXT: s_mov_b32 s7, s9
1146 ; GFX10-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
1147 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1148 ; GFX10-NEXT: ; return to shader part epilog
1150 %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1151 %out = bitcast i64 %v to <2 x float>
1152 ret <2 x float> %out
; 64-bit image atomic sub on a 1D image: i64 data, one i32 coordinate (%s).
; Every run line expects s2-s9 copied into s0-s7 and then image_atomic_sub
; with v[0:1] as the data operand, v2 as the address operand and dmask:0x3.
; Only the gfx1010 run expects dim:SQ_RSRC_IMG_1D.
; The i64 result is bitcast to <2 x float> and returned.
1155 define amdgpu_ps <2 x float> @atomic_sub_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
1156 ; GFX6-LABEL: atomic_sub_i64_1d:
1157 ; GFX6: ; %bb.0: ; %main_body
1158 ; GFX6-NEXT: s_mov_b32 s0, s2
1159 ; GFX6-NEXT: s_mov_b32 s1, s3
1160 ; GFX6-NEXT: s_mov_b32 s2, s4
1161 ; GFX6-NEXT: s_mov_b32 s3, s5
1162 ; GFX6-NEXT: s_mov_b32 s4, s6
1163 ; GFX6-NEXT: s_mov_b32 s5, s7
1164 ; GFX6-NEXT: s_mov_b32 s6, s8
1165 ; GFX6-NEXT: s_mov_b32 s7, s9
1166 ; GFX6-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1167 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1168 ; GFX6-NEXT: ; return to shader part epilog
1170 ; GFX8-LABEL: atomic_sub_i64_1d:
1171 ; GFX8: ; %bb.0: ; %main_body
1172 ; GFX8-NEXT: s_mov_b32 s0, s2
1173 ; GFX8-NEXT: s_mov_b32 s1, s3
1174 ; GFX8-NEXT: s_mov_b32 s2, s4
1175 ; GFX8-NEXT: s_mov_b32 s3, s5
1176 ; GFX8-NEXT: s_mov_b32 s4, s6
1177 ; GFX8-NEXT: s_mov_b32 s5, s7
1178 ; GFX8-NEXT: s_mov_b32 s6, s8
1179 ; GFX8-NEXT: s_mov_b32 s7, s9
1180 ; GFX8-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1181 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1182 ; GFX8-NEXT: ; return to shader part epilog
1184 ; GFX10-LABEL: atomic_sub_i64_1d:
1185 ; GFX10: ; %bb.0: ; %main_body
1186 ; GFX10-NEXT: s_mov_b32 s0, s2
1187 ; GFX10-NEXT: s_mov_b32 s1, s3
1188 ; GFX10-NEXT: s_mov_b32 s2, s4
1189 ; GFX10-NEXT: s_mov_b32 s3, s5
1190 ; GFX10-NEXT: s_mov_b32 s4, s6
1191 ; GFX10-NEXT: s_mov_b32 s5, s7
1192 ; GFX10-NEXT: s_mov_b32 s6, s8
1193 ; GFX10-NEXT: s_mov_b32 s7, s9
1194 ; GFX10-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
1195 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1196 ; GFX10-NEXT: ; return to shader part epilog
1198 %v = call i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1199 %out = bitcast i64 %v to <2 x float>
1200 ret <2 x float> %out
; 64-bit image atomic smin on a 1D image: i64 data, one i32 coordinate (%s).
; Every run line expects s2-s9 copied into s0-s7 and then image_atomic_smin
; with v[0:1] as the data operand, v2 as the address operand and dmask:0x3.
; Only the gfx1010 run expects dim:SQ_RSRC_IMG_1D.
; The i64 result is bitcast to <2 x float> and returned.
1203 define amdgpu_ps <2 x float> @atomic_smin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
1204 ; GFX6-LABEL: atomic_smin_i64_1d:
1205 ; GFX6: ; %bb.0: ; %main_body
1206 ; GFX6-NEXT: s_mov_b32 s0, s2
1207 ; GFX6-NEXT: s_mov_b32 s1, s3
1208 ; GFX6-NEXT: s_mov_b32 s2, s4
1209 ; GFX6-NEXT: s_mov_b32 s3, s5
1210 ; GFX6-NEXT: s_mov_b32 s4, s6
1211 ; GFX6-NEXT: s_mov_b32 s5, s7
1212 ; GFX6-NEXT: s_mov_b32 s6, s8
1213 ; GFX6-NEXT: s_mov_b32 s7, s9
1214 ; GFX6-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1215 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1216 ; GFX6-NEXT: ; return to shader part epilog
1218 ; GFX8-LABEL: atomic_smin_i64_1d:
1219 ; GFX8: ; %bb.0: ; %main_body
1220 ; GFX8-NEXT: s_mov_b32 s0, s2
1221 ; GFX8-NEXT: s_mov_b32 s1, s3
1222 ; GFX8-NEXT: s_mov_b32 s2, s4
1223 ; GFX8-NEXT: s_mov_b32 s3, s5
1224 ; GFX8-NEXT: s_mov_b32 s4, s6
1225 ; GFX8-NEXT: s_mov_b32 s5, s7
1226 ; GFX8-NEXT: s_mov_b32 s6, s8
1227 ; GFX8-NEXT: s_mov_b32 s7, s9
1228 ; GFX8-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1229 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1230 ; GFX8-NEXT: ; return to shader part epilog
1232 ; GFX10-LABEL: atomic_smin_i64_1d:
1233 ; GFX10: ; %bb.0: ; %main_body
1234 ; GFX10-NEXT: s_mov_b32 s0, s2
1235 ; GFX10-NEXT: s_mov_b32 s1, s3
1236 ; GFX10-NEXT: s_mov_b32 s2, s4
1237 ; GFX10-NEXT: s_mov_b32 s3, s5
1238 ; GFX10-NEXT: s_mov_b32 s4, s6
1239 ; GFX10-NEXT: s_mov_b32 s5, s7
1240 ; GFX10-NEXT: s_mov_b32 s6, s8
1241 ; GFX10-NEXT: s_mov_b32 s7, s9
1242 ; GFX10-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
1243 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1244 ; GFX10-NEXT: ; return to shader part epilog
1246 %v = call i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1247 %out = bitcast i64 %v to <2 x float>
1248 ret <2 x float> %out
; 64-bit image atomic umin on a 1D image: i64 data, one i32 coordinate (%s).
; Every run line expects s2-s9 copied into s0-s7 and then image_atomic_umin
; with v[0:1] as the data operand, v2 as the address operand and dmask:0x3.
; Only the gfx1010 run expects dim:SQ_RSRC_IMG_1D.
; The i64 result is bitcast to <2 x float> and returned.
1251 define amdgpu_ps <2 x float> @atomic_umin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
1252 ; GFX6-LABEL: atomic_umin_i64_1d:
1253 ; GFX6: ; %bb.0: ; %main_body
1254 ; GFX6-NEXT: s_mov_b32 s0, s2
1255 ; GFX6-NEXT: s_mov_b32 s1, s3
1256 ; GFX6-NEXT: s_mov_b32 s2, s4
1257 ; GFX6-NEXT: s_mov_b32 s3, s5
1258 ; GFX6-NEXT: s_mov_b32 s4, s6
1259 ; GFX6-NEXT: s_mov_b32 s5, s7
1260 ; GFX6-NEXT: s_mov_b32 s6, s8
1261 ; GFX6-NEXT: s_mov_b32 s7, s9
1262 ; GFX6-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1263 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1264 ; GFX6-NEXT: ; return to shader part epilog
1266 ; GFX8-LABEL: atomic_umin_i64_1d:
1267 ; GFX8: ; %bb.0: ; %main_body
1268 ; GFX8-NEXT: s_mov_b32 s0, s2
1269 ; GFX8-NEXT: s_mov_b32 s1, s3
1270 ; GFX8-NEXT: s_mov_b32 s2, s4
1271 ; GFX8-NEXT: s_mov_b32 s3, s5
1272 ; GFX8-NEXT: s_mov_b32 s4, s6
1273 ; GFX8-NEXT: s_mov_b32 s5, s7
1274 ; GFX8-NEXT: s_mov_b32 s6, s8
1275 ; GFX8-NEXT: s_mov_b32 s7, s9
1276 ; GFX8-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1277 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1278 ; GFX8-NEXT: ; return to shader part epilog
1280 ; GFX10-LABEL: atomic_umin_i64_1d:
1281 ; GFX10: ; %bb.0: ; %main_body
1282 ; GFX10-NEXT: s_mov_b32 s0, s2
1283 ; GFX10-NEXT: s_mov_b32 s1, s3
1284 ; GFX10-NEXT: s_mov_b32 s2, s4
1285 ; GFX10-NEXT: s_mov_b32 s3, s5
1286 ; GFX10-NEXT: s_mov_b32 s4, s6
1287 ; GFX10-NEXT: s_mov_b32 s5, s7
1288 ; GFX10-NEXT: s_mov_b32 s6, s8
1289 ; GFX10-NEXT: s_mov_b32 s7, s9
1290 ; GFX10-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
1291 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1292 ; GFX10-NEXT: ; return to shader part epilog
1294 %v = call i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1295 %out = bitcast i64 %v to <2 x float>
1296 ret <2 x float> %out
; 64-bit image atomic smax on a 1D image: i64 data, one i32 coordinate (%s).
; Every run line expects s2-s9 copied into s0-s7 and then image_atomic_smax
; with v[0:1] as the data operand, v2 as the address operand and dmask:0x3.
; Only the gfx1010 run expects dim:SQ_RSRC_IMG_1D.
; The i64 result is bitcast to <2 x float> and returned.
1299 define amdgpu_ps <2 x float> @atomic_smax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
1300 ; GFX6-LABEL: atomic_smax_i64_1d:
1301 ; GFX6: ; %bb.0: ; %main_body
1302 ; GFX6-NEXT: s_mov_b32 s0, s2
1303 ; GFX6-NEXT: s_mov_b32 s1, s3
1304 ; GFX6-NEXT: s_mov_b32 s2, s4
1305 ; GFX6-NEXT: s_mov_b32 s3, s5
1306 ; GFX6-NEXT: s_mov_b32 s4, s6
1307 ; GFX6-NEXT: s_mov_b32 s5, s7
1308 ; GFX6-NEXT: s_mov_b32 s6, s8
1309 ; GFX6-NEXT: s_mov_b32 s7, s9
1310 ; GFX6-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1311 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1312 ; GFX6-NEXT: ; return to shader part epilog
1314 ; GFX8-LABEL: atomic_smax_i64_1d:
1315 ; GFX8: ; %bb.0: ; %main_body
1316 ; GFX8-NEXT: s_mov_b32 s0, s2
1317 ; GFX8-NEXT: s_mov_b32 s1, s3
1318 ; GFX8-NEXT: s_mov_b32 s2, s4
1319 ; GFX8-NEXT: s_mov_b32 s3, s5
1320 ; GFX8-NEXT: s_mov_b32 s4, s6
1321 ; GFX8-NEXT: s_mov_b32 s5, s7
1322 ; GFX8-NEXT: s_mov_b32 s6, s8
1323 ; GFX8-NEXT: s_mov_b32 s7, s9
1324 ; GFX8-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1325 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1326 ; GFX8-NEXT: ; return to shader part epilog
1328 ; GFX10-LABEL: atomic_smax_i64_1d:
1329 ; GFX10: ; %bb.0: ; %main_body
1330 ; GFX10-NEXT: s_mov_b32 s0, s2
1331 ; GFX10-NEXT: s_mov_b32 s1, s3
1332 ; GFX10-NEXT: s_mov_b32 s2, s4
1333 ; GFX10-NEXT: s_mov_b32 s3, s5
1334 ; GFX10-NEXT: s_mov_b32 s4, s6
1335 ; GFX10-NEXT: s_mov_b32 s5, s7
1336 ; GFX10-NEXT: s_mov_b32 s6, s8
1337 ; GFX10-NEXT: s_mov_b32 s7, s9
1338 ; GFX10-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
1339 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1340 ; GFX10-NEXT: ; return to shader part epilog
1342 %v = call i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1343 %out = bitcast i64 %v to <2 x float>
1344 ret <2 x float> %out
; 64-bit image atomic umax on a 1D image: i64 data, one i32 coordinate (%s).
; Every run line expects s2-s9 copied into s0-s7 and then image_atomic_umax
; with v[0:1] as the data operand, v2 as the address operand and dmask:0x3.
; Only the gfx1010 run expects dim:SQ_RSRC_IMG_1D.
; The i64 result is bitcast to <2 x float> and returned.
1347 define amdgpu_ps <2 x float> @atomic_umax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
1348 ; GFX6-LABEL: atomic_umax_i64_1d:
1349 ; GFX6: ; %bb.0: ; %main_body
1350 ; GFX6-NEXT: s_mov_b32 s0, s2
1351 ; GFX6-NEXT: s_mov_b32 s1, s3
1352 ; GFX6-NEXT: s_mov_b32 s2, s4
1353 ; GFX6-NEXT: s_mov_b32 s3, s5
1354 ; GFX6-NEXT: s_mov_b32 s4, s6
1355 ; GFX6-NEXT: s_mov_b32 s5, s7
1356 ; GFX6-NEXT: s_mov_b32 s6, s8
1357 ; GFX6-NEXT: s_mov_b32 s7, s9
1358 ; GFX6-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1359 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1360 ; GFX6-NEXT: ; return to shader part epilog
1362 ; GFX8-LABEL: atomic_umax_i64_1d:
1363 ; GFX8: ; %bb.0: ; %main_body
1364 ; GFX8-NEXT: s_mov_b32 s0, s2
1365 ; GFX8-NEXT: s_mov_b32 s1, s3
1366 ; GFX8-NEXT: s_mov_b32 s2, s4
1367 ; GFX8-NEXT: s_mov_b32 s3, s5
1368 ; GFX8-NEXT: s_mov_b32 s4, s6
1369 ; GFX8-NEXT: s_mov_b32 s5, s7
1370 ; GFX8-NEXT: s_mov_b32 s6, s8
1371 ; GFX8-NEXT: s_mov_b32 s7, s9
1372 ; GFX8-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1373 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1374 ; GFX8-NEXT: ; return to shader part epilog
1376 ; GFX10-LABEL: atomic_umax_i64_1d:
1377 ; GFX10: ; %bb.0: ; %main_body
1378 ; GFX10-NEXT: s_mov_b32 s0, s2
1379 ; GFX10-NEXT: s_mov_b32 s1, s3
1380 ; GFX10-NEXT: s_mov_b32 s2, s4
1381 ; GFX10-NEXT: s_mov_b32 s3, s5
1382 ; GFX10-NEXT: s_mov_b32 s4, s6
1383 ; GFX10-NEXT: s_mov_b32 s5, s7
1384 ; GFX10-NEXT: s_mov_b32 s6, s8
1385 ; GFX10-NEXT: s_mov_b32 s7, s9
1386 ; GFX10-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
1387 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1388 ; GFX10-NEXT: ; return to shader part epilog
1390 %v = call i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1391 %out = bitcast i64 %v to <2 x float>
1392 ret <2 x float> %out
; 64-bit image atomic and on a 1D image: i64 data, one i32 coordinate (%s).
; Every run line expects s2-s9 copied into s0-s7 and then image_atomic_and
; with v[0:1] as the data operand, v2 as the address operand and dmask:0x3.
; Only the gfx1010 run expects dim:SQ_RSRC_IMG_1D.
; The i64 result is bitcast to <2 x float> and returned.
1395 define amdgpu_ps <2 x float> @atomic_and_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
1396 ; GFX6-LABEL: atomic_and_i64_1d:
1397 ; GFX6: ; %bb.0: ; %main_body
1398 ; GFX6-NEXT: s_mov_b32 s0, s2
1399 ; GFX6-NEXT: s_mov_b32 s1, s3
1400 ; GFX6-NEXT: s_mov_b32 s2, s4
1401 ; GFX6-NEXT: s_mov_b32 s3, s5
1402 ; GFX6-NEXT: s_mov_b32 s4, s6
1403 ; GFX6-NEXT: s_mov_b32 s5, s7
1404 ; GFX6-NEXT: s_mov_b32 s6, s8
1405 ; GFX6-NEXT: s_mov_b32 s7, s9
1406 ; GFX6-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1407 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1408 ; GFX6-NEXT: ; return to shader part epilog
1410 ; GFX8-LABEL: atomic_and_i64_1d:
1411 ; GFX8: ; %bb.0: ; %main_body
1412 ; GFX8-NEXT: s_mov_b32 s0, s2
1413 ; GFX8-NEXT: s_mov_b32 s1, s3
1414 ; GFX8-NEXT: s_mov_b32 s2, s4
1415 ; GFX8-NEXT: s_mov_b32 s3, s5
1416 ; GFX8-NEXT: s_mov_b32 s4, s6
1417 ; GFX8-NEXT: s_mov_b32 s5, s7
1418 ; GFX8-NEXT: s_mov_b32 s6, s8
1419 ; GFX8-NEXT: s_mov_b32 s7, s9
1420 ; GFX8-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1421 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1422 ; GFX8-NEXT: ; return to shader part epilog
1424 ; GFX10-LABEL: atomic_and_i64_1d:
1425 ; GFX10: ; %bb.0: ; %main_body
1426 ; GFX10-NEXT: s_mov_b32 s0, s2
1427 ; GFX10-NEXT: s_mov_b32 s1, s3
1428 ; GFX10-NEXT: s_mov_b32 s2, s4
1429 ; GFX10-NEXT: s_mov_b32 s3, s5
1430 ; GFX10-NEXT: s_mov_b32 s4, s6
1431 ; GFX10-NEXT: s_mov_b32 s5, s7
1432 ; GFX10-NEXT: s_mov_b32 s6, s8
1433 ; GFX10-NEXT: s_mov_b32 s7, s9
1434 ; GFX10-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
1435 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1436 ; GFX10-NEXT: ; return to shader part epilog
1438 %v = call i64 @llvm.amdgcn.image.atomic.and.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1439 %out = bitcast i64 %v to <2 x float>
1440 ret <2 x float> %out
; 64-bit image atomic or on a 1D image: i64 data, one i32 coordinate (%s).
; Every run line expects s2-s9 copied into s0-s7 and then image_atomic_or
; with v[0:1] as the data operand, v2 as the address operand and dmask:0x3.
; Only the gfx1010 run expects dim:SQ_RSRC_IMG_1D.
; The i64 result is bitcast to <2 x float> and returned.
1443 define amdgpu_ps <2 x float> @atomic_or_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
1444 ; GFX6-LABEL: atomic_or_i64_1d:
1445 ; GFX6: ; %bb.0: ; %main_body
1446 ; GFX6-NEXT: s_mov_b32 s0, s2
1447 ; GFX6-NEXT: s_mov_b32 s1, s3
1448 ; GFX6-NEXT: s_mov_b32 s2, s4
1449 ; GFX6-NEXT: s_mov_b32 s3, s5
1450 ; GFX6-NEXT: s_mov_b32 s4, s6
1451 ; GFX6-NEXT: s_mov_b32 s5, s7
1452 ; GFX6-NEXT: s_mov_b32 s6, s8
1453 ; GFX6-NEXT: s_mov_b32 s7, s9
1454 ; GFX6-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1455 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1456 ; GFX6-NEXT: ; return to shader part epilog
1458 ; GFX8-LABEL: atomic_or_i64_1d:
1459 ; GFX8: ; %bb.0: ; %main_body
1460 ; GFX8-NEXT: s_mov_b32 s0, s2
1461 ; GFX8-NEXT: s_mov_b32 s1, s3
1462 ; GFX8-NEXT: s_mov_b32 s2, s4
1463 ; GFX8-NEXT: s_mov_b32 s3, s5
1464 ; GFX8-NEXT: s_mov_b32 s4, s6
1465 ; GFX8-NEXT: s_mov_b32 s5, s7
1466 ; GFX8-NEXT: s_mov_b32 s6, s8
1467 ; GFX8-NEXT: s_mov_b32 s7, s9
1468 ; GFX8-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1469 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1470 ; GFX8-NEXT: ; return to shader part epilog
1472 ; GFX10-LABEL: atomic_or_i64_1d:
1473 ; GFX10: ; %bb.0: ; %main_body
1474 ; GFX10-NEXT: s_mov_b32 s0, s2
1475 ; GFX10-NEXT: s_mov_b32 s1, s3
1476 ; GFX10-NEXT: s_mov_b32 s2, s4
1477 ; GFX10-NEXT: s_mov_b32 s3, s5
1478 ; GFX10-NEXT: s_mov_b32 s4, s6
1479 ; GFX10-NEXT: s_mov_b32 s5, s7
1480 ; GFX10-NEXT: s_mov_b32 s6, s8
1481 ; GFX10-NEXT: s_mov_b32 s7, s9
1482 ; GFX10-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
1483 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1484 ; GFX10-NEXT: ; return to shader part epilog
1486 %v = call i64 @llvm.amdgcn.image.atomic.or.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1487 %out = bitcast i64 %v to <2 x float>
1488 ret <2 x float> %out
; 64-bit image atomic xor on a 1D image: i64 data, one i32 coordinate (%s).
; Every run line expects s2-s9 copied into s0-s7 and then image_atomic_xor
; with v[0:1] as the data operand, v2 as the address operand and dmask:0x3.
; Only the gfx1010 run expects dim:SQ_RSRC_IMG_1D.
; The i64 result is bitcast to <2 x float> and returned.
1491 define amdgpu_ps <2 x float> @atomic_xor_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
1492 ; GFX6-LABEL: atomic_xor_i64_1d:
1493 ; GFX6: ; %bb.0: ; %main_body
1494 ; GFX6-NEXT: s_mov_b32 s0, s2
1495 ; GFX6-NEXT: s_mov_b32 s1, s3
1496 ; GFX6-NEXT: s_mov_b32 s2, s4
1497 ; GFX6-NEXT: s_mov_b32 s3, s5
1498 ; GFX6-NEXT: s_mov_b32 s4, s6
1499 ; GFX6-NEXT: s_mov_b32 s5, s7
1500 ; GFX6-NEXT: s_mov_b32 s6, s8
1501 ; GFX6-NEXT: s_mov_b32 s7, s9
1502 ; GFX6-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1503 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1504 ; GFX6-NEXT: ; return to shader part epilog
1506 ; GFX8-LABEL: atomic_xor_i64_1d:
1507 ; GFX8: ; %bb.0: ; %main_body
1508 ; GFX8-NEXT: s_mov_b32 s0, s2
1509 ; GFX8-NEXT: s_mov_b32 s1, s3
1510 ; GFX8-NEXT: s_mov_b32 s2, s4
1511 ; GFX8-NEXT: s_mov_b32 s3, s5
1512 ; GFX8-NEXT: s_mov_b32 s4, s6
1513 ; GFX8-NEXT: s_mov_b32 s5, s7
1514 ; GFX8-NEXT: s_mov_b32 s6, s8
1515 ; GFX8-NEXT: s_mov_b32 s7, s9
1516 ; GFX8-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1517 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1518 ; GFX8-NEXT: ; return to shader part epilog
1520 ; GFX10-LABEL: atomic_xor_i64_1d:
1521 ; GFX10: ; %bb.0: ; %main_body
1522 ; GFX10-NEXT: s_mov_b32 s0, s2
1523 ; GFX10-NEXT: s_mov_b32 s1, s3
1524 ; GFX10-NEXT: s_mov_b32 s2, s4
1525 ; GFX10-NEXT: s_mov_b32 s3, s5
1526 ; GFX10-NEXT: s_mov_b32 s4, s6
1527 ; GFX10-NEXT: s_mov_b32 s5, s7
1528 ; GFX10-NEXT: s_mov_b32 s6, s8
1529 ; GFX10-NEXT: s_mov_b32 s7, s9
1530 ; GFX10-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
1531 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1532 ; GFX10-NEXT: ; return to shader part epilog
1534 %v = call i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1535 %out = bitcast i64 %v to <2 x float>
1536 ret <2 x float> %out
; 64-bit image atomic inc on a 1D image: i64 data, one i32 coordinate (%s).
; Every run line expects s2-s9 copied into s0-s7 and then image_atomic_inc
; with v[0:1] as the data operand, v2 as the address operand and dmask:0x3.
; Only the gfx1010 run expects dim:SQ_RSRC_IMG_1D.
; The i64 result is bitcast to <2 x float> and returned.
1539 define amdgpu_ps <2 x float> @atomic_inc_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
1540 ; GFX6-LABEL: atomic_inc_i64_1d:
1541 ; GFX6: ; %bb.0: ; %main_body
1542 ; GFX6-NEXT: s_mov_b32 s0, s2
1543 ; GFX6-NEXT: s_mov_b32 s1, s3
1544 ; GFX6-NEXT: s_mov_b32 s2, s4
1545 ; GFX6-NEXT: s_mov_b32 s3, s5
1546 ; GFX6-NEXT: s_mov_b32 s4, s6
1547 ; GFX6-NEXT: s_mov_b32 s5, s7
1548 ; GFX6-NEXT: s_mov_b32 s6, s8
1549 ; GFX6-NEXT: s_mov_b32 s7, s9
1550 ; GFX6-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1551 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1552 ; GFX6-NEXT: ; return to shader part epilog
1554 ; GFX8-LABEL: atomic_inc_i64_1d:
1555 ; GFX8: ; %bb.0: ; %main_body
1556 ; GFX8-NEXT: s_mov_b32 s0, s2
1557 ; GFX8-NEXT: s_mov_b32 s1, s3
1558 ; GFX8-NEXT: s_mov_b32 s2, s4
1559 ; GFX8-NEXT: s_mov_b32 s3, s5
1560 ; GFX8-NEXT: s_mov_b32 s4, s6
1561 ; GFX8-NEXT: s_mov_b32 s5, s7
1562 ; GFX8-NEXT: s_mov_b32 s6, s8
1563 ; GFX8-NEXT: s_mov_b32 s7, s9
1564 ; GFX8-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1565 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1566 ; GFX8-NEXT: ; return to shader part epilog
1568 ; GFX10-LABEL: atomic_inc_i64_1d:
1569 ; GFX10: ; %bb.0: ; %main_body
1570 ; GFX10-NEXT: s_mov_b32 s0, s2
1571 ; GFX10-NEXT: s_mov_b32 s1, s3
1572 ; GFX10-NEXT: s_mov_b32 s2, s4
1573 ; GFX10-NEXT: s_mov_b32 s3, s5
1574 ; GFX10-NEXT: s_mov_b32 s4, s6
1575 ; GFX10-NEXT: s_mov_b32 s5, s7
1576 ; GFX10-NEXT: s_mov_b32 s6, s8
1577 ; GFX10-NEXT: s_mov_b32 s7, s9
1578 ; GFX10-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
1579 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1580 ; GFX10-NEXT: ; return to shader part epilog
1582 %v = call i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1583 %out = bitcast i64 %v to <2 x float>
1584 ret <2 x float> %out
; 64-bit image atomic dec on a 1D image: i64 data, one i32 coordinate (%s).
; Every run line expects s2-s9 copied into s0-s7 and then image_atomic_dec
; with v[0:1] as the data operand, v2 as the address operand and dmask:0x3.
; Only the gfx1010 run expects dim:SQ_RSRC_IMG_1D.
; The i64 result is bitcast to <2 x float> and returned.
1587 define amdgpu_ps <2 x float> @atomic_dec_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
1588 ; GFX6-LABEL: atomic_dec_i64_1d:
1589 ; GFX6: ; %bb.0: ; %main_body
1590 ; GFX6-NEXT: s_mov_b32 s0, s2
1591 ; GFX6-NEXT: s_mov_b32 s1, s3
1592 ; GFX6-NEXT: s_mov_b32 s2, s4
1593 ; GFX6-NEXT: s_mov_b32 s3, s5
1594 ; GFX6-NEXT: s_mov_b32 s4, s6
1595 ; GFX6-NEXT: s_mov_b32 s5, s7
1596 ; GFX6-NEXT: s_mov_b32 s6, s8
1597 ; GFX6-NEXT: s_mov_b32 s7, s9
1598 ; GFX6-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1599 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1600 ; GFX6-NEXT: ; return to shader part epilog
1602 ; GFX8-LABEL: atomic_dec_i64_1d:
1603 ; GFX8: ; %bb.0: ; %main_body
1604 ; GFX8-NEXT: s_mov_b32 s0, s2
1605 ; GFX8-NEXT: s_mov_b32 s1, s3
1606 ; GFX8-NEXT: s_mov_b32 s2, s4
1607 ; GFX8-NEXT: s_mov_b32 s3, s5
1608 ; GFX8-NEXT: s_mov_b32 s4, s6
1609 ; GFX8-NEXT: s_mov_b32 s5, s7
1610 ; GFX8-NEXT: s_mov_b32 s6, s8
1611 ; GFX8-NEXT: s_mov_b32 s7, s9
1612 ; GFX8-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1613 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1614 ; GFX8-NEXT: ; return to shader part epilog
1616 ; GFX10-LABEL: atomic_dec_i64_1d:
1617 ; GFX10: ; %bb.0: ; %main_body
1618 ; GFX10-NEXT: s_mov_b32 s0, s2
1619 ; GFX10-NEXT: s_mov_b32 s1, s3
1620 ; GFX10-NEXT: s_mov_b32 s2, s4
1621 ; GFX10-NEXT: s_mov_b32 s3, s5
1622 ; GFX10-NEXT: s_mov_b32 s4, s6
1623 ; GFX10-NEXT: s_mov_b32 s5, s7
1624 ; GFX10-NEXT: s_mov_b32 s6, s8
1625 ; GFX10-NEXT: s_mov_b32 s7, s9
1626 ; GFX10-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
1627 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1628 ; GFX10-NEXT: ; return to shader part epilog
1630 %v = call i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1631 %out = bitcast i64 %v to <2 x float>
1632 ret <2 x float> %out
; 64-bit image atomic compare-and-swap on a 1D image: i64 %cmp, i64 %swap and
; one i32 coordinate (%s). Every run line expects s2-s9 copied into s0-s7 and
; then image_atomic_cmpswap with the four-register range v[0:3] as the data
; operand, v4 as the address operand and dmask:0xf. Only the gfx1010 run
; expects dim:SQ_RSRC_IMG_1D.
; NOTE(review): v[0:3] presumably holds the two i64 inputs back to back;
; the checks do not show which half carries %cmp and which %swap.
; The i64 result is bitcast to <2 x float> and returned.
1635 define amdgpu_ps <2 x float> @atomic_cmpswap_i64_1d(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i32 %s) {
1636 ; GFX6-LABEL: atomic_cmpswap_i64_1d:
1637 ; GFX6: ; %bb.0: ; %main_body
1638 ; GFX6-NEXT: s_mov_b32 s0, s2
1639 ; GFX6-NEXT: s_mov_b32 s1, s3
1640 ; GFX6-NEXT: s_mov_b32 s2, s4
1641 ; GFX6-NEXT: s_mov_b32 s3, s5
1642 ; GFX6-NEXT: s_mov_b32 s4, s6
1643 ; GFX6-NEXT: s_mov_b32 s5, s7
1644 ; GFX6-NEXT: s_mov_b32 s6, s8
1645 ; GFX6-NEXT: s_mov_b32 s7, s9
1646 ; GFX6-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
1647 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1648 ; GFX6-NEXT: ; return to shader part epilog
1650 ; GFX8-LABEL: atomic_cmpswap_i64_1d:
1651 ; GFX8: ; %bb.0: ; %main_body
1652 ; GFX8-NEXT: s_mov_b32 s0, s2
1653 ; GFX8-NEXT: s_mov_b32 s1, s3
1654 ; GFX8-NEXT: s_mov_b32 s2, s4
1655 ; GFX8-NEXT: s_mov_b32 s3, s5
1656 ; GFX8-NEXT: s_mov_b32 s4, s6
1657 ; GFX8-NEXT: s_mov_b32 s5, s7
1658 ; GFX8-NEXT: s_mov_b32 s6, s8
1659 ; GFX8-NEXT: s_mov_b32 s7, s9
1660 ; GFX8-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
1661 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1662 ; GFX8-NEXT: ; return to shader part epilog
1664 ; GFX10-LABEL: atomic_cmpswap_i64_1d:
1665 ; GFX10: ; %bb.0: ; %main_body
1666 ; GFX10-NEXT: s_mov_b32 s0, s2
1667 ; GFX10-NEXT: s_mov_b32 s1, s3
1668 ; GFX10-NEXT: s_mov_b32 s2, s4
1669 ; GFX10-NEXT: s_mov_b32 s3, s5
1670 ; GFX10-NEXT: s_mov_b32 s4, s6
1671 ; GFX10-NEXT: s_mov_b32 s5, s7
1672 ; GFX10-NEXT: s_mov_b32 s6, s8
1673 ; GFX10-NEXT: s_mov_b32 s7, s9
1674 ; GFX10-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc
1675 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1676 ; GFX10-NEXT: ; return to shader part epilog
1678 %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1679 %out = bitcast i64 %v to <2 x float>
1680 ret <2 x float> %out
; Void-returning variant of the i64 compare-and-swap test: the same intrinsic
; call is made, but the function's return type is void and no use of %v
; appears in the lines visible here.
; Every run line expects the same image_atomic_cmpswap operands as the
; returning variant (v[0:3], v4, s[0:7], dmask:0xf) and still expects the
; `glc` modifier. The instruction is followed directly by s_endpgm, with no
; s_waitcnt between them.
1683 define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i32 %s) {
1684 ; GFX6-LABEL: atomic_cmpswap_i64_1d_no_return:
1685 ; GFX6: ; %bb.0: ; %main_body
1686 ; GFX6-NEXT: s_mov_b32 s0, s2
1687 ; GFX6-NEXT: s_mov_b32 s1, s3
1688 ; GFX6-NEXT: s_mov_b32 s2, s4
1689 ; GFX6-NEXT: s_mov_b32 s3, s5
1690 ; GFX6-NEXT: s_mov_b32 s4, s6
1691 ; GFX6-NEXT: s_mov_b32 s5, s7
1692 ; GFX6-NEXT: s_mov_b32 s6, s8
1693 ; GFX6-NEXT: s_mov_b32 s7, s9
1694 ; GFX6-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
1695 ; GFX6-NEXT: s_endpgm
1697 ; GFX8-LABEL: atomic_cmpswap_i64_1d_no_return:
1698 ; GFX8: ; %bb.0: ; %main_body
1699 ; GFX8-NEXT: s_mov_b32 s0, s2
1700 ; GFX8-NEXT: s_mov_b32 s1, s3
1701 ; GFX8-NEXT: s_mov_b32 s2, s4
1702 ; GFX8-NEXT: s_mov_b32 s3, s5
1703 ; GFX8-NEXT: s_mov_b32 s4, s6
1704 ; GFX8-NEXT: s_mov_b32 s5, s7
1705 ; GFX8-NEXT: s_mov_b32 s6, s8
1706 ; GFX8-NEXT: s_mov_b32 s7, s9
1707 ; GFX8-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
1708 ; GFX8-NEXT: s_endpgm
1710 ; GFX10-LABEL: atomic_cmpswap_i64_1d_no_return:
1711 ; GFX10: ; %bb.0: ; %main_body
1712 ; GFX10-NEXT: s_mov_b32 s0, s2
1713 ; GFX10-NEXT: s_mov_b32 s1, s3
1714 ; GFX10-NEXT: s_mov_b32 s2, s4
1715 ; GFX10-NEXT: s_mov_b32 s3, s5
1716 ; GFX10-NEXT: s_mov_b32 s4, s6
1717 ; GFX10-NEXT: s_mov_b32 s5, s7
1718 ; GFX10-NEXT: s_mov_b32 s6, s8
1719 ; GFX10-NEXT: s_mov_b32 s7, s9
1720 ; GFX10-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc
1721 ; GFX10-NEXT: s_endpgm
1723 %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; 64-bit atomic add on a 2D image addressed by %s and %t.
; The i64 result is bitcast to <2 x float> and returned. The checks expect
; one image_atomic_add with data in v[0:1], a two-register address v[2:3] and
; dmask 0x3, followed by an s_waitcnt. Only the GFX10 checks expect the
; dim SQ_RSRC_IMG_2D operand.
1727 define amdgpu_ps <2 x float> @atomic_add_i64_2d(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t) {
1728 ; GFX6-LABEL: atomic_add_i64_2d:
1729 ; GFX6: ; %bb.0: ; %main_body
1730 ; GFX6-NEXT: s_mov_b32 s0, s2
1731 ; GFX6-NEXT: s_mov_b32 s1, s3
1732 ; GFX6-NEXT: s_mov_b32 s2, s4
1733 ; GFX6-NEXT: s_mov_b32 s3, s5
1734 ; GFX6-NEXT: s_mov_b32 s4, s6
1735 ; GFX6-NEXT: s_mov_b32 s5, s7
1736 ; GFX6-NEXT: s_mov_b32 s6, s8
1737 ; GFX6-NEXT: s_mov_b32 s7, s9
1738 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc
1739 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1740 ; GFX6-NEXT: ; return to shader part epilog
1742 ; GFX8-LABEL: atomic_add_i64_2d:
1743 ; GFX8: ; %bb.0: ; %main_body
1744 ; GFX8-NEXT: s_mov_b32 s0, s2
1745 ; GFX8-NEXT: s_mov_b32 s1, s3
1746 ; GFX8-NEXT: s_mov_b32 s2, s4
1747 ; GFX8-NEXT: s_mov_b32 s3, s5
1748 ; GFX8-NEXT: s_mov_b32 s4, s6
1749 ; GFX8-NEXT: s_mov_b32 s5, s7
1750 ; GFX8-NEXT: s_mov_b32 s6, s8
1751 ; GFX8-NEXT: s_mov_b32 s7, s9
1752 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc
1753 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1754 ; GFX8-NEXT: ; return to shader part epilog
1756 ; GFX10-LABEL: atomic_add_i64_2d:
1757 ; GFX10: ; %bb.0: ; %main_body
1758 ; GFX10-NEXT: s_mov_b32 s0, s2
1759 ; GFX10-NEXT: s_mov_b32 s1, s3
1760 ; GFX10-NEXT: s_mov_b32 s2, s4
1761 ; GFX10-NEXT: s_mov_b32 s3, s5
1762 ; GFX10-NEXT: s_mov_b32 s4, s6
1763 ; GFX10-NEXT: s_mov_b32 s5, s7
1764 ; GFX10-NEXT: s_mov_b32 s6, s8
1765 ; GFX10-NEXT: s_mov_b32 s7, s9
1766 ; GFX10-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm glc
1767 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1768 ; GFX10-NEXT: ; return to shader part epilog
1770 %v = call i64 @llvm.amdgcn.image.atomic.add.2d.i64.i32(i64 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
1771 %out = bitcast i64 %v to <2 x float>
1772 ret <2 x float> %out
; 64-bit atomic add on a 3D image addressed by %s, %t and %r.
; The i64 result is bitcast to <2 x float> and returned. The checks expect
; one image_atomic_add with data in v[0:1], a three-register address v[2:4]
; and dmask 0x3, followed by an s_waitcnt. Only the GFX10 checks expect the
; dim SQ_RSRC_IMG_3D operand.
1775 define amdgpu_ps <2 x float> @atomic_add_i64_3d(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %r) {
1776 ; GFX6-LABEL: atomic_add_i64_3d:
1777 ; GFX6: ; %bb.0: ; %main_body
1778 ; GFX6-NEXT: s_mov_b32 s0, s2
1779 ; GFX6-NEXT: s_mov_b32 s1, s3
1780 ; GFX6-NEXT: s_mov_b32 s2, s4
1781 ; GFX6-NEXT: s_mov_b32 s3, s5
1782 ; GFX6-NEXT: s_mov_b32 s4, s6
1783 ; GFX6-NEXT: s_mov_b32 s5, s7
1784 ; GFX6-NEXT: s_mov_b32 s6, s8
1785 ; GFX6-NEXT: s_mov_b32 s7, s9
1786 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
1787 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1788 ; GFX6-NEXT: ; return to shader part epilog
1790 ; GFX8-LABEL: atomic_add_i64_3d:
1791 ; GFX8: ; %bb.0: ; %main_body
1792 ; GFX8-NEXT: s_mov_b32 s0, s2
1793 ; GFX8-NEXT: s_mov_b32 s1, s3
1794 ; GFX8-NEXT: s_mov_b32 s2, s4
1795 ; GFX8-NEXT: s_mov_b32 s3, s5
1796 ; GFX8-NEXT: s_mov_b32 s4, s6
1797 ; GFX8-NEXT: s_mov_b32 s5, s7
1798 ; GFX8-NEXT: s_mov_b32 s6, s8
1799 ; GFX8-NEXT: s_mov_b32 s7, s9
1800 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
1801 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1802 ; GFX8-NEXT: ; return to shader part epilog
1804 ; GFX10-LABEL: atomic_add_i64_3d:
1805 ; GFX10: ; %bb.0: ; %main_body
1806 ; GFX10-NEXT: s_mov_b32 s0, s2
1807 ; GFX10-NEXT: s_mov_b32 s1, s3
1808 ; GFX10-NEXT: s_mov_b32 s2, s4
1809 ; GFX10-NEXT: s_mov_b32 s3, s5
1810 ; GFX10-NEXT: s_mov_b32 s4, s6
1811 ; GFX10-NEXT: s_mov_b32 s5, s7
1812 ; GFX10-NEXT: s_mov_b32 s6, s8
1813 ; GFX10-NEXT: s_mov_b32 s7, s9
1814 ; GFX10-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm glc
1815 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1816 ; GFX10-NEXT: ; return to shader part epilog
1818 %v = call i64 @llvm.amdgcn.image.atomic.add.3d.i64.i32(i64 %data, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
1819 %out = bitcast i64 %v to <2 x float>
1820 ret <2 x float> %out
; 64-bit atomic add on a cube image addressed by %s, %t and %face.
; The i64 result is bitcast to <2 x float> and returned. The checks expect
; one image_atomic_add with data in v[0:1], a three-register address v[2:4]
; and dmask 0x3. The GFX6 and GFX8 checks expect a trailing da modifier; the
; GFX10 checks expect the dim SQ_RSRC_IMG_CUBE operand and no da.
1823 define amdgpu_ps <2 x float> @atomic_add_i64_cube(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %face) {
1824 ; GFX6-LABEL: atomic_add_i64_cube:
1825 ; GFX6: ; %bb.0: ; %main_body
1826 ; GFX6-NEXT: s_mov_b32 s0, s2
1827 ; GFX6-NEXT: s_mov_b32 s1, s3
1828 ; GFX6-NEXT: s_mov_b32 s2, s4
1829 ; GFX6-NEXT: s_mov_b32 s3, s5
1830 ; GFX6-NEXT: s_mov_b32 s4, s6
1831 ; GFX6-NEXT: s_mov_b32 s5, s7
1832 ; GFX6-NEXT: s_mov_b32 s6, s8
1833 ; GFX6-NEXT: s_mov_b32 s7, s9
1834 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
1835 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1836 ; GFX6-NEXT: ; return to shader part epilog
1838 ; GFX8-LABEL: atomic_add_i64_cube:
1839 ; GFX8: ; %bb.0: ; %main_body
1840 ; GFX8-NEXT: s_mov_b32 s0, s2
1841 ; GFX8-NEXT: s_mov_b32 s1, s3
1842 ; GFX8-NEXT: s_mov_b32 s2, s4
1843 ; GFX8-NEXT: s_mov_b32 s3, s5
1844 ; GFX8-NEXT: s_mov_b32 s4, s6
1845 ; GFX8-NEXT: s_mov_b32 s5, s7
1846 ; GFX8-NEXT: s_mov_b32 s6, s8
1847 ; GFX8-NEXT: s_mov_b32 s7, s9
1848 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
1849 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1850 ; GFX8-NEXT: ; return to shader part epilog
1852 ; GFX10-LABEL: atomic_add_i64_cube:
1853 ; GFX10: ; %bb.0: ; %main_body
1854 ; GFX10-NEXT: s_mov_b32 s0, s2
1855 ; GFX10-NEXT: s_mov_b32 s1, s3
1856 ; GFX10-NEXT: s_mov_b32 s2, s4
1857 ; GFX10-NEXT: s_mov_b32 s3, s5
1858 ; GFX10-NEXT: s_mov_b32 s4, s6
1859 ; GFX10-NEXT: s_mov_b32 s5, s7
1860 ; GFX10-NEXT: s_mov_b32 s6, s8
1861 ; GFX10-NEXT: s_mov_b32 s7, s9
1862 ; GFX10-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_CUBE unorm glc
1863 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1864 ; GFX10-NEXT: ; return to shader part epilog
1866 %v = call i64 @llvm.amdgcn.image.atomic.add.cube.i64.i32(i64 %data, i32 %s, i32 %t, i32 %face, <8 x i32> %rsrc, i32 0, i32 0)
1867 %out = bitcast i64 %v to <2 x float>
1868 ret <2 x float> %out
; 64-bit atomic add on a 1D array image addressed by %s and %slice.
; The i64 result is bitcast to <2 x float> and returned. The checks expect
; one image_atomic_add with data in v[0:1], a two-register address v[2:3]
; and dmask 0x3. The GFX6 and GFX8 checks expect a trailing da modifier; the
; GFX10 checks expect the dim SQ_RSRC_IMG_1D_ARRAY operand and no da.
1871 define amdgpu_ps <2 x float> @atomic_add_i64_1darray(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %slice) {
1872 ; GFX6-LABEL: atomic_add_i64_1darray:
1873 ; GFX6: ; %bb.0: ; %main_body
1874 ; GFX6-NEXT: s_mov_b32 s0, s2
1875 ; GFX6-NEXT: s_mov_b32 s1, s3
1876 ; GFX6-NEXT: s_mov_b32 s2, s4
1877 ; GFX6-NEXT: s_mov_b32 s3, s5
1878 ; GFX6-NEXT: s_mov_b32 s4, s6
1879 ; GFX6-NEXT: s_mov_b32 s5, s7
1880 ; GFX6-NEXT: s_mov_b32 s6, s8
1881 ; GFX6-NEXT: s_mov_b32 s7, s9
1882 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da
1883 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1884 ; GFX6-NEXT: ; return to shader part epilog
1886 ; GFX8-LABEL: atomic_add_i64_1darray:
1887 ; GFX8: ; %bb.0: ; %main_body
1888 ; GFX8-NEXT: s_mov_b32 s0, s2
1889 ; GFX8-NEXT: s_mov_b32 s1, s3
1890 ; GFX8-NEXT: s_mov_b32 s2, s4
1891 ; GFX8-NEXT: s_mov_b32 s3, s5
1892 ; GFX8-NEXT: s_mov_b32 s4, s6
1893 ; GFX8-NEXT: s_mov_b32 s5, s7
1894 ; GFX8-NEXT: s_mov_b32 s6, s8
1895 ; GFX8-NEXT: s_mov_b32 s7, s9
1896 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da
1897 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1898 ; GFX8-NEXT: ; return to shader part epilog
1900 ; GFX10-LABEL: atomic_add_i64_1darray:
1901 ; GFX10: ; %bb.0: ; %main_body
1902 ; GFX10-NEXT: s_mov_b32 s0, s2
1903 ; GFX10-NEXT: s_mov_b32 s1, s3
1904 ; GFX10-NEXT: s_mov_b32 s2, s4
1905 ; GFX10-NEXT: s_mov_b32 s3, s5
1906 ; GFX10-NEXT: s_mov_b32 s4, s6
1907 ; GFX10-NEXT: s_mov_b32 s5, s7
1908 ; GFX10-NEXT: s_mov_b32 s6, s8
1909 ; GFX10-NEXT: s_mov_b32 s7, s9
1910 ; GFX10-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc
1911 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1912 ; GFX10-NEXT: ; return to shader part epilog
1914 %v = call i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i32(i64 %data, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
1915 %out = bitcast i64 %v to <2 x float>
1916 ret <2 x float> %out
; 64-bit atomic add on a 2D array image addressed by %s, %t and %slice.
; The i64 result is bitcast to <2 x float> and returned. The checks expect
; one image_atomic_add with data in v[0:1], a three-register address v[2:4]
; and dmask 0x3. The GFX6 and GFX8 checks expect a trailing da modifier; the
; GFX10 checks expect the dim SQ_RSRC_IMG_2D_ARRAY operand and no da.
1919 define amdgpu_ps <2 x float> @atomic_add_i64_2darray(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %slice) {
1920 ; GFX6-LABEL: atomic_add_i64_2darray:
1921 ; GFX6: ; %bb.0: ; %main_body
1922 ; GFX6-NEXT: s_mov_b32 s0, s2
1923 ; GFX6-NEXT: s_mov_b32 s1, s3
1924 ; GFX6-NEXT: s_mov_b32 s2, s4
1925 ; GFX6-NEXT: s_mov_b32 s3, s5
1926 ; GFX6-NEXT: s_mov_b32 s4, s6
1927 ; GFX6-NEXT: s_mov_b32 s5, s7
1928 ; GFX6-NEXT: s_mov_b32 s6, s8
1929 ; GFX6-NEXT: s_mov_b32 s7, s9
1930 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
1931 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1932 ; GFX6-NEXT: ; return to shader part epilog
1934 ; GFX8-LABEL: atomic_add_i64_2darray:
1935 ; GFX8: ; %bb.0: ; %main_body
1936 ; GFX8-NEXT: s_mov_b32 s0, s2
1937 ; GFX8-NEXT: s_mov_b32 s1, s3
1938 ; GFX8-NEXT: s_mov_b32 s2, s4
1939 ; GFX8-NEXT: s_mov_b32 s3, s5
1940 ; GFX8-NEXT: s_mov_b32 s4, s6
1941 ; GFX8-NEXT: s_mov_b32 s5, s7
1942 ; GFX8-NEXT: s_mov_b32 s6, s8
1943 ; GFX8-NEXT: s_mov_b32 s7, s9
1944 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
1945 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1946 ; GFX8-NEXT: ; return to shader part epilog
1948 ; GFX10-LABEL: atomic_add_i64_2darray:
1949 ; GFX10: ; %bb.0: ; %main_body
1950 ; GFX10-NEXT: s_mov_b32 s0, s2
1951 ; GFX10-NEXT: s_mov_b32 s1, s3
1952 ; GFX10-NEXT: s_mov_b32 s2, s4
1953 ; GFX10-NEXT: s_mov_b32 s3, s5
1954 ; GFX10-NEXT: s_mov_b32 s4, s6
1955 ; GFX10-NEXT: s_mov_b32 s5, s7
1956 ; GFX10-NEXT: s_mov_b32 s6, s8
1957 ; GFX10-NEXT: s_mov_b32 s7, s9
1958 ; GFX10-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc
1959 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1960 ; GFX10-NEXT: ; return to shader part epilog
1962 %v = call i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i32(i64 %data, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
1963 %out = bitcast i64 %v to <2 x float>
1964 ret <2 x float> %out
; 64-bit atomic add on a 2D MSAA image addressed by %s, %t and %fragid.
; The i64 result is bitcast to <2 x float> and returned. The checks expect
; one image_atomic_add with data in v[0:1], a three-register address v[2:4]
; and dmask 0x3. No da modifier is expected on any prefix; only the GFX10
; checks expect the dim SQ_RSRC_IMG_2D_MSAA operand.
1967 define amdgpu_ps <2 x float> @atomic_add_i64_2dmsaa(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %fragid) {
1968 ; GFX6-LABEL: atomic_add_i64_2dmsaa:
1969 ; GFX6: ; %bb.0: ; %main_body
1970 ; GFX6-NEXT: s_mov_b32 s0, s2
1971 ; GFX6-NEXT: s_mov_b32 s1, s3
1972 ; GFX6-NEXT: s_mov_b32 s2, s4
1973 ; GFX6-NEXT: s_mov_b32 s3, s5
1974 ; GFX6-NEXT: s_mov_b32 s4, s6
1975 ; GFX6-NEXT: s_mov_b32 s5, s7
1976 ; GFX6-NEXT: s_mov_b32 s6, s8
1977 ; GFX6-NEXT: s_mov_b32 s7, s9
1978 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
1979 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1980 ; GFX6-NEXT: ; return to shader part epilog
1982 ; GFX8-LABEL: atomic_add_i64_2dmsaa:
1983 ; GFX8: ; %bb.0: ; %main_body
1984 ; GFX8-NEXT: s_mov_b32 s0, s2
1985 ; GFX8-NEXT: s_mov_b32 s1, s3
1986 ; GFX8-NEXT: s_mov_b32 s2, s4
1987 ; GFX8-NEXT: s_mov_b32 s3, s5
1988 ; GFX8-NEXT: s_mov_b32 s4, s6
1989 ; GFX8-NEXT: s_mov_b32 s5, s7
1990 ; GFX8-NEXT: s_mov_b32 s6, s8
1991 ; GFX8-NEXT: s_mov_b32 s7, s9
1992 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
1993 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1994 ; GFX8-NEXT: ; return to shader part epilog
1996 ; GFX10-LABEL: atomic_add_i64_2dmsaa:
1997 ; GFX10: ; %bb.0: ; %main_body
1998 ; GFX10-NEXT: s_mov_b32 s0, s2
1999 ; GFX10-NEXT: s_mov_b32 s1, s3
2000 ; GFX10-NEXT: s_mov_b32 s2, s4
2001 ; GFX10-NEXT: s_mov_b32 s3, s5
2002 ; GFX10-NEXT: s_mov_b32 s4, s6
2003 ; GFX10-NEXT: s_mov_b32 s5, s7
2004 ; GFX10-NEXT: s_mov_b32 s6, s8
2005 ; GFX10-NEXT: s_mov_b32 s7, s9
2006 ; GFX10-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA unorm glc
2007 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2008 ; GFX10-NEXT: ; return to shader part epilog
2010 %v = call i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i32(i64 %data, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
2011 %out = bitcast i64 %v to <2 x float>
2012 ret <2 x float> %out
; 64-bit atomic add on a 2D array MSAA image addressed by %s, %t, %slice and
; %fragid. The i64 result is bitcast to <2 x float> and returned. The checks
; expect one image_atomic_add with data in v[0:1], a four-register address
; v[2:5] and dmask 0x3. The GFX6 and GFX8 checks expect a trailing da
; modifier; the GFX10 checks expect the dim SQ_RSRC_IMG_2D_MSAA_ARRAY operand
; and no da.
2015 define amdgpu_ps <2 x float> @atomic_add_i64_2darraymsaa(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
2016 ; GFX6-LABEL: atomic_add_i64_2darraymsaa:
2017 ; GFX6: ; %bb.0: ; %main_body
2018 ; GFX6-NEXT: s_mov_b32 s0, s2
2019 ; GFX6-NEXT: s_mov_b32 s1, s3
2020 ; GFX6-NEXT: s_mov_b32 s2, s4
2021 ; GFX6-NEXT: s_mov_b32 s3, s5
2022 ; GFX6-NEXT: s_mov_b32 s4, s6
2023 ; GFX6-NEXT: s_mov_b32 s5, s7
2024 ; GFX6-NEXT: s_mov_b32 s6, s8
2025 ; GFX6-NEXT: s_mov_b32 s7, s9
2026 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da
2027 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2028 ; GFX6-NEXT: ; return to shader part epilog
2030 ; GFX8-LABEL: atomic_add_i64_2darraymsaa:
2031 ; GFX8: ; %bb.0: ; %main_body
2032 ; GFX8-NEXT: s_mov_b32 s0, s2
2033 ; GFX8-NEXT: s_mov_b32 s1, s3
2034 ; GFX8-NEXT: s_mov_b32 s2, s4
2035 ; GFX8-NEXT: s_mov_b32 s3, s5
2036 ; GFX8-NEXT: s_mov_b32 s4, s6
2037 ; GFX8-NEXT: s_mov_b32 s5, s7
2038 ; GFX8-NEXT: s_mov_b32 s6, s8
2039 ; GFX8-NEXT: s_mov_b32 s7, s9
2040 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da
2041 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2042 ; GFX8-NEXT: ; return to shader part epilog
2044 ; GFX10-LABEL: atomic_add_i64_2darraymsaa:
2045 ; GFX10: ; %bb.0: ; %main_body
2046 ; GFX10-NEXT: s_mov_b32 s0, s2
2047 ; GFX10-NEXT: s_mov_b32 s1, s3
2048 ; GFX10-NEXT: s_mov_b32 s2, s4
2049 ; GFX10-NEXT: s_mov_b32 s3, s5
2050 ; GFX10-NEXT: s_mov_b32 s4, s6
2051 ; GFX10-NEXT: s_mov_b32 s5, s7
2052 ; GFX10-NEXT: s_mov_b32 s6, s8
2053 ; GFX10-NEXT: s_mov_b32 s7, s9
2054 ; GFX10-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc
2055 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2056 ; GFX10-NEXT: ; return to shader part epilog
2058 %v = call i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i32(i64 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
2059 %out = bitcast i64 %v to <2 x float>
2060 ret <2 x float> %out
; 64-bit atomic add on a 1D image with a non-zero final immediate argument.
; The intrinsic's last i32 operand is 2 here, and the expected
; image_atomic_add on every prefix carries an slc modifier after glc.
; NOTE(review): the last operand is presumably the cache-policy immediate
; with bit 1 selecting slc; confirm against the intrinsic definition.
; The i64 result is bitcast to <2 x float> and returned; data is expected in
; v[0:1] with a single address register v2 and dmask 0x3.
2063 define amdgpu_ps <2 x float> @atomic_add_i64_1d_slc(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2064 ; GFX6-LABEL: atomic_add_i64_1d_slc:
2065 ; GFX6: ; %bb.0: ; %main_body
2066 ; GFX6-NEXT: s_mov_b32 s0, s2
2067 ; GFX6-NEXT: s_mov_b32 s1, s3
2068 ; GFX6-NEXT: s_mov_b32 s2, s4
2069 ; GFX6-NEXT: s_mov_b32 s3, s5
2070 ; GFX6-NEXT: s_mov_b32 s4, s6
2071 ; GFX6-NEXT: s_mov_b32 s5, s7
2072 ; GFX6-NEXT: s_mov_b32 s6, s8
2073 ; GFX6-NEXT: s_mov_b32 s7, s9
2074 ; GFX6-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc
2075 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2076 ; GFX6-NEXT: ; return to shader part epilog
2078 ; GFX8-LABEL: atomic_add_i64_1d_slc:
2079 ; GFX8: ; %bb.0: ; %main_body
2080 ; GFX8-NEXT: s_mov_b32 s0, s2
2081 ; GFX8-NEXT: s_mov_b32 s1, s3
2082 ; GFX8-NEXT: s_mov_b32 s2, s4
2083 ; GFX8-NEXT: s_mov_b32 s3, s5
2084 ; GFX8-NEXT: s_mov_b32 s4, s6
2085 ; GFX8-NEXT: s_mov_b32 s5, s7
2086 ; GFX8-NEXT: s_mov_b32 s6, s8
2087 ; GFX8-NEXT: s_mov_b32 s7, s9
2088 ; GFX8-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc
2089 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2090 ; GFX8-NEXT: ; return to shader part epilog
2092 ; GFX10-LABEL: atomic_add_i64_1d_slc:
2093 ; GFX10: ; %bb.0: ; %main_body
2094 ; GFX10-NEXT: s_mov_b32 s0, s2
2095 ; GFX10-NEXT: s_mov_b32 s1, s3
2096 ; GFX10-NEXT: s_mov_b32 s2, s4
2097 ; GFX10-NEXT: s_mov_b32 s3, s5
2098 ; GFX10-NEXT: s_mov_b32 s4, s6
2099 ; GFX10-NEXT: s_mov_b32 s5, s7
2100 ; GFX10-NEXT: s_mov_b32 s6, s8
2101 ; GFX10-NEXT: s_mov_b32 s7, s9
2102 ; GFX10-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc slc
2103 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2104 ; GFX10-NEXT: ; return to shader part epilog
2106 %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
2107 %out = bitcast i64 %v to <2 x float>
2108 ret <2 x float> %out
; Declarations of the image atomic intrinsics called by the tests in this
; file. Each takes the data operand(s) first, then the i32 coordinates, then
; the <8 x i32> resource descriptor, then two trailing i32 operands marked
; immarg. All declarations use attribute group #0.
;
; 32-bit data variants: every operation on 1D images, then add for each
; remaining image dimension.
2111 declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2112 declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2113 declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2114 declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2115 declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2116 declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2117 declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2118 declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2119 declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2120 declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2121 declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2122 declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2123 declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2124 declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2125 declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2126 declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2127 declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2128 declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2129 declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2130 declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
; 64-bit data variants, in the same order as the 32-bit list.
2132 declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2133 declare i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2134 declare i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2135 declare i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2136 declare i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2137 declare i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2138 declare i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2139 declare i64 @llvm.amdgcn.image.atomic.and.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2140 declare i64 @llvm.amdgcn.image.atomic.or.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2141 declare i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2142 declare i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2143 declare i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2144 declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64, i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2145 declare i64 @llvm.amdgcn.image.atomic.add.2d.i64.i32(i64, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2146 declare i64 @llvm.amdgcn.image.atomic.add.3d.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2147 declare i64 @llvm.amdgcn.image.atomic.add.cube.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2148 declare i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i32(i64, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2149 declare i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2150 declare i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
2151 declare i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i32(i64, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
; Attribute group referenced by every declaration above.
2153 attributes #0 = { nounwind }