1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - %s | FileCheck -check-prefix=GFX8 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX900 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -o - %s | FileCheck -check-prefix=GFX90A %s
6 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s
; Covers llvm.amdgcn.image.atomic.swap.1d on i32 data with a single i32
; coordinate (%s); the descriptor %rsrc arrives inreg.
; For each RUN configuration the checks expect s2..s9 to be copied into
; s0..s7, followed by one image_atomic_swap on s[0:7] with dmask:0x1 unorm glc.
; Per-target differences visible in the checks: the GFX6 wait also names
; expcnt(0), GFX90A first copies v1 into v2 and passes v2 as the second
; operand, and GFX10 prints an explicit dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
8 define amdgpu_ps float @atomic_swap_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
9 ; GFX6-LABEL: atomic_swap_i32_1d:
10 ; GFX6: ; %bb.0: ; %main_body
11 ; GFX6-NEXT: s_mov_b32 s0, s2
12 ; GFX6-NEXT: s_mov_b32 s1, s3
13 ; GFX6-NEXT: s_mov_b32 s2, s4
14 ; GFX6-NEXT: s_mov_b32 s3, s5
15 ; GFX6-NEXT: s_mov_b32 s4, s6
16 ; GFX6-NEXT: s_mov_b32 s5, s7
17 ; GFX6-NEXT: s_mov_b32 s6, s8
18 ; GFX6-NEXT: s_mov_b32 s7, s9
19 ; GFX6-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc
20 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
21 ; GFX6-NEXT: ; return to shader part epilog
23 ; GFX8-LABEL: atomic_swap_i32_1d:
24 ; GFX8: ; %bb.0: ; %main_body
25 ; GFX8-NEXT: s_mov_b32 s0, s2
26 ; GFX8-NEXT: s_mov_b32 s1, s3
27 ; GFX8-NEXT: s_mov_b32 s2, s4
28 ; GFX8-NEXT: s_mov_b32 s3, s5
29 ; GFX8-NEXT: s_mov_b32 s4, s6
30 ; GFX8-NEXT: s_mov_b32 s5, s7
31 ; GFX8-NEXT: s_mov_b32 s6, s8
32 ; GFX8-NEXT: s_mov_b32 s7, s9
33 ; GFX8-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc
34 ; GFX8-NEXT: s_waitcnt vmcnt(0)
35 ; GFX8-NEXT: ; return to shader part epilog
37 ; GFX900-LABEL: atomic_swap_i32_1d:
38 ; GFX900: ; %bb.0: ; %main_body
39 ; GFX900-NEXT: s_mov_b32 s0, s2
40 ; GFX900-NEXT: s_mov_b32 s1, s3
41 ; GFX900-NEXT: s_mov_b32 s2, s4
42 ; GFX900-NEXT: s_mov_b32 s3, s5
43 ; GFX900-NEXT: s_mov_b32 s4, s6
44 ; GFX900-NEXT: s_mov_b32 s5, s7
45 ; GFX900-NEXT: s_mov_b32 s6, s8
46 ; GFX900-NEXT: s_mov_b32 s7, s9
47 ; GFX900-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc
48 ; GFX900-NEXT: s_waitcnt vmcnt(0)
49 ; GFX900-NEXT: ; return to shader part epilog
51 ; GFX90A-LABEL: atomic_swap_i32_1d:
52 ; GFX90A: ; %bb.0: ; %main_body
53 ; GFX90A-NEXT: s_mov_b32 s0, s2
54 ; GFX90A-NEXT: s_mov_b32 s1, s3
55 ; GFX90A-NEXT: s_mov_b32 s2, s4
56 ; GFX90A-NEXT: s_mov_b32 s3, s5
57 ; GFX90A-NEXT: s_mov_b32 s4, s6
58 ; GFX90A-NEXT: s_mov_b32 s5, s7
59 ; GFX90A-NEXT: s_mov_b32 s6, s8
60 ; GFX90A-NEXT: s_mov_b32 s7, s9
61 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
62 ; GFX90A-NEXT: image_atomic_swap v0, v2, s[0:7] dmask:0x1 unorm glc
63 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
64 ; GFX90A-NEXT: ; return to shader part epilog
66 ; GFX10-LABEL: atomic_swap_i32_1d:
67 ; GFX10: ; %bb.0: ; %main_body
68 ; GFX10-NEXT: s_mov_b32 s0, s2
69 ; GFX10-NEXT: s_mov_b32 s1, s3
70 ; GFX10-NEXT: s_mov_b32 s2, s4
71 ; GFX10-NEXT: s_mov_b32 s3, s5
72 ; GFX10-NEXT: s_mov_b32 s4, s6
73 ; GFX10-NEXT: s_mov_b32 s5, s7
74 ; GFX10-NEXT: s_mov_b32 s6, s8
75 ; GFX10-NEXT: s_mov_b32 s7, s9
76 ; GFX10-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
77 ; GFX10-NEXT: s_waitcnt vmcnt(0)
78 ; GFX10-NEXT: ; return to shader part epilog
; The intrinsic takes the data value, the coordinate, and the descriptor; the
; two trailing i32 0 operands are presumably texfailctrl and cachepolicy
; (TODO confirm against the intrinsic definition).
80 %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; Reinterpret the i32 result as float to match the declared return type.
81 %out = bitcast i32 %v to float
; Covers llvm.amdgcn.image.atomic.add.1d on i32 data with a single i32
; coordinate (%s); the descriptor %rsrc arrives inreg.
; For each RUN configuration the checks expect s2..s9 to be copied into
; s0..s7, followed by one image_atomic_add on s[0:7] with dmask:0x1 unorm glc.
; Per-target differences visible in the checks: the GFX6 wait also names
; expcnt(0), GFX90A first copies v1 into v2 and passes v2 as the second
; operand, and GFX10 prints an explicit dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
85 define amdgpu_ps float @atomic_add_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
86 ; GFX6-LABEL: atomic_add_i32_1d:
87 ; GFX6: ; %bb.0: ; %main_body
88 ; GFX6-NEXT: s_mov_b32 s0, s2
89 ; GFX6-NEXT: s_mov_b32 s1, s3
90 ; GFX6-NEXT: s_mov_b32 s2, s4
91 ; GFX6-NEXT: s_mov_b32 s3, s5
92 ; GFX6-NEXT: s_mov_b32 s4, s6
93 ; GFX6-NEXT: s_mov_b32 s5, s7
94 ; GFX6-NEXT: s_mov_b32 s6, s8
95 ; GFX6-NEXT: s_mov_b32 s7, s9
96 ; GFX6-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc
97 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
98 ; GFX6-NEXT: ; return to shader part epilog
100 ; GFX8-LABEL: atomic_add_i32_1d:
101 ; GFX8: ; %bb.0: ; %main_body
102 ; GFX8-NEXT: s_mov_b32 s0, s2
103 ; GFX8-NEXT: s_mov_b32 s1, s3
104 ; GFX8-NEXT: s_mov_b32 s2, s4
105 ; GFX8-NEXT: s_mov_b32 s3, s5
106 ; GFX8-NEXT: s_mov_b32 s4, s6
107 ; GFX8-NEXT: s_mov_b32 s5, s7
108 ; GFX8-NEXT: s_mov_b32 s6, s8
109 ; GFX8-NEXT: s_mov_b32 s7, s9
110 ; GFX8-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc
111 ; GFX8-NEXT: s_waitcnt vmcnt(0)
112 ; GFX8-NEXT: ; return to shader part epilog
114 ; GFX900-LABEL: atomic_add_i32_1d:
115 ; GFX900: ; %bb.0: ; %main_body
116 ; GFX900-NEXT: s_mov_b32 s0, s2
117 ; GFX900-NEXT: s_mov_b32 s1, s3
118 ; GFX900-NEXT: s_mov_b32 s2, s4
119 ; GFX900-NEXT: s_mov_b32 s3, s5
120 ; GFX900-NEXT: s_mov_b32 s4, s6
121 ; GFX900-NEXT: s_mov_b32 s5, s7
122 ; GFX900-NEXT: s_mov_b32 s6, s8
123 ; GFX900-NEXT: s_mov_b32 s7, s9
124 ; GFX900-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc
125 ; GFX900-NEXT: s_waitcnt vmcnt(0)
126 ; GFX900-NEXT: ; return to shader part epilog
128 ; GFX90A-LABEL: atomic_add_i32_1d:
129 ; GFX90A: ; %bb.0: ; %main_body
130 ; GFX90A-NEXT: s_mov_b32 s0, s2
131 ; GFX90A-NEXT: s_mov_b32 s1, s3
132 ; GFX90A-NEXT: s_mov_b32 s2, s4
133 ; GFX90A-NEXT: s_mov_b32 s3, s5
134 ; GFX90A-NEXT: s_mov_b32 s4, s6
135 ; GFX90A-NEXT: s_mov_b32 s5, s7
136 ; GFX90A-NEXT: s_mov_b32 s6, s8
137 ; GFX90A-NEXT: s_mov_b32 s7, s9
138 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
139 ; GFX90A-NEXT: image_atomic_add v0, v2, s[0:7] dmask:0x1 unorm glc
140 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
141 ; GFX90A-NEXT: ; return to shader part epilog
143 ; GFX10-LABEL: atomic_add_i32_1d:
144 ; GFX10: ; %bb.0: ; %main_body
145 ; GFX10-NEXT: s_mov_b32 s0, s2
146 ; GFX10-NEXT: s_mov_b32 s1, s3
147 ; GFX10-NEXT: s_mov_b32 s2, s4
148 ; GFX10-NEXT: s_mov_b32 s3, s5
149 ; GFX10-NEXT: s_mov_b32 s4, s6
150 ; GFX10-NEXT: s_mov_b32 s5, s7
151 ; GFX10-NEXT: s_mov_b32 s6, s8
152 ; GFX10-NEXT: s_mov_b32 s7, s9
153 ; GFX10-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
154 ; GFX10-NEXT: s_waitcnt vmcnt(0)
155 ; GFX10-NEXT: ; return to shader part epilog
; The intrinsic takes the data value, the coordinate, and the descriptor; the
; two trailing i32 0 operands are presumably texfailctrl and cachepolicy
; (TODO confirm against the intrinsic definition).
157 %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; Reinterpret the i32 result as float to match the declared return type.
158 %out = bitcast i32 %v to float
; Covers llvm.amdgcn.image.atomic.sub.1d on i32 data with a single i32
; coordinate (%s); the descriptor %rsrc arrives inreg.
; For each RUN configuration the checks expect s2..s9 to be copied into
; s0..s7, followed by one image_atomic_sub on s[0:7] with dmask:0x1 unorm glc.
; Per-target differences visible in the checks: the GFX6 wait also names
; expcnt(0), GFX90A first copies v1 into v2 and passes v2 as the second
; operand, and GFX10 prints an explicit dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
162 define amdgpu_ps float @atomic_sub_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
163 ; GFX6-LABEL: atomic_sub_i32_1d:
164 ; GFX6: ; %bb.0: ; %main_body
165 ; GFX6-NEXT: s_mov_b32 s0, s2
166 ; GFX6-NEXT: s_mov_b32 s1, s3
167 ; GFX6-NEXT: s_mov_b32 s2, s4
168 ; GFX6-NEXT: s_mov_b32 s3, s5
169 ; GFX6-NEXT: s_mov_b32 s4, s6
170 ; GFX6-NEXT: s_mov_b32 s5, s7
171 ; GFX6-NEXT: s_mov_b32 s6, s8
172 ; GFX6-NEXT: s_mov_b32 s7, s9
173 ; GFX6-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc
174 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
175 ; GFX6-NEXT: ; return to shader part epilog
177 ; GFX8-LABEL: atomic_sub_i32_1d:
178 ; GFX8: ; %bb.0: ; %main_body
179 ; GFX8-NEXT: s_mov_b32 s0, s2
180 ; GFX8-NEXT: s_mov_b32 s1, s3
181 ; GFX8-NEXT: s_mov_b32 s2, s4
182 ; GFX8-NEXT: s_mov_b32 s3, s5
183 ; GFX8-NEXT: s_mov_b32 s4, s6
184 ; GFX8-NEXT: s_mov_b32 s5, s7
185 ; GFX8-NEXT: s_mov_b32 s6, s8
186 ; GFX8-NEXT: s_mov_b32 s7, s9
187 ; GFX8-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc
188 ; GFX8-NEXT: s_waitcnt vmcnt(0)
189 ; GFX8-NEXT: ; return to shader part epilog
191 ; GFX900-LABEL: atomic_sub_i32_1d:
192 ; GFX900: ; %bb.0: ; %main_body
193 ; GFX900-NEXT: s_mov_b32 s0, s2
194 ; GFX900-NEXT: s_mov_b32 s1, s3
195 ; GFX900-NEXT: s_mov_b32 s2, s4
196 ; GFX900-NEXT: s_mov_b32 s3, s5
197 ; GFX900-NEXT: s_mov_b32 s4, s6
198 ; GFX900-NEXT: s_mov_b32 s5, s7
199 ; GFX900-NEXT: s_mov_b32 s6, s8
200 ; GFX900-NEXT: s_mov_b32 s7, s9
201 ; GFX900-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc
202 ; GFX900-NEXT: s_waitcnt vmcnt(0)
203 ; GFX900-NEXT: ; return to shader part epilog
205 ; GFX90A-LABEL: atomic_sub_i32_1d:
206 ; GFX90A: ; %bb.0: ; %main_body
207 ; GFX90A-NEXT: s_mov_b32 s0, s2
208 ; GFX90A-NEXT: s_mov_b32 s1, s3
209 ; GFX90A-NEXT: s_mov_b32 s2, s4
210 ; GFX90A-NEXT: s_mov_b32 s3, s5
211 ; GFX90A-NEXT: s_mov_b32 s4, s6
212 ; GFX90A-NEXT: s_mov_b32 s5, s7
213 ; GFX90A-NEXT: s_mov_b32 s6, s8
214 ; GFX90A-NEXT: s_mov_b32 s7, s9
215 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
216 ; GFX90A-NEXT: image_atomic_sub v0, v2, s[0:7] dmask:0x1 unorm glc
217 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
218 ; GFX90A-NEXT: ; return to shader part epilog
220 ; GFX10-LABEL: atomic_sub_i32_1d:
221 ; GFX10: ; %bb.0: ; %main_body
222 ; GFX10-NEXT: s_mov_b32 s0, s2
223 ; GFX10-NEXT: s_mov_b32 s1, s3
224 ; GFX10-NEXT: s_mov_b32 s2, s4
225 ; GFX10-NEXT: s_mov_b32 s3, s5
226 ; GFX10-NEXT: s_mov_b32 s4, s6
227 ; GFX10-NEXT: s_mov_b32 s5, s7
228 ; GFX10-NEXT: s_mov_b32 s6, s8
229 ; GFX10-NEXT: s_mov_b32 s7, s9
230 ; GFX10-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
231 ; GFX10-NEXT: s_waitcnt vmcnt(0)
232 ; GFX10-NEXT: ; return to shader part epilog
; The intrinsic takes the data value, the coordinate, and the descriptor; the
; two trailing i32 0 operands are presumably texfailctrl and cachepolicy
; (TODO confirm against the intrinsic definition).
234 %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; Reinterpret the i32 result as float to match the declared return type.
235 %out = bitcast i32 %v to float
; Covers llvm.amdgcn.image.atomic.smin.1d on i32 data with a single i32
; coordinate (%s); the descriptor %rsrc arrives inreg.
; For each RUN configuration the checks expect s2..s9 to be copied into
; s0..s7, followed by one image_atomic_smin on s[0:7] with dmask:0x1 unorm glc.
; Per-target differences visible in the checks: the GFX6 wait also names
; expcnt(0), GFX90A first copies v1 into v2 and passes v2 as the second
; operand, and GFX10 prints an explicit dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
239 define amdgpu_ps float @atomic_smin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
240 ; GFX6-LABEL: atomic_smin_i32_1d:
241 ; GFX6: ; %bb.0: ; %main_body
242 ; GFX6-NEXT: s_mov_b32 s0, s2
243 ; GFX6-NEXT: s_mov_b32 s1, s3
244 ; GFX6-NEXT: s_mov_b32 s2, s4
245 ; GFX6-NEXT: s_mov_b32 s3, s5
246 ; GFX6-NEXT: s_mov_b32 s4, s6
247 ; GFX6-NEXT: s_mov_b32 s5, s7
248 ; GFX6-NEXT: s_mov_b32 s6, s8
249 ; GFX6-NEXT: s_mov_b32 s7, s9
250 ; GFX6-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc
251 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
252 ; GFX6-NEXT: ; return to shader part epilog
254 ; GFX8-LABEL: atomic_smin_i32_1d:
255 ; GFX8: ; %bb.0: ; %main_body
256 ; GFX8-NEXT: s_mov_b32 s0, s2
257 ; GFX8-NEXT: s_mov_b32 s1, s3
258 ; GFX8-NEXT: s_mov_b32 s2, s4
259 ; GFX8-NEXT: s_mov_b32 s3, s5
260 ; GFX8-NEXT: s_mov_b32 s4, s6
261 ; GFX8-NEXT: s_mov_b32 s5, s7
262 ; GFX8-NEXT: s_mov_b32 s6, s8
263 ; GFX8-NEXT: s_mov_b32 s7, s9
264 ; GFX8-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc
265 ; GFX8-NEXT: s_waitcnt vmcnt(0)
266 ; GFX8-NEXT: ; return to shader part epilog
268 ; GFX900-LABEL: atomic_smin_i32_1d:
269 ; GFX900: ; %bb.0: ; %main_body
270 ; GFX900-NEXT: s_mov_b32 s0, s2
271 ; GFX900-NEXT: s_mov_b32 s1, s3
272 ; GFX900-NEXT: s_mov_b32 s2, s4
273 ; GFX900-NEXT: s_mov_b32 s3, s5
274 ; GFX900-NEXT: s_mov_b32 s4, s6
275 ; GFX900-NEXT: s_mov_b32 s5, s7
276 ; GFX900-NEXT: s_mov_b32 s6, s8
277 ; GFX900-NEXT: s_mov_b32 s7, s9
278 ; GFX900-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc
279 ; GFX900-NEXT: s_waitcnt vmcnt(0)
280 ; GFX900-NEXT: ; return to shader part epilog
282 ; GFX90A-LABEL: atomic_smin_i32_1d:
283 ; GFX90A: ; %bb.0: ; %main_body
284 ; GFX90A-NEXT: s_mov_b32 s0, s2
285 ; GFX90A-NEXT: s_mov_b32 s1, s3
286 ; GFX90A-NEXT: s_mov_b32 s2, s4
287 ; GFX90A-NEXT: s_mov_b32 s3, s5
288 ; GFX90A-NEXT: s_mov_b32 s4, s6
289 ; GFX90A-NEXT: s_mov_b32 s5, s7
290 ; GFX90A-NEXT: s_mov_b32 s6, s8
291 ; GFX90A-NEXT: s_mov_b32 s7, s9
292 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
293 ; GFX90A-NEXT: image_atomic_smin v0, v2, s[0:7] dmask:0x1 unorm glc
294 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
295 ; GFX90A-NEXT: ; return to shader part epilog
297 ; GFX10-LABEL: atomic_smin_i32_1d:
298 ; GFX10: ; %bb.0: ; %main_body
299 ; GFX10-NEXT: s_mov_b32 s0, s2
300 ; GFX10-NEXT: s_mov_b32 s1, s3
301 ; GFX10-NEXT: s_mov_b32 s2, s4
302 ; GFX10-NEXT: s_mov_b32 s3, s5
303 ; GFX10-NEXT: s_mov_b32 s4, s6
304 ; GFX10-NEXT: s_mov_b32 s5, s7
305 ; GFX10-NEXT: s_mov_b32 s6, s8
306 ; GFX10-NEXT: s_mov_b32 s7, s9
307 ; GFX10-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
308 ; GFX10-NEXT: s_waitcnt vmcnt(0)
309 ; GFX10-NEXT: ; return to shader part epilog
; The intrinsic takes the data value, the coordinate, and the descriptor; the
; two trailing i32 0 operands are presumably texfailctrl and cachepolicy
; (TODO confirm against the intrinsic definition).
311 %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; Reinterpret the i32 result as float to match the declared return type.
312 %out = bitcast i32 %v to float
; Covers llvm.amdgcn.image.atomic.umin.1d on i32 data with a single i32
; coordinate (%s); the descriptor %rsrc arrives inreg.
; For each RUN configuration the checks expect s2..s9 to be copied into
; s0..s7, followed by one image_atomic_umin on s[0:7] with dmask:0x1 unorm glc.
; Per-target differences visible in the checks: the GFX6 wait also names
; expcnt(0), GFX90A first copies v1 into v2 and passes v2 as the second
; operand, and GFX10 prints an explicit dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
316 define amdgpu_ps float @atomic_umin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
317 ; GFX6-LABEL: atomic_umin_i32_1d:
318 ; GFX6: ; %bb.0: ; %main_body
319 ; GFX6-NEXT: s_mov_b32 s0, s2
320 ; GFX6-NEXT: s_mov_b32 s1, s3
321 ; GFX6-NEXT: s_mov_b32 s2, s4
322 ; GFX6-NEXT: s_mov_b32 s3, s5
323 ; GFX6-NEXT: s_mov_b32 s4, s6
324 ; GFX6-NEXT: s_mov_b32 s5, s7
325 ; GFX6-NEXT: s_mov_b32 s6, s8
326 ; GFX6-NEXT: s_mov_b32 s7, s9
327 ; GFX6-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc
328 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
329 ; GFX6-NEXT: ; return to shader part epilog
331 ; GFX8-LABEL: atomic_umin_i32_1d:
332 ; GFX8: ; %bb.0: ; %main_body
333 ; GFX8-NEXT: s_mov_b32 s0, s2
334 ; GFX8-NEXT: s_mov_b32 s1, s3
335 ; GFX8-NEXT: s_mov_b32 s2, s4
336 ; GFX8-NEXT: s_mov_b32 s3, s5
337 ; GFX8-NEXT: s_mov_b32 s4, s6
338 ; GFX8-NEXT: s_mov_b32 s5, s7
339 ; GFX8-NEXT: s_mov_b32 s6, s8
340 ; GFX8-NEXT: s_mov_b32 s7, s9
341 ; GFX8-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc
342 ; GFX8-NEXT: s_waitcnt vmcnt(0)
343 ; GFX8-NEXT: ; return to shader part epilog
345 ; GFX900-LABEL: atomic_umin_i32_1d:
346 ; GFX900: ; %bb.0: ; %main_body
347 ; GFX900-NEXT: s_mov_b32 s0, s2
348 ; GFX900-NEXT: s_mov_b32 s1, s3
349 ; GFX900-NEXT: s_mov_b32 s2, s4
350 ; GFX900-NEXT: s_mov_b32 s3, s5
351 ; GFX900-NEXT: s_mov_b32 s4, s6
352 ; GFX900-NEXT: s_mov_b32 s5, s7
353 ; GFX900-NEXT: s_mov_b32 s6, s8
354 ; GFX900-NEXT: s_mov_b32 s7, s9
355 ; GFX900-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc
356 ; GFX900-NEXT: s_waitcnt vmcnt(0)
357 ; GFX900-NEXT: ; return to shader part epilog
359 ; GFX90A-LABEL: atomic_umin_i32_1d:
360 ; GFX90A: ; %bb.0: ; %main_body
361 ; GFX90A-NEXT: s_mov_b32 s0, s2
362 ; GFX90A-NEXT: s_mov_b32 s1, s3
363 ; GFX90A-NEXT: s_mov_b32 s2, s4
364 ; GFX90A-NEXT: s_mov_b32 s3, s5
365 ; GFX90A-NEXT: s_mov_b32 s4, s6
366 ; GFX90A-NEXT: s_mov_b32 s5, s7
367 ; GFX90A-NEXT: s_mov_b32 s6, s8
368 ; GFX90A-NEXT: s_mov_b32 s7, s9
369 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
370 ; GFX90A-NEXT: image_atomic_umin v0, v2, s[0:7] dmask:0x1 unorm glc
371 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
372 ; GFX90A-NEXT: ; return to shader part epilog
374 ; GFX10-LABEL: atomic_umin_i32_1d:
375 ; GFX10: ; %bb.0: ; %main_body
376 ; GFX10-NEXT: s_mov_b32 s0, s2
377 ; GFX10-NEXT: s_mov_b32 s1, s3
378 ; GFX10-NEXT: s_mov_b32 s2, s4
379 ; GFX10-NEXT: s_mov_b32 s3, s5
380 ; GFX10-NEXT: s_mov_b32 s4, s6
381 ; GFX10-NEXT: s_mov_b32 s5, s7
382 ; GFX10-NEXT: s_mov_b32 s6, s8
383 ; GFX10-NEXT: s_mov_b32 s7, s9
384 ; GFX10-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
385 ; GFX10-NEXT: s_waitcnt vmcnt(0)
386 ; GFX10-NEXT: ; return to shader part epilog
; The intrinsic takes the data value, the coordinate, and the descriptor; the
; two trailing i32 0 operands are presumably texfailctrl and cachepolicy
; (TODO confirm against the intrinsic definition).
388 %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; Reinterpret the i32 result as float to match the declared return type.
389 %out = bitcast i32 %v to float
; Covers llvm.amdgcn.image.atomic.smax.1d on i32 data with a single i32
; coordinate (%s); the descriptor %rsrc arrives inreg.
; For each RUN configuration the checks expect s2..s9 to be copied into
; s0..s7, followed by one image_atomic_smax on s[0:7] with dmask:0x1 unorm glc.
; Per-target differences visible in the checks: the GFX6 wait also names
; expcnt(0), GFX90A first copies v1 into v2 and passes v2 as the second
; operand, and GFX10 prints an explicit dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
393 define amdgpu_ps float @atomic_smax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
394 ; GFX6-LABEL: atomic_smax_i32_1d:
395 ; GFX6: ; %bb.0: ; %main_body
396 ; GFX6-NEXT: s_mov_b32 s0, s2
397 ; GFX6-NEXT: s_mov_b32 s1, s3
398 ; GFX6-NEXT: s_mov_b32 s2, s4
399 ; GFX6-NEXT: s_mov_b32 s3, s5
400 ; GFX6-NEXT: s_mov_b32 s4, s6
401 ; GFX6-NEXT: s_mov_b32 s5, s7
402 ; GFX6-NEXT: s_mov_b32 s6, s8
403 ; GFX6-NEXT: s_mov_b32 s7, s9
404 ; GFX6-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc
405 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
406 ; GFX6-NEXT: ; return to shader part epilog
408 ; GFX8-LABEL: atomic_smax_i32_1d:
409 ; GFX8: ; %bb.0: ; %main_body
410 ; GFX8-NEXT: s_mov_b32 s0, s2
411 ; GFX8-NEXT: s_mov_b32 s1, s3
412 ; GFX8-NEXT: s_mov_b32 s2, s4
413 ; GFX8-NEXT: s_mov_b32 s3, s5
414 ; GFX8-NEXT: s_mov_b32 s4, s6
415 ; GFX8-NEXT: s_mov_b32 s5, s7
416 ; GFX8-NEXT: s_mov_b32 s6, s8
417 ; GFX8-NEXT: s_mov_b32 s7, s9
418 ; GFX8-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc
419 ; GFX8-NEXT: s_waitcnt vmcnt(0)
420 ; GFX8-NEXT: ; return to shader part epilog
422 ; GFX900-LABEL: atomic_smax_i32_1d:
423 ; GFX900: ; %bb.0: ; %main_body
424 ; GFX900-NEXT: s_mov_b32 s0, s2
425 ; GFX900-NEXT: s_mov_b32 s1, s3
426 ; GFX900-NEXT: s_mov_b32 s2, s4
427 ; GFX900-NEXT: s_mov_b32 s3, s5
428 ; GFX900-NEXT: s_mov_b32 s4, s6
429 ; GFX900-NEXT: s_mov_b32 s5, s7
430 ; GFX900-NEXT: s_mov_b32 s6, s8
431 ; GFX900-NEXT: s_mov_b32 s7, s9
432 ; GFX900-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc
433 ; GFX900-NEXT: s_waitcnt vmcnt(0)
434 ; GFX900-NEXT: ; return to shader part epilog
436 ; GFX90A-LABEL: atomic_smax_i32_1d:
437 ; GFX90A: ; %bb.0: ; %main_body
438 ; GFX90A-NEXT: s_mov_b32 s0, s2
439 ; GFX90A-NEXT: s_mov_b32 s1, s3
440 ; GFX90A-NEXT: s_mov_b32 s2, s4
441 ; GFX90A-NEXT: s_mov_b32 s3, s5
442 ; GFX90A-NEXT: s_mov_b32 s4, s6
443 ; GFX90A-NEXT: s_mov_b32 s5, s7
444 ; GFX90A-NEXT: s_mov_b32 s6, s8
445 ; GFX90A-NEXT: s_mov_b32 s7, s9
446 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
447 ; GFX90A-NEXT: image_atomic_smax v0, v2, s[0:7] dmask:0x1 unorm glc
448 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
449 ; GFX90A-NEXT: ; return to shader part epilog
451 ; GFX10-LABEL: atomic_smax_i32_1d:
452 ; GFX10: ; %bb.0: ; %main_body
453 ; GFX10-NEXT: s_mov_b32 s0, s2
454 ; GFX10-NEXT: s_mov_b32 s1, s3
455 ; GFX10-NEXT: s_mov_b32 s2, s4
456 ; GFX10-NEXT: s_mov_b32 s3, s5
457 ; GFX10-NEXT: s_mov_b32 s4, s6
458 ; GFX10-NEXT: s_mov_b32 s5, s7
459 ; GFX10-NEXT: s_mov_b32 s6, s8
460 ; GFX10-NEXT: s_mov_b32 s7, s9
461 ; GFX10-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
462 ; GFX10-NEXT: s_waitcnt vmcnt(0)
463 ; GFX10-NEXT: ; return to shader part epilog
; The intrinsic takes the data value, the coordinate, and the descriptor; the
; two trailing i32 0 operands are presumably texfailctrl and cachepolicy
; (TODO confirm against the intrinsic definition).
465 %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; Reinterpret the i32 result as float to match the declared return type.
466 %out = bitcast i32 %v to float
; Covers llvm.amdgcn.image.atomic.umax.1d on i32 data with a single i32
; coordinate (%s); the descriptor %rsrc arrives inreg.
; For each RUN configuration the checks expect s2..s9 to be copied into
; s0..s7, followed by one image_atomic_umax on s[0:7] with dmask:0x1 unorm glc.
; Per-target differences visible in the checks: the GFX6 wait also names
; expcnt(0), GFX90A first copies v1 into v2 and passes v2 as the second
; operand, and GFX10 prints an explicit dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
470 define amdgpu_ps float @atomic_umax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
471 ; GFX6-LABEL: atomic_umax_i32_1d:
472 ; GFX6: ; %bb.0: ; %main_body
473 ; GFX6-NEXT: s_mov_b32 s0, s2
474 ; GFX6-NEXT: s_mov_b32 s1, s3
475 ; GFX6-NEXT: s_mov_b32 s2, s4
476 ; GFX6-NEXT: s_mov_b32 s3, s5
477 ; GFX6-NEXT: s_mov_b32 s4, s6
478 ; GFX6-NEXT: s_mov_b32 s5, s7
479 ; GFX6-NEXT: s_mov_b32 s6, s8
480 ; GFX6-NEXT: s_mov_b32 s7, s9
481 ; GFX6-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc
482 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
483 ; GFX6-NEXT: ; return to shader part epilog
485 ; GFX8-LABEL: atomic_umax_i32_1d:
486 ; GFX8: ; %bb.0: ; %main_body
487 ; GFX8-NEXT: s_mov_b32 s0, s2
488 ; GFX8-NEXT: s_mov_b32 s1, s3
489 ; GFX8-NEXT: s_mov_b32 s2, s4
490 ; GFX8-NEXT: s_mov_b32 s3, s5
491 ; GFX8-NEXT: s_mov_b32 s4, s6
492 ; GFX8-NEXT: s_mov_b32 s5, s7
493 ; GFX8-NEXT: s_mov_b32 s6, s8
494 ; GFX8-NEXT: s_mov_b32 s7, s9
495 ; GFX8-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc
496 ; GFX8-NEXT: s_waitcnt vmcnt(0)
497 ; GFX8-NEXT: ; return to shader part epilog
499 ; GFX900-LABEL: atomic_umax_i32_1d:
500 ; GFX900: ; %bb.0: ; %main_body
501 ; GFX900-NEXT: s_mov_b32 s0, s2
502 ; GFX900-NEXT: s_mov_b32 s1, s3
503 ; GFX900-NEXT: s_mov_b32 s2, s4
504 ; GFX900-NEXT: s_mov_b32 s3, s5
505 ; GFX900-NEXT: s_mov_b32 s4, s6
506 ; GFX900-NEXT: s_mov_b32 s5, s7
507 ; GFX900-NEXT: s_mov_b32 s6, s8
508 ; GFX900-NEXT: s_mov_b32 s7, s9
509 ; GFX900-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc
510 ; GFX900-NEXT: s_waitcnt vmcnt(0)
511 ; GFX900-NEXT: ; return to shader part epilog
513 ; GFX90A-LABEL: atomic_umax_i32_1d:
514 ; GFX90A: ; %bb.0: ; %main_body
515 ; GFX90A-NEXT: s_mov_b32 s0, s2
516 ; GFX90A-NEXT: s_mov_b32 s1, s3
517 ; GFX90A-NEXT: s_mov_b32 s2, s4
518 ; GFX90A-NEXT: s_mov_b32 s3, s5
519 ; GFX90A-NEXT: s_mov_b32 s4, s6
520 ; GFX90A-NEXT: s_mov_b32 s5, s7
521 ; GFX90A-NEXT: s_mov_b32 s6, s8
522 ; GFX90A-NEXT: s_mov_b32 s7, s9
523 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
524 ; GFX90A-NEXT: image_atomic_umax v0, v2, s[0:7] dmask:0x1 unorm glc
525 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
526 ; GFX90A-NEXT: ; return to shader part epilog
528 ; GFX10-LABEL: atomic_umax_i32_1d:
529 ; GFX10: ; %bb.0: ; %main_body
530 ; GFX10-NEXT: s_mov_b32 s0, s2
531 ; GFX10-NEXT: s_mov_b32 s1, s3
532 ; GFX10-NEXT: s_mov_b32 s2, s4
533 ; GFX10-NEXT: s_mov_b32 s3, s5
534 ; GFX10-NEXT: s_mov_b32 s4, s6
535 ; GFX10-NEXT: s_mov_b32 s5, s7
536 ; GFX10-NEXT: s_mov_b32 s6, s8
537 ; GFX10-NEXT: s_mov_b32 s7, s9
538 ; GFX10-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
539 ; GFX10-NEXT: s_waitcnt vmcnt(0)
540 ; GFX10-NEXT: ; return to shader part epilog
; The intrinsic takes the data value, the coordinate, and the descriptor; the
; two trailing i32 0 operands are presumably texfailctrl and cachepolicy
; (TODO confirm against the intrinsic definition).
542 %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; Reinterpret the i32 result as float to match the declared return type.
543 %out = bitcast i32 %v to float
; Covers llvm.amdgcn.image.atomic.and.1d on i32 data with a single i32
; coordinate (%s); the descriptor %rsrc arrives inreg.
; For each RUN configuration the checks expect s2..s9 to be copied into
; s0..s7, followed by one image_atomic_and on s[0:7] with dmask:0x1 unorm glc.
; Per-target differences visible in the checks: the GFX6 wait also names
; expcnt(0), GFX90A first copies v1 into v2 and passes v2 as the second
; operand, and GFX10 prints an explicit dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
547 define amdgpu_ps float @atomic_and_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
548 ; GFX6-LABEL: atomic_and_i32_1d:
549 ; GFX6: ; %bb.0: ; %main_body
550 ; GFX6-NEXT: s_mov_b32 s0, s2
551 ; GFX6-NEXT: s_mov_b32 s1, s3
552 ; GFX6-NEXT: s_mov_b32 s2, s4
553 ; GFX6-NEXT: s_mov_b32 s3, s5
554 ; GFX6-NEXT: s_mov_b32 s4, s6
555 ; GFX6-NEXT: s_mov_b32 s5, s7
556 ; GFX6-NEXT: s_mov_b32 s6, s8
557 ; GFX6-NEXT: s_mov_b32 s7, s9
558 ; GFX6-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc
559 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
560 ; GFX6-NEXT: ; return to shader part epilog
562 ; GFX8-LABEL: atomic_and_i32_1d:
563 ; GFX8: ; %bb.0: ; %main_body
564 ; GFX8-NEXT: s_mov_b32 s0, s2
565 ; GFX8-NEXT: s_mov_b32 s1, s3
566 ; GFX8-NEXT: s_mov_b32 s2, s4
567 ; GFX8-NEXT: s_mov_b32 s3, s5
568 ; GFX8-NEXT: s_mov_b32 s4, s6
569 ; GFX8-NEXT: s_mov_b32 s5, s7
570 ; GFX8-NEXT: s_mov_b32 s6, s8
571 ; GFX8-NEXT: s_mov_b32 s7, s9
572 ; GFX8-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc
573 ; GFX8-NEXT: s_waitcnt vmcnt(0)
574 ; GFX8-NEXT: ; return to shader part epilog
576 ; GFX900-LABEL: atomic_and_i32_1d:
577 ; GFX900: ; %bb.0: ; %main_body
578 ; GFX900-NEXT: s_mov_b32 s0, s2
579 ; GFX900-NEXT: s_mov_b32 s1, s3
580 ; GFX900-NEXT: s_mov_b32 s2, s4
581 ; GFX900-NEXT: s_mov_b32 s3, s5
582 ; GFX900-NEXT: s_mov_b32 s4, s6
583 ; GFX900-NEXT: s_mov_b32 s5, s7
584 ; GFX900-NEXT: s_mov_b32 s6, s8
585 ; GFX900-NEXT: s_mov_b32 s7, s9
586 ; GFX900-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc
587 ; GFX900-NEXT: s_waitcnt vmcnt(0)
588 ; GFX900-NEXT: ; return to shader part epilog
590 ; GFX90A-LABEL: atomic_and_i32_1d:
591 ; GFX90A: ; %bb.0: ; %main_body
592 ; GFX90A-NEXT: s_mov_b32 s0, s2
593 ; GFX90A-NEXT: s_mov_b32 s1, s3
594 ; GFX90A-NEXT: s_mov_b32 s2, s4
595 ; GFX90A-NEXT: s_mov_b32 s3, s5
596 ; GFX90A-NEXT: s_mov_b32 s4, s6
597 ; GFX90A-NEXT: s_mov_b32 s5, s7
598 ; GFX90A-NEXT: s_mov_b32 s6, s8
599 ; GFX90A-NEXT: s_mov_b32 s7, s9
600 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
601 ; GFX90A-NEXT: image_atomic_and v0, v2, s[0:7] dmask:0x1 unorm glc
602 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
603 ; GFX90A-NEXT: ; return to shader part epilog
605 ; GFX10-LABEL: atomic_and_i32_1d:
606 ; GFX10: ; %bb.0: ; %main_body
607 ; GFX10-NEXT: s_mov_b32 s0, s2
608 ; GFX10-NEXT: s_mov_b32 s1, s3
609 ; GFX10-NEXT: s_mov_b32 s2, s4
610 ; GFX10-NEXT: s_mov_b32 s3, s5
611 ; GFX10-NEXT: s_mov_b32 s4, s6
612 ; GFX10-NEXT: s_mov_b32 s5, s7
613 ; GFX10-NEXT: s_mov_b32 s6, s8
614 ; GFX10-NEXT: s_mov_b32 s7, s9
615 ; GFX10-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
616 ; GFX10-NEXT: s_waitcnt vmcnt(0)
617 ; GFX10-NEXT: ; return to shader part epilog
; The intrinsic takes the data value, the coordinate, and the descriptor; the
; two trailing i32 0 operands are presumably texfailctrl and cachepolicy
; (TODO confirm against the intrinsic definition).
619 %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; Reinterpret the i32 result as float to match the declared return type.
620 %out = bitcast i32 %v to float
; Covers llvm.amdgcn.image.atomic.or.1d on i32 data with a single i32
; coordinate (%s); the descriptor %rsrc arrives inreg.
; For each RUN configuration the checks expect s2..s9 to be copied into
; s0..s7, followed by one image_atomic_or on s[0:7] with dmask:0x1 unorm glc.
; Per-target differences visible in the checks: the GFX6 wait also names
; expcnt(0), GFX90A first copies v1 into v2 and passes v2 as the second
; operand, and GFX10 prints an explicit dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
624 define amdgpu_ps float @atomic_or_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
625 ; GFX6-LABEL: atomic_or_i32_1d:
626 ; GFX6: ; %bb.0: ; %main_body
627 ; GFX6-NEXT: s_mov_b32 s0, s2
628 ; GFX6-NEXT: s_mov_b32 s1, s3
629 ; GFX6-NEXT: s_mov_b32 s2, s4
630 ; GFX6-NEXT: s_mov_b32 s3, s5
631 ; GFX6-NEXT: s_mov_b32 s4, s6
632 ; GFX6-NEXT: s_mov_b32 s5, s7
633 ; GFX6-NEXT: s_mov_b32 s6, s8
634 ; GFX6-NEXT: s_mov_b32 s7, s9
635 ; GFX6-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc
636 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
637 ; GFX6-NEXT: ; return to shader part epilog
639 ; GFX8-LABEL: atomic_or_i32_1d:
640 ; GFX8: ; %bb.0: ; %main_body
641 ; GFX8-NEXT: s_mov_b32 s0, s2
642 ; GFX8-NEXT: s_mov_b32 s1, s3
643 ; GFX8-NEXT: s_mov_b32 s2, s4
644 ; GFX8-NEXT: s_mov_b32 s3, s5
645 ; GFX8-NEXT: s_mov_b32 s4, s6
646 ; GFX8-NEXT: s_mov_b32 s5, s7
647 ; GFX8-NEXT: s_mov_b32 s6, s8
648 ; GFX8-NEXT: s_mov_b32 s7, s9
649 ; GFX8-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc
650 ; GFX8-NEXT: s_waitcnt vmcnt(0)
651 ; GFX8-NEXT: ; return to shader part epilog
653 ; GFX900-LABEL: atomic_or_i32_1d:
654 ; GFX900: ; %bb.0: ; %main_body
655 ; GFX900-NEXT: s_mov_b32 s0, s2
656 ; GFX900-NEXT: s_mov_b32 s1, s3
657 ; GFX900-NEXT: s_mov_b32 s2, s4
658 ; GFX900-NEXT: s_mov_b32 s3, s5
659 ; GFX900-NEXT: s_mov_b32 s4, s6
660 ; GFX900-NEXT: s_mov_b32 s5, s7
661 ; GFX900-NEXT: s_mov_b32 s6, s8
662 ; GFX900-NEXT: s_mov_b32 s7, s9
663 ; GFX900-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc
664 ; GFX900-NEXT: s_waitcnt vmcnt(0)
665 ; GFX900-NEXT: ; return to shader part epilog
667 ; GFX90A-LABEL: atomic_or_i32_1d:
668 ; GFX90A: ; %bb.0: ; %main_body
669 ; GFX90A-NEXT: s_mov_b32 s0, s2
670 ; GFX90A-NEXT: s_mov_b32 s1, s3
671 ; GFX90A-NEXT: s_mov_b32 s2, s4
672 ; GFX90A-NEXT: s_mov_b32 s3, s5
673 ; GFX90A-NEXT: s_mov_b32 s4, s6
674 ; GFX90A-NEXT: s_mov_b32 s5, s7
675 ; GFX90A-NEXT: s_mov_b32 s6, s8
676 ; GFX90A-NEXT: s_mov_b32 s7, s9
677 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
678 ; GFX90A-NEXT: image_atomic_or v0, v2, s[0:7] dmask:0x1 unorm glc
679 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
680 ; GFX90A-NEXT: ; return to shader part epilog
682 ; GFX10-LABEL: atomic_or_i32_1d:
683 ; GFX10: ; %bb.0: ; %main_body
684 ; GFX10-NEXT: s_mov_b32 s0, s2
685 ; GFX10-NEXT: s_mov_b32 s1, s3
686 ; GFX10-NEXT: s_mov_b32 s2, s4
687 ; GFX10-NEXT: s_mov_b32 s3, s5
688 ; GFX10-NEXT: s_mov_b32 s4, s6
689 ; GFX10-NEXT: s_mov_b32 s5, s7
690 ; GFX10-NEXT: s_mov_b32 s6, s8
691 ; GFX10-NEXT: s_mov_b32 s7, s9
692 ; GFX10-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
693 ; GFX10-NEXT: s_waitcnt vmcnt(0)
694 ; GFX10-NEXT: ; return to shader part epilog
; The intrinsic takes the data value, the coordinate, and the descriptor; the
; two trailing i32 0 operands are presumably texfailctrl and cachepolicy
; (TODO confirm against the intrinsic definition).
696 %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; Reinterpret the i32 result as float to match the declared return type.
697 %out = bitcast i32 %v to float
; Covers llvm.amdgcn.image.atomic.xor.1d on i32 data with a single i32
; coordinate (%s); the descriptor %rsrc arrives inreg.
; For each RUN configuration the checks expect s2..s9 to be copied into
; s0..s7, followed by one image_atomic_xor on s[0:7] with dmask:0x1 unorm glc.
; Per-target differences visible in the checks: the GFX6 wait also names
; expcnt(0), GFX90A first copies v1 into v2 and passes v2 as the second
; operand, and GFX10 prints an explicit dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
701 define amdgpu_ps float @atomic_xor_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
702 ; GFX6-LABEL: atomic_xor_i32_1d:
703 ; GFX6: ; %bb.0: ; %main_body
704 ; GFX6-NEXT: s_mov_b32 s0, s2
705 ; GFX6-NEXT: s_mov_b32 s1, s3
706 ; GFX6-NEXT: s_mov_b32 s2, s4
707 ; GFX6-NEXT: s_mov_b32 s3, s5
708 ; GFX6-NEXT: s_mov_b32 s4, s6
709 ; GFX6-NEXT: s_mov_b32 s5, s7
710 ; GFX6-NEXT: s_mov_b32 s6, s8
711 ; GFX6-NEXT: s_mov_b32 s7, s9
712 ; GFX6-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc
713 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
714 ; GFX6-NEXT: ; return to shader part epilog
716 ; GFX8-LABEL: atomic_xor_i32_1d:
717 ; GFX8: ; %bb.0: ; %main_body
718 ; GFX8-NEXT: s_mov_b32 s0, s2
719 ; GFX8-NEXT: s_mov_b32 s1, s3
720 ; GFX8-NEXT: s_mov_b32 s2, s4
721 ; GFX8-NEXT: s_mov_b32 s3, s5
722 ; GFX8-NEXT: s_mov_b32 s4, s6
723 ; GFX8-NEXT: s_mov_b32 s5, s7
724 ; GFX8-NEXT: s_mov_b32 s6, s8
725 ; GFX8-NEXT: s_mov_b32 s7, s9
726 ; GFX8-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc
727 ; GFX8-NEXT: s_waitcnt vmcnt(0)
728 ; GFX8-NEXT: ; return to shader part epilog
730 ; GFX900-LABEL: atomic_xor_i32_1d:
731 ; GFX900: ; %bb.0: ; %main_body
732 ; GFX900-NEXT: s_mov_b32 s0, s2
733 ; GFX900-NEXT: s_mov_b32 s1, s3
734 ; GFX900-NEXT: s_mov_b32 s2, s4
735 ; GFX900-NEXT: s_mov_b32 s3, s5
736 ; GFX900-NEXT: s_mov_b32 s4, s6
737 ; GFX900-NEXT: s_mov_b32 s5, s7
738 ; GFX900-NEXT: s_mov_b32 s6, s8
739 ; GFX900-NEXT: s_mov_b32 s7, s9
740 ; GFX900-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc
741 ; GFX900-NEXT: s_waitcnt vmcnt(0)
742 ; GFX900-NEXT: ; return to shader part epilog
744 ; GFX90A-LABEL: atomic_xor_i32_1d:
745 ; GFX90A: ; %bb.0: ; %main_body
746 ; GFX90A-NEXT: s_mov_b32 s0, s2
747 ; GFX90A-NEXT: s_mov_b32 s1, s3
748 ; GFX90A-NEXT: s_mov_b32 s2, s4
749 ; GFX90A-NEXT: s_mov_b32 s3, s5
750 ; GFX90A-NEXT: s_mov_b32 s4, s6
751 ; GFX90A-NEXT: s_mov_b32 s5, s7
752 ; GFX90A-NEXT: s_mov_b32 s6, s8
753 ; GFX90A-NEXT: s_mov_b32 s7, s9
754 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
755 ; GFX90A-NEXT: image_atomic_xor v0, v2, s[0:7] dmask:0x1 unorm glc
756 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
757 ; GFX90A-NEXT: ; return to shader part epilog
759 ; GFX10-LABEL: atomic_xor_i32_1d:
760 ; GFX10: ; %bb.0: ; %main_body
761 ; GFX10-NEXT: s_mov_b32 s0, s2
762 ; GFX10-NEXT: s_mov_b32 s1, s3
763 ; GFX10-NEXT: s_mov_b32 s2, s4
764 ; GFX10-NEXT: s_mov_b32 s3, s5
765 ; GFX10-NEXT: s_mov_b32 s4, s6
766 ; GFX10-NEXT: s_mov_b32 s5, s7
767 ; GFX10-NEXT: s_mov_b32 s6, s8
768 ; GFX10-NEXT: s_mov_b32 s7, s9
769 ; GFX10-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
770 ; GFX10-NEXT: s_waitcnt vmcnt(0)
771 ; GFX10-NEXT: ; return to shader part epilog
; The intrinsic takes the data value, the coordinate, and the descriptor; the
; two trailing i32 0 operands are presumably texfailctrl and cachepolicy
; (TODO confirm against the intrinsic definition).
773 %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; Reinterpret the i32 result as float to match the declared return type.
774 %out = bitcast i32 %v to float
; Covers llvm.amdgcn.image.atomic.inc.1d on i32 data with a single i32
; coordinate (%s); the descriptor %rsrc arrives inreg.
; For each RUN configuration the checks expect s2..s9 to be copied into
; s0..s7, followed by one image_atomic_inc on s[0:7] with dmask:0x1 unorm glc.
; Per-target differences visible in the checks: the GFX6 wait also names
; expcnt(0), GFX90A first copies v1 into v2 and passes v2 as the second
; operand, and GFX10 prints an explicit dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
778 define amdgpu_ps float @atomic_inc_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
779 ; GFX6-LABEL: atomic_inc_i32_1d:
780 ; GFX6: ; %bb.0: ; %main_body
781 ; GFX6-NEXT: s_mov_b32 s0, s2
782 ; GFX6-NEXT: s_mov_b32 s1, s3
783 ; GFX6-NEXT: s_mov_b32 s2, s4
784 ; GFX6-NEXT: s_mov_b32 s3, s5
785 ; GFX6-NEXT: s_mov_b32 s4, s6
786 ; GFX6-NEXT: s_mov_b32 s5, s7
787 ; GFX6-NEXT: s_mov_b32 s6, s8
788 ; GFX6-NEXT: s_mov_b32 s7, s9
789 ; GFX6-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc
790 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
791 ; GFX6-NEXT: ; return to shader part epilog
793 ; GFX8-LABEL: atomic_inc_i32_1d:
794 ; GFX8: ; %bb.0: ; %main_body
795 ; GFX8-NEXT: s_mov_b32 s0, s2
796 ; GFX8-NEXT: s_mov_b32 s1, s3
797 ; GFX8-NEXT: s_mov_b32 s2, s4
798 ; GFX8-NEXT: s_mov_b32 s3, s5
799 ; GFX8-NEXT: s_mov_b32 s4, s6
800 ; GFX8-NEXT: s_mov_b32 s5, s7
801 ; GFX8-NEXT: s_mov_b32 s6, s8
802 ; GFX8-NEXT: s_mov_b32 s7, s9
803 ; GFX8-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc
804 ; GFX8-NEXT: s_waitcnt vmcnt(0)
805 ; GFX8-NEXT: ; return to shader part epilog
807 ; GFX900-LABEL: atomic_inc_i32_1d:
808 ; GFX900: ; %bb.0: ; %main_body
809 ; GFX900-NEXT: s_mov_b32 s0, s2
810 ; GFX900-NEXT: s_mov_b32 s1, s3
811 ; GFX900-NEXT: s_mov_b32 s2, s4
812 ; GFX900-NEXT: s_mov_b32 s3, s5
813 ; GFX900-NEXT: s_mov_b32 s4, s6
814 ; GFX900-NEXT: s_mov_b32 s5, s7
815 ; GFX900-NEXT: s_mov_b32 s6, s8
816 ; GFX900-NEXT: s_mov_b32 s7, s9
817 ; GFX900-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc
818 ; GFX900-NEXT: s_waitcnt vmcnt(0)
819 ; GFX900-NEXT: ; return to shader part epilog
821 ; GFX90A-LABEL: atomic_inc_i32_1d:
822 ; GFX90A: ; %bb.0: ; %main_body
823 ; GFX90A-NEXT: s_mov_b32 s0, s2
824 ; GFX90A-NEXT: s_mov_b32 s1, s3
825 ; GFX90A-NEXT: s_mov_b32 s2, s4
826 ; GFX90A-NEXT: s_mov_b32 s3, s5
827 ; GFX90A-NEXT: s_mov_b32 s4, s6
828 ; GFX90A-NEXT: s_mov_b32 s5, s7
829 ; GFX90A-NEXT: s_mov_b32 s6, s8
830 ; GFX90A-NEXT: s_mov_b32 s7, s9
831 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
832 ; GFX90A-NEXT: image_atomic_inc v0, v2, s[0:7] dmask:0x1 unorm glc
833 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
834 ; GFX90A-NEXT: ; return to shader part epilog
836 ; GFX10-LABEL: atomic_inc_i32_1d:
837 ; GFX10: ; %bb.0: ; %main_body
838 ; GFX10-NEXT: s_mov_b32 s0, s2
839 ; GFX10-NEXT: s_mov_b32 s1, s3
840 ; GFX10-NEXT: s_mov_b32 s2, s4
841 ; GFX10-NEXT: s_mov_b32 s3, s5
842 ; GFX10-NEXT: s_mov_b32 s4, s6
843 ; GFX10-NEXT: s_mov_b32 s5, s7
844 ; GFX10-NEXT: s_mov_b32 s6, s8
845 ; GFX10-NEXT: s_mov_b32 s7, s9
846 ; GFX10-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
847 ; GFX10-NEXT: s_waitcnt vmcnt(0)
848 ; GFX10-NEXT: ; return to shader part epilog
; The intrinsic takes the data value, the coordinate, and the descriptor; the
; two trailing i32 0 operands are presumably texfailctrl and cachepolicy
; (TODO confirm against the intrinsic definition).
850 %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; Reinterpret the i32 result as float to match the declared return type.
851 %out = bitcast i32 %v to float
; Image atomic decrement on a 1D image: i32 data operand, one i32 coordinate.
; Calls llvm.amdgcn.image.atomic.dec.1d and bitcasts the i32 result to float to
; match the function's float return type. Every target is expected to emit a
; single image_atomic_dec with dmask:0x1 unorm glc; the gfx90a checks first copy
; the coordinate from v1 to v2, and the gfx1010 checks additionally print
; dim:SQ_RSRC_IMG_1D.
855 define amdgpu_ps float @atomic_dec_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
856 ; GFX6-LABEL: atomic_dec_i32_1d:
857 ; GFX6: ; %bb.0: ; %main_body
858 ; GFX6-NEXT: s_mov_b32 s0, s2
859 ; GFX6-NEXT: s_mov_b32 s1, s3
860 ; GFX6-NEXT: s_mov_b32 s2, s4
861 ; GFX6-NEXT: s_mov_b32 s3, s5
862 ; GFX6-NEXT: s_mov_b32 s4, s6
863 ; GFX6-NEXT: s_mov_b32 s5, s7
864 ; GFX6-NEXT: s_mov_b32 s6, s8
865 ; GFX6-NEXT: s_mov_b32 s7, s9
866 ; GFX6-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc
867 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
868 ; GFX6-NEXT: ; return to shader part epilog
870 ; GFX8-LABEL: atomic_dec_i32_1d:
871 ; GFX8: ; %bb.0: ; %main_body
872 ; GFX8-NEXT: s_mov_b32 s0, s2
873 ; GFX8-NEXT: s_mov_b32 s1, s3
874 ; GFX8-NEXT: s_mov_b32 s2, s4
875 ; GFX8-NEXT: s_mov_b32 s3, s5
876 ; GFX8-NEXT: s_mov_b32 s4, s6
877 ; GFX8-NEXT: s_mov_b32 s5, s7
878 ; GFX8-NEXT: s_mov_b32 s6, s8
879 ; GFX8-NEXT: s_mov_b32 s7, s9
880 ; GFX8-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc
881 ; GFX8-NEXT: s_waitcnt vmcnt(0)
882 ; GFX8-NEXT: ; return to shader part epilog
884 ; GFX900-LABEL: atomic_dec_i32_1d:
885 ; GFX900: ; %bb.0: ; %main_body
886 ; GFX900-NEXT: s_mov_b32 s0, s2
887 ; GFX900-NEXT: s_mov_b32 s1, s3
888 ; GFX900-NEXT: s_mov_b32 s2, s4
889 ; GFX900-NEXT: s_mov_b32 s3, s5
890 ; GFX900-NEXT: s_mov_b32 s4, s6
891 ; GFX900-NEXT: s_mov_b32 s5, s7
892 ; GFX900-NEXT: s_mov_b32 s6, s8
893 ; GFX900-NEXT: s_mov_b32 s7, s9
894 ; GFX900-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc
895 ; GFX900-NEXT: s_waitcnt vmcnt(0)
896 ; GFX900-NEXT: ; return to shader part epilog
898 ; GFX90A-LABEL: atomic_dec_i32_1d:
899 ; GFX90A: ; %bb.0: ; %main_body
900 ; GFX90A-NEXT: s_mov_b32 s0, s2
901 ; GFX90A-NEXT: s_mov_b32 s1, s3
902 ; GFX90A-NEXT: s_mov_b32 s2, s4
903 ; GFX90A-NEXT: s_mov_b32 s3, s5
904 ; GFX90A-NEXT: s_mov_b32 s4, s6
905 ; GFX90A-NEXT: s_mov_b32 s5, s7
906 ; GFX90A-NEXT: s_mov_b32 s6, s8
907 ; GFX90A-NEXT: s_mov_b32 s7, s9
908 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
909 ; GFX90A-NEXT: image_atomic_dec v0, v2, s[0:7] dmask:0x1 unorm glc
910 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
911 ; GFX90A-NEXT: ; return to shader part epilog
913 ; GFX10-LABEL: atomic_dec_i32_1d:
914 ; GFX10: ; %bb.0: ; %main_body
915 ; GFX10-NEXT: s_mov_b32 s0, s2
916 ; GFX10-NEXT: s_mov_b32 s1, s3
917 ; GFX10-NEXT: s_mov_b32 s2, s4
918 ; GFX10-NEXT: s_mov_b32 s3, s5
919 ; GFX10-NEXT: s_mov_b32 s4, s6
920 ; GFX10-NEXT: s_mov_b32 s5, s7
921 ; GFX10-NEXT: s_mov_b32 s6, s8
922 ; GFX10-NEXT: s_mov_b32 s7, s9
923 ; GFX10-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
924 ; GFX10-NEXT: s_waitcnt vmcnt(0)
925 ; GFX10-NEXT: ; return to shader part epilog
927 %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
928 %out = bitcast i32 %v to float
; Image atomic compare-and-swap on a 1D image: i32 %cmp and %swap operands plus
; one i32 coordinate. Calls llvm.amdgcn.image.atomic.cmpswap.1d and bitcasts the
; i32 result to float to match the function's float return type. Every target
; is expected to emit image_atomic_cmpswap with a two-register data operand
; v[0:1], address v2 and dmask:0x3 unorm glc; the gfx1010 checks additionally
; print dim:SQ_RSRC_IMG_1D. Unlike the single-data atomics in this file, the
; gfx90a checks contain no extra v_mov copy of the coordinate.
932 define amdgpu_ps float @atomic_cmpswap_i32_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) {
933 ; GFX6-LABEL: atomic_cmpswap_i32_1d:
934 ; GFX6: ; %bb.0: ; %main_body
935 ; GFX6-NEXT: s_mov_b32 s0, s2
936 ; GFX6-NEXT: s_mov_b32 s1, s3
937 ; GFX6-NEXT: s_mov_b32 s2, s4
938 ; GFX6-NEXT: s_mov_b32 s3, s5
939 ; GFX6-NEXT: s_mov_b32 s4, s6
940 ; GFX6-NEXT: s_mov_b32 s5, s7
941 ; GFX6-NEXT: s_mov_b32 s6, s8
942 ; GFX6-NEXT: s_mov_b32 s7, s9
943 ; GFX6-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
944 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
945 ; GFX6-NEXT: ; return to shader part epilog
947 ; GFX8-LABEL: atomic_cmpswap_i32_1d:
948 ; GFX8: ; %bb.0: ; %main_body
949 ; GFX8-NEXT: s_mov_b32 s0, s2
950 ; GFX8-NEXT: s_mov_b32 s1, s3
951 ; GFX8-NEXT: s_mov_b32 s2, s4
952 ; GFX8-NEXT: s_mov_b32 s3, s5
953 ; GFX8-NEXT: s_mov_b32 s4, s6
954 ; GFX8-NEXT: s_mov_b32 s5, s7
955 ; GFX8-NEXT: s_mov_b32 s6, s8
956 ; GFX8-NEXT: s_mov_b32 s7, s9
957 ; GFX8-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
958 ; GFX8-NEXT: s_waitcnt vmcnt(0)
959 ; GFX8-NEXT: ; return to shader part epilog
961 ; GFX900-LABEL: atomic_cmpswap_i32_1d:
962 ; GFX900: ; %bb.0: ; %main_body
963 ; GFX900-NEXT: s_mov_b32 s0, s2
964 ; GFX900-NEXT: s_mov_b32 s1, s3
965 ; GFX900-NEXT: s_mov_b32 s2, s4
966 ; GFX900-NEXT: s_mov_b32 s3, s5
967 ; GFX900-NEXT: s_mov_b32 s4, s6
968 ; GFX900-NEXT: s_mov_b32 s5, s7
969 ; GFX900-NEXT: s_mov_b32 s6, s8
970 ; GFX900-NEXT: s_mov_b32 s7, s9
971 ; GFX900-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
972 ; GFX900-NEXT: s_waitcnt vmcnt(0)
973 ; GFX900-NEXT: ; return to shader part epilog
975 ; GFX90A-LABEL: atomic_cmpswap_i32_1d:
976 ; GFX90A: ; %bb.0: ; %main_body
977 ; GFX90A-NEXT: s_mov_b32 s0, s2
978 ; GFX90A-NEXT: s_mov_b32 s1, s3
979 ; GFX90A-NEXT: s_mov_b32 s2, s4
980 ; GFX90A-NEXT: s_mov_b32 s3, s5
981 ; GFX90A-NEXT: s_mov_b32 s4, s6
982 ; GFX90A-NEXT: s_mov_b32 s5, s7
983 ; GFX90A-NEXT: s_mov_b32 s6, s8
984 ; GFX90A-NEXT: s_mov_b32 s7, s9
985 ; GFX90A-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
986 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
987 ; GFX90A-NEXT: ; return to shader part epilog
989 ; GFX10-LABEL: atomic_cmpswap_i32_1d:
990 ; GFX10: ; %bb.0: ; %main_body
991 ; GFX10-NEXT: s_mov_b32 s0, s2
992 ; GFX10-NEXT: s_mov_b32 s1, s3
993 ; GFX10-NEXT: s_mov_b32 s2, s4
994 ; GFX10-NEXT: s_mov_b32 s3, s5
995 ; GFX10-NEXT: s_mov_b32 s4, s6
996 ; GFX10-NEXT: s_mov_b32 s5, s7
997 ; GFX10-NEXT: s_mov_b32 s6, s8
998 ; GFX10-NEXT: s_mov_b32 s7, s9
999 ; GFX10-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
1000 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1001 ; GFX10-NEXT: ; return to shader part epilog
1003 %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1004 %out = bitcast i32 %v to float
; Same compare-and-swap call as atomic_cmpswap_i32_1d, but in a void shader:
; the i32 result %v is not consumed by any instruction visible here. The checks
; record that the generated image_atomic_cmpswap still carries the glc modifier
; on every target, and that the shader ends with s_endpgm with no s_waitcnt
; after the atomic.
; NOTE(review): the retained glc presumably reflects a missing no-return
; optimization in the selector rather than a requirement - confirm before
; relying on it.
1008 define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) {
1009 ; GFX6-LABEL: atomic_cmpswap_i32_1d_no_return:
1010 ; GFX6: ; %bb.0: ; %main_body
1011 ; GFX6-NEXT: s_mov_b32 s0, s2
1012 ; GFX6-NEXT: s_mov_b32 s1, s3
1013 ; GFX6-NEXT: s_mov_b32 s2, s4
1014 ; GFX6-NEXT: s_mov_b32 s3, s5
1015 ; GFX6-NEXT: s_mov_b32 s4, s6
1016 ; GFX6-NEXT: s_mov_b32 s5, s7
1017 ; GFX6-NEXT: s_mov_b32 s6, s8
1018 ; GFX6-NEXT: s_mov_b32 s7, s9
1019 ; GFX6-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1020 ; GFX6-NEXT: s_endpgm
1022 ; GFX8-LABEL: atomic_cmpswap_i32_1d_no_return:
1023 ; GFX8: ; %bb.0: ; %main_body
1024 ; GFX8-NEXT: s_mov_b32 s0, s2
1025 ; GFX8-NEXT: s_mov_b32 s1, s3
1026 ; GFX8-NEXT: s_mov_b32 s2, s4
1027 ; GFX8-NEXT: s_mov_b32 s3, s5
1028 ; GFX8-NEXT: s_mov_b32 s4, s6
1029 ; GFX8-NEXT: s_mov_b32 s5, s7
1030 ; GFX8-NEXT: s_mov_b32 s6, s8
1031 ; GFX8-NEXT: s_mov_b32 s7, s9
1032 ; GFX8-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1033 ; GFX8-NEXT: s_endpgm
1035 ; GFX900-LABEL: atomic_cmpswap_i32_1d_no_return:
1036 ; GFX900: ; %bb.0: ; %main_body
1037 ; GFX900-NEXT: s_mov_b32 s0, s2
1038 ; GFX900-NEXT: s_mov_b32 s1, s3
1039 ; GFX900-NEXT: s_mov_b32 s2, s4
1040 ; GFX900-NEXT: s_mov_b32 s3, s5
1041 ; GFX900-NEXT: s_mov_b32 s4, s6
1042 ; GFX900-NEXT: s_mov_b32 s5, s7
1043 ; GFX900-NEXT: s_mov_b32 s6, s8
1044 ; GFX900-NEXT: s_mov_b32 s7, s9
1045 ; GFX900-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1046 ; GFX900-NEXT: s_endpgm
1048 ; GFX90A-LABEL: atomic_cmpswap_i32_1d_no_return:
1049 ; GFX90A: ; %bb.0: ; %main_body
1050 ; GFX90A-NEXT: s_mov_b32 s0, s2
1051 ; GFX90A-NEXT: s_mov_b32 s1, s3
1052 ; GFX90A-NEXT: s_mov_b32 s2, s4
1053 ; GFX90A-NEXT: s_mov_b32 s3, s5
1054 ; GFX90A-NEXT: s_mov_b32 s4, s6
1055 ; GFX90A-NEXT: s_mov_b32 s5, s7
1056 ; GFX90A-NEXT: s_mov_b32 s6, s8
1057 ; GFX90A-NEXT: s_mov_b32 s7, s9
1058 ; GFX90A-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1059 ; GFX90A-NEXT: s_endpgm
1061 ; GFX10-LABEL: atomic_cmpswap_i32_1d_no_return:
1062 ; GFX10: ; %bb.0: ; %main_body
1063 ; GFX10-NEXT: s_mov_b32 s0, s2
1064 ; GFX10-NEXT: s_mov_b32 s1, s3
1065 ; GFX10-NEXT: s_mov_b32 s2, s4
1066 ; GFX10-NEXT: s_mov_b32 s3, s5
1067 ; GFX10-NEXT: s_mov_b32 s4, s6
1068 ; GFX10-NEXT: s_mov_b32 s5, s7
1069 ; GFX10-NEXT: s_mov_b32 s6, s8
1070 ; GFX10-NEXT: s_mov_b32 s7, s9
1071 ; GFX10-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
1072 ; GFX10-NEXT: s_endpgm
1074 %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; Image atomic add on a 2D image: i32 data operand and two i32 coordinates
; (%s, %t). Calls llvm.amdgcn.image.atomic.add.2d and bitcasts the i32 result
; to float to match the function's float return type. The checks expect a
; two-register address v[1:2] with dmask:0x1 unorm glc; on gfx90a the
; coordinates are first copied into v[4:5], and the gfx1010 checks additionally
; print dim:SQ_RSRC_IMG_2D.
1078 define amdgpu_ps float @atomic_add_i32_2d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t) {
1079 ; GFX6-LABEL: atomic_add_i32_2d:
1080 ; GFX6: ; %bb.0: ; %main_body
1081 ; GFX6-NEXT: s_mov_b32 s0, s2
1082 ; GFX6-NEXT: s_mov_b32 s1, s3
1083 ; GFX6-NEXT: s_mov_b32 s2, s4
1084 ; GFX6-NEXT: s_mov_b32 s3, s5
1085 ; GFX6-NEXT: s_mov_b32 s4, s6
1086 ; GFX6-NEXT: s_mov_b32 s5, s7
1087 ; GFX6-NEXT: s_mov_b32 s6, s8
1088 ; GFX6-NEXT: s_mov_b32 s7, s9
1089 ; GFX6-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc
1090 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1091 ; GFX6-NEXT: ; return to shader part epilog
1093 ; GFX8-LABEL: atomic_add_i32_2d:
1094 ; GFX8: ; %bb.0: ; %main_body
1095 ; GFX8-NEXT: s_mov_b32 s0, s2
1096 ; GFX8-NEXT: s_mov_b32 s1, s3
1097 ; GFX8-NEXT: s_mov_b32 s2, s4
1098 ; GFX8-NEXT: s_mov_b32 s3, s5
1099 ; GFX8-NEXT: s_mov_b32 s4, s6
1100 ; GFX8-NEXT: s_mov_b32 s5, s7
1101 ; GFX8-NEXT: s_mov_b32 s6, s8
1102 ; GFX8-NEXT: s_mov_b32 s7, s9
1103 ; GFX8-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc
1104 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1105 ; GFX8-NEXT: ; return to shader part epilog
1107 ; GFX900-LABEL: atomic_add_i32_2d:
1108 ; GFX900: ; %bb.0: ; %main_body
1109 ; GFX900-NEXT: s_mov_b32 s0, s2
1110 ; GFX900-NEXT: s_mov_b32 s1, s3
1111 ; GFX900-NEXT: s_mov_b32 s2, s4
1112 ; GFX900-NEXT: s_mov_b32 s3, s5
1113 ; GFX900-NEXT: s_mov_b32 s4, s6
1114 ; GFX900-NEXT: s_mov_b32 s5, s7
1115 ; GFX900-NEXT: s_mov_b32 s6, s8
1116 ; GFX900-NEXT: s_mov_b32 s7, s9
1117 ; GFX900-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc
1118 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1119 ; GFX900-NEXT: ; return to shader part epilog
1121 ; GFX90A-LABEL: atomic_add_i32_2d:
1122 ; GFX90A: ; %bb.0: ; %main_body
1123 ; GFX90A-NEXT: s_mov_b32 s0, s2
1124 ; GFX90A-NEXT: s_mov_b32 s1, s3
1125 ; GFX90A-NEXT: s_mov_b32 s2, s4
1126 ; GFX90A-NEXT: s_mov_b32 s3, s5
1127 ; GFX90A-NEXT: s_mov_b32 s4, s6
1128 ; GFX90A-NEXT: s_mov_b32 s5, s7
1129 ; GFX90A-NEXT: s_mov_b32 s6, s8
1130 ; GFX90A-NEXT: s_mov_b32 s7, s9
1131 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
1132 ; GFX90A-NEXT: v_mov_b32_e32 v5, v2
1133 ; GFX90A-NEXT: image_atomic_add v0, v[4:5], s[0:7] dmask:0x1 unorm glc
1134 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1135 ; GFX90A-NEXT: ; return to shader part epilog
1137 ; GFX10-LABEL: atomic_add_i32_2d:
1138 ; GFX10: ; %bb.0: ; %main_body
1139 ; GFX10-NEXT: s_mov_b32 s0, s2
1140 ; GFX10-NEXT: s_mov_b32 s1, s3
1141 ; GFX10-NEXT: s_mov_b32 s2, s4
1142 ; GFX10-NEXT: s_mov_b32 s3, s5
1143 ; GFX10-NEXT: s_mov_b32 s4, s6
1144 ; GFX10-NEXT: s_mov_b32 s5, s7
1145 ; GFX10-NEXT: s_mov_b32 s6, s8
1146 ; GFX10-NEXT: s_mov_b32 s7, s9
1147 ; GFX10-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm glc
1148 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1149 ; GFX10-NEXT: ; return to shader part epilog
1151 %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
1152 %out = bitcast i32 %v to float
; Image atomic add on a 3D image: i32 data operand and three i32 coordinates
; (%s, %t, %r). Calls llvm.amdgcn.image.atomic.add.3d and bitcasts the i32
; result to float to match the function's float return type. The checks expect
; a three-register address v[1:3] with dmask:0x1 unorm glc; on gfx90a the
; coordinates are first copied into v[4:6], and the gfx1010 checks additionally
; print dim:SQ_RSRC_IMG_3D.
1156 define amdgpu_ps float @atomic_add_i32_3d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %r) {
1157 ; GFX6-LABEL: atomic_add_i32_3d:
1158 ; GFX6: ; %bb.0: ; %main_body
1159 ; GFX6-NEXT: s_mov_b32 s0, s2
1160 ; GFX6-NEXT: s_mov_b32 s1, s3
1161 ; GFX6-NEXT: s_mov_b32 s2, s4
1162 ; GFX6-NEXT: s_mov_b32 s3, s5
1163 ; GFX6-NEXT: s_mov_b32 s4, s6
1164 ; GFX6-NEXT: s_mov_b32 s5, s7
1165 ; GFX6-NEXT: s_mov_b32 s6, s8
1166 ; GFX6-NEXT: s_mov_b32 s7, s9
1167 ; GFX6-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
1168 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1169 ; GFX6-NEXT: ; return to shader part epilog
1171 ; GFX8-LABEL: atomic_add_i32_3d:
1172 ; GFX8: ; %bb.0: ; %main_body
1173 ; GFX8-NEXT: s_mov_b32 s0, s2
1174 ; GFX8-NEXT: s_mov_b32 s1, s3
1175 ; GFX8-NEXT: s_mov_b32 s2, s4
1176 ; GFX8-NEXT: s_mov_b32 s3, s5
1177 ; GFX8-NEXT: s_mov_b32 s4, s6
1178 ; GFX8-NEXT: s_mov_b32 s5, s7
1179 ; GFX8-NEXT: s_mov_b32 s6, s8
1180 ; GFX8-NEXT: s_mov_b32 s7, s9
1181 ; GFX8-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
1182 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1183 ; GFX8-NEXT: ; return to shader part epilog
1185 ; GFX900-LABEL: atomic_add_i32_3d:
1186 ; GFX900: ; %bb.0: ; %main_body
1187 ; GFX900-NEXT: s_mov_b32 s0, s2
1188 ; GFX900-NEXT: s_mov_b32 s1, s3
1189 ; GFX900-NEXT: s_mov_b32 s2, s4
1190 ; GFX900-NEXT: s_mov_b32 s3, s5
1191 ; GFX900-NEXT: s_mov_b32 s4, s6
1192 ; GFX900-NEXT: s_mov_b32 s5, s7
1193 ; GFX900-NEXT: s_mov_b32 s6, s8
1194 ; GFX900-NEXT: s_mov_b32 s7, s9
1195 ; GFX900-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
1196 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1197 ; GFX900-NEXT: ; return to shader part epilog
1199 ; GFX90A-LABEL: atomic_add_i32_3d:
1200 ; GFX90A: ; %bb.0: ; %main_body
1201 ; GFX90A-NEXT: s_mov_b32 s0, s2
1202 ; GFX90A-NEXT: s_mov_b32 s1, s3
1203 ; GFX90A-NEXT: s_mov_b32 s2, s4
1204 ; GFX90A-NEXT: s_mov_b32 s3, s5
1205 ; GFX90A-NEXT: s_mov_b32 s4, s6
1206 ; GFX90A-NEXT: s_mov_b32 s5, s7
1207 ; GFX90A-NEXT: s_mov_b32 s6, s8
1208 ; GFX90A-NEXT: s_mov_b32 s7, s9
1209 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
1210 ; GFX90A-NEXT: v_mov_b32_e32 v5, v2
1211 ; GFX90A-NEXT: v_mov_b32_e32 v6, v3
1212 ; GFX90A-NEXT: image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc
1213 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1214 ; GFX90A-NEXT: ; return to shader part epilog
1216 ; GFX10-LABEL: atomic_add_i32_3d:
1217 ; GFX10: ; %bb.0: ; %main_body
1218 ; GFX10-NEXT: s_mov_b32 s0, s2
1219 ; GFX10-NEXT: s_mov_b32 s1, s3
1220 ; GFX10-NEXT: s_mov_b32 s2, s4
1221 ; GFX10-NEXT: s_mov_b32 s3, s5
1222 ; GFX10-NEXT: s_mov_b32 s4, s6
1223 ; GFX10-NEXT: s_mov_b32 s5, s7
1224 ; GFX10-NEXT: s_mov_b32 s6, s8
1225 ; GFX10-NEXT: s_mov_b32 s7, s9
1226 ; GFX10-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm glc
1227 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1228 ; GFX10-NEXT: ; return to shader part epilog
1230 %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32 %data, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
1231 %out = bitcast i32 %v to float
; Image atomic add on a cube image: i32 data operand and three i32 address
; operands (%s, %t, %face). Calls llvm.amdgcn.image.atomic.add.cube and bitcasts
; the i32 result to float to match the function's float return type. The
; tahiti, fiji, gfx900 and gfx90a checks expect a three-register address with
; dmask:0x1 unorm glc plus the da modifier (gfx90a copies the address into
; v[4:6] first); the gfx1010 checks expect dim:SQ_RSRC_IMG_CUBE and no da.
1235 define amdgpu_ps float @atomic_add_i32_cube(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %face) {
1236 ; GFX6-LABEL: atomic_add_i32_cube:
1237 ; GFX6: ; %bb.0: ; %main_body
1238 ; GFX6-NEXT: s_mov_b32 s0, s2
1239 ; GFX6-NEXT: s_mov_b32 s1, s3
1240 ; GFX6-NEXT: s_mov_b32 s2, s4
1241 ; GFX6-NEXT: s_mov_b32 s3, s5
1242 ; GFX6-NEXT: s_mov_b32 s4, s6
1243 ; GFX6-NEXT: s_mov_b32 s5, s7
1244 ; GFX6-NEXT: s_mov_b32 s6, s8
1245 ; GFX6-NEXT: s_mov_b32 s7, s9
1246 ; GFX6-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
1247 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1248 ; GFX6-NEXT: ; return to shader part epilog
1250 ; GFX8-LABEL: atomic_add_i32_cube:
1251 ; GFX8: ; %bb.0: ; %main_body
1252 ; GFX8-NEXT: s_mov_b32 s0, s2
1253 ; GFX8-NEXT: s_mov_b32 s1, s3
1254 ; GFX8-NEXT: s_mov_b32 s2, s4
1255 ; GFX8-NEXT: s_mov_b32 s3, s5
1256 ; GFX8-NEXT: s_mov_b32 s4, s6
1257 ; GFX8-NEXT: s_mov_b32 s5, s7
1258 ; GFX8-NEXT: s_mov_b32 s6, s8
1259 ; GFX8-NEXT: s_mov_b32 s7, s9
1260 ; GFX8-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
1261 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1262 ; GFX8-NEXT: ; return to shader part epilog
1264 ; GFX900-LABEL: atomic_add_i32_cube:
1265 ; GFX900: ; %bb.0: ; %main_body
1266 ; GFX900-NEXT: s_mov_b32 s0, s2
1267 ; GFX900-NEXT: s_mov_b32 s1, s3
1268 ; GFX900-NEXT: s_mov_b32 s2, s4
1269 ; GFX900-NEXT: s_mov_b32 s3, s5
1270 ; GFX900-NEXT: s_mov_b32 s4, s6
1271 ; GFX900-NEXT: s_mov_b32 s5, s7
1272 ; GFX900-NEXT: s_mov_b32 s6, s8
1273 ; GFX900-NEXT: s_mov_b32 s7, s9
1274 ; GFX900-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
1275 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1276 ; GFX900-NEXT: ; return to shader part epilog
1278 ; GFX90A-LABEL: atomic_add_i32_cube:
1279 ; GFX90A: ; %bb.0: ; %main_body
1280 ; GFX90A-NEXT: s_mov_b32 s0, s2
1281 ; GFX90A-NEXT: s_mov_b32 s1, s3
1282 ; GFX90A-NEXT: s_mov_b32 s2, s4
1283 ; GFX90A-NEXT: s_mov_b32 s3, s5
1284 ; GFX90A-NEXT: s_mov_b32 s4, s6
1285 ; GFX90A-NEXT: s_mov_b32 s5, s7
1286 ; GFX90A-NEXT: s_mov_b32 s6, s8
1287 ; GFX90A-NEXT: s_mov_b32 s7, s9
1288 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
1289 ; GFX90A-NEXT: v_mov_b32_e32 v5, v2
1290 ; GFX90A-NEXT: v_mov_b32_e32 v6, v3
1291 ; GFX90A-NEXT: image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc da
1292 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1293 ; GFX90A-NEXT: ; return to shader part epilog
1295 ; GFX10-LABEL: atomic_add_i32_cube:
1296 ; GFX10: ; %bb.0: ; %main_body
1297 ; GFX10-NEXT: s_mov_b32 s0, s2
1298 ; GFX10-NEXT: s_mov_b32 s1, s3
1299 ; GFX10-NEXT: s_mov_b32 s2, s4
1300 ; GFX10-NEXT: s_mov_b32 s3, s5
1301 ; GFX10-NEXT: s_mov_b32 s4, s6
1302 ; GFX10-NEXT: s_mov_b32 s5, s7
1303 ; GFX10-NEXT: s_mov_b32 s6, s8
1304 ; GFX10-NEXT: s_mov_b32 s7, s9
1305 ; GFX10-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm glc
1306 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1307 ; GFX10-NEXT: ; return to shader part epilog
1309 %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32 %data, i32 %s, i32 %t, i32 %face, <8 x i32> %rsrc, i32 0, i32 0)
1310 %out = bitcast i32 %v to float
; Image atomic add on a 1D array image: i32 data operand and two i32 address
; operands (%s, %slice). Calls llvm.amdgcn.image.atomic.add.1darray and bitcasts
; the i32 result to float to match the function's float return type. The
; tahiti, fiji, gfx900 and gfx90a checks expect a two-register address with
; dmask:0x1 unorm glc plus the da modifier (gfx90a copies the address into
; v[4:5] first); the gfx1010 checks expect dim:SQ_RSRC_IMG_1D_ARRAY and no da.
1314 define amdgpu_ps float @atomic_add_i32_1darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %slice) {
1315 ; GFX6-LABEL: atomic_add_i32_1darray:
1316 ; GFX6: ; %bb.0: ; %main_body
1317 ; GFX6-NEXT: s_mov_b32 s0, s2
1318 ; GFX6-NEXT: s_mov_b32 s1, s3
1319 ; GFX6-NEXT: s_mov_b32 s2, s4
1320 ; GFX6-NEXT: s_mov_b32 s3, s5
1321 ; GFX6-NEXT: s_mov_b32 s4, s6
1322 ; GFX6-NEXT: s_mov_b32 s5, s7
1323 ; GFX6-NEXT: s_mov_b32 s6, s8
1324 ; GFX6-NEXT: s_mov_b32 s7, s9
1325 ; GFX6-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da
1326 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1327 ; GFX6-NEXT: ; return to shader part epilog
1329 ; GFX8-LABEL: atomic_add_i32_1darray:
1330 ; GFX8: ; %bb.0: ; %main_body
1331 ; GFX8-NEXT: s_mov_b32 s0, s2
1332 ; GFX8-NEXT: s_mov_b32 s1, s3
1333 ; GFX8-NEXT: s_mov_b32 s2, s4
1334 ; GFX8-NEXT: s_mov_b32 s3, s5
1335 ; GFX8-NEXT: s_mov_b32 s4, s6
1336 ; GFX8-NEXT: s_mov_b32 s5, s7
1337 ; GFX8-NEXT: s_mov_b32 s6, s8
1338 ; GFX8-NEXT: s_mov_b32 s7, s9
1339 ; GFX8-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da
1340 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1341 ; GFX8-NEXT: ; return to shader part epilog
1343 ; GFX900-LABEL: atomic_add_i32_1darray:
1344 ; GFX900: ; %bb.0: ; %main_body
1345 ; GFX900-NEXT: s_mov_b32 s0, s2
1346 ; GFX900-NEXT: s_mov_b32 s1, s3
1347 ; GFX900-NEXT: s_mov_b32 s2, s4
1348 ; GFX900-NEXT: s_mov_b32 s3, s5
1349 ; GFX900-NEXT: s_mov_b32 s4, s6
1350 ; GFX900-NEXT: s_mov_b32 s5, s7
1351 ; GFX900-NEXT: s_mov_b32 s6, s8
1352 ; GFX900-NEXT: s_mov_b32 s7, s9
1353 ; GFX900-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da
1354 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1355 ; GFX900-NEXT: ; return to shader part epilog
1357 ; GFX90A-LABEL: atomic_add_i32_1darray:
1358 ; GFX90A: ; %bb.0: ; %main_body
1359 ; GFX90A-NEXT: s_mov_b32 s0, s2
1360 ; GFX90A-NEXT: s_mov_b32 s1, s3
1361 ; GFX90A-NEXT: s_mov_b32 s2, s4
1362 ; GFX90A-NEXT: s_mov_b32 s3, s5
1363 ; GFX90A-NEXT: s_mov_b32 s4, s6
1364 ; GFX90A-NEXT: s_mov_b32 s5, s7
1365 ; GFX90A-NEXT: s_mov_b32 s6, s8
1366 ; GFX90A-NEXT: s_mov_b32 s7, s9
1367 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
1368 ; GFX90A-NEXT: v_mov_b32_e32 v5, v2
1369 ; GFX90A-NEXT: image_atomic_add v0, v[4:5], s[0:7] dmask:0x1 unorm glc da
1370 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1371 ; GFX90A-NEXT: ; return to shader part epilog
1373 ; GFX10-LABEL: atomic_add_i32_1darray:
1374 ; GFX10: ; %bb.0: ; %main_body
1375 ; GFX10-NEXT: s_mov_b32 s0, s2
1376 ; GFX10-NEXT: s_mov_b32 s1, s3
1377 ; GFX10-NEXT: s_mov_b32 s2, s4
1378 ; GFX10-NEXT: s_mov_b32 s3, s5
1379 ; GFX10-NEXT: s_mov_b32 s4, s6
1380 ; GFX10-NEXT: s_mov_b32 s5, s7
1381 ; GFX10-NEXT: s_mov_b32 s6, s8
1382 ; GFX10-NEXT: s_mov_b32 s7, s9
1383 ; GFX10-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc
1384 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1385 ; GFX10-NEXT: ; return to shader part epilog
1387 %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32 %data, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
1388 %out = bitcast i32 %v to float
; Image atomic add on a 2D array image: i32 data operand and three i32 address
; operands (%s, %t, %slice). Calls llvm.amdgcn.image.atomic.add.2darray and
; bitcasts the i32 result to float to match the function's float return type.
; The tahiti, fiji, gfx900 and gfx90a checks expect a three-register address
; with dmask:0x1 unorm glc plus the da modifier (gfx90a copies the address into
; v[4:6] first); the gfx1010 checks expect dim:SQ_RSRC_IMG_2D_ARRAY and no da.
1392 define amdgpu_ps float @atomic_add_i32_2darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice) {
1393 ; GFX6-LABEL: atomic_add_i32_2darray:
1394 ; GFX6: ; %bb.0: ; %main_body
1395 ; GFX6-NEXT: s_mov_b32 s0, s2
1396 ; GFX6-NEXT: s_mov_b32 s1, s3
1397 ; GFX6-NEXT: s_mov_b32 s2, s4
1398 ; GFX6-NEXT: s_mov_b32 s3, s5
1399 ; GFX6-NEXT: s_mov_b32 s4, s6
1400 ; GFX6-NEXT: s_mov_b32 s5, s7
1401 ; GFX6-NEXT: s_mov_b32 s6, s8
1402 ; GFX6-NEXT: s_mov_b32 s7, s9
1403 ; GFX6-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
1404 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1405 ; GFX6-NEXT: ; return to shader part epilog
1407 ; GFX8-LABEL: atomic_add_i32_2darray:
1408 ; GFX8: ; %bb.0: ; %main_body
1409 ; GFX8-NEXT: s_mov_b32 s0, s2
1410 ; GFX8-NEXT: s_mov_b32 s1, s3
1411 ; GFX8-NEXT: s_mov_b32 s2, s4
1412 ; GFX8-NEXT: s_mov_b32 s3, s5
1413 ; GFX8-NEXT: s_mov_b32 s4, s6
1414 ; GFX8-NEXT: s_mov_b32 s5, s7
1415 ; GFX8-NEXT: s_mov_b32 s6, s8
1416 ; GFX8-NEXT: s_mov_b32 s7, s9
1417 ; GFX8-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
1418 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1419 ; GFX8-NEXT: ; return to shader part epilog
1421 ; GFX900-LABEL: atomic_add_i32_2darray:
1422 ; GFX900: ; %bb.0: ; %main_body
1423 ; GFX900-NEXT: s_mov_b32 s0, s2
1424 ; GFX900-NEXT: s_mov_b32 s1, s3
1425 ; GFX900-NEXT: s_mov_b32 s2, s4
1426 ; GFX900-NEXT: s_mov_b32 s3, s5
1427 ; GFX900-NEXT: s_mov_b32 s4, s6
1428 ; GFX900-NEXT: s_mov_b32 s5, s7
1429 ; GFX900-NEXT: s_mov_b32 s6, s8
1430 ; GFX900-NEXT: s_mov_b32 s7, s9
1431 ; GFX900-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
1432 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1433 ; GFX900-NEXT: ; return to shader part epilog
1435 ; GFX90A-LABEL: atomic_add_i32_2darray:
1436 ; GFX90A: ; %bb.0: ; %main_body
1437 ; GFX90A-NEXT: s_mov_b32 s0, s2
1438 ; GFX90A-NEXT: s_mov_b32 s1, s3
1439 ; GFX90A-NEXT: s_mov_b32 s2, s4
1440 ; GFX90A-NEXT: s_mov_b32 s3, s5
1441 ; GFX90A-NEXT: s_mov_b32 s4, s6
1442 ; GFX90A-NEXT: s_mov_b32 s5, s7
1443 ; GFX90A-NEXT: s_mov_b32 s6, s8
1444 ; GFX90A-NEXT: s_mov_b32 s7, s9
1445 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
1446 ; GFX90A-NEXT: v_mov_b32_e32 v5, v2
1447 ; GFX90A-NEXT: v_mov_b32_e32 v6, v3
1448 ; GFX90A-NEXT: image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc da
1449 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1450 ; GFX90A-NEXT: ; return to shader part epilog
1452 ; GFX10-LABEL: atomic_add_i32_2darray:
1453 ; GFX10: ; %bb.0: ; %main_body
1454 ; GFX10-NEXT: s_mov_b32 s0, s2
1455 ; GFX10-NEXT: s_mov_b32 s1, s3
1456 ; GFX10-NEXT: s_mov_b32 s2, s4
1457 ; GFX10-NEXT: s_mov_b32 s3, s5
1458 ; GFX10-NEXT: s_mov_b32 s4, s6
1459 ; GFX10-NEXT: s_mov_b32 s5, s7
1460 ; GFX10-NEXT: s_mov_b32 s6, s8
1461 ; GFX10-NEXT: s_mov_b32 s7, s9
1462 ; GFX10-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc
1463 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1464 ; GFX10-NEXT: ; return to shader part epilog
1466 %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
1467 %out = bitcast i32 %v to float
; Image atomic add on a 2D MSAA image: i32 data operand and three i32 address
; operands (%s, %t, %fragid). Calls llvm.amdgcn.image.atomic.add.2dmsaa and
; bitcasts the i32 result to float to match the function's float return type.
; The checks expect a three-register address v[1:3] with dmask:0x1 unorm glc
; and, unlike the array variants in this file, no da modifier; on gfx90a the
; address is first copied into v[4:6], and the gfx1010 checks additionally
; print dim:SQ_RSRC_IMG_2D_MSAA.
1471 define amdgpu_ps float @atomic_add_i32_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %fragid) {
1472 ; GFX6-LABEL: atomic_add_i32_2dmsaa:
1473 ; GFX6: ; %bb.0: ; %main_body
1474 ; GFX6-NEXT: s_mov_b32 s0, s2
1475 ; GFX6-NEXT: s_mov_b32 s1, s3
1476 ; GFX6-NEXT: s_mov_b32 s2, s4
1477 ; GFX6-NEXT: s_mov_b32 s3, s5
1478 ; GFX6-NEXT: s_mov_b32 s4, s6
1479 ; GFX6-NEXT: s_mov_b32 s5, s7
1480 ; GFX6-NEXT: s_mov_b32 s6, s8
1481 ; GFX6-NEXT: s_mov_b32 s7, s9
1482 ; GFX6-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
1483 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1484 ; GFX6-NEXT: ; return to shader part epilog
1486 ; GFX8-LABEL: atomic_add_i32_2dmsaa:
1487 ; GFX8: ; %bb.0: ; %main_body
1488 ; GFX8-NEXT: s_mov_b32 s0, s2
1489 ; GFX8-NEXT: s_mov_b32 s1, s3
1490 ; GFX8-NEXT: s_mov_b32 s2, s4
1491 ; GFX8-NEXT: s_mov_b32 s3, s5
1492 ; GFX8-NEXT: s_mov_b32 s4, s6
1493 ; GFX8-NEXT: s_mov_b32 s5, s7
1494 ; GFX8-NEXT: s_mov_b32 s6, s8
1495 ; GFX8-NEXT: s_mov_b32 s7, s9
1496 ; GFX8-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
1497 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1498 ; GFX8-NEXT: ; return to shader part epilog
1500 ; GFX900-LABEL: atomic_add_i32_2dmsaa:
1501 ; GFX900: ; %bb.0: ; %main_body
1502 ; GFX900-NEXT: s_mov_b32 s0, s2
1503 ; GFX900-NEXT: s_mov_b32 s1, s3
1504 ; GFX900-NEXT: s_mov_b32 s2, s4
1505 ; GFX900-NEXT: s_mov_b32 s3, s5
1506 ; GFX900-NEXT: s_mov_b32 s4, s6
1507 ; GFX900-NEXT: s_mov_b32 s5, s7
1508 ; GFX900-NEXT: s_mov_b32 s6, s8
1509 ; GFX900-NEXT: s_mov_b32 s7, s9
1510 ; GFX900-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
1511 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1512 ; GFX900-NEXT: ; return to shader part epilog
1514 ; GFX90A-LABEL: atomic_add_i32_2dmsaa:
1515 ; GFX90A: ; %bb.0: ; %main_body
1516 ; GFX90A-NEXT: s_mov_b32 s0, s2
1517 ; GFX90A-NEXT: s_mov_b32 s1, s3
1518 ; GFX90A-NEXT: s_mov_b32 s2, s4
1519 ; GFX90A-NEXT: s_mov_b32 s3, s5
1520 ; GFX90A-NEXT: s_mov_b32 s4, s6
1521 ; GFX90A-NEXT: s_mov_b32 s5, s7
1522 ; GFX90A-NEXT: s_mov_b32 s6, s8
1523 ; GFX90A-NEXT: s_mov_b32 s7, s9
1524 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
1525 ; GFX90A-NEXT: v_mov_b32_e32 v5, v2
1526 ; GFX90A-NEXT: v_mov_b32_e32 v6, v3
1527 ; GFX90A-NEXT: image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc
1528 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1529 ; GFX90A-NEXT: ; return to shader part epilog
1531 ; GFX10-LABEL: atomic_add_i32_2dmsaa:
1532 ; GFX10: ; %bb.0: ; %main_body
1533 ; GFX10-NEXT: s_mov_b32 s0, s2
1534 ; GFX10-NEXT: s_mov_b32 s1, s3
1535 ; GFX10-NEXT: s_mov_b32 s2, s4
1536 ; GFX10-NEXT: s_mov_b32 s3, s5
1537 ; GFX10-NEXT: s_mov_b32 s4, s6
1538 ; GFX10-NEXT: s_mov_b32 s5, s7
1539 ; GFX10-NEXT: s_mov_b32 s6, s8
1540 ; GFX10-NEXT: s_mov_b32 s7, s9
1541 ; GFX10-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc
1542 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1543 ; GFX10-NEXT: ; return to shader part epilog
1545 %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
1546 %out = bitcast i32 %v to float
; Image atomic add on a 2D MSAA array image: i32 data operand and four i32
; address operands (%s, %t, %slice, %fragid). Calls
; llvm.amdgcn.image.atomic.add.2darraymsaa and bitcasts the i32 result to float
; to match the function's float return type. The tahiti, fiji, gfx900 and
; gfx90a checks expect a four-register address with dmask:0x1 unorm glc plus
; the da modifier (gfx90a copies the address into v[6:9] first); the gfx1010
; checks expect dim:SQ_RSRC_IMG_2D_MSAA_ARRAY and no da.
1550 define amdgpu_ps float @atomic_add_i32_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
1551 ; GFX6-LABEL: atomic_add_i32_2darraymsaa:
1552 ; GFX6: ; %bb.0: ; %main_body
1553 ; GFX6-NEXT: s_mov_b32 s0, s2
1554 ; GFX6-NEXT: s_mov_b32 s1, s3
1555 ; GFX6-NEXT: s_mov_b32 s2, s4
1556 ; GFX6-NEXT: s_mov_b32 s3, s5
1557 ; GFX6-NEXT: s_mov_b32 s4, s6
1558 ; GFX6-NEXT: s_mov_b32 s5, s7
1559 ; GFX6-NEXT: s_mov_b32 s6, s8
1560 ; GFX6-NEXT: s_mov_b32 s7, s9
1561 ; GFX6-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da
1562 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1563 ; GFX6-NEXT: ; return to shader part epilog
1565 ; GFX8-LABEL: atomic_add_i32_2darraymsaa:
1566 ; GFX8: ; %bb.0: ; %main_body
1567 ; GFX8-NEXT: s_mov_b32 s0, s2
1568 ; GFX8-NEXT: s_mov_b32 s1, s3
1569 ; GFX8-NEXT: s_mov_b32 s2, s4
1570 ; GFX8-NEXT: s_mov_b32 s3, s5
1571 ; GFX8-NEXT: s_mov_b32 s4, s6
1572 ; GFX8-NEXT: s_mov_b32 s5, s7
1573 ; GFX8-NEXT: s_mov_b32 s6, s8
1574 ; GFX8-NEXT: s_mov_b32 s7, s9
1575 ; GFX8-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da
1576 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1577 ; GFX8-NEXT: ; return to shader part epilog
1579 ; GFX900-LABEL: atomic_add_i32_2darraymsaa:
1580 ; GFX900: ; %bb.0: ; %main_body
1581 ; GFX900-NEXT: s_mov_b32 s0, s2
1582 ; GFX900-NEXT: s_mov_b32 s1, s3
1583 ; GFX900-NEXT: s_mov_b32 s2, s4
1584 ; GFX900-NEXT: s_mov_b32 s3, s5
1585 ; GFX900-NEXT: s_mov_b32 s4, s6
1586 ; GFX900-NEXT: s_mov_b32 s5, s7
1587 ; GFX900-NEXT: s_mov_b32 s6, s8
1588 ; GFX900-NEXT: s_mov_b32 s7, s9
1589 ; GFX900-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da
1590 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1591 ; GFX900-NEXT: ; return to shader part epilog
1593 ; GFX90A-LABEL: atomic_add_i32_2darraymsaa:
1594 ; GFX90A: ; %bb.0: ; %main_body
1595 ; GFX90A-NEXT: s_mov_b32 s0, s2
1596 ; GFX90A-NEXT: s_mov_b32 s1, s3
1597 ; GFX90A-NEXT: s_mov_b32 s2, s4
1598 ; GFX90A-NEXT: s_mov_b32 s3, s5
1599 ; GFX90A-NEXT: s_mov_b32 s4, s6
1600 ; GFX90A-NEXT: s_mov_b32 s5, s7
1601 ; GFX90A-NEXT: s_mov_b32 s6, s8
1602 ; GFX90A-NEXT: s_mov_b32 s7, s9
1603 ; GFX90A-NEXT: v_mov_b32_e32 v6, v1
1604 ; GFX90A-NEXT: v_mov_b32_e32 v7, v2
1605 ; GFX90A-NEXT: v_mov_b32_e32 v8, v3
1606 ; GFX90A-NEXT: v_mov_b32_e32 v9, v4
1607 ; GFX90A-NEXT: image_atomic_add v0, v[6:9], s[0:7] dmask:0x1 unorm glc da
1608 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1609 ; GFX90A-NEXT: ; return to shader part epilog
1611 ; GFX10-LABEL: atomic_add_i32_2darraymsaa:
1612 ; GFX10: ; %bb.0: ; %main_body
1613 ; GFX10-NEXT: s_mov_b32 s0, s2
1614 ; GFX10-NEXT: s_mov_b32 s1, s3
1615 ; GFX10-NEXT: s_mov_b32 s2, s4
1616 ; GFX10-NEXT: s_mov_b32 s3, s5
1617 ; GFX10-NEXT: s_mov_b32 s4, s6
1618 ; GFX10-NEXT: s_mov_b32 s5, s7
1619 ; GFX10-NEXT: s_mov_b32 s6, s8
1620 ; GFX10-NEXT: s_mov_b32 s7, s9
1621 ; GFX10-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc
1622 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1623 ; GFX10-NEXT: ; return to shader part epilog
1625 %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
1626 %out = bitcast i32 %v to float
; Image atomic add on a 1D image with a non-zero final intrinsic operand: the
; call passes i32 2 as its last argument where the other tests visible in this
; part of the file pass i32 0. Calls llvm.amdgcn.image.atomic.add.1d and
; bitcasts the i32 result to float to match the function's float return type.
; The checks expect the slc modifier after glc on every target; on gfx90a the
; coordinate is first copied from v1 to v2, and the gfx1010 checks additionally
; print dim:SQ_RSRC_IMG_1D.
; NOTE(review): the last operand is presumably the cache-policy immediate with
; bit 1 selecting slc - confirm against the AMDGPU intrinsic definitions.
1630 define amdgpu_ps float @atomic_add_i32_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
1631 ; GFX6-LABEL: atomic_add_i32_1d_slc:
1632 ; GFX6: ; %bb.0: ; %main_body
1633 ; GFX6-NEXT: s_mov_b32 s0, s2
1634 ; GFX6-NEXT: s_mov_b32 s1, s3
1635 ; GFX6-NEXT: s_mov_b32 s2, s4
1636 ; GFX6-NEXT: s_mov_b32 s3, s5
1637 ; GFX6-NEXT: s_mov_b32 s4, s6
1638 ; GFX6-NEXT: s_mov_b32 s5, s7
1639 ; GFX6-NEXT: s_mov_b32 s6, s8
1640 ; GFX6-NEXT: s_mov_b32 s7, s9
1641 ; GFX6-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc
1642 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1643 ; GFX6-NEXT: ; return to shader part epilog
1645 ; GFX8-LABEL: atomic_add_i32_1d_slc:
1646 ; GFX8: ; %bb.0: ; %main_body
1647 ; GFX8-NEXT: s_mov_b32 s0, s2
1648 ; GFX8-NEXT: s_mov_b32 s1, s3
1649 ; GFX8-NEXT: s_mov_b32 s2, s4
1650 ; GFX8-NEXT: s_mov_b32 s3, s5
1651 ; GFX8-NEXT: s_mov_b32 s4, s6
1652 ; GFX8-NEXT: s_mov_b32 s5, s7
1653 ; GFX8-NEXT: s_mov_b32 s6, s8
1654 ; GFX8-NEXT: s_mov_b32 s7, s9
1655 ; GFX8-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc
1656 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1657 ; GFX8-NEXT: ; return to shader part epilog
1659 ; GFX900-LABEL: atomic_add_i32_1d_slc:
1660 ; GFX900: ; %bb.0: ; %main_body
1661 ; GFX900-NEXT: s_mov_b32 s0, s2
1662 ; GFX900-NEXT: s_mov_b32 s1, s3
1663 ; GFX900-NEXT: s_mov_b32 s2, s4
1664 ; GFX900-NEXT: s_mov_b32 s3, s5
1665 ; GFX900-NEXT: s_mov_b32 s4, s6
1666 ; GFX900-NEXT: s_mov_b32 s5, s7
1667 ; GFX900-NEXT: s_mov_b32 s6, s8
1668 ; GFX900-NEXT: s_mov_b32 s7, s9
1669 ; GFX900-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc
1670 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1671 ; GFX900-NEXT: ; return to shader part epilog
1673 ; GFX90A-LABEL: atomic_add_i32_1d_slc:
1674 ; GFX90A: ; %bb.0: ; %main_body
1675 ; GFX90A-NEXT: s_mov_b32 s0, s2
1676 ; GFX90A-NEXT: s_mov_b32 s1, s3
1677 ; GFX90A-NEXT: s_mov_b32 s2, s4
1678 ; GFX90A-NEXT: s_mov_b32 s3, s5
1679 ; GFX90A-NEXT: s_mov_b32 s4, s6
1680 ; GFX90A-NEXT: s_mov_b32 s5, s7
1681 ; GFX90A-NEXT: s_mov_b32 s6, s8
1682 ; GFX90A-NEXT: s_mov_b32 s7, s9
1683 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
1684 ; GFX90A-NEXT: image_atomic_add v0, v2, s[0:7] dmask:0x1 unorm glc slc
1685 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1686 ; GFX90A-NEXT: ; return to shader part epilog
1688 ; GFX10-LABEL: atomic_add_i32_1d_slc:
1689 ; GFX10: ; %bb.0: ; %main_body
1690 ; GFX10-NEXT: s_mov_b32 s0, s2
1691 ; GFX10-NEXT: s_mov_b32 s1, s3
1692 ; GFX10-NEXT: s_mov_b32 s2, s4
1693 ; GFX10-NEXT: s_mov_b32 s3, s5
1694 ; GFX10-NEXT: s_mov_b32 s4, s6
1695 ; GFX10-NEXT: s_mov_b32 s5, s7
1696 ; GFX10-NEXT: s_mov_b32 s6, s8
1697 ; GFX10-NEXT: s_mov_b32 s7, s9
1698 ; GFX10-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc slc
1699 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1700 ; GFX10-NEXT: ; return to shader part epilog
1702 %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
1703 %out = bitcast i32 %v to float
; Test case: 64-bit image atomic swap on a 1D image from an amdgpu_ps shader.
; The body calls llvm.amdgcn.image.atomic.swap.1d.i64.i32 with %data, %s and the
; inreg %rsrc (both trailing i32 operands are 0) and returns the i64 result
; bitcast to <2 x float>.
; Every run line expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_swap on v[0:1], v2, s[0:7] with dmask:0x3 unorm glc, and then
; s_waitcnt vmcnt(0). The tahiti run additionally waits on expcnt(0); the
; gfx1010 run additionally prints dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing them by hand.
1707 define amdgpu_ps <2 x float> @atomic_swap_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
1708 ; GFX6-LABEL: atomic_swap_i64_1d:
1709 ; GFX6: ; %bb.0: ; %main_body
1710 ; GFX6-NEXT: s_mov_b32 s0, s2
1711 ; GFX6-NEXT: s_mov_b32 s1, s3
1712 ; GFX6-NEXT: s_mov_b32 s2, s4
1713 ; GFX6-NEXT: s_mov_b32 s3, s5
1714 ; GFX6-NEXT: s_mov_b32 s4, s6
1715 ; GFX6-NEXT: s_mov_b32 s5, s7
1716 ; GFX6-NEXT: s_mov_b32 s6, s8
1717 ; GFX6-NEXT: s_mov_b32 s7, s9
1718 ; GFX6-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1719 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1720 ; GFX6-NEXT: ; return to shader part epilog
1722 ; GFX8-LABEL: atomic_swap_i64_1d:
1723 ; GFX8: ; %bb.0: ; %main_body
1724 ; GFX8-NEXT: s_mov_b32 s0, s2
1725 ; GFX8-NEXT: s_mov_b32 s1, s3
1726 ; GFX8-NEXT: s_mov_b32 s2, s4
1727 ; GFX8-NEXT: s_mov_b32 s3, s5
1728 ; GFX8-NEXT: s_mov_b32 s4, s6
1729 ; GFX8-NEXT: s_mov_b32 s5, s7
1730 ; GFX8-NEXT: s_mov_b32 s6, s8
1731 ; GFX8-NEXT: s_mov_b32 s7, s9
1732 ; GFX8-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1733 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1734 ; GFX8-NEXT: ; return to shader part epilog
1736 ; GFX900-LABEL: atomic_swap_i64_1d:
1737 ; GFX900: ; %bb.0: ; %main_body
1738 ; GFX900-NEXT: s_mov_b32 s0, s2
1739 ; GFX900-NEXT: s_mov_b32 s1, s3
1740 ; GFX900-NEXT: s_mov_b32 s2, s4
1741 ; GFX900-NEXT: s_mov_b32 s3, s5
1742 ; GFX900-NEXT: s_mov_b32 s4, s6
1743 ; GFX900-NEXT: s_mov_b32 s5, s7
1744 ; GFX900-NEXT: s_mov_b32 s6, s8
1745 ; GFX900-NEXT: s_mov_b32 s7, s9
1746 ; GFX900-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1747 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1748 ; GFX900-NEXT: ; return to shader part epilog
1750 ; GFX90A-LABEL: atomic_swap_i64_1d:
1751 ; GFX90A: ; %bb.0: ; %main_body
1752 ; GFX90A-NEXT: s_mov_b32 s0, s2
1753 ; GFX90A-NEXT: s_mov_b32 s1, s3
1754 ; GFX90A-NEXT: s_mov_b32 s2, s4
1755 ; GFX90A-NEXT: s_mov_b32 s3, s5
1756 ; GFX90A-NEXT: s_mov_b32 s4, s6
1757 ; GFX90A-NEXT: s_mov_b32 s5, s7
1758 ; GFX90A-NEXT: s_mov_b32 s6, s8
1759 ; GFX90A-NEXT: s_mov_b32 s7, s9
1760 ; GFX90A-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1761 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1762 ; GFX90A-NEXT: ; return to shader part epilog
1764 ; GFX10-LABEL: atomic_swap_i64_1d:
1765 ; GFX10: ; %bb.0: ; %main_body
1766 ; GFX10-NEXT: s_mov_b32 s0, s2
1767 ; GFX10-NEXT: s_mov_b32 s1, s3
1768 ; GFX10-NEXT: s_mov_b32 s2, s4
1769 ; GFX10-NEXT: s_mov_b32 s3, s5
1770 ; GFX10-NEXT: s_mov_b32 s4, s6
1771 ; GFX10-NEXT: s_mov_b32 s5, s7
1772 ; GFX10-NEXT: s_mov_b32 s6, s8
1773 ; GFX10-NEXT: s_mov_b32 s7, s9
1774 ; GFX10-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
1775 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1776 ; GFX10-NEXT: ; return to shader part epilog
1778 %v = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1779 %out = bitcast i64 %v to <2 x float>
1780 ret <2 x float> %out
; Test case: 64-bit image atomic add on a 1D image from an amdgpu_ps shader.
; The body calls llvm.amdgcn.image.atomic.add.1d.i64.i32 with %data, %s and the
; inreg %rsrc (both trailing i32 operands are 0) and returns the i64 result
; bitcast to <2 x float>.
; Every run line expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_add on v[0:1], v2, s[0:7] with dmask:0x3 unorm glc, and then
; s_waitcnt vmcnt(0). The tahiti run additionally waits on expcnt(0); the
; gfx1010 run additionally prints dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing them by hand.
1783 define amdgpu_ps <2 x float> @atomic_add_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
1784 ; GFX6-LABEL: atomic_add_i64_1d:
1785 ; GFX6: ; %bb.0: ; %main_body
1786 ; GFX6-NEXT: s_mov_b32 s0, s2
1787 ; GFX6-NEXT: s_mov_b32 s1, s3
1788 ; GFX6-NEXT: s_mov_b32 s2, s4
1789 ; GFX6-NEXT: s_mov_b32 s3, s5
1790 ; GFX6-NEXT: s_mov_b32 s4, s6
1791 ; GFX6-NEXT: s_mov_b32 s5, s7
1792 ; GFX6-NEXT: s_mov_b32 s6, s8
1793 ; GFX6-NEXT: s_mov_b32 s7, s9
1794 ; GFX6-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1795 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1796 ; GFX6-NEXT: ; return to shader part epilog
1798 ; GFX8-LABEL: atomic_add_i64_1d:
1799 ; GFX8: ; %bb.0: ; %main_body
1800 ; GFX8-NEXT: s_mov_b32 s0, s2
1801 ; GFX8-NEXT: s_mov_b32 s1, s3
1802 ; GFX8-NEXT: s_mov_b32 s2, s4
1803 ; GFX8-NEXT: s_mov_b32 s3, s5
1804 ; GFX8-NEXT: s_mov_b32 s4, s6
1805 ; GFX8-NEXT: s_mov_b32 s5, s7
1806 ; GFX8-NEXT: s_mov_b32 s6, s8
1807 ; GFX8-NEXT: s_mov_b32 s7, s9
1808 ; GFX8-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1809 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1810 ; GFX8-NEXT: ; return to shader part epilog
1812 ; GFX900-LABEL: atomic_add_i64_1d:
1813 ; GFX900: ; %bb.0: ; %main_body
1814 ; GFX900-NEXT: s_mov_b32 s0, s2
1815 ; GFX900-NEXT: s_mov_b32 s1, s3
1816 ; GFX900-NEXT: s_mov_b32 s2, s4
1817 ; GFX900-NEXT: s_mov_b32 s3, s5
1818 ; GFX900-NEXT: s_mov_b32 s4, s6
1819 ; GFX900-NEXT: s_mov_b32 s5, s7
1820 ; GFX900-NEXT: s_mov_b32 s6, s8
1821 ; GFX900-NEXT: s_mov_b32 s7, s9
1822 ; GFX900-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1823 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1824 ; GFX900-NEXT: ; return to shader part epilog
1826 ; GFX90A-LABEL: atomic_add_i64_1d:
1827 ; GFX90A: ; %bb.0: ; %main_body
1828 ; GFX90A-NEXT: s_mov_b32 s0, s2
1829 ; GFX90A-NEXT: s_mov_b32 s1, s3
1830 ; GFX90A-NEXT: s_mov_b32 s2, s4
1831 ; GFX90A-NEXT: s_mov_b32 s3, s5
1832 ; GFX90A-NEXT: s_mov_b32 s4, s6
1833 ; GFX90A-NEXT: s_mov_b32 s5, s7
1834 ; GFX90A-NEXT: s_mov_b32 s6, s8
1835 ; GFX90A-NEXT: s_mov_b32 s7, s9
1836 ; GFX90A-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1837 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1838 ; GFX90A-NEXT: ; return to shader part epilog
1840 ; GFX10-LABEL: atomic_add_i64_1d:
1841 ; GFX10: ; %bb.0: ; %main_body
1842 ; GFX10-NEXT: s_mov_b32 s0, s2
1843 ; GFX10-NEXT: s_mov_b32 s1, s3
1844 ; GFX10-NEXT: s_mov_b32 s2, s4
1845 ; GFX10-NEXT: s_mov_b32 s3, s5
1846 ; GFX10-NEXT: s_mov_b32 s4, s6
1847 ; GFX10-NEXT: s_mov_b32 s5, s7
1848 ; GFX10-NEXT: s_mov_b32 s6, s8
1849 ; GFX10-NEXT: s_mov_b32 s7, s9
1850 ; GFX10-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
1851 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1852 ; GFX10-NEXT: ; return to shader part epilog
1854 %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1855 %out = bitcast i64 %v to <2 x float>
1856 ret <2 x float> %out
; Test case: 64-bit image atomic sub on a 1D image from an amdgpu_ps shader.
; The body calls llvm.amdgcn.image.atomic.sub.1d.i64.i32 with %data, %s and the
; inreg %rsrc (both trailing i32 operands are 0) and returns the i64 result
; bitcast to <2 x float>.
; Every run line expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_sub on v[0:1], v2, s[0:7] with dmask:0x3 unorm glc, and then
; s_waitcnt vmcnt(0). The tahiti run additionally waits on expcnt(0); the
; gfx1010 run additionally prints dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing them by hand.
1859 define amdgpu_ps <2 x float> @atomic_sub_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
1860 ; GFX6-LABEL: atomic_sub_i64_1d:
1861 ; GFX6: ; %bb.0: ; %main_body
1862 ; GFX6-NEXT: s_mov_b32 s0, s2
1863 ; GFX6-NEXT: s_mov_b32 s1, s3
1864 ; GFX6-NEXT: s_mov_b32 s2, s4
1865 ; GFX6-NEXT: s_mov_b32 s3, s5
1866 ; GFX6-NEXT: s_mov_b32 s4, s6
1867 ; GFX6-NEXT: s_mov_b32 s5, s7
1868 ; GFX6-NEXT: s_mov_b32 s6, s8
1869 ; GFX6-NEXT: s_mov_b32 s7, s9
1870 ; GFX6-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1871 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1872 ; GFX6-NEXT: ; return to shader part epilog
1874 ; GFX8-LABEL: atomic_sub_i64_1d:
1875 ; GFX8: ; %bb.0: ; %main_body
1876 ; GFX8-NEXT: s_mov_b32 s0, s2
1877 ; GFX8-NEXT: s_mov_b32 s1, s3
1878 ; GFX8-NEXT: s_mov_b32 s2, s4
1879 ; GFX8-NEXT: s_mov_b32 s3, s5
1880 ; GFX8-NEXT: s_mov_b32 s4, s6
1881 ; GFX8-NEXT: s_mov_b32 s5, s7
1882 ; GFX8-NEXT: s_mov_b32 s6, s8
1883 ; GFX8-NEXT: s_mov_b32 s7, s9
1884 ; GFX8-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1885 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1886 ; GFX8-NEXT: ; return to shader part epilog
1888 ; GFX900-LABEL: atomic_sub_i64_1d:
1889 ; GFX900: ; %bb.0: ; %main_body
1890 ; GFX900-NEXT: s_mov_b32 s0, s2
1891 ; GFX900-NEXT: s_mov_b32 s1, s3
1892 ; GFX900-NEXT: s_mov_b32 s2, s4
1893 ; GFX900-NEXT: s_mov_b32 s3, s5
1894 ; GFX900-NEXT: s_mov_b32 s4, s6
1895 ; GFX900-NEXT: s_mov_b32 s5, s7
1896 ; GFX900-NEXT: s_mov_b32 s6, s8
1897 ; GFX900-NEXT: s_mov_b32 s7, s9
1898 ; GFX900-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1899 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1900 ; GFX900-NEXT: ; return to shader part epilog
1902 ; GFX90A-LABEL: atomic_sub_i64_1d:
1903 ; GFX90A: ; %bb.0: ; %main_body
1904 ; GFX90A-NEXT: s_mov_b32 s0, s2
1905 ; GFX90A-NEXT: s_mov_b32 s1, s3
1906 ; GFX90A-NEXT: s_mov_b32 s2, s4
1907 ; GFX90A-NEXT: s_mov_b32 s3, s5
1908 ; GFX90A-NEXT: s_mov_b32 s4, s6
1909 ; GFX90A-NEXT: s_mov_b32 s5, s7
1910 ; GFX90A-NEXT: s_mov_b32 s6, s8
1911 ; GFX90A-NEXT: s_mov_b32 s7, s9
1912 ; GFX90A-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1913 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1914 ; GFX90A-NEXT: ; return to shader part epilog
1916 ; GFX10-LABEL: atomic_sub_i64_1d:
1917 ; GFX10: ; %bb.0: ; %main_body
1918 ; GFX10-NEXT: s_mov_b32 s0, s2
1919 ; GFX10-NEXT: s_mov_b32 s1, s3
1920 ; GFX10-NEXT: s_mov_b32 s2, s4
1921 ; GFX10-NEXT: s_mov_b32 s3, s5
1922 ; GFX10-NEXT: s_mov_b32 s4, s6
1923 ; GFX10-NEXT: s_mov_b32 s5, s7
1924 ; GFX10-NEXT: s_mov_b32 s6, s8
1925 ; GFX10-NEXT: s_mov_b32 s7, s9
1926 ; GFX10-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
1927 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1928 ; GFX10-NEXT: ; return to shader part epilog
1930 %v = call i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1931 %out = bitcast i64 %v to <2 x float>
1932 ret <2 x float> %out
; Test case: 64-bit image atomic smin on a 1D image from an amdgpu_ps shader.
; The body calls llvm.amdgcn.image.atomic.smin.1d.i64.i32 with %data, %s and the
; inreg %rsrc (both trailing i32 operands are 0) and returns the i64 result
; bitcast to <2 x float>.
; Every run line expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_smin on v[0:1], v2, s[0:7] with dmask:0x3 unorm glc, and then
; s_waitcnt vmcnt(0). The tahiti run additionally waits on expcnt(0); the
; gfx1010 run additionally prints dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing them by hand.
1935 define amdgpu_ps <2 x float> @atomic_smin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
1936 ; GFX6-LABEL: atomic_smin_i64_1d:
1937 ; GFX6: ; %bb.0: ; %main_body
1938 ; GFX6-NEXT: s_mov_b32 s0, s2
1939 ; GFX6-NEXT: s_mov_b32 s1, s3
1940 ; GFX6-NEXT: s_mov_b32 s2, s4
1941 ; GFX6-NEXT: s_mov_b32 s3, s5
1942 ; GFX6-NEXT: s_mov_b32 s4, s6
1943 ; GFX6-NEXT: s_mov_b32 s5, s7
1944 ; GFX6-NEXT: s_mov_b32 s6, s8
1945 ; GFX6-NEXT: s_mov_b32 s7, s9
1946 ; GFX6-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1947 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1948 ; GFX6-NEXT: ; return to shader part epilog
1950 ; GFX8-LABEL: atomic_smin_i64_1d:
1951 ; GFX8: ; %bb.0: ; %main_body
1952 ; GFX8-NEXT: s_mov_b32 s0, s2
1953 ; GFX8-NEXT: s_mov_b32 s1, s3
1954 ; GFX8-NEXT: s_mov_b32 s2, s4
1955 ; GFX8-NEXT: s_mov_b32 s3, s5
1956 ; GFX8-NEXT: s_mov_b32 s4, s6
1957 ; GFX8-NEXT: s_mov_b32 s5, s7
1958 ; GFX8-NEXT: s_mov_b32 s6, s8
1959 ; GFX8-NEXT: s_mov_b32 s7, s9
1960 ; GFX8-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1961 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1962 ; GFX8-NEXT: ; return to shader part epilog
1964 ; GFX900-LABEL: atomic_smin_i64_1d:
1965 ; GFX900: ; %bb.0: ; %main_body
1966 ; GFX900-NEXT: s_mov_b32 s0, s2
1967 ; GFX900-NEXT: s_mov_b32 s1, s3
1968 ; GFX900-NEXT: s_mov_b32 s2, s4
1969 ; GFX900-NEXT: s_mov_b32 s3, s5
1970 ; GFX900-NEXT: s_mov_b32 s4, s6
1971 ; GFX900-NEXT: s_mov_b32 s5, s7
1972 ; GFX900-NEXT: s_mov_b32 s6, s8
1973 ; GFX900-NEXT: s_mov_b32 s7, s9
1974 ; GFX900-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1975 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1976 ; GFX900-NEXT: ; return to shader part epilog
1978 ; GFX90A-LABEL: atomic_smin_i64_1d:
1979 ; GFX90A: ; %bb.0: ; %main_body
1980 ; GFX90A-NEXT: s_mov_b32 s0, s2
1981 ; GFX90A-NEXT: s_mov_b32 s1, s3
1982 ; GFX90A-NEXT: s_mov_b32 s2, s4
1983 ; GFX90A-NEXT: s_mov_b32 s3, s5
1984 ; GFX90A-NEXT: s_mov_b32 s4, s6
1985 ; GFX90A-NEXT: s_mov_b32 s5, s7
1986 ; GFX90A-NEXT: s_mov_b32 s6, s8
1987 ; GFX90A-NEXT: s_mov_b32 s7, s9
1988 ; GFX90A-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1989 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1990 ; GFX90A-NEXT: ; return to shader part epilog
1992 ; GFX10-LABEL: atomic_smin_i64_1d:
1993 ; GFX10: ; %bb.0: ; %main_body
1994 ; GFX10-NEXT: s_mov_b32 s0, s2
1995 ; GFX10-NEXT: s_mov_b32 s1, s3
1996 ; GFX10-NEXT: s_mov_b32 s2, s4
1997 ; GFX10-NEXT: s_mov_b32 s3, s5
1998 ; GFX10-NEXT: s_mov_b32 s4, s6
1999 ; GFX10-NEXT: s_mov_b32 s5, s7
2000 ; GFX10-NEXT: s_mov_b32 s6, s8
2001 ; GFX10-NEXT: s_mov_b32 s7, s9
2002 ; GFX10-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2003 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2004 ; GFX10-NEXT: ; return to shader part epilog
2006 %v = call i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2007 %out = bitcast i64 %v to <2 x float>
2008 ret <2 x float> %out
; Test case: 64-bit image atomic umin on a 1D image from an amdgpu_ps shader.
; The body calls llvm.amdgcn.image.atomic.umin.1d.i64.i32 with %data, %s and the
; inreg %rsrc (both trailing i32 operands are 0) and returns the i64 result
; bitcast to <2 x float>.
; Every run line expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_umin on v[0:1], v2, s[0:7] with dmask:0x3 unorm glc, and then
; s_waitcnt vmcnt(0). The tahiti run additionally waits on expcnt(0); the
; gfx1010 run additionally prints dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing them by hand.
2011 define amdgpu_ps <2 x float> @atomic_umin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2012 ; GFX6-LABEL: atomic_umin_i64_1d:
2013 ; GFX6: ; %bb.0: ; %main_body
2014 ; GFX6-NEXT: s_mov_b32 s0, s2
2015 ; GFX6-NEXT: s_mov_b32 s1, s3
2016 ; GFX6-NEXT: s_mov_b32 s2, s4
2017 ; GFX6-NEXT: s_mov_b32 s3, s5
2018 ; GFX6-NEXT: s_mov_b32 s4, s6
2019 ; GFX6-NEXT: s_mov_b32 s5, s7
2020 ; GFX6-NEXT: s_mov_b32 s6, s8
2021 ; GFX6-NEXT: s_mov_b32 s7, s9
2022 ; GFX6-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2023 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2024 ; GFX6-NEXT: ; return to shader part epilog
2026 ; GFX8-LABEL: atomic_umin_i64_1d:
2027 ; GFX8: ; %bb.0: ; %main_body
2028 ; GFX8-NEXT: s_mov_b32 s0, s2
2029 ; GFX8-NEXT: s_mov_b32 s1, s3
2030 ; GFX8-NEXT: s_mov_b32 s2, s4
2031 ; GFX8-NEXT: s_mov_b32 s3, s5
2032 ; GFX8-NEXT: s_mov_b32 s4, s6
2033 ; GFX8-NEXT: s_mov_b32 s5, s7
2034 ; GFX8-NEXT: s_mov_b32 s6, s8
2035 ; GFX8-NEXT: s_mov_b32 s7, s9
2036 ; GFX8-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2037 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2038 ; GFX8-NEXT: ; return to shader part epilog
2040 ; GFX900-LABEL: atomic_umin_i64_1d:
2041 ; GFX900: ; %bb.0: ; %main_body
2042 ; GFX900-NEXT: s_mov_b32 s0, s2
2043 ; GFX900-NEXT: s_mov_b32 s1, s3
2044 ; GFX900-NEXT: s_mov_b32 s2, s4
2045 ; GFX900-NEXT: s_mov_b32 s3, s5
2046 ; GFX900-NEXT: s_mov_b32 s4, s6
2047 ; GFX900-NEXT: s_mov_b32 s5, s7
2048 ; GFX900-NEXT: s_mov_b32 s6, s8
2049 ; GFX900-NEXT: s_mov_b32 s7, s9
2050 ; GFX900-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2051 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2052 ; GFX900-NEXT: ; return to shader part epilog
2054 ; GFX90A-LABEL: atomic_umin_i64_1d:
2055 ; GFX90A: ; %bb.0: ; %main_body
2056 ; GFX90A-NEXT: s_mov_b32 s0, s2
2057 ; GFX90A-NEXT: s_mov_b32 s1, s3
2058 ; GFX90A-NEXT: s_mov_b32 s2, s4
2059 ; GFX90A-NEXT: s_mov_b32 s3, s5
2060 ; GFX90A-NEXT: s_mov_b32 s4, s6
2061 ; GFX90A-NEXT: s_mov_b32 s5, s7
2062 ; GFX90A-NEXT: s_mov_b32 s6, s8
2063 ; GFX90A-NEXT: s_mov_b32 s7, s9
2064 ; GFX90A-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2065 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2066 ; GFX90A-NEXT: ; return to shader part epilog
2068 ; GFX10-LABEL: atomic_umin_i64_1d:
2069 ; GFX10: ; %bb.0: ; %main_body
2070 ; GFX10-NEXT: s_mov_b32 s0, s2
2071 ; GFX10-NEXT: s_mov_b32 s1, s3
2072 ; GFX10-NEXT: s_mov_b32 s2, s4
2073 ; GFX10-NEXT: s_mov_b32 s3, s5
2074 ; GFX10-NEXT: s_mov_b32 s4, s6
2075 ; GFX10-NEXT: s_mov_b32 s5, s7
2076 ; GFX10-NEXT: s_mov_b32 s6, s8
2077 ; GFX10-NEXT: s_mov_b32 s7, s9
2078 ; GFX10-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2079 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2080 ; GFX10-NEXT: ; return to shader part epilog
2082 %v = call i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2083 %out = bitcast i64 %v to <2 x float>
2084 ret <2 x float> %out
; Test case: 64-bit image atomic smax on a 1D image from an amdgpu_ps shader.
; The body calls llvm.amdgcn.image.atomic.smax.1d.i64.i32 with %data, %s and the
; inreg %rsrc (both trailing i32 operands are 0) and returns the i64 result
; bitcast to <2 x float>.
; Every run line expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_smax on v[0:1], v2, s[0:7] with dmask:0x3 unorm glc, and then
; s_waitcnt vmcnt(0). The tahiti run additionally waits on expcnt(0); the
; gfx1010 run additionally prints dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing them by hand.
2087 define amdgpu_ps <2 x float> @atomic_smax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2088 ; GFX6-LABEL: atomic_smax_i64_1d:
2089 ; GFX6: ; %bb.0: ; %main_body
2090 ; GFX6-NEXT: s_mov_b32 s0, s2
2091 ; GFX6-NEXT: s_mov_b32 s1, s3
2092 ; GFX6-NEXT: s_mov_b32 s2, s4
2093 ; GFX6-NEXT: s_mov_b32 s3, s5
2094 ; GFX6-NEXT: s_mov_b32 s4, s6
2095 ; GFX6-NEXT: s_mov_b32 s5, s7
2096 ; GFX6-NEXT: s_mov_b32 s6, s8
2097 ; GFX6-NEXT: s_mov_b32 s7, s9
2098 ; GFX6-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2099 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2100 ; GFX6-NEXT: ; return to shader part epilog
2102 ; GFX8-LABEL: atomic_smax_i64_1d:
2103 ; GFX8: ; %bb.0: ; %main_body
2104 ; GFX8-NEXT: s_mov_b32 s0, s2
2105 ; GFX8-NEXT: s_mov_b32 s1, s3
2106 ; GFX8-NEXT: s_mov_b32 s2, s4
2107 ; GFX8-NEXT: s_mov_b32 s3, s5
2108 ; GFX8-NEXT: s_mov_b32 s4, s6
2109 ; GFX8-NEXT: s_mov_b32 s5, s7
2110 ; GFX8-NEXT: s_mov_b32 s6, s8
2111 ; GFX8-NEXT: s_mov_b32 s7, s9
2112 ; GFX8-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2113 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2114 ; GFX8-NEXT: ; return to shader part epilog
2116 ; GFX900-LABEL: atomic_smax_i64_1d:
2117 ; GFX900: ; %bb.0: ; %main_body
2118 ; GFX900-NEXT: s_mov_b32 s0, s2
2119 ; GFX900-NEXT: s_mov_b32 s1, s3
2120 ; GFX900-NEXT: s_mov_b32 s2, s4
2121 ; GFX900-NEXT: s_mov_b32 s3, s5
2122 ; GFX900-NEXT: s_mov_b32 s4, s6
2123 ; GFX900-NEXT: s_mov_b32 s5, s7
2124 ; GFX900-NEXT: s_mov_b32 s6, s8
2125 ; GFX900-NEXT: s_mov_b32 s7, s9
2126 ; GFX900-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2127 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2128 ; GFX900-NEXT: ; return to shader part epilog
2130 ; GFX90A-LABEL: atomic_smax_i64_1d:
2131 ; GFX90A: ; %bb.0: ; %main_body
2132 ; GFX90A-NEXT: s_mov_b32 s0, s2
2133 ; GFX90A-NEXT: s_mov_b32 s1, s3
2134 ; GFX90A-NEXT: s_mov_b32 s2, s4
2135 ; GFX90A-NEXT: s_mov_b32 s3, s5
2136 ; GFX90A-NEXT: s_mov_b32 s4, s6
2137 ; GFX90A-NEXT: s_mov_b32 s5, s7
2138 ; GFX90A-NEXT: s_mov_b32 s6, s8
2139 ; GFX90A-NEXT: s_mov_b32 s7, s9
2140 ; GFX90A-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2141 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2142 ; GFX90A-NEXT: ; return to shader part epilog
2144 ; GFX10-LABEL: atomic_smax_i64_1d:
2145 ; GFX10: ; %bb.0: ; %main_body
2146 ; GFX10-NEXT: s_mov_b32 s0, s2
2147 ; GFX10-NEXT: s_mov_b32 s1, s3
2148 ; GFX10-NEXT: s_mov_b32 s2, s4
2149 ; GFX10-NEXT: s_mov_b32 s3, s5
2150 ; GFX10-NEXT: s_mov_b32 s4, s6
2151 ; GFX10-NEXT: s_mov_b32 s5, s7
2152 ; GFX10-NEXT: s_mov_b32 s6, s8
2153 ; GFX10-NEXT: s_mov_b32 s7, s9
2154 ; GFX10-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2155 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2156 ; GFX10-NEXT: ; return to shader part epilog
2158 %v = call i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2159 %out = bitcast i64 %v to <2 x float>
2160 ret <2 x float> %out
; Test case: 64-bit image atomic umax on a 1D image from an amdgpu_ps shader.
; The body calls llvm.amdgcn.image.atomic.umax.1d.i64.i32 with %data, %s and the
; inreg %rsrc (both trailing i32 operands are 0) and returns the i64 result
; bitcast to <2 x float>.
; Every run line expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_umax on v[0:1], v2, s[0:7] with dmask:0x3 unorm glc, and then
; s_waitcnt vmcnt(0). The tahiti run additionally waits on expcnt(0); the
; gfx1010 run additionally prints dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing them by hand.
2163 define amdgpu_ps <2 x float> @atomic_umax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2164 ; GFX6-LABEL: atomic_umax_i64_1d:
2165 ; GFX6: ; %bb.0: ; %main_body
2166 ; GFX6-NEXT: s_mov_b32 s0, s2
2167 ; GFX6-NEXT: s_mov_b32 s1, s3
2168 ; GFX6-NEXT: s_mov_b32 s2, s4
2169 ; GFX6-NEXT: s_mov_b32 s3, s5
2170 ; GFX6-NEXT: s_mov_b32 s4, s6
2171 ; GFX6-NEXT: s_mov_b32 s5, s7
2172 ; GFX6-NEXT: s_mov_b32 s6, s8
2173 ; GFX6-NEXT: s_mov_b32 s7, s9
2174 ; GFX6-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2175 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2176 ; GFX6-NEXT: ; return to shader part epilog
2178 ; GFX8-LABEL: atomic_umax_i64_1d:
2179 ; GFX8: ; %bb.0: ; %main_body
2180 ; GFX8-NEXT: s_mov_b32 s0, s2
2181 ; GFX8-NEXT: s_mov_b32 s1, s3
2182 ; GFX8-NEXT: s_mov_b32 s2, s4
2183 ; GFX8-NEXT: s_mov_b32 s3, s5
2184 ; GFX8-NEXT: s_mov_b32 s4, s6
2185 ; GFX8-NEXT: s_mov_b32 s5, s7
2186 ; GFX8-NEXT: s_mov_b32 s6, s8
2187 ; GFX8-NEXT: s_mov_b32 s7, s9
2188 ; GFX8-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2189 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2190 ; GFX8-NEXT: ; return to shader part epilog
2192 ; GFX900-LABEL: atomic_umax_i64_1d:
2193 ; GFX900: ; %bb.0: ; %main_body
2194 ; GFX900-NEXT: s_mov_b32 s0, s2
2195 ; GFX900-NEXT: s_mov_b32 s1, s3
2196 ; GFX900-NEXT: s_mov_b32 s2, s4
2197 ; GFX900-NEXT: s_mov_b32 s3, s5
2198 ; GFX900-NEXT: s_mov_b32 s4, s6
2199 ; GFX900-NEXT: s_mov_b32 s5, s7
2200 ; GFX900-NEXT: s_mov_b32 s6, s8
2201 ; GFX900-NEXT: s_mov_b32 s7, s9
2202 ; GFX900-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2203 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2204 ; GFX900-NEXT: ; return to shader part epilog
2206 ; GFX90A-LABEL: atomic_umax_i64_1d:
2207 ; GFX90A: ; %bb.0: ; %main_body
2208 ; GFX90A-NEXT: s_mov_b32 s0, s2
2209 ; GFX90A-NEXT: s_mov_b32 s1, s3
2210 ; GFX90A-NEXT: s_mov_b32 s2, s4
2211 ; GFX90A-NEXT: s_mov_b32 s3, s5
2212 ; GFX90A-NEXT: s_mov_b32 s4, s6
2213 ; GFX90A-NEXT: s_mov_b32 s5, s7
2214 ; GFX90A-NEXT: s_mov_b32 s6, s8
2215 ; GFX90A-NEXT: s_mov_b32 s7, s9
2216 ; GFX90A-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2217 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2218 ; GFX90A-NEXT: ; return to shader part epilog
2220 ; GFX10-LABEL: atomic_umax_i64_1d:
2221 ; GFX10: ; %bb.0: ; %main_body
2222 ; GFX10-NEXT: s_mov_b32 s0, s2
2223 ; GFX10-NEXT: s_mov_b32 s1, s3
2224 ; GFX10-NEXT: s_mov_b32 s2, s4
2225 ; GFX10-NEXT: s_mov_b32 s3, s5
2226 ; GFX10-NEXT: s_mov_b32 s4, s6
2227 ; GFX10-NEXT: s_mov_b32 s5, s7
2228 ; GFX10-NEXT: s_mov_b32 s6, s8
2229 ; GFX10-NEXT: s_mov_b32 s7, s9
2230 ; GFX10-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2231 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2232 ; GFX10-NEXT: ; return to shader part epilog
2234 %v = call i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2235 %out = bitcast i64 %v to <2 x float>
2236 ret <2 x float> %out
; Test case: 64-bit image atomic and on a 1D image from an amdgpu_ps shader.
; The body calls llvm.amdgcn.image.atomic.and.1d.i64.i32 with %data, %s and the
; inreg %rsrc (both trailing i32 operands are 0) and returns the i64 result
; bitcast to <2 x float>.
; Every run line expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_and on v[0:1], v2, s[0:7] with dmask:0x3 unorm glc, and then
; s_waitcnt vmcnt(0). The tahiti run additionally waits on expcnt(0); the
; gfx1010 run additionally prints dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing them by hand.
2239 define amdgpu_ps <2 x float> @atomic_and_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2240 ; GFX6-LABEL: atomic_and_i64_1d:
2241 ; GFX6: ; %bb.0: ; %main_body
2242 ; GFX6-NEXT: s_mov_b32 s0, s2
2243 ; GFX6-NEXT: s_mov_b32 s1, s3
2244 ; GFX6-NEXT: s_mov_b32 s2, s4
2245 ; GFX6-NEXT: s_mov_b32 s3, s5
2246 ; GFX6-NEXT: s_mov_b32 s4, s6
2247 ; GFX6-NEXT: s_mov_b32 s5, s7
2248 ; GFX6-NEXT: s_mov_b32 s6, s8
2249 ; GFX6-NEXT: s_mov_b32 s7, s9
2250 ; GFX6-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2251 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2252 ; GFX6-NEXT: ; return to shader part epilog
2254 ; GFX8-LABEL: atomic_and_i64_1d:
2255 ; GFX8: ; %bb.0: ; %main_body
2256 ; GFX8-NEXT: s_mov_b32 s0, s2
2257 ; GFX8-NEXT: s_mov_b32 s1, s3
2258 ; GFX8-NEXT: s_mov_b32 s2, s4
2259 ; GFX8-NEXT: s_mov_b32 s3, s5
2260 ; GFX8-NEXT: s_mov_b32 s4, s6
2261 ; GFX8-NEXT: s_mov_b32 s5, s7
2262 ; GFX8-NEXT: s_mov_b32 s6, s8
2263 ; GFX8-NEXT: s_mov_b32 s7, s9
2264 ; GFX8-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2265 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2266 ; GFX8-NEXT: ; return to shader part epilog
2268 ; GFX900-LABEL: atomic_and_i64_1d:
2269 ; GFX900: ; %bb.0: ; %main_body
2270 ; GFX900-NEXT: s_mov_b32 s0, s2
2271 ; GFX900-NEXT: s_mov_b32 s1, s3
2272 ; GFX900-NEXT: s_mov_b32 s2, s4
2273 ; GFX900-NEXT: s_mov_b32 s3, s5
2274 ; GFX900-NEXT: s_mov_b32 s4, s6
2275 ; GFX900-NEXT: s_mov_b32 s5, s7
2276 ; GFX900-NEXT: s_mov_b32 s6, s8
2277 ; GFX900-NEXT: s_mov_b32 s7, s9
2278 ; GFX900-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2279 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2280 ; GFX900-NEXT: ; return to shader part epilog
2282 ; GFX90A-LABEL: atomic_and_i64_1d:
2283 ; GFX90A: ; %bb.0: ; %main_body
2284 ; GFX90A-NEXT: s_mov_b32 s0, s2
2285 ; GFX90A-NEXT: s_mov_b32 s1, s3
2286 ; GFX90A-NEXT: s_mov_b32 s2, s4
2287 ; GFX90A-NEXT: s_mov_b32 s3, s5
2288 ; GFX90A-NEXT: s_mov_b32 s4, s6
2289 ; GFX90A-NEXT: s_mov_b32 s5, s7
2290 ; GFX90A-NEXT: s_mov_b32 s6, s8
2291 ; GFX90A-NEXT: s_mov_b32 s7, s9
2292 ; GFX90A-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2293 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2294 ; GFX90A-NEXT: ; return to shader part epilog
2296 ; GFX10-LABEL: atomic_and_i64_1d:
2297 ; GFX10: ; %bb.0: ; %main_body
2298 ; GFX10-NEXT: s_mov_b32 s0, s2
2299 ; GFX10-NEXT: s_mov_b32 s1, s3
2300 ; GFX10-NEXT: s_mov_b32 s2, s4
2301 ; GFX10-NEXT: s_mov_b32 s3, s5
2302 ; GFX10-NEXT: s_mov_b32 s4, s6
2303 ; GFX10-NEXT: s_mov_b32 s5, s7
2304 ; GFX10-NEXT: s_mov_b32 s6, s8
2305 ; GFX10-NEXT: s_mov_b32 s7, s9
2306 ; GFX10-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2307 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2308 ; GFX10-NEXT: ; return to shader part epilog
2310 %v = call i64 @llvm.amdgcn.image.atomic.and.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2311 %out = bitcast i64 %v to <2 x float>
2312 ret <2 x float> %out
; Test case: 64-bit image atomic or on a 1D image from an amdgpu_ps shader.
; The body calls llvm.amdgcn.image.atomic.or.1d.i64.i32 with %data, %s and the
; inreg %rsrc (both trailing i32 operands are 0) and returns the i64 result
; bitcast to <2 x float>.
; Every run line expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_or on v[0:1], v2, s[0:7] with dmask:0x3 unorm glc, and then
; s_waitcnt vmcnt(0). The tahiti run additionally waits on expcnt(0); the
; gfx1010 run additionally prints dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing them by hand.
2315 define amdgpu_ps <2 x float> @atomic_or_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2316 ; GFX6-LABEL: atomic_or_i64_1d:
2317 ; GFX6: ; %bb.0: ; %main_body
2318 ; GFX6-NEXT: s_mov_b32 s0, s2
2319 ; GFX6-NEXT: s_mov_b32 s1, s3
2320 ; GFX6-NEXT: s_mov_b32 s2, s4
2321 ; GFX6-NEXT: s_mov_b32 s3, s5
2322 ; GFX6-NEXT: s_mov_b32 s4, s6
2323 ; GFX6-NEXT: s_mov_b32 s5, s7
2324 ; GFX6-NEXT: s_mov_b32 s6, s8
2325 ; GFX6-NEXT: s_mov_b32 s7, s9
2326 ; GFX6-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2327 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2328 ; GFX6-NEXT: ; return to shader part epilog
2330 ; GFX8-LABEL: atomic_or_i64_1d:
2331 ; GFX8: ; %bb.0: ; %main_body
2332 ; GFX8-NEXT: s_mov_b32 s0, s2
2333 ; GFX8-NEXT: s_mov_b32 s1, s3
2334 ; GFX8-NEXT: s_mov_b32 s2, s4
2335 ; GFX8-NEXT: s_mov_b32 s3, s5
2336 ; GFX8-NEXT: s_mov_b32 s4, s6
2337 ; GFX8-NEXT: s_mov_b32 s5, s7
2338 ; GFX8-NEXT: s_mov_b32 s6, s8
2339 ; GFX8-NEXT: s_mov_b32 s7, s9
2340 ; GFX8-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2341 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2342 ; GFX8-NEXT: ; return to shader part epilog
2344 ; GFX900-LABEL: atomic_or_i64_1d:
2345 ; GFX900: ; %bb.0: ; %main_body
2346 ; GFX900-NEXT: s_mov_b32 s0, s2
2347 ; GFX900-NEXT: s_mov_b32 s1, s3
2348 ; GFX900-NEXT: s_mov_b32 s2, s4
2349 ; GFX900-NEXT: s_mov_b32 s3, s5
2350 ; GFX900-NEXT: s_mov_b32 s4, s6
2351 ; GFX900-NEXT: s_mov_b32 s5, s7
2352 ; GFX900-NEXT: s_mov_b32 s6, s8
2353 ; GFX900-NEXT: s_mov_b32 s7, s9
2354 ; GFX900-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2355 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2356 ; GFX900-NEXT: ; return to shader part epilog
2358 ; GFX90A-LABEL: atomic_or_i64_1d:
2359 ; GFX90A: ; %bb.0: ; %main_body
2360 ; GFX90A-NEXT: s_mov_b32 s0, s2
2361 ; GFX90A-NEXT: s_mov_b32 s1, s3
2362 ; GFX90A-NEXT: s_mov_b32 s2, s4
2363 ; GFX90A-NEXT: s_mov_b32 s3, s5
2364 ; GFX90A-NEXT: s_mov_b32 s4, s6
2365 ; GFX90A-NEXT: s_mov_b32 s5, s7
2366 ; GFX90A-NEXT: s_mov_b32 s6, s8
2367 ; GFX90A-NEXT: s_mov_b32 s7, s9
2368 ; GFX90A-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2369 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2370 ; GFX90A-NEXT: ; return to shader part epilog
2372 ; GFX10-LABEL: atomic_or_i64_1d:
2373 ; GFX10: ; %bb.0: ; %main_body
2374 ; GFX10-NEXT: s_mov_b32 s0, s2
2375 ; GFX10-NEXT: s_mov_b32 s1, s3
2376 ; GFX10-NEXT: s_mov_b32 s2, s4
2377 ; GFX10-NEXT: s_mov_b32 s3, s5
2378 ; GFX10-NEXT: s_mov_b32 s4, s6
2379 ; GFX10-NEXT: s_mov_b32 s5, s7
2380 ; GFX10-NEXT: s_mov_b32 s6, s8
2381 ; GFX10-NEXT: s_mov_b32 s7, s9
2382 ; GFX10-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2383 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2384 ; GFX10-NEXT: ; return to shader part epilog
2386 %v = call i64 @llvm.amdgcn.image.atomic.or.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2387 %out = bitcast i64 %v to <2 x float>
2388 ret <2 x float> %out
; Test case: 64-bit image atomic xor on a 1D image from an amdgpu_ps shader.
; The body calls llvm.amdgcn.image.atomic.xor.1d.i64.i32 with %data, %s and the
; inreg %rsrc (both trailing i32 operands are 0) and returns the i64 result
; bitcast to <2 x float>.
; Every run line expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_xor on v[0:1], v2, s[0:7] with dmask:0x3 unorm glc, and then
; s_waitcnt vmcnt(0). The tahiti run additionally waits on expcnt(0); the
; gfx1010 run additionally prints dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing them by hand.
2391 define amdgpu_ps <2 x float> @atomic_xor_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2392 ; GFX6-LABEL: atomic_xor_i64_1d:
2393 ; GFX6: ; %bb.0: ; %main_body
2394 ; GFX6-NEXT: s_mov_b32 s0, s2
2395 ; GFX6-NEXT: s_mov_b32 s1, s3
2396 ; GFX6-NEXT: s_mov_b32 s2, s4
2397 ; GFX6-NEXT: s_mov_b32 s3, s5
2398 ; GFX6-NEXT: s_mov_b32 s4, s6
2399 ; GFX6-NEXT: s_mov_b32 s5, s7
2400 ; GFX6-NEXT: s_mov_b32 s6, s8
2401 ; GFX6-NEXT: s_mov_b32 s7, s9
2402 ; GFX6-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2403 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2404 ; GFX6-NEXT: ; return to shader part epilog
2406 ; GFX8-LABEL: atomic_xor_i64_1d:
2407 ; GFX8: ; %bb.0: ; %main_body
2408 ; GFX8-NEXT: s_mov_b32 s0, s2
2409 ; GFX8-NEXT: s_mov_b32 s1, s3
2410 ; GFX8-NEXT: s_mov_b32 s2, s4
2411 ; GFX8-NEXT: s_mov_b32 s3, s5
2412 ; GFX8-NEXT: s_mov_b32 s4, s6
2413 ; GFX8-NEXT: s_mov_b32 s5, s7
2414 ; GFX8-NEXT: s_mov_b32 s6, s8
2415 ; GFX8-NEXT: s_mov_b32 s7, s9
2416 ; GFX8-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2417 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2418 ; GFX8-NEXT: ; return to shader part epilog
2420 ; GFX900-LABEL: atomic_xor_i64_1d:
2421 ; GFX900: ; %bb.0: ; %main_body
2422 ; GFX900-NEXT: s_mov_b32 s0, s2
2423 ; GFX900-NEXT: s_mov_b32 s1, s3
2424 ; GFX900-NEXT: s_mov_b32 s2, s4
2425 ; GFX900-NEXT: s_mov_b32 s3, s5
2426 ; GFX900-NEXT: s_mov_b32 s4, s6
2427 ; GFX900-NEXT: s_mov_b32 s5, s7
2428 ; GFX900-NEXT: s_mov_b32 s6, s8
2429 ; GFX900-NEXT: s_mov_b32 s7, s9
2430 ; GFX900-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2431 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2432 ; GFX900-NEXT: ; return to shader part epilog
2434 ; GFX90A-LABEL: atomic_xor_i64_1d:
2435 ; GFX90A: ; %bb.0: ; %main_body
2436 ; GFX90A-NEXT: s_mov_b32 s0, s2
2437 ; GFX90A-NEXT: s_mov_b32 s1, s3
2438 ; GFX90A-NEXT: s_mov_b32 s2, s4
2439 ; GFX90A-NEXT: s_mov_b32 s3, s5
2440 ; GFX90A-NEXT: s_mov_b32 s4, s6
2441 ; GFX90A-NEXT: s_mov_b32 s5, s7
2442 ; GFX90A-NEXT: s_mov_b32 s6, s8
2443 ; GFX90A-NEXT: s_mov_b32 s7, s9
2444 ; GFX90A-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2445 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2446 ; GFX90A-NEXT: ; return to shader part epilog
2448 ; GFX10-LABEL: atomic_xor_i64_1d:
2449 ; GFX10: ; %bb.0: ; %main_body
2450 ; GFX10-NEXT: s_mov_b32 s0, s2
2451 ; GFX10-NEXT: s_mov_b32 s1, s3
2452 ; GFX10-NEXT: s_mov_b32 s2, s4
2453 ; GFX10-NEXT: s_mov_b32 s3, s5
2454 ; GFX10-NEXT: s_mov_b32 s4, s6
2455 ; GFX10-NEXT: s_mov_b32 s5, s7
2456 ; GFX10-NEXT: s_mov_b32 s6, s8
2457 ; GFX10-NEXT: s_mov_b32 s7, s9
2458 ; GFX10-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2459 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2460 ; GFX10-NEXT: ; return to shader part epilog
2462 %v = call i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2463 %out = bitcast i64 %v to <2 x float>
2464 ret <2 x float> %out
; Test case: 64-bit image atomic inc on a 1D image from an amdgpu_ps shader.
; The body calls llvm.amdgcn.image.atomic.inc.1d.i64.i32 with %data, %s and the
; inreg %rsrc (both trailing i32 operands are 0) and returns the i64 result
; bitcast to <2 x float>.
; Every run line expects eight s_mov_b32 copies (s2..s9 into s0..s7), one
; image_atomic_inc on v[0:1], v2, s[0:7] with dmask:0x3 unorm glc, and then
; s_waitcnt vmcnt(0). The tahiti run additionally waits on expcnt(0); the
; gfx1010 run additionally prints dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing them by hand.
2467 define amdgpu_ps <2 x float> @atomic_inc_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2468 ; GFX6-LABEL: atomic_inc_i64_1d:
2469 ; GFX6: ; %bb.0: ; %main_body
2470 ; GFX6-NEXT: s_mov_b32 s0, s2
2471 ; GFX6-NEXT: s_mov_b32 s1, s3
2472 ; GFX6-NEXT: s_mov_b32 s2, s4
2473 ; GFX6-NEXT: s_mov_b32 s3, s5
2474 ; GFX6-NEXT: s_mov_b32 s4, s6
2475 ; GFX6-NEXT: s_mov_b32 s5, s7
2476 ; GFX6-NEXT: s_mov_b32 s6, s8
2477 ; GFX6-NEXT: s_mov_b32 s7, s9
2478 ; GFX6-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2479 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2480 ; GFX6-NEXT: ; return to shader part epilog
2482 ; GFX8-LABEL: atomic_inc_i64_1d:
2483 ; GFX8: ; %bb.0: ; %main_body
2484 ; GFX8-NEXT: s_mov_b32 s0, s2
2485 ; GFX8-NEXT: s_mov_b32 s1, s3
2486 ; GFX8-NEXT: s_mov_b32 s2, s4
2487 ; GFX8-NEXT: s_mov_b32 s3, s5
2488 ; GFX8-NEXT: s_mov_b32 s4, s6
2489 ; GFX8-NEXT: s_mov_b32 s5, s7
2490 ; GFX8-NEXT: s_mov_b32 s6, s8
2491 ; GFX8-NEXT: s_mov_b32 s7, s9
2492 ; GFX8-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2493 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2494 ; GFX8-NEXT: ; return to shader part epilog
2496 ; GFX900-LABEL: atomic_inc_i64_1d:
2497 ; GFX900: ; %bb.0: ; %main_body
2498 ; GFX900-NEXT: s_mov_b32 s0, s2
2499 ; GFX900-NEXT: s_mov_b32 s1, s3
2500 ; GFX900-NEXT: s_mov_b32 s2, s4
2501 ; GFX900-NEXT: s_mov_b32 s3, s5
2502 ; GFX900-NEXT: s_mov_b32 s4, s6
2503 ; GFX900-NEXT: s_mov_b32 s5, s7
2504 ; GFX900-NEXT: s_mov_b32 s6, s8
2505 ; GFX900-NEXT: s_mov_b32 s7, s9
2506 ; GFX900-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2507 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2508 ; GFX900-NEXT: ; return to shader part epilog
2510 ; GFX90A-LABEL: atomic_inc_i64_1d:
2511 ; GFX90A: ; %bb.0: ; %main_body
2512 ; GFX90A-NEXT: s_mov_b32 s0, s2
2513 ; GFX90A-NEXT: s_mov_b32 s1, s3
2514 ; GFX90A-NEXT: s_mov_b32 s2, s4
2515 ; GFX90A-NEXT: s_mov_b32 s3, s5
2516 ; GFX90A-NEXT: s_mov_b32 s4, s6
2517 ; GFX90A-NEXT: s_mov_b32 s5, s7
2518 ; GFX90A-NEXT: s_mov_b32 s6, s8
2519 ; GFX90A-NEXT: s_mov_b32 s7, s9
2520 ; GFX90A-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2521 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2522 ; GFX90A-NEXT: ; return to shader part epilog
2524 ; GFX10-LABEL: atomic_inc_i64_1d:
2525 ; GFX10: ; %bb.0: ; %main_body
2526 ; GFX10-NEXT: s_mov_b32 s0, s2
2527 ; GFX10-NEXT: s_mov_b32 s1, s3
2528 ; GFX10-NEXT: s_mov_b32 s2, s4
2529 ; GFX10-NEXT: s_mov_b32 s3, s5
2530 ; GFX10-NEXT: s_mov_b32 s4, s6
2531 ; GFX10-NEXT: s_mov_b32 s5, s7
2532 ; GFX10-NEXT: s_mov_b32 s6, s8
2533 ; GFX10-NEXT: s_mov_b32 s7, s9
2534 ; GFX10-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2535 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2536 ; GFX10-NEXT: ; return to shader part epilog
2538 %v = call i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2539 %out = bitcast i64 %v to <2 x float>
2540 ret <2 x float> %out
; 64-bit image atomic decrement on a 1D image. The i64 result of
; llvm.amdgcn.image.atomic.dec.1d is bitcast to <2 x float> and returned.
; Every run expects one image_atomic_dec with a two-register data operand
; (v[0:1]) and dmask 0x3; only the GFX10 run additionally prints a dim operand.
2543 define amdgpu_ps <2 x float> @atomic_dec_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2544 ; GFX6-LABEL: atomic_dec_i64_1d:
2545 ; GFX6: ; %bb.0: ; %main_body
2546 ; GFX6-NEXT: s_mov_b32 s0, s2
2547 ; GFX6-NEXT: s_mov_b32 s1, s3
2548 ; GFX6-NEXT: s_mov_b32 s2, s4
2549 ; GFX6-NEXT: s_mov_b32 s3, s5
2550 ; GFX6-NEXT: s_mov_b32 s4, s6
2551 ; GFX6-NEXT: s_mov_b32 s5, s7
2552 ; GFX6-NEXT: s_mov_b32 s6, s8
2553 ; GFX6-NEXT: s_mov_b32 s7, s9
2554 ; GFX6-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2555 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2556 ; GFX6-NEXT: ; return to shader part epilog
2558 ; GFX8-LABEL: atomic_dec_i64_1d:
2559 ; GFX8: ; %bb.0: ; %main_body
2560 ; GFX8-NEXT: s_mov_b32 s0, s2
2561 ; GFX8-NEXT: s_mov_b32 s1, s3
2562 ; GFX8-NEXT: s_mov_b32 s2, s4
2563 ; GFX8-NEXT: s_mov_b32 s3, s5
2564 ; GFX8-NEXT: s_mov_b32 s4, s6
2565 ; GFX8-NEXT: s_mov_b32 s5, s7
2566 ; GFX8-NEXT: s_mov_b32 s6, s8
2567 ; GFX8-NEXT: s_mov_b32 s7, s9
2568 ; GFX8-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2569 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2570 ; GFX8-NEXT: ; return to shader part epilog
2572 ; GFX900-LABEL: atomic_dec_i64_1d:
2573 ; GFX900: ; %bb.0: ; %main_body
2574 ; GFX900-NEXT: s_mov_b32 s0, s2
2575 ; GFX900-NEXT: s_mov_b32 s1, s3
2576 ; GFX900-NEXT: s_mov_b32 s2, s4
2577 ; GFX900-NEXT: s_mov_b32 s3, s5
2578 ; GFX900-NEXT: s_mov_b32 s4, s6
2579 ; GFX900-NEXT: s_mov_b32 s5, s7
2580 ; GFX900-NEXT: s_mov_b32 s6, s8
2581 ; GFX900-NEXT: s_mov_b32 s7, s9
2582 ; GFX900-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2583 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2584 ; GFX900-NEXT: ; return to shader part epilog
2586 ; GFX90A-LABEL: atomic_dec_i64_1d:
2587 ; GFX90A: ; %bb.0: ; %main_body
2588 ; GFX90A-NEXT: s_mov_b32 s0, s2
2589 ; GFX90A-NEXT: s_mov_b32 s1, s3
2590 ; GFX90A-NEXT: s_mov_b32 s2, s4
2591 ; GFX90A-NEXT: s_mov_b32 s3, s5
2592 ; GFX90A-NEXT: s_mov_b32 s4, s6
2593 ; GFX90A-NEXT: s_mov_b32 s5, s7
2594 ; GFX90A-NEXT: s_mov_b32 s6, s8
2595 ; GFX90A-NEXT: s_mov_b32 s7, s9
2596 ; GFX90A-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2597 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2598 ; GFX90A-NEXT: ; return to shader part epilog
2600 ; GFX10-LABEL: atomic_dec_i64_1d:
2601 ; GFX10: ; %bb.0: ; %main_body
2602 ; GFX10-NEXT: s_mov_b32 s0, s2
2603 ; GFX10-NEXT: s_mov_b32 s1, s3
2604 ; GFX10-NEXT: s_mov_b32 s2, s4
2605 ; GFX10-NEXT: s_mov_b32 s3, s5
2606 ; GFX10-NEXT: s_mov_b32 s4, s6
2607 ; GFX10-NEXT: s_mov_b32 s5, s7
2608 ; GFX10-NEXT: s_mov_b32 s6, s8
2609 ; GFX10-NEXT: s_mov_b32 s7, s9
2610 ; GFX10-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2611 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2612 ; GFX10-NEXT: ; return to shader part epilog
2614 %v = call i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2615 %out = bitcast i64 %v to <2 x float>
2616 ret <2 x float> %out
; 64-bit image atomic compare-and-swap on a 1D image, result used. The i64
; returned by llvm.amdgcn.image.atomic.cmpswap.1d is bitcast to <2 x float>.
; With two i64 inputs (%cmp, %swap) every run expects a four-register data
; operand (v[0:3]) and dmask 0xf, with the coordinate in v4.
2619 define amdgpu_ps <2 x float> @atomic_cmpswap_i64_1d(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i32 %s) {
2620 ; GFX6-LABEL: atomic_cmpswap_i64_1d:
2621 ; GFX6: ; %bb.0: ; %main_body
2622 ; GFX6-NEXT: s_mov_b32 s0, s2
2623 ; GFX6-NEXT: s_mov_b32 s1, s3
2624 ; GFX6-NEXT: s_mov_b32 s2, s4
2625 ; GFX6-NEXT: s_mov_b32 s3, s5
2626 ; GFX6-NEXT: s_mov_b32 s4, s6
2627 ; GFX6-NEXT: s_mov_b32 s5, s7
2628 ; GFX6-NEXT: s_mov_b32 s6, s8
2629 ; GFX6-NEXT: s_mov_b32 s7, s9
2630 ; GFX6-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
2631 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2632 ; GFX6-NEXT: ; return to shader part epilog
2634 ; GFX8-LABEL: atomic_cmpswap_i64_1d:
2635 ; GFX8: ; %bb.0: ; %main_body
2636 ; GFX8-NEXT: s_mov_b32 s0, s2
2637 ; GFX8-NEXT: s_mov_b32 s1, s3
2638 ; GFX8-NEXT: s_mov_b32 s2, s4
2639 ; GFX8-NEXT: s_mov_b32 s3, s5
2640 ; GFX8-NEXT: s_mov_b32 s4, s6
2641 ; GFX8-NEXT: s_mov_b32 s5, s7
2642 ; GFX8-NEXT: s_mov_b32 s6, s8
2643 ; GFX8-NEXT: s_mov_b32 s7, s9
2644 ; GFX8-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
2645 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2646 ; GFX8-NEXT: ; return to shader part epilog
2648 ; GFX900-LABEL: atomic_cmpswap_i64_1d:
2649 ; GFX900: ; %bb.0: ; %main_body
2650 ; GFX900-NEXT: s_mov_b32 s0, s2
2651 ; GFX900-NEXT: s_mov_b32 s1, s3
2652 ; GFX900-NEXT: s_mov_b32 s2, s4
2653 ; GFX900-NEXT: s_mov_b32 s3, s5
2654 ; GFX900-NEXT: s_mov_b32 s4, s6
2655 ; GFX900-NEXT: s_mov_b32 s5, s7
2656 ; GFX900-NEXT: s_mov_b32 s6, s8
2657 ; GFX900-NEXT: s_mov_b32 s7, s9
2658 ; GFX900-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
2659 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2660 ; GFX900-NEXT: ; return to shader part epilog
2662 ; GFX90A-LABEL: atomic_cmpswap_i64_1d:
2663 ; GFX90A: ; %bb.0: ; %main_body
2664 ; GFX90A-NEXT: s_mov_b32 s0, s2
2665 ; GFX90A-NEXT: s_mov_b32 s1, s3
2666 ; GFX90A-NEXT: s_mov_b32 s2, s4
2667 ; GFX90A-NEXT: s_mov_b32 s3, s5
2668 ; GFX90A-NEXT: s_mov_b32 s4, s6
2669 ; GFX90A-NEXT: s_mov_b32 s5, s7
2670 ; GFX90A-NEXT: s_mov_b32 s6, s8
2671 ; GFX90A-NEXT: s_mov_b32 s7, s9
2672 ; GFX90A-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
2673 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2674 ; GFX90A-NEXT: ; return to shader part epilog
2676 ; GFX10-LABEL: atomic_cmpswap_i64_1d:
2677 ; GFX10: ; %bb.0: ; %main_body
2678 ; GFX10-NEXT: s_mov_b32 s0, s2
2679 ; GFX10-NEXT: s_mov_b32 s1, s3
2680 ; GFX10-NEXT: s_mov_b32 s2, s4
2681 ; GFX10-NEXT: s_mov_b32 s3, s5
2682 ; GFX10-NEXT: s_mov_b32 s4, s6
2683 ; GFX10-NEXT: s_mov_b32 s5, s7
2684 ; GFX10-NEXT: s_mov_b32 s6, s8
2685 ; GFX10-NEXT: s_mov_b32 s7, s9
2686 ; GFX10-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc
2687 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2688 ; GFX10-NEXT: ; return to shader part epilog
2690 %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2691 %out = bitcast i64 %v to <2 x float>
2692 ret <2 x float> %out
; 64-bit image atomic compare-and-swap on a 1D image whose result is unused:
; the function returns void and %v has no users. The expected code ends in
; s_endpgm with no s_waitcnt, but the instruction is otherwise the same as in
; the returning variant (v[0:3] data operand, dmask 0xf, glc still present).
2695 define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i32 %s) {
2696 ; GFX6-LABEL: atomic_cmpswap_i64_1d_no_return:
2697 ; GFX6: ; %bb.0: ; %main_body
2698 ; GFX6-NEXT: s_mov_b32 s0, s2
2699 ; GFX6-NEXT: s_mov_b32 s1, s3
2700 ; GFX6-NEXT: s_mov_b32 s2, s4
2701 ; GFX6-NEXT: s_mov_b32 s3, s5
2702 ; GFX6-NEXT: s_mov_b32 s4, s6
2703 ; GFX6-NEXT: s_mov_b32 s5, s7
2704 ; GFX6-NEXT: s_mov_b32 s6, s8
2705 ; GFX6-NEXT: s_mov_b32 s7, s9
2706 ; GFX6-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
2707 ; GFX6-NEXT: s_endpgm
2709 ; GFX8-LABEL: atomic_cmpswap_i64_1d_no_return:
2710 ; GFX8: ; %bb.0: ; %main_body
2711 ; GFX8-NEXT: s_mov_b32 s0, s2
2712 ; GFX8-NEXT: s_mov_b32 s1, s3
2713 ; GFX8-NEXT: s_mov_b32 s2, s4
2714 ; GFX8-NEXT: s_mov_b32 s3, s5
2715 ; GFX8-NEXT: s_mov_b32 s4, s6
2716 ; GFX8-NEXT: s_mov_b32 s5, s7
2717 ; GFX8-NEXT: s_mov_b32 s6, s8
2718 ; GFX8-NEXT: s_mov_b32 s7, s9
2719 ; GFX8-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
2720 ; GFX8-NEXT: s_endpgm
2722 ; GFX900-LABEL: atomic_cmpswap_i64_1d_no_return:
2723 ; GFX900: ; %bb.0: ; %main_body
2724 ; GFX900-NEXT: s_mov_b32 s0, s2
2725 ; GFX900-NEXT: s_mov_b32 s1, s3
2726 ; GFX900-NEXT: s_mov_b32 s2, s4
2727 ; GFX900-NEXT: s_mov_b32 s3, s5
2728 ; GFX900-NEXT: s_mov_b32 s4, s6
2729 ; GFX900-NEXT: s_mov_b32 s5, s7
2730 ; GFX900-NEXT: s_mov_b32 s6, s8
2731 ; GFX900-NEXT: s_mov_b32 s7, s9
2732 ; GFX900-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
2733 ; GFX900-NEXT: s_endpgm
2735 ; GFX90A-LABEL: atomic_cmpswap_i64_1d_no_return:
2736 ; GFX90A: ; %bb.0: ; %main_body
2737 ; GFX90A-NEXT: s_mov_b32 s0, s2
2738 ; GFX90A-NEXT: s_mov_b32 s1, s3
2739 ; GFX90A-NEXT: s_mov_b32 s2, s4
2740 ; GFX90A-NEXT: s_mov_b32 s3, s5
2741 ; GFX90A-NEXT: s_mov_b32 s4, s6
2742 ; GFX90A-NEXT: s_mov_b32 s5, s7
2743 ; GFX90A-NEXT: s_mov_b32 s6, s8
2744 ; GFX90A-NEXT: s_mov_b32 s7, s9
2745 ; GFX90A-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
2746 ; GFX90A-NEXT: s_endpgm
2748 ; GFX10-LABEL: atomic_cmpswap_i64_1d_no_return:
2749 ; GFX10: ; %bb.0: ; %main_body
2750 ; GFX10-NEXT: s_mov_b32 s0, s2
2751 ; GFX10-NEXT: s_mov_b32 s1, s3
2752 ; GFX10-NEXT: s_mov_b32 s2, s4
2753 ; GFX10-NEXT: s_mov_b32 s3, s5
2754 ; GFX10-NEXT: s_mov_b32 s4, s6
2755 ; GFX10-NEXT: s_mov_b32 s5, s7
2756 ; GFX10-NEXT: s_mov_b32 s6, s8
2757 ; GFX10-NEXT: s_mov_b32 s7, s9
2758 ; GFX10-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc
2759 ; GFX10-NEXT: s_endpgm
2761 %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; 64-bit image atomic add on a 2D image. The i64 result of
; llvm.amdgcn.image.atomic.add.2d is bitcast to <2 x float> and returned.
; The two coordinates (%s, %t) are expected as a two-register address operand
; (v[2:3]) after the v[0:1] data operand; the GFX10 run prints SQ_RSRC_IMG_2D.
2765 define amdgpu_ps <2 x float> @atomic_add_i64_2d(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t) {
2766 ; GFX6-LABEL: atomic_add_i64_2d:
2767 ; GFX6: ; %bb.0: ; %main_body
2768 ; GFX6-NEXT: s_mov_b32 s0, s2
2769 ; GFX6-NEXT: s_mov_b32 s1, s3
2770 ; GFX6-NEXT: s_mov_b32 s2, s4
2771 ; GFX6-NEXT: s_mov_b32 s3, s5
2772 ; GFX6-NEXT: s_mov_b32 s4, s6
2773 ; GFX6-NEXT: s_mov_b32 s5, s7
2774 ; GFX6-NEXT: s_mov_b32 s6, s8
2775 ; GFX6-NEXT: s_mov_b32 s7, s9
2776 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc
2777 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2778 ; GFX6-NEXT: ; return to shader part epilog
2780 ; GFX8-LABEL: atomic_add_i64_2d:
2781 ; GFX8: ; %bb.0: ; %main_body
2782 ; GFX8-NEXT: s_mov_b32 s0, s2
2783 ; GFX8-NEXT: s_mov_b32 s1, s3
2784 ; GFX8-NEXT: s_mov_b32 s2, s4
2785 ; GFX8-NEXT: s_mov_b32 s3, s5
2786 ; GFX8-NEXT: s_mov_b32 s4, s6
2787 ; GFX8-NEXT: s_mov_b32 s5, s7
2788 ; GFX8-NEXT: s_mov_b32 s6, s8
2789 ; GFX8-NEXT: s_mov_b32 s7, s9
2790 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc
2791 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2792 ; GFX8-NEXT: ; return to shader part epilog
2794 ; GFX900-LABEL: atomic_add_i64_2d:
2795 ; GFX900: ; %bb.0: ; %main_body
2796 ; GFX900-NEXT: s_mov_b32 s0, s2
2797 ; GFX900-NEXT: s_mov_b32 s1, s3
2798 ; GFX900-NEXT: s_mov_b32 s2, s4
2799 ; GFX900-NEXT: s_mov_b32 s3, s5
2800 ; GFX900-NEXT: s_mov_b32 s4, s6
2801 ; GFX900-NEXT: s_mov_b32 s5, s7
2802 ; GFX900-NEXT: s_mov_b32 s6, s8
2803 ; GFX900-NEXT: s_mov_b32 s7, s9
2804 ; GFX900-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc
2805 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2806 ; GFX900-NEXT: ; return to shader part epilog
2808 ; GFX90A-LABEL: atomic_add_i64_2d:
2809 ; GFX90A: ; %bb.0: ; %main_body
2810 ; GFX90A-NEXT: s_mov_b32 s0, s2
2811 ; GFX90A-NEXT: s_mov_b32 s1, s3
2812 ; GFX90A-NEXT: s_mov_b32 s2, s4
2813 ; GFX90A-NEXT: s_mov_b32 s3, s5
2814 ; GFX90A-NEXT: s_mov_b32 s4, s6
2815 ; GFX90A-NEXT: s_mov_b32 s5, s7
2816 ; GFX90A-NEXT: s_mov_b32 s6, s8
2817 ; GFX90A-NEXT: s_mov_b32 s7, s9
2818 ; GFX90A-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc
2819 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2820 ; GFX90A-NEXT: ; return to shader part epilog
2822 ; GFX10-LABEL: atomic_add_i64_2d:
2823 ; GFX10: ; %bb.0: ; %main_body
2824 ; GFX10-NEXT: s_mov_b32 s0, s2
2825 ; GFX10-NEXT: s_mov_b32 s1, s3
2826 ; GFX10-NEXT: s_mov_b32 s2, s4
2827 ; GFX10-NEXT: s_mov_b32 s3, s5
2828 ; GFX10-NEXT: s_mov_b32 s4, s6
2829 ; GFX10-NEXT: s_mov_b32 s5, s7
2830 ; GFX10-NEXT: s_mov_b32 s6, s8
2831 ; GFX10-NEXT: s_mov_b32 s7, s9
2832 ; GFX10-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm glc
2833 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2834 ; GFX10-NEXT: ; return to shader part epilog
2836 %v = call i64 @llvm.amdgcn.image.atomic.add.2d.i64.i32(i64 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
2837 %out = bitcast i64 %v to <2 x float>
2838 ret <2 x float> %out
; 64-bit image atomic add on a 3D image. The i64 result of
; llvm.amdgcn.image.atomic.add.3d is bitcast to <2 x float> and returned.
; The three coordinates (%s, %t, %r) are expected as a three-register address
; operand (v[2:4]); the GFX10 run prints SQ_RSRC_IMG_3D.
2841 define amdgpu_ps <2 x float> @atomic_add_i64_3d(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %r) {
2842 ; GFX6-LABEL: atomic_add_i64_3d:
2843 ; GFX6: ; %bb.0: ; %main_body
2844 ; GFX6-NEXT: s_mov_b32 s0, s2
2845 ; GFX6-NEXT: s_mov_b32 s1, s3
2846 ; GFX6-NEXT: s_mov_b32 s2, s4
2847 ; GFX6-NEXT: s_mov_b32 s3, s5
2848 ; GFX6-NEXT: s_mov_b32 s4, s6
2849 ; GFX6-NEXT: s_mov_b32 s5, s7
2850 ; GFX6-NEXT: s_mov_b32 s6, s8
2851 ; GFX6-NEXT: s_mov_b32 s7, s9
2852 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
2853 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2854 ; GFX6-NEXT: ; return to shader part epilog
2856 ; GFX8-LABEL: atomic_add_i64_3d:
2857 ; GFX8: ; %bb.0: ; %main_body
2858 ; GFX8-NEXT: s_mov_b32 s0, s2
2859 ; GFX8-NEXT: s_mov_b32 s1, s3
2860 ; GFX8-NEXT: s_mov_b32 s2, s4
2861 ; GFX8-NEXT: s_mov_b32 s3, s5
2862 ; GFX8-NEXT: s_mov_b32 s4, s6
2863 ; GFX8-NEXT: s_mov_b32 s5, s7
2864 ; GFX8-NEXT: s_mov_b32 s6, s8
2865 ; GFX8-NEXT: s_mov_b32 s7, s9
2866 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
2867 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2868 ; GFX8-NEXT: ; return to shader part epilog
2870 ; GFX900-LABEL: atomic_add_i64_3d:
2871 ; GFX900: ; %bb.0: ; %main_body
2872 ; GFX900-NEXT: s_mov_b32 s0, s2
2873 ; GFX900-NEXT: s_mov_b32 s1, s3
2874 ; GFX900-NEXT: s_mov_b32 s2, s4
2875 ; GFX900-NEXT: s_mov_b32 s3, s5
2876 ; GFX900-NEXT: s_mov_b32 s4, s6
2877 ; GFX900-NEXT: s_mov_b32 s5, s7
2878 ; GFX900-NEXT: s_mov_b32 s6, s8
2879 ; GFX900-NEXT: s_mov_b32 s7, s9
2880 ; GFX900-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
2881 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2882 ; GFX900-NEXT: ; return to shader part epilog
2884 ; GFX90A-LABEL: atomic_add_i64_3d:
2885 ; GFX90A: ; %bb.0: ; %main_body
2886 ; GFX90A-NEXT: s_mov_b32 s0, s2
2887 ; GFX90A-NEXT: s_mov_b32 s1, s3
2888 ; GFX90A-NEXT: s_mov_b32 s2, s4
2889 ; GFX90A-NEXT: s_mov_b32 s3, s5
2890 ; GFX90A-NEXT: s_mov_b32 s4, s6
2891 ; GFX90A-NEXT: s_mov_b32 s5, s7
2892 ; GFX90A-NEXT: s_mov_b32 s6, s8
2893 ; GFX90A-NEXT: s_mov_b32 s7, s9
2894 ; GFX90A-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
2895 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2896 ; GFX90A-NEXT: ; return to shader part epilog
2898 ; GFX10-LABEL: atomic_add_i64_3d:
2899 ; GFX10: ; %bb.0: ; %main_body
2900 ; GFX10-NEXT: s_mov_b32 s0, s2
2901 ; GFX10-NEXT: s_mov_b32 s1, s3
2902 ; GFX10-NEXT: s_mov_b32 s2, s4
2903 ; GFX10-NEXT: s_mov_b32 s3, s5
2904 ; GFX10-NEXT: s_mov_b32 s4, s6
2905 ; GFX10-NEXT: s_mov_b32 s5, s7
2906 ; GFX10-NEXT: s_mov_b32 s6, s8
2907 ; GFX10-NEXT: s_mov_b32 s7, s9
2908 ; GFX10-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm glc
2909 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2910 ; GFX10-NEXT: ; return to shader part epilog
2912 %v = call i64 @llvm.amdgcn.image.atomic.add.3d.i64.i32(i64 %data, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
2913 %out = bitcast i64 %v to <2 x float>
2914 ret <2 x float> %out
; 64-bit image atomic add on a cube image (%s, %t, %face). The i64 result of
; llvm.amdgcn.image.atomic.add.cube is bitcast to <2 x float> and returned.
; Every run other than GFX10 expects the da modifier on the instruction; the
; GFX10 run expects SQ_RSRC_IMG_CUBE as the dim operand and no da.
2917 define amdgpu_ps <2 x float> @atomic_add_i64_cube(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %face) {
2918 ; GFX6-LABEL: atomic_add_i64_cube:
2919 ; GFX6: ; %bb.0: ; %main_body
2920 ; GFX6-NEXT: s_mov_b32 s0, s2
2921 ; GFX6-NEXT: s_mov_b32 s1, s3
2922 ; GFX6-NEXT: s_mov_b32 s2, s4
2923 ; GFX6-NEXT: s_mov_b32 s3, s5
2924 ; GFX6-NEXT: s_mov_b32 s4, s6
2925 ; GFX6-NEXT: s_mov_b32 s5, s7
2926 ; GFX6-NEXT: s_mov_b32 s6, s8
2927 ; GFX6-NEXT: s_mov_b32 s7, s9
2928 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
2929 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2930 ; GFX6-NEXT: ; return to shader part epilog
2932 ; GFX8-LABEL: atomic_add_i64_cube:
2933 ; GFX8: ; %bb.0: ; %main_body
2934 ; GFX8-NEXT: s_mov_b32 s0, s2
2935 ; GFX8-NEXT: s_mov_b32 s1, s3
2936 ; GFX8-NEXT: s_mov_b32 s2, s4
2937 ; GFX8-NEXT: s_mov_b32 s3, s5
2938 ; GFX8-NEXT: s_mov_b32 s4, s6
2939 ; GFX8-NEXT: s_mov_b32 s5, s7
2940 ; GFX8-NEXT: s_mov_b32 s6, s8
2941 ; GFX8-NEXT: s_mov_b32 s7, s9
2942 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
2943 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2944 ; GFX8-NEXT: ; return to shader part epilog
2946 ; GFX900-LABEL: atomic_add_i64_cube:
2947 ; GFX900: ; %bb.0: ; %main_body
2948 ; GFX900-NEXT: s_mov_b32 s0, s2
2949 ; GFX900-NEXT: s_mov_b32 s1, s3
2950 ; GFX900-NEXT: s_mov_b32 s2, s4
2951 ; GFX900-NEXT: s_mov_b32 s3, s5
2952 ; GFX900-NEXT: s_mov_b32 s4, s6
2953 ; GFX900-NEXT: s_mov_b32 s5, s7
2954 ; GFX900-NEXT: s_mov_b32 s6, s8
2955 ; GFX900-NEXT: s_mov_b32 s7, s9
2956 ; GFX900-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
2957 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2958 ; GFX900-NEXT: ; return to shader part epilog
2960 ; GFX90A-LABEL: atomic_add_i64_cube:
2961 ; GFX90A: ; %bb.0: ; %main_body
2962 ; GFX90A-NEXT: s_mov_b32 s0, s2
2963 ; GFX90A-NEXT: s_mov_b32 s1, s3
2964 ; GFX90A-NEXT: s_mov_b32 s2, s4
2965 ; GFX90A-NEXT: s_mov_b32 s3, s5
2966 ; GFX90A-NEXT: s_mov_b32 s4, s6
2967 ; GFX90A-NEXT: s_mov_b32 s5, s7
2968 ; GFX90A-NEXT: s_mov_b32 s6, s8
2969 ; GFX90A-NEXT: s_mov_b32 s7, s9
2970 ; GFX90A-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
2971 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2972 ; GFX90A-NEXT: ; return to shader part epilog
2974 ; GFX10-LABEL: atomic_add_i64_cube:
2975 ; GFX10: ; %bb.0: ; %main_body
2976 ; GFX10-NEXT: s_mov_b32 s0, s2
2977 ; GFX10-NEXT: s_mov_b32 s1, s3
2978 ; GFX10-NEXT: s_mov_b32 s2, s4
2979 ; GFX10-NEXT: s_mov_b32 s3, s5
2980 ; GFX10-NEXT: s_mov_b32 s4, s6
2981 ; GFX10-NEXT: s_mov_b32 s5, s7
2982 ; GFX10-NEXT: s_mov_b32 s6, s8
2983 ; GFX10-NEXT: s_mov_b32 s7, s9
2984 ; GFX10-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_CUBE unorm glc
2985 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2986 ; GFX10-NEXT: ; return to shader part epilog
2988 %v = call i64 @llvm.amdgcn.image.atomic.add.cube.i64.i32(i64 %data, i32 %s, i32 %t, i32 %face, <8 x i32> %rsrc, i32 0, i32 0)
2989 %out = bitcast i64 %v to <2 x float>
2990 ret <2 x float> %out
; 64-bit image atomic add on a 1D array image (%s, %slice). The i64 result of
; llvm.amdgcn.image.atomic.add.1darray is bitcast to <2 x float> and returned.
; Every run other than GFX10 expects the da modifier with a two-register
; address (v[2:3]); the GFX10 run expects SQ_RSRC_IMG_1D_ARRAY and no da.
2993 define amdgpu_ps <2 x float> @atomic_add_i64_1darray(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %slice) {
2994 ; GFX6-LABEL: atomic_add_i64_1darray:
2995 ; GFX6: ; %bb.0: ; %main_body
2996 ; GFX6-NEXT: s_mov_b32 s0, s2
2997 ; GFX6-NEXT: s_mov_b32 s1, s3
2998 ; GFX6-NEXT: s_mov_b32 s2, s4
2999 ; GFX6-NEXT: s_mov_b32 s3, s5
3000 ; GFX6-NEXT: s_mov_b32 s4, s6
3001 ; GFX6-NEXT: s_mov_b32 s5, s7
3002 ; GFX6-NEXT: s_mov_b32 s6, s8
3003 ; GFX6-NEXT: s_mov_b32 s7, s9
3004 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da
3005 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
3006 ; GFX6-NEXT: ; return to shader part epilog
3008 ; GFX8-LABEL: atomic_add_i64_1darray:
3009 ; GFX8: ; %bb.0: ; %main_body
3010 ; GFX8-NEXT: s_mov_b32 s0, s2
3011 ; GFX8-NEXT: s_mov_b32 s1, s3
3012 ; GFX8-NEXT: s_mov_b32 s2, s4
3013 ; GFX8-NEXT: s_mov_b32 s3, s5
3014 ; GFX8-NEXT: s_mov_b32 s4, s6
3015 ; GFX8-NEXT: s_mov_b32 s5, s7
3016 ; GFX8-NEXT: s_mov_b32 s6, s8
3017 ; GFX8-NEXT: s_mov_b32 s7, s9
3018 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da
3019 ; GFX8-NEXT: s_waitcnt vmcnt(0)
3020 ; GFX8-NEXT: ; return to shader part epilog
3022 ; GFX900-LABEL: atomic_add_i64_1darray:
3023 ; GFX900: ; %bb.0: ; %main_body
3024 ; GFX900-NEXT: s_mov_b32 s0, s2
3025 ; GFX900-NEXT: s_mov_b32 s1, s3
3026 ; GFX900-NEXT: s_mov_b32 s2, s4
3027 ; GFX900-NEXT: s_mov_b32 s3, s5
3028 ; GFX900-NEXT: s_mov_b32 s4, s6
3029 ; GFX900-NEXT: s_mov_b32 s5, s7
3030 ; GFX900-NEXT: s_mov_b32 s6, s8
3031 ; GFX900-NEXT: s_mov_b32 s7, s9
3032 ; GFX900-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da
3033 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3034 ; GFX900-NEXT: ; return to shader part epilog
3036 ; GFX90A-LABEL: atomic_add_i64_1darray:
3037 ; GFX90A: ; %bb.0: ; %main_body
3038 ; GFX90A-NEXT: s_mov_b32 s0, s2
3039 ; GFX90A-NEXT: s_mov_b32 s1, s3
3040 ; GFX90A-NEXT: s_mov_b32 s2, s4
3041 ; GFX90A-NEXT: s_mov_b32 s3, s5
3042 ; GFX90A-NEXT: s_mov_b32 s4, s6
3043 ; GFX90A-NEXT: s_mov_b32 s5, s7
3044 ; GFX90A-NEXT: s_mov_b32 s6, s8
3045 ; GFX90A-NEXT: s_mov_b32 s7, s9
3046 ; GFX90A-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da
3047 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3048 ; GFX90A-NEXT: ; return to shader part epilog
3050 ; GFX10-LABEL: atomic_add_i64_1darray:
3051 ; GFX10: ; %bb.0: ; %main_body
3052 ; GFX10-NEXT: s_mov_b32 s0, s2
3053 ; GFX10-NEXT: s_mov_b32 s1, s3
3054 ; GFX10-NEXT: s_mov_b32 s2, s4
3055 ; GFX10-NEXT: s_mov_b32 s3, s5
3056 ; GFX10-NEXT: s_mov_b32 s4, s6
3057 ; GFX10-NEXT: s_mov_b32 s5, s7
3058 ; GFX10-NEXT: s_mov_b32 s6, s8
3059 ; GFX10-NEXT: s_mov_b32 s7, s9
3060 ; GFX10-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc
3061 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3062 ; GFX10-NEXT: ; return to shader part epilog
3064 %v = call i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i32(i64 %data, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
3065 %out = bitcast i64 %v to <2 x float>
3066 ret <2 x float> %out
; 64-bit image atomic add on a 2D array image (%s, %t, %slice). The i64 result
; of llvm.amdgcn.image.atomic.add.2darray is bitcast to <2 x float> and returned.
; Every run other than GFX10 expects the da modifier with a three-register
; address (v[2:4]); the GFX10 run expects SQ_RSRC_IMG_2D_ARRAY and no da.
3069 define amdgpu_ps <2 x float> @atomic_add_i64_2darray(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %slice) {
3070 ; GFX6-LABEL: atomic_add_i64_2darray:
3071 ; GFX6: ; %bb.0: ; %main_body
3072 ; GFX6-NEXT: s_mov_b32 s0, s2
3073 ; GFX6-NEXT: s_mov_b32 s1, s3
3074 ; GFX6-NEXT: s_mov_b32 s2, s4
3075 ; GFX6-NEXT: s_mov_b32 s3, s5
3076 ; GFX6-NEXT: s_mov_b32 s4, s6
3077 ; GFX6-NEXT: s_mov_b32 s5, s7
3078 ; GFX6-NEXT: s_mov_b32 s6, s8
3079 ; GFX6-NEXT: s_mov_b32 s7, s9
3080 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
3081 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
3082 ; GFX6-NEXT: ; return to shader part epilog
3084 ; GFX8-LABEL: atomic_add_i64_2darray:
3085 ; GFX8: ; %bb.0: ; %main_body
3086 ; GFX8-NEXT: s_mov_b32 s0, s2
3087 ; GFX8-NEXT: s_mov_b32 s1, s3
3088 ; GFX8-NEXT: s_mov_b32 s2, s4
3089 ; GFX8-NEXT: s_mov_b32 s3, s5
3090 ; GFX8-NEXT: s_mov_b32 s4, s6
3091 ; GFX8-NEXT: s_mov_b32 s5, s7
3092 ; GFX8-NEXT: s_mov_b32 s6, s8
3093 ; GFX8-NEXT: s_mov_b32 s7, s9
3094 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
3095 ; GFX8-NEXT: s_waitcnt vmcnt(0)
3096 ; GFX8-NEXT: ; return to shader part epilog
3098 ; GFX900-LABEL: atomic_add_i64_2darray:
3099 ; GFX900: ; %bb.0: ; %main_body
3100 ; GFX900-NEXT: s_mov_b32 s0, s2
3101 ; GFX900-NEXT: s_mov_b32 s1, s3
3102 ; GFX900-NEXT: s_mov_b32 s2, s4
3103 ; GFX900-NEXT: s_mov_b32 s3, s5
3104 ; GFX900-NEXT: s_mov_b32 s4, s6
3105 ; GFX900-NEXT: s_mov_b32 s5, s7
3106 ; GFX900-NEXT: s_mov_b32 s6, s8
3107 ; GFX900-NEXT: s_mov_b32 s7, s9
3108 ; GFX900-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
3109 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3110 ; GFX900-NEXT: ; return to shader part epilog
3112 ; GFX90A-LABEL: atomic_add_i64_2darray:
3113 ; GFX90A: ; %bb.0: ; %main_body
3114 ; GFX90A-NEXT: s_mov_b32 s0, s2
3115 ; GFX90A-NEXT: s_mov_b32 s1, s3
3116 ; GFX90A-NEXT: s_mov_b32 s2, s4
3117 ; GFX90A-NEXT: s_mov_b32 s3, s5
3118 ; GFX90A-NEXT: s_mov_b32 s4, s6
3119 ; GFX90A-NEXT: s_mov_b32 s5, s7
3120 ; GFX90A-NEXT: s_mov_b32 s6, s8
3121 ; GFX90A-NEXT: s_mov_b32 s7, s9
3122 ; GFX90A-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
3123 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3124 ; GFX90A-NEXT: ; return to shader part epilog
3126 ; GFX10-LABEL: atomic_add_i64_2darray:
3127 ; GFX10: ; %bb.0: ; %main_body
3128 ; GFX10-NEXT: s_mov_b32 s0, s2
3129 ; GFX10-NEXT: s_mov_b32 s1, s3
3130 ; GFX10-NEXT: s_mov_b32 s2, s4
3131 ; GFX10-NEXT: s_mov_b32 s3, s5
3132 ; GFX10-NEXT: s_mov_b32 s4, s6
3133 ; GFX10-NEXT: s_mov_b32 s5, s7
3134 ; GFX10-NEXT: s_mov_b32 s6, s8
3135 ; GFX10-NEXT: s_mov_b32 s7, s9
3136 ; GFX10-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc
3137 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3138 ; GFX10-NEXT: ; return to shader part epilog
3140 %v = call i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i32(i64 %data, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
3141 %out = bitcast i64 %v to <2 x float>
3142 ret <2 x float> %out
; 64-bit image atomic add on a 2D MSAA image (%s, %t, %fragid). The i64 result
; of llvm.amdgcn.image.atomic.add.2dmsaa is bitcast to <2 x float> and returned.
; Unlike the array and cube variants, no run expects the da modifier here; the
; address is three registers (v[2:4]) and the GFX10 run prints SQ_RSRC_IMG_2D_MSAA.
3145 define amdgpu_ps <2 x float> @atomic_add_i64_2dmsaa(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %fragid) {
3146 ; GFX6-LABEL: atomic_add_i64_2dmsaa:
3147 ; GFX6: ; %bb.0: ; %main_body
3148 ; GFX6-NEXT: s_mov_b32 s0, s2
3149 ; GFX6-NEXT: s_mov_b32 s1, s3
3150 ; GFX6-NEXT: s_mov_b32 s2, s4
3151 ; GFX6-NEXT: s_mov_b32 s3, s5
3152 ; GFX6-NEXT: s_mov_b32 s4, s6
3153 ; GFX6-NEXT: s_mov_b32 s5, s7
3154 ; GFX6-NEXT: s_mov_b32 s6, s8
3155 ; GFX6-NEXT: s_mov_b32 s7, s9
3156 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
3157 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
3158 ; GFX6-NEXT: ; return to shader part epilog
3160 ; GFX8-LABEL: atomic_add_i64_2dmsaa:
3161 ; GFX8: ; %bb.0: ; %main_body
3162 ; GFX8-NEXT: s_mov_b32 s0, s2
3163 ; GFX8-NEXT: s_mov_b32 s1, s3
3164 ; GFX8-NEXT: s_mov_b32 s2, s4
3165 ; GFX8-NEXT: s_mov_b32 s3, s5
3166 ; GFX8-NEXT: s_mov_b32 s4, s6
3167 ; GFX8-NEXT: s_mov_b32 s5, s7
3168 ; GFX8-NEXT: s_mov_b32 s6, s8
3169 ; GFX8-NEXT: s_mov_b32 s7, s9
3170 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
3171 ; GFX8-NEXT: s_waitcnt vmcnt(0)
3172 ; GFX8-NEXT: ; return to shader part epilog
3174 ; GFX900-LABEL: atomic_add_i64_2dmsaa:
3175 ; GFX900: ; %bb.0: ; %main_body
3176 ; GFX900-NEXT: s_mov_b32 s0, s2
3177 ; GFX900-NEXT: s_mov_b32 s1, s3
3178 ; GFX900-NEXT: s_mov_b32 s2, s4
3179 ; GFX900-NEXT: s_mov_b32 s3, s5
3180 ; GFX900-NEXT: s_mov_b32 s4, s6
3181 ; GFX900-NEXT: s_mov_b32 s5, s7
3182 ; GFX900-NEXT: s_mov_b32 s6, s8
3183 ; GFX900-NEXT: s_mov_b32 s7, s9
3184 ; GFX900-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
3185 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3186 ; GFX900-NEXT: ; return to shader part epilog
3188 ; GFX90A-LABEL: atomic_add_i64_2dmsaa:
3189 ; GFX90A: ; %bb.0: ; %main_body
3190 ; GFX90A-NEXT: s_mov_b32 s0, s2
3191 ; GFX90A-NEXT: s_mov_b32 s1, s3
3192 ; GFX90A-NEXT: s_mov_b32 s2, s4
3193 ; GFX90A-NEXT: s_mov_b32 s3, s5
3194 ; GFX90A-NEXT: s_mov_b32 s4, s6
3195 ; GFX90A-NEXT: s_mov_b32 s5, s7
3196 ; GFX90A-NEXT: s_mov_b32 s6, s8
3197 ; GFX90A-NEXT: s_mov_b32 s7, s9
3198 ; GFX90A-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
3199 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3200 ; GFX90A-NEXT: ; return to shader part epilog
3202 ; GFX10-LABEL: atomic_add_i64_2dmsaa:
3203 ; GFX10: ; %bb.0: ; %main_body
3204 ; GFX10-NEXT: s_mov_b32 s0, s2
3205 ; GFX10-NEXT: s_mov_b32 s1, s3
3206 ; GFX10-NEXT: s_mov_b32 s2, s4
3207 ; GFX10-NEXT: s_mov_b32 s3, s5
3208 ; GFX10-NEXT: s_mov_b32 s4, s6
3209 ; GFX10-NEXT: s_mov_b32 s5, s7
3210 ; GFX10-NEXT: s_mov_b32 s6, s8
3211 ; GFX10-NEXT: s_mov_b32 s7, s9
3212 ; GFX10-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA unorm glc
3213 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3214 ; GFX10-NEXT: ; return to shader part epilog
3216 %v = call i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i32(i64 %data, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
3217 %out = bitcast i64 %v to <2 x float>
3218 ret <2 x float> %out
; 64-bit image atomic add on a 2D MSAA array image (%s, %t, %slice, %fragid).
; The i64 result of llvm.amdgcn.image.atomic.add.2darraymsaa is bitcast to
; <2 x float> and returned. The four coordinates are expected as v[2:5]; every
; run other than GFX10 expects da, and GFX10 prints SQ_RSRC_IMG_2D_MSAA_ARRAY.
3221 define amdgpu_ps <2 x float> @atomic_add_i64_2darraymsaa(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
3222 ; GFX6-LABEL: atomic_add_i64_2darraymsaa:
3223 ; GFX6: ; %bb.0: ; %main_body
3224 ; GFX6-NEXT: s_mov_b32 s0, s2
3225 ; GFX6-NEXT: s_mov_b32 s1, s3
3226 ; GFX6-NEXT: s_mov_b32 s2, s4
3227 ; GFX6-NEXT: s_mov_b32 s3, s5
3228 ; GFX6-NEXT: s_mov_b32 s4, s6
3229 ; GFX6-NEXT: s_mov_b32 s5, s7
3230 ; GFX6-NEXT: s_mov_b32 s6, s8
3231 ; GFX6-NEXT: s_mov_b32 s7, s9
3232 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da
3233 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
3234 ; GFX6-NEXT: ; return to shader part epilog
3236 ; GFX8-LABEL: atomic_add_i64_2darraymsaa:
3237 ; GFX8: ; %bb.0: ; %main_body
3238 ; GFX8-NEXT: s_mov_b32 s0, s2
3239 ; GFX8-NEXT: s_mov_b32 s1, s3
3240 ; GFX8-NEXT: s_mov_b32 s2, s4
3241 ; GFX8-NEXT: s_mov_b32 s3, s5
3242 ; GFX8-NEXT: s_mov_b32 s4, s6
3243 ; GFX8-NEXT: s_mov_b32 s5, s7
3244 ; GFX8-NEXT: s_mov_b32 s6, s8
3245 ; GFX8-NEXT: s_mov_b32 s7, s9
3246 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da
3247 ; GFX8-NEXT: s_waitcnt vmcnt(0)
3248 ; GFX8-NEXT: ; return to shader part epilog
3250 ; GFX900-LABEL: atomic_add_i64_2darraymsaa:
3251 ; GFX900: ; %bb.0: ; %main_body
3252 ; GFX900-NEXT: s_mov_b32 s0, s2
3253 ; GFX900-NEXT: s_mov_b32 s1, s3
3254 ; GFX900-NEXT: s_mov_b32 s2, s4
3255 ; GFX900-NEXT: s_mov_b32 s3, s5
3256 ; GFX900-NEXT: s_mov_b32 s4, s6
3257 ; GFX900-NEXT: s_mov_b32 s5, s7
3258 ; GFX900-NEXT: s_mov_b32 s6, s8
3259 ; GFX900-NEXT: s_mov_b32 s7, s9
3260 ; GFX900-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da
3261 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3262 ; GFX900-NEXT: ; return to shader part epilog
3264 ; GFX90A-LABEL: atomic_add_i64_2darraymsaa:
3265 ; GFX90A: ; %bb.0: ; %main_body
3266 ; GFX90A-NEXT: s_mov_b32 s0, s2
3267 ; GFX90A-NEXT: s_mov_b32 s1, s3
3268 ; GFX90A-NEXT: s_mov_b32 s2, s4
3269 ; GFX90A-NEXT: s_mov_b32 s3, s5
3270 ; GFX90A-NEXT: s_mov_b32 s4, s6
3271 ; GFX90A-NEXT: s_mov_b32 s5, s7
3272 ; GFX90A-NEXT: s_mov_b32 s6, s8
3273 ; GFX90A-NEXT: s_mov_b32 s7, s9
3274 ; GFX90A-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da
3275 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3276 ; GFX90A-NEXT: ; return to shader part epilog
3278 ; GFX10-LABEL: atomic_add_i64_2darraymsaa:
3279 ; GFX10: ; %bb.0: ; %main_body
3280 ; GFX10-NEXT: s_mov_b32 s0, s2
3281 ; GFX10-NEXT: s_mov_b32 s1, s3
3282 ; GFX10-NEXT: s_mov_b32 s2, s4
3283 ; GFX10-NEXT: s_mov_b32 s3, s5
3284 ; GFX10-NEXT: s_mov_b32 s4, s6
3285 ; GFX10-NEXT: s_mov_b32 s5, s7
3286 ; GFX10-NEXT: s_mov_b32 s6, s8
3287 ; GFX10-NEXT: s_mov_b32 s7, s9
3288 ; GFX10-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc
3289 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3290 ; GFX10-NEXT: ; return to shader part epilog
3292 %v = call i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i32(i64 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
3293 %out = bitcast i64 %v to <2 x float>
3294 ret <2 x float> %out
; 64-bit image atomic add on a 1D image with the final immediate argument of
; the intrinsic set to 2 instead of 0. Every run expects the slc modifier in
; addition to glc on the emitted image_atomic_add.
; NOTE(review): the final immediate is presumably the cache-policy operand with
; bit 1 selecting slc -- confirm against the intrinsic definition.
3297 define amdgpu_ps <2 x float> @atomic_add_i64_1d_slc(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
3298 ; GFX6-LABEL: atomic_add_i64_1d_slc:
3299 ; GFX6: ; %bb.0: ; %main_body
3300 ; GFX6-NEXT: s_mov_b32 s0, s2
3301 ; GFX6-NEXT: s_mov_b32 s1, s3
3302 ; GFX6-NEXT: s_mov_b32 s2, s4
3303 ; GFX6-NEXT: s_mov_b32 s3, s5
3304 ; GFX6-NEXT: s_mov_b32 s4, s6
3305 ; GFX6-NEXT: s_mov_b32 s5, s7
3306 ; GFX6-NEXT: s_mov_b32 s6, s8
3307 ; GFX6-NEXT: s_mov_b32 s7, s9
3308 ; GFX6-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc
3309 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
3310 ; GFX6-NEXT: ; return to shader part epilog
3312 ; GFX8-LABEL: atomic_add_i64_1d_slc:
3313 ; GFX8: ; %bb.0: ; %main_body
3314 ; GFX8-NEXT: s_mov_b32 s0, s2
3315 ; GFX8-NEXT: s_mov_b32 s1, s3
3316 ; GFX8-NEXT: s_mov_b32 s2, s4
3317 ; GFX8-NEXT: s_mov_b32 s3, s5
3318 ; GFX8-NEXT: s_mov_b32 s4, s6
3319 ; GFX8-NEXT: s_mov_b32 s5, s7
3320 ; GFX8-NEXT: s_mov_b32 s6, s8
3321 ; GFX8-NEXT: s_mov_b32 s7, s9
3322 ; GFX8-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc
3323 ; GFX8-NEXT: s_waitcnt vmcnt(0)
3324 ; GFX8-NEXT: ; return to shader part epilog
3326 ; GFX900-LABEL: atomic_add_i64_1d_slc:
3327 ; GFX900: ; %bb.0: ; %main_body
3328 ; GFX900-NEXT: s_mov_b32 s0, s2
3329 ; GFX900-NEXT: s_mov_b32 s1, s3
3330 ; GFX900-NEXT: s_mov_b32 s2, s4
3331 ; GFX900-NEXT: s_mov_b32 s3, s5
3332 ; GFX900-NEXT: s_mov_b32 s4, s6
3333 ; GFX900-NEXT: s_mov_b32 s5, s7
3334 ; GFX900-NEXT: s_mov_b32 s6, s8
3335 ; GFX900-NEXT: s_mov_b32 s7, s9
3336 ; GFX900-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc
3337 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3338 ; GFX900-NEXT: ; return to shader part epilog
3340 ; GFX90A-LABEL: atomic_add_i64_1d_slc:
3341 ; GFX90A: ; %bb.0: ; %main_body
3342 ; GFX90A-NEXT: s_mov_b32 s0, s2
3343 ; GFX90A-NEXT: s_mov_b32 s1, s3
3344 ; GFX90A-NEXT: s_mov_b32 s2, s4
3345 ; GFX90A-NEXT: s_mov_b32 s3, s5
3346 ; GFX90A-NEXT: s_mov_b32 s4, s6
3347 ; GFX90A-NEXT: s_mov_b32 s5, s7
3348 ; GFX90A-NEXT: s_mov_b32 s6, s8
3349 ; GFX90A-NEXT: s_mov_b32 s7, s9
3350 ; GFX90A-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc
3351 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3352 ; GFX90A-NEXT: ; return to shader part epilog
3354 ; GFX10-LABEL: atomic_add_i64_1d_slc:
3355 ; GFX10: ; %bb.0: ; %main_body
3356 ; GFX10-NEXT: s_mov_b32 s0, s2
3357 ; GFX10-NEXT: s_mov_b32 s1, s3
3358 ; GFX10-NEXT: s_mov_b32 s2, s4
3359 ; GFX10-NEXT: s_mov_b32 s3, s5
3360 ; GFX10-NEXT: s_mov_b32 s4, s6
3361 ; GFX10-NEXT: s_mov_b32 s5, s7
3362 ; GFX10-NEXT: s_mov_b32 s6, s8
3363 ; GFX10-NEXT: s_mov_b32 s7, s9
3364 ; GFX10-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc slc
3365 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3366 ; GFX10-NEXT: ; return to shader part epilog
3368 %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
3369 %out = bitcast i64 %v to <2 x float>
3370 ret <2 x float> %out
; Declarations of the llvm.amdgcn.image.atomic.* intrinsics, all tagged with
; attribute group #0. Every signature ends with the <8 x i32> resource followed
; by two i32 immarg operands. The cmpswap variants take two leading values of
; the result type; every other operation takes one.
; NOTE(review): the plain i32 operands between the data value(s) and the
; resource look like the per-dimension coordinates (1 for 1d, up to 4 for
; 2darraymsaa) - confirm against the intrinsic definitions.
;
; 32-bit data variants (.i32.i32): return i32.
3373 declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3374 declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3375 declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3376 declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3377 declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3378 declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3379 declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3380 declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3381 declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3382 declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3383 declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3384 declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3385 declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3386 declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3387 declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3388 declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3389 declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3390 declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3391 declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3392 declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
; 64-bit data variants (.i64.i32): same shapes as the 32-bit group, with the
; data value(s) and the result widened to i64.
3394 declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3395 declare i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3396 declare i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3397 declare i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3398 declare i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3399 declare i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3400 declare i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3401 declare i64 @llvm.amdgcn.image.atomic.and.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3402 declare i64 @llvm.amdgcn.image.atomic.or.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3403 declare i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3404 declare i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3405 declare i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3406 declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64, i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3407 declare i64 @llvm.amdgcn.image.atomic.add.2d.i64.i32(i64, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3408 declare i64 @llvm.amdgcn.image.atomic.add.3d.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3409 declare i64 @llvm.amdgcn.image.atomic.add.cube.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3410 declare i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i32(i64, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3411 declare i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3412 declare i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3413 declare i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i32(i64, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
; Attribute group shared by every declaration above.
3415 attributes #0 = { nounwind }