1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - %s | FileCheck -check-prefix=GFX8 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX900 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -o - %s | FileCheck -check-prefix=GFX90A %s
6 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10PLUS %s
7 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - %s | FileCheck -check-prefix=GFX10PLUS %s
; Selects llvm.amdgcn.image.atomic.swap.1d for an i32 payload: %data and %s
; (presumably the 1D coordinate -- confirm against the intrinsic definition)
; are passed together with the inreg descriptor %rsrc, and the i32 result is
; bitcast to float.
; Expected output on every target: eight s_mov_b32 copies of s2..s9 into
; s0..s7, one image_atomic_swap with dmask:0x1 unorm glc, then an s_waitcnt on
; vmcnt(0). The tahiti checks also wait on expcnt(0), the gfx90a checks first
; copy v1 into v2 and use v2 as the second operand, and the gfx1010/gfx1100
; checks add dim:SQ_RSRC_IMG_1D.
9 define amdgpu_ps float @atomic_swap_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
10 ; GFX6-LABEL: atomic_swap_i32_1d:
11 ; GFX6: ; %bb.0: ; %main_body
12 ; GFX6-NEXT: s_mov_b32 s0, s2
13 ; GFX6-NEXT: s_mov_b32 s1, s3
14 ; GFX6-NEXT: s_mov_b32 s2, s4
15 ; GFX6-NEXT: s_mov_b32 s3, s5
16 ; GFX6-NEXT: s_mov_b32 s4, s6
17 ; GFX6-NEXT: s_mov_b32 s5, s7
18 ; GFX6-NEXT: s_mov_b32 s6, s8
19 ; GFX6-NEXT: s_mov_b32 s7, s9
20 ; GFX6-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc
21 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
22 ; GFX6-NEXT: ; return to shader part epilog
24 ; GFX8-LABEL: atomic_swap_i32_1d:
25 ; GFX8: ; %bb.0: ; %main_body
26 ; GFX8-NEXT: s_mov_b32 s0, s2
27 ; GFX8-NEXT: s_mov_b32 s1, s3
28 ; GFX8-NEXT: s_mov_b32 s2, s4
29 ; GFX8-NEXT: s_mov_b32 s3, s5
30 ; GFX8-NEXT: s_mov_b32 s4, s6
31 ; GFX8-NEXT: s_mov_b32 s5, s7
32 ; GFX8-NEXT: s_mov_b32 s6, s8
33 ; GFX8-NEXT: s_mov_b32 s7, s9
34 ; GFX8-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc
35 ; GFX8-NEXT: s_waitcnt vmcnt(0)
36 ; GFX8-NEXT: ; return to shader part epilog
38 ; GFX900-LABEL: atomic_swap_i32_1d:
39 ; GFX900: ; %bb.0: ; %main_body
40 ; GFX900-NEXT: s_mov_b32 s0, s2
41 ; GFX900-NEXT: s_mov_b32 s1, s3
42 ; GFX900-NEXT: s_mov_b32 s2, s4
43 ; GFX900-NEXT: s_mov_b32 s3, s5
44 ; GFX900-NEXT: s_mov_b32 s4, s6
45 ; GFX900-NEXT: s_mov_b32 s5, s7
46 ; GFX900-NEXT: s_mov_b32 s6, s8
47 ; GFX900-NEXT: s_mov_b32 s7, s9
48 ; GFX900-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc
49 ; GFX900-NEXT: s_waitcnt vmcnt(0)
50 ; GFX900-NEXT: ; return to shader part epilog
52 ; GFX90A-LABEL: atomic_swap_i32_1d:
53 ; GFX90A: ; %bb.0: ; %main_body
54 ; GFX90A-NEXT: s_mov_b32 s0, s2
55 ; GFX90A-NEXT: s_mov_b32 s1, s3
56 ; GFX90A-NEXT: s_mov_b32 s2, s4
57 ; GFX90A-NEXT: s_mov_b32 s3, s5
58 ; GFX90A-NEXT: s_mov_b32 s4, s6
59 ; GFX90A-NEXT: s_mov_b32 s5, s7
60 ; GFX90A-NEXT: s_mov_b32 s6, s8
61 ; GFX90A-NEXT: s_mov_b32 s7, s9
62 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
63 ; GFX90A-NEXT: image_atomic_swap v0, v2, s[0:7] dmask:0x1 unorm glc
64 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
65 ; GFX90A-NEXT: ; return to shader part epilog
67 ; GFX10PLUS-LABEL: atomic_swap_i32_1d:
68 ; GFX10PLUS: ; %bb.0: ; %main_body
69 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
70 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
71 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
72 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
73 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
74 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
75 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
76 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
77 ; GFX10PLUS-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
78 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
79 ; GFX10PLUS-NEXT: ; return to shader part epilog
; The two trailing i32 0 operands are constant (presumably texfailctrl and
; cachepolicy -- TODO confirm); %out reinterprets the i32 result as float.
81 %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
82 %out = bitcast i32 %v to float
; Selects llvm.amdgcn.image.atomic.add.1d for an i32 payload: %data and %s
; (presumably the 1D coordinate -- confirm against the intrinsic definition)
; are passed together with the inreg descriptor %rsrc, and the i32 result is
; bitcast to float.
; Expected output on every target: eight s_mov_b32 copies of s2..s9 into
; s0..s7, one image_atomic_add with dmask:0x1 unorm glc, then an s_waitcnt on
; vmcnt(0). The tahiti checks also wait on expcnt(0), the gfx90a checks first
; copy v1 into v2 and use v2 as the second operand, and the gfx1010/gfx1100
; checks add dim:SQ_RSRC_IMG_1D.
86 define amdgpu_ps float @atomic_add_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
87 ; GFX6-LABEL: atomic_add_i32_1d:
88 ; GFX6: ; %bb.0: ; %main_body
89 ; GFX6-NEXT: s_mov_b32 s0, s2
90 ; GFX6-NEXT: s_mov_b32 s1, s3
91 ; GFX6-NEXT: s_mov_b32 s2, s4
92 ; GFX6-NEXT: s_mov_b32 s3, s5
93 ; GFX6-NEXT: s_mov_b32 s4, s6
94 ; GFX6-NEXT: s_mov_b32 s5, s7
95 ; GFX6-NEXT: s_mov_b32 s6, s8
96 ; GFX6-NEXT: s_mov_b32 s7, s9
97 ; GFX6-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc
98 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
99 ; GFX6-NEXT: ; return to shader part epilog
101 ; GFX8-LABEL: atomic_add_i32_1d:
102 ; GFX8: ; %bb.0: ; %main_body
103 ; GFX8-NEXT: s_mov_b32 s0, s2
104 ; GFX8-NEXT: s_mov_b32 s1, s3
105 ; GFX8-NEXT: s_mov_b32 s2, s4
106 ; GFX8-NEXT: s_mov_b32 s3, s5
107 ; GFX8-NEXT: s_mov_b32 s4, s6
108 ; GFX8-NEXT: s_mov_b32 s5, s7
109 ; GFX8-NEXT: s_mov_b32 s6, s8
110 ; GFX8-NEXT: s_mov_b32 s7, s9
111 ; GFX8-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc
112 ; GFX8-NEXT: s_waitcnt vmcnt(0)
113 ; GFX8-NEXT: ; return to shader part epilog
115 ; GFX900-LABEL: atomic_add_i32_1d:
116 ; GFX900: ; %bb.0: ; %main_body
117 ; GFX900-NEXT: s_mov_b32 s0, s2
118 ; GFX900-NEXT: s_mov_b32 s1, s3
119 ; GFX900-NEXT: s_mov_b32 s2, s4
120 ; GFX900-NEXT: s_mov_b32 s3, s5
121 ; GFX900-NEXT: s_mov_b32 s4, s6
122 ; GFX900-NEXT: s_mov_b32 s5, s7
123 ; GFX900-NEXT: s_mov_b32 s6, s8
124 ; GFX900-NEXT: s_mov_b32 s7, s9
125 ; GFX900-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc
126 ; GFX900-NEXT: s_waitcnt vmcnt(0)
127 ; GFX900-NEXT: ; return to shader part epilog
129 ; GFX90A-LABEL: atomic_add_i32_1d:
130 ; GFX90A: ; %bb.0: ; %main_body
131 ; GFX90A-NEXT: s_mov_b32 s0, s2
132 ; GFX90A-NEXT: s_mov_b32 s1, s3
133 ; GFX90A-NEXT: s_mov_b32 s2, s4
134 ; GFX90A-NEXT: s_mov_b32 s3, s5
135 ; GFX90A-NEXT: s_mov_b32 s4, s6
136 ; GFX90A-NEXT: s_mov_b32 s5, s7
137 ; GFX90A-NEXT: s_mov_b32 s6, s8
138 ; GFX90A-NEXT: s_mov_b32 s7, s9
139 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
140 ; GFX90A-NEXT: image_atomic_add v0, v2, s[0:7] dmask:0x1 unorm glc
141 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
142 ; GFX90A-NEXT: ; return to shader part epilog
144 ; GFX10PLUS-LABEL: atomic_add_i32_1d:
145 ; GFX10PLUS: ; %bb.0: ; %main_body
146 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
147 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
148 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
149 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
150 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
151 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
152 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
153 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
154 ; GFX10PLUS-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
155 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
156 ; GFX10PLUS-NEXT: ; return to shader part epilog
; The two trailing i32 0 operands are constant (presumably texfailctrl and
; cachepolicy -- TODO confirm); %out reinterprets the i32 result as float.
158 %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
159 %out = bitcast i32 %v to float
; Selects llvm.amdgcn.image.atomic.sub.1d for an i32 payload: %data and %s
; (presumably the 1D coordinate -- confirm against the intrinsic definition)
; are passed together with the inreg descriptor %rsrc, and the i32 result is
; bitcast to float.
; Expected output on every target: eight s_mov_b32 copies of s2..s9 into
; s0..s7, one image_atomic_sub with dmask:0x1 unorm glc, then an s_waitcnt on
; vmcnt(0). The tahiti checks also wait on expcnt(0), the gfx90a checks first
; copy v1 into v2 and use v2 as the second operand, and the gfx1010/gfx1100
; checks add dim:SQ_RSRC_IMG_1D.
163 define amdgpu_ps float @atomic_sub_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
164 ; GFX6-LABEL: atomic_sub_i32_1d:
165 ; GFX6: ; %bb.0: ; %main_body
166 ; GFX6-NEXT: s_mov_b32 s0, s2
167 ; GFX6-NEXT: s_mov_b32 s1, s3
168 ; GFX6-NEXT: s_mov_b32 s2, s4
169 ; GFX6-NEXT: s_mov_b32 s3, s5
170 ; GFX6-NEXT: s_mov_b32 s4, s6
171 ; GFX6-NEXT: s_mov_b32 s5, s7
172 ; GFX6-NEXT: s_mov_b32 s6, s8
173 ; GFX6-NEXT: s_mov_b32 s7, s9
174 ; GFX6-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc
175 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
176 ; GFX6-NEXT: ; return to shader part epilog
178 ; GFX8-LABEL: atomic_sub_i32_1d:
179 ; GFX8: ; %bb.0: ; %main_body
180 ; GFX8-NEXT: s_mov_b32 s0, s2
181 ; GFX8-NEXT: s_mov_b32 s1, s3
182 ; GFX8-NEXT: s_mov_b32 s2, s4
183 ; GFX8-NEXT: s_mov_b32 s3, s5
184 ; GFX8-NEXT: s_mov_b32 s4, s6
185 ; GFX8-NEXT: s_mov_b32 s5, s7
186 ; GFX8-NEXT: s_mov_b32 s6, s8
187 ; GFX8-NEXT: s_mov_b32 s7, s9
188 ; GFX8-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc
189 ; GFX8-NEXT: s_waitcnt vmcnt(0)
190 ; GFX8-NEXT: ; return to shader part epilog
192 ; GFX900-LABEL: atomic_sub_i32_1d:
193 ; GFX900: ; %bb.0: ; %main_body
194 ; GFX900-NEXT: s_mov_b32 s0, s2
195 ; GFX900-NEXT: s_mov_b32 s1, s3
196 ; GFX900-NEXT: s_mov_b32 s2, s4
197 ; GFX900-NEXT: s_mov_b32 s3, s5
198 ; GFX900-NEXT: s_mov_b32 s4, s6
199 ; GFX900-NEXT: s_mov_b32 s5, s7
200 ; GFX900-NEXT: s_mov_b32 s6, s8
201 ; GFX900-NEXT: s_mov_b32 s7, s9
202 ; GFX900-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc
203 ; GFX900-NEXT: s_waitcnt vmcnt(0)
204 ; GFX900-NEXT: ; return to shader part epilog
206 ; GFX90A-LABEL: atomic_sub_i32_1d:
207 ; GFX90A: ; %bb.0: ; %main_body
208 ; GFX90A-NEXT: s_mov_b32 s0, s2
209 ; GFX90A-NEXT: s_mov_b32 s1, s3
210 ; GFX90A-NEXT: s_mov_b32 s2, s4
211 ; GFX90A-NEXT: s_mov_b32 s3, s5
212 ; GFX90A-NEXT: s_mov_b32 s4, s6
213 ; GFX90A-NEXT: s_mov_b32 s5, s7
214 ; GFX90A-NEXT: s_mov_b32 s6, s8
215 ; GFX90A-NEXT: s_mov_b32 s7, s9
216 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
217 ; GFX90A-NEXT: image_atomic_sub v0, v2, s[0:7] dmask:0x1 unorm glc
218 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
219 ; GFX90A-NEXT: ; return to shader part epilog
221 ; GFX10PLUS-LABEL: atomic_sub_i32_1d:
222 ; GFX10PLUS: ; %bb.0: ; %main_body
223 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
224 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
225 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
226 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
227 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
228 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
229 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
230 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
231 ; GFX10PLUS-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
232 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
233 ; GFX10PLUS-NEXT: ; return to shader part epilog
; The two trailing i32 0 operands are constant (presumably texfailctrl and
; cachepolicy -- TODO confirm); %out reinterprets the i32 result as float.
235 %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
236 %out = bitcast i32 %v to float
; Selects llvm.amdgcn.image.atomic.smin.1d for an i32 payload: %data and %s
; (presumably the 1D coordinate -- confirm against the intrinsic definition)
; are passed together with the inreg descriptor %rsrc, and the i32 result is
; bitcast to float.
; Expected output on every target: eight s_mov_b32 copies of s2..s9 into
; s0..s7, one image_atomic_smin with dmask:0x1 unorm glc, then an s_waitcnt on
; vmcnt(0). The tahiti checks also wait on expcnt(0), the gfx90a checks first
; copy v1 into v2 and use v2 as the second operand, and the gfx1010/gfx1100
; checks add dim:SQ_RSRC_IMG_1D.
240 define amdgpu_ps float @atomic_smin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
241 ; GFX6-LABEL: atomic_smin_i32_1d:
242 ; GFX6: ; %bb.0: ; %main_body
243 ; GFX6-NEXT: s_mov_b32 s0, s2
244 ; GFX6-NEXT: s_mov_b32 s1, s3
245 ; GFX6-NEXT: s_mov_b32 s2, s4
246 ; GFX6-NEXT: s_mov_b32 s3, s5
247 ; GFX6-NEXT: s_mov_b32 s4, s6
248 ; GFX6-NEXT: s_mov_b32 s5, s7
249 ; GFX6-NEXT: s_mov_b32 s6, s8
250 ; GFX6-NEXT: s_mov_b32 s7, s9
251 ; GFX6-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc
252 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
253 ; GFX6-NEXT: ; return to shader part epilog
255 ; GFX8-LABEL: atomic_smin_i32_1d:
256 ; GFX8: ; %bb.0: ; %main_body
257 ; GFX8-NEXT: s_mov_b32 s0, s2
258 ; GFX8-NEXT: s_mov_b32 s1, s3
259 ; GFX8-NEXT: s_mov_b32 s2, s4
260 ; GFX8-NEXT: s_mov_b32 s3, s5
261 ; GFX8-NEXT: s_mov_b32 s4, s6
262 ; GFX8-NEXT: s_mov_b32 s5, s7
263 ; GFX8-NEXT: s_mov_b32 s6, s8
264 ; GFX8-NEXT: s_mov_b32 s7, s9
265 ; GFX8-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc
266 ; GFX8-NEXT: s_waitcnt vmcnt(0)
267 ; GFX8-NEXT: ; return to shader part epilog
269 ; GFX900-LABEL: atomic_smin_i32_1d:
270 ; GFX900: ; %bb.0: ; %main_body
271 ; GFX900-NEXT: s_mov_b32 s0, s2
272 ; GFX900-NEXT: s_mov_b32 s1, s3
273 ; GFX900-NEXT: s_mov_b32 s2, s4
274 ; GFX900-NEXT: s_mov_b32 s3, s5
275 ; GFX900-NEXT: s_mov_b32 s4, s6
276 ; GFX900-NEXT: s_mov_b32 s5, s7
277 ; GFX900-NEXT: s_mov_b32 s6, s8
278 ; GFX900-NEXT: s_mov_b32 s7, s9
279 ; GFX900-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc
280 ; GFX900-NEXT: s_waitcnt vmcnt(0)
281 ; GFX900-NEXT: ; return to shader part epilog
283 ; GFX90A-LABEL: atomic_smin_i32_1d:
284 ; GFX90A: ; %bb.0: ; %main_body
285 ; GFX90A-NEXT: s_mov_b32 s0, s2
286 ; GFX90A-NEXT: s_mov_b32 s1, s3
287 ; GFX90A-NEXT: s_mov_b32 s2, s4
288 ; GFX90A-NEXT: s_mov_b32 s3, s5
289 ; GFX90A-NEXT: s_mov_b32 s4, s6
290 ; GFX90A-NEXT: s_mov_b32 s5, s7
291 ; GFX90A-NEXT: s_mov_b32 s6, s8
292 ; GFX90A-NEXT: s_mov_b32 s7, s9
293 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
294 ; GFX90A-NEXT: image_atomic_smin v0, v2, s[0:7] dmask:0x1 unorm glc
295 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
296 ; GFX90A-NEXT: ; return to shader part epilog
298 ; GFX10PLUS-LABEL: atomic_smin_i32_1d:
299 ; GFX10PLUS: ; %bb.0: ; %main_body
300 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
301 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
302 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
303 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
304 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
305 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
306 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
307 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
308 ; GFX10PLUS-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
309 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
310 ; GFX10PLUS-NEXT: ; return to shader part epilog
; The two trailing i32 0 operands are constant (presumably texfailctrl and
; cachepolicy -- TODO confirm); %out reinterprets the i32 result as float.
312 %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
313 %out = bitcast i32 %v to float
; Selects llvm.amdgcn.image.atomic.umin.1d for an i32 payload: %data and %s
; (presumably the 1D coordinate -- confirm against the intrinsic definition)
; are passed together with the inreg descriptor %rsrc, and the i32 result is
; bitcast to float.
; Expected output on every target: eight s_mov_b32 copies of s2..s9 into
; s0..s7, one image_atomic_umin with dmask:0x1 unorm glc, then an s_waitcnt on
; vmcnt(0). The tahiti checks also wait on expcnt(0), the gfx90a checks first
; copy v1 into v2 and use v2 as the second operand, and the gfx1010/gfx1100
; checks add dim:SQ_RSRC_IMG_1D.
317 define amdgpu_ps float @atomic_umin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
318 ; GFX6-LABEL: atomic_umin_i32_1d:
319 ; GFX6: ; %bb.0: ; %main_body
320 ; GFX6-NEXT: s_mov_b32 s0, s2
321 ; GFX6-NEXT: s_mov_b32 s1, s3
322 ; GFX6-NEXT: s_mov_b32 s2, s4
323 ; GFX6-NEXT: s_mov_b32 s3, s5
324 ; GFX6-NEXT: s_mov_b32 s4, s6
325 ; GFX6-NEXT: s_mov_b32 s5, s7
326 ; GFX6-NEXT: s_mov_b32 s6, s8
327 ; GFX6-NEXT: s_mov_b32 s7, s9
328 ; GFX6-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc
329 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
330 ; GFX6-NEXT: ; return to shader part epilog
332 ; GFX8-LABEL: atomic_umin_i32_1d:
333 ; GFX8: ; %bb.0: ; %main_body
334 ; GFX8-NEXT: s_mov_b32 s0, s2
335 ; GFX8-NEXT: s_mov_b32 s1, s3
336 ; GFX8-NEXT: s_mov_b32 s2, s4
337 ; GFX8-NEXT: s_mov_b32 s3, s5
338 ; GFX8-NEXT: s_mov_b32 s4, s6
339 ; GFX8-NEXT: s_mov_b32 s5, s7
340 ; GFX8-NEXT: s_mov_b32 s6, s8
341 ; GFX8-NEXT: s_mov_b32 s7, s9
342 ; GFX8-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc
343 ; GFX8-NEXT: s_waitcnt vmcnt(0)
344 ; GFX8-NEXT: ; return to shader part epilog
346 ; GFX900-LABEL: atomic_umin_i32_1d:
347 ; GFX900: ; %bb.0: ; %main_body
348 ; GFX900-NEXT: s_mov_b32 s0, s2
349 ; GFX900-NEXT: s_mov_b32 s1, s3
350 ; GFX900-NEXT: s_mov_b32 s2, s4
351 ; GFX900-NEXT: s_mov_b32 s3, s5
352 ; GFX900-NEXT: s_mov_b32 s4, s6
353 ; GFX900-NEXT: s_mov_b32 s5, s7
354 ; GFX900-NEXT: s_mov_b32 s6, s8
355 ; GFX900-NEXT: s_mov_b32 s7, s9
356 ; GFX900-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc
357 ; GFX900-NEXT: s_waitcnt vmcnt(0)
358 ; GFX900-NEXT: ; return to shader part epilog
360 ; GFX90A-LABEL: atomic_umin_i32_1d:
361 ; GFX90A: ; %bb.0: ; %main_body
362 ; GFX90A-NEXT: s_mov_b32 s0, s2
363 ; GFX90A-NEXT: s_mov_b32 s1, s3
364 ; GFX90A-NEXT: s_mov_b32 s2, s4
365 ; GFX90A-NEXT: s_mov_b32 s3, s5
366 ; GFX90A-NEXT: s_mov_b32 s4, s6
367 ; GFX90A-NEXT: s_mov_b32 s5, s7
368 ; GFX90A-NEXT: s_mov_b32 s6, s8
369 ; GFX90A-NEXT: s_mov_b32 s7, s9
370 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
371 ; GFX90A-NEXT: image_atomic_umin v0, v2, s[0:7] dmask:0x1 unorm glc
372 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
373 ; GFX90A-NEXT: ; return to shader part epilog
375 ; GFX10PLUS-LABEL: atomic_umin_i32_1d:
376 ; GFX10PLUS: ; %bb.0: ; %main_body
377 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
378 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
379 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
380 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
381 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
382 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
383 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
384 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
385 ; GFX10PLUS-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
386 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
387 ; GFX10PLUS-NEXT: ; return to shader part epilog
; The two trailing i32 0 operands are constant (presumably texfailctrl and
; cachepolicy -- TODO confirm); %out reinterprets the i32 result as float.
389 %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
390 %out = bitcast i32 %v to float
; Selects llvm.amdgcn.image.atomic.smax.1d for an i32 payload: %data and %s
; (presumably the 1D coordinate -- confirm against the intrinsic definition)
; are passed together with the inreg descriptor %rsrc, and the i32 result is
; bitcast to float.
; Expected output on every target: eight s_mov_b32 copies of s2..s9 into
; s0..s7, one image_atomic_smax with dmask:0x1 unorm glc, then an s_waitcnt on
; vmcnt(0). The tahiti checks also wait on expcnt(0), the gfx90a checks first
; copy v1 into v2 and use v2 as the second operand, and the gfx1010/gfx1100
; checks add dim:SQ_RSRC_IMG_1D.
394 define amdgpu_ps float @atomic_smax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
395 ; GFX6-LABEL: atomic_smax_i32_1d:
396 ; GFX6: ; %bb.0: ; %main_body
397 ; GFX6-NEXT: s_mov_b32 s0, s2
398 ; GFX6-NEXT: s_mov_b32 s1, s3
399 ; GFX6-NEXT: s_mov_b32 s2, s4
400 ; GFX6-NEXT: s_mov_b32 s3, s5
401 ; GFX6-NEXT: s_mov_b32 s4, s6
402 ; GFX6-NEXT: s_mov_b32 s5, s7
403 ; GFX6-NEXT: s_mov_b32 s6, s8
404 ; GFX6-NEXT: s_mov_b32 s7, s9
405 ; GFX6-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc
406 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
407 ; GFX6-NEXT: ; return to shader part epilog
409 ; GFX8-LABEL: atomic_smax_i32_1d:
410 ; GFX8: ; %bb.0: ; %main_body
411 ; GFX8-NEXT: s_mov_b32 s0, s2
412 ; GFX8-NEXT: s_mov_b32 s1, s3
413 ; GFX8-NEXT: s_mov_b32 s2, s4
414 ; GFX8-NEXT: s_mov_b32 s3, s5
415 ; GFX8-NEXT: s_mov_b32 s4, s6
416 ; GFX8-NEXT: s_mov_b32 s5, s7
417 ; GFX8-NEXT: s_mov_b32 s6, s8
418 ; GFX8-NEXT: s_mov_b32 s7, s9
419 ; GFX8-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc
420 ; GFX8-NEXT: s_waitcnt vmcnt(0)
421 ; GFX8-NEXT: ; return to shader part epilog
423 ; GFX900-LABEL: atomic_smax_i32_1d:
424 ; GFX900: ; %bb.0: ; %main_body
425 ; GFX900-NEXT: s_mov_b32 s0, s2
426 ; GFX900-NEXT: s_mov_b32 s1, s3
427 ; GFX900-NEXT: s_mov_b32 s2, s4
428 ; GFX900-NEXT: s_mov_b32 s3, s5
429 ; GFX900-NEXT: s_mov_b32 s4, s6
430 ; GFX900-NEXT: s_mov_b32 s5, s7
431 ; GFX900-NEXT: s_mov_b32 s6, s8
432 ; GFX900-NEXT: s_mov_b32 s7, s9
433 ; GFX900-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc
434 ; GFX900-NEXT: s_waitcnt vmcnt(0)
435 ; GFX900-NEXT: ; return to shader part epilog
437 ; GFX90A-LABEL: atomic_smax_i32_1d:
438 ; GFX90A: ; %bb.0: ; %main_body
439 ; GFX90A-NEXT: s_mov_b32 s0, s2
440 ; GFX90A-NEXT: s_mov_b32 s1, s3
441 ; GFX90A-NEXT: s_mov_b32 s2, s4
442 ; GFX90A-NEXT: s_mov_b32 s3, s5
443 ; GFX90A-NEXT: s_mov_b32 s4, s6
444 ; GFX90A-NEXT: s_mov_b32 s5, s7
445 ; GFX90A-NEXT: s_mov_b32 s6, s8
446 ; GFX90A-NEXT: s_mov_b32 s7, s9
447 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
448 ; GFX90A-NEXT: image_atomic_smax v0, v2, s[0:7] dmask:0x1 unorm glc
449 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
450 ; GFX90A-NEXT: ; return to shader part epilog
452 ; GFX10PLUS-LABEL: atomic_smax_i32_1d:
453 ; GFX10PLUS: ; %bb.0: ; %main_body
454 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
455 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
456 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
457 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
458 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
459 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
460 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
461 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
462 ; GFX10PLUS-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
463 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
464 ; GFX10PLUS-NEXT: ; return to shader part epilog
; The two trailing i32 0 operands are constant (presumably texfailctrl and
; cachepolicy -- TODO confirm); %out reinterprets the i32 result as float.
466 %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
467 %out = bitcast i32 %v to float
; Selects llvm.amdgcn.image.atomic.umax.1d for an i32 payload: %data and %s
; (presumably the 1D coordinate -- confirm against the intrinsic definition)
; are passed together with the inreg descriptor %rsrc, and the i32 result is
; bitcast to float.
; Expected output on every target: eight s_mov_b32 copies of s2..s9 into
; s0..s7, one image_atomic_umax with dmask:0x1 unorm glc, then an s_waitcnt on
; vmcnt(0). The tahiti checks also wait on expcnt(0), the gfx90a checks first
; copy v1 into v2 and use v2 as the second operand, and the gfx1010/gfx1100
; checks add dim:SQ_RSRC_IMG_1D.
471 define amdgpu_ps float @atomic_umax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
472 ; GFX6-LABEL: atomic_umax_i32_1d:
473 ; GFX6: ; %bb.0: ; %main_body
474 ; GFX6-NEXT: s_mov_b32 s0, s2
475 ; GFX6-NEXT: s_mov_b32 s1, s3
476 ; GFX6-NEXT: s_mov_b32 s2, s4
477 ; GFX6-NEXT: s_mov_b32 s3, s5
478 ; GFX6-NEXT: s_mov_b32 s4, s6
479 ; GFX6-NEXT: s_mov_b32 s5, s7
480 ; GFX6-NEXT: s_mov_b32 s6, s8
481 ; GFX6-NEXT: s_mov_b32 s7, s9
482 ; GFX6-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc
483 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
484 ; GFX6-NEXT: ; return to shader part epilog
486 ; GFX8-LABEL: atomic_umax_i32_1d:
487 ; GFX8: ; %bb.0: ; %main_body
488 ; GFX8-NEXT: s_mov_b32 s0, s2
489 ; GFX8-NEXT: s_mov_b32 s1, s3
490 ; GFX8-NEXT: s_mov_b32 s2, s4
491 ; GFX8-NEXT: s_mov_b32 s3, s5
492 ; GFX8-NEXT: s_mov_b32 s4, s6
493 ; GFX8-NEXT: s_mov_b32 s5, s7
494 ; GFX8-NEXT: s_mov_b32 s6, s8
495 ; GFX8-NEXT: s_mov_b32 s7, s9
496 ; GFX8-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc
497 ; GFX8-NEXT: s_waitcnt vmcnt(0)
498 ; GFX8-NEXT: ; return to shader part epilog
500 ; GFX900-LABEL: atomic_umax_i32_1d:
501 ; GFX900: ; %bb.0: ; %main_body
502 ; GFX900-NEXT: s_mov_b32 s0, s2
503 ; GFX900-NEXT: s_mov_b32 s1, s3
504 ; GFX900-NEXT: s_mov_b32 s2, s4
505 ; GFX900-NEXT: s_mov_b32 s3, s5
506 ; GFX900-NEXT: s_mov_b32 s4, s6
507 ; GFX900-NEXT: s_mov_b32 s5, s7
508 ; GFX900-NEXT: s_mov_b32 s6, s8
509 ; GFX900-NEXT: s_mov_b32 s7, s9
510 ; GFX900-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc
511 ; GFX900-NEXT: s_waitcnt vmcnt(0)
512 ; GFX900-NEXT: ; return to shader part epilog
514 ; GFX90A-LABEL: atomic_umax_i32_1d:
515 ; GFX90A: ; %bb.0: ; %main_body
516 ; GFX90A-NEXT: s_mov_b32 s0, s2
517 ; GFX90A-NEXT: s_mov_b32 s1, s3
518 ; GFX90A-NEXT: s_mov_b32 s2, s4
519 ; GFX90A-NEXT: s_mov_b32 s3, s5
520 ; GFX90A-NEXT: s_mov_b32 s4, s6
521 ; GFX90A-NEXT: s_mov_b32 s5, s7
522 ; GFX90A-NEXT: s_mov_b32 s6, s8
523 ; GFX90A-NEXT: s_mov_b32 s7, s9
524 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
525 ; GFX90A-NEXT: image_atomic_umax v0, v2, s[0:7] dmask:0x1 unorm glc
526 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
527 ; GFX90A-NEXT: ; return to shader part epilog
529 ; GFX10PLUS-LABEL: atomic_umax_i32_1d:
530 ; GFX10PLUS: ; %bb.0: ; %main_body
531 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
532 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
533 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
534 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
535 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
536 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
537 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
538 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
539 ; GFX10PLUS-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
540 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
541 ; GFX10PLUS-NEXT: ; return to shader part epilog
; The two trailing i32 0 operands are constant (presumably texfailctrl and
; cachepolicy -- TODO confirm); %out reinterprets the i32 result as float.
543 %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
544 %out = bitcast i32 %v to float
; Selects llvm.amdgcn.image.atomic.and.1d for an i32 payload: %data and %s
; (presumably the 1D coordinate -- confirm against the intrinsic definition)
; are passed together with the inreg descriptor %rsrc, and the i32 result is
; bitcast to float.
; Expected output on every target: eight s_mov_b32 copies of s2..s9 into
; s0..s7, one image_atomic_and with dmask:0x1 unorm glc, then an s_waitcnt on
; vmcnt(0). The tahiti checks also wait on expcnt(0), the gfx90a checks first
; copy v1 into v2 and use v2 as the second operand, and the gfx1010/gfx1100
; checks add dim:SQ_RSRC_IMG_1D.
548 define amdgpu_ps float @atomic_and_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
549 ; GFX6-LABEL: atomic_and_i32_1d:
550 ; GFX6: ; %bb.0: ; %main_body
551 ; GFX6-NEXT: s_mov_b32 s0, s2
552 ; GFX6-NEXT: s_mov_b32 s1, s3
553 ; GFX6-NEXT: s_mov_b32 s2, s4
554 ; GFX6-NEXT: s_mov_b32 s3, s5
555 ; GFX6-NEXT: s_mov_b32 s4, s6
556 ; GFX6-NEXT: s_mov_b32 s5, s7
557 ; GFX6-NEXT: s_mov_b32 s6, s8
558 ; GFX6-NEXT: s_mov_b32 s7, s9
559 ; GFX6-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc
560 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
561 ; GFX6-NEXT: ; return to shader part epilog
563 ; GFX8-LABEL: atomic_and_i32_1d:
564 ; GFX8: ; %bb.0: ; %main_body
565 ; GFX8-NEXT: s_mov_b32 s0, s2
566 ; GFX8-NEXT: s_mov_b32 s1, s3
567 ; GFX8-NEXT: s_mov_b32 s2, s4
568 ; GFX8-NEXT: s_mov_b32 s3, s5
569 ; GFX8-NEXT: s_mov_b32 s4, s6
570 ; GFX8-NEXT: s_mov_b32 s5, s7
571 ; GFX8-NEXT: s_mov_b32 s6, s8
572 ; GFX8-NEXT: s_mov_b32 s7, s9
573 ; GFX8-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc
574 ; GFX8-NEXT: s_waitcnt vmcnt(0)
575 ; GFX8-NEXT: ; return to shader part epilog
577 ; GFX900-LABEL: atomic_and_i32_1d:
578 ; GFX900: ; %bb.0: ; %main_body
579 ; GFX900-NEXT: s_mov_b32 s0, s2
580 ; GFX900-NEXT: s_mov_b32 s1, s3
581 ; GFX900-NEXT: s_mov_b32 s2, s4
582 ; GFX900-NEXT: s_mov_b32 s3, s5
583 ; GFX900-NEXT: s_mov_b32 s4, s6
584 ; GFX900-NEXT: s_mov_b32 s5, s7
585 ; GFX900-NEXT: s_mov_b32 s6, s8
586 ; GFX900-NEXT: s_mov_b32 s7, s9
587 ; GFX900-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc
588 ; GFX900-NEXT: s_waitcnt vmcnt(0)
589 ; GFX900-NEXT: ; return to shader part epilog
591 ; GFX90A-LABEL: atomic_and_i32_1d:
592 ; GFX90A: ; %bb.0: ; %main_body
593 ; GFX90A-NEXT: s_mov_b32 s0, s2
594 ; GFX90A-NEXT: s_mov_b32 s1, s3
595 ; GFX90A-NEXT: s_mov_b32 s2, s4
596 ; GFX90A-NEXT: s_mov_b32 s3, s5
597 ; GFX90A-NEXT: s_mov_b32 s4, s6
598 ; GFX90A-NEXT: s_mov_b32 s5, s7
599 ; GFX90A-NEXT: s_mov_b32 s6, s8
600 ; GFX90A-NEXT: s_mov_b32 s7, s9
601 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
602 ; GFX90A-NEXT: image_atomic_and v0, v2, s[0:7] dmask:0x1 unorm glc
603 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
604 ; GFX90A-NEXT: ; return to shader part epilog
606 ; GFX10PLUS-LABEL: atomic_and_i32_1d:
607 ; GFX10PLUS: ; %bb.0: ; %main_body
608 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
609 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
610 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
611 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
612 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
613 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
614 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
615 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
616 ; GFX10PLUS-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
617 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
618 ; GFX10PLUS-NEXT: ; return to shader part epilog
; The two trailing i32 0 operands are constant (presumably texfailctrl and
; cachepolicy -- TODO confirm); %out reinterprets the i32 result as float.
620 %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
621 %out = bitcast i32 %v to float
; Selects llvm.amdgcn.image.atomic.or.1d for an i32 payload: %data and %s
; (presumably the 1D coordinate -- confirm against the intrinsic definition)
; are passed together with the inreg descriptor %rsrc, and the i32 result is
; bitcast to float.
; Expected output on every target: eight s_mov_b32 copies of s2..s9 into
; s0..s7, one image_atomic_or with dmask:0x1 unorm glc, then an s_waitcnt on
; vmcnt(0). The tahiti checks also wait on expcnt(0), the gfx90a checks first
; copy v1 into v2 and use v2 as the second operand, and the gfx1010/gfx1100
; checks add dim:SQ_RSRC_IMG_1D.
625 define amdgpu_ps float @atomic_or_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
626 ; GFX6-LABEL: atomic_or_i32_1d:
627 ; GFX6: ; %bb.0: ; %main_body
628 ; GFX6-NEXT: s_mov_b32 s0, s2
629 ; GFX6-NEXT: s_mov_b32 s1, s3
630 ; GFX6-NEXT: s_mov_b32 s2, s4
631 ; GFX6-NEXT: s_mov_b32 s3, s5
632 ; GFX6-NEXT: s_mov_b32 s4, s6
633 ; GFX6-NEXT: s_mov_b32 s5, s7
634 ; GFX6-NEXT: s_mov_b32 s6, s8
635 ; GFX6-NEXT: s_mov_b32 s7, s9
636 ; GFX6-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc
637 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
638 ; GFX6-NEXT: ; return to shader part epilog
640 ; GFX8-LABEL: atomic_or_i32_1d:
641 ; GFX8: ; %bb.0: ; %main_body
642 ; GFX8-NEXT: s_mov_b32 s0, s2
643 ; GFX8-NEXT: s_mov_b32 s1, s3
644 ; GFX8-NEXT: s_mov_b32 s2, s4
645 ; GFX8-NEXT: s_mov_b32 s3, s5
646 ; GFX8-NEXT: s_mov_b32 s4, s6
647 ; GFX8-NEXT: s_mov_b32 s5, s7
648 ; GFX8-NEXT: s_mov_b32 s6, s8
649 ; GFX8-NEXT: s_mov_b32 s7, s9
650 ; GFX8-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc
651 ; GFX8-NEXT: s_waitcnt vmcnt(0)
652 ; GFX8-NEXT: ; return to shader part epilog
654 ; GFX900-LABEL: atomic_or_i32_1d:
655 ; GFX900: ; %bb.0: ; %main_body
656 ; GFX900-NEXT: s_mov_b32 s0, s2
657 ; GFX900-NEXT: s_mov_b32 s1, s3
658 ; GFX900-NEXT: s_mov_b32 s2, s4
659 ; GFX900-NEXT: s_mov_b32 s3, s5
660 ; GFX900-NEXT: s_mov_b32 s4, s6
661 ; GFX900-NEXT: s_mov_b32 s5, s7
662 ; GFX900-NEXT: s_mov_b32 s6, s8
663 ; GFX900-NEXT: s_mov_b32 s7, s9
664 ; GFX900-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc
665 ; GFX900-NEXT: s_waitcnt vmcnt(0)
666 ; GFX900-NEXT: ; return to shader part epilog
668 ; GFX90A-LABEL: atomic_or_i32_1d:
669 ; GFX90A: ; %bb.0: ; %main_body
670 ; GFX90A-NEXT: s_mov_b32 s0, s2
671 ; GFX90A-NEXT: s_mov_b32 s1, s3
672 ; GFX90A-NEXT: s_mov_b32 s2, s4
673 ; GFX90A-NEXT: s_mov_b32 s3, s5
674 ; GFX90A-NEXT: s_mov_b32 s4, s6
675 ; GFX90A-NEXT: s_mov_b32 s5, s7
676 ; GFX90A-NEXT: s_mov_b32 s6, s8
677 ; GFX90A-NEXT: s_mov_b32 s7, s9
678 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
679 ; GFX90A-NEXT: image_atomic_or v0, v2, s[0:7] dmask:0x1 unorm glc
680 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
681 ; GFX90A-NEXT: ; return to shader part epilog
683 ; GFX10PLUS-LABEL: atomic_or_i32_1d:
684 ; GFX10PLUS: ; %bb.0: ; %main_body
685 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
686 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
687 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
688 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
689 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
690 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
691 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
692 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
693 ; GFX10PLUS-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
694 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
695 ; GFX10PLUS-NEXT: ; return to shader part epilog
; The two trailing i32 0 operands are constant (presumably texfailctrl and
; cachepolicy -- TODO confirm); %out reinterprets the i32 result as float.
697 %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
698 %out = bitcast i32 %v to float
; Selects llvm.amdgcn.image.atomic.xor.1d for an i32 payload: %data and %s
; (presumably the 1D coordinate -- confirm against the intrinsic definition)
; are passed together with the inreg descriptor %rsrc, and the i32 result is
; bitcast to float.
; Expected output on every target: eight s_mov_b32 copies of s2..s9 into
; s0..s7, one image_atomic_xor with dmask:0x1 unorm glc, then an s_waitcnt on
; vmcnt(0). The tahiti checks also wait on expcnt(0), the gfx90a checks first
; copy v1 into v2 and use v2 as the second operand, and the gfx1010/gfx1100
; checks add dim:SQ_RSRC_IMG_1D.
702 define amdgpu_ps float @atomic_xor_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
703 ; GFX6-LABEL: atomic_xor_i32_1d:
704 ; GFX6: ; %bb.0: ; %main_body
705 ; GFX6-NEXT: s_mov_b32 s0, s2
706 ; GFX6-NEXT: s_mov_b32 s1, s3
707 ; GFX6-NEXT: s_mov_b32 s2, s4
708 ; GFX6-NEXT: s_mov_b32 s3, s5
709 ; GFX6-NEXT: s_mov_b32 s4, s6
710 ; GFX6-NEXT: s_mov_b32 s5, s7
711 ; GFX6-NEXT: s_mov_b32 s6, s8
712 ; GFX6-NEXT: s_mov_b32 s7, s9
713 ; GFX6-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc
714 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
715 ; GFX6-NEXT: ; return to shader part epilog
717 ; GFX8-LABEL: atomic_xor_i32_1d:
718 ; GFX8: ; %bb.0: ; %main_body
719 ; GFX8-NEXT: s_mov_b32 s0, s2
720 ; GFX8-NEXT: s_mov_b32 s1, s3
721 ; GFX8-NEXT: s_mov_b32 s2, s4
722 ; GFX8-NEXT: s_mov_b32 s3, s5
723 ; GFX8-NEXT: s_mov_b32 s4, s6
724 ; GFX8-NEXT: s_mov_b32 s5, s7
725 ; GFX8-NEXT: s_mov_b32 s6, s8
726 ; GFX8-NEXT: s_mov_b32 s7, s9
727 ; GFX8-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc
728 ; GFX8-NEXT: s_waitcnt vmcnt(0)
729 ; GFX8-NEXT: ; return to shader part epilog
731 ; GFX900-LABEL: atomic_xor_i32_1d:
732 ; GFX900: ; %bb.0: ; %main_body
733 ; GFX900-NEXT: s_mov_b32 s0, s2
734 ; GFX900-NEXT: s_mov_b32 s1, s3
735 ; GFX900-NEXT: s_mov_b32 s2, s4
736 ; GFX900-NEXT: s_mov_b32 s3, s5
737 ; GFX900-NEXT: s_mov_b32 s4, s6
738 ; GFX900-NEXT: s_mov_b32 s5, s7
739 ; GFX900-NEXT: s_mov_b32 s6, s8
740 ; GFX900-NEXT: s_mov_b32 s7, s9
741 ; GFX900-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc
742 ; GFX900-NEXT: s_waitcnt vmcnt(0)
743 ; GFX900-NEXT: ; return to shader part epilog
745 ; GFX90A-LABEL: atomic_xor_i32_1d:
746 ; GFX90A: ; %bb.0: ; %main_body
747 ; GFX90A-NEXT: s_mov_b32 s0, s2
748 ; GFX90A-NEXT: s_mov_b32 s1, s3
749 ; GFX90A-NEXT: s_mov_b32 s2, s4
750 ; GFX90A-NEXT: s_mov_b32 s3, s5
751 ; GFX90A-NEXT: s_mov_b32 s4, s6
752 ; GFX90A-NEXT: s_mov_b32 s5, s7
753 ; GFX90A-NEXT: s_mov_b32 s6, s8
754 ; GFX90A-NEXT: s_mov_b32 s7, s9
755 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
756 ; GFX90A-NEXT: image_atomic_xor v0, v2, s[0:7] dmask:0x1 unorm glc
757 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
758 ; GFX90A-NEXT: ; return to shader part epilog
760 ; GFX10PLUS-LABEL: atomic_xor_i32_1d:
761 ; GFX10PLUS: ; %bb.0: ; %main_body
762 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
763 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
764 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
765 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
766 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
767 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
768 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
769 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
770 ; GFX10PLUS-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
771 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
772 ; GFX10PLUS-NEXT: ; return to shader part epilog
; The two trailing i32 0 operands are constant (presumably texfailctrl and
; cachepolicy -- TODO confirm); %out reinterprets the i32 result as float.
774 %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
775 %out = bitcast i32 %v to float
; Selects llvm.amdgcn.image.atomic.inc.1d for an i32 payload: %data and %s
; (presumably the 1D coordinate -- confirm against the intrinsic definition)
; are passed together with the inreg descriptor %rsrc, and the i32 result is
; bitcast to float.
; Expected output on every target: eight s_mov_b32 copies of s2..s9 into
; s0..s7, one image_atomic_inc with dmask:0x1 unorm glc, then an s_waitcnt on
; vmcnt(0). The tahiti checks also wait on expcnt(0), the gfx90a checks first
; copy v1 into v2 and use v2 as the second operand, and the gfx1010/gfx1100
; checks add dim:SQ_RSRC_IMG_1D.
779 define amdgpu_ps float @atomic_inc_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
780 ; GFX6-LABEL: atomic_inc_i32_1d:
781 ; GFX6: ; %bb.0: ; %main_body
782 ; GFX6-NEXT: s_mov_b32 s0, s2
783 ; GFX6-NEXT: s_mov_b32 s1, s3
784 ; GFX6-NEXT: s_mov_b32 s2, s4
785 ; GFX6-NEXT: s_mov_b32 s3, s5
786 ; GFX6-NEXT: s_mov_b32 s4, s6
787 ; GFX6-NEXT: s_mov_b32 s5, s7
788 ; GFX6-NEXT: s_mov_b32 s6, s8
789 ; GFX6-NEXT: s_mov_b32 s7, s9
790 ; GFX6-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc
791 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
792 ; GFX6-NEXT: ; return to shader part epilog
794 ; GFX8-LABEL: atomic_inc_i32_1d:
795 ; GFX8: ; %bb.0: ; %main_body
796 ; GFX8-NEXT: s_mov_b32 s0, s2
797 ; GFX8-NEXT: s_mov_b32 s1, s3
798 ; GFX8-NEXT: s_mov_b32 s2, s4
799 ; GFX8-NEXT: s_mov_b32 s3, s5
800 ; GFX8-NEXT: s_mov_b32 s4, s6
801 ; GFX8-NEXT: s_mov_b32 s5, s7
802 ; GFX8-NEXT: s_mov_b32 s6, s8
803 ; GFX8-NEXT: s_mov_b32 s7, s9
804 ; GFX8-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc
805 ; GFX8-NEXT: s_waitcnt vmcnt(0)
806 ; GFX8-NEXT: ; return to shader part epilog
808 ; GFX900-LABEL: atomic_inc_i32_1d:
809 ; GFX900: ; %bb.0: ; %main_body
810 ; GFX900-NEXT: s_mov_b32 s0, s2
811 ; GFX900-NEXT: s_mov_b32 s1, s3
812 ; GFX900-NEXT: s_mov_b32 s2, s4
813 ; GFX900-NEXT: s_mov_b32 s3, s5
814 ; GFX900-NEXT: s_mov_b32 s4, s6
815 ; GFX900-NEXT: s_mov_b32 s5, s7
816 ; GFX900-NEXT: s_mov_b32 s6, s8
817 ; GFX900-NEXT: s_mov_b32 s7, s9
818 ; GFX900-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc
819 ; GFX900-NEXT: s_waitcnt vmcnt(0)
820 ; GFX900-NEXT: ; return to shader part epilog
822 ; GFX90A-LABEL: atomic_inc_i32_1d:
823 ; GFX90A: ; %bb.0: ; %main_body
824 ; GFX90A-NEXT: s_mov_b32 s0, s2
825 ; GFX90A-NEXT: s_mov_b32 s1, s3
826 ; GFX90A-NEXT: s_mov_b32 s2, s4
827 ; GFX90A-NEXT: s_mov_b32 s3, s5
828 ; GFX90A-NEXT: s_mov_b32 s4, s6
829 ; GFX90A-NEXT: s_mov_b32 s5, s7
830 ; GFX90A-NEXT: s_mov_b32 s6, s8
831 ; GFX90A-NEXT: s_mov_b32 s7, s9
832 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
833 ; GFX90A-NEXT: image_atomic_inc v0, v2, s[0:7] dmask:0x1 unorm glc
834 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
835 ; GFX90A-NEXT: ; return to shader part epilog
837 ; GFX10PLUS-LABEL: atomic_inc_i32_1d:
838 ; GFX10PLUS: ; %bb.0: ; %main_body
839 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
840 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
841 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
842 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
843 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
844 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
845 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
846 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
847 ; GFX10PLUS-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
848 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
849 ; GFX10PLUS-NEXT: ; return to shader part epilog
; The two trailing i32 0 operands are constant (presumably texfailctrl and
; cachepolicy -- TODO confirm); %out reinterprets the i32 result as float.
851 %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
852 %out = bitcast i32 %v to float
; Test: llvm.amdgcn.image.atomic.dec.1d.i32.i32 with an inreg <8 x i32> resource,
; an i32 data operand and one i32 coordinate; the i32 result is bitcast to float.
; All check blocks expect the resource to be moved from s2..s9 down to s[0:7],
; followed by a single image_atomic_dec (data in v0, dmask:0x1, unorm glc) and a
; wait on vmcnt(0). The GFX90A checks additionally expect the coordinate to be
; copied from v1 to v2 first; the GFX10PLUS checks add dim:SQ_RSRC_IMG_1D.
856 define amdgpu_ps float @atomic_dec_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
857 ; GFX6-LABEL: atomic_dec_i32_1d:
858 ; GFX6: ; %bb.0: ; %main_body
859 ; GFX6-NEXT: s_mov_b32 s0, s2
860 ; GFX6-NEXT: s_mov_b32 s1, s3
861 ; GFX6-NEXT: s_mov_b32 s2, s4
862 ; GFX6-NEXT: s_mov_b32 s3, s5
863 ; GFX6-NEXT: s_mov_b32 s4, s6
864 ; GFX6-NEXT: s_mov_b32 s5, s7
865 ; GFX6-NEXT: s_mov_b32 s6, s8
866 ; GFX6-NEXT: s_mov_b32 s7, s9
867 ; GFX6-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc
868 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
869 ; GFX6-NEXT: ; return to shader part epilog
871 ; GFX8-LABEL: atomic_dec_i32_1d:
872 ; GFX8: ; %bb.0: ; %main_body
873 ; GFX8-NEXT: s_mov_b32 s0, s2
874 ; GFX8-NEXT: s_mov_b32 s1, s3
875 ; GFX8-NEXT: s_mov_b32 s2, s4
876 ; GFX8-NEXT: s_mov_b32 s3, s5
877 ; GFX8-NEXT: s_mov_b32 s4, s6
878 ; GFX8-NEXT: s_mov_b32 s5, s7
879 ; GFX8-NEXT: s_mov_b32 s6, s8
880 ; GFX8-NEXT: s_mov_b32 s7, s9
881 ; GFX8-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc
882 ; GFX8-NEXT: s_waitcnt vmcnt(0)
883 ; GFX8-NEXT: ; return to shader part epilog
885 ; GFX900-LABEL: atomic_dec_i32_1d:
886 ; GFX900: ; %bb.0: ; %main_body
887 ; GFX900-NEXT: s_mov_b32 s0, s2
888 ; GFX900-NEXT: s_mov_b32 s1, s3
889 ; GFX900-NEXT: s_mov_b32 s2, s4
890 ; GFX900-NEXT: s_mov_b32 s3, s5
891 ; GFX900-NEXT: s_mov_b32 s4, s6
892 ; GFX900-NEXT: s_mov_b32 s5, s7
893 ; GFX900-NEXT: s_mov_b32 s6, s8
894 ; GFX900-NEXT: s_mov_b32 s7, s9
895 ; GFX900-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc
896 ; GFX900-NEXT: s_waitcnt vmcnt(0)
897 ; GFX900-NEXT: ; return to shader part epilog
899 ; GFX90A-LABEL: atomic_dec_i32_1d:
900 ; GFX90A: ; %bb.0: ; %main_body
901 ; GFX90A-NEXT: s_mov_b32 s0, s2
902 ; GFX90A-NEXT: s_mov_b32 s1, s3
903 ; GFX90A-NEXT: s_mov_b32 s2, s4
904 ; GFX90A-NEXT: s_mov_b32 s3, s5
905 ; GFX90A-NEXT: s_mov_b32 s4, s6
906 ; GFX90A-NEXT: s_mov_b32 s5, s7
907 ; GFX90A-NEXT: s_mov_b32 s6, s8
908 ; GFX90A-NEXT: s_mov_b32 s7, s9
909 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
910 ; GFX90A-NEXT: image_atomic_dec v0, v2, s[0:7] dmask:0x1 unorm glc
911 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
912 ; GFX90A-NEXT: ; return to shader part epilog
914 ; GFX10PLUS-LABEL: atomic_dec_i32_1d:
915 ; GFX10PLUS: ; %bb.0: ; %main_body
916 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
917 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
918 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
919 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
920 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
921 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
922 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
923 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
924 ; GFX10PLUS-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
925 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
926 ; GFX10PLUS-NEXT: ; return to shader part epilog
928 %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
929 %out = bitcast i32 %v to float
; Test: llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32 taking %cmp, %swap and one
; i32 coordinate; the i32 result is bitcast to float.
; All check blocks expect the resource to be moved from s2..s9 down to s[0:7],
; then image_atomic_cmpswap with a two-register data operand v[0:1], the
; coordinate in v2, dmask:0x3 and unorm glc, then a wait on vmcnt(0). Unlike the
; single-data atomics in this file, no extra v_mov is expected in the GFX90A
; checks. The GFX10PLUS checks add dim:SQ_RSRC_IMG_1D.
933 define amdgpu_ps float @atomic_cmpswap_i32_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) {
934 ; GFX6-LABEL: atomic_cmpswap_i32_1d:
935 ; GFX6: ; %bb.0: ; %main_body
936 ; GFX6-NEXT: s_mov_b32 s0, s2
937 ; GFX6-NEXT: s_mov_b32 s1, s3
938 ; GFX6-NEXT: s_mov_b32 s2, s4
939 ; GFX6-NEXT: s_mov_b32 s3, s5
940 ; GFX6-NEXT: s_mov_b32 s4, s6
941 ; GFX6-NEXT: s_mov_b32 s5, s7
942 ; GFX6-NEXT: s_mov_b32 s6, s8
943 ; GFX6-NEXT: s_mov_b32 s7, s9
944 ; GFX6-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
945 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
946 ; GFX6-NEXT: ; return to shader part epilog
948 ; GFX8-LABEL: atomic_cmpswap_i32_1d:
949 ; GFX8: ; %bb.0: ; %main_body
950 ; GFX8-NEXT: s_mov_b32 s0, s2
951 ; GFX8-NEXT: s_mov_b32 s1, s3
952 ; GFX8-NEXT: s_mov_b32 s2, s4
953 ; GFX8-NEXT: s_mov_b32 s3, s5
954 ; GFX8-NEXT: s_mov_b32 s4, s6
955 ; GFX8-NEXT: s_mov_b32 s5, s7
956 ; GFX8-NEXT: s_mov_b32 s6, s8
957 ; GFX8-NEXT: s_mov_b32 s7, s9
958 ; GFX8-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
959 ; GFX8-NEXT: s_waitcnt vmcnt(0)
960 ; GFX8-NEXT: ; return to shader part epilog
962 ; GFX900-LABEL: atomic_cmpswap_i32_1d:
963 ; GFX900: ; %bb.0: ; %main_body
964 ; GFX900-NEXT: s_mov_b32 s0, s2
965 ; GFX900-NEXT: s_mov_b32 s1, s3
966 ; GFX900-NEXT: s_mov_b32 s2, s4
967 ; GFX900-NEXT: s_mov_b32 s3, s5
968 ; GFX900-NEXT: s_mov_b32 s4, s6
969 ; GFX900-NEXT: s_mov_b32 s5, s7
970 ; GFX900-NEXT: s_mov_b32 s6, s8
971 ; GFX900-NEXT: s_mov_b32 s7, s9
972 ; GFX900-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
973 ; GFX900-NEXT: s_waitcnt vmcnt(0)
974 ; GFX900-NEXT: ; return to shader part epilog
976 ; GFX90A-LABEL: atomic_cmpswap_i32_1d:
977 ; GFX90A: ; %bb.0: ; %main_body
978 ; GFX90A-NEXT: s_mov_b32 s0, s2
979 ; GFX90A-NEXT: s_mov_b32 s1, s3
980 ; GFX90A-NEXT: s_mov_b32 s2, s4
981 ; GFX90A-NEXT: s_mov_b32 s3, s5
982 ; GFX90A-NEXT: s_mov_b32 s4, s6
983 ; GFX90A-NEXT: s_mov_b32 s5, s7
984 ; GFX90A-NEXT: s_mov_b32 s6, s8
985 ; GFX90A-NEXT: s_mov_b32 s7, s9
986 ; GFX90A-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
987 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
988 ; GFX90A-NEXT: ; return to shader part epilog
990 ; GFX10PLUS-LABEL: atomic_cmpswap_i32_1d:
991 ; GFX10PLUS: ; %bb.0: ; %main_body
992 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
993 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
994 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
995 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
996 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
997 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
998 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
999 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1000 ; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
1001 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1002 ; GFX10PLUS-NEXT: ; return to shader part epilog
1004 %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1005 %out = bitcast i32 %v to float
; Test: llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32 called from a shader that
; returns void; no use of the call result %v is visible in this function.
; All check blocks expect the resource to be moved from s2..s9 down to s[0:7],
; then image_atomic_cmpswap on v[0:1], v2 with dmask:0x3 unorm, immediately
; followed by s_endpgm (no s_waitcnt in between). The recorded output still
; carries glc on the instruction. The GFX10PLUS checks add dim:SQ_RSRC_IMG_1D.
1009 define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) {
1010 ; GFX6-LABEL: atomic_cmpswap_i32_1d_no_return:
1011 ; GFX6: ; %bb.0: ; %main_body
1012 ; GFX6-NEXT: s_mov_b32 s0, s2
1013 ; GFX6-NEXT: s_mov_b32 s1, s3
1014 ; GFX6-NEXT: s_mov_b32 s2, s4
1015 ; GFX6-NEXT: s_mov_b32 s3, s5
1016 ; GFX6-NEXT: s_mov_b32 s4, s6
1017 ; GFX6-NEXT: s_mov_b32 s5, s7
1018 ; GFX6-NEXT: s_mov_b32 s6, s8
1019 ; GFX6-NEXT: s_mov_b32 s7, s9
1020 ; GFX6-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1021 ; GFX6-NEXT: s_endpgm
1023 ; GFX8-LABEL: atomic_cmpswap_i32_1d_no_return:
1024 ; GFX8: ; %bb.0: ; %main_body
1025 ; GFX8-NEXT: s_mov_b32 s0, s2
1026 ; GFX8-NEXT: s_mov_b32 s1, s3
1027 ; GFX8-NEXT: s_mov_b32 s2, s4
1028 ; GFX8-NEXT: s_mov_b32 s3, s5
1029 ; GFX8-NEXT: s_mov_b32 s4, s6
1030 ; GFX8-NEXT: s_mov_b32 s5, s7
1031 ; GFX8-NEXT: s_mov_b32 s6, s8
1032 ; GFX8-NEXT: s_mov_b32 s7, s9
1033 ; GFX8-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1034 ; GFX8-NEXT: s_endpgm
1036 ; GFX900-LABEL: atomic_cmpswap_i32_1d_no_return:
1037 ; GFX900: ; %bb.0: ; %main_body
1038 ; GFX900-NEXT: s_mov_b32 s0, s2
1039 ; GFX900-NEXT: s_mov_b32 s1, s3
1040 ; GFX900-NEXT: s_mov_b32 s2, s4
1041 ; GFX900-NEXT: s_mov_b32 s3, s5
1042 ; GFX900-NEXT: s_mov_b32 s4, s6
1043 ; GFX900-NEXT: s_mov_b32 s5, s7
1044 ; GFX900-NEXT: s_mov_b32 s6, s8
1045 ; GFX900-NEXT: s_mov_b32 s7, s9
1046 ; GFX900-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1047 ; GFX900-NEXT: s_endpgm
1049 ; GFX90A-LABEL: atomic_cmpswap_i32_1d_no_return:
1050 ; GFX90A: ; %bb.0: ; %main_body
1051 ; GFX90A-NEXT: s_mov_b32 s0, s2
1052 ; GFX90A-NEXT: s_mov_b32 s1, s3
1053 ; GFX90A-NEXT: s_mov_b32 s2, s4
1054 ; GFX90A-NEXT: s_mov_b32 s3, s5
1055 ; GFX90A-NEXT: s_mov_b32 s4, s6
1056 ; GFX90A-NEXT: s_mov_b32 s5, s7
1057 ; GFX90A-NEXT: s_mov_b32 s6, s8
1058 ; GFX90A-NEXT: s_mov_b32 s7, s9
1059 ; GFX90A-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1060 ; GFX90A-NEXT: s_endpgm
1062 ; GFX10PLUS-LABEL: atomic_cmpswap_i32_1d_no_return:
1063 ; GFX10PLUS: ; %bb.0: ; %main_body
1064 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1065 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1066 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1067 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1068 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1069 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1070 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1071 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1072 ; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
1073 ; GFX10PLUS-NEXT: s_endpgm
1075 %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; Test: llvm.amdgcn.image.atomic.add.2d.i32.i32 with i32 data and two i32
; coordinates (%s, %t); the i32 result is bitcast to float.
; All check blocks expect the resource to be moved from s2..s9 down to s[0:7],
; then image_atomic_add with data in v0, a two-register address, dmask:0x1 and
; unorm glc, then a wait on vmcnt(0). The address is v[1:2] except in the GFX90A
; checks, which expect it to be copied to v[4:5] first. The GFX10PLUS checks add
; dim:SQ_RSRC_IMG_2D.
1079 define amdgpu_ps float @atomic_add_i32_2d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t) {
1080 ; GFX6-LABEL: atomic_add_i32_2d:
1081 ; GFX6: ; %bb.0: ; %main_body
1082 ; GFX6-NEXT: s_mov_b32 s0, s2
1083 ; GFX6-NEXT: s_mov_b32 s1, s3
1084 ; GFX6-NEXT: s_mov_b32 s2, s4
1085 ; GFX6-NEXT: s_mov_b32 s3, s5
1086 ; GFX6-NEXT: s_mov_b32 s4, s6
1087 ; GFX6-NEXT: s_mov_b32 s5, s7
1088 ; GFX6-NEXT: s_mov_b32 s6, s8
1089 ; GFX6-NEXT: s_mov_b32 s7, s9
1090 ; GFX6-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc
1091 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1092 ; GFX6-NEXT: ; return to shader part epilog
1094 ; GFX8-LABEL: atomic_add_i32_2d:
1095 ; GFX8: ; %bb.0: ; %main_body
1096 ; GFX8-NEXT: s_mov_b32 s0, s2
1097 ; GFX8-NEXT: s_mov_b32 s1, s3
1098 ; GFX8-NEXT: s_mov_b32 s2, s4
1099 ; GFX8-NEXT: s_mov_b32 s3, s5
1100 ; GFX8-NEXT: s_mov_b32 s4, s6
1101 ; GFX8-NEXT: s_mov_b32 s5, s7
1102 ; GFX8-NEXT: s_mov_b32 s6, s8
1103 ; GFX8-NEXT: s_mov_b32 s7, s9
1104 ; GFX8-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc
1105 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1106 ; GFX8-NEXT: ; return to shader part epilog
1108 ; GFX900-LABEL: atomic_add_i32_2d:
1109 ; GFX900: ; %bb.0: ; %main_body
1110 ; GFX900-NEXT: s_mov_b32 s0, s2
1111 ; GFX900-NEXT: s_mov_b32 s1, s3
1112 ; GFX900-NEXT: s_mov_b32 s2, s4
1113 ; GFX900-NEXT: s_mov_b32 s3, s5
1114 ; GFX900-NEXT: s_mov_b32 s4, s6
1115 ; GFX900-NEXT: s_mov_b32 s5, s7
1116 ; GFX900-NEXT: s_mov_b32 s6, s8
1117 ; GFX900-NEXT: s_mov_b32 s7, s9
1118 ; GFX900-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc
1119 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1120 ; GFX900-NEXT: ; return to shader part epilog
1122 ; GFX90A-LABEL: atomic_add_i32_2d:
1123 ; GFX90A: ; %bb.0: ; %main_body
1124 ; GFX90A-NEXT: s_mov_b32 s0, s2
1125 ; GFX90A-NEXT: s_mov_b32 s1, s3
1126 ; GFX90A-NEXT: s_mov_b32 s2, s4
1127 ; GFX90A-NEXT: s_mov_b32 s3, s5
1128 ; GFX90A-NEXT: s_mov_b32 s4, s6
1129 ; GFX90A-NEXT: s_mov_b32 s5, s7
1130 ; GFX90A-NEXT: s_mov_b32 s6, s8
1131 ; GFX90A-NEXT: s_mov_b32 s7, s9
1132 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
1133 ; GFX90A-NEXT: v_mov_b32_e32 v5, v2
1134 ; GFX90A-NEXT: image_atomic_add v0, v[4:5], s[0:7] dmask:0x1 unorm glc
1135 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1136 ; GFX90A-NEXT: ; return to shader part epilog
1138 ; GFX10PLUS-LABEL: atomic_add_i32_2d:
1139 ; GFX10PLUS: ; %bb.0: ; %main_body
1140 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1141 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1142 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1143 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1144 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1145 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1146 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1147 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1148 ; GFX10PLUS-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm glc
1149 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1150 ; GFX10PLUS-NEXT: ; return to shader part epilog
1152 %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
1153 %out = bitcast i32 %v to float
; Test: llvm.amdgcn.image.atomic.add.3d.i32.i32 with i32 data and three i32
; coordinates (%s, %t, %r); the i32 result is bitcast to float.
; All check blocks expect the resource to be moved from s2..s9 down to s[0:7],
; then image_atomic_add with data in v0, a three-register address, dmask:0x1 and
; unorm glc, then a wait on vmcnt(0). The address is v[1:3] except in the GFX90A
; checks, which expect it to be copied to v[4:6] first. The GFX10PLUS checks add
; dim:SQ_RSRC_IMG_3D.
1157 define amdgpu_ps float @atomic_add_i32_3d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %r) {
1158 ; GFX6-LABEL: atomic_add_i32_3d:
1159 ; GFX6: ; %bb.0: ; %main_body
1160 ; GFX6-NEXT: s_mov_b32 s0, s2
1161 ; GFX6-NEXT: s_mov_b32 s1, s3
1162 ; GFX6-NEXT: s_mov_b32 s2, s4
1163 ; GFX6-NEXT: s_mov_b32 s3, s5
1164 ; GFX6-NEXT: s_mov_b32 s4, s6
1165 ; GFX6-NEXT: s_mov_b32 s5, s7
1166 ; GFX6-NEXT: s_mov_b32 s6, s8
1167 ; GFX6-NEXT: s_mov_b32 s7, s9
1168 ; GFX6-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
1169 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1170 ; GFX6-NEXT: ; return to shader part epilog
1172 ; GFX8-LABEL: atomic_add_i32_3d:
1173 ; GFX8: ; %bb.0: ; %main_body
1174 ; GFX8-NEXT: s_mov_b32 s0, s2
1175 ; GFX8-NEXT: s_mov_b32 s1, s3
1176 ; GFX8-NEXT: s_mov_b32 s2, s4
1177 ; GFX8-NEXT: s_mov_b32 s3, s5
1178 ; GFX8-NEXT: s_mov_b32 s4, s6
1179 ; GFX8-NEXT: s_mov_b32 s5, s7
1180 ; GFX8-NEXT: s_mov_b32 s6, s8
1181 ; GFX8-NEXT: s_mov_b32 s7, s9
1182 ; GFX8-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
1183 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1184 ; GFX8-NEXT: ; return to shader part epilog
1186 ; GFX900-LABEL: atomic_add_i32_3d:
1187 ; GFX900: ; %bb.0: ; %main_body
1188 ; GFX900-NEXT: s_mov_b32 s0, s2
1189 ; GFX900-NEXT: s_mov_b32 s1, s3
1190 ; GFX900-NEXT: s_mov_b32 s2, s4
1191 ; GFX900-NEXT: s_mov_b32 s3, s5
1192 ; GFX900-NEXT: s_mov_b32 s4, s6
1193 ; GFX900-NEXT: s_mov_b32 s5, s7
1194 ; GFX900-NEXT: s_mov_b32 s6, s8
1195 ; GFX900-NEXT: s_mov_b32 s7, s9
1196 ; GFX900-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
1197 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1198 ; GFX900-NEXT: ; return to shader part epilog
1200 ; GFX90A-LABEL: atomic_add_i32_3d:
1201 ; GFX90A: ; %bb.0: ; %main_body
1202 ; GFX90A-NEXT: s_mov_b32 s0, s2
1203 ; GFX90A-NEXT: s_mov_b32 s1, s3
1204 ; GFX90A-NEXT: s_mov_b32 s2, s4
1205 ; GFX90A-NEXT: s_mov_b32 s3, s5
1206 ; GFX90A-NEXT: s_mov_b32 s4, s6
1207 ; GFX90A-NEXT: s_mov_b32 s5, s7
1208 ; GFX90A-NEXT: s_mov_b32 s6, s8
1209 ; GFX90A-NEXT: s_mov_b32 s7, s9
1210 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
1211 ; GFX90A-NEXT: v_mov_b32_e32 v5, v2
1212 ; GFX90A-NEXT: v_mov_b32_e32 v6, v3
1213 ; GFX90A-NEXT: image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc
1214 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1215 ; GFX90A-NEXT: ; return to shader part epilog
1217 ; GFX10PLUS-LABEL: atomic_add_i32_3d:
1218 ; GFX10PLUS: ; %bb.0: ; %main_body
1219 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1220 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1221 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1222 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1223 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1224 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1225 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1226 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1227 ; GFX10PLUS-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm glc
1228 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1229 ; GFX10PLUS-NEXT: ; return to shader part epilog
1231 %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32 %data, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
1232 %out = bitcast i32 %v to float
; Test: llvm.amdgcn.image.atomic.add.cube.i32.i32 with i32 data and three i32
; address operands (%s, %t, %face); the i32 result is bitcast to float.
; All check blocks expect the resource to be moved from s2..s9 down to s[0:7],
; then image_atomic_add with data in v0, a three-register address, dmask:0x1 and
; unorm glc, then a wait on vmcnt(0). The GFX6, GFX8, GFX900 and GFX90A checks
; also expect the da modifier; the GFX10PLUS checks expect
; dim:SQ_RSRC_IMG_CUBE and no da. The address is v[1:3] except in the GFX90A
; checks, which expect it to be copied to v[4:6] first.
1236 define amdgpu_ps float @atomic_add_i32_cube(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %face) {
1237 ; GFX6-LABEL: atomic_add_i32_cube:
1238 ; GFX6: ; %bb.0: ; %main_body
1239 ; GFX6-NEXT: s_mov_b32 s0, s2
1240 ; GFX6-NEXT: s_mov_b32 s1, s3
1241 ; GFX6-NEXT: s_mov_b32 s2, s4
1242 ; GFX6-NEXT: s_mov_b32 s3, s5
1243 ; GFX6-NEXT: s_mov_b32 s4, s6
1244 ; GFX6-NEXT: s_mov_b32 s5, s7
1245 ; GFX6-NEXT: s_mov_b32 s6, s8
1246 ; GFX6-NEXT: s_mov_b32 s7, s9
1247 ; GFX6-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
1248 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1249 ; GFX6-NEXT: ; return to shader part epilog
1251 ; GFX8-LABEL: atomic_add_i32_cube:
1252 ; GFX8: ; %bb.0: ; %main_body
1253 ; GFX8-NEXT: s_mov_b32 s0, s2
1254 ; GFX8-NEXT: s_mov_b32 s1, s3
1255 ; GFX8-NEXT: s_mov_b32 s2, s4
1256 ; GFX8-NEXT: s_mov_b32 s3, s5
1257 ; GFX8-NEXT: s_mov_b32 s4, s6
1258 ; GFX8-NEXT: s_mov_b32 s5, s7
1259 ; GFX8-NEXT: s_mov_b32 s6, s8
1260 ; GFX8-NEXT: s_mov_b32 s7, s9
1261 ; GFX8-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
1262 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1263 ; GFX8-NEXT: ; return to shader part epilog
1265 ; GFX900-LABEL: atomic_add_i32_cube:
1266 ; GFX900: ; %bb.0: ; %main_body
1267 ; GFX900-NEXT: s_mov_b32 s0, s2
1268 ; GFX900-NEXT: s_mov_b32 s1, s3
1269 ; GFX900-NEXT: s_mov_b32 s2, s4
1270 ; GFX900-NEXT: s_mov_b32 s3, s5
1271 ; GFX900-NEXT: s_mov_b32 s4, s6
1272 ; GFX900-NEXT: s_mov_b32 s5, s7
1273 ; GFX900-NEXT: s_mov_b32 s6, s8
1274 ; GFX900-NEXT: s_mov_b32 s7, s9
1275 ; GFX900-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
1276 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1277 ; GFX900-NEXT: ; return to shader part epilog
1279 ; GFX90A-LABEL: atomic_add_i32_cube:
1280 ; GFX90A: ; %bb.0: ; %main_body
1281 ; GFX90A-NEXT: s_mov_b32 s0, s2
1282 ; GFX90A-NEXT: s_mov_b32 s1, s3
1283 ; GFX90A-NEXT: s_mov_b32 s2, s4
1284 ; GFX90A-NEXT: s_mov_b32 s3, s5
1285 ; GFX90A-NEXT: s_mov_b32 s4, s6
1286 ; GFX90A-NEXT: s_mov_b32 s5, s7
1287 ; GFX90A-NEXT: s_mov_b32 s6, s8
1288 ; GFX90A-NEXT: s_mov_b32 s7, s9
1289 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
1290 ; GFX90A-NEXT: v_mov_b32_e32 v5, v2
1291 ; GFX90A-NEXT: v_mov_b32_e32 v6, v3
1292 ; GFX90A-NEXT: image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc da
1293 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1294 ; GFX90A-NEXT: ; return to shader part epilog
1296 ; GFX10PLUS-LABEL: atomic_add_i32_cube:
1297 ; GFX10PLUS: ; %bb.0: ; %main_body
1298 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1299 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1300 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1301 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1302 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1303 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1304 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1305 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1306 ; GFX10PLUS-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm glc
1307 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1308 ; GFX10PLUS-NEXT: ; return to shader part epilog
1310 %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32 %data, i32 %s, i32 %t, i32 %face, <8 x i32> %rsrc, i32 0, i32 0)
1311 %out = bitcast i32 %v to float
; Test: llvm.amdgcn.image.atomic.add.1darray.i32.i32 with i32 data and two i32
; address operands (%s, %slice); the i32 result is bitcast to float.
; All check blocks expect the resource to be moved from s2..s9 down to s[0:7],
; then image_atomic_add with data in v0, a two-register address, dmask:0x1 and
; unorm glc, then a wait on vmcnt(0). The GFX6, GFX8, GFX900 and GFX90A checks
; also expect the da modifier; the GFX10PLUS checks expect
; dim:SQ_RSRC_IMG_1D_ARRAY and no da. The address is v[1:2] except in the GFX90A
; checks, which expect it to be copied to v[4:5] first.
1315 define amdgpu_ps float @atomic_add_i32_1darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %slice) {
1316 ; GFX6-LABEL: atomic_add_i32_1darray:
1317 ; GFX6: ; %bb.0: ; %main_body
1318 ; GFX6-NEXT: s_mov_b32 s0, s2
1319 ; GFX6-NEXT: s_mov_b32 s1, s3
1320 ; GFX6-NEXT: s_mov_b32 s2, s4
1321 ; GFX6-NEXT: s_mov_b32 s3, s5
1322 ; GFX6-NEXT: s_mov_b32 s4, s6
1323 ; GFX6-NEXT: s_mov_b32 s5, s7
1324 ; GFX6-NEXT: s_mov_b32 s6, s8
1325 ; GFX6-NEXT: s_mov_b32 s7, s9
1326 ; GFX6-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da
1327 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1328 ; GFX6-NEXT: ; return to shader part epilog
1330 ; GFX8-LABEL: atomic_add_i32_1darray:
1331 ; GFX8: ; %bb.0: ; %main_body
1332 ; GFX8-NEXT: s_mov_b32 s0, s2
1333 ; GFX8-NEXT: s_mov_b32 s1, s3
1334 ; GFX8-NEXT: s_mov_b32 s2, s4
1335 ; GFX8-NEXT: s_mov_b32 s3, s5
1336 ; GFX8-NEXT: s_mov_b32 s4, s6
1337 ; GFX8-NEXT: s_mov_b32 s5, s7
1338 ; GFX8-NEXT: s_mov_b32 s6, s8
1339 ; GFX8-NEXT: s_mov_b32 s7, s9
1340 ; GFX8-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da
1341 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1342 ; GFX8-NEXT: ; return to shader part epilog
1344 ; GFX900-LABEL: atomic_add_i32_1darray:
1345 ; GFX900: ; %bb.0: ; %main_body
1346 ; GFX900-NEXT: s_mov_b32 s0, s2
1347 ; GFX900-NEXT: s_mov_b32 s1, s3
1348 ; GFX900-NEXT: s_mov_b32 s2, s4
1349 ; GFX900-NEXT: s_mov_b32 s3, s5
1350 ; GFX900-NEXT: s_mov_b32 s4, s6
1351 ; GFX900-NEXT: s_mov_b32 s5, s7
1352 ; GFX900-NEXT: s_mov_b32 s6, s8
1353 ; GFX900-NEXT: s_mov_b32 s7, s9
1354 ; GFX900-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da
1355 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1356 ; GFX900-NEXT: ; return to shader part epilog
1358 ; GFX90A-LABEL: atomic_add_i32_1darray:
1359 ; GFX90A: ; %bb.0: ; %main_body
1360 ; GFX90A-NEXT: s_mov_b32 s0, s2
1361 ; GFX90A-NEXT: s_mov_b32 s1, s3
1362 ; GFX90A-NEXT: s_mov_b32 s2, s4
1363 ; GFX90A-NEXT: s_mov_b32 s3, s5
1364 ; GFX90A-NEXT: s_mov_b32 s4, s6
1365 ; GFX90A-NEXT: s_mov_b32 s5, s7
1366 ; GFX90A-NEXT: s_mov_b32 s6, s8
1367 ; GFX90A-NEXT: s_mov_b32 s7, s9
1368 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
1369 ; GFX90A-NEXT: v_mov_b32_e32 v5, v2
1370 ; GFX90A-NEXT: image_atomic_add v0, v[4:5], s[0:7] dmask:0x1 unorm glc da
1371 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1372 ; GFX90A-NEXT: ; return to shader part epilog
1374 ; GFX10PLUS-LABEL: atomic_add_i32_1darray:
1375 ; GFX10PLUS: ; %bb.0: ; %main_body
1376 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1377 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1378 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1379 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1380 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1381 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1382 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1383 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1384 ; GFX10PLUS-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc
1385 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1386 ; GFX10PLUS-NEXT: ; return to shader part epilog
1388 %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32 %data, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
1389 %out = bitcast i32 %v to float
; Test: llvm.amdgcn.image.atomic.add.2darray.i32.i32 with i32 data and three i32
; address operands (%s, %t, %slice); the i32 result is bitcast to float.
; All check blocks expect the resource to be moved from s2..s9 down to s[0:7],
; then image_atomic_add with data in v0, a three-register address, dmask:0x1 and
; unorm glc, then a wait on vmcnt(0). The GFX6, GFX8, GFX900 and GFX90A checks
; also expect the da modifier; the GFX10PLUS checks expect
; dim:SQ_RSRC_IMG_2D_ARRAY and no da. The address is v[1:3] except in the GFX90A
; checks, which expect it to be copied to v[4:6] first.
1393 define amdgpu_ps float @atomic_add_i32_2darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice) {
1394 ; GFX6-LABEL: atomic_add_i32_2darray:
1395 ; GFX6: ; %bb.0: ; %main_body
1396 ; GFX6-NEXT: s_mov_b32 s0, s2
1397 ; GFX6-NEXT: s_mov_b32 s1, s3
1398 ; GFX6-NEXT: s_mov_b32 s2, s4
1399 ; GFX6-NEXT: s_mov_b32 s3, s5
1400 ; GFX6-NEXT: s_mov_b32 s4, s6
1401 ; GFX6-NEXT: s_mov_b32 s5, s7
1402 ; GFX6-NEXT: s_mov_b32 s6, s8
1403 ; GFX6-NEXT: s_mov_b32 s7, s9
1404 ; GFX6-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
1405 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1406 ; GFX6-NEXT: ; return to shader part epilog
1408 ; GFX8-LABEL: atomic_add_i32_2darray:
1409 ; GFX8: ; %bb.0: ; %main_body
1410 ; GFX8-NEXT: s_mov_b32 s0, s2
1411 ; GFX8-NEXT: s_mov_b32 s1, s3
1412 ; GFX8-NEXT: s_mov_b32 s2, s4
1413 ; GFX8-NEXT: s_mov_b32 s3, s5
1414 ; GFX8-NEXT: s_mov_b32 s4, s6
1415 ; GFX8-NEXT: s_mov_b32 s5, s7
1416 ; GFX8-NEXT: s_mov_b32 s6, s8
1417 ; GFX8-NEXT: s_mov_b32 s7, s9
1418 ; GFX8-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
1419 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1420 ; GFX8-NEXT: ; return to shader part epilog
1422 ; GFX900-LABEL: atomic_add_i32_2darray:
1423 ; GFX900: ; %bb.0: ; %main_body
1424 ; GFX900-NEXT: s_mov_b32 s0, s2
1425 ; GFX900-NEXT: s_mov_b32 s1, s3
1426 ; GFX900-NEXT: s_mov_b32 s2, s4
1427 ; GFX900-NEXT: s_mov_b32 s3, s5
1428 ; GFX900-NEXT: s_mov_b32 s4, s6
1429 ; GFX900-NEXT: s_mov_b32 s5, s7
1430 ; GFX900-NEXT: s_mov_b32 s6, s8
1431 ; GFX900-NEXT: s_mov_b32 s7, s9
1432 ; GFX900-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
1433 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1434 ; GFX900-NEXT: ; return to shader part epilog
1436 ; GFX90A-LABEL: atomic_add_i32_2darray:
1437 ; GFX90A: ; %bb.0: ; %main_body
1438 ; GFX90A-NEXT: s_mov_b32 s0, s2
1439 ; GFX90A-NEXT: s_mov_b32 s1, s3
1440 ; GFX90A-NEXT: s_mov_b32 s2, s4
1441 ; GFX90A-NEXT: s_mov_b32 s3, s5
1442 ; GFX90A-NEXT: s_mov_b32 s4, s6
1443 ; GFX90A-NEXT: s_mov_b32 s5, s7
1444 ; GFX90A-NEXT: s_mov_b32 s6, s8
1445 ; GFX90A-NEXT: s_mov_b32 s7, s9
1446 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
1447 ; GFX90A-NEXT: v_mov_b32_e32 v5, v2
1448 ; GFX90A-NEXT: v_mov_b32_e32 v6, v3
1449 ; GFX90A-NEXT: image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc da
1450 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1451 ; GFX90A-NEXT: ; return to shader part epilog
1453 ; GFX10PLUS-LABEL: atomic_add_i32_2darray:
1454 ; GFX10PLUS: ; %bb.0: ; %main_body
1455 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1456 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1457 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1458 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1459 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1460 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1461 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1462 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1463 ; GFX10PLUS-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc
1464 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1465 ; GFX10PLUS-NEXT: ; return to shader part epilog
1467 %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
1468 %out = bitcast i32 %v to float
; Test: llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32 with i32 data and three i32
; address operands (%s, %t, %fragid); the i32 result is bitcast to float.
; All check blocks expect the resource to be moved from s2..s9 down to s[0:7],
; then image_atomic_add with data in v0, a three-register address, dmask:0x1 and
; unorm glc (no da modifier), then a wait on vmcnt(0). The address is v[1:3]
; except in the GFX90A checks, which expect it to be copied to v[4:6] first.
; The GFX10PLUS checks add dim:SQ_RSRC_IMG_2D_MSAA.
1472 define amdgpu_ps float @atomic_add_i32_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %fragid) {
1473 ; GFX6-LABEL: atomic_add_i32_2dmsaa:
1474 ; GFX6: ; %bb.0: ; %main_body
1475 ; GFX6-NEXT: s_mov_b32 s0, s2
1476 ; GFX6-NEXT: s_mov_b32 s1, s3
1477 ; GFX6-NEXT: s_mov_b32 s2, s4
1478 ; GFX6-NEXT: s_mov_b32 s3, s5
1479 ; GFX6-NEXT: s_mov_b32 s4, s6
1480 ; GFX6-NEXT: s_mov_b32 s5, s7
1481 ; GFX6-NEXT: s_mov_b32 s6, s8
1482 ; GFX6-NEXT: s_mov_b32 s7, s9
1483 ; GFX6-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
1484 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1485 ; GFX6-NEXT: ; return to shader part epilog
1487 ; GFX8-LABEL: atomic_add_i32_2dmsaa:
1488 ; GFX8: ; %bb.0: ; %main_body
1489 ; GFX8-NEXT: s_mov_b32 s0, s2
1490 ; GFX8-NEXT: s_mov_b32 s1, s3
1491 ; GFX8-NEXT: s_mov_b32 s2, s4
1492 ; GFX8-NEXT: s_mov_b32 s3, s5
1493 ; GFX8-NEXT: s_mov_b32 s4, s6
1494 ; GFX8-NEXT: s_mov_b32 s5, s7
1495 ; GFX8-NEXT: s_mov_b32 s6, s8
1496 ; GFX8-NEXT: s_mov_b32 s7, s9
1497 ; GFX8-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
1498 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1499 ; GFX8-NEXT: ; return to shader part epilog
1501 ; GFX900-LABEL: atomic_add_i32_2dmsaa:
1502 ; GFX900: ; %bb.0: ; %main_body
1503 ; GFX900-NEXT: s_mov_b32 s0, s2
1504 ; GFX900-NEXT: s_mov_b32 s1, s3
1505 ; GFX900-NEXT: s_mov_b32 s2, s4
1506 ; GFX900-NEXT: s_mov_b32 s3, s5
1507 ; GFX900-NEXT: s_mov_b32 s4, s6
1508 ; GFX900-NEXT: s_mov_b32 s5, s7
1509 ; GFX900-NEXT: s_mov_b32 s6, s8
1510 ; GFX900-NEXT: s_mov_b32 s7, s9
1511 ; GFX900-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
1512 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1513 ; GFX900-NEXT: ; return to shader part epilog
1515 ; GFX90A-LABEL: atomic_add_i32_2dmsaa:
1516 ; GFX90A: ; %bb.0: ; %main_body
1517 ; GFX90A-NEXT: s_mov_b32 s0, s2
1518 ; GFX90A-NEXT: s_mov_b32 s1, s3
1519 ; GFX90A-NEXT: s_mov_b32 s2, s4
1520 ; GFX90A-NEXT: s_mov_b32 s3, s5
1521 ; GFX90A-NEXT: s_mov_b32 s4, s6
1522 ; GFX90A-NEXT: s_mov_b32 s5, s7
1523 ; GFX90A-NEXT: s_mov_b32 s6, s8
1524 ; GFX90A-NEXT: s_mov_b32 s7, s9
1525 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
1526 ; GFX90A-NEXT: v_mov_b32_e32 v5, v2
1527 ; GFX90A-NEXT: v_mov_b32_e32 v6, v3
1528 ; GFX90A-NEXT: image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc
1529 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1530 ; GFX90A-NEXT: ; return to shader part epilog
1532 ; GFX10PLUS-LABEL: atomic_add_i32_2dmsaa:
1533 ; GFX10PLUS: ; %bb.0: ; %main_body
1534 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1535 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1536 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1537 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1538 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1539 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1540 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1541 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1542 ; GFX10PLUS-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc
1543 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1544 ; GFX10PLUS-NEXT: ; return to shader part epilog
1546 %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
1547 %out = bitcast i32 %v to float
; Test: llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32 with i32 data and four
; i32 address operands (%s, %t, %slice, %fragid); the i32 result is bitcast to
; float.
; All check blocks expect the resource to be moved from s2..s9 down to s[0:7],
; then image_atomic_add with data in v0, a four-register address, dmask:0x1 and
; unorm glc, then a wait on vmcnt(0). The GFX6, GFX8, GFX900 and GFX90A checks
; also expect the da modifier; the GFX10PLUS checks expect
; dim:SQ_RSRC_IMG_2D_MSAA_ARRAY and no da. The address is v[1:4] except in the
; GFX90A checks, which expect it to be copied to v[6:9] first.
1551 define amdgpu_ps float @atomic_add_i32_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
1552 ; GFX6-LABEL: atomic_add_i32_2darraymsaa:
1553 ; GFX6: ; %bb.0: ; %main_body
1554 ; GFX6-NEXT: s_mov_b32 s0, s2
1555 ; GFX6-NEXT: s_mov_b32 s1, s3
1556 ; GFX6-NEXT: s_mov_b32 s2, s4
1557 ; GFX6-NEXT: s_mov_b32 s3, s5
1558 ; GFX6-NEXT: s_mov_b32 s4, s6
1559 ; GFX6-NEXT: s_mov_b32 s5, s7
1560 ; GFX6-NEXT: s_mov_b32 s6, s8
1561 ; GFX6-NEXT: s_mov_b32 s7, s9
1562 ; GFX6-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da
1563 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1564 ; GFX6-NEXT: ; return to shader part epilog
1566 ; GFX8-LABEL: atomic_add_i32_2darraymsaa:
1567 ; GFX8: ; %bb.0: ; %main_body
1568 ; GFX8-NEXT: s_mov_b32 s0, s2
1569 ; GFX8-NEXT: s_mov_b32 s1, s3
1570 ; GFX8-NEXT: s_mov_b32 s2, s4
1571 ; GFX8-NEXT: s_mov_b32 s3, s5
1572 ; GFX8-NEXT: s_mov_b32 s4, s6
1573 ; GFX8-NEXT: s_mov_b32 s5, s7
1574 ; GFX8-NEXT: s_mov_b32 s6, s8
1575 ; GFX8-NEXT: s_mov_b32 s7, s9
1576 ; GFX8-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da
1577 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1578 ; GFX8-NEXT: ; return to shader part epilog
1580 ; GFX900-LABEL: atomic_add_i32_2darraymsaa:
1581 ; GFX900: ; %bb.0: ; %main_body
1582 ; GFX900-NEXT: s_mov_b32 s0, s2
1583 ; GFX900-NEXT: s_mov_b32 s1, s3
1584 ; GFX900-NEXT: s_mov_b32 s2, s4
1585 ; GFX900-NEXT: s_mov_b32 s3, s5
1586 ; GFX900-NEXT: s_mov_b32 s4, s6
1587 ; GFX900-NEXT: s_mov_b32 s5, s7
1588 ; GFX900-NEXT: s_mov_b32 s6, s8
1589 ; GFX900-NEXT: s_mov_b32 s7, s9
1590 ; GFX900-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da
1591 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1592 ; GFX900-NEXT: ; return to shader part epilog
1594 ; GFX90A-LABEL: atomic_add_i32_2darraymsaa:
1595 ; GFX90A: ; %bb.0: ; %main_body
1596 ; GFX90A-NEXT: s_mov_b32 s0, s2
1597 ; GFX90A-NEXT: s_mov_b32 s1, s3
1598 ; GFX90A-NEXT: s_mov_b32 s2, s4
1599 ; GFX90A-NEXT: s_mov_b32 s3, s5
1600 ; GFX90A-NEXT: s_mov_b32 s4, s6
1601 ; GFX90A-NEXT: s_mov_b32 s5, s7
1602 ; GFX90A-NEXT: s_mov_b32 s6, s8
1603 ; GFX90A-NEXT: s_mov_b32 s7, s9
1604 ; GFX90A-NEXT: v_mov_b32_e32 v6, v1
1605 ; GFX90A-NEXT: v_mov_b32_e32 v7, v2
1606 ; GFX90A-NEXT: v_mov_b32_e32 v8, v3
1607 ; GFX90A-NEXT: v_mov_b32_e32 v9, v4
1608 ; GFX90A-NEXT: image_atomic_add v0, v[6:9], s[0:7] dmask:0x1 unorm glc da
1609 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1610 ; GFX90A-NEXT: ; return to shader part epilog
1612 ; GFX10PLUS-LABEL: atomic_add_i32_2darraymsaa:
1613 ; GFX10PLUS: ; %bb.0: ; %main_body
1614 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1615 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1616 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1617 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1618 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1619 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1620 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1621 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1622 ; GFX10PLUS-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc
1623 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1624 ; GFX10PLUS-NEXT: ; return to shader part epilog
1626 %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
1627 %out = bitcast i32 %v to float
; 32-bit image atomic add on a 1D image whose final intrinsic operand is 2
; rather than 0 (presumably the cache-policy bit that requests slc; confirm
; against the intrinsic definition). The expected image_atomic_add on every
; checked target carries the slc modifier in addition to glc, and the gfx90a
; expectation additionally copies v1 into v2 before the atomic.
1631 define amdgpu_ps float @atomic_add_i32_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
1632 ; GFX6-LABEL: atomic_add_i32_1d_slc:
1633 ; GFX6: ; %bb.0: ; %main_body
1634 ; GFX6-NEXT: s_mov_b32 s0, s2
1635 ; GFX6-NEXT: s_mov_b32 s1, s3
1636 ; GFX6-NEXT: s_mov_b32 s2, s4
1637 ; GFX6-NEXT: s_mov_b32 s3, s5
1638 ; GFX6-NEXT: s_mov_b32 s4, s6
1639 ; GFX6-NEXT: s_mov_b32 s5, s7
1640 ; GFX6-NEXT: s_mov_b32 s6, s8
1641 ; GFX6-NEXT: s_mov_b32 s7, s9
1642 ; GFX6-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc
1643 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1644 ; GFX6-NEXT: ; return to shader part epilog
1646 ; GFX8-LABEL: atomic_add_i32_1d_slc:
1647 ; GFX8: ; %bb.0: ; %main_body
1648 ; GFX8-NEXT: s_mov_b32 s0, s2
1649 ; GFX8-NEXT: s_mov_b32 s1, s3
1650 ; GFX8-NEXT: s_mov_b32 s2, s4
1651 ; GFX8-NEXT: s_mov_b32 s3, s5
1652 ; GFX8-NEXT: s_mov_b32 s4, s6
1653 ; GFX8-NEXT: s_mov_b32 s5, s7
1654 ; GFX8-NEXT: s_mov_b32 s6, s8
1655 ; GFX8-NEXT: s_mov_b32 s7, s9
1656 ; GFX8-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc
1657 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1658 ; GFX8-NEXT: ; return to shader part epilog
1660 ; GFX900-LABEL: atomic_add_i32_1d_slc:
1661 ; GFX900: ; %bb.0: ; %main_body
1662 ; GFX900-NEXT: s_mov_b32 s0, s2
1663 ; GFX900-NEXT: s_mov_b32 s1, s3
1664 ; GFX900-NEXT: s_mov_b32 s2, s4
1665 ; GFX900-NEXT: s_mov_b32 s3, s5
1666 ; GFX900-NEXT: s_mov_b32 s4, s6
1667 ; GFX900-NEXT: s_mov_b32 s5, s7
1668 ; GFX900-NEXT: s_mov_b32 s6, s8
1669 ; GFX900-NEXT: s_mov_b32 s7, s9
1670 ; GFX900-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc
1671 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1672 ; GFX900-NEXT: ; return to shader part epilog
1674 ; GFX90A-LABEL: atomic_add_i32_1d_slc:
1675 ; GFX90A: ; %bb.0: ; %main_body
1676 ; GFX90A-NEXT: s_mov_b32 s0, s2
1677 ; GFX90A-NEXT: s_mov_b32 s1, s3
1678 ; GFX90A-NEXT: s_mov_b32 s2, s4
1679 ; GFX90A-NEXT: s_mov_b32 s3, s5
1680 ; GFX90A-NEXT: s_mov_b32 s4, s6
1681 ; GFX90A-NEXT: s_mov_b32 s5, s7
1682 ; GFX90A-NEXT: s_mov_b32 s6, s8
1683 ; GFX90A-NEXT: s_mov_b32 s7, s9
1684 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
1685 ; GFX90A-NEXT: image_atomic_add v0, v2, s[0:7] dmask:0x1 unorm glc slc
1686 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1687 ; GFX90A-NEXT: ; return to shader part epilog
1689 ; GFX10PLUS-LABEL: atomic_add_i32_1d_slc:
1690 ; GFX10PLUS: ; %bb.0: ; %main_body
1691 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1692 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1693 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1694 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1695 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1696 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1697 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1698 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1699 ; GFX10PLUS-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc slc
1700 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1701 ; GFX10PLUS-NEXT: ; return to shader part epilog
1703 %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
1704 %out = bitcast i32 %v to float
; 64-bit (i64) image atomic swap on a 1D image; the i64 result is bitcast to
; <2 x float> for the return. Every checked target is expected to emit
; image_atomic_swap with v[0:1], v2 and dmask:0x3. The gfx10+ form also carries
; dim:SQ_RSRC_IMG_1D, and the tahiti run waits on expcnt(0) as well as vmcnt(0).
1708 define amdgpu_ps <2 x float> @atomic_swap_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
1709 ; GFX6-LABEL: atomic_swap_i64_1d:
1710 ; GFX6: ; %bb.0: ; %main_body
1711 ; GFX6-NEXT: s_mov_b32 s0, s2
1712 ; GFX6-NEXT: s_mov_b32 s1, s3
1713 ; GFX6-NEXT: s_mov_b32 s2, s4
1714 ; GFX6-NEXT: s_mov_b32 s3, s5
1715 ; GFX6-NEXT: s_mov_b32 s4, s6
1716 ; GFX6-NEXT: s_mov_b32 s5, s7
1717 ; GFX6-NEXT: s_mov_b32 s6, s8
1718 ; GFX6-NEXT: s_mov_b32 s7, s9
1719 ; GFX6-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1720 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1721 ; GFX6-NEXT: ; return to shader part epilog
1723 ; GFX8-LABEL: atomic_swap_i64_1d:
1724 ; GFX8: ; %bb.0: ; %main_body
1725 ; GFX8-NEXT: s_mov_b32 s0, s2
1726 ; GFX8-NEXT: s_mov_b32 s1, s3
1727 ; GFX8-NEXT: s_mov_b32 s2, s4
1728 ; GFX8-NEXT: s_mov_b32 s3, s5
1729 ; GFX8-NEXT: s_mov_b32 s4, s6
1730 ; GFX8-NEXT: s_mov_b32 s5, s7
1731 ; GFX8-NEXT: s_mov_b32 s6, s8
1732 ; GFX8-NEXT: s_mov_b32 s7, s9
1733 ; GFX8-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1734 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1735 ; GFX8-NEXT: ; return to shader part epilog
1737 ; GFX900-LABEL: atomic_swap_i64_1d:
1738 ; GFX900: ; %bb.0: ; %main_body
1739 ; GFX900-NEXT: s_mov_b32 s0, s2
1740 ; GFX900-NEXT: s_mov_b32 s1, s3
1741 ; GFX900-NEXT: s_mov_b32 s2, s4
1742 ; GFX900-NEXT: s_mov_b32 s3, s5
1743 ; GFX900-NEXT: s_mov_b32 s4, s6
1744 ; GFX900-NEXT: s_mov_b32 s5, s7
1745 ; GFX900-NEXT: s_mov_b32 s6, s8
1746 ; GFX900-NEXT: s_mov_b32 s7, s9
1747 ; GFX900-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1748 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1749 ; GFX900-NEXT: ; return to shader part epilog
1751 ; GFX90A-LABEL: atomic_swap_i64_1d:
1752 ; GFX90A: ; %bb.0: ; %main_body
1753 ; GFX90A-NEXT: s_mov_b32 s0, s2
1754 ; GFX90A-NEXT: s_mov_b32 s1, s3
1755 ; GFX90A-NEXT: s_mov_b32 s2, s4
1756 ; GFX90A-NEXT: s_mov_b32 s3, s5
1757 ; GFX90A-NEXT: s_mov_b32 s4, s6
1758 ; GFX90A-NEXT: s_mov_b32 s5, s7
1759 ; GFX90A-NEXT: s_mov_b32 s6, s8
1760 ; GFX90A-NEXT: s_mov_b32 s7, s9
1761 ; GFX90A-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1762 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1763 ; GFX90A-NEXT: ; return to shader part epilog
1765 ; GFX10PLUS-LABEL: atomic_swap_i64_1d:
1766 ; GFX10PLUS: ; %bb.0: ; %main_body
1767 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1768 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1769 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1770 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1771 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1772 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1773 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1774 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1775 ; GFX10PLUS-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
1776 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1777 ; GFX10PLUS-NEXT: ; return to shader part epilog
1779 %v = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1780 %out = bitcast i64 %v to <2 x float>
1781 ret <2 x float> %out
; 64-bit (i64) image atomic add on a 1D image; the i64 result is bitcast to
; <2 x float> for the return. Every checked target is expected to emit
; image_atomic_add with v[0:1], v2 and dmask:0x3. The gfx10+ form also carries
; dim:SQ_RSRC_IMG_1D, and the tahiti run waits on expcnt(0) as well as vmcnt(0).
1784 define amdgpu_ps <2 x float> @atomic_add_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
1785 ; GFX6-LABEL: atomic_add_i64_1d:
1786 ; GFX6: ; %bb.0: ; %main_body
1787 ; GFX6-NEXT: s_mov_b32 s0, s2
1788 ; GFX6-NEXT: s_mov_b32 s1, s3
1789 ; GFX6-NEXT: s_mov_b32 s2, s4
1790 ; GFX6-NEXT: s_mov_b32 s3, s5
1791 ; GFX6-NEXT: s_mov_b32 s4, s6
1792 ; GFX6-NEXT: s_mov_b32 s5, s7
1793 ; GFX6-NEXT: s_mov_b32 s6, s8
1794 ; GFX6-NEXT: s_mov_b32 s7, s9
1795 ; GFX6-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1796 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1797 ; GFX6-NEXT: ; return to shader part epilog
1799 ; GFX8-LABEL: atomic_add_i64_1d:
1800 ; GFX8: ; %bb.0: ; %main_body
1801 ; GFX8-NEXT: s_mov_b32 s0, s2
1802 ; GFX8-NEXT: s_mov_b32 s1, s3
1803 ; GFX8-NEXT: s_mov_b32 s2, s4
1804 ; GFX8-NEXT: s_mov_b32 s3, s5
1805 ; GFX8-NEXT: s_mov_b32 s4, s6
1806 ; GFX8-NEXT: s_mov_b32 s5, s7
1807 ; GFX8-NEXT: s_mov_b32 s6, s8
1808 ; GFX8-NEXT: s_mov_b32 s7, s9
1809 ; GFX8-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1810 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1811 ; GFX8-NEXT: ; return to shader part epilog
1813 ; GFX900-LABEL: atomic_add_i64_1d:
1814 ; GFX900: ; %bb.0: ; %main_body
1815 ; GFX900-NEXT: s_mov_b32 s0, s2
1816 ; GFX900-NEXT: s_mov_b32 s1, s3
1817 ; GFX900-NEXT: s_mov_b32 s2, s4
1818 ; GFX900-NEXT: s_mov_b32 s3, s5
1819 ; GFX900-NEXT: s_mov_b32 s4, s6
1820 ; GFX900-NEXT: s_mov_b32 s5, s7
1821 ; GFX900-NEXT: s_mov_b32 s6, s8
1822 ; GFX900-NEXT: s_mov_b32 s7, s9
1823 ; GFX900-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1824 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1825 ; GFX900-NEXT: ; return to shader part epilog
1827 ; GFX90A-LABEL: atomic_add_i64_1d:
1828 ; GFX90A: ; %bb.0: ; %main_body
1829 ; GFX90A-NEXT: s_mov_b32 s0, s2
1830 ; GFX90A-NEXT: s_mov_b32 s1, s3
1831 ; GFX90A-NEXT: s_mov_b32 s2, s4
1832 ; GFX90A-NEXT: s_mov_b32 s3, s5
1833 ; GFX90A-NEXT: s_mov_b32 s4, s6
1834 ; GFX90A-NEXT: s_mov_b32 s5, s7
1835 ; GFX90A-NEXT: s_mov_b32 s6, s8
1836 ; GFX90A-NEXT: s_mov_b32 s7, s9
1837 ; GFX90A-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1838 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1839 ; GFX90A-NEXT: ; return to shader part epilog
1841 ; GFX10PLUS-LABEL: atomic_add_i64_1d:
1842 ; GFX10PLUS: ; %bb.0: ; %main_body
1843 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1844 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1845 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1846 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1847 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1848 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1849 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1850 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1851 ; GFX10PLUS-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
1852 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1853 ; GFX10PLUS-NEXT: ; return to shader part epilog
1855 %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1856 %out = bitcast i64 %v to <2 x float>
1857 ret <2 x float> %out
; 64-bit (i64) image atomic sub on a 1D image; the i64 result is bitcast to
; <2 x float> for the return. Every checked target is expected to emit
; image_atomic_sub with v[0:1], v2 and dmask:0x3. The gfx10+ form also carries
; dim:SQ_RSRC_IMG_1D, and the tahiti run waits on expcnt(0) as well as vmcnt(0).
1860 define amdgpu_ps <2 x float> @atomic_sub_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
1861 ; GFX6-LABEL: atomic_sub_i64_1d:
1862 ; GFX6: ; %bb.0: ; %main_body
1863 ; GFX6-NEXT: s_mov_b32 s0, s2
1864 ; GFX6-NEXT: s_mov_b32 s1, s3
1865 ; GFX6-NEXT: s_mov_b32 s2, s4
1866 ; GFX6-NEXT: s_mov_b32 s3, s5
1867 ; GFX6-NEXT: s_mov_b32 s4, s6
1868 ; GFX6-NEXT: s_mov_b32 s5, s7
1869 ; GFX6-NEXT: s_mov_b32 s6, s8
1870 ; GFX6-NEXT: s_mov_b32 s7, s9
1871 ; GFX6-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1872 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1873 ; GFX6-NEXT: ; return to shader part epilog
1875 ; GFX8-LABEL: atomic_sub_i64_1d:
1876 ; GFX8: ; %bb.0: ; %main_body
1877 ; GFX8-NEXT: s_mov_b32 s0, s2
1878 ; GFX8-NEXT: s_mov_b32 s1, s3
1879 ; GFX8-NEXT: s_mov_b32 s2, s4
1880 ; GFX8-NEXT: s_mov_b32 s3, s5
1881 ; GFX8-NEXT: s_mov_b32 s4, s6
1882 ; GFX8-NEXT: s_mov_b32 s5, s7
1883 ; GFX8-NEXT: s_mov_b32 s6, s8
1884 ; GFX8-NEXT: s_mov_b32 s7, s9
1885 ; GFX8-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1886 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1887 ; GFX8-NEXT: ; return to shader part epilog
1889 ; GFX900-LABEL: atomic_sub_i64_1d:
1890 ; GFX900: ; %bb.0: ; %main_body
1891 ; GFX900-NEXT: s_mov_b32 s0, s2
1892 ; GFX900-NEXT: s_mov_b32 s1, s3
1893 ; GFX900-NEXT: s_mov_b32 s2, s4
1894 ; GFX900-NEXT: s_mov_b32 s3, s5
1895 ; GFX900-NEXT: s_mov_b32 s4, s6
1896 ; GFX900-NEXT: s_mov_b32 s5, s7
1897 ; GFX900-NEXT: s_mov_b32 s6, s8
1898 ; GFX900-NEXT: s_mov_b32 s7, s9
1899 ; GFX900-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1900 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1901 ; GFX900-NEXT: ; return to shader part epilog
1903 ; GFX90A-LABEL: atomic_sub_i64_1d:
1904 ; GFX90A: ; %bb.0: ; %main_body
1905 ; GFX90A-NEXT: s_mov_b32 s0, s2
1906 ; GFX90A-NEXT: s_mov_b32 s1, s3
1907 ; GFX90A-NEXT: s_mov_b32 s2, s4
1908 ; GFX90A-NEXT: s_mov_b32 s3, s5
1909 ; GFX90A-NEXT: s_mov_b32 s4, s6
1910 ; GFX90A-NEXT: s_mov_b32 s5, s7
1911 ; GFX90A-NEXT: s_mov_b32 s6, s8
1912 ; GFX90A-NEXT: s_mov_b32 s7, s9
1913 ; GFX90A-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1914 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1915 ; GFX90A-NEXT: ; return to shader part epilog
1917 ; GFX10PLUS-LABEL: atomic_sub_i64_1d:
1918 ; GFX10PLUS: ; %bb.0: ; %main_body
1919 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1920 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1921 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1922 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1923 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1924 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1925 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1926 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1927 ; GFX10PLUS-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
1928 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1929 ; GFX10PLUS-NEXT: ; return to shader part epilog
1931 %v = call i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1932 %out = bitcast i64 %v to <2 x float>
1933 ret <2 x float> %out
; 64-bit (i64) image atomic smin on a 1D image; the i64 result is bitcast to
; <2 x float> for the return. Every checked target is expected to emit
; image_atomic_smin with v[0:1], v2 and dmask:0x3. The gfx10+ form also carries
; dim:SQ_RSRC_IMG_1D, and the tahiti run waits on expcnt(0) as well as vmcnt(0).
1936 define amdgpu_ps <2 x float> @atomic_smin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
1937 ; GFX6-LABEL: atomic_smin_i64_1d:
1938 ; GFX6: ; %bb.0: ; %main_body
1939 ; GFX6-NEXT: s_mov_b32 s0, s2
1940 ; GFX6-NEXT: s_mov_b32 s1, s3
1941 ; GFX6-NEXT: s_mov_b32 s2, s4
1942 ; GFX6-NEXT: s_mov_b32 s3, s5
1943 ; GFX6-NEXT: s_mov_b32 s4, s6
1944 ; GFX6-NEXT: s_mov_b32 s5, s7
1945 ; GFX6-NEXT: s_mov_b32 s6, s8
1946 ; GFX6-NEXT: s_mov_b32 s7, s9
1947 ; GFX6-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1948 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1949 ; GFX6-NEXT: ; return to shader part epilog
1951 ; GFX8-LABEL: atomic_smin_i64_1d:
1952 ; GFX8: ; %bb.0: ; %main_body
1953 ; GFX8-NEXT: s_mov_b32 s0, s2
1954 ; GFX8-NEXT: s_mov_b32 s1, s3
1955 ; GFX8-NEXT: s_mov_b32 s2, s4
1956 ; GFX8-NEXT: s_mov_b32 s3, s5
1957 ; GFX8-NEXT: s_mov_b32 s4, s6
1958 ; GFX8-NEXT: s_mov_b32 s5, s7
1959 ; GFX8-NEXT: s_mov_b32 s6, s8
1960 ; GFX8-NEXT: s_mov_b32 s7, s9
1961 ; GFX8-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1962 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1963 ; GFX8-NEXT: ; return to shader part epilog
1965 ; GFX900-LABEL: atomic_smin_i64_1d:
1966 ; GFX900: ; %bb.0: ; %main_body
1967 ; GFX900-NEXT: s_mov_b32 s0, s2
1968 ; GFX900-NEXT: s_mov_b32 s1, s3
1969 ; GFX900-NEXT: s_mov_b32 s2, s4
1970 ; GFX900-NEXT: s_mov_b32 s3, s5
1971 ; GFX900-NEXT: s_mov_b32 s4, s6
1972 ; GFX900-NEXT: s_mov_b32 s5, s7
1973 ; GFX900-NEXT: s_mov_b32 s6, s8
1974 ; GFX900-NEXT: s_mov_b32 s7, s9
1975 ; GFX900-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1976 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1977 ; GFX900-NEXT: ; return to shader part epilog
1979 ; GFX90A-LABEL: atomic_smin_i64_1d:
1980 ; GFX90A: ; %bb.0: ; %main_body
1981 ; GFX90A-NEXT: s_mov_b32 s0, s2
1982 ; GFX90A-NEXT: s_mov_b32 s1, s3
1983 ; GFX90A-NEXT: s_mov_b32 s2, s4
1984 ; GFX90A-NEXT: s_mov_b32 s3, s5
1985 ; GFX90A-NEXT: s_mov_b32 s4, s6
1986 ; GFX90A-NEXT: s_mov_b32 s5, s7
1987 ; GFX90A-NEXT: s_mov_b32 s6, s8
1988 ; GFX90A-NEXT: s_mov_b32 s7, s9
1989 ; GFX90A-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1990 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1991 ; GFX90A-NEXT: ; return to shader part epilog
1993 ; GFX10PLUS-LABEL: atomic_smin_i64_1d:
1994 ; GFX10PLUS: ; %bb.0: ; %main_body
1995 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1996 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1997 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1998 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1999 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2000 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2001 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2002 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2003 ; GFX10PLUS-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2004 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2005 ; GFX10PLUS-NEXT: ; return to shader part epilog
2007 %v = call i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2008 %out = bitcast i64 %v to <2 x float>
2009 ret <2 x float> %out
; 64-bit (i64) image atomic umin on a 1D image; the i64 result is bitcast to
; <2 x float> for the return. Every checked target is expected to emit
; image_atomic_umin with v[0:1], v2 and dmask:0x3. The gfx10+ form also carries
; dim:SQ_RSRC_IMG_1D, and the tahiti run waits on expcnt(0) as well as vmcnt(0).
2012 define amdgpu_ps <2 x float> @atomic_umin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2013 ; GFX6-LABEL: atomic_umin_i64_1d:
2014 ; GFX6: ; %bb.0: ; %main_body
2015 ; GFX6-NEXT: s_mov_b32 s0, s2
2016 ; GFX6-NEXT: s_mov_b32 s1, s3
2017 ; GFX6-NEXT: s_mov_b32 s2, s4
2018 ; GFX6-NEXT: s_mov_b32 s3, s5
2019 ; GFX6-NEXT: s_mov_b32 s4, s6
2020 ; GFX6-NEXT: s_mov_b32 s5, s7
2021 ; GFX6-NEXT: s_mov_b32 s6, s8
2022 ; GFX6-NEXT: s_mov_b32 s7, s9
2023 ; GFX6-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2024 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2025 ; GFX6-NEXT: ; return to shader part epilog
2027 ; GFX8-LABEL: atomic_umin_i64_1d:
2028 ; GFX8: ; %bb.0: ; %main_body
2029 ; GFX8-NEXT: s_mov_b32 s0, s2
2030 ; GFX8-NEXT: s_mov_b32 s1, s3
2031 ; GFX8-NEXT: s_mov_b32 s2, s4
2032 ; GFX8-NEXT: s_mov_b32 s3, s5
2033 ; GFX8-NEXT: s_mov_b32 s4, s6
2034 ; GFX8-NEXT: s_mov_b32 s5, s7
2035 ; GFX8-NEXT: s_mov_b32 s6, s8
2036 ; GFX8-NEXT: s_mov_b32 s7, s9
2037 ; GFX8-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2038 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2039 ; GFX8-NEXT: ; return to shader part epilog
2041 ; GFX900-LABEL: atomic_umin_i64_1d:
2042 ; GFX900: ; %bb.0: ; %main_body
2043 ; GFX900-NEXT: s_mov_b32 s0, s2
2044 ; GFX900-NEXT: s_mov_b32 s1, s3
2045 ; GFX900-NEXT: s_mov_b32 s2, s4
2046 ; GFX900-NEXT: s_mov_b32 s3, s5
2047 ; GFX900-NEXT: s_mov_b32 s4, s6
2048 ; GFX900-NEXT: s_mov_b32 s5, s7
2049 ; GFX900-NEXT: s_mov_b32 s6, s8
2050 ; GFX900-NEXT: s_mov_b32 s7, s9
2051 ; GFX900-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2052 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2053 ; GFX900-NEXT: ; return to shader part epilog
2055 ; GFX90A-LABEL: atomic_umin_i64_1d:
2056 ; GFX90A: ; %bb.0: ; %main_body
2057 ; GFX90A-NEXT: s_mov_b32 s0, s2
2058 ; GFX90A-NEXT: s_mov_b32 s1, s3
2059 ; GFX90A-NEXT: s_mov_b32 s2, s4
2060 ; GFX90A-NEXT: s_mov_b32 s3, s5
2061 ; GFX90A-NEXT: s_mov_b32 s4, s6
2062 ; GFX90A-NEXT: s_mov_b32 s5, s7
2063 ; GFX90A-NEXT: s_mov_b32 s6, s8
2064 ; GFX90A-NEXT: s_mov_b32 s7, s9
2065 ; GFX90A-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2066 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2067 ; GFX90A-NEXT: ; return to shader part epilog
2069 ; GFX10PLUS-LABEL: atomic_umin_i64_1d:
2070 ; GFX10PLUS: ; %bb.0: ; %main_body
2071 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2072 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2073 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2074 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2075 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2076 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2077 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2078 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2079 ; GFX10PLUS-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2080 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2081 ; GFX10PLUS-NEXT: ; return to shader part epilog
2083 %v = call i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2084 %out = bitcast i64 %v to <2 x float>
2085 ret <2 x float> %out
; 64-bit (i64) image atomic smax on a 1D image; the i64 result is bitcast to
; <2 x float> for the return. Every checked target is expected to emit
; image_atomic_smax with v[0:1], v2 and dmask:0x3. The gfx10+ form also carries
; dim:SQ_RSRC_IMG_1D, and the tahiti run waits on expcnt(0) as well as vmcnt(0).
2088 define amdgpu_ps <2 x float> @atomic_smax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2089 ; GFX6-LABEL: atomic_smax_i64_1d:
2090 ; GFX6: ; %bb.0: ; %main_body
2091 ; GFX6-NEXT: s_mov_b32 s0, s2
2092 ; GFX6-NEXT: s_mov_b32 s1, s3
2093 ; GFX6-NEXT: s_mov_b32 s2, s4
2094 ; GFX6-NEXT: s_mov_b32 s3, s5
2095 ; GFX6-NEXT: s_mov_b32 s4, s6
2096 ; GFX6-NEXT: s_mov_b32 s5, s7
2097 ; GFX6-NEXT: s_mov_b32 s6, s8
2098 ; GFX6-NEXT: s_mov_b32 s7, s9
2099 ; GFX6-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2100 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2101 ; GFX6-NEXT: ; return to shader part epilog
2103 ; GFX8-LABEL: atomic_smax_i64_1d:
2104 ; GFX8: ; %bb.0: ; %main_body
2105 ; GFX8-NEXT: s_mov_b32 s0, s2
2106 ; GFX8-NEXT: s_mov_b32 s1, s3
2107 ; GFX8-NEXT: s_mov_b32 s2, s4
2108 ; GFX8-NEXT: s_mov_b32 s3, s5
2109 ; GFX8-NEXT: s_mov_b32 s4, s6
2110 ; GFX8-NEXT: s_mov_b32 s5, s7
2111 ; GFX8-NEXT: s_mov_b32 s6, s8
2112 ; GFX8-NEXT: s_mov_b32 s7, s9
2113 ; GFX8-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2114 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2115 ; GFX8-NEXT: ; return to shader part epilog
2117 ; GFX900-LABEL: atomic_smax_i64_1d:
2118 ; GFX900: ; %bb.0: ; %main_body
2119 ; GFX900-NEXT: s_mov_b32 s0, s2
2120 ; GFX900-NEXT: s_mov_b32 s1, s3
2121 ; GFX900-NEXT: s_mov_b32 s2, s4
2122 ; GFX900-NEXT: s_mov_b32 s3, s5
2123 ; GFX900-NEXT: s_mov_b32 s4, s6
2124 ; GFX900-NEXT: s_mov_b32 s5, s7
2125 ; GFX900-NEXT: s_mov_b32 s6, s8
2126 ; GFX900-NEXT: s_mov_b32 s7, s9
2127 ; GFX900-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2128 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2129 ; GFX900-NEXT: ; return to shader part epilog
2131 ; GFX90A-LABEL: atomic_smax_i64_1d:
2132 ; GFX90A: ; %bb.0: ; %main_body
2133 ; GFX90A-NEXT: s_mov_b32 s0, s2
2134 ; GFX90A-NEXT: s_mov_b32 s1, s3
2135 ; GFX90A-NEXT: s_mov_b32 s2, s4
2136 ; GFX90A-NEXT: s_mov_b32 s3, s5
2137 ; GFX90A-NEXT: s_mov_b32 s4, s6
2138 ; GFX90A-NEXT: s_mov_b32 s5, s7
2139 ; GFX90A-NEXT: s_mov_b32 s6, s8
2140 ; GFX90A-NEXT: s_mov_b32 s7, s9
2141 ; GFX90A-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2142 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2143 ; GFX90A-NEXT: ; return to shader part epilog
2145 ; GFX10PLUS-LABEL: atomic_smax_i64_1d:
2146 ; GFX10PLUS: ; %bb.0: ; %main_body
2147 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2148 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2149 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2150 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2151 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2152 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2153 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2154 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2155 ; GFX10PLUS-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2156 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2157 ; GFX10PLUS-NEXT: ; return to shader part epilog
2159 %v = call i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2160 %out = bitcast i64 %v to <2 x float>
2161 ret <2 x float> %out
; 64-bit (i64) image atomic umax on a 1D image; the i64 result is bitcast to
; <2 x float> for the return. Every checked target is expected to emit
; image_atomic_umax with v[0:1], v2 and dmask:0x3. The gfx10+ form also carries
; dim:SQ_RSRC_IMG_1D, and the tahiti run waits on expcnt(0) as well as vmcnt(0).
2164 define amdgpu_ps <2 x float> @atomic_umax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2165 ; GFX6-LABEL: atomic_umax_i64_1d:
2166 ; GFX6: ; %bb.0: ; %main_body
2167 ; GFX6-NEXT: s_mov_b32 s0, s2
2168 ; GFX6-NEXT: s_mov_b32 s1, s3
2169 ; GFX6-NEXT: s_mov_b32 s2, s4
2170 ; GFX6-NEXT: s_mov_b32 s3, s5
2171 ; GFX6-NEXT: s_mov_b32 s4, s6
2172 ; GFX6-NEXT: s_mov_b32 s5, s7
2173 ; GFX6-NEXT: s_mov_b32 s6, s8
2174 ; GFX6-NEXT: s_mov_b32 s7, s9
2175 ; GFX6-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2176 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2177 ; GFX6-NEXT: ; return to shader part epilog
2179 ; GFX8-LABEL: atomic_umax_i64_1d:
2180 ; GFX8: ; %bb.0: ; %main_body
2181 ; GFX8-NEXT: s_mov_b32 s0, s2
2182 ; GFX8-NEXT: s_mov_b32 s1, s3
2183 ; GFX8-NEXT: s_mov_b32 s2, s4
2184 ; GFX8-NEXT: s_mov_b32 s3, s5
2185 ; GFX8-NEXT: s_mov_b32 s4, s6
2186 ; GFX8-NEXT: s_mov_b32 s5, s7
2187 ; GFX8-NEXT: s_mov_b32 s6, s8
2188 ; GFX8-NEXT: s_mov_b32 s7, s9
2189 ; GFX8-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2190 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2191 ; GFX8-NEXT: ; return to shader part epilog
2193 ; GFX900-LABEL: atomic_umax_i64_1d:
2194 ; GFX900: ; %bb.0: ; %main_body
2195 ; GFX900-NEXT: s_mov_b32 s0, s2
2196 ; GFX900-NEXT: s_mov_b32 s1, s3
2197 ; GFX900-NEXT: s_mov_b32 s2, s4
2198 ; GFX900-NEXT: s_mov_b32 s3, s5
2199 ; GFX900-NEXT: s_mov_b32 s4, s6
2200 ; GFX900-NEXT: s_mov_b32 s5, s7
2201 ; GFX900-NEXT: s_mov_b32 s6, s8
2202 ; GFX900-NEXT: s_mov_b32 s7, s9
2203 ; GFX900-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2204 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2205 ; GFX900-NEXT: ; return to shader part epilog
2207 ; GFX90A-LABEL: atomic_umax_i64_1d:
2208 ; GFX90A: ; %bb.0: ; %main_body
2209 ; GFX90A-NEXT: s_mov_b32 s0, s2
2210 ; GFX90A-NEXT: s_mov_b32 s1, s3
2211 ; GFX90A-NEXT: s_mov_b32 s2, s4
2212 ; GFX90A-NEXT: s_mov_b32 s3, s5
2213 ; GFX90A-NEXT: s_mov_b32 s4, s6
2214 ; GFX90A-NEXT: s_mov_b32 s5, s7
2215 ; GFX90A-NEXT: s_mov_b32 s6, s8
2216 ; GFX90A-NEXT: s_mov_b32 s7, s9
2217 ; GFX90A-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2218 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2219 ; GFX90A-NEXT: ; return to shader part epilog
2221 ; GFX10PLUS-LABEL: atomic_umax_i64_1d:
2222 ; GFX10PLUS: ; %bb.0: ; %main_body
2223 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2224 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2225 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2226 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2227 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2228 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2229 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2230 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2231 ; GFX10PLUS-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2232 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2233 ; GFX10PLUS-NEXT: ; return to shader part epilog
2235 %v = call i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2236 %out = bitcast i64 %v to <2 x float>
2237 ret <2 x float> %out
; 64-bit (i64) image atomic and on a 1D image; the i64 result is bitcast to
; <2 x float> for the return. Every checked target is expected to emit
; image_atomic_and with v[0:1], v2 and dmask:0x3. The gfx10+ form also carries
; dim:SQ_RSRC_IMG_1D, and the tahiti run waits on expcnt(0) as well as vmcnt(0).
2240 define amdgpu_ps <2 x float> @atomic_and_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2241 ; GFX6-LABEL: atomic_and_i64_1d:
2242 ; GFX6: ; %bb.0: ; %main_body
2243 ; GFX6-NEXT: s_mov_b32 s0, s2
2244 ; GFX6-NEXT: s_mov_b32 s1, s3
2245 ; GFX6-NEXT: s_mov_b32 s2, s4
2246 ; GFX6-NEXT: s_mov_b32 s3, s5
2247 ; GFX6-NEXT: s_mov_b32 s4, s6
2248 ; GFX6-NEXT: s_mov_b32 s5, s7
2249 ; GFX6-NEXT: s_mov_b32 s6, s8
2250 ; GFX6-NEXT: s_mov_b32 s7, s9
2251 ; GFX6-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2252 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2253 ; GFX6-NEXT: ; return to shader part epilog
2255 ; GFX8-LABEL: atomic_and_i64_1d:
2256 ; GFX8: ; %bb.0: ; %main_body
2257 ; GFX8-NEXT: s_mov_b32 s0, s2
2258 ; GFX8-NEXT: s_mov_b32 s1, s3
2259 ; GFX8-NEXT: s_mov_b32 s2, s4
2260 ; GFX8-NEXT: s_mov_b32 s3, s5
2261 ; GFX8-NEXT: s_mov_b32 s4, s6
2262 ; GFX8-NEXT: s_mov_b32 s5, s7
2263 ; GFX8-NEXT: s_mov_b32 s6, s8
2264 ; GFX8-NEXT: s_mov_b32 s7, s9
2265 ; GFX8-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2266 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2267 ; GFX8-NEXT: ; return to shader part epilog
2269 ; GFX900-LABEL: atomic_and_i64_1d:
2270 ; GFX900: ; %bb.0: ; %main_body
2271 ; GFX900-NEXT: s_mov_b32 s0, s2
2272 ; GFX900-NEXT: s_mov_b32 s1, s3
2273 ; GFX900-NEXT: s_mov_b32 s2, s4
2274 ; GFX900-NEXT: s_mov_b32 s3, s5
2275 ; GFX900-NEXT: s_mov_b32 s4, s6
2276 ; GFX900-NEXT: s_mov_b32 s5, s7
2277 ; GFX900-NEXT: s_mov_b32 s6, s8
2278 ; GFX900-NEXT: s_mov_b32 s7, s9
2279 ; GFX900-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2280 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2281 ; GFX900-NEXT: ; return to shader part epilog
2283 ; GFX90A-LABEL: atomic_and_i64_1d:
2284 ; GFX90A: ; %bb.0: ; %main_body
2285 ; GFX90A-NEXT: s_mov_b32 s0, s2
2286 ; GFX90A-NEXT: s_mov_b32 s1, s3
2287 ; GFX90A-NEXT: s_mov_b32 s2, s4
2288 ; GFX90A-NEXT: s_mov_b32 s3, s5
2289 ; GFX90A-NEXT: s_mov_b32 s4, s6
2290 ; GFX90A-NEXT: s_mov_b32 s5, s7
2291 ; GFX90A-NEXT: s_mov_b32 s6, s8
2292 ; GFX90A-NEXT: s_mov_b32 s7, s9
2293 ; GFX90A-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2294 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2295 ; GFX90A-NEXT: ; return to shader part epilog
2297 ; GFX10PLUS-LABEL: atomic_and_i64_1d:
2298 ; GFX10PLUS: ; %bb.0: ; %main_body
2299 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2300 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2301 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2302 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2303 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2304 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2305 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2306 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2307 ; GFX10PLUS-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2308 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2309 ; GFX10PLUS-NEXT: ; return to shader part epilog
2311 %v = call i64 @llvm.amdgcn.image.atomic.and.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2312 %out = bitcast i64 %v to <2 x float>
2313 ret <2 x float> %out
; 64-bit (i64) image atomic or on a 1D image; the i64 result is bitcast to
; <2 x float> for the return. Every checked target is expected to emit
; image_atomic_or with v[0:1], v2 and dmask:0x3. The gfx10+ form also carries
; dim:SQ_RSRC_IMG_1D, and the tahiti run waits on expcnt(0) as well as vmcnt(0).
2316 define amdgpu_ps <2 x float> @atomic_or_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2317 ; GFX6-LABEL: atomic_or_i64_1d:
2318 ; GFX6: ; %bb.0: ; %main_body
2319 ; GFX6-NEXT: s_mov_b32 s0, s2
2320 ; GFX6-NEXT: s_mov_b32 s1, s3
2321 ; GFX6-NEXT: s_mov_b32 s2, s4
2322 ; GFX6-NEXT: s_mov_b32 s3, s5
2323 ; GFX6-NEXT: s_mov_b32 s4, s6
2324 ; GFX6-NEXT: s_mov_b32 s5, s7
2325 ; GFX6-NEXT: s_mov_b32 s6, s8
2326 ; GFX6-NEXT: s_mov_b32 s7, s9
2327 ; GFX6-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2328 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2329 ; GFX6-NEXT: ; return to shader part epilog
2331 ; GFX8-LABEL: atomic_or_i64_1d:
2332 ; GFX8: ; %bb.0: ; %main_body
2333 ; GFX8-NEXT: s_mov_b32 s0, s2
2334 ; GFX8-NEXT: s_mov_b32 s1, s3
2335 ; GFX8-NEXT: s_mov_b32 s2, s4
2336 ; GFX8-NEXT: s_mov_b32 s3, s5
2337 ; GFX8-NEXT: s_mov_b32 s4, s6
2338 ; GFX8-NEXT: s_mov_b32 s5, s7
2339 ; GFX8-NEXT: s_mov_b32 s6, s8
2340 ; GFX8-NEXT: s_mov_b32 s7, s9
2341 ; GFX8-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2342 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2343 ; GFX8-NEXT: ; return to shader part epilog
2345 ; GFX900-LABEL: atomic_or_i64_1d:
2346 ; GFX900: ; %bb.0: ; %main_body
2347 ; GFX900-NEXT: s_mov_b32 s0, s2
2348 ; GFX900-NEXT: s_mov_b32 s1, s3
2349 ; GFX900-NEXT: s_mov_b32 s2, s4
2350 ; GFX900-NEXT: s_mov_b32 s3, s5
2351 ; GFX900-NEXT: s_mov_b32 s4, s6
2352 ; GFX900-NEXT: s_mov_b32 s5, s7
2353 ; GFX900-NEXT: s_mov_b32 s6, s8
2354 ; GFX900-NEXT: s_mov_b32 s7, s9
2355 ; GFX900-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2356 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2357 ; GFX900-NEXT: ; return to shader part epilog
2359 ; GFX90A-LABEL: atomic_or_i64_1d:
2360 ; GFX90A: ; %bb.0: ; %main_body
2361 ; GFX90A-NEXT: s_mov_b32 s0, s2
2362 ; GFX90A-NEXT: s_mov_b32 s1, s3
2363 ; GFX90A-NEXT: s_mov_b32 s2, s4
2364 ; GFX90A-NEXT: s_mov_b32 s3, s5
2365 ; GFX90A-NEXT: s_mov_b32 s4, s6
2366 ; GFX90A-NEXT: s_mov_b32 s5, s7
2367 ; GFX90A-NEXT: s_mov_b32 s6, s8
2368 ; GFX90A-NEXT: s_mov_b32 s7, s9
2369 ; GFX90A-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2370 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2371 ; GFX90A-NEXT: ; return to shader part epilog
2373 ; GFX10PLUS-LABEL: atomic_or_i64_1d:
2374 ; GFX10PLUS: ; %bb.0: ; %main_body
2375 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2376 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2377 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2378 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2379 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2380 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2381 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2382 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2383 ; GFX10PLUS-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2384 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2385 ; GFX10PLUS-NEXT: ; return to shader part epilog
2387 %v = call i64 @llvm.amdgcn.image.atomic.or.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2388 %out = bitcast i64 %v to <2 x float>
2389 ret <2 x float> %out
; 64-bit (i64) image atomic xor on a 1D image; the i64 result is bitcast to
; <2 x float> for the return. Every checked target is expected to emit
; image_atomic_xor with v[0:1], v2 and dmask:0x3. The gfx10+ form also carries
; dim:SQ_RSRC_IMG_1D, and the tahiti run waits on expcnt(0) as well as vmcnt(0).
2392 define amdgpu_ps <2 x float> @atomic_xor_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2393 ; GFX6-LABEL: atomic_xor_i64_1d:
2394 ; GFX6: ; %bb.0: ; %main_body
2395 ; GFX6-NEXT: s_mov_b32 s0, s2
2396 ; GFX6-NEXT: s_mov_b32 s1, s3
2397 ; GFX6-NEXT: s_mov_b32 s2, s4
2398 ; GFX6-NEXT: s_mov_b32 s3, s5
2399 ; GFX6-NEXT: s_mov_b32 s4, s6
2400 ; GFX6-NEXT: s_mov_b32 s5, s7
2401 ; GFX6-NEXT: s_mov_b32 s6, s8
2402 ; GFX6-NEXT: s_mov_b32 s7, s9
2403 ; GFX6-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2404 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2405 ; GFX6-NEXT: ; return to shader part epilog
2407 ; GFX8-LABEL: atomic_xor_i64_1d:
2408 ; GFX8: ; %bb.0: ; %main_body
2409 ; GFX8-NEXT: s_mov_b32 s0, s2
2410 ; GFX8-NEXT: s_mov_b32 s1, s3
2411 ; GFX8-NEXT: s_mov_b32 s2, s4
2412 ; GFX8-NEXT: s_mov_b32 s3, s5
2413 ; GFX8-NEXT: s_mov_b32 s4, s6
2414 ; GFX8-NEXT: s_mov_b32 s5, s7
2415 ; GFX8-NEXT: s_mov_b32 s6, s8
2416 ; GFX8-NEXT: s_mov_b32 s7, s9
2417 ; GFX8-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2418 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2419 ; GFX8-NEXT: ; return to shader part epilog
2421 ; GFX900-LABEL: atomic_xor_i64_1d:
2422 ; GFX900: ; %bb.0: ; %main_body
2423 ; GFX900-NEXT: s_mov_b32 s0, s2
2424 ; GFX900-NEXT: s_mov_b32 s1, s3
2425 ; GFX900-NEXT: s_mov_b32 s2, s4
2426 ; GFX900-NEXT: s_mov_b32 s3, s5
2427 ; GFX900-NEXT: s_mov_b32 s4, s6
2428 ; GFX900-NEXT: s_mov_b32 s5, s7
2429 ; GFX900-NEXT: s_mov_b32 s6, s8
2430 ; GFX900-NEXT: s_mov_b32 s7, s9
2431 ; GFX900-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2432 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2433 ; GFX900-NEXT: ; return to shader part epilog
2435 ; GFX90A-LABEL: atomic_xor_i64_1d:
2436 ; GFX90A: ; %bb.0: ; %main_body
2437 ; GFX90A-NEXT: s_mov_b32 s0, s2
2438 ; GFX90A-NEXT: s_mov_b32 s1, s3
2439 ; GFX90A-NEXT: s_mov_b32 s2, s4
2440 ; GFX90A-NEXT: s_mov_b32 s3, s5
2441 ; GFX90A-NEXT: s_mov_b32 s4, s6
2442 ; GFX90A-NEXT: s_mov_b32 s5, s7
2443 ; GFX90A-NEXT: s_mov_b32 s6, s8
2444 ; GFX90A-NEXT: s_mov_b32 s7, s9
2445 ; GFX90A-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2446 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2447 ; GFX90A-NEXT: ; return to shader part epilog
2449 ; GFX10PLUS-LABEL: atomic_xor_i64_1d:
2450 ; GFX10PLUS: ; %bb.0: ; %main_body
2451 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2452 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2453 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2454 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2455 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2456 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2457 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2458 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2459 ; GFX10PLUS-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2460 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2461 ; GFX10PLUS-NEXT: ; return to shader part epilog
2463 %v = call i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2464 %out = bitcast i64 %v to <2 x float>
2465 ret <2 x float> %out
; Test case: 64-bit image atomic "inc" on a 1D image, result returned.
; The IR calls llvm.amdgcn.image.atomic.inc.1d.i64.i32 with an i64 data
; operand and one i32 coordinate, then bitcasts the i64 result to
; <2 x float> for the return value.
; Every checked target expects eight s_mov_b32 copies (s2..s9 into s0..s7),
; then a single image_atomic_inc on v[0:1] addressed by v2 with dmask:0x3
; and glc, then a wait on vmcnt(0). The GFX6 checks additionally wait on
; expcnt(0); the GFX10PLUS checks additionally carry dim:SQ_RSRC_IMG_1D.
; NOTE(review): the s_mov sequence presumably repacks the inreg %rsrc
; descriptor into s[0:7] -- confirm against the amdgpu_ps argument layout.
2468 define amdgpu_ps <2 x float> @atomic_inc_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2469 ; GFX6-LABEL: atomic_inc_i64_1d:
2470 ; GFX6: ; %bb.0: ; %main_body
2471 ; GFX6-NEXT: s_mov_b32 s0, s2
2472 ; GFX6-NEXT: s_mov_b32 s1, s3
2473 ; GFX6-NEXT: s_mov_b32 s2, s4
2474 ; GFX6-NEXT: s_mov_b32 s3, s5
2475 ; GFX6-NEXT: s_mov_b32 s4, s6
2476 ; GFX6-NEXT: s_mov_b32 s5, s7
2477 ; GFX6-NEXT: s_mov_b32 s6, s8
2478 ; GFX6-NEXT: s_mov_b32 s7, s9
2479 ; GFX6-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2480 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2481 ; GFX6-NEXT: ; return to shader part epilog
2483 ; GFX8-LABEL: atomic_inc_i64_1d:
2484 ; GFX8: ; %bb.0: ; %main_body
2485 ; GFX8-NEXT: s_mov_b32 s0, s2
2486 ; GFX8-NEXT: s_mov_b32 s1, s3
2487 ; GFX8-NEXT: s_mov_b32 s2, s4
2488 ; GFX8-NEXT: s_mov_b32 s3, s5
2489 ; GFX8-NEXT: s_mov_b32 s4, s6
2490 ; GFX8-NEXT: s_mov_b32 s5, s7
2491 ; GFX8-NEXT: s_mov_b32 s6, s8
2492 ; GFX8-NEXT: s_mov_b32 s7, s9
2493 ; GFX8-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2494 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2495 ; GFX8-NEXT: ; return to shader part epilog
2497 ; GFX900-LABEL: atomic_inc_i64_1d:
2498 ; GFX900: ; %bb.0: ; %main_body
2499 ; GFX900-NEXT: s_mov_b32 s0, s2
2500 ; GFX900-NEXT: s_mov_b32 s1, s3
2501 ; GFX900-NEXT: s_mov_b32 s2, s4
2502 ; GFX900-NEXT: s_mov_b32 s3, s5
2503 ; GFX900-NEXT: s_mov_b32 s4, s6
2504 ; GFX900-NEXT: s_mov_b32 s5, s7
2505 ; GFX900-NEXT: s_mov_b32 s6, s8
2506 ; GFX900-NEXT: s_mov_b32 s7, s9
2507 ; GFX900-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2508 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2509 ; GFX900-NEXT: ; return to shader part epilog
2511 ; GFX90A-LABEL: atomic_inc_i64_1d:
2512 ; GFX90A: ; %bb.0: ; %main_body
2513 ; GFX90A-NEXT: s_mov_b32 s0, s2
2514 ; GFX90A-NEXT: s_mov_b32 s1, s3
2515 ; GFX90A-NEXT: s_mov_b32 s2, s4
2516 ; GFX90A-NEXT: s_mov_b32 s3, s5
2517 ; GFX90A-NEXT: s_mov_b32 s4, s6
2518 ; GFX90A-NEXT: s_mov_b32 s5, s7
2519 ; GFX90A-NEXT: s_mov_b32 s6, s8
2520 ; GFX90A-NEXT: s_mov_b32 s7, s9
2521 ; GFX90A-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2522 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2523 ; GFX90A-NEXT: ; return to shader part epilog
2525 ; GFX10PLUS-LABEL: atomic_inc_i64_1d:
2526 ; GFX10PLUS: ; %bb.0: ; %main_body
2527 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2528 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2529 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2530 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2531 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2532 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2533 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2534 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2535 ; GFX10PLUS-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2536 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2537 ; GFX10PLUS-NEXT: ; return to shader part epilog
2539 %v = call i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2540 %out = bitcast i64 %v to <2 x float>
2541 ret <2 x float> %out
; Test case: 64-bit image atomic "dec" on a 1D image, result returned.
; The IR calls llvm.amdgcn.image.atomic.dec.1d.i64.i32 with an i64 data
; operand and one i32 coordinate, then bitcasts the i64 result to
; <2 x float> for the return value.
; Every checked target expects eight s_mov_b32 copies (s2..s9 into s0..s7),
; then a single image_atomic_dec on v[0:1] addressed by v2 with dmask:0x3
; and glc, then a wait on vmcnt(0). The GFX6 checks additionally wait on
; expcnt(0); the GFX10PLUS checks additionally carry dim:SQ_RSRC_IMG_1D.
; NOTE(review): the s_mov sequence presumably repacks the inreg %rsrc
; descriptor into s[0:7] -- confirm against the amdgpu_ps argument layout.
2544 define amdgpu_ps <2 x float> @atomic_dec_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2545 ; GFX6-LABEL: atomic_dec_i64_1d:
2546 ; GFX6: ; %bb.0: ; %main_body
2547 ; GFX6-NEXT: s_mov_b32 s0, s2
2548 ; GFX6-NEXT: s_mov_b32 s1, s3
2549 ; GFX6-NEXT: s_mov_b32 s2, s4
2550 ; GFX6-NEXT: s_mov_b32 s3, s5
2551 ; GFX6-NEXT: s_mov_b32 s4, s6
2552 ; GFX6-NEXT: s_mov_b32 s5, s7
2553 ; GFX6-NEXT: s_mov_b32 s6, s8
2554 ; GFX6-NEXT: s_mov_b32 s7, s9
2555 ; GFX6-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2556 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2557 ; GFX6-NEXT: ; return to shader part epilog
2559 ; GFX8-LABEL: atomic_dec_i64_1d:
2560 ; GFX8: ; %bb.0: ; %main_body
2561 ; GFX8-NEXT: s_mov_b32 s0, s2
2562 ; GFX8-NEXT: s_mov_b32 s1, s3
2563 ; GFX8-NEXT: s_mov_b32 s2, s4
2564 ; GFX8-NEXT: s_mov_b32 s3, s5
2565 ; GFX8-NEXT: s_mov_b32 s4, s6
2566 ; GFX8-NEXT: s_mov_b32 s5, s7
2567 ; GFX8-NEXT: s_mov_b32 s6, s8
2568 ; GFX8-NEXT: s_mov_b32 s7, s9
2569 ; GFX8-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2570 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2571 ; GFX8-NEXT: ; return to shader part epilog
2573 ; GFX900-LABEL: atomic_dec_i64_1d:
2574 ; GFX900: ; %bb.0: ; %main_body
2575 ; GFX900-NEXT: s_mov_b32 s0, s2
2576 ; GFX900-NEXT: s_mov_b32 s1, s3
2577 ; GFX900-NEXT: s_mov_b32 s2, s4
2578 ; GFX900-NEXT: s_mov_b32 s3, s5
2579 ; GFX900-NEXT: s_mov_b32 s4, s6
2580 ; GFX900-NEXT: s_mov_b32 s5, s7
2581 ; GFX900-NEXT: s_mov_b32 s6, s8
2582 ; GFX900-NEXT: s_mov_b32 s7, s9
2583 ; GFX900-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2584 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2585 ; GFX900-NEXT: ; return to shader part epilog
2587 ; GFX90A-LABEL: atomic_dec_i64_1d:
2588 ; GFX90A: ; %bb.0: ; %main_body
2589 ; GFX90A-NEXT: s_mov_b32 s0, s2
2590 ; GFX90A-NEXT: s_mov_b32 s1, s3
2591 ; GFX90A-NEXT: s_mov_b32 s2, s4
2592 ; GFX90A-NEXT: s_mov_b32 s3, s5
2593 ; GFX90A-NEXT: s_mov_b32 s4, s6
2594 ; GFX90A-NEXT: s_mov_b32 s5, s7
2595 ; GFX90A-NEXT: s_mov_b32 s6, s8
2596 ; GFX90A-NEXT: s_mov_b32 s7, s9
2597 ; GFX90A-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2598 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2599 ; GFX90A-NEXT: ; return to shader part epilog
2601 ; GFX10PLUS-LABEL: atomic_dec_i64_1d:
2602 ; GFX10PLUS: ; %bb.0: ; %main_body
2603 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2604 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2605 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2606 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2607 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2608 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2609 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2610 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2611 ; GFX10PLUS-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2612 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2613 ; GFX10PLUS-NEXT: ; return to shader part epilog
2615 %v = call i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2616 %out = bitcast i64 %v to <2 x float>
2617 ret <2 x float> %out
; Test case: 64-bit image atomic compare-and-swap on a 1D image, result
; returned.
; The IR calls llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32 with i64 %cmp and
; i64 %swap operands plus one i32 coordinate, then bitcasts the i64 result
; to <2 x float> for the return value.
; Every checked target expects eight s_mov_b32 copies (s2..s9 into s0..s7),
; then a single image_atomic_cmpswap on the four-register range v[0:3]
; addressed by v4 with dmask:0xf and glc, then a wait on vmcnt(0). The GFX6
; checks additionally wait on expcnt(0); the GFX10PLUS checks additionally
; carry dim:SQ_RSRC_IMG_1D.
; NOTE(review): the s_mov sequence presumably repacks the inreg %rsrc
; descriptor into s[0:7] -- confirm against the amdgpu_ps argument layout.
2620 define amdgpu_ps <2 x float> @atomic_cmpswap_i64_1d(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i32 %s) {
2621 ; GFX6-LABEL: atomic_cmpswap_i64_1d:
2622 ; GFX6: ; %bb.0: ; %main_body
2623 ; GFX6-NEXT: s_mov_b32 s0, s2
2624 ; GFX6-NEXT: s_mov_b32 s1, s3
2625 ; GFX6-NEXT: s_mov_b32 s2, s4
2626 ; GFX6-NEXT: s_mov_b32 s3, s5
2627 ; GFX6-NEXT: s_mov_b32 s4, s6
2628 ; GFX6-NEXT: s_mov_b32 s5, s7
2629 ; GFX6-NEXT: s_mov_b32 s6, s8
2630 ; GFX6-NEXT: s_mov_b32 s7, s9
2631 ; GFX6-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
2632 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2633 ; GFX6-NEXT: ; return to shader part epilog
2635 ; GFX8-LABEL: atomic_cmpswap_i64_1d:
2636 ; GFX8: ; %bb.0: ; %main_body
2637 ; GFX8-NEXT: s_mov_b32 s0, s2
2638 ; GFX8-NEXT: s_mov_b32 s1, s3
2639 ; GFX8-NEXT: s_mov_b32 s2, s4
2640 ; GFX8-NEXT: s_mov_b32 s3, s5
2641 ; GFX8-NEXT: s_mov_b32 s4, s6
2642 ; GFX8-NEXT: s_mov_b32 s5, s7
2643 ; GFX8-NEXT: s_mov_b32 s6, s8
2644 ; GFX8-NEXT: s_mov_b32 s7, s9
2645 ; GFX8-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
2646 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2647 ; GFX8-NEXT: ; return to shader part epilog
2649 ; GFX900-LABEL: atomic_cmpswap_i64_1d:
2650 ; GFX900: ; %bb.0: ; %main_body
2651 ; GFX900-NEXT: s_mov_b32 s0, s2
2652 ; GFX900-NEXT: s_mov_b32 s1, s3
2653 ; GFX900-NEXT: s_mov_b32 s2, s4
2654 ; GFX900-NEXT: s_mov_b32 s3, s5
2655 ; GFX900-NEXT: s_mov_b32 s4, s6
2656 ; GFX900-NEXT: s_mov_b32 s5, s7
2657 ; GFX900-NEXT: s_mov_b32 s6, s8
2658 ; GFX900-NEXT: s_mov_b32 s7, s9
2659 ; GFX900-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
2660 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2661 ; GFX900-NEXT: ; return to shader part epilog
2663 ; GFX90A-LABEL: atomic_cmpswap_i64_1d:
2664 ; GFX90A: ; %bb.0: ; %main_body
2665 ; GFX90A-NEXT: s_mov_b32 s0, s2
2666 ; GFX90A-NEXT: s_mov_b32 s1, s3
2667 ; GFX90A-NEXT: s_mov_b32 s2, s4
2668 ; GFX90A-NEXT: s_mov_b32 s3, s5
2669 ; GFX90A-NEXT: s_mov_b32 s4, s6
2670 ; GFX90A-NEXT: s_mov_b32 s5, s7
2671 ; GFX90A-NEXT: s_mov_b32 s6, s8
2672 ; GFX90A-NEXT: s_mov_b32 s7, s9
2673 ; GFX90A-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
2674 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2675 ; GFX90A-NEXT: ; return to shader part epilog
2677 ; GFX10PLUS-LABEL: atomic_cmpswap_i64_1d:
2678 ; GFX10PLUS: ; %bb.0: ; %main_body
2679 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2680 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2681 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2682 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2683 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2684 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2685 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2686 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2687 ; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc
2688 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2689 ; GFX10PLUS-NEXT: ; return to shader part epilog
2691 %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
2692 %out = bitcast i64 %v to <2 x float>
2693 ret <2 x float> %out
; Test case: 64-bit image atomic compare-and-swap on a 1D image where the
; result is not used.
; The function is declared void; the IR makes the same
; llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32 call as the returning variant
; but %v has no user in the visible body.
; Every checked target expects eight s_mov_b32 copies (s2..s9 into s0..s7),
; then a single image_atomic_cmpswap on v[0:3] addressed by v4 with
; dmask:0xf, immediately followed by s_endpgm with no s_waitcnt in between.
; The GFX10PLUS checks additionally carry dim:SQ_RSRC_IMG_1D.
; NOTE(review): the expected instruction still carries glc even though the
; result is unused -- presumably this records current GlobalISel output
; rather than a dedicated no-return selection; confirm before relying on it.
2696 define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i32 %s) {
2697 ; GFX6-LABEL: atomic_cmpswap_i64_1d_no_return:
2698 ; GFX6: ; %bb.0: ; %main_body
2699 ; GFX6-NEXT: s_mov_b32 s0, s2
2700 ; GFX6-NEXT: s_mov_b32 s1, s3
2701 ; GFX6-NEXT: s_mov_b32 s2, s4
2702 ; GFX6-NEXT: s_mov_b32 s3, s5
2703 ; GFX6-NEXT: s_mov_b32 s4, s6
2704 ; GFX6-NEXT: s_mov_b32 s5, s7
2705 ; GFX6-NEXT: s_mov_b32 s6, s8
2706 ; GFX6-NEXT: s_mov_b32 s7, s9
2707 ; GFX6-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
2708 ; GFX6-NEXT: s_endpgm
2710 ; GFX8-LABEL: atomic_cmpswap_i64_1d_no_return:
2711 ; GFX8: ; %bb.0: ; %main_body
2712 ; GFX8-NEXT: s_mov_b32 s0, s2
2713 ; GFX8-NEXT: s_mov_b32 s1, s3
2714 ; GFX8-NEXT: s_mov_b32 s2, s4
2715 ; GFX8-NEXT: s_mov_b32 s3, s5
2716 ; GFX8-NEXT: s_mov_b32 s4, s6
2717 ; GFX8-NEXT: s_mov_b32 s5, s7
2718 ; GFX8-NEXT: s_mov_b32 s6, s8
2719 ; GFX8-NEXT: s_mov_b32 s7, s9
2720 ; GFX8-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
2721 ; GFX8-NEXT: s_endpgm
2723 ; GFX900-LABEL: atomic_cmpswap_i64_1d_no_return:
2724 ; GFX900: ; %bb.0: ; %main_body
2725 ; GFX900-NEXT: s_mov_b32 s0, s2
2726 ; GFX900-NEXT: s_mov_b32 s1, s3
2727 ; GFX900-NEXT: s_mov_b32 s2, s4
2728 ; GFX900-NEXT: s_mov_b32 s3, s5
2729 ; GFX900-NEXT: s_mov_b32 s4, s6
2730 ; GFX900-NEXT: s_mov_b32 s5, s7
2731 ; GFX900-NEXT: s_mov_b32 s6, s8
2732 ; GFX900-NEXT: s_mov_b32 s7, s9
2733 ; GFX900-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
2734 ; GFX900-NEXT: s_endpgm
2736 ; GFX90A-LABEL: atomic_cmpswap_i64_1d_no_return:
2737 ; GFX90A: ; %bb.0: ; %main_body
2738 ; GFX90A-NEXT: s_mov_b32 s0, s2
2739 ; GFX90A-NEXT: s_mov_b32 s1, s3
2740 ; GFX90A-NEXT: s_mov_b32 s2, s4
2741 ; GFX90A-NEXT: s_mov_b32 s3, s5
2742 ; GFX90A-NEXT: s_mov_b32 s4, s6
2743 ; GFX90A-NEXT: s_mov_b32 s5, s7
2744 ; GFX90A-NEXT: s_mov_b32 s6, s8
2745 ; GFX90A-NEXT: s_mov_b32 s7, s9
2746 ; GFX90A-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
2747 ; GFX90A-NEXT: s_endpgm
2749 ; GFX10PLUS-LABEL: atomic_cmpswap_i64_1d_no_return:
2750 ; GFX10PLUS: ; %bb.0: ; %main_body
2751 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2752 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2753 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2754 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2755 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2756 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2757 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2758 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2759 ; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc
2760 ; GFX10PLUS-NEXT: s_endpgm
2762 %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; Test case: 64-bit image atomic add on a 2D image, result returned.
; The IR calls llvm.amdgcn.image.atomic.add.2d.i64.i32 with an i64 data
; operand and two i32 coordinates (%s, %t), then bitcasts the i64 result to
; <2 x float> for the return value.
; Every checked target expects eight s_mov_b32 copies (s2..s9 into s0..s7),
; then a single image_atomic_add on v[0:1] addressed by the two-register
; range v[2:3] with dmask:0x3 and glc, then a wait on vmcnt(0). The GFX6
; checks additionally wait on expcnt(0); the GFX10PLUS checks additionally
; carry dim:SQ_RSRC_IMG_2D.
; NOTE(review): the s_mov sequence presumably repacks the inreg %rsrc
; descriptor into s[0:7] -- confirm against the amdgpu_ps argument layout.
2766 define amdgpu_ps <2 x float> @atomic_add_i64_2d(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t) {
2767 ; GFX6-LABEL: atomic_add_i64_2d:
2768 ; GFX6: ; %bb.0: ; %main_body
2769 ; GFX6-NEXT: s_mov_b32 s0, s2
2770 ; GFX6-NEXT: s_mov_b32 s1, s3
2771 ; GFX6-NEXT: s_mov_b32 s2, s4
2772 ; GFX6-NEXT: s_mov_b32 s3, s5
2773 ; GFX6-NEXT: s_mov_b32 s4, s6
2774 ; GFX6-NEXT: s_mov_b32 s5, s7
2775 ; GFX6-NEXT: s_mov_b32 s6, s8
2776 ; GFX6-NEXT: s_mov_b32 s7, s9
2777 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc
2778 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2779 ; GFX6-NEXT: ; return to shader part epilog
2781 ; GFX8-LABEL: atomic_add_i64_2d:
2782 ; GFX8: ; %bb.0: ; %main_body
2783 ; GFX8-NEXT: s_mov_b32 s0, s2
2784 ; GFX8-NEXT: s_mov_b32 s1, s3
2785 ; GFX8-NEXT: s_mov_b32 s2, s4
2786 ; GFX8-NEXT: s_mov_b32 s3, s5
2787 ; GFX8-NEXT: s_mov_b32 s4, s6
2788 ; GFX8-NEXT: s_mov_b32 s5, s7
2789 ; GFX8-NEXT: s_mov_b32 s6, s8
2790 ; GFX8-NEXT: s_mov_b32 s7, s9
2791 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc
2792 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2793 ; GFX8-NEXT: ; return to shader part epilog
2795 ; GFX900-LABEL: atomic_add_i64_2d:
2796 ; GFX900: ; %bb.0: ; %main_body
2797 ; GFX900-NEXT: s_mov_b32 s0, s2
2798 ; GFX900-NEXT: s_mov_b32 s1, s3
2799 ; GFX900-NEXT: s_mov_b32 s2, s4
2800 ; GFX900-NEXT: s_mov_b32 s3, s5
2801 ; GFX900-NEXT: s_mov_b32 s4, s6
2802 ; GFX900-NEXT: s_mov_b32 s5, s7
2803 ; GFX900-NEXT: s_mov_b32 s6, s8
2804 ; GFX900-NEXT: s_mov_b32 s7, s9
2805 ; GFX900-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc
2806 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2807 ; GFX900-NEXT: ; return to shader part epilog
2809 ; GFX90A-LABEL: atomic_add_i64_2d:
2810 ; GFX90A: ; %bb.0: ; %main_body
2811 ; GFX90A-NEXT: s_mov_b32 s0, s2
2812 ; GFX90A-NEXT: s_mov_b32 s1, s3
2813 ; GFX90A-NEXT: s_mov_b32 s2, s4
2814 ; GFX90A-NEXT: s_mov_b32 s3, s5
2815 ; GFX90A-NEXT: s_mov_b32 s4, s6
2816 ; GFX90A-NEXT: s_mov_b32 s5, s7
2817 ; GFX90A-NEXT: s_mov_b32 s6, s8
2818 ; GFX90A-NEXT: s_mov_b32 s7, s9
2819 ; GFX90A-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc
2820 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2821 ; GFX90A-NEXT: ; return to shader part epilog
2823 ; GFX10PLUS-LABEL: atomic_add_i64_2d:
2824 ; GFX10PLUS: ; %bb.0: ; %main_body
2825 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2826 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2827 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2828 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2829 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2830 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2831 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2832 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2833 ; GFX10PLUS-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm glc
2834 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2835 ; GFX10PLUS-NEXT: ; return to shader part epilog
2837 %v = call i64 @llvm.amdgcn.image.atomic.add.2d.i64.i32(i64 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
2838 %out = bitcast i64 %v to <2 x float>
2839 ret <2 x float> %out
; Test case: 64-bit image atomic add on a 3D image, result returned.
; The IR calls llvm.amdgcn.image.atomic.add.3d.i64.i32 with an i64 data
; operand and three i32 coordinates (%s, %t, %r), then bitcasts the i64
; result to <2 x float> for the return value.
; Every checked target expects eight s_mov_b32 copies (s2..s9 into s0..s7),
; then a single image_atomic_add on v[0:1] addressed by the three-register
; range v[2:4] with dmask:0x3 and glc, then a wait on vmcnt(0). The GFX6
; checks additionally wait on expcnt(0); the GFX10PLUS checks additionally
; carry dim:SQ_RSRC_IMG_3D.
; NOTE(review): the s_mov sequence presumably repacks the inreg %rsrc
; descriptor into s[0:7] -- confirm against the amdgpu_ps argument layout.
2842 define amdgpu_ps <2 x float> @atomic_add_i64_3d(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %r) {
2843 ; GFX6-LABEL: atomic_add_i64_3d:
2844 ; GFX6: ; %bb.0: ; %main_body
2845 ; GFX6-NEXT: s_mov_b32 s0, s2
2846 ; GFX6-NEXT: s_mov_b32 s1, s3
2847 ; GFX6-NEXT: s_mov_b32 s2, s4
2848 ; GFX6-NEXT: s_mov_b32 s3, s5
2849 ; GFX6-NEXT: s_mov_b32 s4, s6
2850 ; GFX6-NEXT: s_mov_b32 s5, s7
2851 ; GFX6-NEXT: s_mov_b32 s6, s8
2852 ; GFX6-NEXT: s_mov_b32 s7, s9
2853 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
2854 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2855 ; GFX6-NEXT: ; return to shader part epilog
2857 ; GFX8-LABEL: atomic_add_i64_3d:
2858 ; GFX8: ; %bb.0: ; %main_body
2859 ; GFX8-NEXT: s_mov_b32 s0, s2
2860 ; GFX8-NEXT: s_mov_b32 s1, s3
2861 ; GFX8-NEXT: s_mov_b32 s2, s4
2862 ; GFX8-NEXT: s_mov_b32 s3, s5
2863 ; GFX8-NEXT: s_mov_b32 s4, s6
2864 ; GFX8-NEXT: s_mov_b32 s5, s7
2865 ; GFX8-NEXT: s_mov_b32 s6, s8
2866 ; GFX8-NEXT: s_mov_b32 s7, s9
2867 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
2868 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2869 ; GFX8-NEXT: ; return to shader part epilog
2871 ; GFX900-LABEL: atomic_add_i64_3d:
2872 ; GFX900: ; %bb.0: ; %main_body
2873 ; GFX900-NEXT: s_mov_b32 s0, s2
2874 ; GFX900-NEXT: s_mov_b32 s1, s3
2875 ; GFX900-NEXT: s_mov_b32 s2, s4
2876 ; GFX900-NEXT: s_mov_b32 s3, s5
2877 ; GFX900-NEXT: s_mov_b32 s4, s6
2878 ; GFX900-NEXT: s_mov_b32 s5, s7
2879 ; GFX900-NEXT: s_mov_b32 s6, s8
2880 ; GFX900-NEXT: s_mov_b32 s7, s9
2881 ; GFX900-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
2882 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2883 ; GFX900-NEXT: ; return to shader part epilog
2885 ; GFX90A-LABEL: atomic_add_i64_3d:
2886 ; GFX90A: ; %bb.0: ; %main_body
2887 ; GFX90A-NEXT: s_mov_b32 s0, s2
2888 ; GFX90A-NEXT: s_mov_b32 s1, s3
2889 ; GFX90A-NEXT: s_mov_b32 s2, s4
2890 ; GFX90A-NEXT: s_mov_b32 s3, s5
2891 ; GFX90A-NEXT: s_mov_b32 s4, s6
2892 ; GFX90A-NEXT: s_mov_b32 s5, s7
2893 ; GFX90A-NEXT: s_mov_b32 s6, s8
2894 ; GFX90A-NEXT: s_mov_b32 s7, s9
2895 ; GFX90A-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
2896 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2897 ; GFX90A-NEXT: ; return to shader part epilog
2899 ; GFX10PLUS-LABEL: atomic_add_i64_3d:
2900 ; GFX10PLUS: ; %bb.0: ; %main_body
2901 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2902 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2903 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2904 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2905 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2906 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2907 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2908 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2909 ; GFX10PLUS-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm glc
2910 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2911 ; GFX10PLUS-NEXT: ; return to shader part epilog
2913 %v = call i64 @llvm.amdgcn.image.atomic.add.3d.i64.i32(i64 %data, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
2914 %out = bitcast i64 %v to <2 x float>
2915 ret <2 x float> %out
; Test case: 64-bit image atomic add on a cube image, result returned.
; The IR calls llvm.amdgcn.image.atomic.add.cube.i64.i32 with an i64 data
; operand and three i32 address operands (%s, %t, %face), then bitcasts the
; i64 result to <2 x float> for the return value.
; Every checked target expects eight s_mov_b32 copies (s2..s9 into s0..s7),
; then a single image_atomic_add on v[0:1] addressed by v[2:4] with
; dmask:0x3 and glc, then a wait on vmcnt(0). The GFX6, GFX8, GFX900 and
; GFX90A checks carry the trailing "da" modifier; the GFX10PLUS checks carry
; dim:SQ_RSRC_IMG_CUBE instead and no "da". The GFX6 checks additionally
; wait on expcnt(0).
; NOTE(review): the s_mov sequence presumably repacks the inreg %rsrc
; descriptor into s[0:7] -- confirm against the amdgpu_ps argument layout.
2918 define amdgpu_ps <2 x float> @atomic_add_i64_cube(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %face) {
2919 ; GFX6-LABEL: atomic_add_i64_cube:
2920 ; GFX6: ; %bb.0: ; %main_body
2921 ; GFX6-NEXT: s_mov_b32 s0, s2
2922 ; GFX6-NEXT: s_mov_b32 s1, s3
2923 ; GFX6-NEXT: s_mov_b32 s2, s4
2924 ; GFX6-NEXT: s_mov_b32 s3, s5
2925 ; GFX6-NEXT: s_mov_b32 s4, s6
2926 ; GFX6-NEXT: s_mov_b32 s5, s7
2927 ; GFX6-NEXT: s_mov_b32 s6, s8
2928 ; GFX6-NEXT: s_mov_b32 s7, s9
2929 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
2930 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2931 ; GFX6-NEXT: ; return to shader part epilog
2933 ; GFX8-LABEL: atomic_add_i64_cube:
2934 ; GFX8: ; %bb.0: ; %main_body
2935 ; GFX8-NEXT: s_mov_b32 s0, s2
2936 ; GFX8-NEXT: s_mov_b32 s1, s3
2937 ; GFX8-NEXT: s_mov_b32 s2, s4
2938 ; GFX8-NEXT: s_mov_b32 s3, s5
2939 ; GFX8-NEXT: s_mov_b32 s4, s6
2940 ; GFX8-NEXT: s_mov_b32 s5, s7
2941 ; GFX8-NEXT: s_mov_b32 s6, s8
2942 ; GFX8-NEXT: s_mov_b32 s7, s9
2943 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
2944 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2945 ; GFX8-NEXT: ; return to shader part epilog
2947 ; GFX900-LABEL: atomic_add_i64_cube:
2948 ; GFX900: ; %bb.0: ; %main_body
2949 ; GFX900-NEXT: s_mov_b32 s0, s2
2950 ; GFX900-NEXT: s_mov_b32 s1, s3
2951 ; GFX900-NEXT: s_mov_b32 s2, s4
2952 ; GFX900-NEXT: s_mov_b32 s3, s5
2953 ; GFX900-NEXT: s_mov_b32 s4, s6
2954 ; GFX900-NEXT: s_mov_b32 s5, s7
2955 ; GFX900-NEXT: s_mov_b32 s6, s8
2956 ; GFX900-NEXT: s_mov_b32 s7, s9
2957 ; GFX900-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
2958 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2959 ; GFX900-NEXT: ; return to shader part epilog
2961 ; GFX90A-LABEL: atomic_add_i64_cube:
2962 ; GFX90A: ; %bb.0: ; %main_body
2963 ; GFX90A-NEXT: s_mov_b32 s0, s2
2964 ; GFX90A-NEXT: s_mov_b32 s1, s3
2965 ; GFX90A-NEXT: s_mov_b32 s2, s4
2966 ; GFX90A-NEXT: s_mov_b32 s3, s5
2967 ; GFX90A-NEXT: s_mov_b32 s4, s6
2968 ; GFX90A-NEXT: s_mov_b32 s5, s7
2969 ; GFX90A-NEXT: s_mov_b32 s6, s8
2970 ; GFX90A-NEXT: s_mov_b32 s7, s9
2971 ; GFX90A-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
2972 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2973 ; GFX90A-NEXT: ; return to shader part epilog
2975 ; GFX10PLUS-LABEL: atomic_add_i64_cube:
2976 ; GFX10PLUS: ; %bb.0: ; %main_body
2977 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2978 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2979 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2980 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2981 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2982 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2983 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2984 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2985 ; GFX10PLUS-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_CUBE unorm glc
2986 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2987 ; GFX10PLUS-NEXT: ; return to shader part epilog
2989 %v = call i64 @llvm.amdgcn.image.atomic.add.cube.i64.i32(i64 %data, i32 %s, i32 %t, i32 %face, <8 x i32> %rsrc, i32 0, i32 0)
2990 %out = bitcast i64 %v to <2 x float>
2991 ret <2 x float> %out
; Test case: 64-bit image atomic add on a 1D array image, result returned.
; The IR calls llvm.amdgcn.image.atomic.add.1darray.i64.i32 with an i64 data
; operand and two i32 address operands (%s, %slice), then bitcasts the i64
; result to <2 x float> for the return value.
; Every checked target expects eight s_mov_b32 copies (s2..s9 into s0..s7),
; then a single image_atomic_add on v[0:1] addressed by v[2:3] with
; dmask:0x3 and glc, then a wait on vmcnt(0). The GFX6, GFX8, GFX900 and
; GFX90A checks carry the trailing "da" modifier; the GFX10PLUS checks carry
; dim:SQ_RSRC_IMG_1D_ARRAY instead and no "da". The GFX6 checks additionally
; wait on expcnt(0).
; NOTE(review): the s_mov sequence presumably repacks the inreg %rsrc
; descriptor into s[0:7] -- confirm against the amdgpu_ps argument layout.
2994 define amdgpu_ps <2 x float> @atomic_add_i64_1darray(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %slice) {
2995 ; GFX6-LABEL: atomic_add_i64_1darray:
2996 ; GFX6: ; %bb.0: ; %main_body
2997 ; GFX6-NEXT: s_mov_b32 s0, s2
2998 ; GFX6-NEXT: s_mov_b32 s1, s3
2999 ; GFX6-NEXT: s_mov_b32 s2, s4
3000 ; GFX6-NEXT: s_mov_b32 s3, s5
3001 ; GFX6-NEXT: s_mov_b32 s4, s6
3002 ; GFX6-NEXT: s_mov_b32 s5, s7
3003 ; GFX6-NEXT: s_mov_b32 s6, s8
3004 ; GFX6-NEXT: s_mov_b32 s7, s9
3005 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da
3006 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
3007 ; GFX6-NEXT: ; return to shader part epilog
3009 ; GFX8-LABEL: atomic_add_i64_1darray:
3010 ; GFX8: ; %bb.0: ; %main_body
3011 ; GFX8-NEXT: s_mov_b32 s0, s2
3012 ; GFX8-NEXT: s_mov_b32 s1, s3
3013 ; GFX8-NEXT: s_mov_b32 s2, s4
3014 ; GFX8-NEXT: s_mov_b32 s3, s5
3015 ; GFX8-NEXT: s_mov_b32 s4, s6
3016 ; GFX8-NEXT: s_mov_b32 s5, s7
3017 ; GFX8-NEXT: s_mov_b32 s6, s8
3018 ; GFX8-NEXT: s_mov_b32 s7, s9
3019 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da
3020 ; GFX8-NEXT: s_waitcnt vmcnt(0)
3021 ; GFX8-NEXT: ; return to shader part epilog
3023 ; GFX900-LABEL: atomic_add_i64_1darray:
3024 ; GFX900: ; %bb.0: ; %main_body
3025 ; GFX900-NEXT: s_mov_b32 s0, s2
3026 ; GFX900-NEXT: s_mov_b32 s1, s3
3027 ; GFX900-NEXT: s_mov_b32 s2, s4
3028 ; GFX900-NEXT: s_mov_b32 s3, s5
3029 ; GFX900-NEXT: s_mov_b32 s4, s6
3030 ; GFX900-NEXT: s_mov_b32 s5, s7
3031 ; GFX900-NEXT: s_mov_b32 s6, s8
3032 ; GFX900-NEXT: s_mov_b32 s7, s9
3033 ; GFX900-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da
3034 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3035 ; GFX900-NEXT: ; return to shader part epilog
3037 ; GFX90A-LABEL: atomic_add_i64_1darray:
3038 ; GFX90A: ; %bb.0: ; %main_body
3039 ; GFX90A-NEXT: s_mov_b32 s0, s2
3040 ; GFX90A-NEXT: s_mov_b32 s1, s3
3041 ; GFX90A-NEXT: s_mov_b32 s2, s4
3042 ; GFX90A-NEXT: s_mov_b32 s3, s5
3043 ; GFX90A-NEXT: s_mov_b32 s4, s6
3044 ; GFX90A-NEXT: s_mov_b32 s5, s7
3045 ; GFX90A-NEXT: s_mov_b32 s6, s8
3046 ; GFX90A-NEXT: s_mov_b32 s7, s9
3047 ; GFX90A-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da
3048 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3049 ; GFX90A-NEXT: ; return to shader part epilog
3051 ; GFX10PLUS-LABEL: atomic_add_i64_1darray:
3052 ; GFX10PLUS: ; %bb.0: ; %main_body
3053 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
3054 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
3055 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
3056 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
3057 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
3058 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
3059 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
3060 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
3061 ; GFX10PLUS-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc
3062 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
3063 ; GFX10PLUS-NEXT: ; return to shader part epilog
3065 %v = call i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i32(i64 %data, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
3066 %out = bitcast i64 %v to <2 x float>
3067 ret <2 x float> %out
; Test case: 64-bit image atomic add on a 2D array image, result returned.
; The IR calls llvm.amdgcn.image.atomic.add.2darray.i64.i32 with an i64 data
; operand and three i32 address operands (%s, %t, %slice), then bitcasts the
; i64 result to <2 x float> for the return value.
; Every checked target expects eight s_mov_b32 copies (s2..s9 into s0..s7),
; then a single image_atomic_add on v[0:1] addressed by v[2:4] with
; dmask:0x3 and glc, then a wait on vmcnt(0). The GFX6, GFX8, GFX900 and
; GFX90A checks carry the trailing "da" modifier; the GFX10PLUS checks carry
; dim:SQ_RSRC_IMG_2D_ARRAY instead and no "da". The GFX6 checks additionally
; wait on expcnt(0).
; NOTE(review): the s_mov sequence presumably repacks the inreg %rsrc
; descriptor into s[0:7] -- confirm against the amdgpu_ps argument layout.
3070 define amdgpu_ps <2 x float> @atomic_add_i64_2darray(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %slice) {
3071 ; GFX6-LABEL: atomic_add_i64_2darray:
3072 ; GFX6: ; %bb.0: ; %main_body
3073 ; GFX6-NEXT: s_mov_b32 s0, s2
3074 ; GFX6-NEXT: s_mov_b32 s1, s3
3075 ; GFX6-NEXT: s_mov_b32 s2, s4
3076 ; GFX6-NEXT: s_mov_b32 s3, s5
3077 ; GFX6-NEXT: s_mov_b32 s4, s6
3078 ; GFX6-NEXT: s_mov_b32 s5, s7
3079 ; GFX6-NEXT: s_mov_b32 s6, s8
3080 ; GFX6-NEXT: s_mov_b32 s7, s9
3081 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
3082 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
3083 ; GFX6-NEXT: ; return to shader part epilog
3085 ; GFX8-LABEL: atomic_add_i64_2darray:
3086 ; GFX8: ; %bb.0: ; %main_body
3087 ; GFX8-NEXT: s_mov_b32 s0, s2
3088 ; GFX8-NEXT: s_mov_b32 s1, s3
3089 ; GFX8-NEXT: s_mov_b32 s2, s4
3090 ; GFX8-NEXT: s_mov_b32 s3, s5
3091 ; GFX8-NEXT: s_mov_b32 s4, s6
3092 ; GFX8-NEXT: s_mov_b32 s5, s7
3093 ; GFX8-NEXT: s_mov_b32 s6, s8
3094 ; GFX8-NEXT: s_mov_b32 s7, s9
3095 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
3096 ; GFX8-NEXT: s_waitcnt vmcnt(0)
3097 ; GFX8-NEXT: ; return to shader part epilog
3099 ; GFX900-LABEL: atomic_add_i64_2darray:
3100 ; GFX900: ; %bb.0: ; %main_body
3101 ; GFX900-NEXT: s_mov_b32 s0, s2
3102 ; GFX900-NEXT: s_mov_b32 s1, s3
3103 ; GFX900-NEXT: s_mov_b32 s2, s4
3104 ; GFX900-NEXT: s_mov_b32 s3, s5
3105 ; GFX900-NEXT: s_mov_b32 s4, s6
3106 ; GFX900-NEXT: s_mov_b32 s5, s7
3107 ; GFX900-NEXT: s_mov_b32 s6, s8
3108 ; GFX900-NEXT: s_mov_b32 s7, s9
3109 ; GFX900-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
3110 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3111 ; GFX900-NEXT: ; return to shader part epilog
3113 ; GFX90A-LABEL: atomic_add_i64_2darray:
3114 ; GFX90A: ; %bb.0: ; %main_body
3115 ; GFX90A-NEXT: s_mov_b32 s0, s2
3116 ; GFX90A-NEXT: s_mov_b32 s1, s3
3117 ; GFX90A-NEXT: s_mov_b32 s2, s4
3118 ; GFX90A-NEXT: s_mov_b32 s3, s5
3119 ; GFX90A-NEXT: s_mov_b32 s4, s6
3120 ; GFX90A-NEXT: s_mov_b32 s5, s7
3121 ; GFX90A-NEXT: s_mov_b32 s6, s8
3122 ; GFX90A-NEXT: s_mov_b32 s7, s9
3123 ; GFX90A-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
3124 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3125 ; GFX90A-NEXT: ; return to shader part epilog
3127 ; GFX10PLUS-LABEL: atomic_add_i64_2darray:
3128 ; GFX10PLUS: ; %bb.0: ; %main_body
3129 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
3130 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
3131 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
3132 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
3133 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
3134 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
3135 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
3136 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
3137 ; GFX10PLUS-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc
3138 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
3139 ; GFX10PLUS-NEXT: ; return to shader part epilog
3141 %v = call i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i32(i64 %data, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
3142 %out = bitcast i64 %v to <2 x float>
3143 ret <2 x float> %out
; Test case: 64-bit image atomic add on a 2D MSAA image, result returned.
; The IR calls llvm.amdgcn.image.atomic.add.2dmsaa.i64.i32 with an i64 data
; operand and three i32 address operands (%s, %t, %fragid), then bitcasts
; the i64 result to <2 x float> for the return value.
; Every checked target expects eight s_mov_b32 copies (s2..s9 into s0..s7),
; then a single image_atomic_add on v[0:1] addressed by v[2:4] with
; dmask:0x3 and glc, then a wait on vmcnt(0). None of the checks expect a
; "da" modifier here; the GFX10PLUS checks additionally carry
; dim:SQ_RSRC_IMG_2D_MSAA, and the GFX6 checks additionally wait on
; expcnt(0).
; NOTE(review): the s_mov sequence presumably repacks the inreg %rsrc
; descriptor into s[0:7] -- confirm against the amdgpu_ps argument layout.
3146 define amdgpu_ps <2 x float> @atomic_add_i64_2dmsaa(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %fragid) {
3147 ; GFX6-LABEL: atomic_add_i64_2dmsaa:
3148 ; GFX6: ; %bb.0: ; %main_body
3149 ; GFX6-NEXT: s_mov_b32 s0, s2
3150 ; GFX6-NEXT: s_mov_b32 s1, s3
3151 ; GFX6-NEXT: s_mov_b32 s2, s4
3152 ; GFX6-NEXT: s_mov_b32 s3, s5
3153 ; GFX6-NEXT: s_mov_b32 s4, s6
3154 ; GFX6-NEXT: s_mov_b32 s5, s7
3155 ; GFX6-NEXT: s_mov_b32 s6, s8
3156 ; GFX6-NEXT: s_mov_b32 s7, s9
3157 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
3158 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
3159 ; GFX6-NEXT: ; return to shader part epilog
3161 ; GFX8-LABEL: atomic_add_i64_2dmsaa:
3162 ; GFX8: ; %bb.0: ; %main_body
3163 ; GFX8-NEXT: s_mov_b32 s0, s2
3164 ; GFX8-NEXT: s_mov_b32 s1, s3
3165 ; GFX8-NEXT: s_mov_b32 s2, s4
3166 ; GFX8-NEXT: s_mov_b32 s3, s5
3167 ; GFX8-NEXT: s_mov_b32 s4, s6
3168 ; GFX8-NEXT: s_mov_b32 s5, s7
3169 ; GFX8-NEXT: s_mov_b32 s6, s8
3170 ; GFX8-NEXT: s_mov_b32 s7, s9
3171 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
3172 ; GFX8-NEXT: s_waitcnt vmcnt(0)
3173 ; GFX8-NEXT: ; return to shader part epilog
3175 ; GFX900-LABEL: atomic_add_i64_2dmsaa:
3176 ; GFX900: ; %bb.0: ; %main_body
3177 ; GFX900-NEXT: s_mov_b32 s0, s2
3178 ; GFX900-NEXT: s_mov_b32 s1, s3
3179 ; GFX900-NEXT: s_mov_b32 s2, s4
3180 ; GFX900-NEXT: s_mov_b32 s3, s5
3181 ; GFX900-NEXT: s_mov_b32 s4, s6
3182 ; GFX900-NEXT: s_mov_b32 s5, s7
3183 ; GFX900-NEXT: s_mov_b32 s6, s8
3184 ; GFX900-NEXT: s_mov_b32 s7, s9
3185 ; GFX900-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
3186 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3187 ; GFX900-NEXT: ; return to shader part epilog
3189 ; GFX90A-LABEL: atomic_add_i64_2dmsaa:
3190 ; GFX90A: ; %bb.0: ; %main_body
3191 ; GFX90A-NEXT: s_mov_b32 s0, s2
3192 ; GFX90A-NEXT: s_mov_b32 s1, s3
3193 ; GFX90A-NEXT: s_mov_b32 s2, s4
3194 ; GFX90A-NEXT: s_mov_b32 s3, s5
3195 ; GFX90A-NEXT: s_mov_b32 s4, s6
3196 ; GFX90A-NEXT: s_mov_b32 s5, s7
3197 ; GFX90A-NEXT: s_mov_b32 s6, s8
3198 ; GFX90A-NEXT: s_mov_b32 s7, s9
3199 ; GFX90A-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
3200 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3201 ; GFX90A-NEXT: ; return to shader part epilog
3203 ; GFX10PLUS-LABEL: atomic_add_i64_2dmsaa:
3204 ; GFX10PLUS: ; %bb.0: ; %main_body
3205 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
3206 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
3207 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
3208 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
3209 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
3210 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
3211 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
3212 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
3213 ; GFX10PLUS-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA unorm glc
3214 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
3215 ; GFX10PLUS-NEXT: ; return to shader part epilog
3217 %v = call i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i32(i64 %data, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
3218 %out = bitcast i64 %v to <2 x float>
3219 ret <2 x float> %out
; Test case: 64-bit image atomic add on a 2D MSAA array image, result
; returned.
; The IR calls llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i32 with an i64
; data operand and four i32 address operands (%s, %t, %slice, %fragid), then
; bitcasts the i64 result to <2 x float> for the return value.
; Every checked target expects eight s_mov_b32 copies (s2..s9 into s0..s7),
; then a single image_atomic_add on v[0:1] addressed by the four-register
; range v[2:5] with dmask:0x3 and glc, then a wait on vmcnt(0). The GFX6,
; GFX8, GFX900 and GFX90A checks carry the trailing "da" modifier; the
; GFX10PLUS checks carry dim:SQ_RSRC_IMG_2D_MSAA_ARRAY instead and no "da".
; The GFX6 checks additionally wait on expcnt(0).
; NOTE(review): the s_mov sequence presumably repacks the inreg %rsrc
; descriptor into s[0:7] -- confirm against the amdgpu_ps argument layout.
3222 define amdgpu_ps <2 x float> @atomic_add_i64_2darraymsaa(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
3223 ; GFX6-LABEL: atomic_add_i64_2darraymsaa:
3224 ; GFX6: ; %bb.0: ; %main_body
3225 ; GFX6-NEXT: s_mov_b32 s0, s2
3226 ; GFX6-NEXT: s_mov_b32 s1, s3
3227 ; GFX6-NEXT: s_mov_b32 s2, s4
3228 ; GFX6-NEXT: s_mov_b32 s3, s5
3229 ; GFX6-NEXT: s_mov_b32 s4, s6
3230 ; GFX6-NEXT: s_mov_b32 s5, s7
3231 ; GFX6-NEXT: s_mov_b32 s6, s8
3232 ; GFX6-NEXT: s_mov_b32 s7, s9
3233 ; GFX6-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da
3234 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
3235 ; GFX6-NEXT: ; return to shader part epilog
3237 ; GFX8-LABEL: atomic_add_i64_2darraymsaa:
3238 ; GFX8: ; %bb.0: ; %main_body
3239 ; GFX8-NEXT: s_mov_b32 s0, s2
3240 ; GFX8-NEXT: s_mov_b32 s1, s3
3241 ; GFX8-NEXT: s_mov_b32 s2, s4
3242 ; GFX8-NEXT: s_mov_b32 s3, s5
3243 ; GFX8-NEXT: s_mov_b32 s4, s6
3244 ; GFX8-NEXT: s_mov_b32 s5, s7
3245 ; GFX8-NEXT: s_mov_b32 s6, s8
3246 ; GFX8-NEXT: s_mov_b32 s7, s9
3247 ; GFX8-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da
3248 ; GFX8-NEXT: s_waitcnt vmcnt(0)
3249 ; GFX8-NEXT: ; return to shader part epilog
3251 ; GFX900-LABEL: atomic_add_i64_2darraymsaa:
3252 ; GFX900: ; %bb.0: ; %main_body
3253 ; GFX900-NEXT: s_mov_b32 s0, s2
3254 ; GFX900-NEXT: s_mov_b32 s1, s3
3255 ; GFX900-NEXT: s_mov_b32 s2, s4
3256 ; GFX900-NEXT: s_mov_b32 s3, s5
3257 ; GFX900-NEXT: s_mov_b32 s4, s6
3258 ; GFX900-NEXT: s_mov_b32 s5, s7
3259 ; GFX900-NEXT: s_mov_b32 s6, s8
3260 ; GFX900-NEXT: s_mov_b32 s7, s9
3261 ; GFX900-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da
3262 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3263 ; GFX900-NEXT: ; return to shader part epilog
3265 ; GFX90A-LABEL: atomic_add_i64_2darraymsaa:
3266 ; GFX90A: ; %bb.0: ; %main_body
3267 ; GFX90A-NEXT: s_mov_b32 s0, s2
3268 ; GFX90A-NEXT: s_mov_b32 s1, s3
3269 ; GFX90A-NEXT: s_mov_b32 s2, s4
3270 ; GFX90A-NEXT: s_mov_b32 s3, s5
3271 ; GFX90A-NEXT: s_mov_b32 s4, s6
3272 ; GFX90A-NEXT: s_mov_b32 s5, s7
3273 ; GFX90A-NEXT: s_mov_b32 s6, s8
3274 ; GFX90A-NEXT: s_mov_b32 s7, s9
3275 ; GFX90A-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da
3276 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3277 ; GFX90A-NEXT: ; return to shader part epilog
3279 ; GFX10PLUS-LABEL: atomic_add_i64_2darraymsaa:
3280 ; GFX10PLUS: ; %bb.0: ; %main_body
3281 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
3282 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
3283 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
3284 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
3285 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
3286 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
3287 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
3288 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
3289 ; GFX10PLUS-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc
3290 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
3291 ; GFX10PLUS-NEXT: ; return to shader part epilog
3293 %v = call i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i32(i64 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
3294 %out = bitcast i64 %v to <2 x float>
3295 ret <2 x float> %out
; Image atomic add with an i64 data operand through the 1d intrinsic variant,
; called with a final immediate operand of 2 (the 2darraymsaa test earlier in
; this file passes 0 there). The i64 result is bitcast to <2 x float> and
; returned.
;
; What the expected output below pins down, per check prefix:
;  * s2..s9 are copied down into s0..s7, and s[0:7] is then used as the
;    resource operand of the image instruction.
;  * The data operand occupies v[0:1] and the single address operand is v2,
;    with dmask:0x3.
;  * Every prefix expects the slc modifier alongside glc on the instruction.
;    (review: presumably the final immediate is the cache-policy operand and
;    value 2 selects slc - confirm against the intrinsic definition.)
;  * The GFX10PLUS prefix additionally expects dim:SQ_RSRC_IMG_1D.
;  * Only the GFX6 prefix expects expcnt(0) in addition to vmcnt(0) in the
;    final wait.
3298 define amdgpu_ps <2 x float> @atomic_add_i64_1d_slc(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
3299 ; GFX6-LABEL: atomic_add_i64_1d_slc:
3300 ; GFX6: ; %bb.0: ; %main_body
3301 ; GFX6-NEXT: s_mov_b32 s0, s2
3302 ; GFX6-NEXT: s_mov_b32 s1, s3
3303 ; GFX6-NEXT: s_mov_b32 s2, s4
3304 ; GFX6-NEXT: s_mov_b32 s3, s5
3305 ; GFX6-NEXT: s_mov_b32 s4, s6
3306 ; GFX6-NEXT: s_mov_b32 s5, s7
3307 ; GFX6-NEXT: s_mov_b32 s6, s8
3308 ; GFX6-NEXT: s_mov_b32 s7, s9
3309 ; GFX6-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc
3310 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
3311 ; GFX6-NEXT: ; return to shader part epilog
3313 ; GFX8-LABEL: atomic_add_i64_1d_slc:
3314 ; GFX8: ; %bb.0: ; %main_body
3315 ; GFX8-NEXT: s_mov_b32 s0, s2
3316 ; GFX8-NEXT: s_mov_b32 s1, s3
3317 ; GFX8-NEXT: s_mov_b32 s2, s4
3318 ; GFX8-NEXT: s_mov_b32 s3, s5
3319 ; GFX8-NEXT: s_mov_b32 s4, s6
3320 ; GFX8-NEXT: s_mov_b32 s5, s7
3321 ; GFX8-NEXT: s_mov_b32 s6, s8
3322 ; GFX8-NEXT: s_mov_b32 s7, s9
3323 ; GFX8-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc
3324 ; GFX8-NEXT: s_waitcnt vmcnt(0)
3325 ; GFX8-NEXT: ; return to shader part epilog
3327 ; GFX900-LABEL: atomic_add_i64_1d_slc:
3328 ; GFX900: ; %bb.0: ; %main_body
3329 ; GFX900-NEXT: s_mov_b32 s0, s2
3330 ; GFX900-NEXT: s_mov_b32 s1, s3
3331 ; GFX900-NEXT: s_mov_b32 s2, s4
3332 ; GFX900-NEXT: s_mov_b32 s3, s5
3333 ; GFX900-NEXT: s_mov_b32 s4, s6
3334 ; GFX900-NEXT: s_mov_b32 s5, s7
3335 ; GFX900-NEXT: s_mov_b32 s6, s8
3336 ; GFX900-NEXT: s_mov_b32 s7, s9
3337 ; GFX900-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc
3338 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3339 ; GFX900-NEXT: ; return to shader part epilog
3341 ; GFX90A-LABEL: atomic_add_i64_1d_slc:
3342 ; GFX90A: ; %bb.0: ; %main_body
3343 ; GFX90A-NEXT: s_mov_b32 s0, s2
3344 ; GFX90A-NEXT: s_mov_b32 s1, s3
3345 ; GFX90A-NEXT: s_mov_b32 s2, s4
3346 ; GFX90A-NEXT: s_mov_b32 s3, s5
3347 ; GFX90A-NEXT: s_mov_b32 s4, s6
3348 ; GFX90A-NEXT: s_mov_b32 s5, s7
3349 ; GFX90A-NEXT: s_mov_b32 s6, s8
3350 ; GFX90A-NEXT: s_mov_b32 s7, s9
3351 ; GFX90A-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc
3352 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3353 ; GFX90A-NEXT: ; return to shader part epilog
3355 ; GFX10PLUS-LABEL: atomic_add_i64_1d_slc:
3356 ; GFX10PLUS: ; %bb.0: ; %main_body
3357 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
3358 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
3359 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
3360 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
3361 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
3362 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
3363 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
3364 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
3365 ; GFX10PLUS-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc slc
3366 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
3367 ; GFX10PLUS-NEXT: ; return to shader part epilog
; IR under test: one intrinsic call whose last immediate is 2, then a bitcast
; so the i64 result can be returned as <2 x float>.
3369 %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
3370 %out = bitcast i64 %v to <2 x float>
3371 ret <2 x float> %out
; Declarations of the image atomic intrinsics, all tagged with attribute
; group #0. Every signature has the same layout: data operand(s), the i32
; address operands for the named dimension, the <8 x i32> resource, and two
; trailing i32 immarg operands.
;
; i32 data variants. The 1d dimension is declared for swap, add, sub, smin,
; umin, smax, umax, and, or, xor, inc, dec and cmpswap; the other dimensions
; (2d, 3d, cube, 1darray, 2darray, 2dmsaa, 2darraymsaa) are declared for add
; only. cmpswap is the one operation that takes two leading data operands.
3374 declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3375 declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3376 declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3377 declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3378 declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3379 declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3380 declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3381 declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3382 declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3383 declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3384 declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3385 declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3386 declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3387 declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3388 declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3389 declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3390 declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3391 declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3392 declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3393 declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
; i64 data variants: the same set of operations and dimensions as the i32
; group, with the data operand(s) and the return type widened to i64 while
; the address operands stay i32.
3395 declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3396 declare i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3397 declare i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3398 declare i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3399 declare i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3400 declare i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3401 declare i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3402 declare i64 @llvm.amdgcn.image.atomic.and.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3403 declare i64 @llvm.amdgcn.image.atomic.or.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3404 declare i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3405 declare i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3406 declare i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3407 declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64, i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3408 declare i64 @llvm.amdgcn.image.atomic.add.2d.i64.i32(i64, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3409 declare i64 @llvm.amdgcn.image.atomic.add.3d.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3410 declare i64 @llvm.amdgcn.image.atomic.add.cube.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3411 declare i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i32(i64, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3412 declare i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3413 declare i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
3414 declare i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i32(i64, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
; Attribute group shared by every declaration above.
3416 attributes #0 = { nounwind }