1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s
; Image atomic swap on a 1D image: i32 data, one i16 coordinate (%s).
; Both targets are expected to copy the inreg resource descriptor from
; s[2:9] into s[0:7] and emit a single image_atomic_swap with glc and a16.
; The i32 result is bitcast to float for the shader return value.
5 define amdgpu_ps float @atomic_swap_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
6 ; GFX9-LABEL: atomic_swap_i32_1d:
7 ; GFX9: ; %bb.0: ; %main_body
8 ; GFX9-NEXT: s_mov_b32 s0, s2
9 ; GFX9-NEXT: s_mov_b32 s1, s3
10 ; GFX9-NEXT: s_mov_b32 s2, s4
11 ; GFX9-NEXT: s_mov_b32 s3, s5
12 ; GFX9-NEXT: s_mov_b32 s4, s6
13 ; GFX9-NEXT: s_mov_b32 s5, s7
14 ; GFX9-NEXT: s_mov_b32 s6, s8
15 ; GFX9-NEXT: s_mov_b32 s7, s9
16 ; GFX9-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc a16
17 ; GFX9-NEXT: s_waitcnt vmcnt(0)
18 ; GFX9-NEXT: ; return to shader part epilog
20 ; GFX10-LABEL: atomic_swap_i32_1d:
21 ; GFX10: ; %bb.0: ; %main_body
22 ; GFX10-NEXT: s_mov_b32 s0, s2
23 ; GFX10-NEXT: s_mov_b32 s1, s3
24 ; GFX10-NEXT: s_mov_b32 s2, s4
25 ; GFX10-NEXT: s_mov_b32 s3, s5
26 ; GFX10-NEXT: s_mov_b32 s4, s6
27 ; GFX10-NEXT: s_mov_b32 s5, s7
28 ; GFX10-NEXT: s_mov_b32 s6, s8
29 ; GFX10-NEXT: s_mov_b32 s7, s9
30 ; GFX10-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
31 ; GFX10-NEXT: s_waitcnt vmcnt(0)
32 ; GFX10-NEXT: ; return to shader part epilog
34 %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
35 %out = bitcast i32 %v to float
; Image atomic add on a 1D image: i32 data, one i16 coordinate (%s).
; Expected output is the descriptor copy into s[0:7] followed by one
; image_atomic_add with glc and a16; the i32 result is returned as float.
39 define amdgpu_ps float @atomic_add_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
40 ; GFX9-LABEL: atomic_add_i32_1d:
41 ; GFX9: ; %bb.0: ; %main_body
42 ; GFX9-NEXT: s_mov_b32 s0, s2
43 ; GFX9-NEXT: s_mov_b32 s1, s3
44 ; GFX9-NEXT: s_mov_b32 s2, s4
45 ; GFX9-NEXT: s_mov_b32 s3, s5
46 ; GFX9-NEXT: s_mov_b32 s4, s6
47 ; GFX9-NEXT: s_mov_b32 s5, s7
48 ; GFX9-NEXT: s_mov_b32 s6, s8
49 ; GFX9-NEXT: s_mov_b32 s7, s9
50 ; GFX9-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc a16
51 ; GFX9-NEXT: s_waitcnt vmcnt(0)
52 ; GFX9-NEXT: ; return to shader part epilog
54 ; GFX10-LABEL: atomic_add_i32_1d:
55 ; GFX10: ; %bb.0: ; %main_body
56 ; GFX10-NEXT: s_mov_b32 s0, s2
57 ; GFX10-NEXT: s_mov_b32 s1, s3
58 ; GFX10-NEXT: s_mov_b32 s2, s4
59 ; GFX10-NEXT: s_mov_b32 s3, s5
60 ; GFX10-NEXT: s_mov_b32 s4, s6
61 ; GFX10-NEXT: s_mov_b32 s5, s7
62 ; GFX10-NEXT: s_mov_b32 s6, s8
63 ; GFX10-NEXT: s_mov_b32 s7, s9
64 ; GFX10-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
65 ; GFX10-NEXT: s_waitcnt vmcnt(0)
66 ; GFX10-NEXT: ; return to shader part epilog
68 %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
69 %out = bitcast i32 %v to float
; Image atomic sub on a 1D image: i32 data, one i16 coordinate (%s).
; Expected output is the descriptor copy into s[0:7] followed by one
; image_atomic_sub with glc and a16; the i32 result is returned as float.
73 define amdgpu_ps float @atomic_sub_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
74 ; GFX9-LABEL: atomic_sub_i32_1d:
75 ; GFX9: ; %bb.0: ; %main_body
76 ; GFX9-NEXT: s_mov_b32 s0, s2
77 ; GFX9-NEXT: s_mov_b32 s1, s3
78 ; GFX9-NEXT: s_mov_b32 s2, s4
79 ; GFX9-NEXT: s_mov_b32 s3, s5
80 ; GFX9-NEXT: s_mov_b32 s4, s6
81 ; GFX9-NEXT: s_mov_b32 s5, s7
82 ; GFX9-NEXT: s_mov_b32 s6, s8
83 ; GFX9-NEXT: s_mov_b32 s7, s9
84 ; GFX9-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc a16
85 ; GFX9-NEXT: s_waitcnt vmcnt(0)
86 ; GFX9-NEXT: ; return to shader part epilog
88 ; GFX10-LABEL: atomic_sub_i32_1d:
89 ; GFX10: ; %bb.0: ; %main_body
90 ; GFX10-NEXT: s_mov_b32 s0, s2
91 ; GFX10-NEXT: s_mov_b32 s1, s3
92 ; GFX10-NEXT: s_mov_b32 s2, s4
93 ; GFX10-NEXT: s_mov_b32 s3, s5
94 ; GFX10-NEXT: s_mov_b32 s4, s6
95 ; GFX10-NEXT: s_mov_b32 s5, s7
96 ; GFX10-NEXT: s_mov_b32 s6, s8
97 ; GFX10-NEXT: s_mov_b32 s7, s9
98 ; GFX10-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
99 ; GFX10-NEXT: s_waitcnt vmcnt(0)
100 ; GFX10-NEXT: ; return to shader part epilog
102 %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
103 %out = bitcast i32 %v to float
; Image atomic signed-min on a 1D image: i32 data, one i16 coordinate (%s).
; Expected output is the descriptor copy into s[0:7] followed by one
; image_atomic_smin with glc and a16; the i32 result is returned as float.
107 define amdgpu_ps float @atomic_smin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
108 ; GFX9-LABEL: atomic_smin_i32_1d:
109 ; GFX9: ; %bb.0: ; %main_body
110 ; GFX9-NEXT: s_mov_b32 s0, s2
111 ; GFX9-NEXT: s_mov_b32 s1, s3
112 ; GFX9-NEXT: s_mov_b32 s2, s4
113 ; GFX9-NEXT: s_mov_b32 s3, s5
114 ; GFX9-NEXT: s_mov_b32 s4, s6
115 ; GFX9-NEXT: s_mov_b32 s5, s7
116 ; GFX9-NEXT: s_mov_b32 s6, s8
117 ; GFX9-NEXT: s_mov_b32 s7, s9
118 ; GFX9-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc a16
119 ; GFX9-NEXT: s_waitcnt vmcnt(0)
120 ; GFX9-NEXT: ; return to shader part epilog
122 ; GFX10-LABEL: atomic_smin_i32_1d:
123 ; GFX10: ; %bb.0: ; %main_body
124 ; GFX10-NEXT: s_mov_b32 s0, s2
125 ; GFX10-NEXT: s_mov_b32 s1, s3
126 ; GFX10-NEXT: s_mov_b32 s2, s4
127 ; GFX10-NEXT: s_mov_b32 s3, s5
128 ; GFX10-NEXT: s_mov_b32 s4, s6
129 ; GFX10-NEXT: s_mov_b32 s5, s7
130 ; GFX10-NEXT: s_mov_b32 s6, s8
131 ; GFX10-NEXT: s_mov_b32 s7, s9
132 ; GFX10-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
133 ; GFX10-NEXT: s_waitcnt vmcnt(0)
134 ; GFX10-NEXT: ; return to shader part epilog
136 %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
137 %out = bitcast i32 %v to float
; Image atomic unsigned-min on a 1D image: i32 data, one i16 coordinate (%s).
; Expected output is the descriptor copy into s[0:7] followed by one
; image_atomic_umin with glc and a16; the i32 result is returned as float.
141 define amdgpu_ps float @atomic_umin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
142 ; GFX9-LABEL: atomic_umin_i32_1d:
143 ; GFX9: ; %bb.0: ; %main_body
144 ; GFX9-NEXT: s_mov_b32 s0, s2
145 ; GFX9-NEXT: s_mov_b32 s1, s3
146 ; GFX9-NEXT: s_mov_b32 s2, s4
147 ; GFX9-NEXT: s_mov_b32 s3, s5
148 ; GFX9-NEXT: s_mov_b32 s4, s6
149 ; GFX9-NEXT: s_mov_b32 s5, s7
150 ; GFX9-NEXT: s_mov_b32 s6, s8
151 ; GFX9-NEXT: s_mov_b32 s7, s9
152 ; GFX9-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc a16
153 ; GFX9-NEXT: s_waitcnt vmcnt(0)
154 ; GFX9-NEXT: ; return to shader part epilog
156 ; GFX10-LABEL: atomic_umin_i32_1d:
157 ; GFX10: ; %bb.0: ; %main_body
158 ; GFX10-NEXT: s_mov_b32 s0, s2
159 ; GFX10-NEXT: s_mov_b32 s1, s3
160 ; GFX10-NEXT: s_mov_b32 s2, s4
161 ; GFX10-NEXT: s_mov_b32 s3, s5
162 ; GFX10-NEXT: s_mov_b32 s4, s6
163 ; GFX10-NEXT: s_mov_b32 s5, s7
164 ; GFX10-NEXT: s_mov_b32 s6, s8
165 ; GFX10-NEXT: s_mov_b32 s7, s9
166 ; GFX10-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
167 ; GFX10-NEXT: s_waitcnt vmcnt(0)
168 ; GFX10-NEXT: ; return to shader part epilog
170 %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
171 %out = bitcast i32 %v to float
; Image atomic signed-max on a 1D image: i32 data, one i16 coordinate (%s).
; Expected output is the descriptor copy into s[0:7] followed by one
; image_atomic_smax with glc and a16; the i32 result is returned as float.
175 define amdgpu_ps float @atomic_smax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
176 ; GFX9-LABEL: atomic_smax_i32_1d:
177 ; GFX9: ; %bb.0: ; %main_body
178 ; GFX9-NEXT: s_mov_b32 s0, s2
179 ; GFX9-NEXT: s_mov_b32 s1, s3
180 ; GFX9-NEXT: s_mov_b32 s2, s4
181 ; GFX9-NEXT: s_mov_b32 s3, s5
182 ; GFX9-NEXT: s_mov_b32 s4, s6
183 ; GFX9-NEXT: s_mov_b32 s5, s7
184 ; GFX9-NEXT: s_mov_b32 s6, s8
185 ; GFX9-NEXT: s_mov_b32 s7, s9
186 ; GFX9-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc a16
187 ; GFX9-NEXT: s_waitcnt vmcnt(0)
188 ; GFX9-NEXT: ; return to shader part epilog
190 ; GFX10-LABEL: atomic_smax_i32_1d:
191 ; GFX10: ; %bb.0: ; %main_body
192 ; GFX10-NEXT: s_mov_b32 s0, s2
193 ; GFX10-NEXT: s_mov_b32 s1, s3
194 ; GFX10-NEXT: s_mov_b32 s2, s4
195 ; GFX10-NEXT: s_mov_b32 s3, s5
196 ; GFX10-NEXT: s_mov_b32 s4, s6
197 ; GFX10-NEXT: s_mov_b32 s5, s7
198 ; GFX10-NEXT: s_mov_b32 s6, s8
199 ; GFX10-NEXT: s_mov_b32 s7, s9
200 ; GFX10-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
201 ; GFX10-NEXT: s_waitcnt vmcnt(0)
202 ; GFX10-NEXT: ; return to shader part epilog
204 %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
205 %out = bitcast i32 %v to float
; Image atomic unsigned-max on a 1D image: i32 data, one i16 coordinate (%s).
; Expected output is the descriptor copy into s[0:7] followed by one
; image_atomic_umax with glc and a16; the i32 result is returned as float.
209 define amdgpu_ps float @atomic_umax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
210 ; GFX9-LABEL: atomic_umax_i32_1d:
211 ; GFX9: ; %bb.0: ; %main_body
212 ; GFX9-NEXT: s_mov_b32 s0, s2
213 ; GFX9-NEXT: s_mov_b32 s1, s3
214 ; GFX9-NEXT: s_mov_b32 s2, s4
215 ; GFX9-NEXT: s_mov_b32 s3, s5
216 ; GFX9-NEXT: s_mov_b32 s4, s6
217 ; GFX9-NEXT: s_mov_b32 s5, s7
218 ; GFX9-NEXT: s_mov_b32 s6, s8
219 ; GFX9-NEXT: s_mov_b32 s7, s9
220 ; GFX9-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc a16
221 ; GFX9-NEXT: s_waitcnt vmcnt(0)
222 ; GFX9-NEXT: ; return to shader part epilog
224 ; GFX10-LABEL: atomic_umax_i32_1d:
225 ; GFX10: ; %bb.0: ; %main_body
226 ; GFX10-NEXT: s_mov_b32 s0, s2
227 ; GFX10-NEXT: s_mov_b32 s1, s3
228 ; GFX10-NEXT: s_mov_b32 s2, s4
229 ; GFX10-NEXT: s_mov_b32 s3, s5
230 ; GFX10-NEXT: s_mov_b32 s4, s6
231 ; GFX10-NEXT: s_mov_b32 s5, s7
232 ; GFX10-NEXT: s_mov_b32 s6, s8
233 ; GFX10-NEXT: s_mov_b32 s7, s9
234 ; GFX10-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
235 ; GFX10-NEXT: s_waitcnt vmcnt(0)
236 ; GFX10-NEXT: ; return to shader part epilog
238 %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
239 %out = bitcast i32 %v to float
; Image atomic and on a 1D image: i32 data, one i16 coordinate (%s).
; Expected output is the descriptor copy into s[0:7] followed by one
; image_atomic_and with glc and a16; the i32 result is returned as float.
; NOTE(review): the function name has no underscore between "i32" and "1d",
; unlike the sibling tests (e.g. atomic_or_i32_1d). Looks like a typo;
; renaming would also require regenerating the LABEL check lines.
243 define amdgpu_ps float @atomic_and_i321d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
244 ; GFX9-LABEL: atomic_and_i321d:
245 ; GFX9: ; %bb.0: ; %main_body
246 ; GFX9-NEXT: s_mov_b32 s0, s2
247 ; GFX9-NEXT: s_mov_b32 s1, s3
248 ; GFX9-NEXT: s_mov_b32 s2, s4
249 ; GFX9-NEXT: s_mov_b32 s3, s5
250 ; GFX9-NEXT: s_mov_b32 s4, s6
251 ; GFX9-NEXT: s_mov_b32 s5, s7
252 ; GFX9-NEXT: s_mov_b32 s6, s8
253 ; GFX9-NEXT: s_mov_b32 s7, s9
254 ; GFX9-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc a16
255 ; GFX9-NEXT: s_waitcnt vmcnt(0)
256 ; GFX9-NEXT: ; return to shader part epilog
258 ; GFX10-LABEL: atomic_and_i321d:
259 ; GFX10: ; %bb.0: ; %main_body
260 ; GFX10-NEXT: s_mov_b32 s0, s2
261 ; GFX10-NEXT: s_mov_b32 s1, s3
262 ; GFX10-NEXT: s_mov_b32 s2, s4
263 ; GFX10-NEXT: s_mov_b32 s3, s5
264 ; GFX10-NEXT: s_mov_b32 s4, s6
265 ; GFX10-NEXT: s_mov_b32 s5, s7
266 ; GFX10-NEXT: s_mov_b32 s6, s8
267 ; GFX10-NEXT: s_mov_b32 s7, s9
268 ; GFX10-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
269 ; GFX10-NEXT: s_waitcnt vmcnt(0)
270 ; GFX10-NEXT: ; return to shader part epilog
272 %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
273 %out = bitcast i32 %v to float
; Image atomic or on a 1D image: i32 data, one i16 coordinate (%s).
; Expected output is the descriptor copy into s[0:7] followed by one
; image_atomic_or with glc and a16; the i32 result is returned as float.
277 define amdgpu_ps float @atomic_or_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
278 ; GFX9-LABEL: atomic_or_i32_1d:
279 ; GFX9: ; %bb.0: ; %main_body
280 ; GFX9-NEXT: s_mov_b32 s0, s2
281 ; GFX9-NEXT: s_mov_b32 s1, s3
282 ; GFX9-NEXT: s_mov_b32 s2, s4
283 ; GFX9-NEXT: s_mov_b32 s3, s5
284 ; GFX9-NEXT: s_mov_b32 s4, s6
285 ; GFX9-NEXT: s_mov_b32 s5, s7
286 ; GFX9-NEXT: s_mov_b32 s6, s8
287 ; GFX9-NEXT: s_mov_b32 s7, s9
288 ; GFX9-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc a16
289 ; GFX9-NEXT: s_waitcnt vmcnt(0)
290 ; GFX9-NEXT: ; return to shader part epilog
292 ; GFX10-LABEL: atomic_or_i32_1d:
293 ; GFX10: ; %bb.0: ; %main_body
294 ; GFX10-NEXT: s_mov_b32 s0, s2
295 ; GFX10-NEXT: s_mov_b32 s1, s3
296 ; GFX10-NEXT: s_mov_b32 s2, s4
297 ; GFX10-NEXT: s_mov_b32 s3, s5
298 ; GFX10-NEXT: s_mov_b32 s4, s6
299 ; GFX10-NEXT: s_mov_b32 s5, s7
300 ; GFX10-NEXT: s_mov_b32 s6, s8
301 ; GFX10-NEXT: s_mov_b32 s7, s9
302 ; GFX10-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
303 ; GFX10-NEXT: s_waitcnt vmcnt(0)
304 ; GFX10-NEXT: ; return to shader part epilog
306 %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
307 %out = bitcast i32 %v to float
; Image atomic xor on a 1D image: i32 data, one i16 coordinate (%s).
; Expected output is the descriptor copy into s[0:7] followed by one
; image_atomic_xor with glc and a16; the i32 result is returned as float.
311 define amdgpu_ps float @atomic_xor_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
312 ; GFX9-LABEL: atomic_xor_i32_1d:
313 ; GFX9: ; %bb.0: ; %main_body
314 ; GFX9-NEXT: s_mov_b32 s0, s2
315 ; GFX9-NEXT: s_mov_b32 s1, s3
316 ; GFX9-NEXT: s_mov_b32 s2, s4
317 ; GFX9-NEXT: s_mov_b32 s3, s5
318 ; GFX9-NEXT: s_mov_b32 s4, s6
319 ; GFX9-NEXT: s_mov_b32 s5, s7
320 ; GFX9-NEXT: s_mov_b32 s6, s8
321 ; GFX9-NEXT: s_mov_b32 s7, s9
322 ; GFX9-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc a16
323 ; GFX9-NEXT: s_waitcnt vmcnt(0)
324 ; GFX9-NEXT: ; return to shader part epilog
326 ; GFX10-LABEL: atomic_xor_i32_1d:
327 ; GFX10: ; %bb.0: ; %main_body
328 ; GFX10-NEXT: s_mov_b32 s0, s2
329 ; GFX10-NEXT: s_mov_b32 s1, s3
330 ; GFX10-NEXT: s_mov_b32 s2, s4
331 ; GFX10-NEXT: s_mov_b32 s3, s5
332 ; GFX10-NEXT: s_mov_b32 s4, s6
333 ; GFX10-NEXT: s_mov_b32 s5, s7
334 ; GFX10-NEXT: s_mov_b32 s6, s8
335 ; GFX10-NEXT: s_mov_b32 s7, s9
336 ; GFX10-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
337 ; GFX10-NEXT: s_waitcnt vmcnt(0)
338 ; GFX10-NEXT: ; return to shader part epilog
340 %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
341 %out = bitcast i32 %v to float
; Image atomic inc on a 1D image: i32 data, one i16 coordinate (%s).
; Expected output is the descriptor copy into s[0:7] followed by one
; image_atomic_inc with glc and a16; the i32 result is returned as float.
345 define amdgpu_ps float @atomic_inc_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
346 ; GFX9-LABEL: atomic_inc_i32_1d:
347 ; GFX9: ; %bb.0: ; %main_body
348 ; GFX9-NEXT: s_mov_b32 s0, s2
349 ; GFX9-NEXT: s_mov_b32 s1, s3
350 ; GFX9-NEXT: s_mov_b32 s2, s4
351 ; GFX9-NEXT: s_mov_b32 s3, s5
352 ; GFX9-NEXT: s_mov_b32 s4, s6
353 ; GFX9-NEXT: s_mov_b32 s5, s7
354 ; GFX9-NEXT: s_mov_b32 s6, s8
355 ; GFX9-NEXT: s_mov_b32 s7, s9
356 ; GFX9-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc a16
357 ; GFX9-NEXT: s_waitcnt vmcnt(0)
358 ; GFX9-NEXT: ; return to shader part epilog
360 ; GFX10-LABEL: atomic_inc_i32_1d:
361 ; GFX10: ; %bb.0: ; %main_body
362 ; GFX10-NEXT: s_mov_b32 s0, s2
363 ; GFX10-NEXT: s_mov_b32 s1, s3
364 ; GFX10-NEXT: s_mov_b32 s2, s4
365 ; GFX10-NEXT: s_mov_b32 s3, s5
366 ; GFX10-NEXT: s_mov_b32 s4, s6
367 ; GFX10-NEXT: s_mov_b32 s5, s7
368 ; GFX10-NEXT: s_mov_b32 s6, s8
369 ; GFX10-NEXT: s_mov_b32 s7, s9
370 ; GFX10-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
371 ; GFX10-NEXT: s_waitcnt vmcnt(0)
372 ; GFX10-NEXT: ; return to shader part epilog
374 %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
375 %out = bitcast i32 %v to float
; Image atomic dec on a 1D image: i32 data, one i16 coordinate (%s).
; Expected output is the descriptor copy into s[0:7] followed by one
; image_atomic_dec with glc and a16; the i32 result is returned as float.
379 define amdgpu_ps float @atomic_dec_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
380 ; GFX9-LABEL: atomic_dec_i32_1d:
381 ; GFX9: ; %bb.0: ; %main_body
382 ; GFX9-NEXT: s_mov_b32 s0, s2
383 ; GFX9-NEXT: s_mov_b32 s1, s3
384 ; GFX9-NEXT: s_mov_b32 s2, s4
385 ; GFX9-NEXT: s_mov_b32 s3, s5
386 ; GFX9-NEXT: s_mov_b32 s4, s6
387 ; GFX9-NEXT: s_mov_b32 s5, s7
388 ; GFX9-NEXT: s_mov_b32 s6, s8
389 ; GFX9-NEXT: s_mov_b32 s7, s9
390 ; GFX9-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc a16
391 ; GFX9-NEXT: s_waitcnt vmcnt(0)
392 ; GFX9-NEXT: ; return to shader part epilog
394 ; GFX10-LABEL: atomic_dec_i32_1d:
395 ; GFX10: ; %bb.0: ; %main_body
396 ; GFX10-NEXT: s_mov_b32 s0, s2
397 ; GFX10-NEXT: s_mov_b32 s1, s3
398 ; GFX10-NEXT: s_mov_b32 s2, s4
399 ; GFX10-NEXT: s_mov_b32 s3, s5
400 ; GFX10-NEXT: s_mov_b32 s4, s6
401 ; GFX10-NEXT: s_mov_b32 s5, s7
402 ; GFX10-NEXT: s_mov_b32 s6, s8
403 ; GFX10-NEXT: s_mov_b32 s7, s9
404 ; GFX10-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
405 ; GFX10-NEXT: s_waitcnt vmcnt(0)
406 ; GFX10-NEXT: ; return to shader part epilog
408 %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
409 %out = bitcast i32 %v to float
; Image atomic compare-and-swap on a 1D image: i32 %cmp and %swap operands
; plus one i16 coordinate (%s). The expected instruction takes the two data
; values as the register pair v[0:1] with dmask:0x3 and the coordinate in v2.
; The i32 result is returned as float.
413 define amdgpu_ps float @atomic_cmpswap_i32_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s) {
414 ; GFX9-LABEL: atomic_cmpswap_i32_1d:
415 ; GFX9: ; %bb.0: ; %main_body
416 ; GFX9-NEXT: s_mov_b32 s0, s2
417 ; GFX9-NEXT: s_mov_b32 s1, s3
418 ; GFX9-NEXT: s_mov_b32 s2, s4
419 ; GFX9-NEXT: s_mov_b32 s3, s5
420 ; GFX9-NEXT: s_mov_b32 s4, s6
421 ; GFX9-NEXT: s_mov_b32 s5, s7
422 ; GFX9-NEXT: s_mov_b32 s6, s8
423 ; GFX9-NEXT: s_mov_b32 s7, s9
424 ; GFX9-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
425 ; GFX9-NEXT: s_waitcnt vmcnt(0)
426 ; GFX9-NEXT: ; return to shader part epilog
428 ; GFX10-LABEL: atomic_cmpswap_i32_1d:
429 ; GFX10: ; %bb.0: ; %main_body
430 ; GFX10-NEXT: s_mov_b32 s0, s2
431 ; GFX10-NEXT: s_mov_b32 s1, s3
432 ; GFX10-NEXT: s_mov_b32 s2, s4
433 ; GFX10-NEXT: s_mov_b32 s3, s5
434 ; GFX10-NEXT: s_mov_b32 s4, s6
435 ; GFX10-NEXT: s_mov_b32 s5, s7
436 ; GFX10-NEXT: s_mov_b32 s6, s8
437 ; GFX10-NEXT: s_mov_b32 s7, s9
438 ; GFX10-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
439 ; GFX10-NEXT: s_waitcnt vmcnt(0)
440 ; GFX10-NEXT: ; return to shader part epilog
442 %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32 %cmp, i32 %swap, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
443 %out = bitcast i32 %v to float
; Image atomic add on a 2D image: i32 data, two i16 coordinates (%s, %t).
; The expected code packs both coordinates into the single address register
; v1: %t is shifted left by 16 and OR-ed with %s masked to 0xffff.
; gfx900 materializes the mask in v3; gfx1010 uses it as an inline literal.
447 define amdgpu_ps float @atomic_add_i32_2d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t) {
448 ; GFX9-LABEL: atomic_add_i32_2d:
449 ; GFX9: ; %bb.0: ; %main_body
450 ; GFX9-NEXT: v_mov_b32_e32 v3, 0xffff
451 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
452 ; GFX9-NEXT: s_mov_b32 s0, s2
453 ; GFX9-NEXT: s_mov_b32 s1, s3
454 ; GFX9-NEXT: s_mov_b32 s2, s4
455 ; GFX9-NEXT: s_mov_b32 s3, s5
456 ; GFX9-NEXT: s_mov_b32 s4, s6
457 ; GFX9-NEXT: s_mov_b32 s5, s7
458 ; GFX9-NEXT: s_mov_b32 s6, s8
459 ; GFX9-NEXT: s_mov_b32 s7, s9
460 ; GFX9-NEXT: v_and_or_b32 v1, v1, v3, v2
461 ; GFX9-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc a16
462 ; GFX9-NEXT: s_waitcnt vmcnt(0)
463 ; GFX9-NEXT: ; return to shader part epilog
465 ; GFX10-LABEL: atomic_add_i32_2d:
466 ; GFX10: ; %bb.0: ; %main_body
467 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2
468 ; GFX10-NEXT: s_mov_b32 s0, s2
469 ; GFX10-NEXT: s_mov_b32 s1, s3
470 ; GFX10-NEXT: s_mov_b32 s2, s4
471 ; GFX10-NEXT: s_mov_b32 s3, s5
472 ; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v1, v2
473 ; GFX10-NEXT: s_mov_b32 s4, s6
474 ; GFX10-NEXT: s_mov_b32 s5, s7
475 ; GFX10-NEXT: s_mov_b32 s6, s8
476 ; GFX10-NEXT: s_mov_b32 s7, s9
477 ; GFX10-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm glc a16
478 ; GFX10-NEXT: s_waitcnt vmcnt(0)
479 ; GFX10-NEXT: ; return to shader part epilog
481 %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32 %data, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
482 %out = bitcast i32 %v to float
; Image atomic add on a 3D image: i32 data, three i16 coordinates (%s, %t, %r).
; The expected address operand is the register pair v[1:2]: v1 holds %s and
; %t packed as two 16-bit halves, and v2 holds %r in its low half. The upper
; half of v2 is OR-ed in from s8, which the checks show as "s0 << 16".
486 define amdgpu_ps float @atomic_add_i32_3d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %r) {
487 ; GFX9-LABEL: atomic_add_i32_3d:
488 ; GFX9: ; %bb.0: ; %main_body
489 ; GFX9-NEXT: s_mov_b32 s0, s2
490 ; GFX9-NEXT: s_mov_b32 s2, s4
491 ; GFX9-NEXT: s_mov_b32 s4, s6
492 ; GFX9-NEXT: s_mov_b32 s6, s8
493 ; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff
494 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
495 ; GFX9-NEXT: s_lshl_b32 s8, s0, 16
496 ; GFX9-NEXT: s_mov_b32 s1, s3
497 ; GFX9-NEXT: s_mov_b32 s3, s5
498 ; GFX9-NEXT: s_mov_b32 s5, s7
499 ; GFX9-NEXT: s_mov_b32 s7, s9
500 ; GFX9-NEXT: v_and_or_b32 v1, v1, v4, v2
501 ; GFX9-NEXT: v_and_or_b32 v2, v3, v4, s8
502 ; GFX9-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16
503 ; GFX9-NEXT: s_waitcnt vmcnt(0)
504 ; GFX9-NEXT: ; return to shader part epilog
506 ; GFX10-LABEL: atomic_add_i32_3d:
507 ; GFX10: ; %bb.0: ; %main_body
508 ; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff
509 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2
510 ; GFX10-NEXT: s_mov_b32 s0, s2
511 ; GFX10-NEXT: s_mov_b32 s2, s4
512 ; GFX10-NEXT: s_mov_b32 s4, s6
513 ; GFX10-NEXT: s_mov_b32 s6, s8
514 ; GFX10-NEXT: s_lshl_b32 s8, s0, 16
515 ; GFX10-NEXT: v_and_or_b32 v1, v1, v4, v2
516 ; GFX10-NEXT: v_and_or_b32 v2, v3, v4, s8
517 ; GFX10-NEXT: s_mov_b32 s1, s3
518 ; GFX10-NEXT: s_mov_b32 s3, s5
519 ; GFX10-NEXT: s_mov_b32 s5, s7
520 ; GFX10-NEXT: s_mov_b32 s7, s9
521 ; GFX10-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm glc a16
522 ; GFX10-NEXT: s_waitcnt vmcnt(0)
523 ; GFX10-NEXT: ; return to shader part epilog
525 %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32 %data, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
526 %out = bitcast i32 %v to float
; Image atomic add on a cube image: i32 data, three i16 coordinates
; (%s, %t, %face). The coordinates are packed into v[1:2] with the same
; sequence the 3D test expects. The gfx900 instruction additionally carries
; the "da" modifier; gfx1010 encodes the dimension as SQ_RSRC_IMG_CUBE.
530 define amdgpu_ps float @atomic_add_i32_cube(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %face) {
531 ; GFX9-LABEL: atomic_add_i32_cube:
532 ; GFX9: ; %bb.0: ; %main_body
533 ; GFX9-NEXT: s_mov_b32 s0, s2
534 ; GFX9-NEXT: s_mov_b32 s2, s4
535 ; GFX9-NEXT: s_mov_b32 s4, s6
536 ; GFX9-NEXT: s_mov_b32 s6, s8
537 ; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff
538 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
539 ; GFX9-NEXT: s_lshl_b32 s8, s0, 16
540 ; GFX9-NEXT: s_mov_b32 s1, s3
541 ; GFX9-NEXT: s_mov_b32 s3, s5
542 ; GFX9-NEXT: s_mov_b32 s5, s7
543 ; GFX9-NEXT: s_mov_b32 s7, s9
544 ; GFX9-NEXT: v_and_or_b32 v1, v1, v4, v2
545 ; GFX9-NEXT: v_and_or_b32 v2, v3, v4, s8
546 ; GFX9-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16 da
547 ; GFX9-NEXT: s_waitcnt vmcnt(0)
548 ; GFX9-NEXT: ; return to shader part epilog
550 ; GFX10-LABEL: atomic_add_i32_cube:
551 ; GFX10: ; %bb.0: ; %main_body
552 ; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff
553 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2
554 ; GFX10-NEXT: s_mov_b32 s0, s2
555 ; GFX10-NEXT: s_mov_b32 s2, s4
556 ; GFX10-NEXT: s_mov_b32 s4, s6
557 ; GFX10-NEXT: s_mov_b32 s6, s8
558 ; GFX10-NEXT: s_lshl_b32 s8, s0, 16
559 ; GFX10-NEXT: v_and_or_b32 v1, v1, v4, v2
560 ; GFX10-NEXT: v_and_or_b32 v2, v3, v4, s8
561 ; GFX10-NEXT: s_mov_b32 s1, s3
562 ; GFX10-NEXT: s_mov_b32 s3, s5
563 ; GFX10-NEXT: s_mov_b32 s5, s7
564 ; GFX10-NEXT: s_mov_b32 s7, s9
565 ; GFX10-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm glc a16
566 ; GFX10-NEXT: s_waitcnt vmcnt(0)
567 ; GFX10-NEXT: ; return to shader part epilog
569 %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32 %data, i16 %s, i16 %t, i16 %face, <8 x i32> %rsrc, i32 0, i32 0)
570 %out = bitcast i32 %v to float
; Image atomic add on a 1D array image: i32 data, two i16 coordinates
; (%s, %slice) packed into the single address register v1.
; The gfx900 instruction carries the "da" modifier; gfx1010 encodes the
; dimension as SQ_RSRC_IMG_1D_ARRAY.
574 define amdgpu_ps float @atomic_add_i32_1darray(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %slice) {
575 ; GFX9-LABEL: atomic_add_i32_1darray:
576 ; GFX9: ; %bb.0: ; %main_body
577 ; GFX9-NEXT: v_mov_b32_e32 v3, 0xffff
578 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
579 ; GFX9-NEXT: s_mov_b32 s0, s2
580 ; GFX9-NEXT: s_mov_b32 s1, s3
581 ; GFX9-NEXT: s_mov_b32 s2, s4
582 ; GFX9-NEXT: s_mov_b32 s3, s5
583 ; GFX9-NEXT: s_mov_b32 s4, s6
584 ; GFX9-NEXT: s_mov_b32 s5, s7
585 ; GFX9-NEXT: s_mov_b32 s6, s8
586 ; GFX9-NEXT: s_mov_b32 s7, s9
587 ; GFX9-NEXT: v_and_or_b32 v1, v1, v3, v2
588 ; GFX9-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc a16 da
589 ; GFX9-NEXT: s_waitcnt vmcnt(0)
590 ; GFX9-NEXT: ; return to shader part epilog
592 ; GFX10-LABEL: atomic_add_i32_1darray:
593 ; GFX10: ; %bb.0: ; %main_body
594 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2
595 ; GFX10-NEXT: s_mov_b32 s0, s2
596 ; GFX10-NEXT: s_mov_b32 s1, s3
597 ; GFX10-NEXT: s_mov_b32 s2, s4
598 ; GFX10-NEXT: s_mov_b32 s3, s5
599 ; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v1, v2
600 ; GFX10-NEXT: s_mov_b32 s4, s6
601 ; GFX10-NEXT: s_mov_b32 s5, s7
602 ; GFX10-NEXT: s_mov_b32 s6, s8
603 ; GFX10-NEXT: s_mov_b32 s7, s9
604 ; GFX10-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc a16
605 ; GFX10-NEXT: s_waitcnt vmcnt(0)
606 ; GFX10-NEXT: ; return to shader part epilog
608 %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32 %data, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
609 %out = bitcast i32 %v to float
; Image atomic add on a 2D array image: i32 data, three i16 coordinates
; (%s, %t, %slice) packed into the register pair v[1:2].
; The gfx900 instruction carries the "da" modifier; gfx1010 encodes the
; dimension as SQ_RSRC_IMG_2D_ARRAY.
613 define amdgpu_ps float @atomic_add_i32_2darray(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %slice) {
614 ; GFX9-LABEL: atomic_add_i32_2darray:
615 ; GFX9: ; %bb.0: ; %main_body
616 ; GFX9-NEXT: s_mov_b32 s0, s2
617 ; GFX9-NEXT: s_mov_b32 s2, s4
618 ; GFX9-NEXT: s_mov_b32 s4, s6
619 ; GFX9-NEXT: s_mov_b32 s6, s8
620 ; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff
621 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
622 ; GFX9-NEXT: s_lshl_b32 s8, s0, 16
623 ; GFX9-NEXT: s_mov_b32 s1, s3
624 ; GFX9-NEXT: s_mov_b32 s3, s5
625 ; GFX9-NEXT: s_mov_b32 s5, s7
626 ; GFX9-NEXT: s_mov_b32 s7, s9
627 ; GFX9-NEXT: v_and_or_b32 v1, v1, v4, v2
628 ; GFX9-NEXT: v_and_or_b32 v2, v3, v4, s8
629 ; GFX9-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16 da
630 ; GFX9-NEXT: s_waitcnt vmcnt(0)
631 ; GFX9-NEXT: ; return to shader part epilog
633 ; GFX10-LABEL: atomic_add_i32_2darray:
634 ; GFX10: ; %bb.0: ; %main_body
635 ; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff
636 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2
637 ; GFX10-NEXT: s_mov_b32 s0, s2
638 ; GFX10-NEXT: s_mov_b32 s2, s4
639 ; GFX10-NEXT: s_mov_b32 s4, s6
640 ; GFX10-NEXT: s_mov_b32 s6, s8
641 ; GFX10-NEXT: s_lshl_b32 s8, s0, 16
642 ; GFX10-NEXT: v_and_or_b32 v1, v1, v4, v2
643 ; GFX10-NEXT: v_and_or_b32 v2, v3, v4, s8
644 ; GFX10-NEXT: s_mov_b32 s1, s3
645 ; GFX10-NEXT: s_mov_b32 s3, s5
646 ; GFX10-NEXT: s_mov_b32 s5, s7
647 ; GFX10-NEXT: s_mov_b32 s7, s9
648 ; GFX10-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc a16
649 ; GFX10-NEXT: s_waitcnt vmcnt(0)
650 ; GFX10-NEXT: ; return to shader part epilog
652 %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
653 %out = bitcast i32 %v to float
; Image atomic add on a 2D MSAA image: i32 data, three i16 coordinates
; (%s, %t, %fragid) packed into the register pair v[1:2].
; The gfx900 instruction has no "da" modifier here; gfx1010 encodes the
; dimension as SQ_RSRC_IMG_2D_MSAA.
657 define amdgpu_ps float @atomic_add_i32_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %fragid) {
658 ; GFX9-LABEL: atomic_add_i32_2dmsaa:
659 ; GFX9: ; %bb.0: ; %main_body
660 ; GFX9-NEXT: s_mov_b32 s0, s2
661 ; GFX9-NEXT: s_mov_b32 s2, s4
662 ; GFX9-NEXT: s_mov_b32 s4, s6
663 ; GFX9-NEXT: s_mov_b32 s6, s8
664 ; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff
665 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
666 ; GFX9-NEXT: s_lshl_b32 s8, s0, 16
667 ; GFX9-NEXT: s_mov_b32 s1, s3
668 ; GFX9-NEXT: s_mov_b32 s3, s5
669 ; GFX9-NEXT: s_mov_b32 s5, s7
670 ; GFX9-NEXT: s_mov_b32 s7, s9
671 ; GFX9-NEXT: v_and_or_b32 v1, v1, v4, v2
672 ; GFX9-NEXT: v_and_or_b32 v2, v3, v4, s8
673 ; GFX9-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16
674 ; GFX9-NEXT: s_waitcnt vmcnt(0)
675 ; GFX9-NEXT: ; return to shader part epilog
677 ; GFX10-LABEL: atomic_add_i32_2dmsaa:
678 ; GFX10: ; %bb.0: ; %main_body
679 ; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff
680 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2
681 ; GFX10-NEXT: s_mov_b32 s0, s2
682 ; GFX10-NEXT: s_mov_b32 s2, s4
683 ; GFX10-NEXT: s_mov_b32 s4, s6
684 ; GFX10-NEXT: s_mov_b32 s6, s8
685 ; GFX10-NEXT: s_lshl_b32 s8, s0, 16
686 ; GFX10-NEXT: v_and_or_b32 v1, v1, v4, v2
687 ; GFX10-NEXT: v_and_or_b32 v2, v3, v4, s8
688 ; GFX10-NEXT: s_mov_b32 s1, s3
689 ; GFX10-NEXT: s_mov_b32 s3, s5
690 ; GFX10-NEXT: s_mov_b32 s5, s7
691 ; GFX10-NEXT: s_mov_b32 s7, s9
692 ; GFX10-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc a16
693 ; GFX10-NEXT: s_waitcnt vmcnt(0)
694 ; GFX10-NEXT: ; return to shader part epilog
696 %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
697 %out = bitcast i32 %v to float
; Image atomic add on a 2D MSAA array image: i32 data, four i16 coordinates
; (%s, %t, %slice, %fragid). The expected code packs them pairwise into
; v[1:2]: each odd coordinate is shifted left by 16 and OR-ed with the
; preceding coordinate masked to 0xffff. The gfx900 instruction carries the
; "da" modifier; gfx1010 encodes the dimension as SQ_RSRC_IMG_2D_MSAA_ARRAY.
701 define amdgpu_ps float @atomic_add_i32_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
702 ; GFX9-LABEL: atomic_add_i32_2darraymsaa:
703 ; GFX9: ; %bb.0: ; %main_body
704 ; GFX9-NEXT: v_mov_b32_e32 v5, 0xffff
705 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
706 ; GFX9-NEXT: v_and_or_b32 v1, v1, v5, v2
707 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v4
708 ; GFX9-NEXT: s_mov_b32 s0, s2
709 ; GFX9-NEXT: s_mov_b32 s1, s3
710 ; GFX9-NEXT: s_mov_b32 s2, s4
711 ; GFX9-NEXT: s_mov_b32 s3, s5
712 ; GFX9-NEXT: s_mov_b32 s4, s6
713 ; GFX9-NEXT: s_mov_b32 s5, s7
714 ; GFX9-NEXT: s_mov_b32 s6, s8
715 ; GFX9-NEXT: s_mov_b32 s7, s9
716 ; GFX9-NEXT: v_and_or_b32 v2, v3, v5, v2
717 ; GFX9-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16 da
718 ; GFX9-NEXT: s_waitcnt vmcnt(0)
719 ; GFX9-NEXT: ; return to shader part epilog
721 ; GFX10-LABEL: atomic_add_i32_2darraymsaa:
722 ; GFX10: ; %bb.0: ; %main_body
723 ; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff
724 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2
725 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4
726 ; GFX10-NEXT: s_mov_b32 s0, s2
727 ; GFX10-NEXT: s_mov_b32 s1, s3
728 ; GFX10-NEXT: s_mov_b32 s2, s4
729 ; GFX10-NEXT: v_and_or_b32 v1, v1, v5, v2
730 ; GFX10-NEXT: v_and_or_b32 v2, v3, v5, v4
731 ; GFX10-NEXT: s_mov_b32 s3, s5
732 ; GFX10-NEXT: s_mov_b32 s4, s6
733 ; GFX10-NEXT: s_mov_b32 s5, s7
734 ; GFX10-NEXT: s_mov_b32 s6, s8
735 ; GFX10-NEXT: s_mov_b32 s7, s9
736 ; GFX10-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc a16
737 ; GFX10-NEXT: s_waitcnt vmcnt(0)
738 ; GFX10-NEXT: ; return to shader part epilog
740 %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
741 %out = bitcast i32 %v to float
; Variant of the 1D i32 atomic add that passes 2 (instead of 0) as the final
; i32 operand of the intrinsic call. The only difference the checks expect
; is the extra "slc" modifier on the emitted image_atomic_add.
; NOTE(review): the final operand is presumably the cache-policy bitfield;
; confirm against the AMDGPU image intrinsic documentation.
745 define amdgpu_ps float @atomic_add_i32_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
746 ; GFX9-LABEL: atomic_add_i32_1d_slc:
747 ; GFX9: ; %bb.0: ; %main_body
748 ; GFX9-NEXT: s_mov_b32 s0, s2
749 ; GFX9-NEXT: s_mov_b32 s1, s3
750 ; GFX9-NEXT: s_mov_b32 s2, s4
751 ; GFX9-NEXT: s_mov_b32 s3, s5
752 ; GFX9-NEXT: s_mov_b32 s4, s6
753 ; GFX9-NEXT: s_mov_b32 s5, s7
754 ; GFX9-NEXT: s_mov_b32 s6, s8
755 ; GFX9-NEXT: s_mov_b32 s7, s9
756 ; GFX9-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc a16
757 ; GFX9-NEXT: s_waitcnt vmcnt(0)
758 ; GFX9-NEXT: ; return to shader part epilog
760 ; GFX10-LABEL: atomic_add_i32_1d_slc:
761 ; GFX10: ; %bb.0: ; %main_body
762 ; GFX10-NEXT: s_mov_b32 s0, s2
763 ; GFX10-NEXT: s_mov_b32 s1, s3
764 ; GFX10-NEXT: s_mov_b32 s2, s4
765 ; GFX10-NEXT: s_mov_b32 s3, s5
766 ; GFX10-NEXT: s_mov_b32 s4, s6
767 ; GFX10-NEXT: s_mov_b32 s5, s7
768 ; GFX10-NEXT: s_mov_b32 s6, s8
769 ; GFX10-NEXT: s_mov_b32 s7, s9
770 ; GFX10-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc slc a16
771 ; GFX10-NEXT: s_waitcnt vmcnt(0)
772 ; GFX10-NEXT: ; return to shader part epilog
774 %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
775 %out = bitcast i32 %v to float
; 64-bit image atomic swap on a 1D image: i64 data, one i16 coordinate (%s).
; The expected instruction takes the data as the register pair v[0:1] with
; dmask:0x3 and the coordinate in v2. The i64 result is bitcast to
; <2 x float> for the shader return value.
779 define amdgpu_ps <2 x float> @atomic_swap_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
780 ; GFX9-LABEL: atomic_swap_i64_1d:
781 ; GFX9: ; %bb.0: ; %main_body
782 ; GFX9-NEXT: s_mov_b32 s0, s2
783 ; GFX9-NEXT: s_mov_b32 s1, s3
784 ; GFX9-NEXT: s_mov_b32 s2, s4
785 ; GFX9-NEXT: s_mov_b32 s3, s5
786 ; GFX9-NEXT: s_mov_b32 s4, s6
787 ; GFX9-NEXT: s_mov_b32 s5, s7
788 ; GFX9-NEXT: s_mov_b32 s6, s8
789 ; GFX9-NEXT: s_mov_b32 s7, s9
790 ; GFX9-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
791 ; GFX9-NEXT: s_waitcnt vmcnt(0)
792 ; GFX9-NEXT: ; return to shader part epilog
794 ; GFX10-LABEL: atomic_swap_i64_1d:
795 ; GFX10: ; %bb.0: ; %main_body
796 ; GFX10-NEXT: s_mov_b32 s0, s2
797 ; GFX10-NEXT: s_mov_b32 s1, s3
798 ; GFX10-NEXT: s_mov_b32 s2, s4
799 ; GFX10-NEXT: s_mov_b32 s3, s5
800 ; GFX10-NEXT: s_mov_b32 s4, s6
801 ; GFX10-NEXT: s_mov_b32 s5, s7
802 ; GFX10-NEXT: s_mov_b32 s6, s8
803 ; GFX10-NEXT: s_mov_b32 s7, s9
804 ; GFX10-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
805 ; GFX10-NEXT: s_waitcnt vmcnt(0)
806 ; GFX10-NEXT: ; return to shader part epilog
808 %v = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
809 %out = bitcast i64 %v to <2 x float>
; 64-bit image atomic add on a 1D image: i64 data, one i16 coordinate (%s).
; The expected instruction takes the data as the register pair v[0:1] with
; dmask:0x3 and the coordinate in v2. The i64 result is bitcast to
; <2 x float> for the shader return value.
813 define amdgpu_ps <2 x float> @atomic_add_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
814 ; GFX9-LABEL: atomic_add_i64_1d:
815 ; GFX9: ; %bb.0: ; %main_body
816 ; GFX9-NEXT: s_mov_b32 s0, s2
817 ; GFX9-NEXT: s_mov_b32 s1, s3
818 ; GFX9-NEXT: s_mov_b32 s2, s4
819 ; GFX9-NEXT: s_mov_b32 s3, s5
820 ; GFX9-NEXT: s_mov_b32 s4, s6
821 ; GFX9-NEXT: s_mov_b32 s5, s7
822 ; GFX9-NEXT: s_mov_b32 s6, s8
823 ; GFX9-NEXT: s_mov_b32 s7, s9
824 ; GFX9-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
825 ; GFX9-NEXT: s_waitcnt vmcnt(0)
826 ; GFX9-NEXT: ; return to shader part epilog
828 ; GFX10-LABEL: atomic_add_i64_1d:
829 ; GFX10: ; %bb.0: ; %main_body
830 ; GFX10-NEXT: s_mov_b32 s0, s2
831 ; GFX10-NEXT: s_mov_b32 s1, s3
832 ; GFX10-NEXT: s_mov_b32 s2, s4
833 ; GFX10-NEXT: s_mov_b32 s3, s5
834 ; GFX10-NEXT: s_mov_b32 s4, s6
835 ; GFX10-NEXT: s_mov_b32 s5, s7
836 ; GFX10-NEXT: s_mov_b32 s6, s8
837 ; GFX10-NEXT: s_mov_b32 s7, s9
838 ; GFX10-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
839 ; GFX10-NEXT: s_waitcnt vmcnt(0)
840 ; GFX10-NEXT: ; return to shader part epilog
842 %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
843 %out = bitcast i64 %v to <2 x float>
; i64 image atomic sub on a 1D image addressed by a single i16 coordinate.
; Expected selection on both targets: image_atomic_sub with data in v[0:1],
; coordinate in v2, dmask:0x3 and a16; the GFX10 form adds
; dim:SQ_RSRC_IMG_1D.
847 define amdgpu_ps <2 x float> @atomic_sub_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
848 ; GFX9-LABEL: atomic_sub_i64_1d:
849 ; GFX9: ; %bb.0: ; %main_body
850 ; GFX9-NEXT: s_mov_b32 s0, s2
851 ; GFX9-NEXT: s_mov_b32 s1, s3
852 ; GFX9-NEXT: s_mov_b32 s2, s4
853 ; GFX9-NEXT: s_mov_b32 s3, s5
854 ; GFX9-NEXT: s_mov_b32 s4, s6
855 ; GFX9-NEXT: s_mov_b32 s5, s7
856 ; GFX9-NEXT: s_mov_b32 s6, s8
857 ; GFX9-NEXT: s_mov_b32 s7, s9
858 ; GFX9-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
859 ; GFX9-NEXT: s_waitcnt vmcnt(0)
860 ; GFX9-NEXT: ; return to shader part epilog
862 ; GFX10-LABEL: atomic_sub_i64_1d:
863 ; GFX10: ; %bb.0: ; %main_body
864 ; GFX10-NEXT: s_mov_b32 s0, s2
865 ; GFX10-NEXT: s_mov_b32 s1, s3
866 ; GFX10-NEXT: s_mov_b32 s2, s4
867 ; GFX10-NEXT: s_mov_b32 s3, s5
868 ; GFX10-NEXT: s_mov_b32 s4, s6
869 ; GFX10-NEXT: s_mov_b32 s5, s7
870 ; GFX10-NEXT: s_mov_b32 s6, s8
871 ; GFX10-NEXT: s_mov_b32 s7, s9
872 ; GFX10-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
873 ; GFX10-NEXT: s_waitcnt vmcnt(0)
874 ; GFX10-NEXT: ; return to shader part epilog
876 %v = call i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
877 %out = bitcast i64 %v to <2 x float>
; i64 image atomic smin on a 1D image addressed by a single i16 coordinate.
; Expected selection on both targets: image_atomic_smin with data in v[0:1],
; coordinate in v2, dmask:0x3 and a16; the GFX10 form adds
; dim:SQ_RSRC_IMG_1D.
881 define amdgpu_ps <2 x float> @atomic_smin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
882 ; GFX9-LABEL: atomic_smin_i64_1d:
883 ; GFX9: ; %bb.0: ; %main_body
884 ; GFX9-NEXT: s_mov_b32 s0, s2
885 ; GFX9-NEXT: s_mov_b32 s1, s3
886 ; GFX9-NEXT: s_mov_b32 s2, s4
887 ; GFX9-NEXT: s_mov_b32 s3, s5
888 ; GFX9-NEXT: s_mov_b32 s4, s6
889 ; GFX9-NEXT: s_mov_b32 s5, s7
890 ; GFX9-NEXT: s_mov_b32 s6, s8
891 ; GFX9-NEXT: s_mov_b32 s7, s9
892 ; GFX9-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
893 ; GFX9-NEXT: s_waitcnt vmcnt(0)
894 ; GFX9-NEXT: ; return to shader part epilog
896 ; GFX10-LABEL: atomic_smin_i64_1d:
897 ; GFX10: ; %bb.0: ; %main_body
898 ; GFX10-NEXT: s_mov_b32 s0, s2
899 ; GFX10-NEXT: s_mov_b32 s1, s3
900 ; GFX10-NEXT: s_mov_b32 s2, s4
901 ; GFX10-NEXT: s_mov_b32 s3, s5
902 ; GFX10-NEXT: s_mov_b32 s4, s6
903 ; GFX10-NEXT: s_mov_b32 s5, s7
904 ; GFX10-NEXT: s_mov_b32 s6, s8
905 ; GFX10-NEXT: s_mov_b32 s7, s9
906 ; GFX10-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
907 ; GFX10-NEXT: s_waitcnt vmcnt(0)
908 ; GFX10-NEXT: ; return to shader part epilog
910 %v = call i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
911 %out = bitcast i64 %v to <2 x float>
; i64 image atomic umin on a 1D image addressed by a single i16 coordinate.
; Expected selection on both targets: image_atomic_umin with data in v[0:1],
; coordinate in v2, dmask:0x3 and a16; the GFX10 form adds
; dim:SQ_RSRC_IMG_1D.
915 define amdgpu_ps <2 x float> @atomic_umin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
916 ; GFX9-LABEL: atomic_umin_i64_1d:
917 ; GFX9: ; %bb.0: ; %main_body
918 ; GFX9-NEXT: s_mov_b32 s0, s2
919 ; GFX9-NEXT: s_mov_b32 s1, s3
920 ; GFX9-NEXT: s_mov_b32 s2, s4
921 ; GFX9-NEXT: s_mov_b32 s3, s5
922 ; GFX9-NEXT: s_mov_b32 s4, s6
923 ; GFX9-NEXT: s_mov_b32 s5, s7
924 ; GFX9-NEXT: s_mov_b32 s6, s8
925 ; GFX9-NEXT: s_mov_b32 s7, s9
926 ; GFX9-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
927 ; GFX9-NEXT: s_waitcnt vmcnt(0)
928 ; GFX9-NEXT: ; return to shader part epilog
930 ; GFX10-LABEL: atomic_umin_i64_1d:
931 ; GFX10: ; %bb.0: ; %main_body
932 ; GFX10-NEXT: s_mov_b32 s0, s2
933 ; GFX10-NEXT: s_mov_b32 s1, s3
934 ; GFX10-NEXT: s_mov_b32 s2, s4
935 ; GFX10-NEXT: s_mov_b32 s3, s5
936 ; GFX10-NEXT: s_mov_b32 s4, s6
937 ; GFX10-NEXT: s_mov_b32 s5, s7
938 ; GFX10-NEXT: s_mov_b32 s6, s8
939 ; GFX10-NEXT: s_mov_b32 s7, s9
940 ; GFX10-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
941 ; GFX10-NEXT: s_waitcnt vmcnt(0)
942 ; GFX10-NEXT: ; return to shader part epilog
944 %v = call i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
945 %out = bitcast i64 %v to <2 x float>
; i64 image atomic smax on a 1D image addressed by a single i16 coordinate.
; Expected selection on both targets: image_atomic_smax with data in v[0:1],
; coordinate in v2, dmask:0x3 and a16; the GFX10 form adds
; dim:SQ_RSRC_IMG_1D.
949 define amdgpu_ps <2 x float> @atomic_smax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
950 ; GFX9-LABEL: atomic_smax_i64_1d:
951 ; GFX9: ; %bb.0: ; %main_body
952 ; GFX9-NEXT: s_mov_b32 s0, s2
953 ; GFX9-NEXT: s_mov_b32 s1, s3
954 ; GFX9-NEXT: s_mov_b32 s2, s4
955 ; GFX9-NEXT: s_mov_b32 s3, s5
956 ; GFX9-NEXT: s_mov_b32 s4, s6
957 ; GFX9-NEXT: s_mov_b32 s5, s7
958 ; GFX9-NEXT: s_mov_b32 s6, s8
959 ; GFX9-NEXT: s_mov_b32 s7, s9
960 ; GFX9-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
961 ; GFX9-NEXT: s_waitcnt vmcnt(0)
962 ; GFX9-NEXT: ; return to shader part epilog
964 ; GFX10-LABEL: atomic_smax_i64_1d:
965 ; GFX10: ; %bb.0: ; %main_body
966 ; GFX10-NEXT: s_mov_b32 s0, s2
967 ; GFX10-NEXT: s_mov_b32 s1, s3
968 ; GFX10-NEXT: s_mov_b32 s2, s4
969 ; GFX10-NEXT: s_mov_b32 s3, s5
970 ; GFX10-NEXT: s_mov_b32 s4, s6
971 ; GFX10-NEXT: s_mov_b32 s5, s7
972 ; GFX10-NEXT: s_mov_b32 s6, s8
973 ; GFX10-NEXT: s_mov_b32 s7, s9
974 ; GFX10-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
975 ; GFX10-NEXT: s_waitcnt vmcnt(0)
976 ; GFX10-NEXT: ; return to shader part epilog
978 %v = call i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
979 %out = bitcast i64 %v to <2 x float>
; i64 image atomic umax on a 1D image addressed by a single i16 coordinate.
; Expected selection on both targets: image_atomic_umax with data in v[0:1],
; coordinate in v2, dmask:0x3 and a16; the GFX10 form adds
; dim:SQ_RSRC_IMG_1D. The returned i64 is bitcast to <2 x float>.
983 define amdgpu_ps <2 x float> @atomic_umax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
984 ; GFX9-LABEL: atomic_umax_i64_1d:
985 ; GFX9: ; %bb.0: ; %main_body
986 ; GFX9-NEXT: s_mov_b32 s0, s2
987 ; GFX9-NEXT: s_mov_b32 s1, s3
988 ; GFX9-NEXT: s_mov_b32 s2, s4
989 ; GFX9-NEXT: s_mov_b32 s3, s5
990 ; GFX9-NEXT: s_mov_b32 s4, s6
991 ; GFX9-NEXT: s_mov_b32 s5, s7
992 ; GFX9-NEXT: s_mov_b32 s6, s8
993 ; GFX9-NEXT: s_mov_b32 s7, s9
994 ; GFX9-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
995 ; GFX9-NEXT: s_waitcnt vmcnt(0)
996 ; GFX9-NEXT: ; return to shader part epilog
998 ; GFX10-LABEL: atomic_umax_i64_1d:
999 ; GFX10: ; %bb.0: ; %main_body
1000 ; GFX10-NEXT: s_mov_b32 s0, s2
1001 ; GFX10-NEXT: s_mov_b32 s1, s3
1002 ; GFX10-NEXT: s_mov_b32 s2, s4
1003 ; GFX10-NEXT: s_mov_b32 s3, s5
1004 ; GFX10-NEXT: s_mov_b32 s4, s6
1005 ; GFX10-NEXT: s_mov_b32 s5, s7
1006 ; GFX10-NEXT: s_mov_b32 s6, s8
1007 ; GFX10-NEXT: s_mov_b32 s7, s9
1008 ; GFX10-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
1009 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1010 ; GFX10-NEXT: ; return to shader part epilog
1012 %v = call i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
1013 %out = bitcast i64 %v to <2 x float>
1014 ret <2 x float> %out
; i64 image atomic and on a 1D image addressed by a single i16 coordinate.
; Expected selection on both targets: image_atomic_and with data in v[0:1],
; coordinate in v2, dmask:0x3 and a16; the GFX10 form adds
; dim:SQ_RSRC_IMG_1D.
1017 define amdgpu_ps <2 x float> @atomic_and_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
1018 ; GFX9-LABEL: atomic_and_i64_1d:
1019 ; GFX9: ; %bb.0: ; %main_body
1020 ; GFX9-NEXT: s_mov_b32 s0, s2
1021 ; GFX9-NEXT: s_mov_b32 s1, s3
1022 ; GFX9-NEXT: s_mov_b32 s2, s4
1023 ; GFX9-NEXT: s_mov_b32 s3, s5
1024 ; GFX9-NEXT: s_mov_b32 s4, s6
1025 ; GFX9-NEXT: s_mov_b32 s5, s7
1026 ; GFX9-NEXT: s_mov_b32 s6, s8
1027 ; GFX9-NEXT: s_mov_b32 s7, s9
1028 ; GFX9-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
1029 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1030 ; GFX9-NEXT: ; return to shader part epilog
1032 ; GFX10-LABEL: atomic_and_i64_1d:
1033 ; GFX10: ; %bb.0: ; %main_body
1034 ; GFX10-NEXT: s_mov_b32 s0, s2
1035 ; GFX10-NEXT: s_mov_b32 s1, s3
1036 ; GFX10-NEXT: s_mov_b32 s2, s4
1037 ; GFX10-NEXT: s_mov_b32 s3, s5
1038 ; GFX10-NEXT: s_mov_b32 s4, s6
1039 ; GFX10-NEXT: s_mov_b32 s5, s7
1040 ; GFX10-NEXT: s_mov_b32 s6, s8
1041 ; GFX10-NEXT: s_mov_b32 s7, s9
1042 ; GFX10-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
1043 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1044 ; GFX10-NEXT: ; return to shader part epilog
1046 %v = call i64 @llvm.amdgcn.image.atomic.and.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
1047 %out = bitcast i64 %v to <2 x float>
1048 ret <2 x float> %out
; i64 image atomic or on a 1D image addressed by a single i16 coordinate.
; Expected selection on both targets: image_atomic_or with data in v[0:1],
; coordinate in v2, dmask:0x3 and a16; the GFX10 form adds
; dim:SQ_RSRC_IMG_1D.
1051 define amdgpu_ps <2 x float> @atomic_or_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
1052 ; GFX9-LABEL: atomic_or_i64_1d:
1053 ; GFX9: ; %bb.0: ; %main_body
1054 ; GFX9-NEXT: s_mov_b32 s0, s2
1055 ; GFX9-NEXT: s_mov_b32 s1, s3
1056 ; GFX9-NEXT: s_mov_b32 s2, s4
1057 ; GFX9-NEXT: s_mov_b32 s3, s5
1058 ; GFX9-NEXT: s_mov_b32 s4, s6
1059 ; GFX9-NEXT: s_mov_b32 s5, s7
1060 ; GFX9-NEXT: s_mov_b32 s6, s8
1061 ; GFX9-NEXT: s_mov_b32 s7, s9
1062 ; GFX9-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
1063 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1064 ; GFX9-NEXT: ; return to shader part epilog
1066 ; GFX10-LABEL: atomic_or_i64_1d:
1067 ; GFX10: ; %bb.0: ; %main_body
1068 ; GFX10-NEXT: s_mov_b32 s0, s2
1069 ; GFX10-NEXT: s_mov_b32 s1, s3
1070 ; GFX10-NEXT: s_mov_b32 s2, s4
1071 ; GFX10-NEXT: s_mov_b32 s3, s5
1072 ; GFX10-NEXT: s_mov_b32 s4, s6
1073 ; GFX10-NEXT: s_mov_b32 s5, s7
1074 ; GFX10-NEXT: s_mov_b32 s6, s8
1075 ; GFX10-NEXT: s_mov_b32 s7, s9
1076 ; GFX10-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
1077 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1078 ; GFX10-NEXT: ; return to shader part epilog
1080 %v = call i64 @llvm.amdgcn.image.atomic.or.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
1081 %out = bitcast i64 %v to <2 x float>
1082 ret <2 x float> %out
; i64 image atomic xor on a 1D image addressed by a single i16 coordinate.
; Expected selection on both targets: image_atomic_xor with data in v[0:1],
; coordinate in v2, dmask:0x3 and a16; the GFX10 form adds
; dim:SQ_RSRC_IMG_1D.
1085 define amdgpu_ps <2 x float> @atomic_xor_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
1086 ; GFX9-LABEL: atomic_xor_i64_1d:
1087 ; GFX9: ; %bb.0: ; %main_body
1088 ; GFX9-NEXT: s_mov_b32 s0, s2
1089 ; GFX9-NEXT: s_mov_b32 s1, s3
1090 ; GFX9-NEXT: s_mov_b32 s2, s4
1091 ; GFX9-NEXT: s_mov_b32 s3, s5
1092 ; GFX9-NEXT: s_mov_b32 s4, s6
1093 ; GFX9-NEXT: s_mov_b32 s5, s7
1094 ; GFX9-NEXT: s_mov_b32 s6, s8
1095 ; GFX9-NEXT: s_mov_b32 s7, s9
1096 ; GFX9-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
1097 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1098 ; GFX9-NEXT: ; return to shader part epilog
1100 ; GFX10-LABEL: atomic_xor_i64_1d:
1101 ; GFX10: ; %bb.0: ; %main_body
1102 ; GFX10-NEXT: s_mov_b32 s0, s2
1103 ; GFX10-NEXT: s_mov_b32 s1, s3
1104 ; GFX10-NEXT: s_mov_b32 s2, s4
1105 ; GFX10-NEXT: s_mov_b32 s3, s5
1106 ; GFX10-NEXT: s_mov_b32 s4, s6
1107 ; GFX10-NEXT: s_mov_b32 s5, s7
1108 ; GFX10-NEXT: s_mov_b32 s6, s8
1109 ; GFX10-NEXT: s_mov_b32 s7, s9
1110 ; GFX10-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
1111 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1112 ; GFX10-NEXT: ; return to shader part epilog
1114 %v = call i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
1115 %out = bitcast i64 %v to <2 x float>
1116 ret <2 x float> %out
; i64 image atomic inc on a 1D image addressed by a single i16 coordinate.
; Expected selection on both targets: image_atomic_inc with data in v[0:1],
; coordinate in v2, dmask:0x3 and a16; the GFX10 form adds
; dim:SQ_RSRC_IMG_1D.
1119 define amdgpu_ps <2 x float> @atomic_inc_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
1120 ; GFX9-LABEL: atomic_inc_i64_1d:
1121 ; GFX9: ; %bb.0: ; %main_body
1122 ; GFX9-NEXT: s_mov_b32 s0, s2
1123 ; GFX9-NEXT: s_mov_b32 s1, s3
1124 ; GFX9-NEXT: s_mov_b32 s2, s4
1125 ; GFX9-NEXT: s_mov_b32 s3, s5
1126 ; GFX9-NEXT: s_mov_b32 s4, s6
1127 ; GFX9-NEXT: s_mov_b32 s5, s7
1128 ; GFX9-NEXT: s_mov_b32 s6, s8
1129 ; GFX9-NEXT: s_mov_b32 s7, s9
1130 ; GFX9-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
1131 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1132 ; GFX9-NEXT: ; return to shader part epilog
1134 ; GFX10-LABEL: atomic_inc_i64_1d:
1135 ; GFX10: ; %bb.0: ; %main_body
1136 ; GFX10-NEXT: s_mov_b32 s0, s2
1137 ; GFX10-NEXT: s_mov_b32 s1, s3
1138 ; GFX10-NEXT: s_mov_b32 s2, s4
1139 ; GFX10-NEXT: s_mov_b32 s3, s5
1140 ; GFX10-NEXT: s_mov_b32 s4, s6
1141 ; GFX10-NEXT: s_mov_b32 s5, s7
1142 ; GFX10-NEXT: s_mov_b32 s6, s8
1143 ; GFX10-NEXT: s_mov_b32 s7, s9
1144 ; GFX10-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
1145 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1146 ; GFX10-NEXT: ; return to shader part epilog
1148 %v = call i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
1149 %out = bitcast i64 %v to <2 x float>
1150 ret <2 x float> %out
; i64 image atomic dec on a 1D image addressed by a single i16 coordinate.
; Expected selection on both targets: image_atomic_dec with data in v[0:1],
; coordinate in v2, dmask:0x3 and a16; the GFX10 form adds
; dim:SQ_RSRC_IMG_1D.
1153 define amdgpu_ps <2 x float> @atomic_dec_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
1154 ; GFX9-LABEL: atomic_dec_i64_1d:
1155 ; GFX9: ; %bb.0: ; %main_body
1156 ; GFX9-NEXT: s_mov_b32 s0, s2
1157 ; GFX9-NEXT: s_mov_b32 s1, s3
1158 ; GFX9-NEXT: s_mov_b32 s2, s4
1159 ; GFX9-NEXT: s_mov_b32 s3, s5
1160 ; GFX9-NEXT: s_mov_b32 s4, s6
1161 ; GFX9-NEXT: s_mov_b32 s5, s7
1162 ; GFX9-NEXT: s_mov_b32 s6, s8
1163 ; GFX9-NEXT: s_mov_b32 s7, s9
1164 ; GFX9-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
1165 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1166 ; GFX9-NEXT: ; return to shader part epilog
1168 ; GFX10-LABEL: atomic_dec_i64_1d:
1169 ; GFX10: ; %bb.0: ; %main_body
1170 ; GFX10-NEXT: s_mov_b32 s0, s2
1171 ; GFX10-NEXT: s_mov_b32 s1, s3
1172 ; GFX10-NEXT: s_mov_b32 s2, s4
1173 ; GFX10-NEXT: s_mov_b32 s3, s5
1174 ; GFX10-NEXT: s_mov_b32 s4, s6
1175 ; GFX10-NEXT: s_mov_b32 s5, s7
1176 ; GFX10-NEXT: s_mov_b32 s6, s8
1177 ; GFX10-NEXT: s_mov_b32 s7, s9
1178 ; GFX10-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
1179 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1180 ; GFX10-NEXT: ; return to shader part epilog
1182 %v = call i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
1183 %out = bitcast i64 %v to <2 x float>
1184 ret <2 x float> %out
; i64 image atomic cmpswap on a 1D image addressed by a single i16 coordinate.
; Unlike the single-operand atomics, this takes two i64 values (%cmp and
; %swap), so the expected instruction uses a four-register data operand
; v[0:3] with dmask:0xf and the coordinate in v4. The a16 modifier is
; expected on both targets and dim:SQ_RSRC_IMG_1D on GFX10.
1187 define amdgpu_ps <2 x float> @atomic_cmpswap_i64_1d(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i16 %s) {
1188 ; GFX9-LABEL: atomic_cmpswap_i64_1d:
1189 ; GFX9: ; %bb.0: ; %main_body
1190 ; GFX9-NEXT: s_mov_b32 s0, s2
1191 ; GFX9-NEXT: s_mov_b32 s1, s3
1192 ; GFX9-NEXT: s_mov_b32 s2, s4
1193 ; GFX9-NEXT: s_mov_b32 s3, s5
1194 ; GFX9-NEXT: s_mov_b32 s4, s6
1195 ; GFX9-NEXT: s_mov_b32 s5, s7
1196 ; GFX9-NEXT: s_mov_b32 s6, s8
1197 ; GFX9-NEXT: s_mov_b32 s7, s9
1198 ; GFX9-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc a16
1199 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1200 ; GFX9-NEXT: ; return to shader part epilog
1202 ; GFX10-LABEL: atomic_cmpswap_i64_1d:
1203 ; GFX10: ; %bb.0: ; %main_body
1204 ; GFX10-NEXT: s_mov_b32 s0, s2
1205 ; GFX10-NEXT: s_mov_b32 s1, s3
1206 ; GFX10-NEXT: s_mov_b32 s2, s4
1207 ; GFX10-NEXT: s_mov_b32 s3, s5
1208 ; GFX10-NEXT: s_mov_b32 s4, s6
1209 ; GFX10-NEXT: s_mov_b32 s5, s7
1210 ; GFX10-NEXT: s_mov_b32 s6, s8
1211 ; GFX10-NEXT: s_mov_b32 s7, s9
1212 ; GFX10-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc a16
1213 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1214 ; GFX10-NEXT: ; return to shader part epilog
1216 %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i16(i64 %cmp, i64 %swap, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
1217 %out = bitcast i64 %v to <2 x float>
1218 ret <2 x float> %out
; i64 image atomic add on a 2D image addressed by two i16 coordinates.
; The expected code packs both coordinates into one VGPR before the atomic:
; v3 is shifted left by 16 and OR-ed with v2 masked to 0xffff, giving v2 as
; the single address register. GFX9 materialises the 0xffff mask in v4,
; while GFX10 uses it as an inline literal. The GFX10 form carries
; dim:SQ_RSRC_IMG_2D.
1221 define amdgpu_ps <2 x float> @atomic_add_i64_2d(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t) {
1222 ; GFX9-LABEL: atomic_add_i64_2d:
1223 ; GFX9: ; %bb.0: ; %main_body
1224 ; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff
1225 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1226 ; GFX9-NEXT: s_mov_b32 s0, s2
1227 ; GFX9-NEXT: s_mov_b32 s1, s3
1228 ; GFX9-NEXT: s_mov_b32 s2, s4
1229 ; GFX9-NEXT: s_mov_b32 s3, s5
1230 ; GFX9-NEXT: s_mov_b32 s4, s6
1231 ; GFX9-NEXT: s_mov_b32 s5, s7
1232 ; GFX9-NEXT: s_mov_b32 s6, s8
1233 ; GFX9-NEXT: s_mov_b32 s7, s9
1234 ; GFX9-NEXT: v_and_or_b32 v2, v2, v4, v3
1235 ; GFX9-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
1236 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1237 ; GFX9-NEXT: ; return to shader part epilog
1239 ; GFX10-LABEL: atomic_add_i64_2d:
1240 ; GFX10: ; %bb.0: ; %main_body
1241 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1242 ; GFX10-NEXT: s_mov_b32 s0, s2
1243 ; GFX10-NEXT: s_mov_b32 s1, s3
1244 ; GFX10-NEXT: s_mov_b32 s2, s4
1245 ; GFX10-NEXT: s_mov_b32 s3, s5
1246 ; GFX10-NEXT: v_and_or_b32 v2, 0xffff, v2, v3
1247 ; GFX10-NEXT: s_mov_b32 s4, s6
1248 ; GFX10-NEXT: s_mov_b32 s5, s7
1249 ; GFX10-NEXT: s_mov_b32 s6, s8
1250 ; GFX10-NEXT: s_mov_b32 s7, s9
1251 ; GFX10-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm glc a16
1252 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1253 ; GFX10-NEXT: ; return to shader part epilog
1255 %v = call i64 @llvm.amdgcn.image.atomic.add.2d.i64.i16(i64 %data, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
1256 %out = bitcast i64 %v to <2 x float>
1257 ret <2 x float> %out
; i64 image atomic add on a 3D image addressed by three i16 coordinates.
; The expected code packs the coordinates into the pair v[2:3]: v2 holds
; (v2 & 0xffff) | (v3 << 16), and v3 holds (v4 & 0xffff) | s8, where
; s8 = s0 << 16.
; NOTE(review): the upper half of v3 looks like a don't-care filler for the
; unused fourth 16-bit slot (it is derived from s0, not from an argument);
; confirm before relying on it.
; The GFX10 form carries dim:SQ_RSRC_IMG_3D.
1260 define amdgpu_ps <2 x float> @atomic_add_i64_3d(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %r) {
1261 ; GFX9-LABEL: atomic_add_i64_3d:
1262 ; GFX9: ; %bb.0: ; %main_body
1263 ; GFX9-NEXT: s_mov_b32 s0, s2
1264 ; GFX9-NEXT: s_mov_b32 s2, s4
1265 ; GFX9-NEXT: s_mov_b32 s4, s6
1266 ; GFX9-NEXT: s_mov_b32 s6, s8
1267 ; GFX9-NEXT: v_mov_b32_e32 v5, 0xffff
1268 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1269 ; GFX9-NEXT: s_lshl_b32 s8, s0, 16
1270 ; GFX9-NEXT: s_mov_b32 s1, s3
1271 ; GFX9-NEXT: s_mov_b32 s3, s5
1272 ; GFX9-NEXT: s_mov_b32 s5, s7
1273 ; GFX9-NEXT: s_mov_b32 s7, s9
1274 ; GFX9-NEXT: v_and_or_b32 v2, v2, v5, v3
1275 ; GFX9-NEXT: v_and_or_b32 v3, v4, v5, s8
1276 ; GFX9-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16
1277 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1278 ; GFX9-NEXT: ; return to shader part epilog
1280 ; GFX10-LABEL: atomic_add_i64_3d:
1281 ; GFX10: ; %bb.0: ; %main_body
1282 ; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff
1283 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1284 ; GFX10-NEXT: s_mov_b32 s0, s2
1285 ; GFX10-NEXT: s_mov_b32 s2, s4
1286 ; GFX10-NEXT: s_mov_b32 s4, s6
1287 ; GFX10-NEXT: s_mov_b32 s6, s8
1288 ; GFX10-NEXT: s_lshl_b32 s8, s0, 16
1289 ; GFX10-NEXT: v_and_or_b32 v2, v2, v5, v3
1290 ; GFX10-NEXT: v_and_or_b32 v3, v4, v5, s8
1291 ; GFX10-NEXT: s_mov_b32 s1, s3
1292 ; GFX10-NEXT: s_mov_b32 s3, s5
1293 ; GFX10-NEXT: s_mov_b32 s5, s7
1294 ; GFX10-NEXT: s_mov_b32 s7, s9
1295 ; GFX10-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm glc a16
1296 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1297 ; GFX10-NEXT: ; return to shader part epilog
1299 %v = call i64 @llvm.amdgcn.image.atomic.add.3d.i64.i16(i64 %data, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
1300 %out = bitcast i64 %v to <2 x float>
1301 ret <2 x float> %out
; i64 image atomic add on a cube image addressed by three i16 operands
; (%s, %t, %face). The address is packed into v[2:3] in the same way as the
; other three-coordinate cases: v2 = (v2 & 0xffff) | (v3 << 16) and
; v3 = (v4 & 0xffff) | (s0 << 16). The expected GFX9 instruction carries the
; "da" modifier; the GFX10 form carries dim:SQ_RSRC_IMG_CUBE instead.
1304 define amdgpu_ps <2 x float> @atomic_add_i64_cube(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %face) {
1305 ; GFX9-LABEL: atomic_add_i64_cube:
1306 ; GFX9: ; %bb.0: ; %main_body
1307 ; GFX9-NEXT: s_mov_b32 s0, s2
1308 ; GFX9-NEXT: s_mov_b32 s2, s4
1309 ; GFX9-NEXT: s_mov_b32 s4, s6
1310 ; GFX9-NEXT: s_mov_b32 s6, s8
1311 ; GFX9-NEXT: v_mov_b32_e32 v5, 0xffff
1312 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1313 ; GFX9-NEXT: s_lshl_b32 s8, s0, 16
1314 ; GFX9-NEXT: s_mov_b32 s1, s3
1315 ; GFX9-NEXT: s_mov_b32 s3, s5
1316 ; GFX9-NEXT: s_mov_b32 s5, s7
1317 ; GFX9-NEXT: s_mov_b32 s7, s9
1318 ; GFX9-NEXT: v_and_or_b32 v2, v2, v5, v3
1319 ; GFX9-NEXT: v_and_or_b32 v3, v4, v5, s8
1320 ; GFX9-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16 da
1321 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1322 ; GFX9-NEXT: ; return to shader part epilog
1324 ; GFX10-LABEL: atomic_add_i64_cube:
1325 ; GFX10: ; %bb.0: ; %main_body
1326 ; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff
1327 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1328 ; GFX10-NEXT: s_mov_b32 s0, s2
1329 ; GFX10-NEXT: s_mov_b32 s2, s4
1330 ; GFX10-NEXT: s_mov_b32 s4, s6
1331 ; GFX10-NEXT: s_mov_b32 s6, s8
1332 ; GFX10-NEXT: s_lshl_b32 s8, s0, 16
1333 ; GFX10-NEXT: v_and_or_b32 v2, v2, v5, v3
1334 ; GFX10-NEXT: v_and_or_b32 v3, v4, v5, s8
1335 ; GFX10-NEXT: s_mov_b32 s1, s3
1336 ; GFX10-NEXT: s_mov_b32 s3, s5
1337 ; GFX10-NEXT: s_mov_b32 s5, s7
1338 ; GFX10-NEXT: s_mov_b32 s7, s9
1339 ; GFX10-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_CUBE unorm glc a16
1340 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1341 ; GFX10-NEXT: ; return to shader part epilog
1343 %v = call i64 @llvm.amdgcn.image.atomic.add.cube.i64.i16(i64 %data, i16 %s, i16 %t, i16 %face , <8 x i32> %rsrc, i32 0, i32 0)
1344 %out = bitcast i64 %v to <2 x float>
1345 ret <2 x float> %out
; i64 image atomic add on a 1D array image addressed by two i16 operands
; (%s, %slice). Both are packed into the single VGPR v2 as
; (v2 & 0xffff) | (v3 << 16). The expected GFX9 instruction carries the "da"
; modifier; the GFX10 form carries dim:SQ_RSRC_IMG_1D_ARRAY instead.
1348 define amdgpu_ps <2 x float> @atomic_add_i64_1darray(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %slice) {
1349 ; GFX9-LABEL: atomic_add_i64_1darray:
1350 ; GFX9: ; %bb.0: ; %main_body
1351 ; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff
1352 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1353 ; GFX9-NEXT: s_mov_b32 s0, s2
1354 ; GFX9-NEXT: s_mov_b32 s1, s3
1355 ; GFX9-NEXT: s_mov_b32 s2, s4
1356 ; GFX9-NEXT: s_mov_b32 s3, s5
1357 ; GFX9-NEXT: s_mov_b32 s4, s6
1358 ; GFX9-NEXT: s_mov_b32 s5, s7
1359 ; GFX9-NEXT: s_mov_b32 s6, s8
1360 ; GFX9-NEXT: s_mov_b32 s7, s9
1361 ; GFX9-NEXT: v_and_or_b32 v2, v2, v4, v3
1362 ; GFX9-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 da
1363 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1364 ; GFX9-NEXT: ; return to shader part epilog
1366 ; GFX10-LABEL: atomic_add_i64_1darray:
1367 ; GFX10: ; %bb.0: ; %main_body
1368 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1369 ; GFX10-NEXT: s_mov_b32 s0, s2
1370 ; GFX10-NEXT: s_mov_b32 s1, s3
1371 ; GFX10-NEXT: s_mov_b32 s2, s4
1372 ; GFX10-NEXT: s_mov_b32 s3, s5
1373 ; GFX10-NEXT: v_and_or_b32 v2, 0xffff, v2, v3
1374 ; GFX10-NEXT: s_mov_b32 s4, s6
1375 ; GFX10-NEXT: s_mov_b32 s5, s7
1376 ; GFX10-NEXT: s_mov_b32 s6, s8
1377 ; GFX10-NEXT: s_mov_b32 s7, s9
1378 ; GFX10-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc a16
1379 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1380 ; GFX10-NEXT: ; return to shader part epilog
1382 %v = call i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i16(i64 %data, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
1383 %out = bitcast i64 %v to <2 x float>
1384 ret <2 x float> %out
; i64 image atomic add on a 2D array image addressed by three i16 operands
; (%s, %t, %slice). The address is packed into v[2:3]:
; v2 = (v2 & 0xffff) | (v3 << 16) and v3 = (v4 & 0xffff) | (s0 << 16).
; The expected GFX9 instruction carries the "da" modifier; the GFX10 form
; carries dim:SQ_RSRC_IMG_2D_ARRAY instead.
1387 define amdgpu_ps <2 x float> @atomic_add_i64_2darray(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %slice) {
1388 ; GFX9-LABEL: atomic_add_i64_2darray:
1389 ; GFX9: ; %bb.0: ; %main_body
1390 ; GFX9-NEXT: s_mov_b32 s0, s2
1391 ; GFX9-NEXT: s_mov_b32 s2, s4
1392 ; GFX9-NEXT: s_mov_b32 s4, s6
1393 ; GFX9-NEXT: s_mov_b32 s6, s8
1394 ; GFX9-NEXT: v_mov_b32_e32 v5, 0xffff
1395 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1396 ; GFX9-NEXT: s_lshl_b32 s8, s0, 16
1397 ; GFX9-NEXT: s_mov_b32 s1, s3
1398 ; GFX9-NEXT: s_mov_b32 s3, s5
1399 ; GFX9-NEXT: s_mov_b32 s5, s7
1400 ; GFX9-NEXT: s_mov_b32 s7, s9
1401 ; GFX9-NEXT: v_and_or_b32 v2, v2, v5, v3
1402 ; GFX9-NEXT: v_and_or_b32 v3, v4, v5, s8
1403 ; GFX9-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16 da
1404 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1405 ; GFX9-NEXT: ; return to shader part epilog
1407 ; GFX10-LABEL: atomic_add_i64_2darray:
1408 ; GFX10: ; %bb.0: ; %main_body
1409 ; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff
1410 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1411 ; GFX10-NEXT: s_mov_b32 s0, s2
1412 ; GFX10-NEXT: s_mov_b32 s2, s4
1413 ; GFX10-NEXT: s_mov_b32 s4, s6
1414 ; GFX10-NEXT: s_mov_b32 s6, s8
1415 ; GFX10-NEXT: s_lshl_b32 s8, s0, 16
1416 ; GFX10-NEXT: v_and_or_b32 v2, v2, v5, v3
1417 ; GFX10-NEXT: v_and_or_b32 v3, v4, v5, s8
1418 ; GFX10-NEXT: s_mov_b32 s1, s3
1419 ; GFX10-NEXT: s_mov_b32 s3, s5
1420 ; GFX10-NEXT: s_mov_b32 s5, s7
1421 ; GFX10-NEXT: s_mov_b32 s7, s9
1422 ; GFX10-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc a16
1423 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1424 ; GFX10-NEXT: ; return to shader part epilog
1426 %v = call i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i16(i64 %data, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
1427 %out = bitcast i64 %v to <2 x float>
1428 ret <2 x float> %out
; i64 image atomic add on a 2D MSAA image addressed by three i16 operands
; (%s, %t, %fragid). The address is packed into v[2:3]:
; v2 = (v2 & 0xffff) | (v3 << 16) and v3 = (v4 & 0xffff) | (s0 << 16).
; The expected GFX9 instruction has no "da" modifier here; the GFX10 form
; carries dim:SQ_RSRC_IMG_2D_MSAA.
1431 define amdgpu_ps <2 x float> @atomic_add_i64_2dmsaa(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %fragid) {
1432 ; GFX9-LABEL: atomic_add_i64_2dmsaa:
1433 ; GFX9: ; %bb.0: ; %main_body
1434 ; GFX9-NEXT: s_mov_b32 s0, s2
1435 ; GFX9-NEXT: s_mov_b32 s2, s4
1436 ; GFX9-NEXT: s_mov_b32 s4, s6
1437 ; GFX9-NEXT: s_mov_b32 s6, s8
1438 ; GFX9-NEXT: v_mov_b32_e32 v5, 0xffff
1439 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1440 ; GFX9-NEXT: s_lshl_b32 s8, s0, 16
1441 ; GFX9-NEXT: s_mov_b32 s1, s3
1442 ; GFX9-NEXT: s_mov_b32 s3, s5
1443 ; GFX9-NEXT: s_mov_b32 s5, s7
1444 ; GFX9-NEXT: s_mov_b32 s7, s9
1445 ; GFX9-NEXT: v_and_or_b32 v2, v2, v5, v3
1446 ; GFX9-NEXT: v_and_or_b32 v3, v4, v5, s8
1447 ; GFX9-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16
1448 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1449 ; GFX9-NEXT: ; return to shader part epilog
1451 ; GFX10-LABEL: atomic_add_i64_2dmsaa:
1452 ; GFX10: ; %bb.0: ; %main_body
1453 ; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff
1454 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1455 ; GFX10-NEXT: s_mov_b32 s0, s2
1456 ; GFX10-NEXT: s_mov_b32 s2, s4
1457 ; GFX10-NEXT: s_mov_b32 s4, s6
1458 ; GFX10-NEXT: s_mov_b32 s6, s8
1459 ; GFX10-NEXT: s_lshl_b32 s8, s0, 16
1460 ; GFX10-NEXT: v_and_or_b32 v2, v2, v5, v3
1461 ; GFX10-NEXT: v_and_or_b32 v3, v4, v5, s8
1462 ; GFX10-NEXT: s_mov_b32 s1, s3
1463 ; GFX10-NEXT: s_mov_b32 s3, s5
1464 ; GFX10-NEXT: s_mov_b32 s5, s7
1465 ; GFX10-NEXT: s_mov_b32 s7, s9
1466 ; GFX10-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA unorm glc a16
1467 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1468 ; GFX10-NEXT: ; return to shader part epilog
1470 %v = call i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i16(i64 %data, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
1471 %out = bitcast i64 %v to <2 x float>
1472 ret <2 x float> %out
; i64 image atomic add on a 2D MSAA array image addressed by four i16
; operands (%s, %t, %slice, %fragid). All four 16-bit slots are filled from
; VGPRs: v2 = (v2 & 0xffff) | (v3 << 16) and v3 = (v4 & 0xffff) | (v5 << 16),
; so no SGPR filler is involved. The expected GFX9 instruction carries the
; "da" modifier; the GFX10 form carries dim:SQ_RSRC_IMG_2D_MSAA_ARRAY.
1475 define amdgpu_ps <2 x float> @atomic_add_i64_2darraymsaa(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
1476 ; GFX9-LABEL: atomic_add_i64_2darraymsaa:
1477 ; GFX9: ; %bb.0: ; %main_body
1478 ; GFX9-NEXT: v_mov_b32_e32 v6, 0xffff
1479 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1480 ; GFX9-NEXT: v_and_or_b32 v2, v2, v6, v3
1481 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v5
1482 ; GFX9-NEXT: s_mov_b32 s0, s2
1483 ; GFX9-NEXT: s_mov_b32 s1, s3
1484 ; GFX9-NEXT: s_mov_b32 s2, s4
1485 ; GFX9-NEXT: s_mov_b32 s3, s5
1486 ; GFX9-NEXT: s_mov_b32 s4, s6
1487 ; GFX9-NEXT: s_mov_b32 s5, s7
1488 ; GFX9-NEXT: s_mov_b32 s6, s8
1489 ; GFX9-NEXT: s_mov_b32 s7, s9
1490 ; GFX9-NEXT: v_and_or_b32 v3, v4, v6, v3
1491 ; GFX9-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16 da
1492 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1493 ; GFX9-NEXT: ; return to shader part epilog
1495 ; GFX10-LABEL: atomic_add_i64_2darraymsaa:
1496 ; GFX10: ; %bb.0: ; %main_body
1497 ; GFX10-NEXT: v_mov_b32_e32 v6, 0xffff
1498 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1499 ; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5
1500 ; GFX10-NEXT: s_mov_b32 s0, s2
1501 ; GFX10-NEXT: s_mov_b32 s1, s3
1502 ; GFX10-NEXT: s_mov_b32 s2, s4
1503 ; GFX10-NEXT: v_and_or_b32 v2, v2, v6, v3
1504 ; GFX10-NEXT: v_and_or_b32 v3, v4, v6, v5
1505 ; GFX10-NEXT: s_mov_b32 s3, s5
1506 ; GFX10-NEXT: s_mov_b32 s4, s6
1507 ; GFX10-NEXT: s_mov_b32 s5, s7
1508 ; GFX10-NEXT: s_mov_b32 s6, s8
1509 ; GFX10-NEXT: s_mov_b32 s7, s9
1510 ; GFX10-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc a16
1511 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1512 ; GFX10-NEXT: ; return to shader part epilog
1514 %v = call i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i16(i64 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
1515 %out = bitcast i64 %v to <2 x float>
1516 ret <2 x float> %out
; Same operation as the plain 1D i64 add, but the call passes 2 (instead of
; 0) as the final immediate operand. The expected instruction on both
; targets then carries the "slc" modifier in addition to "glc"; everything
; else (v[0:1] data, v2 coordinate, dmask:0x3, a16) matches the plain form.
1519 define amdgpu_ps <2 x float> @atomic_add_i64_1d_slc(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
1520 ; GFX9-LABEL: atomic_add_i64_1d_slc:
1521 ; GFX9: ; %bb.0: ; %main_body
1522 ; GFX9-NEXT: s_mov_b32 s0, s2
1523 ; GFX9-NEXT: s_mov_b32 s1, s3
1524 ; GFX9-NEXT: s_mov_b32 s2, s4
1525 ; GFX9-NEXT: s_mov_b32 s3, s5
1526 ; GFX9-NEXT: s_mov_b32 s4, s6
1527 ; GFX9-NEXT: s_mov_b32 s5, s7
1528 ; GFX9-NEXT: s_mov_b32 s6, s8
1529 ; GFX9-NEXT: s_mov_b32 s7, s9
1530 ; GFX9-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc a16
1531 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1532 ; GFX9-NEXT: ; return to shader part epilog
1534 ; GFX10-LABEL: atomic_add_i64_1d_slc:
1535 ; GFX10: ; %bb.0: ; %main_body
1536 ; GFX10-NEXT: s_mov_b32 s0, s2
1537 ; GFX10-NEXT: s_mov_b32 s1, s3
1538 ; GFX10-NEXT: s_mov_b32 s2, s4
1539 ; GFX10-NEXT: s_mov_b32 s3, s5
1540 ; GFX10-NEXT: s_mov_b32 s4, s6
1541 ; GFX10-NEXT: s_mov_b32 s5, s7
1542 ; GFX10-NEXT: s_mov_b32 s6, s8
1543 ; GFX10-NEXT: s_mov_b32 s7, s9
1544 ; GFX10-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc slc a16
1545 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1546 ; GFX10-NEXT: ; return to shader part epilog
1548 %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
1549 %out = bitcast i64 %v to <2 x float>
1550 ret <2 x float> %out
; Declarations of the image atomic intrinsics exercised by this file.
; Operand order, as used by the calls in the test functions: data value(s),
; i16 coordinate(s), the <8 x i32> resource descriptor, then two i32
; immediates (immarg).
;
; Variants operating on i32 data.
1553 declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1554 declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1555 declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1556 declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1557 declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1558 declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1559 declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1560 declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1561 declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1562 declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1563 declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1564 declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1565 declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32, i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1566 declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1567 declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1568 declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1569 declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1570 declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1571 declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1572 declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
; Variants operating on i64 data, with the same operand layout as above.
1574 declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1575 declare i64 @llvm.amdgcn.image.atomic.add.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1576 declare i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1577 declare i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1578 declare i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1579 declare i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1580 declare i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1581 declare i64 @llvm.amdgcn.image.atomic.and.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1582 declare i64 @llvm.amdgcn.image.atomic.or.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1583 declare i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1584 declare i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1585 declare i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1586 declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i16(i64, i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1587 declare i64 @llvm.amdgcn.image.atomic.add.2d.i64.i16(i64, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1588 declare i64 @llvm.amdgcn.image.atomic.add.3d.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1589 declare i64 @llvm.amdgcn.image.atomic.add.cube.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1590 declare i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i16(i64, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1591 declare i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1592 declare i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1593 declare i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i16(i64, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
; Attribute group shared by every declaration above.
1595 attributes #0 = { nounwind }