1 ; RUN: llc -march=amdgcn -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SICIVI,FUNC %s
2 ; RUN: llc -march=amdgcn -mcpu=bonaire -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,SICIVI,FUNC %s
3 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,SICIVI,FUNC %s
4 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,GFX9,FUNC %s
5 ; RUN: llc -march=r600 -mcpu=redwood -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=EG,FUNC %s
7 ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32:
10 ; SICIVI-DAG: s_mov_b32 m0
13 ; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
14 ; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
15 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
16 ; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
17 ; GCN: buffer_store_dword [[RESULT]],
19 define amdgpu_kernel void @lds_atomic_xchg_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
20 %result = atomicrmw xchg ptr addrspace(3) %ptr, i32 4 seq_cst
21 store i32 %result, ptr addrspace(1) %out, align 4
25 ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32_offset:
26 ; SICIVI: s_mov_b32 m0
29 ; EG: LDS_WRXCHG_RET *
30 ; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
32 define amdgpu_kernel void @lds_atomic_xchg_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
33 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
34 %result = atomicrmw xchg ptr addrspace(3) %gep, i32 4 seq_cst
35 store i32 %result, ptr addrspace(1) %out, align 4
39 ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_f32_offset:
40 ; SICIVI: s_mov_b32 m0
43 ; EG: LDS_WRXCHG_RET *
44 ; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
46 define amdgpu_kernel void @lds_atomic_xchg_ret_f32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
47 %gep = getelementptr float, ptr addrspace(3) %ptr, i32 4
48 %result = atomicrmw xchg ptr addrspace(3) %gep, float 4.0 seq_cst
49 store float %result, ptr addrspace(1) %out, align 4
53 ; XXX - Is it really necessary to load 4 into VGPR?
54 ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32:
57 ; SICIVI-DAG: s_mov_b32 m0
60 ; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
61 ; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
62 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
63 ; GCN: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
64 ; GCN: buffer_store_dword [[RESULT]],
66 define amdgpu_kernel void @lds_atomic_add_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
67 %result = atomicrmw add ptr addrspace(3) %ptr, i32 4 seq_cst
68 store i32 %result, ptr addrspace(1) %out, align 4
72 ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_offset:
73 ; SICIVI: s_mov_b32 m0
77 ; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
79 define amdgpu_kernel void @lds_atomic_add_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
80 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
81 %result = atomicrmw add ptr addrspace(3) %gep, i32 4 seq_cst
82 store i32 %result, ptr addrspace(1) %out, align 4
86 ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_bad_si_offset:
87 ; SICIVI: s_mov_b32 m0
91 ; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
92 ; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
94 define amdgpu_kernel void @lds_atomic_add_ret_i32_bad_si_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr, i32 %a, i32 %b) nounwind {
95 %sub = sub i32 %a, %b ; run-time part of the element index (a - b)
96 %add = add i32 %sub, 4 ; constant part of the index: 4 x i32 = 16 bytes
97 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 %add
98 %result = atomicrmw add ptr addrspace(3) %gep, i32 4 seq_cst
99 store i32 %result, ptr addrspace(1) %out, align 4
103 ; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32:
106 ; SICIVI-DAG: s_mov_b32 m0
109 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
110 ; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]]
112 define amdgpu_kernel void @lds_atomic_add1_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
113 %result = atomicrmw add ptr addrspace(3) %ptr, i32 1 seq_cst
114 store i32 %result, ptr addrspace(1) %out, align 4
118 ; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32_offset:
121 ; SICIVI-DAG: s_mov_b32 m0
124 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
125 ; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]] offset:16
127 define amdgpu_kernel void @lds_atomic_add1_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
128 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
129 %result = atomicrmw add ptr addrspace(3) %gep, i32 1 seq_cst
130 store i32 %result, ptr addrspace(1) %out, align 4
134 ; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32_bad_si_offset:
135 ; SICIVI: s_mov_b32 m0
139 ; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
140 ; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
142 define amdgpu_kernel void @lds_atomic_add1_ret_i32_bad_si_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr, i32 %a, i32 %b) nounwind {
143 %sub = sub i32 %a, %b
144 %add = add i32 %sub, 4
145 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 %add
146 %result = atomicrmw add ptr addrspace(3) %gep, i32 1 seq_cst
147 store i32 %result, ptr addrspace(1) %out, align 4
151 ; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32:
154 ; SICIVI: s_mov_b32 m0
157 ; GCN: ds_sub_rtn_u32
159 define amdgpu_kernel void @lds_atomic_sub_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
160 %result = atomicrmw sub ptr addrspace(3) %ptr, i32 4 seq_cst
161 store i32 %result, ptr addrspace(1) %out, align 4
165 ; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32_offset:
168 ; SICIVI: s_mov_b32 m0
171 ; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
173 define amdgpu_kernel void @lds_atomic_sub_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
174 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
175 %result = atomicrmw sub ptr addrspace(3) %gep, i32 4 seq_cst
176 store i32 %result, ptr addrspace(1) %out, align 4
180 ; FUNC-LABEL: {{^}}lds_atomic_sub1_ret_i32:
183 ; SICIVI-DAG: s_mov_b32 m0
186 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
187 ; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]]
189 define amdgpu_kernel void @lds_atomic_sub1_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
190 %result = atomicrmw sub ptr addrspace(3) %ptr, i32 1 seq_cst
191 store i32 %result, ptr addrspace(1) %out, align 4
195 ; FUNC-LABEL: {{^}}lds_atomic_sub1_ret_i32_offset:
198 ; SICIVI-DAG: s_mov_b32 m0
201 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
202 ; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]] offset:16
204 define amdgpu_kernel void @lds_atomic_sub1_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
205 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
206 %result = atomicrmw sub ptr addrspace(3) %gep, i32 1 seq_cst
207 store i32 %result, ptr addrspace(1) %out, align 4
211 ; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32:
214 ; SICIVI-DAG: s_mov_b32 m0
217 ; GCN: ds_and_rtn_b32
219 define amdgpu_kernel void @lds_atomic_and_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
220 %result = atomicrmw and ptr addrspace(3) %ptr, i32 4 seq_cst
221 store i32 %result, ptr addrspace(1) %out, align 4
225 ; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32_offset:
226 ; SICIVI: s_mov_b32 m0
230 ; GCN: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
232 define amdgpu_kernel void @lds_atomic_and_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
233 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
234 %result = atomicrmw and ptr addrspace(3) %gep, i32 4 seq_cst
235 store i32 %result, ptr addrspace(1) %out, align 4
239 ; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32:
240 ; SICIVI: s_mov_b32 m0
246 define amdgpu_kernel void @lds_atomic_or_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
247 %result = atomicrmw or ptr addrspace(3) %ptr, i32 4 seq_cst
248 store i32 %result, ptr addrspace(1) %out, align 4
252 ; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32_offset:
253 ; SICIVI: s_mov_b32 m0
257 ; GCN: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
259 define amdgpu_kernel void @lds_atomic_or_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
260 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
261 %result = atomicrmw or ptr addrspace(3) %gep, i32 4 seq_cst
262 store i32 %result, ptr addrspace(1) %out, align 4
266 ; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32:
267 ; SICIVI: s_mov_b32 m0
271 ; GCN: ds_xor_rtn_b32
273 define amdgpu_kernel void @lds_atomic_xor_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
274 %result = atomicrmw xor ptr addrspace(3) %ptr, i32 4 seq_cst
275 store i32 %result, ptr addrspace(1) %out, align 4
279 ; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32_offset:
280 ; SICIVI: s_mov_b32 m0
284 ; GCN: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
286 define amdgpu_kernel void @lds_atomic_xor_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
287 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
288 %result = atomicrmw xor ptr addrspace(3) %gep, i32 4 seq_cst
289 store i32 %result, ptr addrspace(1) %out, align 4
293 ; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
294 ; XFUNC-LABEL: {{^}}lds_atomic_nand_ret_i32:
295 ; define amdgpu_kernel void @lds_atomic_nand_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
296 ; %result = atomicrmw nand ptr addrspace(3) %ptr, i32 4 seq_cst
297 ; store i32 %result, ptr addrspace(1) %out, align 4
301 ; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32:
302 ; SICIVI: s_mov_b32 m0
305 ; EG: LDS_MIN_INT_RET *
306 ; GCN: ds_min_rtn_i32
308 define amdgpu_kernel void @lds_atomic_min_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
309 %result = atomicrmw min ptr addrspace(3) %ptr, i32 4 seq_cst
310 store i32 %result, ptr addrspace(1) %out, align 4
314 ; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32_offset:
315 ; SICIVI: s_mov_b32 m0
318 ; EG: LDS_MIN_INT_RET *
319 ; GCN: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
321 define amdgpu_kernel void @lds_atomic_min_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
322 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
323 %result = atomicrmw min ptr addrspace(3) %gep, i32 4 seq_cst
324 store i32 %result, ptr addrspace(1) %out, align 4
328 ; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32:
329 ; SICIVI: s_mov_b32 m0
332 ; EG: LDS_MAX_INT_RET *
333 ; GCN: ds_max_rtn_i32
335 define amdgpu_kernel void @lds_atomic_max_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
336 %result = atomicrmw max ptr addrspace(3) %ptr, i32 4 seq_cst
337 store i32 %result, ptr addrspace(1) %out, align 4
341 ; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32_offset:
342 ; SICIVI: s_mov_b32 m0
345 ; EG: LDS_MAX_INT_RET *
346 ; GCN: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
348 define amdgpu_kernel void @lds_atomic_max_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
349 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
350 %result = atomicrmw max ptr addrspace(3) %gep, i32 4 seq_cst
351 store i32 %result, ptr addrspace(1) %out, align 4
355 ; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32:
356 ; SICIVI: s_mov_b32 m0
359 ; EG: LDS_MIN_UINT_RET *
360 ; GCN: ds_min_rtn_u32
362 define amdgpu_kernel void @lds_atomic_umin_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
363 %result = atomicrmw umin ptr addrspace(3) %ptr, i32 4 seq_cst
364 store i32 %result, ptr addrspace(1) %out, align 4
368 ; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32_offset:
369 ; SICIVI: s_mov_b32 m0
372 ; EG: LDS_MIN_UINT_RET *
373 ; GCN: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
375 define amdgpu_kernel void @lds_atomic_umin_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
376 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
377 %result = atomicrmw umin ptr addrspace(3) %gep, i32 4 seq_cst
378 store i32 %result, ptr addrspace(1) %out, align 4
382 ; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32:
383 ; SICIVI: s_mov_b32 m0
386 ; EG: LDS_MAX_UINT_RET *
387 ; GCN: ds_max_rtn_u32
389 define amdgpu_kernel void @lds_atomic_umax_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
390 %result = atomicrmw umax ptr addrspace(3) %ptr, i32 4 seq_cst
391 store i32 %result, ptr addrspace(1) %out, align 4
395 ; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32_offset:
396 ; SICIVI: s_mov_b32 m0
399 ; EG: LDS_MAX_UINT_RET *
400 ; GCN: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
402 define amdgpu_kernel void @lds_atomic_umax_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
403 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
404 %result = atomicrmw umax ptr addrspace(3) %gep, i32 4 seq_cst
405 store i32 %result, ptr addrspace(1) %out, align 4
409 ; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32:
410 ; SICIVI-DAG: s_mov_b32 m0
413 ; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
414 ; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
415 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
416 ; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
418 define amdgpu_kernel void @lds_atomic_xchg_noret_i32(ptr addrspace(3) %ptr) nounwind {
419 %result = atomicrmw xchg ptr addrspace(3) %ptr, i32 4 seq_cst
423 ; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32_offset:
424 ; SICIVI: s_mov_b32 m0
427 ; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
429 define amdgpu_kernel void @lds_atomic_xchg_noret_i32_offset(ptr addrspace(3) %ptr) nounwind {
430 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
431 %result = atomicrmw xchg ptr addrspace(3) %gep, i32 4 seq_cst
435 ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32:
436 ; SICIVI-DAG: s_mov_b32 m0
439 ; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
440 ; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
441 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
442 ; GCN: ds_add_u32 [[VPTR]], [[DATA]]
444 define amdgpu_kernel void @lds_atomic_add_noret_i32(ptr addrspace(3) %ptr) nounwind {
445 %result = atomicrmw add ptr addrspace(3) %ptr, i32 4 seq_cst
449 ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_offset:
450 ; SICIVI: s_mov_b32 m0
453 ; GCN: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
455 define amdgpu_kernel void @lds_atomic_add_noret_i32_offset(ptr addrspace(3) %ptr) nounwind {
456 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
457 %result = atomicrmw add ptr addrspace(3) %gep, i32 4 seq_cst
461 ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_bad_si_offset:
462 ; SICIVI: s_mov_b32 m0
465 ; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}}
466 ; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
468 define amdgpu_kernel void @lds_atomic_add_noret_i32_bad_si_offset(ptr addrspace(3) %ptr, i32 %a, i32 %b) nounwind {
469 %sub = sub i32 %a, %b
470 %add = add i32 %sub, 4
471 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 %add
472 %result = atomicrmw add ptr addrspace(3) %gep, i32 4 seq_cst
476 ; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32:
477 ; SICIVI-DAG: s_mov_b32 m0
480 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
481 ; GCN: ds_add_u32 v{{[0-9]+}}, [[ONE]]
483 define amdgpu_kernel void @lds_atomic_add1_noret_i32(ptr addrspace(3) %ptr) nounwind {
484 %result = atomicrmw add ptr addrspace(3) %ptr, i32 1 seq_cst
488 ; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32_offset:
489 ; SICIVI-DAG: s_mov_b32 m0
492 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
493 ; GCN: ds_add_u32 v{{[0-9]+}}, [[ONE]] offset:16
495 define amdgpu_kernel void @lds_atomic_add1_noret_i32_offset(ptr addrspace(3) %ptr) nounwind {
496 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
497 %result = atomicrmw add ptr addrspace(3) %gep, i32 1 seq_cst
501 ; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32_bad_si_offset:
502 ; SICIVI: s_mov_b32 m0
505 ; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}}
506 ; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
508 define amdgpu_kernel void @lds_atomic_add1_noret_i32_bad_si_offset(ptr addrspace(3) %ptr, i32 %a, i32 %b) nounwind {
509 %sub = sub i32 %a, %b
510 %add = add i32 %sub, 4
511 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 %add
512 %result = atomicrmw add ptr addrspace(3) %gep, i32 1 seq_cst
516 ; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32:
517 ; SICIVI: s_mov_b32 m0
522 define amdgpu_kernel void @lds_atomic_sub_noret_i32(ptr addrspace(3) %ptr) nounwind {
523 %result = atomicrmw sub ptr addrspace(3) %ptr, i32 4 seq_cst
527 ; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32_offset:
528 ; SICIVI: s_mov_b32 m0
531 ; GCN: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
533 define amdgpu_kernel void @lds_atomic_sub_noret_i32_offset(ptr addrspace(3) %ptr) nounwind {
534 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
535 %result = atomicrmw sub ptr addrspace(3) %gep, i32 4 seq_cst
539 ; FUNC-LABEL: {{^}}lds_atomic_sub1_noret_i32:
540 ; SICIVI-DAG: s_mov_b32 m0
543 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
544 ; GCN: ds_sub_u32 v{{[0-9]+}}, [[ONE]]
546 define amdgpu_kernel void @lds_atomic_sub1_noret_i32(ptr addrspace(3) %ptr) nounwind {
547 %result = atomicrmw sub ptr addrspace(3) %ptr, i32 1 seq_cst
551 ; FUNC-LABEL: {{^}}lds_atomic_sub1_noret_i32_offset:
552 ; SICIVI-DAG: s_mov_b32 m0
555 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
556 ; GCN: ds_sub_u32 v{{[0-9]+}}, [[ONE]] offset:16
558 define amdgpu_kernel void @lds_atomic_sub1_noret_i32_offset(ptr addrspace(3) %ptr) nounwind {
559 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
560 %result = atomicrmw sub ptr addrspace(3) %gep, i32 1 seq_cst
564 ; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32:
565 ; SICIVI: s_mov_b32 m0
570 define amdgpu_kernel void @lds_atomic_and_noret_i32(ptr addrspace(3) %ptr) nounwind {
571 %result = atomicrmw and ptr addrspace(3) %ptr, i32 4 seq_cst
575 ; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32_offset:
576 ; SICIVI: s_mov_b32 m0
579 ; GCN: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
581 define amdgpu_kernel void @lds_atomic_and_noret_i32_offset(ptr addrspace(3) %ptr) nounwind {
582 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
583 %result = atomicrmw and ptr addrspace(3) %gep, i32 4 seq_cst
587 ; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32:
588 ; SICIVI: s_mov_b32 m0
593 define amdgpu_kernel void @lds_atomic_or_noret_i32(ptr addrspace(3) %ptr) nounwind {
594 %result = atomicrmw or ptr addrspace(3) %ptr, i32 4 seq_cst
598 ; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32_offset:
599 ; SICIVI: s_mov_b32 m0
602 ; GCN: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
604 define amdgpu_kernel void @lds_atomic_or_noret_i32_offset(ptr addrspace(3) %ptr) nounwind {
605 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
606 %result = atomicrmw or ptr addrspace(3) %gep, i32 4 seq_cst
610 ; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32:
611 ; SICIVI: s_mov_b32 m0
616 define amdgpu_kernel void @lds_atomic_xor_noret_i32(ptr addrspace(3) %ptr) nounwind {
617 %result = atomicrmw xor ptr addrspace(3) %ptr, i32 4 seq_cst
621 ; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32_offset:
622 ; SICIVI: s_mov_b32 m0
625 ; GCN: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
627 define amdgpu_kernel void @lds_atomic_xor_noret_i32_offset(ptr addrspace(3) %ptr) nounwind {
628 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
629 %result = atomicrmw xor ptr addrspace(3) %gep, i32 4 seq_cst
633 ; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
634 ; XFUNC-LABEL: {{^}}lds_atomic_nand_noret_i32:
635 ; define amdgpu_kernel void @lds_atomic_nand_noret_i32(ptr addrspace(3) %ptr) nounwind {
636 ; %result = atomicrmw nand ptr addrspace(3) %ptr, i32 4 seq_cst
640 ; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32:
641 ; SICIVI: s_mov_b32 m0
646 define amdgpu_kernel void @lds_atomic_min_noret_i32(ptr addrspace(3) %ptr) nounwind {
647 %result = atomicrmw min ptr addrspace(3) %ptr, i32 4 seq_cst
651 ; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32_offset:
652 ; SICIVI: s_mov_b32 m0
655 ; GCN: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
657 define amdgpu_kernel void @lds_atomic_min_noret_i32_offset(ptr addrspace(3) %ptr) nounwind {
658 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
659 %result = atomicrmw min ptr addrspace(3) %gep, i32 4 seq_cst
663 ; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32:
664 ; SICIVI: s_mov_b32 m0
669 define amdgpu_kernel void @lds_atomic_max_noret_i32(ptr addrspace(3) %ptr) nounwind {
670 %result = atomicrmw max ptr addrspace(3) %ptr, i32 4 seq_cst
674 ; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32_offset:
675 ; SICIVI: s_mov_b32 m0
678 ; GCN: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
680 define amdgpu_kernel void @lds_atomic_max_noret_i32_offset(ptr addrspace(3) %ptr) nounwind {
681 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
682 %result = atomicrmw max ptr addrspace(3) %gep, i32 4 seq_cst
686 ; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32:
687 ; SICIVI: s_mov_b32 m0
692 define amdgpu_kernel void @lds_atomic_umin_noret_i32(ptr addrspace(3) %ptr) nounwind {
693 %result = atomicrmw umin ptr addrspace(3) %ptr, i32 4 seq_cst
697 ; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32_offset:
698 ; SICIVI: s_mov_b32 m0
701 ; GCN: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
703 define amdgpu_kernel void @lds_atomic_umin_noret_i32_offset(ptr addrspace(3) %ptr) nounwind {
704 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
705 %result = atomicrmw umin ptr addrspace(3) %gep, i32 4 seq_cst
709 ; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32:
710 ; SICIVI: s_mov_b32 m0
715 define amdgpu_kernel void @lds_atomic_umax_noret_i32(ptr addrspace(3) %ptr) nounwind {
716 %result = atomicrmw umax ptr addrspace(3) %ptr, i32 4 seq_cst
720 ; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32_offset:
721 ; SICIVI: s_mov_b32 m0
724 ; GCN: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
726 define amdgpu_kernel void @lds_atomic_umax_noret_i32_offset(ptr addrspace(3) %ptr) nounwind {
727 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
728 %result = atomicrmw umax ptr addrspace(3) %gep, i32 4 seq_cst
732 ; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32:
733 ; SICIVI: s_mov_b32 m0
737 ; GCN: ds_inc_rtn_u32
739 define amdgpu_kernel void @lds_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
740 %result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i32 4 seq_cst
741 store i32 %result, ptr addrspace(1) %out, align 4
745 ; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_offset:
746 ; SICIVI: s_mov_b32 m0
750 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
752 define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
753 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
754 %result = atomicrmw uinc_wrap ptr addrspace(3) %gep, i32 4 seq_cst
755 store i32 %result, ptr addrspace(1) %out, align 4
759 ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32:
760 ; SICIVI: s_mov_b32 m0
765 define amdgpu_kernel void @lds_atomic_inc_noret_i32(ptr addrspace(3) %ptr) nounwind {
766 %result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i32 4 seq_cst
770 ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_offset:
771 ; SICIVI: s_mov_b32 m0
774 ; GCN: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
776 define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(ptr addrspace(3) %ptr) nounwind {
777 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
778 %result = atomicrmw uinc_wrap ptr addrspace(3) %gep, i32 4 seq_cst
782 ; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32:
783 ; SICIVI: s_mov_b32 m0
787 ; GCN: ds_dec_rtn_u32
789 define amdgpu_kernel void @lds_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
790 %result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i32 4 seq_cst
791 store i32 %result, ptr addrspace(1) %out, align 4
795 ; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32_offset:
796 ; SICIVI: s_mov_b32 m0
800 ; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
802 define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
803 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
804 %result = atomicrmw udec_wrap ptr addrspace(3) %gep, i32 4 seq_cst
805 store i32 %result, ptr addrspace(1) %out, align 4
809 ; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32:
810 ; SICIVI: s_mov_b32 m0
815 define amdgpu_kernel void @lds_atomic_dec_noret_i32(ptr addrspace(3) %ptr) nounwind {
816 %result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i32 4 seq_cst
820 ; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32_offset:
821 ; SICIVI: s_mov_b32 m0
824 ; GCN: ds_dec_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
826 define amdgpu_kernel void @lds_atomic_dec_noret_i32_offset(ptr addrspace(3) %ptr) nounwind {
827 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
828 %result = atomicrmw udec_wrap ptr addrspace(3) %gep, i32 4 seq_cst