1 ; RUN: llc -march=amdgcn -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,SI,SICIVI %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,VI,SICIVI,GFX89 %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,GFX9,GFX89 %s
; 64-bit LDS atomic exchange tests whose result is used (stored to a global
; pointer) — base pointer, a +32 byte offset formed by a GEP of 4 x i64, and
; an f64 operand. Each is expected to select ds_wrxchg_rtn_b64.
; NOTE(review) — the embedded original line numbers jump (e.g. 13 -> 17), so
; each kernel's closing "ret void" / "}" lines appear to have been dropped
; from this dump; verify against the original test file before relying on it.
5 ; GCN-LABEL: {{^}}lds_atomic_xchg_ret_i64:
9 ; GCN: ds_wrxchg_rtn_b64
11 define amdgpu_kernel void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
12 %result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
13 store i64 %result, i64 addrspace(1)* %out, align 8
17 ; GCN-LABEL: {{^}}lds_atomic_xchg_ret_i64_offset:
18 ; SICIVI: s_mov_b32 m0
21 ; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
23 define amdgpu_kernel void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
24 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
25 %result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
26 store i64 %result, i64 addrspace(1)* %out, align 8
30 ; GCN-LABEL: {{^}}lds_atomic_xchg_ret_f64_offset:
31 ; SICIVI: s_mov_b32 m0
34 ; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
36 define amdgpu_kernel void @lds_atomic_xchg_ret_f64_offset(double addrspace(1)* %out, double addrspace(3)* %ptr) nounwind {
37 %gep = getelementptr double, double addrspace(3)* %ptr, i32 4
38 %result = atomicrmw xchg double addrspace(3)* %gep, double 4.0 seq_cst
39 store double %result, double addrspace(1)* %out, align 8
; 64-bit LDS atomic add tests whose result is used — plain add, add at a
; +32 byte offset (this one also pins the scalar kernarg load offsets, which
; differ between the SI and GFX8/9 kernarg layouts, and the 64-bit immediate
; being materialized as a lo/hi VGPR pair), and the "add1" variants that add
; the constant 1. All select ds_add_rtn_u64.
43 ; GCN-LABEL: {{^}}lds_atomic_add_ret_i64:
44 ; SICIVI: s_mov_b32 m0
49 define amdgpu_kernel void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
50 %result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
51 store i64 %result, i64 addrspace(1)* %out, align 8
55 ; GCN-LABEL: {{^}}lds_atomic_add_ret_i64_offset:
56 ; SICIVI-DAG: s_mov_b32 m0
59 ; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
60 ; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
61 ; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
62 ; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
63 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
64 ; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32
65 ; GCN: buffer_store_dwordx2 [[RESULT]],
67 define amdgpu_kernel void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
68 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i64 4
69 %result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
70 store i64 %result, i64 addrspace(1)* %out, align 8
74 ; GCN-LABEL: {{^}}lds_atomic_add1_ret_i64:
75 ; SICIVI-DAG: s_mov_b32 m0
78 ; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
79 ; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
80 ; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
81 ; GCN: buffer_store_dwordx2 [[RESULT]],
83 define amdgpu_kernel void @lds_atomic_add1_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
84 %result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
85 store i64 %result, i64 addrspace(1)* %out, align 8
89 ; GCN-LABEL: {{^}}lds_atomic_add1_ret_i64_offset:
90 ; SICIVI: s_mov_b32 m0
93 ; GCN: ds_add_rtn_u64 {{.*}} offset:32
95 define amdgpu_kernel void @lds_atomic_add1_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
96 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
97 %result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
98 store i64 %result, i64 addrspace(1)* %out, align 8
; 64-bit LDS atomic subtract tests whose result is used — plain sub, sub at a
; +32 byte offset, and the "sub1" variants that subtract the constant 1 (the
; sub1 checks also pin the 64-bit constant 1 materialized as a lo/hi VGPR
; pair). All select ds_sub_rtn_u64.
102 ; GCN-LABEL: {{^}}lds_atomic_sub_ret_i64:
103 ; SICIVI: s_mov_b32 m0
106 ; GCN: ds_sub_rtn_u64
108 define amdgpu_kernel void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
109 %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
110 store i64 %result, i64 addrspace(1)* %out, align 8
114 ; GCN-LABEL: {{^}}lds_atomic_sub_ret_i64_offset:
115 ; SICIVI: s_mov_b32 m0
118 ; GCN: ds_sub_rtn_u64 {{.*}} offset:32
120 define amdgpu_kernel void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
121 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
122 %result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
123 store i64 %result, i64 addrspace(1)* %out, align 8
127 ; GCN-LABEL: {{^}}lds_atomic_sub1_ret_i64:
128 ; SICIVI-DAG: s_mov_b32 m0
131 ; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
132 ; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
133 ; GCN: ds_sub_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
134 ; GCN: buffer_store_dwordx2 [[RESULT]],
136 define amdgpu_kernel void @lds_atomic_sub1_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
137 %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
138 store i64 %result, i64 addrspace(1)* %out, align 8
142 ; GCN-LABEL: {{^}}lds_atomic_sub1_ret_i64_offset:
143 ; SICIVI: s_mov_b32 m0
146 ; GCN: ds_sub_rtn_u64 {{.*}} offset:32
148 define amdgpu_kernel void @lds_atomic_sub1_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
149 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
150 %result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
151 store i64 %result, i64 addrspace(1)* %out, align 8
; 64-bit LDS atomic bitwise tests (and / or / xor) whose result is used, each
; with a base-pointer form and a +32 byte offset form, selecting
; ds_and_rtn_b64 / ds_or_rtn_b64 / ds_xor_rtn_b64 respectively.
; NOTE(review) — the base "or" kernel below has no visible instruction check
; line; the embedded numbering (181 -> 186) suggests it was dropped from this
; dump rather than never written — confirm against the original test file.
155 ; GCN-LABEL: {{^}}lds_atomic_and_ret_i64:
156 ; SICIVI: s_mov_b32 m0
159 ; GCN: ds_and_rtn_b64
161 define amdgpu_kernel void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
162 %result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
163 store i64 %result, i64 addrspace(1)* %out, align 8
167 ; GCN-LABEL: {{^}}lds_atomic_and_ret_i64_offset:
168 ; SICIVI: s_mov_b32 m0
171 ; GCN: ds_and_rtn_b64 {{.*}} offset:32
173 define amdgpu_kernel void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
174 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
175 %result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
176 store i64 %result, i64 addrspace(1)* %out, align 8
180 ; GCN-LABEL: {{^}}lds_atomic_or_ret_i64:
181 ; SICIVI: s_mov_b32 m0
186 define amdgpu_kernel void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
187 %result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
188 store i64 %result, i64 addrspace(1)* %out, align 8
192 ; GCN-LABEL: {{^}}lds_atomic_or_ret_i64_offset:
193 ; SICIVI: s_mov_b32 m0
196 ; GCN: ds_or_rtn_b64 {{.*}} offset:32
198 define amdgpu_kernel void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
199 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
200 %result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
201 store i64 %result, i64 addrspace(1)* %out, align 8
205 ; GCN-LABEL: {{^}}lds_atomic_xor_ret_i64:
206 ; SICIVI: s_mov_b32 m0
209 ; GCN: ds_xor_rtn_b64
211 define amdgpu_kernel void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
212 %result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
213 store i64 %result, i64 addrspace(1)* %out, align 8
217 ; GCN-LABEL: {{^}}lds_atomic_xor_ret_i64_offset:
218 ; SICIVI: s_mov_b32 m0
221 ; GCN: ds_xor_rtn_b64 {{.*}} offset:32
223 define amdgpu_kernel void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
224 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
225 %result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
226 store i64 %result, i64 addrspace(1)* %out, align 8
230 ; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
231 ; XGCN-LABEL: {{^}}lds_atomic_nand_ret_i64:
232 ; define amdgpu_kernel void @lds_atomic_nand_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
233 ; %result = atomicrmw nand i64 addrspace(3)* %ptr, i64 4 seq_cst
234 ; store i64 %result, i64 addrspace(1)* %out, align 8
; 64-bit LDS atomic min/max tests whose result is used — signed min/max select
; the _i64 forms (ds_min_rtn_i64 / ds_max_rtn_i64) while unsigned umin/umax
; select the _u64 forms (ds_min_rtn_u64 / ds_max_rtn_u64); each operation is
; tested at the base pointer and at a +32 byte offset.
238 ; GCN-LABEL: {{^}}lds_atomic_min_ret_i64:
239 ; SICIVI: s_mov_b32 m0
242 ; GCN: ds_min_rtn_i64
244 define amdgpu_kernel void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
245 %result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
246 store i64 %result, i64 addrspace(1)* %out, align 8
250 ; GCN-LABEL: {{^}}lds_atomic_min_ret_i64_offset:
251 ; SICIVI: s_mov_b32 m0
254 ; GCN: ds_min_rtn_i64 {{.*}} offset:32
256 define amdgpu_kernel void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
257 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
258 %result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
259 store i64 %result, i64 addrspace(1)* %out, align 8
263 ; GCN-LABEL: {{^}}lds_atomic_max_ret_i64:
264 ; SICIVI: s_mov_b32 m0
267 ; GCN: ds_max_rtn_i64
269 define amdgpu_kernel void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
270 %result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
271 store i64 %result, i64 addrspace(1)* %out, align 8
275 ; GCN-LABEL: {{^}}lds_atomic_max_ret_i64_offset:
276 ; SICIVI: s_mov_b32 m0
279 ; GCN: ds_max_rtn_i64 {{.*}} offset:32
281 define amdgpu_kernel void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
282 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
283 %result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
284 store i64 %result, i64 addrspace(1)* %out, align 8
288 ; GCN-LABEL: {{^}}lds_atomic_umin_ret_i64:
289 ; SICIVI: s_mov_b32 m0
292 ; GCN: ds_min_rtn_u64
294 define amdgpu_kernel void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
295 %result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
296 store i64 %result, i64 addrspace(1)* %out, align 8
300 ; GCN-LABEL: {{^}}lds_atomic_umin_ret_i64_offset:
301 ; SICIVI: s_mov_b32 m0
304 ; GCN: ds_min_rtn_u64 {{.*}} offset:32
306 define amdgpu_kernel void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
307 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
308 %result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
309 store i64 %result, i64 addrspace(1)* %out, align 8
313 ; GCN-LABEL: {{^}}lds_atomic_umax_ret_i64:
314 ; SICIVI: s_mov_b32 m0
317 ; GCN: ds_max_rtn_u64
319 define amdgpu_kernel void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
320 %result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
321 store i64 %result, i64 addrspace(1)* %out, align 8
325 ; GCN-LABEL: {{^}}lds_atomic_umax_ret_i64_offset:
326 ; SICIVI: s_mov_b32 m0
329 ; GCN: ds_max_rtn_u64 {{.*}} offset:32
331 define amdgpu_kernel void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
332 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
333 %result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
334 store i64 %result, i64 addrspace(1)* %out, align 8
; No-return variants — the atomicrmw result is dead, so the kernels store
; nothing. For exchange, the checks still expect the returning form
; (ds_wrxchg_rtn_b64): there is no non-returning DS exchange. For add, the
; non-returning ds_add_u64 is expected; the offset variant also pins the
; kernarg load offsets (which differ between SI and GFX8/9) and the 64-bit
; immediate being split into lo/hi VGPRs.
338 ; GCN-LABEL: {{^}}lds_atomic_xchg_noret_i64:
339 ; SICIVI: s_mov_b32 m0
342 ; GCN: ds_wrxchg_rtn_b64
344 define amdgpu_kernel void @lds_atomic_xchg_noret_i64(i64 addrspace(3)* %ptr) nounwind {
345 %result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
349 ; GCN-LABEL: {{^}}lds_atomic_xchg_noret_i64_offset:
350 ; SICIVI: s_mov_b32 m0
353 ; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
355 define amdgpu_kernel void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
356 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
357 %result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
361 ; GCN-LABEL: {{^}}lds_atomic_add_noret_i64:
362 ; SICIVI: s_mov_b32 m0
367 define amdgpu_kernel void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind {
368 %result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
372 ; GCN-LABEL: {{^}}lds_atomic_add_noret_i64_offset:
373 ; SICIVI-DAG: s_mov_b32 m0
376 ; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
377 ; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24
378 ; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
379 ; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
380 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
381 ; GCN: ds_add_u64 {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32
383 define amdgpu_kernel void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
384 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i64 4
385 %result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
; No-return add-by-1 and subtract tests — with the result unused, the
; non-returning ds_add_u64 / ds_sub_u64 instructions are expected, at the
; base pointer and at a +32 byte offset; the constant-1 variants also pin the
; 64-bit constant 1 materialized as a lo/hi VGPR pair.
389 ; GCN-LABEL: {{^}}lds_atomic_add1_noret_i64:
390 ; SICIVI-DAG: s_mov_b32 m0
393 ; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
394 ; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
395 ; GCN: ds_add_u64 {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
397 define amdgpu_kernel void @lds_atomic_add1_noret_i64(i64 addrspace(3)* %ptr) nounwind {
398 %result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
402 ; GCN-LABEL: {{^}}lds_atomic_add1_noret_i64_offset:
403 ; SICIVI: s_mov_b32 m0
406 ; GCN: ds_add_u64 {{.*}} offset:32
408 define amdgpu_kernel void @lds_atomic_add1_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
409 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
410 %result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
414 ; GCN-LABEL: {{^}}lds_atomic_sub_noret_i64:
415 ; SICIVI: s_mov_b32 m0
420 define amdgpu_kernel void @lds_atomic_sub_noret_i64(i64 addrspace(3)* %ptr) nounwind {
421 %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
425 ; GCN-LABEL: {{^}}lds_atomic_sub_noret_i64_offset:
426 ; SICIVI: s_mov_b32 m0
429 ; GCN: ds_sub_u64 {{.*}} offset:32
431 define amdgpu_kernel void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
432 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
433 %result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
437 ; GCN-LABEL: {{^}}lds_atomic_sub1_noret_i64:
438 ; SICIVI-DAG: s_mov_b32 m0
441 ; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
442 ; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
443 ; GCN: ds_sub_u64 {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
445 define amdgpu_kernel void @lds_atomic_sub1_noret_i64(i64 addrspace(3)* %ptr) nounwind {
446 %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
450 ; GCN-LABEL: {{^}}lds_atomic_sub1_noret_i64_offset:
451 ; SICIVI: s_mov_b32 m0
454 ; GCN: ds_sub_u64 {{.*}} offset:32
456 define amdgpu_kernel void @lds_atomic_sub1_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
457 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
458 %result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
; No-return bitwise tests (and / or / xor) — with the result unused, the
; non-returning ds_and_b64 / ds_or_b64 / ds_xor_b64 forms are expected at a
; +32 byte offset; the base-pointer variants here only carry the m0-setup
; check for the SI/CI/VI targets.
462 ; GCN-LABEL: {{^}}lds_atomic_and_noret_i64:
463 ; SICIVI: s_mov_b32 m0
468 define amdgpu_kernel void @lds_atomic_and_noret_i64(i64 addrspace(3)* %ptr) nounwind {
469 %result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
473 ; GCN-LABEL: {{^}}lds_atomic_and_noret_i64_offset:
474 ; SICIVI: s_mov_b32 m0
477 ; GCN: ds_and_b64 {{.*}} offset:32
479 define amdgpu_kernel void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
480 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
481 %result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
485 ; GCN-LABEL: {{^}}lds_atomic_or_noret_i64:
486 ; SICIVI: s_mov_b32 m0
491 define amdgpu_kernel void @lds_atomic_or_noret_i64(i64 addrspace(3)* %ptr) nounwind {
492 %result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
496 ; GCN-LABEL: {{^}}lds_atomic_or_noret_i64_offset:
497 ; SICIVI: s_mov_b32 m0
500 ; GCN: ds_or_b64 {{.*}} offset:32
502 define amdgpu_kernel void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
503 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
504 %result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
508 ; GCN-LABEL: {{^}}lds_atomic_xor_noret_i64:
509 ; SICIVI: s_mov_b32 m0
514 define amdgpu_kernel void @lds_atomic_xor_noret_i64(i64 addrspace(3)* %ptr) nounwind {
515 %result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
519 ; GCN-LABEL: {{^}}lds_atomic_xor_noret_i64_offset:
520 ; SICIVI: s_mov_b32 m0
523 ; GCN: ds_xor_b64 {{.*}} offset:32
525 define amdgpu_kernel void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
526 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
527 %result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
531 ; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
532 ; XGCN-LABEL: {{^}}lds_atomic_nand_noret_i64:
533 ; define amdgpu_kernel void @lds_atomic_nand_noret_i64(i64 addrspace(3)* %ptr) nounwind {
534 ; %result = atomicrmw nand i64 addrspace(3)* %ptr, i64 4 seq_cst
; No-return min/max tests — with the result unused, the non-returning forms
; are expected: signed min/max select ds_min_i64 / ds_max_i64, unsigned
; umin/umax select ds_min_u64 / ds_max_u64, each checked at a +32 byte
; offset; the base-pointer variants here only carry the m0-setup check for
; the SI/CI/VI targets.
538 ; GCN-LABEL: {{^}}lds_atomic_min_noret_i64:
539 ; SICIVI: s_mov_b32 m0
544 define amdgpu_kernel void @lds_atomic_min_noret_i64(i64 addrspace(3)* %ptr) nounwind {
545 %result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
549 ; GCN-LABEL: {{^}}lds_atomic_min_noret_i64_offset:
550 ; SICIVI: s_mov_b32 m0
553 ; GCN: ds_min_i64 {{.*}} offset:32
555 define amdgpu_kernel void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
556 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
557 %result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
561 ; GCN-LABEL: {{^}}lds_atomic_max_noret_i64:
562 ; SICIVI: s_mov_b32 m0
567 define amdgpu_kernel void @lds_atomic_max_noret_i64(i64 addrspace(3)* %ptr) nounwind {
568 %result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
572 ; GCN-LABEL: {{^}}lds_atomic_max_noret_i64_offset:
573 ; SICIVI: s_mov_b32 m0
576 ; GCN: ds_max_i64 {{.*}} offset:32
578 define amdgpu_kernel void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
579 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
580 %result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
584 ; GCN-LABEL: {{^}}lds_atomic_umin_noret_i64:
585 ; SICIVI: s_mov_b32 m0
590 define amdgpu_kernel void @lds_atomic_umin_noret_i64(i64 addrspace(3)* %ptr) nounwind {
591 %result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
595 ; GCN-LABEL: {{^}}lds_atomic_umin_noret_i64_offset:
596 ; SICIVI: s_mov_b32 m0
599 ; GCN: ds_min_u64 {{.*}} offset:32
601 define amdgpu_kernel void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
602 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
603 %result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
607 ; GCN-LABEL: {{^}}lds_atomic_umax_noret_i64:
608 ; SICIVI: s_mov_b32 m0
613 define amdgpu_kernel void @lds_atomic_umax_noret_i64(i64 addrspace(3)* %ptr) nounwind {
614 %result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
618 ; GCN-LABEL: {{^}}lds_atomic_umax_noret_i64_offset:
619 ; SICIVI: s_mov_b32 m0
622 ; GCN: ds_max_u64 {{.*}} offset:32
624 define amdgpu_kernel void @lds_atomic_umax_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
625 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
626 %result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst