; RUN: llc -mtriple=amdgcn -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,SI,SICIVI %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,SICIVI,GFX89 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,GFX9,GFX89 %s

; GCN-LABEL: {{^}}lds_atomic_xchg_ret_i64:
; GCN: ds_wrxchg_rtn_b64
define amdgpu_kernel void @lds_atomic_xchg_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw xchg ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xchg_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
define amdgpu_kernel void @lds_atomic_xchg_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw xchg ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xchg_ret_f64_offset:
; SICIVI: s_mov_b32 m0
; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
define amdgpu_kernel void @lds_atomic_xchg_ret_f64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr double, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw xchg ptr addrspace(3) %gep, double 4.0 seq_cst
  store double %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xchg_ret_pointer_offset:
; SICIVI: s_mov_b32 m0
; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
define amdgpu_kernel void @lds_atomic_xchg_ret_pointer_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr ptr, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw xchg ptr addrspace(3) %gep, ptr null seq_cst
  store ptr %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add_ret_i64:
; SICIVI: s_mov_b32 m0
define amdgpu_kernel void @lds_atomic_add_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw add ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add_ret_i64_offset:
; SICIVI-DAG: s_mov_b32 m0
; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v[[[LOVDATA]]:[[HIVDATA]]] offset:32
; GCN: buffer_store_dwordx2 [[RESULT]],
define amdgpu_kernel void @lds_atomic_add_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i64 4
  %result = atomicrmw add ptr addrspace(3) %gep, i64 9 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add1_ret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]]
; GCN: buffer_store_dwordx2 [[RESULT]],
define amdgpu_kernel void @lds_atomic_add1_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw add ptr addrspace(3) %ptr, i64 1 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add1_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GCN: ds_add_rtn_u64 {{.*}} offset:32
define amdgpu_kernel void @lds_atomic_add1_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw add ptr addrspace(3) %gep, i64 1 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub_ret_i64:
; SICIVI: s_mov_b32 m0
; GCN: ds_sub_rtn_u64
define amdgpu_kernel void @lds_atomic_sub_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw sub ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GCN: ds_sub_rtn_u64 {{.*}} offset:32
define amdgpu_kernel void @lds_atomic_sub_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw sub ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub1_ret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_sub_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]]
; GCN: buffer_store_dwordx2 [[RESULT]],
define amdgpu_kernel void @lds_atomic_sub1_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw sub ptr addrspace(3) %ptr, i64 1 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub1_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GCN: ds_sub_rtn_u64 {{.*}} offset:32
define amdgpu_kernel void @lds_atomic_sub1_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw sub ptr addrspace(3) %gep, i64 1 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_and_ret_i64:
; SICIVI: s_mov_b32 m0
; GCN: ds_and_rtn_b64
define amdgpu_kernel void @lds_atomic_and_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw and ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_and_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GCN: ds_and_rtn_b64 {{.*}} offset:32
define amdgpu_kernel void @lds_atomic_and_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw and ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_or_ret_i64:
; SICIVI: s_mov_b32 m0
define amdgpu_kernel void @lds_atomic_or_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw or ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_or_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GCN: ds_or_rtn_b64 {{.*}} offset:32
define amdgpu_kernel void @lds_atomic_or_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw or ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xor_ret_i64:
; SICIVI: s_mov_b32 m0
; GCN: ds_xor_rtn_b64
define amdgpu_kernel void @lds_atomic_xor_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw xor ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xor_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GCN: ds_xor_rtn_b64 {{.*}} offset:32
define amdgpu_kernel void @lds_atomic_xor_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw xor ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
; XGCN-LABEL: {{^}}lds_atomic_nand_ret_i64:
; define amdgpu_kernel void @lds_atomic_nand_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
;   %result = atomicrmw nand ptr addrspace(3) %ptr, i64 4 seq_cst
;   store i64 %result, ptr addrspace(1) %out, align 8
;   ret void
; }

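; A commented-out sketch (not compiled or checked by FileCheck) of the kind of
; cmpxchg-loop expansion an i64 LDS 'atomicrmw nand' would need; the block and
; value names below are illustrative assumptions, not part of this test:
; entry:
;   %init = load i64, ptr addrspace(3) %ptr
;   br label %loop
; loop:
;   %old = phi i64 [ %init, %entry ], [ %loaded, %loop ]
;   %and = and i64 %old, 4
;   %nand = xor i64 %and, -1
;   %pair = cmpxchg ptr addrspace(3) %ptr, i64 %old, i64 %nand seq_cst seq_cst
;   %loaded = extractvalue { i64, i1 } %pair, 0
;   %success = extractvalue { i64, i1 } %pair, 1
;   br i1 %success, label %done, label %loop
; done:                                 ; %old is the pre-nand value atomicrmw would return
;   store i64 %old, ptr addrspace(1) %out, align 8
;   ret void
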
; GCN-LABEL: {{^}}lds_atomic_min_ret_i64:
; SICIVI: s_mov_b32 m0
; GCN: ds_min_rtn_i64
define amdgpu_kernel void @lds_atomic_min_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw min ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_min_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GCN: ds_min_rtn_i64 {{.*}} offset:32
define amdgpu_kernel void @lds_atomic_min_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw min ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_max_ret_i64:
; SICIVI: s_mov_b32 m0
; GCN: ds_max_rtn_i64
define amdgpu_kernel void @lds_atomic_max_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw max ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_max_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GCN: ds_max_rtn_i64 {{.*}} offset:32
define amdgpu_kernel void @lds_atomic_max_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw max ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umin_ret_i64:
; SICIVI: s_mov_b32 m0
; GCN: ds_min_rtn_u64
define amdgpu_kernel void @lds_atomic_umin_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw umin ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umin_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GCN: ds_min_rtn_u64 {{.*}} offset:32
define amdgpu_kernel void @lds_atomic_umin_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw umin ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umax_ret_i64:
; SICIVI: s_mov_b32 m0
; GCN: ds_max_rtn_u64
define amdgpu_kernel void @lds_atomic_umax_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw umax ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umax_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GCN: ds_max_rtn_u64 {{.*}} offset:32
define amdgpu_kernel void @lds_atomic_umax_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw umax ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xchg_noret_i64:
; SICIVI: s_mov_b32 m0
; GCN: ds_wrxchg_rtn_b64
define amdgpu_kernel void @lds_atomic_xchg_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw xchg ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xchg_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
define amdgpu_kernel void @lds_atomic_xchg_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw xchg ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add_noret_i64:
; SICIVI: s_mov_b32 m0
define amdgpu_kernel void @lds_atomic_add_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw add ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add_noret_i64_offset:
; SICIVI-DAG: s_mov_b32 m0
; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN: ds_add_u64 {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]] offset:32
define amdgpu_kernel void @lds_atomic_add_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i64 4
  %result = atomicrmw add ptr addrspace(3) %gep, i64 9 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add1_noret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_add_u64 {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]]
define amdgpu_kernel void @lds_atomic_add1_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw add ptr addrspace(3) %ptr, i64 1 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add1_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GCN: ds_add_u64 {{.*}} offset:32
define amdgpu_kernel void @lds_atomic_add1_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw add ptr addrspace(3) %gep, i64 1 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub_noret_i64:
; SICIVI: s_mov_b32 m0
define amdgpu_kernel void @lds_atomic_sub_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw sub ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GCN: ds_sub_u64 {{.*}} offset:32
define amdgpu_kernel void @lds_atomic_sub_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw sub ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub1_noret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_sub_u64 {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]]
define amdgpu_kernel void @lds_atomic_sub1_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw sub ptr addrspace(3) %ptr, i64 1 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub1_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GCN: ds_sub_u64 {{.*}} offset:32
define amdgpu_kernel void @lds_atomic_sub1_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw sub ptr addrspace(3) %gep, i64 1 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_and_noret_i64:
; SICIVI: s_mov_b32 m0
define amdgpu_kernel void @lds_atomic_and_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw and ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_and_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GCN: ds_and_b64 {{.*}} offset:32
define amdgpu_kernel void @lds_atomic_and_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw and ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_or_noret_i64:
; SICIVI: s_mov_b32 m0
define amdgpu_kernel void @lds_atomic_or_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw or ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_or_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GCN: ds_or_b64 {{.*}} offset:32
define amdgpu_kernel void @lds_atomic_or_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw or ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xor_noret_i64:
; SICIVI: s_mov_b32 m0
define amdgpu_kernel void @lds_atomic_xor_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw xor ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xor_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GCN: ds_xor_b64 {{.*}} offset:32
define amdgpu_kernel void @lds_atomic_xor_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw xor ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
; XGCN-LABEL: {{^}}lds_atomic_nand_noret_i64:
; define amdgpu_kernel void @lds_atomic_nand_noret_i64(ptr addrspace(3) %ptr) nounwind {
;   %result = atomicrmw nand ptr addrspace(3) %ptr, i64 4 seq_cst
;   ret void
; }

; GCN-LABEL: {{^}}lds_atomic_min_noret_i64:
; SICIVI: s_mov_b32 m0
define amdgpu_kernel void @lds_atomic_min_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw min ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_min_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GCN: ds_min_i64 {{.*}} offset:32
define amdgpu_kernel void @lds_atomic_min_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw min ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_max_noret_i64:
; SICIVI: s_mov_b32 m0
define amdgpu_kernel void @lds_atomic_max_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw max ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_max_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GCN: ds_max_i64 {{.*}} offset:32
define amdgpu_kernel void @lds_atomic_max_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw max ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umin_noret_i64:
; SICIVI: s_mov_b32 m0
define amdgpu_kernel void @lds_atomic_umin_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw umin ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umin_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GCN: ds_min_u64 {{.*}} offset:32
define amdgpu_kernel void @lds_atomic_umin_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw umin ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umax_noret_i64:
; SICIVI: s_mov_b32 m0
define amdgpu_kernel void @lds_atomic_umax_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw umax ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umax_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GCN: ds_max_u64 {{.*}} offset:32
define amdgpu_kernel void @lds_atomic_umax_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw umax ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64:
; SICIVI: s_mov_b32 m0
; GCN: ds_inc_rtn_u64
define amdgpu_kernel void @lds_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
; SICIVI-DAG: s_mov_b32 m0
; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v[[[LOVDATA]]:[[HIVDATA]]] offset:32
; GCN: buffer_store_dwordx2 [[RESULT]],
define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i64 4
  %result = atomicrmw uinc_wrap ptr addrspace(3) %gep, i64 9 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_inc1_ret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]]
; GCN: buffer_store_dwordx2 [[RESULT]],
define amdgpu_kernel void @lds_atomic_inc1_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i64 1 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_inc1_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GCN: ds_inc_rtn_u64 {{.*}} offset:32
define amdgpu_kernel void @lds_atomic_inc1_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw uinc_wrap ptr addrspace(3) %gep, i64 1 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64:
; SICIVI: s_mov_b32 m0
; GCN: ds_dec_rtn_u64
define amdgpu_kernel void @lds_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64_offset:
; SICIVI-DAG: s_mov_b32 m0
; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v[[[LOVDATA]]:[[HIVDATA]]] offset:32
; GCN: buffer_store_dwordx2 [[RESULT]],
define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i64 4
  %result = atomicrmw udec_wrap ptr addrspace(3) %gep, i64 9 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_dec1_ret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]]
; GCN: buffer_store_dwordx2 [[RESULT]],
define amdgpu_kernel void @lds_atomic_dec1_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i64 1 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_dec1_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GCN: ds_dec_rtn_u64 {{.*}} offset:32
define amdgpu_kernel void @lds_atomic_dec1_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw udec_wrap ptr addrspace(3) %gep, i64 1 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}