1 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
5 declare i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) nocapture, i32, i32, i32, i1) #2
6 declare i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) nocapture, i32, i32, i32, i1) #2
7 declare i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr nocapture, i32, i32, i32, i1) #2
9 declare i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) nocapture, i64, i32, i32, i1) #2
10 declare i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) nocapture, i64, i32, i32, i1) #2
11 declare i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr nocapture, i64, i32, i32, i1) #2
13 declare i32 @llvm.amdgcn.workitem.id.x() #1
15 ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32:
16 ; CIVI-DAG: s_mov_b32 m0
19 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
20 ; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
21 define amdgpu_kernel void @lds_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 {
22 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %ptr, i32 42, i32 0, i32 0, i1 false)
23 store i32 %result, ptr addrspace(1) %out
27 ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32_offset:
28 ; CIVI-DAG: s_mov_b32 m0
31 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
32 ; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
33 define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 {
34 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
35 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %gep, i32 42, i32 0, i32 0, i1 false)
36 store i32 %result, ptr addrspace(1) %out
40 ; GCN-LABEL: {{^}}lds_atomic_dec_noret_i32:
41 ; CIVI-DAG: s_mov_b32 m0
44 ; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
45 ; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 42
46 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
47 ; GCN: ds_dec_u32 [[VPTR]], [[DATA]]
48 define amdgpu_kernel void @lds_atomic_dec_noret_i32(ptr addrspace(3) %ptr) nounwind {
49 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %ptr, i32 42, i32 0, i32 0, i1 false)
53 ; GCN-LABEL: {{^}}lds_atomic_dec_noret_i32_offset:
54 ; CIVI-DAG: s_mov_b32 m0
57 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
58 ; GCN: ds_dec_u32 v{{[0-9]+}}, [[K]] offset:16
59 define amdgpu_kernel void @lds_atomic_dec_noret_i32_offset(ptr addrspace(3) %ptr) nounwind {
60 %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
61 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %gep, i32 42, i32 0, i32 0, i1 false)
65 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i32:
66 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
67 ; CIVI: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
68 ; GFX9-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
69 ; GFX9: global_atomic_dec v{{[0-9]+}}, [[ZERO]], [[K]], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
70 define amdgpu_kernel void @global_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
71 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr, i32 42, i32 0, i32 0, i1 false)
72 store i32 %result, ptr addrspace(1) %out
76 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i32_offset:
77 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
78 ; CIVI: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
80 ; GFX9-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
81 ; GFX9: global_atomic_dec v{{[0-9]+}}, [[ZERO]], [[K]], s{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}}
82 define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
83 %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
84 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false)
85 store i32 %result, ptr addrspace(1) %out
89 ; GCN-LABEL: {{^}}global_atomic_dec_noret_i32:
90 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
91 ; CIVI: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
93 ; GFX9-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
94 ; GFX9: global_atomic_dec [[ZERO]], [[K]], s{{\[[0-9]+:[0-9]+\]$}}
95 define amdgpu_kernel void @global_atomic_dec_noret_i32(ptr addrspace(1) %ptr) nounwind {
96 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr, i32 42, i32 0, i32 0, i1 false)
100 ; GCN-LABEL: {{^}}global_atomic_dec_noret_i32_offset:
101 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
102 ; CIVI: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
104 ; GFX9-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
105 ; GFX9: global_atomic_dec [[ZERO]], [[K]], s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
106 define amdgpu_kernel void @global_atomic_dec_noret_i32_offset(ptr addrspace(1) %ptr) nounwind {
107 %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
108 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false)
112 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i32_offset_addr64:
113 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
114 ; CI: buffer_atomic_dec [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
115 ; VI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
116 define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
117 %id = call i32 @llvm.amdgcn.workitem.id.x()
118 %gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id
119 %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id
120 %gep = getelementptr i32, ptr addrspace(1) %gep.tid, i32 5
121 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false)
122 store i32 %result, ptr addrspace(1) %out.gep
126 ; GCN-LABEL: {{^}}global_atomic_dec_noret_i32_offset_addr64:
127 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
128 ; CI: buffer_atomic_dec [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
129 ; VI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
130 define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_addr64(ptr addrspace(1) %ptr) #0 {
131 %id = call i32 @llvm.amdgcn.workitem.id.x()
132 %gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id
133 %gep = getelementptr i32, ptr addrspace(1) %gep.tid, i32 5
134 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false)
138 ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32:
139 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
140 ; GCN: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
141 define amdgpu_kernel void @flat_atomic_dec_ret_i32(ptr %out, ptr %ptr) #0 {
142 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr, i32 42, i32 0, i32 0, i1 false)
143 store i32 %result, ptr %out
147 ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32_offset:
148 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
149 ; CIVI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
150 ; GFX9: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16 glc{{$}}
151 define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(ptr %out, ptr %ptr) #0 {
152 %gep = getelementptr i32, ptr %ptr, i32 4
153 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false)
154 store i32 %result, ptr %out
158 ; GCN-LABEL: {{^}}flat_atomic_dec_noret_i32:
159 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
160 ; GCN: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
161 define amdgpu_kernel void @flat_atomic_dec_noret_i32(ptr %ptr) nounwind {
162 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr, i32 42, i32 0, i32 0, i1 false)
166 ; GCN-LABEL: {{^}}flat_atomic_dec_noret_i32_offset:
167 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
168 ; CIVI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
169 ; GFX9: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16{{$}}
170 define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) nounwind {
171 %gep = getelementptr i32, ptr %ptr, i32 4
172 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false)
176 ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32_offset_addr64:
177 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
178 ; CIVI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
179 ; GFX9: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20 glc{{$}}
180 define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(ptr %out, ptr %ptr) #0 {
181 %id = call i32 @llvm.amdgcn.workitem.id.x()
182 %gep.tid = getelementptr i32, ptr %ptr, i32 %id
183 %out.gep = getelementptr i32, ptr %out, i32 %id
184 %gep = getelementptr i32, ptr %gep.tid, i32 5
185 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false)
186 store i32 %result, ptr %out.gep
190 ; GCN-LABEL: {{^}}flat_atomic_dec_noret_i32_offset_addr64:
191 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
192 ; CIVI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
193 ; GFX9: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20{{$}}
194 define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(ptr %ptr) #0 {
195 %id = call i32 @llvm.amdgcn.workitem.id.x()
196 %gep.tid = getelementptr i32, ptr %ptr, i32 %id
197 %gep = getelementptr i32, ptr %gep.tid, i32 5
198 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false)
202 ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64:
203 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
204 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
205 ; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}}
206 define amdgpu_kernel void @flat_atomic_dec_ret_i64(ptr %out, ptr %ptr) #0 {
207 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %ptr, i64 42, i32 0, i32 0, i1 false)
208 store i64 %result, ptr %out
212 ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64_offset:
213 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
214 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
215 ; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}}
216 ; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] offset:32 glc{{$}}
217 define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(ptr %out, ptr %ptr) #0 {
218 %gep = getelementptr i64, ptr %ptr, i32 4
219 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false)
220 store i64 %result, ptr %out
224 ; GCN-LABEL: {{^}}flat_atomic_dec_noret_i64:
225 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
226 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
227 ; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]{{\]$}}
228 define amdgpu_kernel void @flat_atomic_dec_noret_i64(ptr %ptr) nounwind {
229 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %ptr, i64 42, i32 0, i32 0, i1 false)
233 ; GCN-LABEL: {{^}}flat_atomic_dec_noret_i64_offset:
234 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
235 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
236 ; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]{{\]$}}
237 ; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] offset:32{{$}}
238 define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(ptr %ptr) nounwind {
239 %gep = getelementptr i64, ptr %ptr, i32 4
240 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false)
244 ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64_offset_addr64:
245 ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
246 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
247 ; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}}
248 ; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] offset:40 glc{{$}}
249 define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(ptr %out, ptr %ptr) #0 {
250 %id = call i32 @llvm.amdgcn.workitem.id.x()
251 %gep.tid = getelementptr i64, ptr %ptr, i32 %id
252 %out.gep = getelementptr i64, ptr %out, i32 %id
253 %gep = getelementptr i64, ptr %gep.tid, i32 5
254 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false)
255 store i64 %result, ptr %out.gep
259 ; GCN-LABEL: {{^}}flat_atomic_dec_noret_i64_offset_addr64:
260 ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
261 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
262 ; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]{{\]$}}
263 ; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] offset:40{{$}}
264 define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(ptr %ptr) #0 {
265 %id = call i32 @llvm.amdgcn.workitem.id.x()
266 %gep.tid = getelementptr i64, ptr %ptr, i32 %id
267 %gep = getelementptr i64, ptr %gep.tid, i32 5
268 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false)
272 @lds0 = addrspace(3) global [512 x i32] undef
274 ; GCN-LABEL: {{^}}atomic_dec_shl_base_lds_0:
275 ; CIVI-DAG: s_mov_b32 m0
278 ; GCN-DAG: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
279 ; GCN: ds_dec_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
280 define amdgpu_kernel void @atomic_dec_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
281 %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
282 %idx.0 = add nsw i32 %tid.x, 2
283 %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
284 %val0 = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %arrayidx0, i32 9, i32 0, i32 0, i1 false)
285 store i32 %idx.0, ptr addrspace(1) %add_use
286 store i32 %val0, ptr addrspace(1) %out
290 ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64:
291 ; CIVI-DAG: s_mov_b32 m0
294 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
295 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
296 ; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v[[[KLO]]:[[KHI]]]{{$}}
297 define amdgpu_kernel void @lds_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 {
298 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) %ptr, i64 42, i32 0, i32 0, i1 false)
299 store i64 %result, ptr addrspace(1) %out
303 ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64_offset:
304 ; CIVI-DAG: s_mov_b32 m0
307 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
308 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
309 ; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v[[[KLO]]:[[KHI]]] offset:32
310 define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 {
311 %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
312 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) %gep, i64 42, i32 0, i32 0, i1 false)
313 store i64 %result, ptr addrspace(1) %out
317 ; GCN-LABEL: {{^}}lds_atomic_dec_noret_i64:
318 ; CIVI-DAG: s_mov_b32 m0
321 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
322 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
323 ; GCN: ds_dec_u64 v{{[0-9]+}}, v[[[KLO]]:[[KHI]]]{{$}}
324 define amdgpu_kernel void @lds_atomic_dec_noret_i64(ptr addrspace(3) %ptr) nounwind {
325 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) %ptr, i64 42, i32 0, i32 0, i1 false)
329 ; GCN-LABEL: {{^}}lds_atomic_dec_noret_i64_offset:
330 ; CIVI-DAG: s_mov_b32 m0
333 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
334 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
335 ; GCN: ds_dec_u64 v{{[0-9]+}}, v[[[KLO]]:[[KHI]]] offset:32{{$}}
336 define amdgpu_kernel void @lds_atomic_dec_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
337 %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
338 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) %gep, i64 42, i32 0, i32 0, i1 false)
342 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i64:
343 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
344 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
345 ; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
346 ; CIVI: buffer_atomic_dec_x2 v[[[KLO]]:[[KHI]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
348 ; GFX9: global_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v[[[KLO]]:[[KHI]]], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
349 define amdgpu_kernel void @global_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
350 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %ptr, i64 42, i32 0, i32 0, i1 false)
351 store i64 %result, ptr addrspace(1) %out
355 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset:
356 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
357 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
358 ; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
359 ; CIVI: buffer_atomic_dec_x2 v[[[KLO]]:[[KHI]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
360 ; GFX9: global_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v[[[KLO]]:[[KHI]]], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
361 define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
362 %gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4
363 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false)
364 store i64 %result, ptr addrspace(1) %out
368 ; GCN-LABEL: {{^}}global_atomic_dec_noret_i64:
369 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
370 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
371 ; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
372 ; CIVI: buffer_atomic_dec_x2 v[[[KLO]]:[[KHI]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
373 ; GFX9: global_atomic_dec_x2 v[[ZERO]], v[[[KLO]]:[[KHI]]], s{{\[[0-9]+:[0-9]+\]$}}
374 define amdgpu_kernel void @global_atomic_dec_noret_i64(ptr addrspace(1) %ptr) nounwind {
375 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %ptr, i64 42, i32 0, i32 0, i1 false)
379 ; GCN-LABEL: {{^}}global_atomic_dec_noret_i64_offset:
380 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
381 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
382 ; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
383 ; CIVI: buffer_atomic_dec_x2 v[[[KLO]]:[[KHI]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
384 ; GFX9: global_atomic_dec_x2 v[[ZERO]], v[[[KLO]]:[[KHI]]], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
385 define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(ptr addrspace(1) %ptr) nounwind {
386 %gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4
387 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false)
391 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset_addr64:
392 ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
393 ; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
394 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
395 ; CI: buffer_atomic_dec_x2 v[[[KLO]]:[[KHI]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
396 ; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}}
397 define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
398 %id = call i32 @llvm.amdgcn.workitem.id.x()
399 %gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id
400 %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id
401 %gep = getelementptr i64, ptr addrspace(1) %gep.tid, i32 5
402 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false)
403 store i64 %result, ptr addrspace(1) %out.gep
407 ; GCN-LABEL: {{^}}global_atomic_dec_noret_i64_offset_addr64:
408 ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
409 ; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
410 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
411 ; CI: buffer_atomic_dec_x2 v[[[KLO]]:[[KHI]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
412 ; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]]{{$}}
413 define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(ptr addrspace(1) %ptr) #0 {
414 %id = call i32 @llvm.amdgcn.workitem.id.x()
415 %gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id
416 %gep = getelementptr i64, ptr addrspace(1) %gep.tid, i32 5
417 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false)
421 @lds1 = addrspace(3) global [512 x i64] undef, align 8
423 ; GCN-LABEL: {{^}}atomic_dec_shl_base_lds_0_i64:
424 ; CIVI-DAG: s_mov_b32 m0
427 ; GCN-DAG: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}}
428 ; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16
429 define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
430 %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
431 %idx.0 = add nsw i32 %tid.x, 2
432 %arrayidx0 = getelementptr inbounds [512 x i64], ptr addrspace(3) @lds1, i32 0, i32 %idx.0
433 %val0 = call i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) %arrayidx0, i64 9, i32 0, i32 0, i1 false)
434 store i32 %idx.0, ptr addrspace(1) %add_use
435 store i64 %val0, ptr addrspace(1) %out
439 attributes #0 = { nounwind }
440 attributes #1 = { nounwind readnone }
441 attributes #2 = { nounwind argmemonly }