1 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
5 declare i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2
6 declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2
7 declare i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2
9 declare i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #2
10 declare i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* nocapture, i64, i32, i32, i1) #2
11 declare i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2
13 declare i32 @llvm.amdgcn.workitem.id.x() #1
15 ; Make sure there is no crash when the ordering operand (%order.var) is not a constant.
16 ; GCN-LABEL: {{^}}invalid_variable_order_lds_atomic_dec_ret_i32:
17 ; CIVI-DAG: s_mov_b32 m0
19 define amdgpu_kernel void @invalid_variable_order_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %order.var) #0 {
20 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 %order.var, i32 0, i1 false)
21 store i32 %result, i32 addrspace(1)* %out
25 ; Make sure there is no crash when the scope operand (%scope.var) is not a constant.
26 ; GCN-LABEL: {{^}}invalid_variable_scope_lds_atomic_dec_ret_i32:
27 ; CIVI-DAG: s_mov_b32 m0
29 define amdgpu_kernel void @invalid_variable_scope_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %scope.var) #0 {
30 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 %scope.var, i1 false)
31 store i32 %result, i32 addrspace(1)* %out
35 ; Make sure there is no crash when the volatile operand (%volatile.var) is not a constant.
36 ; GCN-LABEL: {{^}}invalid_variable_volatile_lds_atomic_dec_ret_i32:
37 define amdgpu_kernel void @invalid_variable_volatile_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i1 %volatile.var) #0 {
38 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 %volatile.var)
39 store i32 %result, i32 addrspace(1)* %out
43 ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32:
44 ; CIVI-DAG: s_mov_b32 m0
47 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
48 ; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
49 define amdgpu_kernel void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
50 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
51 store i32 %result, i32 addrspace(1)* %out
55 ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32_offset:
56 ; CIVI-DAG: s_mov_b32 m0
59 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
60 ; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
61 define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
62 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
63 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
64 store i32 %result, i32 addrspace(1)* %out
68 ; GCN-LABEL: {{^}}lds_atomic_dec_noret_i32:
69 ; CIVI-DAG: s_mov_b32 m0
72 ; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
73 ; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
74 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
75 ; GCN: ds_dec_u32 [[VPTR]], [[DATA]]
76 define amdgpu_kernel void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind {
77 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
81 ; GCN-LABEL: {{^}}lds_atomic_dec_noret_i32_offset:
82 ; CIVI-DAG: s_mov_b32 m0
85 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
86 ; GCN: ds_dec_u32 v{{[0-9]+}}, [[K]] offset:16
87 define amdgpu_kernel void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
88 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
89 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
93 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i32:
94 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
95 ; CIVI: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
96 ; GFX9: global_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]], off glc{{$}}
97 define amdgpu_kernel void @global_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
98 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
99 store i32 %result, i32 addrspace(1)* %out
103 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i32_offset:
104 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
105 ; CIVI: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
106 ; GFX9: global_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]], off offset:16 glc{{$}}
107 define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
108 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
109 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
110 store i32 %result, i32 addrspace(1)* %out
114 ; GCN-LABEL: {{^}}global_atomic_dec_noret_i32:
115 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
116 ; CIVI: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
117 ; GFX9: global_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]], off{{$}}
118 define amdgpu_kernel void @global_atomic_dec_noret_i32(i32 addrspace(1)* %ptr) nounwind {
119 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
123 ; GCN-LABEL: {{^}}global_atomic_dec_noret_i32_offset:
124 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
125 ; CIVI: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
126 ; GFX9: global_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]], off offset:16{{$}}
127 define amdgpu_kernel void @global_atomic_dec_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
128 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
129 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
133 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i32_offset_addr64:
134 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
135 ; CI: buffer_atomic_dec [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
136 ; VI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
137 define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
138 %id = call i32 @llvm.amdgcn.workitem.id.x()
139 %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
140 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id
141 %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
142 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
143 store i32 %result, i32 addrspace(1)* %out.gep
147 ; GCN-LABEL: {{^}}global_atomic_dec_noret_i32_offset_addr64:
148 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
149 ; CI: buffer_atomic_dec [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
150 ; VI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
151 define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 {
152 %id = call i32 @llvm.amdgcn.workitem.id.x()
153 %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
154 %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
155 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
159 ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32:
160 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
161 ; GCN: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
162 define amdgpu_kernel void @flat_atomic_dec_ret_i32(i32* %out, i32* %ptr) #0 {
163 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
164 store i32 %result, i32* %out
168 ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32_offset:
169 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
170 ; CIVI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
171 ; GFX9: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16 glc{{$}}
172 define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(i32* %out, i32* %ptr) #0 {
173 %gep = getelementptr i32, i32* %ptr, i32 4
174 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
175 store i32 %result, i32* %out
179 ; GCN-LABEL: {{^}}flat_atomic_dec_noret_i32:
180 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
181 ; GCN: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
182 define amdgpu_kernel void @flat_atomic_dec_noret_i32(i32* %ptr) nounwind {
183 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
187 ; GCN-LABEL: {{^}}flat_atomic_dec_noret_i32_offset:
188 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
189 ; CIVI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
190 ; GFX9: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16{{$}}
191 define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(i32* %ptr) nounwind {
192 %gep = getelementptr i32, i32* %ptr, i32 4
193 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
197 ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32_offset_addr64:
198 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
199 ; CIVI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
200 ; GFX9: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20 glc{{$}}
201 define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(i32* %out, i32* %ptr) #0 {
202 %id = call i32 @llvm.amdgcn.workitem.id.x()
203 %gep.tid = getelementptr i32, i32* %ptr, i32 %id
204 %out.gep = getelementptr i32, i32* %out, i32 %id
205 %gep = getelementptr i32, i32* %gep.tid, i32 5
206 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
207 store i32 %result, i32* %out.gep
211 ; GCN-LABEL: {{^}}flat_atomic_dec_noret_i32_offset_addr64:
212 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
213 ; CIVI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
214 ; GFX9: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20{{$}}
215 define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(i32* %ptr) #0 {
216 %id = call i32 @llvm.amdgcn.workitem.id.x()
217 %gep.tid = getelementptr i32, i32* %ptr, i32 %id
218 %gep = getelementptr i32, i32* %gep.tid, i32 5
219 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
223 ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64:
224 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
225 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
226 ; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
227 define amdgpu_kernel void @flat_atomic_dec_ret_i64(i64* %out, i64* %ptr) #0 {
228 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
229 store i64 %result, i64* %out
233 ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64_offset:
234 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
235 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
236 ; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
237 ; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32 glc{{$}}
238 define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(i64* %out, i64* %ptr) #0 {
239 %gep = getelementptr i64, i64* %ptr, i32 4
240 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
241 store i64 %result, i64* %out
245 ; GCN-LABEL: {{^}}flat_atomic_dec_noret_i64:
246 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
247 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
248 ; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
249 define amdgpu_kernel void @flat_atomic_dec_noret_i64(i64* %ptr) nounwind {
250 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
254 ; GCN-LABEL: {{^}}flat_atomic_dec_noret_i64_offset:
255 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
256 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
257 ; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
258 ; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
259 define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(i64* %ptr) nounwind {
260 %gep = getelementptr i64, i64* %ptr, i32 4
261 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
265 ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64_offset_addr64:
266 ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
267 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
268 ; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
269 ; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40 glc{{$}}
270 define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(i64* %out, i64* %ptr) #0 {
271 %id = call i32 @llvm.amdgcn.workitem.id.x()
272 %gep.tid = getelementptr i64, i64* %ptr, i32 %id
273 %out.gep = getelementptr i64, i64* %out, i32 %id
274 %gep = getelementptr i64, i64* %gep.tid, i32 5
275 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
276 store i64 %result, i64* %out.gep
280 ; GCN-LABEL: {{^}}flat_atomic_dec_noret_i64_offset_addr64:
281 ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
282 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
283 ; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
284 ; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40{{$}}
285 define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(i64* %ptr) #0 {
286 %id = call i32 @llvm.amdgcn.workitem.id.x()
287 %gep.tid = getelementptr i64, i64* %ptr, i32 %id
288 %gep = getelementptr i64, i64* %gep.tid, i32 5
289 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
293 @lds0 = addrspace(3) global [512 x i32] undef
295 ; GCN-LABEL: {{^}}atomic_dec_shl_base_lds_0:
296 ; CIVI-DAG: s_mov_b32 m0
299 ; GCN-DAG: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
300 ; GCN: ds_dec_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
301 define amdgpu_kernel void @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
302 %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
303 %idx.0 = add nsw i32 %tid.x, 2
304 %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0
305 %val0 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9, i32 0, i32 0, i1 false)
306 store i32 %idx.0, i32 addrspace(1)* %add_use
307 store i32 %val0, i32 addrspace(1)* %out
311 ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64:
312 ; CIVI-DAG: s_mov_b32 m0
315 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
316 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
317 ; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
318 define amdgpu_kernel void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
319 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
320 store i64 %result, i64 addrspace(1)* %out
324 ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64_offset:
325 ; CIVI-DAG: s_mov_b32 m0
328 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
329 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
330 ; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32
331 define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
332 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
333 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
334 store i64 %result, i64 addrspace(1)* %out
338 ; GCN-LABEL: {{^}}lds_atomic_dec_noret_i64:
339 ; CIVI-DAG: s_mov_b32 m0
342 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
343 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
344 ; GCN: ds_dec_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
345 define amdgpu_kernel void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind {
346 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
350 ; GCN-LABEL: {{^}}lds_atomic_dec_noret_i64_offset:
351 ; CIVI-DAG: s_mov_b32 m0
354 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
355 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
356 ; GCN: ds_dec_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
357 define amdgpu_kernel void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
358 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
359 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
363 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i64:
364 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
365 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
366 ; CIVI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
367 ; GFX9: global_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off glc{{$}}
368 define amdgpu_kernel void @global_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
369 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
370 store i64 %result, i64 addrspace(1)* %out
374 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset:
375 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
376 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
377 ; CIVI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
378 ; GFX9: global_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off offset:32 glc{{$}}
379 define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
380 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
381 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
382 store i64 %result, i64 addrspace(1)* %out
386 ; GCN-LABEL: {{^}}global_atomic_dec_noret_i64:
387 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
388 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
389 ; CIVI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
390 ; GFX9: global_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off{{$}}
391 define amdgpu_kernel void @global_atomic_dec_noret_i64(i64 addrspace(1)* %ptr) nounwind {
392 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
396 ; GCN-LABEL: {{^}}global_atomic_dec_noret_i64_offset:
397 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
398 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
399 ; CIVI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
400 ; GFX9: global_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off offset:32{{$}}
401 define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
402 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
403 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
407 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset_addr64:
408 ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
409 ; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
410 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
411 ; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
412 ; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
413 define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
414 %id = call i32 @llvm.amdgcn.workitem.id.x()
415 %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
416 %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id
417 %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
418 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
419 store i64 %result, i64 addrspace(1)* %out.gep
423 ; GCN-LABEL: {{^}}global_atomic_dec_noret_i64_offset_addr64:
424 ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
425 ; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
426 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
427 ; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
428 ; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
429 define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 {
430 %id = call i32 @llvm.amdgcn.workitem.id.x()
431 %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
432 %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
433 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
437 @lds1 = addrspace(3) global [512 x i64] undef, align 8
439 ; GCN-LABEL: {{^}}atomic_dec_shl_base_lds_0_i64:
440 ; CIVI-DAG: s_mov_b32 m0
443 ; GCN-DAG: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}}
444 ; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16
445 define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
446 %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
447 %idx.0 = add nsw i32 %tid.x, 2
448 %arrayidx0 = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %idx.0
449 %val0 = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %arrayidx0, i64 9, i32 0, i32 0, i1 false)
450 store i32 %idx.0, i32 addrspace(1)* %add_use
451 store i64 %val0, i64 addrspace(1)* %out
455 attributes #0 = { nounwind }
456 attributes #1 = { nounwind readnone }
457 attributes #2 = { nounwind argmemonly }