1 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
4 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -stop-before=machine-scheduler < %s | FileCheck -enable-var-scope -check-prefixes=MIR %s
; Declarations of the atomic inc intrinsic for i32 and i64 values over addrspace(1), addrspace(3) and
; default-address-space pointers, plus the workitem id query used by the addr64 tests.
; NOTE(review) the trailing (i32, i32, i1) operands are presumably ordering, scope and volatile; every call
; in this file passes (0, 0, false) - confirm against the intrinsic definition.
6 declare i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2
7 declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2
8 declare i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2
10 declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #2
11 declare i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* nocapture, i64, i32, i32, i1) #2
12 declare i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2
14 declare i32 @llvm.amdgcn.workitem.id.x() #1
; Returning i32 atomic inc of constant 42 on an addrspace(3) pointer; the result is stored to %out.
; The call carries !noalias !0, and the MIR run verifies that the metadata survives on the memory operand.
16 ; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32:
17 ; CIVI-DAG: s_mov_b32 m0
20 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
21 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
22 ; MIR-LABEL: @lds_atomic_inc_ret_i32
23 ; MIR: DS_INC_RTN_U32 {{.*}} :: (load store (s32) on %{{.*}}, !noalias !{{[0-9]+}}, addrspace 3)
24 define amdgpu_kernel void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
25 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false), !noalias !0
26 store i32 %result, i32 addrspace(1)* %out
; Self-referential distinct node used as the !noalias operand of the call above.
30 !0 = distinct !{!0, !"noalias-scope"}
; Returning i32 atomic inc on an addrspace(3) pointer advanced by 4 i32 elements; the 16-byte displacement
; is expected to be folded into the instruction offset field.
32 ; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32_offset:
33 ; CIVI-DAG: s_mov_b32 m0
36 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
37 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
38 define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
39 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
40 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
41 store i32 %result, i32 addrspace(1)* %out
; Non-returning i32 atomic inc on an addrspace(3) pointer; the call result is unused, so the ds_inc_u32
; form (no destination register) is expected.
; The data-operand check uses the full constant 42 that the IR passes; a bare 4 would only match as a
; substring of 42 and would not pin the value.
45 ; GCN-LABEL: {{^}}lds_atomic_inc_noret_i32:
46 ; CIVI-DAG: s_mov_b32 m0
49 ; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
50 ; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 42
51 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
52 ; GCN: ds_inc_u32 [[VPTR]], [[DATA]]
53 define amdgpu_kernel void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind {
54 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
; Non-returning i32 atomic inc on an addrspace(3) pointer advanced by 4 i32 elements; expects ds_inc_u32
; with the 16-byte displacement in the offset field.
58 ; GCN-LABEL: {{^}}lds_atomic_inc_noret_i32_offset:
59 ; CIVI-DAG: s_mov_b32 m0
62 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
63 ; GCN: ds_inc_u32 v{{[0-9]+}}, [[K]] offset:16
64 define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
65 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
66 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
; Returning i32 atomic inc on an addrspace(1) pointer; the result is stored to %out.
; The bonaire/tonga runs expect buffer_atomic_inc and the gfx900 run expects global_atomic_inc, both with glc.
70 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i32:
71 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
72 ; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
73 ; GFX9: global_atomic_inc v{{[0-9]+}}, v{{[0-9]+}}, [[K]], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
74 define amdgpu_kernel void @global_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
75 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
76 store i32 %result, i32 addrspace(1)* %out
; Returning i32 atomic inc on an addrspace(1) pointer advanced by 4 i32 elements; every run expects the
; 16-byte displacement folded into the offset field together with glc.
80 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset:
81 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
82 ; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
83 ; GFX9: global_atomic_inc v{{[0-9]+}}, v{{[0-9]+}}, [[K]], s{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}}
84 define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
85 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
86 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
87 store i32 %result, i32 addrspace(1)* %out
; Non-returning i32 atomic inc on an addrspace(1) pointer; the result is unused, so the expected
; instructions end right after their operands with no glc modifier.
91 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i32:
92 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
93 ; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
94 ; GFX9: global_atomic_inc v{{[0-9]+}}, [[K]], s{{\[[0-9]+:[0-9]+\]$}}
95 define amdgpu_kernel void @global_atomic_inc_noret_i32(i32 addrspace(1)* %ptr) nounwind {
96 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
; Non-returning i32 atomic inc on an addrspace(1) pointer advanced by 4 i32 elements; expects offset 16
; and no glc modifier.
100 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i32_offset:
101 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
102 ; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
103 ; GFX9: global_atomic_inc v{{[0-9]+}}, [[K]], s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
104 define amdgpu_kernel void @global_atomic_inc_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
105 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
106 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
; Returning i32 atomic inc on an addrspace(1) pointer indexed by the workitem id and then advanced by 5
; i32 elements. The bonaire run expects the addr64 buffer form with offset 20; the tonga run expects
; flat_atomic_inc with no offset operand. No gfx900-specific expectation is present for this function.
110 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset_addr64:
111 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
112 ; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
113 ; VI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
114 define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
115 %id = call i32 @llvm.amdgcn.workitem.id.x()
116 %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
117 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id
118 %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
119 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
120 store i32 %result, i32 addrspace(1)* %out.gep
; Non-returning variant of the workitem-indexed addrspace(1) test; the pointer is advanced by 5 i32
; elements. The bonaire run expects addr64 with offset 20, the tonga run expects flat_atomic_inc, and
; neither carries glc.
124 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i32_offset_addr64:
125 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
126 ; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
127 ; VI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
128 define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 {
129 %id = call i32 @llvm.amdgcn.workitem.id.x()
130 %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
131 %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
132 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
; addrspace(3) array of 512 i32 elements indexed by the test below.
136 @lds0 = addrspace(3) global [512 x i32] undef, align 4
; Atomic inc on element (workitem id + 2) of @lds0. The index value is also stored to %add_use, giving the
; add a second use. Expects a left shift by 2 for the address and the constant part as offset 8.
138 ; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i32:
139 ; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
140 ; GCN: ds_inc_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
141 define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
142 %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
143 %idx.0 = add nsw i32 %tid.x, 2
144 %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0
145 %val0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9, i32 0, i32 0, i1 false)
146 store i32 %idx.0, i32 addrspace(1)* %add_use
147 store i32 %val0, i32 addrspace(1)* %out
; Returning i64 atomic inc of constant 42 on an addrspace(3) pointer; the constant is expected as a
; register pair holding 42 (low half) and 0 (high half).
151 ; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64:
152 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
153 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
154 ; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
155 define amdgpu_kernel void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
156 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
157 store i64 %result, i64 addrspace(1)* %out
; Returning i64 atomic inc on an addrspace(3) pointer advanced by 4 i64 elements; expects the 32-byte
; displacement in the offset field.
161 ; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
162 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
163 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
164 ; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32
165 define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
166 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
167 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
168 store i64 %result, i64 addrspace(1)* %out
; Non-returning i64 atomic inc on an addrspace(3) pointer; the result is unused, so ds_inc_u64 without a
; destination register is expected.
172 ; GCN-LABEL: {{^}}lds_atomic_inc_noret_i64:
173 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
174 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
175 ; GCN: ds_inc_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
176 define amdgpu_kernel void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
177 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
; Non-returning i64 atomic inc on an addrspace(3) pointer advanced by 4 i64 elements; expects ds_inc_u64
; with offset 32.
181 ; GCN-LABEL: {{^}}lds_atomic_inc_noret_i64_offset:
182 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
183 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
184 ; GCN: ds_inc_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
185 define amdgpu_kernel void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
186 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
187 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
; Returning i64 atomic inc on an addrspace(1) pointer; the result is stored to %out.
; The gfx900 run additionally expects a zero-initialized VGPR used as an operand of global_atomic_inc_x2.
191 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i64:
192 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
193 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
194 ; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
195 ; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
196 ; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
197 define amdgpu_kernel void @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
198 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
199 store i64 %result, i64 addrspace(1)* %out
; Returning i64 atomic inc on an addrspace(1) pointer advanced by 4 i64 elements; every run expects
; offset 32 and glc. The constant-materialization checks here are ordered rather than DAG checks.
203 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset:
204 ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
205 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
206 ; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
207 ; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
208 ; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
209 define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
210 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
211 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
212 store i64 %result, i64 addrspace(1)* %out
; Non-returning i64 atomic inc on an addrspace(1) pointer; the result is unused, so the expected
; instructions carry no glc modifier.
216 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i64:
217 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
218 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
219 ; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
220 ; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
222 ; GFX9: global_atomic_inc_x2 v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]$}}
223 define amdgpu_kernel void @global_atomic_inc_noret_i64(i64 addrspace(1)* %ptr) nounwind {
224 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
; Non-returning i64 atomic inc on an addrspace(1) pointer advanced by 4 i64 elements; expects offset 32
; and no glc modifier.
228 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset:
229 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
230 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
231 ; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
232 ; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
233 ; GFX9: global_atomic_inc_x2 v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
234 define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
235 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
236 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
; Returning i64 atomic inc on an addrspace(1) pointer indexed by the workitem id and then advanced by 5
; i64 elements. The bonaire run expects the addr64 buffer form with offset 40 (and one extra zero move);
; the tonga run expects flat_atomic_inc_x2 with no offset operand.
240 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset_addr64:
241 ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
242 ; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
243 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
244 ; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
245 ; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
246 define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
247 %id = call i32 @llvm.amdgcn.workitem.id.x()
248 %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
249 %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id
250 %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
251 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
252 store i64 %result, i64 addrspace(1)* %out.gep
; Non-returning variant of the workitem-indexed i64 addrspace(1) test; the pointer is advanced by 5 i64
; elements. The bonaire run expects addr64 with offset 40, the tonga run expects flat_atomic_inc_x2, and
; neither carries glc.
256 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset_addr64:
257 ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
258 ; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
259 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
260 ; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
261 ; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
262 define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 {
263 %id = call i32 @llvm.amdgcn.workitem.id.x()
264 %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
265 %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
266 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
; Returning i32 atomic inc on a default-address-space pointer; every run expects flat_atomic_inc with glc.
270 ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32:
271 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
272 ; GCN: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
273 define amdgpu_kernel void @flat_atomic_inc_ret_i32(i32* %out, i32* %ptr) #0 {
274 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
275 store i32 %result, i32* %out
; Returning i32 atomic inc on a default-address-space pointer advanced by 4 i32 elements. Only the gfx900
; run expects the 16-byte displacement in the instruction offset field; the bonaire/tonga patterns have none.
279 ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32_offset:
280 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
281 ; CIVI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
282 ; GFX9: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16 glc{{$}}
283 define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(i32* %out, i32* %ptr) #0 {
284 %gep = getelementptr i32, i32* %ptr, i32 4
285 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
286 store i32 %result, i32* %out
; Non-returning i32 atomic inc on a default-address-space pointer; the result is unused, so flat_atomic_inc
; is expected without a destination register or glc.
290 ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32:
291 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
292 ; GCN: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
293 define amdgpu_kernel void @flat_atomic_inc_noret_i32(i32* %ptr) nounwind {
294 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
; Non-returning i32 atomic inc on a default-address-space pointer advanced by 4 i32 elements; only the
; gfx900 run expects offset 16 on the instruction.
298 ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32_offset:
299 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
300 ; CIVI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
301 ; GFX9: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16{{$}}
302 define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(i32* %ptr) nounwind {
303 %gep = getelementptr i32, i32* %ptr, i32 4
304 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
; Returning i32 atomic inc on a default-address-space pointer indexed by the workitem id and then advanced
; by 5 i32 elements; the result is stored to the matching element of %out. Only the gfx900 run expects
; offset 20 on the instruction.
308 ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32_offset_addr64:
309 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
310 ; CIVI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
311 ; GFX9: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20 glc{{$}}
312 define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(i32* %out, i32* %ptr) #0 {
313 %id = call i32 @llvm.amdgcn.workitem.id.x()
314 %gep.tid = getelementptr i32, i32* %ptr, i32 %id
315 %out.gep = getelementptr i32, i32* %out, i32 %id
316 %gep = getelementptr i32, i32* %gep.tid, i32 5
317 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
318 store i32 %result, i32* %out.gep
; Non-returning variant of the workitem-indexed default-address-space i32 test; the pointer is advanced
; by 5 i32 elements and only the gfx900 run expects offset 20.
322 ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32_offset_addr64:
323 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
324 ; CIVI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
325 ; GFX9: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20{{$}}
326 define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(i32* %ptr) #0 {
327 %id = call i32 @llvm.amdgcn.workitem.id.x()
328 %gep.tid = getelementptr i32, i32* %ptr, i32 %id
329 %gep = getelementptr i32, i32* %gep.tid, i32 5
330 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
; addrspace(3) array of 512 i64 elements indexed by the test below.
334 @lds1 = addrspace(3) global [512 x i64] undef, align 8
; Atomic inc on element (workitem id + 2) of @lds1. The index value is also stored to %add_use, giving the
; add a second use. Expects a left shift by 3 for the address and the constant part as offset 16.
336 ; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i64:
337 ; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}}
338 ; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16
339 define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
340 %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
341 %idx.0 = add nsw i32 %tid.x, 2
342 %arrayidx0 = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %idx.0
343 %val0 = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %arrayidx0, i64 9, i32 0, i32 0, i1 false)
344 store i32 %idx.0, i32 addrspace(1)* %add_use
345 store i64 %val0, i64 addrspace(1)* %out
; Returning i64 atomic inc on a default-address-space pointer; every run expects flat_atomic_inc_x2 with
; glc and the constant held in a 42/0 register pair.
349 ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64:
350 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
351 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
352 ; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
353 define amdgpu_kernel void @flat_atomic_inc_ret_i64(i64* %out, i64* %ptr) #0 {
354 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
355 store i64 %result, i64* %out
; Returning i64 atomic inc on a default-address-space pointer advanced by 4 i64 elements; only the gfx900
; run expects the 32-byte displacement in the instruction offset field.
359 ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64_offset:
360 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
361 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
362 ; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
363 ; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32 glc{{$}}
364 define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(i64* %out, i64* %ptr) #0 {
365 %gep = getelementptr i64, i64* %ptr, i32 4
366 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
367 store i64 %result, i64* %out
; Non-returning i64 atomic inc on a default-address-space pointer; the result is unused, so
; flat_atomic_inc_x2 is expected without a destination register or glc.
371 ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64:
372 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
373 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
374 ; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
375 define amdgpu_kernel void @flat_atomic_inc_noret_i64(i64* %ptr) nounwind {
376 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
; Non-returning i64 atomic inc on a default-address-space pointer advanced by 4 i64 elements; only the
; gfx900 run expects offset 32 on the instruction.
380 ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64_offset:
381 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
382 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
383 ; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
384 ; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
385 define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(i64* %ptr) nounwind {
386 %gep = getelementptr i64, i64* %ptr, i32 4
387 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
; Returning i64 atomic inc on a default-address-space pointer indexed by the workitem id and then advanced
; by 5 i64 elements; the result is stored to the matching element of %out. Only the gfx900 run expects
; offset 40 on the instruction.
391 ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64_offset_addr64:
392 ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
393 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
394 ; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
395 ; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40 glc{{$}}
396 define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(i64* %out, i64* %ptr) #0 {
397 %id = call i32 @llvm.amdgcn.workitem.id.x()
398 %gep.tid = getelementptr i64, i64* %ptr, i32 %id
399 %out.gep = getelementptr i64, i64* %out, i32 %id
400 %gep = getelementptr i64, i64* %gep.tid, i32 5
401 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
402 store i64 %result, i64* %out.gep
; Non-returning variant of the workitem-indexed default-address-space i64 test; the pointer is advanced
; by 5 i64 elements and only the gfx900 run expects offset 40.
406 ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64_offset_addr64:
407 ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
408 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
409 ; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
410 ; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40{{$}}
411 define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(i64* %ptr) #0 {
412 %id = call i32 @llvm.amdgcn.workitem.id.x()
413 %gep.tid = getelementptr i64, i64* %ptr, i32 %id
414 %gep = getelementptr i64, i64* %gep.tid, i32 5
415 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
; Two identical returning atomic inc calls on the same addrspace(3) pointer, with results stored to
; separate outputs. Both ds_inc_rtn_u32 instructions are expected, so the calls must not be merged.
419 ; GCN-LABEL: {{^}}nocse_lds_atomic_inc_ret_i32:
420 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
421 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
422 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
423 define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(3)* %ptr) #0 {
424 %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
425 %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
427 store i32 %result0, i32 addrspace(1)* %out0
428 store i32 %result1, i32 addrspace(1)* %out1
; Attribute groups. #0 is applied to test kernels, #1 to the workitem id declaration and its tail calls,
; and #2 to the atomic inc intrinsic declarations.
432 attributes #0 = { nounwind }
433 attributes #1 = { nounwind readnone }
434 attributes #2 = { nounwind argmemonly }