1 ; RUN: llc -march=amdgcn -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SIVI %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -amdgpu-atomic-optimizations=false -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; --- atomicrmw add tests ---
; Verifies instruction selection for seq_cst 'atomicrmw volatile add' on i32 in
; global memory (addrspace 1): SI/VI select buffer/flat atomics, GFX9 selects
; global atomics; 'glc' (return data) appears only when the result is used.
; NOTE(review): the function bodies below appear to have lost their entry:/ret
; void/} lines in this excerpt — confirm against the full test file.
5 ; GCN-LABEL: {{^}}atomic_add_i32_offset:
6 ; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
7 ; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
8 define amdgpu_kernel void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) {
10 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
11 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
; A -1024 element (-4096 byte) gep: GFX9 can fold the maximum negative offset.
15 ; GCN-LABEL: {{^}}atomic_add_i32_max_neg_offset:
16 ; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:-4096{{$}}
17 define amdgpu_kernel void @atomic_add_i32_max_neg_offset(i32 addrspace(1)* %out, i32 %in) {
19 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 -1024
20 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
; 9000 elements = 36000 bytes = 0x8ca0: too large for an immediate offset, so
; SI/VI must materialize it into an SGPR used as the buffer soffset operand.
24 ; GCN-LABEL: {{^}}atomic_add_i32_soffset:
25 ; SIVI: s_mov_b32 [[SREG:s[0-9]+]], 0x8ca0
26 ; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}}
28 ; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}
29 define amdgpu_kernel void @atomic_add_i32_soffset(i32 addrspace(1)* %out, i32 %in) {
31 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 9000
32 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
; Offset too large for any addressing mode: the full 64-bit address must be
; computed into a VGPR pair (SI checks the low/high halves being materialized).
36 ; GCN-LABEL: {{^}}atomic_add_i32_huge_offset:
37 ; SI-DAG: v_mov_b32_e32 v[[PTRLO:[0-9]+]], 0xdeac
38 ; SI-DAG: v_mov_b32_e32 v[[PTRHI:[0-9]+]], 0xabcd
39 ; SI: buffer_atomic_add v{{[0-9]+}}, v{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
43 ; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}
44 define amdgpu_kernel void @atomic_add_i32_huge_offset(i32 addrspace(1)* %out, i32 %in) {
46 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 47224239175595
48 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
; Returned-value variants: atomic must carry 'glc' and the result is stored.
52 ; GCN-LABEL: {{^}}atomic_add_i32_ret_offset:
53 ; SIVI: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
54 ; SIVI: buffer_store_dword [[RET]]
56 ; GFX9: global_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
57 define amdgpu_kernel void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
59 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
60 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
61 store i32 %val, i32 addrspace(1)* %out2
; Register-indexed (addr64) variants: SI uses buffer addr64; VI has no addr64
; and falls back to flat atomics; GFX9 folds the constant offset into global.
65 ; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset:
66 ; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
67 ; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
68 ; GFX9: global_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
69 define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
71 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
72 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
73 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
77 ; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
78 ; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
79 ; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
80 ; SIVI: buffer_store_dword [[RET]]
82 ; GFX9: global_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
83 ; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
84 define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
86 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
87 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
88 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
89 store i32 %val, i32 addrspace(1)* %out2
; Plain (no gep offset) variants of the same add patterns.
93 ; GCN-LABEL: {{^}}atomic_add_i32:
94 ; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
95 ; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}
96 define amdgpu_kernel void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) {
98 %val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
102 ; GCN-LABEL: {{^}}atomic_add_i32_ret:
103 ; SIVI: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
104 ; SIVI: buffer_store_dword [[RET]]
106 ; GFX9: global_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
107 ; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
108 define amdgpu_kernel void @atomic_add_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
110 %val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
111 store i32 %val, i32 addrspace(1)* %out2
115 ; GCN-LABEL: {{^}}atomic_add_i32_addr64:
116 ; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
117 ; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
118 ; GFX9: global_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
119 define amdgpu_kernel void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
121 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
122 %val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
126 ; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64:
127 ; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
128 ; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
129 ; SIVI: buffer_store_dword [[RET]]
131 ; GFX9: global_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
132 define amdgpu_kernel void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
134 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
135 %val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
136 store i32 %val, i32 addrspace(1)* %out2
; --- atomicrmw and tests (offset / ret_offset / addr64_offset forms) ---
; Same selection pattern as the add tests, for 'atomicrmw volatile and'.
140 ; GCN-LABEL: {{^}}atomic_and_i32_offset:
141 ; SIVI: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
143 ; GFX9: global_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
144 define amdgpu_kernel void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) {
146 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
147 %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
151 ; GCN-LABEL: {{^}}atomic_and_i32_ret_offset:
152 ; SIVI: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
153 ; SIVI: buffer_store_dword [[RET]]
155 ; GFX9: global_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
156 define amdgpu_kernel void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
158 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
159 %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
160 store i32 %val, i32 addrspace(1)* %out2
164 ; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset:
165 ; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
166 ; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
168 ; GFX9: global_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
169 define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
171 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
172 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
173 %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
; Returned-value, register-indexed + constant-offset form of atomicrmw and:
; the atomic must execute with glc and the result must reach a store.
; Fixed: the VI and GFX9 captures used [[RET:v[0-9]]] (single digit, missing
; '+'), which fails to match when the result register is v10 or higher; use
; v[0-9]+ like every sibling test in this file.
177 ; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
178 ; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
179 ; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
180 ; SIVI: buffer_store_dword [[RET]]
182 ; GFX9: global_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
183 define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
185 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
186 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
187 %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
188 store i32 %val, i32 addrspace(1)* %out2
; --- atomicrmw and tests (plain / ret / addr64 / ret_addr64 forms) ---
192 ; GCN-LABEL: {{^}}atomic_and_i32:
193 ; SIVI: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
195 ; GFX9: global_atomic_and v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}
196 define amdgpu_kernel void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) {
198 %val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
202 ; GCN-LABEL: {{^}}atomic_and_i32_ret:
203 ; SIVI: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
204 ; SIVI: buffer_store_dword [[RET]]
206 ; GFX9: global_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
207 define amdgpu_kernel void @atomic_and_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
209 %val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
210 store i32 %val, i32 addrspace(1)* %out2
214 ; GCN-LABEL: {{^}}atomic_and_i32_addr64:
215 ; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
216 ; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
218 ; GFX9: global_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
219 define amdgpu_kernel void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
221 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
222 %val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
226 ; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64:
227 ; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
228 ; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
229 ; SIVI: buffer_store_dword [[RET]]
231 ; GFX9: global_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
232 define amdgpu_kernel void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
234 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
235 %val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
236 store i32 %val, i32 addrspace(1)* %out2
; --- atomicrmw sub tests ---
; Same selection matrix as add/and, for 'atomicrmw volatile sub'.
240 ; GCN-LABEL: {{^}}atomic_sub_i32_offset:
241 ; SIVI: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
243 ; GFX9: global_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
244 define amdgpu_kernel void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) {
246 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
247 %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
251 ; GCN-LABEL: {{^}}atomic_sub_i32_ret_offset:
252 ; SIVI: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
253 ; SIVI: buffer_store_dword [[RET]]
255 ; GFX9: global_atomic_sub v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
256 define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
258 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
259 %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
260 store i32 %val, i32 addrspace(1)* %out2
264 ; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset:
265 ; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
266 ; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
268 ; GFX9: global_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
269 define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
271 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
272 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
273 %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
277 ; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
278 ; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
279 ; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
280 ; SIVI: buffer_store_dword [[RET]]
282 ; GFX9: global_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
283 define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
285 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
286 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
287 %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
288 store i32 %val, i32 addrspace(1)* %out2
292 ; GCN-LABEL: {{^}}atomic_sub_i32:
293 ; SIVI: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
295 ; GFX9: global_atomic_sub v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}
296 define amdgpu_kernel void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) {
298 %val = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
302 ; GCN-LABEL: {{^}}atomic_sub_i32_ret:
303 ; SIVI: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
304 ; SIVI: buffer_store_dword [[RET]]
306 ; GFX9: global_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
307 define amdgpu_kernel void @atomic_sub_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
309 %val = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
310 store i32 %val, i32 addrspace(1)* %out2
314 ; GCN-LABEL: {{^}}atomic_sub_i32_addr64:
315 ; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
316 ; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
318 ; GFX9: global_atomic_sub v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}
319 define amdgpu_kernel void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
321 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
322 %val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
326 ; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64:
327 ; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
328 ; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
329 ; SIVI: buffer_store_dword [[RET]]
331 ; GFX9: global_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
332 define amdgpu_kernel void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
334 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
335 %val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
336 store i32 %val, i32 addrspace(1)* %out2
; --- atomicrmw max tests ---
; Signed max: IR 'atomicrmw max' must select the signed smax machine atomics.
340 ; GCN-LABEL: {{^}}atomic_max_i32_offset:
341 ; SIVI: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
343 ; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
344 define amdgpu_kernel void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) {
346 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
347 %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
351 ; GCN-LABEL: {{^}}atomic_max_i32_ret_offset:
352 ; SIVI: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
353 ; SIVI: buffer_store_dword [[RET]]
355 ; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
356 define amdgpu_kernel void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
358 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
359 %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
360 store i32 %val, i32 addrspace(1)* %out2
364 ; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset:
365 ; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
366 ; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
368 ; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
369 define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
371 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
372 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
373 %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
377 ; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
378 ; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
379 ; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
380 ; SIVI: buffer_store_dword [[RET]]
382 ; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
383 define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
385 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
386 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
387 %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
388 store i32 %val, i32 addrspace(1)* %out2
392 ; GCN-LABEL: {{^}}atomic_max_i32:
393 ; SIVI: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
395 ; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
396 define amdgpu_kernel void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) {
398 %val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
402 ; GCN-LABEL: {{^}}atomic_max_i32_ret:
403 ; SIVI: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
404 ; SIVI: buffer_store_dword [[RET]]
406 ; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
407 define amdgpu_kernel void @atomic_max_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
409 %val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
410 store i32 %val, i32 addrspace(1)* %out2
414 ; GCN-LABEL: {{^}}atomic_max_i32_addr64:
415 ; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
416 ; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
418 ; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
419 define amdgpu_kernel void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
421 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
422 %val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
426 ; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64:
427 ; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
428 ; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
429 ; SIVI: buffer_store_dword [[RET]]
431 ; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
432 define amdgpu_kernel void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
434 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
435 %val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
436 store i32 %val, i32 addrspace(1)* %out2
; --- atomicrmw umax tests ---
; Unsigned max: IR 'atomicrmw umax' must select the unsigned umax atomics.
440 ; GCN-LABEL: {{^}}atomic_umax_i32_offset:
441 ; SIVI: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
443 ; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
444 define amdgpu_kernel void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) {
446 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
447 %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
451 ; GCN-LABEL: {{^}}atomic_umax_i32_ret_offset:
452 ; SIVI: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
453 ; SIVI: buffer_store_dword [[RET]]
455 ; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
456 define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
458 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
459 %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
460 store i32 %val, i32 addrspace(1)* %out2
464 ; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset:
465 ; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
466 ; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
467 ; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
468 define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
470 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
471 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
472 %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
476 ; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
477 ; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
478 ; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
479 ; SIVI: buffer_store_dword [[RET]]
481 ; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
482 define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
484 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
485 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
486 %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
487 store i32 %val, i32 addrspace(1)* %out2
491 ; GCN-LABEL: {{^}}atomic_umax_i32:
492 ; SIVI: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
494 ; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
495 define amdgpu_kernel void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) {
497 %val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
501 ; GCN-LABEL: {{^}}atomic_umax_i32_ret:
502 ; SIVI: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
503 ; SIVI: buffer_store_dword [[RET]]
505 ; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
506 define amdgpu_kernel void @atomic_umax_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
508 %val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
509 store i32 %val, i32 addrspace(1)* %out2
513 ; GCN-LABEL: {{^}}atomic_umax_i32_addr64:
514 ; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
515 ; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
516 ; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
517 define amdgpu_kernel void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
519 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
520 %val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
524 ; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64:
525 ; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
526 ; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
527 ; SIVI: buffer_store_dword [[RET]]
529 ; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
530 define amdgpu_kernel void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
532 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
533 %val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
534 store i32 %val, i32 addrspace(1)* %out2
; --- atomicrmw min tests (offset / ret_offset forms) ---
; Signed min: IR 'atomicrmw min' must select the signed smin machine atomics.
538 ; GCN-LABEL: {{^}}atomic_min_i32_offset:
539 ; SIVI: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
541 ; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
542 define amdgpu_kernel void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) {
544 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
545 %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
549 ; GCN-LABEL: {{^}}atomic_min_i32_ret_offset:
550 ; SIVI: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
551 ; SIVI: buffer_store_dword [[RET]]
553 ; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
554 define amdgpu_kernel void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
556 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
557 %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
558 store i32 %val, i32 addrspace(1)* %out2
; Register-indexed + constant-offset form of atomicrmw min (result unused, so
; no glc is expected on the atomic).
; Fixed: the GFX9 check was missing the {{$}} end-of-line anchor that every
; sibling check in this file uses, so unexpected trailing modifiers (e.g. glc)
; would not have been rejected.
562 ; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset:
563 ; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
564 ; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
565 ; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
566 define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
568 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
569 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
570 %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
; --- atomicrmw min tests (remaining forms) ---
574 ; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
575 ; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
576 ; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
577 ; SIVI: buffer_store_dword [[RET]]
579 ; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
580 define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
582 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
583 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
584 %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
585 store i32 %val, i32 addrspace(1)* %out2
589 ; GCN-LABEL: {{^}}atomic_min_i32:
590 ; SIVI: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
592 ; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
593 define amdgpu_kernel void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) {
595 %val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
599 ; GCN-LABEL: {{^}}atomic_min_i32_ret:
600 ; SIVI: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
601 ; SIVI: buffer_store_dword [[RET]]
603 ; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
604 define amdgpu_kernel void @atomic_min_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
606 %val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
607 store i32 %val, i32 addrspace(1)* %out2
611 ; GCN-LABEL: {{^}}atomic_min_i32_addr64:
612 ; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
613 ; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
614 ; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
615 define amdgpu_kernel void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
617 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
618 %val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
622 ; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64:
623 ; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
624 ; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
625 ; SIVI: buffer_store_dword [[RET]]
627 ; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
628 define amdgpu_kernel void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
630 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
631 %val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
632 store i32 %val, i32 addrspace(1)* %out2
; --- atomicrmw umin tests ---
; Unsigned min: IR 'atomicrmw umin' must select the unsigned umin atomics.
636 ; GCN-LABEL: {{^}}atomic_umin_i32_offset:
637 ; SIVI: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
639 ; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
640 define amdgpu_kernel void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) {
642 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
643 %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
647 ; GCN-LABEL: {{^}}atomic_umin_i32_ret_offset:
648 ; SIVI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
649 ; SIVI: buffer_store_dword [[RET]]
651 ; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
652 define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
654 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
655 %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
656 store i32 %val, i32 addrspace(1)* %out2
660 ; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset:
661 ; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
662 ; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
663 ; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
664 define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
666 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
667 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
668 %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
672 ; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
673 ; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
674 ; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
675 ; SIVI: buffer_store_dword [[RET]]
677 ; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
678 define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
680 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
681 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
682 %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
683 store i32 %val, i32 addrspace(1)* %out2
687 ; GCN-LABEL: {{^}}atomic_umin_i32:
688 ; SIVI: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
689 ; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
690 define amdgpu_kernel void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) {
692 %val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
696 ; GCN-LABEL: {{^}}atomic_umin_i32_ret:
697 ; SIVI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
698 ; SIVI: buffer_store_dword [[RET]]
700 ; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
701 define amdgpu_kernel void @atomic_umin_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
703 %val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
704 store i32 %val, i32 addrspace(1)* %out2
708 ; GCN-LABEL: {{^}}atomic_umin_i32_addr64:
709 ; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
710 ; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
711 ; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
712 define amdgpu_kernel void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
714 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
715 %val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
719 ; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64:
720 ; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
721 ; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
722 ; SIVI: buffer_store_dword [[RET]]
724 ; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
725 define amdgpu_kernel void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
727 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
728 %val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
729 store i32 %val, i32 addrspace(1)* %out2
733 ; GCN-LABEL: {{^}}atomic_or_i32_offset:
734 ; SIVI: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
736 ; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
737 define amdgpu_kernel void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) {
739 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
740 %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
744 ; GCN-LABEL: {{^}}atomic_or_i32_ret_offset:
745 ; SIVI: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
746 ; SIVI: buffer_store_dword [[RET]]
748 ; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
749 define amdgpu_kernel void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
751 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
752 %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
753 store i32 %val, i32 addrspace(1)* %out2
757 ; GCN-LABEL: {{^}}atomic_or_i32_addr64_offset:
758 ; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
759 ; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
760 ; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
761 define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
763 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
764 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
765 %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
769 ; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
770 ; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
771 ; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
772 ; SIVI: buffer_store_dword [[RET]]
774 ; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
775 define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
777 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
778 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
779 %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
780 store i32 %val, i32 addrspace(1)* %out2
784 ; GCN-LABEL: {{^}}atomic_or_i32:
785 ; SIVI: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
787 ; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
788 define amdgpu_kernel void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) {
790 %val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
794 ; GCN-LABEL: {{^}}atomic_or_i32_ret:
795 ; SIVI: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
796 ; SIVI: buffer_store_dword [[RET]]
798 ; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
799 define amdgpu_kernel void @atomic_or_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
801 %val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
802 store i32 %val, i32 addrspace(1)* %out2
806 ; GCN-LABEL: {{^}}atomic_or_i32_addr64:
807 ; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
808 ; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
809 ; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
810 define amdgpu_kernel void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
812 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
813 %val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
817 ; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64:
818 ; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
819 ; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
820 ; SIVI: buffer_store_dword [[RET]]
822 ; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
823 define amdgpu_kernel void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
825 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
826 %val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
827 store i32 %val, i32 addrspace(1)* %out2
831 ; GCN-LABEL: {{^}}atomic_xchg_i32_offset:
832 ; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
834 ; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
835 define amdgpu_kernel void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) {
837 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
838 %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
842 ; GCN-LABEL: {{^}}atomic_xchg_f32_offset:
843 ; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
845 ; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
846 define amdgpu_kernel void @atomic_xchg_f32_offset(float addrspace(1)* %out, float %in) {
848 %gep = getelementptr float, float addrspace(1)* %out, i64 4
849 %val = atomicrmw volatile xchg float addrspace(1)* %gep, float %in seq_cst
853 ; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:
854 ; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
855 ; SIVI: buffer_store_dword [[RET]]
857 ; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
858 define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
860 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
861 %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
862 store i32 %val, i32 addrspace(1)* %out2
866 ; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
867 ; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
868 ; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
869 ; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
870 define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
872 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
873 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
874 %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
878 ; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
879 ; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
880 ; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
881 ; SIVI: buffer_store_dword [[RET]]
883 ; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
884 define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
886 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
887 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
888 %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
889 store i32 %val, i32 addrspace(1)* %out2
893 ; GCN-LABEL: {{^}}atomic_xchg_i32:
894 ; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
895 ; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
896 define amdgpu_kernel void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) {
898 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
902 ; GCN-LABEL: {{^}}atomic_xchg_i32_ret:
903 ; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
904 ; SIVI: buffer_store_dword [[RET]]
906 ; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
907 define amdgpu_kernel void @atomic_xchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
909 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
910 store i32 %val, i32 addrspace(1)* %out2
914 ; GCN-LABEL: {{^}}atomic_xchg_i32_addr64:
915 ; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
916 ; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
917 ; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
918 define amdgpu_kernel void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
920 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
921 %val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
925 ; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
926 ; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
927 ; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
928 ; SIVI: buffer_store_dword [[RET]]
930 ; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
931 define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
933 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
934 %val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
935 store i32 %val, i32 addrspace(1)* %out2
939 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_offset:
940 ; SIVI: buffer_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
942 ; GFX9: global_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:16{{$}}
943 define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32 addrspace(1)* %out, i32 %in, i32 %old) {
945 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
946 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
950 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset:
951 ; SIVI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
952 ; SIVI: buffer_store_dword v[[RET]]
954 ; GFX9: global_atomic_cmpswap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:16 glc{{$}}
955 define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) {
957 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
958 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
959 %extract0 = extractvalue { i32, i1 } %val, 0
960 store i32 %extract0, i32 addrspace(1)* %out2
964 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
965 ; SI: buffer_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
967 ; VI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
968 ; GFX9: global_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], off offset:16{{$}}
969 define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
971 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
972 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
973 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
977 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset:
978 ; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
979 ; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
980 ; SIVI: buffer_store_dword v[[RET]]
982 ; GFX9: global_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:16 glc{{$}}
983 define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) {
985 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
986 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
987 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
988 %extract0 = extractvalue { i32, i1 } %val, 0
989 store i32 %extract0, i32 addrspace(1)* %out2
993 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32:
994 ; SIVI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
996 ; GFX9: global_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
997 define amdgpu_kernel void @atomic_cmpxchg_i32(i32 addrspace(1)* %out, i32 %in, i32 %old) {
999 %val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
1003 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret:
1004 ; SIVI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
1005 ; SIVI: buffer_store_dword v[[RET]]
1007 ; GFX9: global_atomic_cmpswap [[RET:v[0-9]+]], v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], off glc{{$}}
1008 define amdgpu_kernel void @atomic_cmpxchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) {
1010 %val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
1011 %extract0 = extractvalue { i32, i1 } %val, 0
1012 store i32 %extract0, i32 addrspace(1)* %out2
1016 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64:
1017 ; SI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
1018 ; VI: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
1019 ; GFX9: global_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
1020 define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
1022 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1023 %val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst
1027 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64:
1028 ; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
1029 ; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
1030 ; SIVI: buffer_store_dword v[[RET]]
1032 ; GFX9: global_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off glc{{$}}
1033 define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) {
1035 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1036 %val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst
1037 %extract0 = extractvalue { i32, i1 } %val, 0
1038 store i32 %extract0, i32 addrspace(1)* %out2
1042 ; GCN-LABEL: {{^}}atomic_xor_i32_offset:
1043 ; SIVI: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
1045 ; GFX9: global_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
1046 define amdgpu_kernel void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {
1048 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
1049 %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
1053 ; GCN-LABEL: {{^}}atomic_xor_i32_ret_offset:
1054 ; SIVI: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
1055 ; SIVI: buffer_store_dword [[RET]]
1057 ; GFX9: global_atomic_xor v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
1058 define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
1060 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
1061 %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
1062 store i32 %val, i32 addrspace(1)* %out2
1066 ; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset:
1067 ; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
1068 ; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
1069 ; GFX9: global_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
1070 define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
1072 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1073 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
1074 %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
1078 ; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
1079 ; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
1080 ; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
1081 ; SIVI: buffer_store_dword [[RET]]
1083 ; GFX9: global_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
1084 define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
1086 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1087 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
1088 %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
1089 store i32 %val, i32 addrspace(1)* %out2
1093 ; GCN-LABEL: {{^}}atomic_xor_i32:
1094 ; SIVI: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
1095 ; GFX9: global_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
1096 define amdgpu_kernel void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) {
1098 %val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
1102 ; GCN-LABEL: {{^}}atomic_xor_i32_ret:
1103 ; SIVI: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
1104 ; SIVI: buffer_store_dword [[RET]]
1106 ; GFX9: global_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
1107 define amdgpu_kernel void @atomic_xor_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
1109 %val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
1110 store i32 %val, i32 addrspace(1)* %out2
1114 ; GCN-LABEL: {{^}}atomic_xor_i32_addr64:
1115 ; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
1116 ; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
1117 ; GFX9: global_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
1118 define amdgpu_kernel void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
1120 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1121 %val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
1125 ; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64:
1126 ; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
1127 ; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
1128 ; SIVI: buffer_store_dword [[RET]]
1130 ; GFX9: global_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
1131 define amdgpu_kernel void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
1133 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1134 %val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
1135 store i32 %val, i32 addrspace(1)* %out2
1139 ; GCN-LABEL: {{^}}atomic_load_i32_offset:
1140 ; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
1141 ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
1142 ; SIVI: buffer_store_dword [[RET]]
1144 ; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], off offset:16 glc{{$}}
1145 define amdgpu_kernel void @atomic_load_i32_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
1147 %gep = getelementptr i32, i32 addrspace(1)* %in, i64 4
1148 %val = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4
1149 store i32 %val, i32 addrspace(1)* %out
1153 ; GCN-LABEL: {{^}}atomic_load_i32:
1154 ; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
1155 ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc
1156 ; SIVI: buffer_store_dword [[RET]]
1158 ; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], off glc
1159 define amdgpu_kernel void @atomic_load_i32(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
1161 %val = load atomic i32, i32 addrspace(1)* %in seq_cst, align 4
1162 store i32 %val, i32 addrspace(1)* %out
1166 ; GCN-LABEL: {{^}}atomic_load_i32_addr64_offset:
1167 ; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
1168 ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
1169 ; SIVI: buffer_store_dword [[RET]]
1171 ; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], off offset:16 glc{{$}}
1172 define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) {
1174 %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index
1175 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
1176 %val = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4
1177 store i32 %val, i32 addrspace(1)* %out
1181 ; GCN-LABEL: {{^}}atomic_load_i32_addr64:
1182 ; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
1183 ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
1184 ; SIVI: buffer_store_dword [[RET]]
1186 ; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], off glc{{$}}
1187 define amdgpu_kernel void @atomic_load_i32_addr64(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) {
1189 %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index
1190 %val = load atomic i32, i32 addrspace(1)* %ptr seq_cst, align 4
1191 store i32 %val, i32 addrspace(1)* %out
1195 ; GCN-LABEL: {{^}}atomic_store_i32_offset:
1196 ; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
1197 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
1198 ; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off offset:16{{$}}
1199 define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32 addrspace(1)* %out) {
1201 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
1202 store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4
1206 ; GCN-LABEL: {{^}}atomic_store_i32:
1207 ; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
1208 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
1209 ; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off{{$}}
1210 define amdgpu_kernel void @atomic_store_i32(i32 %in, i32 addrspace(1)* %out) {
1212 store atomic i32 %in, i32 addrspace(1)* %out seq_cst, align 4
1216 ; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset:
1217 ; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
1218 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
1219 ; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off offset:16{{$}}
1220 define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(1)* %out, i64 %index) {
1222 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1223 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
1224 store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4
1228 ; GCN-LABEL: {{^}}atomic_store_i32_addr64:
1229 ; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
1230 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
1231 ; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off{{$}}
1232 define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32 addrspace(1)* %out, i64 %index) {
1234 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1235 store atomic i32 %in, i32 addrspace(1)* %ptr seq_cst, align 4