1 ; RUN: llc -march=amdgcn -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SIVI %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -amdgpu-atomic-optimizations=false -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
5 ; GCN-LABEL: {{^}}atomic_add_i32_offset:
6 ; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
7 ; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
8 define amdgpu_kernel void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) {
10 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
11 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
15 ; GCN-LABEL: {{^}}atomic_add_i32_max_neg_offset:
16 ; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:-4096{{$}}
17 define amdgpu_kernel void @atomic_add_i32_max_neg_offset(i32 addrspace(1)* %out, i32 %in) {
19 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 -1024
20 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
24 ; GCN-LABEL: {{^}}atomic_add_i32_soffset:
25 ; SIVI: s_mov_b32 [[SREG:s[0-9]+]], 0x8ca0
26 ; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}}
28 ; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0x8000,
29 ; GFX9-NEXT: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
30 ; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:3232{{$}}
31 define amdgpu_kernel void @atomic_add_i32_soffset(i32 addrspace(1)* %out, i32 %in) {
33 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 9000
34 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
38 ; GCN-LABEL: {{^}}atomic_add_i32_huge_offset:
39 ; SI-DAG: v_mov_b32_e32 v[[PTRLO:[0-9]+]], 0xdeac
40 ; SI-DAG: v_mov_b32_e32 v[[PTRHI:[0-9]+]], 0xabcd
41 ; SI: buffer_atomic_add v{{[0-9]+}}, v{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
45 ; GFX9: v_mov_b32_e32 [[HIGH_K:v[0-9]+]], 0xabcd
46 ; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xd000,
47 ; GFX9-NEXT: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, [[HIGH_K]], v{{[0-9]+}}, vcc
48 ; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:3756{{$}}
49 define amdgpu_kernel void @atomic_add_i32_huge_offset(i32 addrspace(1)* %out, i32 %in) {
51 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 47224239175595
53 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
57 ; GCN-LABEL: {{^}}atomic_add_i32_ret_offset:
58 ; SIVI: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
59 ; SIVI: buffer_store_dword [[RET]]
61 ; GFX9: global_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
62 define amdgpu_kernel void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
64 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
65 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
66 store i32 %val, i32 addrspace(1)* %out2
70 ; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset:
71 ; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
72 ; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
73 ; GFX9: global_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
74 define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
76 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
77 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
78 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
82 ; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
83 ; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
84 ; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
85 ; SIVI: buffer_store_dword [[RET]]
87 ; GFX9: global_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
88 ; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
89 define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
91 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
92 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
93 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
94 store i32 %val, i32 addrspace(1)* %out2
98 ; GCN-LABEL: {{^}}atomic_add_i32:
99 ; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
100 ; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}
101 define amdgpu_kernel void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) {
103 %val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
107 ; GCN-LABEL: {{^}}atomic_add_i32_ret:
108 ; SIVI: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
109 ; SIVI: buffer_store_dword [[RET]]
111 ; GFX9: global_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
112 ; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
113 define amdgpu_kernel void @atomic_add_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
115 %val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
116 store i32 %val, i32 addrspace(1)* %out2
120 ; GCN-LABEL: {{^}}atomic_add_i32_addr64:
121 ; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
122 ; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
123 ; GFX9: global_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
124 define amdgpu_kernel void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
126 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
127 %val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
131 ; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64:
132 ; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
133 ; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
134 ; SIVI: buffer_store_dword [[RET]]
136 ; GFX9: global_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
137 define amdgpu_kernel void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
139 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
140 %val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
141 store i32 %val, i32 addrspace(1)* %out2
145 ; GCN-LABEL: {{^}}atomic_and_i32_offset:
146 ; SIVI: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
148 ; GFX9: global_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
149 define amdgpu_kernel void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) {
151 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
152 %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
156 ; GCN-LABEL: {{^}}atomic_and_i32_ret_offset:
157 ; SIVI: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
158 ; SIVI: buffer_store_dword [[RET]]
160 ; GFX9: global_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
161 define amdgpu_kernel void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
163 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
164 %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
165 store i32 %val, i32 addrspace(1)* %out2
169 ; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset:
170 ; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
171 ; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
173 ; GFX9: global_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
174 define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
176 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
177 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
178 %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
182 ; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
183 ; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
184 ; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
185 ; SIVI: buffer_store_dword [[RET]]
187 ; GFX9: global_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
188 define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
190 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
191 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
192 %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
193 store i32 %val, i32 addrspace(1)* %out2
197 ; GCN-LABEL: {{^}}atomic_and_i32:
198 ; SIVI: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
200 ; GFX9: global_atomic_and v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}
201 define amdgpu_kernel void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) {
203 %val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
207 ; GCN-LABEL: {{^}}atomic_and_i32_ret:
208 ; SIVI: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
209 ; SIVI: buffer_store_dword [[RET]]
211 ; GFX9: global_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
212 define amdgpu_kernel void @atomic_and_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
214 %val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
215 store i32 %val, i32 addrspace(1)* %out2
219 ; GCN-LABEL: {{^}}atomic_and_i32_addr64:
220 ; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
221 ; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
223 ; GFX9: global_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
224 define amdgpu_kernel void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
226 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
227 %val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
231 ; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64:
232 ; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
233 ; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
234 ; SIVI: buffer_store_dword [[RET]]
236 ; GFX9: global_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
237 define amdgpu_kernel void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
239 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
240 %val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
241 store i32 %val, i32 addrspace(1)* %out2
245 ; GCN-LABEL: {{^}}atomic_sub_i32_offset:
246 ; SIVI: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
248 ; GFX9: global_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
249 define amdgpu_kernel void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) {
251 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
252 %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
256 ; GCN-LABEL: {{^}}atomic_sub_i32_ret_offset:
257 ; SIVI: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
258 ; SIVI: buffer_store_dword [[RET]]
260 ; GFX9: global_atomic_sub v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
261 define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
263 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
264 %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
265 store i32 %val, i32 addrspace(1)* %out2
269 ; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset:
270 ; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
271 ; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
273 ; GFX9: global_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
274 define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
276 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
277 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
278 %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
282 ; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
283 ; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
284 ; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
285 ; SIVI: buffer_store_dword [[RET]]
287 ; GFX9: global_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
288 define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
290 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
291 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
292 %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
293 store i32 %val, i32 addrspace(1)* %out2
297 ; GCN-LABEL: {{^}}atomic_sub_i32:
298 ; SIVI: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
300 ; GFX9: global_atomic_sub v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}
301 define amdgpu_kernel void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) {
303 %val = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
307 ; GCN-LABEL: {{^}}atomic_sub_i32_ret:
308 ; SIVI: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
309 ; SIVI: buffer_store_dword [[RET]]
311 ; GFX9: global_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
312 define amdgpu_kernel void @atomic_sub_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
314 %val = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
315 store i32 %val, i32 addrspace(1)* %out2
319 ; GCN-LABEL: {{^}}atomic_sub_i32_addr64:
320 ; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
321 ; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
323 ; GFX9: global_atomic_sub v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}
324 define amdgpu_kernel void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
326 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
327 %val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
331 ; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64:
332 ; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
333 ; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
334 ; SIVI: buffer_store_dword [[RET]]
336 ; GFX9: global_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
337 define amdgpu_kernel void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
339 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
340 %val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
341 store i32 %val, i32 addrspace(1)* %out2
345 ; GCN-LABEL: {{^}}atomic_max_i32_offset:
346 ; SIVI: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
348 ; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
349 define amdgpu_kernel void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) {
351 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
352 %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
356 ; GCN-LABEL: {{^}}atomic_max_i32_ret_offset:
357 ; SIVI: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
358 ; SIVI: buffer_store_dword [[RET]]
360 ; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
361 define amdgpu_kernel void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
363 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
364 %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
365 store i32 %val, i32 addrspace(1)* %out2
369 ; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset:
370 ; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
371 ; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
373 ; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
374 define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
376 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
377 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
378 %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
382 ; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
383 ; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
384 ; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
385 ; SIVI: buffer_store_dword [[RET]]
387 ; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
388 define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
390 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
391 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
392 %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
393 store i32 %val, i32 addrspace(1)* %out2
397 ; GCN-LABEL: {{^}}atomic_max_i32:
398 ; SIVI: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
400 ; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
401 define amdgpu_kernel void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) {
403 %val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
407 ; GCN-LABEL: {{^}}atomic_max_i32_ret:
408 ; SIVI: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
409 ; SIVI: buffer_store_dword [[RET]]
411 ; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
412 define amdgpu_kernel void @atomic_max_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
414 %val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
415 store i32 %val, i32 addrspace(1)* %out2
419 ; GCN-LABEL: {{^}}atomic_max_i32_addr64:
420 ; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
421 ; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
423 ; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
424 define amdgpu_kernel void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
426 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
427 %val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
431 ; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64:
432 ; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
433 ; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
434 ; SIVI: buffer_store_dword [[RET]]
436 ; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
437 define amdgpu_kernel void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
439 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
440 %val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
441 store i32 %val, i32 addrspace(1)* %out2
445 ; GCN-LABEL: {{^}}atomic_umax_i32_offset:
446 ; SIVI: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
448 ; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
449 define amdgpu_kernel void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) {
451 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
452 %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
456 ; GCN-LABEL: {{^}}atomic_umax_i32_ret_offset:
457 ; SIVI: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
458 ; SIVI: buffer_store_dword [[RET]]
460 ; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
461 define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
463 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
464 %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
465 store i32 %val, i32 addrspace(1)* %out2
469 ; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset:
470 ; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
471 ; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
472 ; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
473 define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
475 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
476 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
477 %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
481 ; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
482 ; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
483 ; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
484 ; SIVI: buffer_store_dword [[RET]]
486 ; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
487 define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
489 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
490 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
491 %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
492 store i32 %val, i32 addrspace(1)* %out2
496 ; GCN-LABEL: {{^}}atomic_umax_i32:
497 ; SIVI: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
499 ; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
500 define amdgpu_kernel void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) {
502 %val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
506 ; GCN-LABEL: {{^}}atomic_umax_i32_ret:
507 ; SIVI: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
508 ; SIVI: buffer_store_dword [[RET]]
510 ; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
511 define amdgpu_kernel void @atomic_umax_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
513 %val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
514 store i32 %val, i32 addrspace(1)* %out2
518 ; GCN-LABEL: {{^}}atomic_umax_i32_addr64:
519 ; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
520 ; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
521 ; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
522 define amdgpu_kernel void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
524 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
525 %val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
529 ; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64:
530 ; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
531 ; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
532 ; SIVI: buffer_store_dword [[RET]]
534 ; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
535 define amdgpu_kernel void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
537 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
538 %val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
539 store i32 %val, i32 addrspace(1)* %out2
543 ; GCN-LABEL: {{^}}atomic_min_i32_offset:
544 ; SIVI: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
546 ; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
547 define amdgpu_kernel void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) {
549 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
550 %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
554 ; GCN-LABEL: {{^}}atomic_min_i32_ret_offset:
555 ; SIVI: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
556 ; SIVI: buffer_store_dword [[RET]]
558 ; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
559 define amdgpu_kernel void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
561 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
562 %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
563 store i32 %val, i32 addrspace(1)* %out2
567 ; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset:
568 ; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
569 ; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
570 ; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
571 define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
573 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
574 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
575 %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
579 ; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
580 ; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
581 ; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
582 ; SIVI: buffer_store_dword [[RET]]
584 ; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
585 define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
587 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
588 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
589 %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
590 store i32 %val, i32 addrspace(1)* %out2
594 ; GCN-LABEL: {{^}}atomic_min_i32:
595 ; SIVI: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
597 ; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
598 define amdgpu_kernel void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) {
600 %val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
604 ; GCN-LABEL: {{^}}atomic_min_i32_ret:
605 ; SIVI: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
606 ; SIVI: buffer_store_dword [[RET]]
608 ; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
609 define amdgpu_kernel void @atomic_min_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
611 %val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
612 store i32 %val, i32 addrspace(1)* %out2
616 ; GCN-LABEL: {{^}}atomic_min_i32_addr64:
617 ; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
618 ; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
619 ; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
620 define amdgpu_kernel void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
622 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
623 %val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
627 ; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64:
628 ; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
629 ; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
630 ; SIVI: buffer_store_dword [[RET]]
632 ; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
633 define amdgpu_kernel void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
635 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
636 %val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
637 store i32 %val, i32 addrspace(1)* %out2
641 ; GCN-LABEL: {{^}}atomic_umin_i32_offset:
642 ; SIVI: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
644 ; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
645 define amdgpu_kernel void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) {
647 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
648 %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
652 ; GCN-LABEL: {{^}}atomic_umin_i32_ret_offset:
; --- atomicrmw umin (unsigned min on i32 in global memory) ---
; Check-line conventions in this group, as established by the patterns below:
;   * SI/VI variants check buffer_atomic_umin / flat_atomic_umin; GFX9 checks
;     global_atomic_umin.
;   * "_ret" variants return the old value: the atomic carries "glc" and the
;     captured [[RET]] register is checked into a buffer_store_dword (SI/VI).
;   * "offset:16" corresponds to "getelementptr i32, ..., i64 4" (4 x 4 bytes).
;   * "_addr64" variants index by a runtime i64 %index; SI checks the buffer
;     addr64 form while VI checks a flat atomic for the same IR.
653 ; SIVI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
654 ; SIVI: buffer_store_dword [[RET]]
656 ; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
657 define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
659 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
660 %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
661 store i32 %val, i32 addrspace(1)* %out2
; umin at %out + %index*4 + 16, result unused (no glc expected).
665 ; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset:
666 ; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
667 ; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
668 ; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
669 define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
671 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
672 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
673 %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
; Same addressing as above, but the old value is stored to %out2.
677 ; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
678 ; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
679 ; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
680 ; SIVI: buffer_store_dword [[RET]]
682 ; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
683 define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
685 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
686 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
687 %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
688 store i32 %val, i32 addrspace(1)* %out2
; Base cases: no offset, no index.
692 ; GCN-LABEL: {{^}}atomic_umin_i32:
693 ; SIVI: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
694 ; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
695 define amdgpu_kernel void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) {
697 %val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
701 ; GCN-LABEL: {{^}}atomic_umin_i32_ret:
702 ; SIVI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
703 ; SIVI: buffer_store_dword [[RET]]
705 ; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
706 define amdgpu_kernel void @atomic_umin_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
708 %val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
709 store i32 %val, i32 addrspace(1)* %out2
713 ; GCN-LABEL: {{^}}atomic_umin_i32_addr64:
714 ; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
715 ; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
716 ; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
717 define amdgpu_kernel void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
719 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
720 %val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
724 ; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64:
725 ; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
726 ; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
727 ; SIVI: buffer_store_dword [[RET]]
729 ; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
730 define amdgpu_kernel void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
732 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
733 %val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
734 store i32 %val, i32 addrspace(1)* %out2
; --- atomicrmw or (bitwise OR on i32 in global memory) ---
; Same layout as the other rmw groups in this file: discarded-result case
; first (no glc), then the "_ret" case (glc + buffer_store_dword of [[RET]]).
; offset:16 matches the "getelementptr ..., i64 4" on an i32 pointer.
738 ; GCN-LABEL: {{^}}atomic_or_i32_offset:
739 ; SIVI: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
741 ; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
742 define amdgpu_kernel void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) {
744 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
745 %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
749 ; GCN-LABEL: {{^}}atomic_or_i32_ret_offset:
750 ; SIVI: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
751 ; SIVI: buffer_store_dword [[RET]]
753 ; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
754 define amdgpu_kernel void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
756 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
757 %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
758 store i32 %val, i32 addrspace(1)* %out2
; atomicrmw or at %out + %index*4 + 16, result discarded.
; Fix: the GFX9 check was the only non-ret addr64_offset pattern in this file
; missing the {{$}} end-of-line anchor (compare the umin/xchg/xor siblings),
; so it would also have matched a trailing " glc". Anchor it for strictness.
762 ; GCN-LABEL: {{^}}atomic_or_i32_addr64_offset:
763 ; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
764 ; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
765 ; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
766 define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
768 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
769 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
770 %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
; Remaining atomicrmw or cases: ret_addr64_offset, plain, ret, addr64,
; ret_addr64 — mirroring the structure of the umin group above.
774 ; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
775 ; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
776 ; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
777 ; SIVI: buffer_store_dword [[RET]]
779 ; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
780 define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
782 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
783 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
784 %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
785 store i32 %val, i32 addrspace(1)* %out2
789 ; GCN-LABEL: {{^}}atomic_or_i32:
790 ; SIVI: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
792 ; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
793 define amdgpu_kernel void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) {
795 %val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
799 ; GCN-LABEL: {{^}}atomic_or_i32_ret:
800 ; SIVI: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
801 ; SIVI: buffer_store_dword [[RET]]
803 ; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
804 define amdgpu_kernel void @atomic_or_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
806 %val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
807 store i32 %val, i32 addrspace(1)* %out2
811 ; GCN-LABEL: {{^}}atomic_or_i32_addr64:
812 ; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
813 ; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
814 ; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
815 define amdgpu_kernel void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
817 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
818 %val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
822 ; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64:
823 ; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
824 ; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
825 ; SIVI: buffer_store_dword [[RET]]
827 ; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
828 define amdgpu_kernel void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
830 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
831 %val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
832 store i32 %val, i32 addrspace(1)* %out2
; --- atomicrmw xchg (unconditional swap) ---
; Selected instruction is buffer_atomic_swap / flat_atomic_swap /
; global_atomic_swap. Note the extra f32 case: xchg is the one rmw operation
; here tested with a float operand, and it selects the same swap instruction.
836 ; GCN-LABEL: {{^}}atomic_xchg_i32_offset:
837 ; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
839 ; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
840 define amdgpu_kernel void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) {
842 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
843 %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
; f32 xchg lowers to the same swap instruction as the i32 case above.
847 ; GCN-LABEL: {{^}}atomic_xchg_f32_offset:
848 ; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
850 ; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
851 define amdgpu_kernel void @atomic_xchg_f32_offset(float addrspace(1)* %out, float %in) {
853 %gep = getelementptr float, float addrspace(1)* %out, i64 4
854 %val = atomicrmw volatile xchg float addrspace(1)* %gep, float %in seq_cst
858 ; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:
859 ; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
860 ; SIVI: buffer_store_dword [[RET]]
862 ; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
863 define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
865 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
866 %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
867 store i32 %val, i32 addrspace(1)* %out2
871 ; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
872 ; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
873 ; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
874 ; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
875 define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
877 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
878 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
879 %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
883 ; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
884 ; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
885 ; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
886 ; SIVI: buffer_store_dword [[RET]]
888 ; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
889 define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
891 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
892 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
893 %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
894 store i32 %val, i32 addrspace(1)* %out2
898 ; GCN-LABEL: {{^}}atomic_xchg_i32:
899 ; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
900 ; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
901 define amdgpu_kernel void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) {
903 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
907 ; GCN-LABEL: {{^}}atomic_xchg_i32_ret:
908 ; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
909 ; SIVI: buffer_store_dword [[RET]]
911 ; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
912 define amdgpu_kernel void @atomic_xchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
914 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
915 store i32 %val, i32 addrspace(1)* %out2
919 ; GCN-LABEL: {{^}}atomic_xchg_i32_addr64:
920 ; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
921 ; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
922 ; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
923 define amdgpu_kernel void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
925 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
926 %val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
930 ; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
931 ; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
932 ; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
933 ; SIVI: buffer_store_dword [[RET]]
935 ; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
936 define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
938 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
939 %val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
940 store i32 %val, i32 addrspace(1)* %out2
; --- cmpxchg (compare-and-swap) ---
; cmpswap takes a register PAIR (new value + compare value), hence the
; v[{{...}}:{{...}}] data operand in every pattern below. The "_ret" cases
; extract field 0 (the loaded old value) from the {i32, i1} result and store
; it; on SI/VI the checked result register is the LOW register of the pair,
; captured with the v{{\[}}[[RET:...]]...] idiom so v[[RET]] can be reused
; in the buffer_store_dword check.
944 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_offset:
945 ; SIVI: buffer_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
947 ; GFX9: global_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:16{{$}}
948 define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32 addrspace(1)* %out, i32 %in, i32 %old) {
950 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
951 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
955 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset:
956 ; SIVI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
957 ; SIVI: buffer_store_dword v[[RET]]
959 ; GFX9: global_atomic_cmpswap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:16 glc{{$}}
960 define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) {
962 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
963 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
964 %extract0 = extractvalue { i32, i1 } %val, 0
965 store i32 %extract0, i32 addrspace(1)* %out2
969 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
970 ; SI: buffer_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
972 ; VI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
973 ; GFX9: global_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], off offset:16{{$}}
974 define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
976 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
977 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
978 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
982 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset:
983 ; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
984 ; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
985 ; SIVI: buffer_store_dword v[[RET]]
987 ; GFX9: global_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:16 glc{{$}}
988 define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) {
990 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
991 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
992 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
993 %extract0 = extractvalue { i32, i1 } %val, 0
994 store i32 %extract0, i32 addrspace(1)* %out2
998 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32:
999 ; SIVI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
1001 ; GFX9: global_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
1002 define amdgpu_kernel void @atomic_cmpxchg_i32(i32 addrspace(1)* %out, i32 %in, i32 %old) {
1004 %val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
1008 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret:
1009 ; SIVI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
1010 ; SIVI: buffer_store_dword v[[RET]]
1012 ; GFX9: global_atomic_cmpswap [[RET:v[0-9]+]], v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], off glc{{$}}
1013 define amdgpu_kernel void @atomic_cmpxchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) {
1015 %val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
1016 %extract0 = extractvalue { i32, i1 } %val, 0
1017 store i32 %extract0, i32 addrspace(1)* %out2
1021 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64:
1022 ; SI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
1023 ; VI: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
1024 ; GFX9: global_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
1025 define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
1027 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1028 %val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst
1032 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64:
1033 ; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
1034 ; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
1035 ; SIVI: buffer_store_dword v[[RET]]
1037 ; GFX9: global_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off glc{{$}}
1038 define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) {
1040 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1041 %val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst
1042 %extract0 = extractvalue { i32, i1 } %val, 0
1043 store i32 %extract0, i32 addrspace(1)* %out2
; --- atomicrmw xor (bitwise XOR on i32 in global memory) ---
; Same seven-case structure as the umin/or/xchg groups above.
1047 ; GCN-LABEL: {{^}}atomic_xor_i32_offset:
1048 ; SIVI: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
1050 ; GFX9: global_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
1051 define amdgpu_kernel void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {
1053 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
1054 %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
; NOTE(review): unlike sibling _ret cases, the GFX9 pattern below matches the
; result register with a plain v{{[0-9]+}} instead of capturing [[RET]];
; harmless here since no later GFX9 check reuses it.
1058 ; GCN-LABEL: {{^}}atomic_xor_i32_ret_offset:
1059 ; SIVI: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
1060 ; SIVI: buffer_store_dword [[RET]]
1062 ; GFX9: global_atomic_xor v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
1063 define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
1065 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
1066 %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
1067 store i32 %val, i32 addrspace(1)* %out2
1071 ; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset:
1072 ; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
1073 ; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
1074 ; GFX9: global_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
1075 define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
1077 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1078 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
1079 %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
1083 ; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
1084 ; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
1085 ; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
1086 ; SIVI: buffer_store_dword [[RET]]
1088 ; GFX9: global_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
1089 define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
1091 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1092 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
1093 %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
1094 store i32 %val, i32 addrspace(1)* %out2
1098 ; GCN-LABEL: {{^}}atomic_xor_i32:
1099 ; SIVI: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
1100 ; GFX9: global_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
1101 define amdgpu_kernel void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) {
1103 %val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
1107 ; GCN-LABEL: {{^}}atomic_xor_i32_ret:
1108 ; SIVI: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
1109 ; SIVI: buffer_store_dword [[RET]]
1111 ; GFX9: global_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
1112 define amdgpu_kernel void @atomic_xor_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
1114 %val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
1115 store i32 %val, i32 addrspace(1)* %out2
1119 ; GCN-LABEL: {{^}}atomic_xor_i32_addr64:
1120 ; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
1121 ; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
1122 ; GFX9: global_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
1123 define amdgpu_kernel void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
1125 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1126 %val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
1130 ; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64:
1131 ; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
1132 ; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
1133 ; SIVI: buffer_store_dword [[RET]]
1135 ; GFX9: global_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
1136 define amdgpu_kernel void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
1138 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1139 %val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
1140 store i32 %val, i32 addrspace(1)* %out2
; --- seq_cst atomic loads of i32 ---
; An atomic load is expected to select an ordinary load carrying "glc"
; (force cache bypass) on all targets; every case then stores the loaded
; value so the result register [[RET]] is observable in the output.
1144 ; GCN-LABEL: {{^}}atomic_load_i32_offset:
1145 ; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
1146 ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
1147 ; SIVI: buffer_store_dword [[RET]]
1149 ; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], off offset:16 glc{{$}}
1150 define amdgpu_kernel void @atomic_load_i32_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
1152 %gep = getelementptr i32, i32 addrspace(1)* %in, i64 4
1153 %val = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4
1154 store i32 %val, i32 addrspace(1)* %out
1158 ; GCN-LABEL: {{^}}atomic_load_i32:
1159 ; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
1160 ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc
1161 ; SIVI: buffer_store_dword [[RET]]
1163 ; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], off glc
1164 define amdgpu_kernel void @atomic_load_i32(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
1166 %val = load atomic i32, i32 addrspace(1)* %in seq_cst, align 4
1167 store i32 %val, i32 addrspace(1)* %out
1171 ; GCN-LABEL: {{^}}atomic_load_i32_addr64_offset:
1172 ; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
1173 ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
1174 ; SIVI: buffer_store_dword [[RET]]
1176 ; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], off offset:16 glc{{$}}
1177 define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) {
1179 %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index
1180 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
1181 %val = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4
1182 store i32 %val, i32 addrspace(1)* %out
1186 ; GCN-LABEL: {{^}}atomic_load_i32_addr64:
1187 ; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
1188 ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
1189 ; SIVI: buffer_store_dword [[RET]]
1191 ; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], off glc{{$}}
1192 define amdgpu_kernel void @atomic_load_i32_addr64(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) {
1194 %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index
1195 %val = load atomic i32, i32 addrspace(1)* %ptr seq_cst, align 4
1196 store i32 %val, i32 addrspace(1)* %out
; --- seq_cst atomic stores of i32 ---
; An atomic store is expected to select an ordinary store (no glc):
; buffer_store_dword on SI, flat_store_dword on VI, global_store_dword
; on GFX9. Note the argument order is flipped vs. the load tests:
; %in is the value, %out the destination pointer.
1200 ; GCN-LABEL: {{^}}atomic_store_i32_offset:
1201 ; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
1202 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
1203 ; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off offset:16{{$}}
1204 define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32 addrspace(1)* %out) {
1206 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
1207 store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4
1211 ; GCN-LABEL: {{^}}atomic_store_i32:
1212 ; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
1213 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
1214 ; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off{{$}}
1215 define amdgpu_kernel void @atomic_store_i32(i32 %in, i32 addrspace(1)* %out) {
1217 store atomic i32 %in, i32 addrspace(1)* %out seq_cst, align 4
1221 ; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset:
1222 ; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
1223 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
1224 ; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off offset:16{{$}}
1225 define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(1)* %out, i64 %index) {
1227 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1228 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
1229 store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4
1233 ; GCN-LABEL: {{^}}atomic_store_i32_addr64:
1234 ; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
1235 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
1236 ; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off{{$}}
1237 define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32 addrspace(1)* %out, i64 %index) {
1239 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1240 store atomic i32 %in, i32 addrspace(1)* %ptr seq_cst, align 4