1 ; RUN: llc -march=amdgcn -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SIVI %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -amdgpu-atomic-optimizations=false -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
5 ; GCN-LABEL: {{^}}atomic_add_i32_offset:
6 ; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
7 ; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
8 define amdgpu_kernel void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) {
10 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
11 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
15 ; GCN-LABEL: {{^}}atomic_add_i32_max_neg_offset:
16 ; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:-4096{{$}}
17 define amdgpu_kernel void @atomic_add_i32_max_neg_offset(i32 addrspace(1)* %out, i32 %in) {
19 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 -1024
20 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
24 ; GCN-LABEL: {{^}}atomic_add_i32_soffset:
; Element offset 9000 * 4 bytes = 36000 = 0x8ca0 exceeds the 12/13-bit
; immediate offset field, so SI/VI materialize it in an SGPR soffset,
; while gfx9 splits it into a 0x8000 VGPR base plus a 3232 immediate
; (0x8000 + 3232 = 36000).
25 ; SIVI: s_mov_b32 [[SREG:s[0-9]+]], 0x8ca0
26 ; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}}
28 ; GFX9: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x8000{{$}}
29 ; GFX9: global_atomic_add [[OFFSET]], v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:3232{{$}}
30 define amdgpu_kernel void @atomic_add_i32_soffset(i32 addrspace(1)* %out, i32 %in) {
32 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 9000
33 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
37 ; GCN-LABEL: {{^}}atomic_add_i32_huge_offset:
38 ; SI-DAG: v_mov_b32_e32 v[[PTRLO:[0-9]+]], 0xdeac
39 ; SI-DAG: v_mov_b32_e32 v[[PTRHI:[0-9]+]], 0xabcd
40 ; SI: buffer_atomic_add v{{[0-9]+}}, v{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
44 ; GFX9: s_add_u32 s[[LOW_K:[0-9]+]], s{{[0-9]+}}, 0xdeac
45 ; GFX9: s_addc_u32 s[[HIGH_K:[0-9]+]], s{{[0-9]+}}, 0xabcd
46 ; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s{{\[}}[[LOW_K]]:[[HIGH_K]]]{{$}}
47 define amdgpu_kernel void @atomic_add_i32_huge_offset(i32 addrspace(1)* %out, i32 %in) {
49 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 47224239175595
51 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
55 ; GCN-LABEL: {{^}}atomic_add_i32_ret_offset:
56 ; SIVI: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
57 ; SIVI: buffer_store_dword [[RET]]
59 ; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}}
60 define amdgpu_kernel void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
62 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
63 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
64 store i32 %val, i32 addrspace(1)* %out2
68 ; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset:
69 ; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
70 ; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
71 ; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
72 define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
74 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
75 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
76 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
80 ; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
81 ; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
82 ; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
83 ; SIVI: buffer_store_dword [[RET]]
85 ; GFX9: global_atomic_add [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
86 ; GFX9: global_store_dword v{{[0-9]+}}, [[RET]], s
87 define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
89 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
90 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
91 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
92 store i32 %val, i32 addrspace(1)* %out2
96 ; GCN-LABEL: {{^}}atomic_add_i32:
97 ; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
98 ; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
99 define amdgpu_kernel void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) {
101 %val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
105 ; GCN-LABEL: {{^}}atomic_add_i32_ret:
106 ; SIVI: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
107 ; SIVI: buffer_store_dword [[RET]]
109 ; GFX9: global_atomic_add [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
110 ; GFX9: global_store_dword v{{[0-9]+}}, [[RET]], s
111 define amdgpu_kernel void @atomic_add_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
113 %val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
114 store i32 %val, i32 addrspace(1)* %out2
118 ; GCN-LABEL: {{^}}atomic_add_i32_addr64:
119 ; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
120 ; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
121 ; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
122 define amdgpu_kernel void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
124 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
125 %val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
129 ; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64:
130 ; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
131 ; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
132 ; SIVI: buffer_store_dword [[RET]]
134 ; GFX9: global_atomic_add [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
135 define amdgpu_kernel void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
137 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
138 %val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
139 store i32 %val, i32 addrspace(1)* %out2
143 ; GCN-LABEL: {{^}}atomic_and_i32_offset:
144 ; SIVI: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
146 ; GFX9: global_atomic_and v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
147 define amdgpu_kernel void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) {
149 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
150 %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
154 ; GCN-LABEL: {{^}}atomic_and_i32_ret_offset:
155 ; SIVI: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
156 ; SIVI: buffer_store_dword [[RET]]
158 ; GFX9: global_atomic_and [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
159 define amdgpu_kernel void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
161 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
162 %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
163 store i32 %val, i32 addrspace(1)* %out2
167 ; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset:
168 ; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
169 ; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
171 ; GFX9: global_atomic_and v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
172 define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
174 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
175 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
176 %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
180 ; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
; The returned value must be forwarded to the follow-up store, so the
; destination register is captured and re-checked on the store line.
181 ; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
182 ; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
183 ; SIVI: buffer_store_dword [[RET]]
185 ; GFX9: global_atomic_and [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
186 define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
188 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
189 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
190 %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
191 store i32 %val, i32 addrspace(1)* %out2
195 ; GCN-LABEL: {{^}}atomic_and_i32:
196 ; SIVI: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
198 ; GFX9: global_atomic_and v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
199 define amdgpu_kernel void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) {
201 %val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
205 ; GCN-LABEL: {{^}}atomic_and_i32_ret:
206 ; SIVI: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
207 ; SIVI: buffer_store_dword [[RET]]
209 ; GFX9: global_atomic_and v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
210 define amdgpu_kernel void @atomic_and_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
212 %val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
213 store i32 %val, i32 addrspace(1)* %out2
217 ; GCN-LABEL: {{^}}atomic_and_i32_addr64:
218 ; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
219 ; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
221 ; GFX9: global_atomic_and v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
222 define amdgpu_kernel void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
224 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
225 %val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
229 ; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64:
230 ; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
231 ; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
232 ; SIVI: buffer_store_dword [[RET]]
234 ; GFX9: global_atomic_and [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
235 define amdgpu_kernel void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
237 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
238 %val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
239 store i32 %val, i32 addrspace(1)* %out2
243 ; GCN-LABEL: {{^}}atomic_sub_i32_offset:
244 ; SIVI: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
246 ; GFX9: global_atomic_sub v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
247 define amdgpu_kernel void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) {
249 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
250 %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
254 ; GCN-LABEL: {{^}}atomic_sub_i32_ret_offset:
255 ; SIVI: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
256 ; SIVI: buffer_store_dword [[RET]]
258 ; GFX9: global_atomic_sub v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}}
259 define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
261 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
262 %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
263 store i32 %val, i32 addrspace(1)* %out2
267 ; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset:
268 ; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
269 ; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
271 ; GFX9: global_atomic_sub v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
272 define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
274 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
275 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
276 %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
280 ; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
281 ; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
282 ; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
283 ; SIVI: buffer_store_dword [[RET]]
285 ; GFX9: global_atomic_sub [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
286 define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
288 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
289 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
290 %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
291 store i32 %val, i32 addrspace(1)* %out2
295 ; GCN-LABEL: {{^}}atomic_sub_i32:
296 ; SIVI: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
298 ; GFX9: global_atomic_sub v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}}
299 define amdgpu_kernel void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) {
301 %val = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
305 ; GCN-LABEL: {{^}}atomic_sub_i32_ret:
306 ; SIVI: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
307 ; SIVI: buffer_store_dword [[RET]]
309 ; GFX9: global_atomic_sub [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
310 define amdgpu_kernel void @atomic_sub_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
312 %val = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
313 store i32 %val, i32 addrspace(1)* %out2
317 ; GCN-LABEL: {{^}}atomic_sub_i32_addr64:
318 ; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
319 ; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
321 ; GFX9: global_atomic_sub v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}}
322 define amdgpu_kernel void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
324 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
325 %val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
329 ; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64:
330 ; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
331 ; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
332 ; SIVI: buffer_store_dword [[RET]]
334 ; GFX9: global_atomic_sub [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
335 define amdgpu_kernel void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
337 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
338 %val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
339 store i32 %val, i32 addrspace(1)* %out2
343 ; GCN-LABEL: {{^}}atomic_max_i32_offset:
344 ; SIVI: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
346 ; GFX9: global_atomic_smax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
347 define amdgpu_kernel void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) {
349 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
350 %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
354 ; GCN-LABEL: {{^}}atomic_max_i32_ret_offset:
355 ; SIVI: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
356 ; SIVI: buffer_store_dword [[RET]]
358 ; GFX9: global_atomic_smax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
359 define amdgpu_kernel void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
361 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
362 %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
363 store i32 %val, i32 addrspace(1)* %out2
367 ; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset:
368 ; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
369 ; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
371 ; GFX9: global_atomic_smax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
372 define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
374 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
375 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
376 %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
380 ; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
381 ; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
382 ; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
383 ; SIVI: buffer_store_dword [[RET]]
385 ; GFX9: global_atomic_smax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
386 define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
388 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
389 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
390 %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
391 store i32 %val, i32 addrspace(1)* %out2
395 ; GCN-LABEL: {{^}}atomic_max_i32:
396 ; SIVI: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
398 ; GFX9: global_atomic_smax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
399 define amdgpu_kernel void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) {
401 %val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
405 ; GCN-LABEL: {{^}}atomic_max_i32_ret:
406 ; SIVI: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
407 ; SIVI: buffer_store_dword [[RET]]
409 ; GFX9: global_atomic_smax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
410 define amdgpu_kernel void @atomic_max_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
412 %val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
413 store i32 %val, i32 addrspace(1)* %out2
417 ; GCN-LABEL: {{^}}atomic_max_i32_addr64:
418 ; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
419 ; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
421 ; GFX9: global_atomic_smax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
422 define amdgpu_kernel void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
424 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
425 %val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
429 ; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64:
430 ; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
431 ; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
432 ; SIVI: buffer_store_dword [[RET]]
434 ; GFX9: global_atomic_smax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
435 define amdgpu_kernel void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
437 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
438 %val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
439 store i32 %val, i32 addrspace(1)* %out2
443 ; GCN-LABEL: {{^}}atomic_umax_i32_offset:
444 ; SIVI: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
446 ; GFX9: global_atomic_umax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
447 define amdgpu_kernel void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) {
449 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
450 %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
454 ; GCN-LABEL: {{^}}atomic_umax_i32_ret_offset:
455 ; SIVI: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
456 ; SIVI: buffer_store_dword [[RET]]
458 ; GFX9: global_atomic_umax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
459 define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
461 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
462 %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
463 store i32 %val, i32 addrspace(1)* %out2
467 ; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset:
468 ; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
469 ; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
470 ; GFX9: global_atomic_umax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
471 define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
473 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
474 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
475 %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
479 ; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
480 ; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
481 ; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
482 ; SIVI: buffer_store_dword [[RET]]
484 ; GFX9: global_atomic_umax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
485 define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
487 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
488 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
489 %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
490 store i32 %val, i32 addrspace(1)* %out2
494 ; GCN-LABEL: {{^}}atomic_umax_i32:
495 ; SIVI: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
497 ; GFX9: global_atomic_umax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
498 define amdgpu_kernel void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) {
500 %val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
504 ; GCN-LABEL: {{^}}atomic_umax_i32_ret:
505 ; SIVI: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
506 ; SIVI: buffer_store_dword [[RET]]
508 ; GFX9: global_atomic_umax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
509 define amdgpu_kernel void @atomic_umax_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
511 %val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
512 store i32 %val, i32 addrspace(1)* %out2
516 ; GCN-LABEL: {{^}}atomic_umax_i32_addr64:
517 ; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
518 ; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
519 ; GFX9: global_atomic_umax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
520 define amdgpu_kernel void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
522 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
523 %val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
527 ; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64:
528 ; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
529 ; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
530 ; SIVI: buffer_store_dword [[RET]]
532 ; GFX9: global_atomic_umax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
533 define amdgpu_kernel void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
535 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
536 %val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
537 store i32 %val, i32 addrspace(1)* %out2
541 ; GCN-LABEL: {{^}}atomic_min_i32_offset:
542 ; SIVI: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
544 ; GFX9: global_atomic_smin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
545 define amdgpu_kernel void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) {
547 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
548 %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
552 ; GCN-LABEL: {{^}}atomic_min_i32_ret_offset:
553 ; SIVI: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
554 ; SIVI: buffer_store_dword [[RET]]
556 ; GFX9: global_atomic_smin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
557 define amdgpu_kernel void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
559 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
560 %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
561 store i32 %val, i32 addrspace(1)* %out2
565 ; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset:
; Result is unused, so no glc bit is expected; anchor the line end to
; catch an unexpected trailing modifier (consistent with sibling tests).
566 ; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
567 ; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
568 ; GFX9: global_atomic_smin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
569 define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
571 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
572 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
573 %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
577 ; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
578 ; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
579 ; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
580 ; SIVI: buffer_store_dword [[RET]]
582 ; GFX9: global_atomic_smin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
583 define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
585 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
586 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
587 %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
588 store i32 %val, i32 addrspace(1)* %out2
592 ; GCN-LABEL: {{^}}atomic_min_i32:
593 ; SIVI: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
595 ; GFX9: global_atomic_smin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}}
596 define amdgpu_kernel void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) {
598 %val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
602 ; GCN-LABEL: {{^}}atomic_min_i32_ret:
603 ; SIVI: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
604 ; SIVI: buffer_store_dword [[RET]]
606 ; GFX9: global_atomic_smin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
607 define amdgpu_kernel void @atomic_min_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
609 %val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
610 store i32 %val, i32 addrspace(1)* %out2
614 ; GCN-LABEL: {{^}}atomic_min_i32_addr64:
615 ; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
616 ; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
617 ; GFX9: global_atomic_smin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
618 define amdgpu_kernel void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
620 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
621 %val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
625 ; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64:
626 ; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
627 ; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
628 ; SIVI: buffer_store_dword [[RET]]
630 ; GFX9: global_atomic_smin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
631 define amdgpu_kernel void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
633 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
634 %val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
635 store i32 %val, i32 addrspace(1)* %out2
639 ; GCN-LABEL: {{^}}atomic_umin_i32_offset:
640 ; SIVI: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
642 ; GFX9: global_atomic_umin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
; --- atomicrmw umin (unsigned min) on global (addrspace(1)) i32 ---
; Each test checks the selected machine instruction per target:
;   SI/VI -> buffer_atomic_umin (VI uses flat_atomic_umin for addr64 cases),
;   GFX9  -> global_atomic_umin with an immediate offset where applicable.
; "_ret" variants use the atomic's result, so the instruction must carry
; "glc" (return the pre-op value) and be followed by a store of [[RET]].
; NOTE(review): function bodies appear truncated here (no "ret void"/"}") —
; presumably an extraction artifact of this chunk; confirm against the full file.
643 define amdgpu_kernel void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) {
645 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
646 %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
; Result is used, so the SIVI buffer op and GFX9 global op must set glc.
650 ; GCN-LABEL: {{^}}atomic_umin_i32_ret_offset:
651 ; SIVI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
652 ; SIVI: buffer_store_dword [[RET]]
654 ; GFX9: global_atomic_umin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
655 define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
657 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
658 %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
659 store i32 %val, i32 addrspace(1)* %out2
; Runtime %index address: SI folds it into buffer addr64; VI must go through
; flat atomics (no addr64 MUBUF); GFX9 keeps the immediate offset on global_atomic.
663 ; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset:
664 ; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
665 ; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
666 ; GFX9: global_atomic_umin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
667 define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
669 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
670 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
671 %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
675 ; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
676 ; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
677 ; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
678 ; SIVI: buffer_store_dword [[RET]]
680 ; GFX9: global_atomic_umin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
681 define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
683 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
684 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
685 %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
686 store i32 %val, i32 addrspace(1)* %out2
; Zero-offset variants: no "offset:" operand may appear (anchored with {{$}}).
690 ; GCN-LABEL: {{^}}atomic_umin_i32:
691 ; SIVI: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
692 ; GFX9: global_atomic_umin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
693 define amdgpu_kernel void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) {
695 %val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
699 ; GCN-LABEL: {{^}}atomic_umin_i32_ret:
700 ; SIVI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
701 ; SIVI: buffer_store_dword [[RET]]
703 ; GFX9: global_atomic_umin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
704 define amdgpu_kernel void @atomic_umin_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
706 %val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
707 store i32 %val, i32 addrspace(1)* %out2
711 ; GCN-LABEL: {{^}}atomic_umin_i32_addr64:
712 ; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
713 ; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
714 ; GFX9: global_atomic_umin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
715 define amdgpu_kernel void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
717 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
718 %val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
722 ; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64:
723 ; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
724 ; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
725 ; SIVI: buffer_store_dword [[RET]]
727 ; GFX9: global_atomic_umin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
728 define amdgpu_kernel void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
730 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
731 %val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
732 store i32 %val, i32 addrspace(1)* %out2
; --- atomicrmw or on global (addrspace(1)) i32 ---
; Same test matrix as the umin block: {plain, ret, addr64, ret_addr64}
; x {with 16-byte immediate offset, without}. SIVI selects buffer_atomic_or,
; VI uses flat_atomic_or for addr64 cases, GFX9 selects global_atomic_or.
736 ; GCN-LABEL: {{^}}atomic_or_i32_offset:
737 ; SIVI: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
739 ; GFX9: global_atomic_or v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
740 define amdgpu_kernel void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) {
742 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
743 %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
747 ; GCN-LABEL: {{^}}atomic_or_i32_ret_offset:
748 ; SIVI: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
749 ; SIVI: buffer_store_dword [[RET]]
751 ; GFX9: global_atomic_or [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
752 define amdgpu_kernel void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
754 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
755 %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
756 store i32 %val, i32 addrspace(1)* %out2
760 ; GCN-LABEL: {{^}}atomic_or_i32_addr64_offset:
761 ; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
762 ; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; NOTE(review): the GFX9 line below is the only one in this family without a
; trailing {{$}} anchor, so extra operands (e.g. glc) would not be rejected —
; verify whether the anchor was dropped intentionally.
763 ; GFX9: global_atomic_or v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16
764 define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
766 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
767 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
768 %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
772 ; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
773 ; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
774 ; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
775 ; SIVI: buffer_store_dword [[RET]]
777 ; GFX9: global_atomic_or [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
778 define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
780 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
781 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
782 %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
783 store i32 %val, i32 addrspace(1)* %out2
787 ; GCN-LABEL: {{^}}atomic_or_i32:
788 ; SIVI: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
790 ; GFX9: global_atomic_or v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
791 define amdgpu_kernel void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) {
793 %val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
797 ; GCN-LABEL: {{^}}atomic_or_i32_ret:
798 ; SIVI: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
799 ; SIVI: buffer_store_dword [[RET]]
801 ; GFX9: global_atomic_or [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
802 define amdgpu_kernel void @atomic_or_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
804 %val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
805 store i32 %val, i32 addrspace(1)* %out2
809 ; GCN-LABEL: {{^}}atomic_or_i32_addr64:
810 ; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
811 ; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
812 ; GFX9: global_atomic_or v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
813 define amdgpu_kernel void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
815 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
816 %val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
820 ; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64:
821 ; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
822 ; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
823 ; SIVI: buffer_store_dword [[RET]]
825 ; GFX9: global_atomic_or [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
826 define amdgpu_kernel void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
828 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
829 %val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
830 store i32 %val, i32 addrspace(1)* %out2
; --- atomicrmw xchg on global (addrspace(1)) i32 and float ---
; xchg lowers to the hardware "swap" opcode on all targets (buffer_atomic_swap /
; flat_atomic_swap / global_atomic_swap). The f32 variant checks that a float
; exchange selects the same integer swap instruction (bit pattern exchange).
834 ; GCN-LABEL: {{^}}atomic_xchg_i32_offset:
835 ; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
837 ; GFX9: global_atomic_swap v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
838 define amdgpu_kernel void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) {
840 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
841 %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
845 ; GCN-LABEL: {{^}}atomic_xchg_f32_offset:
846 ; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
848 ; GFX9: global_atomic_swap v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
849 define amdgpu_kernel void @atomic_xchg_f32_offset(float addrspace(1)* %out, float %in) {
851 %gep = getelementptr float, float addrspace(1)* %out, i64 4
852 %val = atomicrmw volatile xchg float addrspace(1)* %gep, float %in seq_cst
856 ; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:
857 ; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
858 ; SIVI: buffer_store_dword [[RET]]
860 ; GFX9: global_atomic_swap [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
861 define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
863 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
864 %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
865 store i32 %val, i32 addrspace(1)* %out2
869 ; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
870 ; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
871 ; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
872 ; GFX9: global_atomic_swap v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
873 define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
875 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
876 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
877 %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
881 ; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
882 ; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
883 ; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
884 ; SIVI: buffer_store_dword [[RET]]
886 ; GFX9: global_atomic_swap [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
887 define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
889 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
890 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
891 %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
892 store i32 %val, i32 addrspace(1)* %out2
896 ; GCN-LABEL: {{^}}atomic_xchg_i32:
897 ; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
898 ; GFX9: global_atomic_swap v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
899 define amdgpu_kernel void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) {
901 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
905 ; GCN-LABEL: {{^}}atomic_xchg_i32_ret:
906 ; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
907 ; SIVI: buffer_store_dword [[RET]]
909 ; GFX9: global_atomic_swap [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
910 define amdgpu_kernel void @atomic_xchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
912 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
913 store i32 %val, i32 addrspace(1)* %out2
917 ; GCN-LABEL: {{^}}atomic_xchg_i32_addr64:
918 ; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
919 ; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
920 ; GFX9: global_atomic_swap v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
921 define amdgpu_kernel void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
923 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
924 %val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
928 ; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
929 ; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
930 ; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
931 ; SIVI: buffer_store_dword [[RET]]
933 ; GFX9: global_atomic_swap [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
934 define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
936 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
937 %val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
938 store i32 %val, i32 addrspace(1)* %out2
; --- cmpxchg on global (addrspace(1)) i32 ---
; cmpxchg lowers to the "cmpswap" opcodes, which take a data register PAIR
; (new value + compare value), hence the v[{{...}}] range operands below.
; The "_ret" tests extract field 0 (the loaded value) of the {i32, i1} result
; and store it, so only the low register of the pair ([[RET]]) is checked.
942 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_offset:
943 ; SIVI: buffer_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
945 ; GFX9: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
946 define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32 addrspace(1)* %out, i32 %in, i32 %old) {
948 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
949 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
953 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset:
954 ; SIVI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
955 ; SIVI: buffer_store_dword v[[RET]]
957 ; GFX9: global_atomic_cmpswap [[RET:v[0-9]+]], v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
958 define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) {
960 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
961 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
962 %extract0 = extractvalue { i32, i1 } %val, 0
963 store i32 %extract0, i32 addrspace(1)* %out2
967 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
968 ; SI: buffer_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
970 ; VI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
971 ; GFX9: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
972 define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
974 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
975 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
976 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
980 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset:
981 ; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
982 ; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
983 ; SIVI: buffer_store_dword v[[RET]]
985 ; GFX9: global_atomic_cmpswap v[[RET:[0-9]+]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
986 define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) {
988 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
989 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
990 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
991 %extract0 = extractvalue { i32, i1 } %val, 0
992 store i32 %extract0, i32 addrspace(1)* %out2
996 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32:
997 ; SIVI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
999 ; GFX9: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]{{$}}
1000 define amdgpu_kernel void @atomic_cmpxchg_i32(i32 addrspace(1)* %out, i32 %in, i32 %old) {
1002 %val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
1006 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret:
1007 ; SIVI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
1008 ; SIVI: buffer_store_dword v[[RET]]
1010 ; GFX9: global_atomic_cmpswap [[RET:v[0-9]+]], v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
1011 define amdgpu_kernel void @atomic_cmpxchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) {
1013 %val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
1014 %extract0 = extractvalue { i32, i1 } %val, 0
1015 store i32 %extract0, i32 addrspace(1)* %out2
1019 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64:
1020 ; SI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
1021 ; VI: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
1022 ; GFX9: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]{{$}}
1023 define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
1025 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1026 %val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst
1030 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64:
1031 ; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
1032 ; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
1033 ; SIVI: buffer_store_dword v[[RET]]
1035 ; GFX9: global_atomic_cmpswap v[[RET:[0-9]+]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] glc{{$}}
1036 define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) {
1038 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1039 %val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst
1040 %extract0 = extractvalue { i32, i1 } %val, 0
1041 store i32 %extract0, i32 addrspace(1)* %out2
; --- atomicrmw xor on global (addrspace(1)) i32 ---
; Same matrix as the or/umin families: buffer_atomic_xor on SIVI,
; flat_atomic_xor on VI for addr64 cases, global_atomic_xor on GFX9.
1045 ; GCN-LABEL: {{^}}atomic_xor_i32_offset:
1046 ; SIVI: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
1048 ; GFX9: global_atomic_xor v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
1049 define amdgpu_kernel void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {
1051 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
1052 %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
1056 ; GCN-LABEL: {{^}}atomic_xor_i32_ret_offset:
1057 ; SIVI: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
1058 ; SIVI: buffer_store_dword [[RET]]
; NOTE(review): unlike sibling "_ret" tests, the GFX9 line below does not bind
; [[RET]], so the GFX9 store of the returned value goes unchecked — verify
; whether this was intentional or should capture/check like line 1086 below.
1060 ; GFX9: global_atomic_xor v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
1061 define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
1063 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
1064 %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
1065 store i32 %val, i32 addrspace(1)* %out2
1069 ; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset:
1070 ; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
1071 ; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
1072 ; GFX9: global_atomic_xor v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
1073 define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
1075 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1076 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
1077 %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
1081 ; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
1082 ; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
1083 ; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
1084 ; SIVI: buffer_store_dword [[RET]]
1086 ; GFX9: global_atomic_xor [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
1087 define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
1089 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1090 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
1091 %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
1092 store i32 %val, i32 addrspace(1)* %out2
1096 ; GCN-LABEL: {{^}}atomic_xor_i32:
1097 ; SIVI: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
1098 ; GFX9: global_atomic_xor v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
1099 define amdgpu_kernel void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) {
1101 %val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
1105 ; GCN-LABEL: {{^}}atomic_xor_i32_ret:
1106 ; SIVI: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
1107 ; SIVI: buffer_store_dword [[RET]]
1109 ; GFX9: global_atomic_xor [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
1110 define amdgpu_kernel void @atomic_xor_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
1112 %val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
1113 store i32 %val, i32 addrspace(1)* %out2
1117 ; GCN-LABEL: {{^}}atomic_xor_i32_addr64:
1118 ; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
1119 ; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
1120 ; GFX9: global_atomic_xor v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
1121 define amdgpu_kernel void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
1123 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1124 %val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
1128 ; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64:
1129 ; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
1130 ; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
1131 ; SIVI: buffer_store_dword [[RET]]
1133 ; GFX9: global_atomic_xor [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
1134 define amdgpu_kernel void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
1136 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1137 %val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
1138 store i32 %val, i32 addrspace(1)* %out2
; --- seq_cst atomic loads from global (addrspace(1)) i32/float ---
; Atomic loads must bypass the L1 cache, so every variant requires "glc" on
; the load instruction (buffer_load_dword / flat_load_dword / global_load_dword).
; Each kernel stores the loaded value so the load is not dead-code-eliminated.
1142 ; GCN-LABEL: {{^}}atomic_load_i32_offset:
1143 ; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
1144 ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
1145 ; SIVI: buffer_store_dword [[RET]]
1147 ; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
1148 define amdgpu_kernel void @atomic_load_i32_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
1150 %gep = getelementptr i32, i32 addrspace(1)* %in, i64 4
1151 %val = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4
1152 store i32 %val, i32 addrspace(1)* %out
; Negative offset: SI cannot encode it immediately (falls back to addr64 VGPR
; addressing); VI materializes the adjusted base with s_add/s_addc; GFX9 can
; encode a signed immediate offset directly.
1156 ; GCN-LABEL: {{^}}atomic_load_i32_negoffset:
1157 ; SI: buffer_load_dword [[RET:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
1159 ; VI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0xfffffe00
1160 ; VI-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, -1
1161 ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
1163 ; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:-512 glc{{$}}
1164 define amdgpu_kernel void @atomic_load_i32_negoffset(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
1166 %gep = getelementptr i32, i32 addrspace(1)* %in, i64 -128
1167 %val = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4
1168 store i32 %val, i32 addrspace(1)* %out
; f32 atomic load selects the same dword load as i32 (type-agnostic bits).
1172 ; GCN-LABEL: {{^}}atomic_load_f32_offset:
1173 ; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
1174 ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
1175 ; SIVI: buffer_store_dword [[RET]]
1177 ; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
1178 define amdgpu_kernel void @atomic_load_f32_offset(float addrspace(1)* %in, float addrspace(1)* %out) {
1180 %gep = getelementptr float, float addrspace(1)* %in, i64 4
1181 %val = load atomic float, float addrspace(1)* %gep seq_cst, align 4
1182 store float %val, float addrspace(1)* %out
1186 ; GCN-LABEL: {{^}}atomic_load_i32:
1187 ; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
1188 ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc
1189 ; SIVI: buffer_store_dword [[RET]]
1191 ; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc
1192 define amdgpu_kernel void @atomic_load_i32(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
1194 %val = load atomic i32, i32 addrspace(1)* %in seq_cst, align 4
1195 store i32 %val, i32 addrspace(1)* %out
1199 ; GCN-LABEL: {{^}}atomic_load_i32_addr64_offset:
1200 ; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
1201 ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
1202 ; SIVI: buffer_store_dword [[RET]]
1204 ; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
1205 define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) {
1207 %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index
1208 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
1209 %val = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4
1210 store i32 %val, i32 addrspace(1)* %out
1214 ; GCN-LABEL: {{^}}atomic_load_i32_addr64:
1215 ; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
1216 ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
1217 ; SIVI: buffer_store_dword [[RET]]
1219 ; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
1220 define amdgpu_kernel void @atomic_load_i32_addr64(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) {
1222 %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index
1223 %val = load atomic i32, i32 addrspace(1)* %ptr seq_cst, align 4
1224 store i32 %val, i32 addrspace(1)* %out
1228 ; GCN-LABEL: {{^}}atomic_load_f32_addr64_offset:
1229 ; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
1230 ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
1231 ; SIVI: buffer_store_dword [[RET]]
1233 ; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
1234 define amdgpu_kernel void @atomic_load_f32_addr64_offset(float addrspace(1)* %in, float addrspace(1)* %out, i64 %index) {
1236 %ptr = getelementptr float, float addrspace(1)* %in, i64 %index
1237 %gep = getelementptr float, float addrspace(1)* %ptr, i64 4
1238 %val = load atomic float, float addrspace(1)* %gep seq_cst, align 4
1239 store float %val, float addrspace(1)* %out
; --- seq_cst atomic stores to global (addrspace(1)) i32/float ---
; Atomic stores select plain dword stores (buffer_store_dword on SI,
; flat_store_dword on VI, global_store_dword on GFX9); the {{$}} anchors
; verify no unexpected cache-policy suffix is emitted on the store itself.
; f32 variants mirror the i32 ones: same dword store, type-agnostic bits.
1243 ; GCN-LABEL: {{^}}atomic_store_i32_offset:
1244 ; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
1245 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
1246 ; GFX9: global_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
1247 define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32 addrspace(1)* %out) {
1249 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
1250 store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4
1254 ; GCN-LABEL: {{^}}atomic_store_i32:
1255 ; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
1256 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
1257 ; GFX9: global_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
1258 define amdgpu_kernel void @atomic_store_i32(i32 %in, i32 addrspace(1)* %out) {
1260 store atomic i32 %in, i32 addrspace(1)* %out seq_cst, align 4
1264 ; GCN-LABEL: {{^}}atomic_store_f32:
1265 ; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
1266 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
1267 ; GFX9: global_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}}
1268 define amdgpu_kernel void @atomic_store_f32(float %in, float addrspace(1)* %out) {
1270 store atomic float %in, float addrspace(1)* %out seq_cst, align 4
1274 ; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset:
1275 ; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
1276 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
1277 ; GFX9: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
1278 define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(1)* %out, i64 %index) {
1280 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1281 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
1282 store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4
1286 ; GCN-LABEL: {{^}}atomic_store_f32_addr64_offset:
1287 ; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
1288 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
1289 ; GFX9: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
1290 define amdgpu_kernel void @atomic_store_f32_addr64_offset(float %in, float addrspace(1)* %out, i64 %index) {
1292 %ptr = getelementptr float, float addrspace(1)* %out, i64 %index
1293 %gep = getelementptr float, float addrspace(1)* %ptr, i64 4
1294 store atomic float %in, float addrspace(1)* %gep seq_cst, align 4
1298 ; GCN-LABEL: {{^}}atomic_store_i32_addr64:
1299 ; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
1300 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
1301 ; GFX9: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}}
1302 define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32 addrspace(1)* %out, i64 %index) {
1304 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1305 store atomic i32 %in, i32 addrspace(1)* %ptr seq_cst, align 4
1309 ; GCN-LABEL: {{^}}atomic_store_f32_addr64:
1310 ; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
1311 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
1312 ; GFX9: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}}
1313 define amdgpu_kernel void @atomic_store_f32_addr64(float %in, float addrspace(1)* %out, i64 %index) {
1315 %ptr = getelementptr float, float addrspace(1)* %out, i64 %index
1316 store atomic float %in, float addrspace(1)* %ptr seq_cst, align 4