1 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIVI %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIVI %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5 ; GCN-LABEL: {{^}}atomic_add_i32_offset:
6 ; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
7 ; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
8 define amdgpu_kernel void @atomic_add_i32_offset(i32* %out, i32 %in) {
10 %gep = getelementptr i32, i32* %out, i32 4
11 %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
15 ; GCN-LABEL: {{^}}atomic_add_i32_max_offset:
16 ; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
17 ; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:4092{{$}}
18 define amdgpu_kernel void @atomic_add_i32_max_offset(i32* %out, i32 %in) {
20 %gep = getelementptr i32, i32* %out, i32 1023
21 %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
25 ; GCN-LABEL: {{^}}atomic_add_i32_max_offset_p1:
26 ; GCN: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
27 define amdgpu_kernel void @atomic_add_i32_max_offset_p1(i32* %out, i32 %in) {
29 %gep = getelementptr i32, i32* %out, i32 1024
30 %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
34 ; GCN-LABEL: {{^}}atomic_add_i32_ret_offset:
35 ; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
36 ; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
37 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
38 define amdgpu_kernel void @atomic_add_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
40 %gep = getelementptr i32, i32* %out, i32 4
41 %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
42 store i32 %val, i32* %out2
46 ; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset:
47 ; CIVI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
48 ; GFX9: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
49 define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
51 %ptr = getelementptr i32, i32* %out, i64 %index
52 %gep = getelementptr i32, i32* %ptr, i32 4
53 %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
57 ; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
58 ; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
59 ; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
60 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
61 define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
63 %ptr = getelementptr i32, i32* %out, i64 %index
64 %gep = getelementptr i32, i32* %ptr, i32 4
65 %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
66 store i32 %val, i32* %out2
70 ; GCN-LABEL: {{^}}atomic_add_i32:
71 ; GCN: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
72 define amdgpu_kernel void @atomic_add_i32(i32* %out, i32 %in) {
74 %val = atomicrmw volatile add i32* %out, i32 %in seq_cst
78 ; GCN-LABEL: {{^}}atomic_add_i32_ret:
79 ; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
80 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
81 define amdgpu_kernel void @atomic_add_i32_ret(i32* %out, i32* %out2, i32 %in) {
83 %val = atomicrmw volatile add i32* %out, i32 %in seq_cst
84 store i32 %val, i32* %out2
88 ; GCN-LABEL: {{^}}atomic_add_i32_addr64:
89 ; GCN: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
90 define amdgpu_kernel void @atomic_add_i32_addr64(i32* %out, i32 %in, i64 %index) {
92 %ptr = getelementptr i32, i32* %out, i64 %index
93 %val = atomicrmw volatile add i32* %ptr, i32 %in seq_cst
97 ; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64:
98 ; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
99 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
100 define amdgpu_kernel void @atomic_add_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
102 %ptr = getelementptr i32, i32* %out, i64 %index
103 %val = atomicrmw volatile add i32* %ptr, i32 %in seq_cst
104 store i32 %val, i32* %out2
108 ; GCN-LABEL: {{^}}atomic_and_i32_offset:
109 ; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
110 ; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
111 define amdgpu_kernel void @atomic_and_i32_offset(i32* %out, i32 %in) {
113 %gep = getelementptr i32, i32* %out, i32 4
114 %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
118 ; GCN-LABEL: {{^}}atomic_and_i32_ret_offset:
119 ; CIVI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
120 ; GFX9: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
121 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
122 define amdgpu_kernel void @atomic_and_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
124 %gep = getelementptr i32, i32* %out, i32 4
125 %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
126 store i32 %val, i32* %out2
130 ; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset:
131 ; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
132 ; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
133 define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
135 %ptr = getelementptr i32, i32* %out, i64 %index
136 %gep = getelementptr i32, i32* %ptr, i32 4
137 %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
141 ; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
142 ; CIVI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
143 ; GFX9: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
144 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
145 define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
147 %ptr = getelementptr i32, i32* %out, i64 %index
148 %gep = getelementptr i32, i32* %ptr, i32 4
149 %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
150 store i32 %val, i32* %out2
154 ; GCN-LABEL: {{^}}atomic_and_i32:
155 ; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
156 define amdgpu_kernel void @atomic_and_i32(i32* %out, i32 %in) {
158 %val = atomicrmw volatile and i32* %out, i32 %in seq_cst
162 ; GCN-LABEL: {{^}}atomic_and_i32_ret:
163 ; GCN: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
164 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
165 define amdgpu_kernel void @atomic_and_i32_ret(i32* %out, i32* %out2, i32 %in) {
167 %val = atomicrmw volatile and i32* %out, i32 %in seq_cst
168 store i32 %val, i32* %out2
172 ; GCN-LABEL: {{^}}atomic_and_i32_addr64:
173 ; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
174 define amdgpu_kernel void @atomic_and_i32_addr64(i32* %out, i32 %in, i64 %index) {
176 %ptr = getelementptr i32, i32* %out, i64 %index
177 %val = atomicrmw volatile and i32* %ptr, i32 %in seq_cst
181 ; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64:
182 ; GCN: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
183 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
184 define amdgpu_kernel void @atomic_and_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
186 %ptr = getelementptr i32, i32* %out, i64 %index
187 %val = atomicrmw volatile and i32* %ptr, i32 %in seq_cst
188 store i32 %val, i32* %out2
192 ; GCN-LABEL: {{^}}atomic_sub_i32_offset:
193 ; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
194 ; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
195 define amdgpu_kernel void @atomic_sub_i32_offset(i32* %out, i32 %in) {
197 %gep = getelementptr i32, i32* %out, i32 4
198 %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
202 ; GCN-LABEL: {{^}}atomic_sub_i32_ret_offset:
203 ; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
204 ; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
205 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
206 define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
208 %gep = getelementptr i32, i32* %out, i32 4
209 %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
210 store i32 %val, i32* %out2
214 ; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset:
215 ; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
216 ; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
217 define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
219 %ptr = getelementptr i32, i32* %out, i64 %index
220 %gep = getelementptr i32, i32* %ptr, i32 4
221 %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
225 ; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
226 ; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
227 ; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
228 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
229 define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
231 %ptr = getelementptr i32, i32* %out, i64 %index
232 %gep = getelementptr i32, i32* %ptr, i32 4
233 %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
234 store i32 %val, i32* %out2
238 ; GCN-LABEL: {{^}}atomic_sub_i32:
239 ; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
240 define amdgpu_kernel void @atomic_sub_i32(i32* %out, i32 %in) {
242 %val = atomicrmw volatile sub i32* %out, i32 %in seq_cst
246 ; GCN-LABEL: {{^}}atomic_sub_i32_ret:
247 ; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
248 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
249 define amdgpu_kernel void @atomic_sub_i32_ret(i32* %out, i32* %out2, i32 %in) {
251 %val = atomicrmw volatile sub i32* %out, i32 %in seq_cst
252 store i32 %val, i32* %out2
256 ; GCN-LABEL: {{^}}atomic_sub_i32_addr64:
257 ; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
258 define amdgpu_kernel void @atomic_sub_i32_addr64(i32* %out, i32 %in, i64 %index) {
260 %ptr = getelementptr i32, i32* %out, i64 %index
261 %val = atomicrmw volatile sub i32* %ptr, i32 %in seq_cst
265 ; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64:
266 ; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
267 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
268 define amdgpu_kernel void @atomic_sub_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
270 %ptr = getelementptr i32, i32* %out, i64 %index
271 %val = atomicrmw volatile sub i32* %ptr, i32 %in seq_cst
272 store i32 %val, i32* %out2
276 ; GCN-LABEL: {{^}}atomic_max_i32_offset:
277 ; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
278 ; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
279 define amdgpu_kernel void @atomic_max_i32_offset(i32* %out, i32 %in) {
281 %gep = getelementptr i32, i32* %out, i32 4
282 %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
286 ; GCN-LABEL: {{^}}atomic_max_i32_ret_offset:
287 ; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
288 ; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
289 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
290 define amdgpu_kernel void @atomic_max_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
292 %gep = getelementptr i32, i32* %out, i32 4
293 %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
294 store i32 %val, i32* %out2
298 ; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset:
299 ; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
300 ; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
301 define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
303 %ptr = getelementptr i32, i32* %out, i64 %index
304 %gep = getelementptr i32, i32* %ptr, i32 4
305 %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
309 ; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
310 ; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
311 ; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
312 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
313 define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
315 %ptr = getelementptr i32, i32* %out, i64 %index
316 %gep = getelementptr i32, i32* %ptr, i32 4
317 %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
318 store i32 %val, i32* %out2
322 ; GCN-LABEL: {{^}}atomic_max_i32:
323 ; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
324 define amdgpu_kernel void @atomic_max_i32(i32* %out, i32 %in) {
326 %val = atomicrmw volatile max i32* %out, i32 %in seq_cst
330 ; GCN-LABEL: {{^}}atomic_max_i32_ret:
331 ; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
332 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
333 define amdgpu_kernel void @atomic_max_i32_ret(i32* %out, i32* %out2, i32 %in) {
335 %val = atomicrmw volatile max i32* %out, i32 %in seq_cst
336 store i32 %val, i32* %out2
340 ; GCN-LABEL: {{^}}atomic_max_i32_addr64:
341 ; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
342 define amdgpu_kernel void @atomic_max_i32_addr64(i32* %out, i32 %in, i64 %index) {
344 %ptr = getelementptr i32, i32* %out, i64 %index
345 %val = atomicrmw volatile max i32* %ptr, i32 %in seq_cst
349 ; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64:
350 ; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
351 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
352 define amdgpu_kernel void @atomic_max_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
354 %ptr = getelementptr i32, i32* %out, i64 %index
355 %val = atomicrmw volatile max i32* %ptr, i32 %in seq_cst
356 store i32 %val, i32* %out2
360 ; GCN-LABEL: {{^}}atomic_umax_i32_offset:
361 ; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
362 ; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
363 define amdgpu_kernel void @atomic_umax_i32_offset(i32* %out, i32 %in) {
365 %gep = getelementptr i32, i32* %out, i32 4
366 %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
370 ; GCN-LABEL: {{^}}atomic_umax_i32_ret_offset:
371 ; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
372 ; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
373 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
374 define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
376 %gep = getelementptr i32, i32* %out, i32 4
377 %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
378 store i32 %val, i32* %out2
382 ; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset:
383 ; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
384 ; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
385 define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
387 %ptr = getelementptr i32, i32* %out, i64 %index
388 %gep = getelementptr i32, i32* %ptr, i32 4
389 %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
393 ; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
394 ; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
395 ; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
396 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
397 define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
399 %ptr = getelementptr i32, i32* %out, i64 %index
400 %gep = getelementptr i32, i32* %ptr, i32 4
401 %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
402 store i32 %val, i32* %out2
406 ; GCN-LABEL: {{^}}atomic_umax_i32:
407 ; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
408 define amdgpu_kernel void @atomic_umax_i32(i32* %out, i32 %in) {
410 %val = atomicrmw volatile umax i32* %out, i32 %in seq_cst
414 ; GCN-LABEL: {{^}}atomic_umax_i32_ret:
415 ; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
416 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
417 define amdgpu_kernel void @atomic_umax_i32_ret(i32* %out, i32* %out2, i32 %in) {
419 %val = atomicrmw volatile umax i32* %out, i32 %in seq_cst
420 store i32 %val, i32* %out2
424 ; GCN-LABEL: {{^}}atomic_umax_i32_addr64:
425 ; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
426 define amdgpu_kernel void @atomic_umax_i32_addr64(i32* %out, i32 %in, i64 %index) {
428 %ptr = getelementptr i32, i32* %out, i64 %index
429 %val = atomicrmw volatile umax i32* %ptr, i32 %in seq_cst
433 ; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64:
434 ; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
435 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
436 define amdgpu_kernel void @atomic_umax_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
438 %ptr = getelementptr i32, i32* %out, i64 %index
439 %val = atomicrmw volatile umax i32* %ptr, i32 %in seq_cst
440 store i32 %val, i32* %out2
444 ; GCN-LABEL: {{^}}atomic_min_i32_offset:
445 ; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
446 ; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
447 define amdgpu_kernel void @atomic_min_i32_offset(i32* %out, i32 %in) {
449 %gep = getelementptr i32, i32* %out, i32 4
450 %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
454 ; GCN-LABEL: {{^}}atomic_min_i32_ret_offset:
455 ; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
456 ; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
457 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
458 define amdgpu_kernel void @atomic_min_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
460 %gep = getelementptr i32, i32* %out, i32 4
461 %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
462 store i32 %val, i32* %out2
466 ; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset:
467 ; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
468 ; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
469 define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
471 %ptr = getelementptr i32, i32* %out, i64 %index
472 %gep = getelementptr i32, i32* %ptr, i32 4
473 %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
477 ; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
478 ; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
479 ; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
480 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
481 define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
483 %ptr = getelementptr i32, i32* %out, i64 %index
484 %gep = getelementptr i32, i32* %ptr, i32 4
485 %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
486 store i32 %val, i32* %out2
490 ; GCN-LABEL: {{^}}atomic_min_i32:
491 ; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
492 define amdgpu_kernel void @atomic_min_i32(i32* %out, i32 %in) {
494 %val = atomicrmw volatile min i32* %out, i32 %in seq_cst
498 ; GCN-LABEL: {{^}}atomic_min_i32_ret:
499 ; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
500 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
501 define amdgpu_kernel void @atomic_min_i32_ret(i32* %out, i32* %out2, i32 %in) {
503 %val = atomicrmw volatile min i32* %out, i32 %in seq_cst
504 store i32 %val, i32* %out2
508 ; GCN-LABEL: {{^}}atomic_min_i32_addr64:
509 ; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
510 define amdgpu_kernel void @atomic_min_i32_addr64(i32* %out, i32 %in, i64 %index) {
512 %ptr = getelementptr i32, i32* %out, i64 %index
513 %val = atomicrmw volatile min i32* %ptr, i32 %in seq_cst
517 ; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64:
518 ; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
519 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
520 define amdgpu_kernel void @atomic_min_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
522 %ptr = getelementptr i32, i32* %out, i64 %index
523 %val = atomicrmw volatile min i32* %ptr, i32 %in seq_cst
524 store i32 %val, i32* %out2
528 ; GCN-LABEL: {{^}}atomic_umin_i32_offset:
529 ; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
530 ; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
531 define amdgpu_kernel void @atomic_umin_i32_offset(i32* %out, i32 %in) {
533 %gep = getelementptr i32, i32* %out, i32 4
534 %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
538 ; GCN-LABEL: {{^}}atomic_umin_i32_ret_offset:
539 ; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
540 ; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
541 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
542 define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
544 %gep = getelementptr i32, i32* %out, i32 4
545 %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
546 store i32 %val, i32* %out2
550 ; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset:
551 ; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
552 ; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
553 define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
555 %ptr = getelementptr i32, i32* %out, i64 %index
556 %gep = getelementptr i32, i32* %ptr, i32 4
557 %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
561 ; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
562 ; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
563 ; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
564 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
565 define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
567 %ptr = getelementptr i32, i32* %out, i64 %index
568 %gep = getelementptr i32, i32* %ptr, i32 4
569 %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
570 store i32 %val, i32* %out2
574 ; GCN-LABEL: {{^}}atomic_umin_i32:
575 ; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
576 define amdgpu_kernel void @atomic_umin_i32(i32* %out, i32 %in) {
578 %val = atomicrmw volatile umin i32* %out, i32 %in seq_cst
582 ; GCN-LABEL: {{^}}atomic_umin_i32_ret:
583 ; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
584 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
585 define amdgpu_kernel void @atomic_umin_i32_ret(i32* %out, i32* %out2, i32 %in) {
587 %val = atomicrmw volatile umin i32* %out, i32 %in seq_cst
588 store i32 %val, i32* %out2
592 ; GCN-LABEL: {{^}}atomic_umin_i32_addr64:
593 ; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
594 define amdgpu_kernel void @atomic_umin_i32_addr64(i32* %out, i32 %in, i64 %index) {
596 %ptr = getelementptr i32, i32* %out, i64 %index
597 %val = atomicrmw volatile umin i32* %ptr, i32 %in seq_cst
601 ; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64:
602 ; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
603 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]{{$}}
604 define amdgpu_kernel void @atomic_umin_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
606 %ptr = getelementptr i32, i32* %out, i64 %index
607 %val = atomicrmw volatile umin i32* %ptr, i32 %in seq_cst
608 store i32 %val, i32* %out2
612 ; GCN-LABEL: {{^}}atomic_or_i32_offset:
613 ; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
614 ; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
615 define amdgpu_kernel void @atomic_or_i32_offset(i32* %out, i32 %in) {
617 %gep = getelementptr i32, i32* %out, i32 4
618 %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
622 ; GCN-LABEL: {{^}}atomic_or_i32_ret_offset:
623 ; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
624 ; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
625 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
626 define amdgpu_kernel void @atomic_or_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
628 %gep = getelementptr i32, i32* %out, i32 4
629 %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
630 store i32 %val, i32* %out2
634 ; GCN-LABEL: {{^}}atomic_or_i32_addr64_offset:
635 ; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
636 ; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
637 define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
639 %ptr = getelementptr i32, i32* %out, i64 %index
640 %gep = getelementptr i32, i32* %ptr, i32 4
641 %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
645 ; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
646 ; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
647 ; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
648 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
649 define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
651 %ptr = getelementptr i32, i32* %out, i64 %index
652 %gep = getelementptr i32, i32* %ptr, i32 4
653 %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
654 store i32 %val, i32* %out2
658 ; GCN-LABEL: {{^}}atomic_or_i32:
659 ; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
660 define amdgpu_kernel void @atomic_or_i32(i32* %out, i32 %in) {
662 %val = atomicrmw volatile or i32* %out, i32 %in seq_cst
666 ; GCN-LABEL: {{^}}atomic_or_i32_ret:
667 ; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
668 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
669 define amdgpu_kernel void @atomic_or_i32_ret(i32* %out, i32* %out2, i32 %in) {
671 %val = atomicrmw volatile or i32* %out, i32 %in seq_cst
672 store i32 %val, i32* %out2
676 ; GCN-LABEL: {{^}}atomic_or_i32_addr64:
677 ; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
678 define amdgpu_kernel void @atomic_or_i32_addr64(i32* %out, i32 %in, i64 %index) {
680 %ptr = getelementptr i32, i32* %out, i64 %index
681 %val = atomicrmw volatile or i32* %ptr, i32 %in seq_cst
685 ; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64:
686 ; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
687 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
688 define amdgpu_kernel void @atomic_or_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
690 %ptr = getelementptr i32, i32* %out, i64 %index
691 %val = atomicrmw volatile or i32* %ptr, i32 %in seq_cst
692 store i32 %val, i32* %out2
696 ; GCN-LABEL: {{^}}atomic_xchg_i32_offset:
697 ; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
698 ; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
699 define amdgpu_kernel void @atomic_xchg_i32_offset(i32* %out, i32 %in) {
701 %gep = getelementptr i32, i32* %out, i32 4
702 %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
706 ; GCN-LABEL: {{^}}atomic_xchg_f32_offset:
707 ; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
708 ; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
709 define amdgpu_kernel void @atomic_xchg_f32_offset(float* %out, float %in) {
711 %gep = getelementptr float, float* %out, i32 4
712 %val = atomicrmw volatile xchg float* %gep, float %in seq_cst
716 ; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:
717 ; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
718 ; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
719 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
720 define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
722 %gep = getelementptr i32, i32* %out, i32 4
723 %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
724 store i32 %val, i32* %out2
728 ; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
729 ; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
730 ; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
731 define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
733 %ptr = getelementptr i32, i32* %out, i64 %index
734 %gep = getelementptr i32, i32* %ptr, i32 4
735 %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
739 ; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
740 ; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
741 ; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
742 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
743 define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
745 %ptr = getelementptr i32, i32* %out, i64 %index
746 %gep = getelementptr i32, i32* %ptr, i32 4
747 %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
748 store i32 %val, i32* %out2
752 ; GCN-LABEL: {{^}}atomic_xchg_i32:
753 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
754 define amdgpu_kernel void @atomic_xchg_i32(i32* %out, i32 %in) {
756 %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst
760 ; GCN-LABEL: {{^}}atomic_xchg_i32_ret:
761 ; GCN: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
762 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
763 define amdgpu_kernel void @atomic_xchg_i32_ret(i32* %out, i32* %out2, i32 %in) {
765 %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst
766 store i32 %val, i32* %out2
770 ; GCN-LABEL: {{^}}atomic_xchg_i32_addr64:
771 ; GCN: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
772 define amdgpu_kernel void @atomic_xchg_i32_addr64(i32* %out, i32 %in, i64 %index) {
774 %ptr = getelementptr i32, i32* %out, i64 %index
775 %val = atomicrmw volatile xchg i32* %ptr, i32 %in seq_cst
779 ; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
780 ; GCN: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
781 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
782 define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
784 %ptr = getelementptr i32, i32* %out, i64 %index
785 %val = atomicrmw volatile xchg i32* %ptr, i32 %in seq_cst
786 store i32 %val, i32* %out2
792 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_offset:
793 ; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
794 ; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
795 define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32* %out, i32 %in, i32 %old) {
797 %gep = getelementptr i32, i32* %out, i32 4
798 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
; Compare-and-swap at %out + 4 i32 elements (16 bytes) with the i32 member of
; the result stored to %out2. The CIVI checks expect no offset modifier before
; glc; the GFX9 check expects offset:16 glc. The destination VGPR number is
; captured as RET and must be the data operand of the following store.
; Note: despite its name, %flag is element 0 of { i32, i1 } (the i32), not the i1.
802 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset:
803 ; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
804 ; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
805 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
806 define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32* %out, i32* %out2, i32 %in, i32 %old) {
808 %gep = getelementptr i32, i32* %out, i32 4
809 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
810 %flag = extractvalue { i32, i1 } %val, 0
811 store i32 %flag, i32* %out2
; Compare-and-swap at a runtime-indexed address plus a further 4 i32 elements
; (16 bytes); the result is unused. The CIVI checks expect no offset modifier;
; the GFX9 check expects the constant part as offset:16.
815 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
816 ; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
817 ; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
818 define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32* %out, i32 %in, i64 %index, i32 %old) {
820 %ptr = getelementptr i32, i32* %out, i64 %index
821 %gep = getelementptr i32, i32* %ptr, i32 4
822 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
; Compare-and-swap at a runtime-indexed address plus 4 i32 elements (16 bytes),
; with the i32 member of the result stored to %out2. The CIVI checks expect
; glc with no offset; the GFX9 check expects offset:16 glc. The destination
; VGPR number is captured as RET and reused by the flat_store_dword check.
; Note: despite its name, %flag is element 0 of { i32, i1 } (the i32), not the i1.
826 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset:
827 ; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
828 ; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
829 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
830 define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index, i32 %old) {
832 %ptr = getelementptr i32, i32* %out, i64 %index
833 %gep = getelementptr i32, i32* %ptr, i32 4
834 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
835 %flag = extractvalue { i32, i1 } %val, 0
836 store i32 %flag, i32* %out2
; Compare-and-swap directly on %out; the result is unused. Every run line
; expects flat_atomic_cmpswap with two register-range operands and nothing
; after them.
840 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32:
841 ; GCN: flat_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
842 define amdgpu_kernel void @atomic_cmpxchg_i32(i32* %out, i32 %in, i32 %old) {
844 %val = cmpxchg volatile i32* %out, i32 %old, i32 %in seq_cst seq_cst
; Compare-and-swap directly on %out with the i32 member of the result stored
; to %out2. Every run line expects a destination VGPR (number captured as RET),
; glc as the final token on the line, and a flat_store_dword that uses RET as
; its data operand. The glc check is anchored at end of line like the other
; returning cmpswap checks in this file, so stray trailing modifiers fail.
; Note: despite its name, %flag is element 0 of { i32, i1 } (the i32), not the i1.
848 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret:
849 ; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
850 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
851 define amdgpu_kernel void @atomic_cmpxchg_i32_ret(i32* %out, i32* %out2, i32 %in, i32 %old) {
853 %val = cmpxchg volatile i32* %out, i32 %old, i32 %in seq_cst seq_cst
854 %flag = extractvalue { i32, i1 } %val, 0
855 store i32 %flag, i32* %out2
; Compare-and-swap through a runtime-indexed flat pointer; the result is
; unused. Every run line expects flat_atomic_cmpswap with two register-range
; operands and nothing after them.
859 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64:
860 ; GCN: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
861 define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32* %out, i32 %in, i64 %index, i32 %old) {
863 %ptr = getelementptr i32, i32* %out, i64 %index
864 %val = cmpxchg volatile i32* %ptr, i32 %old, i32 %in seq_cst seq_cst
; Compare-and-swap through a runtime-indexed flat pointer with the i32 member
; of the result stored to %out2. Every run line expects a destination VGPR
; (number captured as RET), glc as the last token, and a flat_store_dword
; using RET as its data operand.
; Note: despite its name, %flag is element 0 of { i32, i1 } (the i32), not the i1.
868 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64:
869 ; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
870 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
871 define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index, i32 %old) {
873 %ptr = getelementptr i32, i32* %out, i64 %index
874 %val = cmpxchg volatile i32* %ptr, i32 %old, i32 %in seq_cst seq_cst
875 %flag = extractvalue { i32, i1 } %val, 0
876 store i32 %flag, i32* %out2
; Atomic xor at %out + 4 i32 elements (16 bytes); the result is unused.
; The CIVI checks expect flat_atomic_xor with no offset modifier; the GFX9
; check expects the displacement as offset:16.
880 ; GCN-LABEL: {{^}}atomic_xor_i32_offset:
881 ; CIVI: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
882 ; GFX9: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
883 define amdgpu_kernel void @atomic_xor_i32_offset(i32* %out, i32 %in) {
885 %gep = getelementptr i32, i32* %out, i32 4
886 %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst
; Atomic xor at %out + 4 i32 elements (16 bytes) whose result is stored to
; %out2. The CIVI checks expect glc with no offset; the GFX9 check expects
; offset:16 glc. The destination VGPR is captured as RET and must be the data
; operand of the following flat_store_dword.
890 ; GCN-LABEL: {{^}}atomic_xor_i32_ret_offset:
891 ; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
892 ; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
893 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
894 define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
896 %gep = getelementptr i32, i32* %out, i32 4
897 %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst
898 store i32 %val, i32* %out2
; Atomic xor at a runtime-indexed address plus a further 4 i32 elements
; (16 bytes); the result is unused. The CIVI checks expect no offset modifier;
; the GFX9 check expects the constant part as offset:16.
902 ; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset:
903 ; CIVI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
904 ; GFX9: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
905 define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
907 %ptr = getelementptr i32, i32* %out, i64 %index
908 %gep = getelementptr i32, i32* %ptr, i32 4
909 %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst
; Atomic xor at a runtime-indexed address plus 4 i32 elements (16 bytes) whose
; result is stored to %out2. The CIVI checks expect glc with no offset; the
; GFX9 check expects offset:16 glc. The destination VGPR is captured as RET
; and reused by the flat_store_dword check.
913 ; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
914 ; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
915 ; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
916 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
917 define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
919 %ptr = getelementptr i32, i32* %out, i64 %index
920 %gep = getelementptr i32, i32* %ptr, i32 4
921 %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst
922 store i32 %val, i32* %out2
; Atomic xor directly on %out; the result is unused. Every run line expects
; flat_atomic_xor with an address register pair and a data register, and
; nothing after the data register.
926 ; GCN-LABEL: {{^}}atomic_xor_i32:
927 ; GCN: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
928 define amdgpu_kernel void @atomic_xor_i32(i32* %out, i32 %in) {
930 %val = atomicrmw volatile xor i32* %out, i32 %in seq_cst
; Atomic xor directly on %out whose result is stored to %out2. Every run line
; expects a destination VGPR (captured as RET), glc as the last token, and a
; flat_store_dword using RET as its data operand.
934 ; GCN-LABEL: {{^}}atomic_xor_i32_ret:
935 ; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
936 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
937 define amdgpu_kernel void @atomic_xor_i32_ret(i32* %out, i32* %out2, i32 %in) {
939 %val = atomicrmw volatile xor i32* %out, i32 %in seq_cst
940 store i32 %val, i32* %out2
; Atomic xor through a runtime-indexed flat pointer; the result is unused.
; Every run line expects flat_atomic_xor with nothing after the data register.
944 ; GCN-LABEL: {{^}}atomic_xor_i32_addr64:
945 ; GCN: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
946 define amdgpu_kernel void @atomic_xor_i32_addr64(i32* %out, i32 %in, i64 %index) {
948 %ptr = getelementptr i32, i32* %out, i64 %index
949 %val = atomicrmw volatile xor i32* %ptr, i32 %in seq_cst
; Atomic xor through a runtime-indexed flat pointer whose result is stored to
; %out2. Every run line expects a destination VGPR (captured as RET), glc as
; the last token, and a flat_store_dword using RET as its data operand.
953 ; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64:
954 ; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
955 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
956 define amdgpu_kernel void @atomic_xor_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
958 %ptr = getelementptr i32, i32* %out, i64 %index
959 %val = atomicrmw volatile xor i32* %ptr, i32 %in seq_cst
960 store i32 %val, i32* %out2
; seq_cst atomic i32 load from %in + 4 elements (16 bytes), stored to %out.
; The CIVI checks expect flat_load_dword with glc and no offset; the GFX9 check
; expects offset:16 glc. The loaded VGPR is captured as RET and must be the
; data operand of the following flat_store_dword.
964 ; GCN-LABEL: {{^}}atomic_load_i32_offset:
965 ; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
966 ; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
967 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
968 define amdgpu_kernel void @atomic_load_i32_offset(i32* %in, i32* %out) {
970 %gep = getelementptr i32, i32* %in, i32 4
971 %val = load atomic i32, i32* %gep seq_cst, align 4
972 store i32 %val, i32* %out
; seq_cst atomic i32 load directly from %in, stored to %out. Every run line
; expects flat_load_dword with glc as the final token on the line; the check
; is anchored at end of line like the other atomic load checks in this file,
; so stray trailing modifiers fail. The loaded VGPR is captured as RET and
; must be the data operand of the following flat_store_dword.
976 ; GCN-LABEL: {{^}}atomic_load_i32:
977 ; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
978 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
979 define amdgpu_kernel void @atomic_load_i32(i32* %in, i32* %out) {
981 %val = load atomic i32, i32* %in seq_cst, align 4
982 store i32 %val, i32* %out
; seq_cst atomic i32 load from a runtime-indexed address plus 4 elements
; (16 bytes), stored to %out. The CIVI checks expect glc with no offset; the
; GFX9 check expects offset:16 glc. RET captures the loaded VGPR for the
; flat_store_dword check.
986 ; GCN-LABEL: {{^}}atomic_load_i32_addr64_offset:
987 ; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
988 ; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
989 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
990 define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32* %in, i32* %out, i64 %index) {
992 %ptr = getelementptr i32, i32* %in, i64 %index
993 %gep = getelementptr i32, i32* %ptr, i32 4
994 %val = load atomic i32, i32* %gep seq_cst, align 4
995 store i32 %val, i32* %out
; seq_cst atomic i32 load through a runtime-indexed flat pointer, stored to
; %out. Every run line expects flat_load_dword with glc as the last token; RET
; captures the loaded VGPR for the flat_store_dword check.
999 ; GCN-LABEL: {{^}}atomic_load_i32_addr64:
1000 ; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
1001 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
1002 define amdgpu_kernel void @atomic_load_i32_addr64(i32* %in, i32* %out, i64 %index) {
1004 %ptr = getelementptr i32, i32* %in, i64 %index
1005 %val = load atomic i32, i32* %ptr seq_cst, align 4
1006 store i32 %val, i32* %out
; seq_cst atomic i32 store to %out + 4 elements (16 bytes). The CIVI checks
; expect flat_store_dword with nothing after the data register; the GFX9 check
; expects the displacement as offset:16.
1010 ; GCN-LABEL: {{^}}atomic_store_i32_offset:
1011 ; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
1012 ; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
1013 define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32* %out) {
1015 %gep = getelementptr i32, i32* %out, i32 4
1016 store atomic i32 %in, i32* %gep seq_cst, align 4
; seq_cst atomic i32 store directly to %out. Every run line expects
; flat_store_dword with nothing after the data register.
1020 ; GCN-LABEL: {{^}}atomic_store_i32:
1021 ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
1022 define amdgpu_kernel void @atomic_store_i32(i32 %in, i32* %out) {
1024 store atomic i32 %in, i32* %out seq_cst, align 4
; seq_cst atomic i32 store to a runtime-indexed address plus 4 elements
; (16 bytes). The CIVI checks expect flat_store_dword with no offset modifier;
; the GFX9 check expects the constant part as offset:16.
1028 ; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset:
1029 ; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
1030 ; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
1031 define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32* %out, i64 %index) {
1033 %ptr = getelementptr i32, i32* %out, i64 %index
1034 %gep = getelementptr i32, i32* %ptr, i32 4
1035 store atomic i32 %in, i32* %gep seq_cst, align 4
; seq_cst atomic i32 store through a runtime-indexed flat pointer. Every run
; line expects flat_store_dword with nothing after the data register.
1039 ; GCN-LABEL: {{^}}atomic_store_i32_addr64:
1040 ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
1041 define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32* %out, i64 %index) {
1043 %ptr = getelementptr i32, i32* %out, i64 %index
1044 store atomic i32 %in, i32* %ptr seq_cst, align 4
; seq_cst atomic float load from %in + 4 elements (16 bytes), stored to %out.
; The expected instructions are the same flat_load_dword / flat_store_dword
; forms as the i32 load tests: the CIVI checks expect glc with no offset, the
; GFX9 check expects offset:16 glc, and RET ties the load result to the store.
1048 ; GCN-LABEL: {{^}}atomic_load_f32_offset:
1049 ; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
1050 ; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
1051 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
1052 define amdgpu_kernel void @atomic_load_f32_offset(float* %in, float* %out) {
1054 %gep = getelementptr float, float* %in, i32 4
1055 %val = load atomic float, float* %gep seq_cst, align 4
1056 store float %val, float* %out
; seq_cst atomic float load directly from %in, stored to %out. Every run line
; expects flat_load_dword with glc as the final token on the line; the check
; is anchored at end of line like the other atomic load checks in this file,
; so stray trailing modifiers fail. The loaded VGPR is captured as RET and
; must be the data operand of the following flat_store_dword.
1060 ; GCN-LABEL: {{^}}atomic_load_f32:
1061 ; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
1062 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
1063 define amdgpu_kernel void @atomic_load_f32(float* %in, float* %out) {
1065 %val = load atomic float, float* %in seq_cst, align 4
1066 store float %val, float* %out
; seq_cst atomic float load from a runtime-indexed address plus 4 elements
; (16 bytes), stored to %out. The CIVI checks expect glc with no offset; the
; GFX9 check expects offset:16 glc. RET captures the loaded VGPR for the
; flat_store_dword check.
1070 ; GCN-LABEL: {{^}}atomic_load_f32_addr64_offset:
1071 ; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
1072 ; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
1073 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
1074 define amdgpu_kernel void @atomic_load_f32_addr64_offset(float* %in, float* %out, i64 %index) {
1076 %ptr = getelementptr float, float* %in, i64 %index
1077 %gep = getelementptr float, float* %ptr, i32 4
1078 %val = load atomic float, float* %gep seq_cst, align 4
1079 store float %val, float* %out
; seq_cst atomic float load through a runtime-indexed flat pointer, stored to
; %out. Every run line expects flat_load_dword with glc as the last token; RET
; captures the loaded VGPR for the flat_store_dword check.
1083 ; GCN-LABEL: {{^}}atomic_load_f32_addr64:
1084 ; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
1085 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
1086 define amdgpu_kernel void @atomic_load_f32_addr64(float* %in, float* %out, i64 %index) {
1088 %ptr = getelementptr float, float* %in, i64 %index
1089 %val = load atomic float, float* %ptr seq_cst, align 4
1090 store float %val, float* %out
; seq_cst atomic float store to %out + 4 elements (16 bytes). The CIVI checks
; expect flat_store_dword with nothing after the data register; the GFX9 check
; expects the displacement as offset:16.
1094 ; GCN-LABEL: {{^}}atomic_store_f32_offset:
1095 ; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
1096 ; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
1097 define amdgpu_kernel void @atomic_store_f32_offset(float %in, float* %out) {
1099 %gep = getelementptr float, float* %out, i32 4
1100 store atomic float %in, float* %gep seq_cst, align 4
; seq_cst atomic float store directly to %out. Every run line expects
; flat_store_dword with nothing after the data register.
1104 ; GCN-LABEL: {{^}}atomic_store_f32:
1105 ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
1106 define amdgpu_kernel void @atomic_store_f32(float %in, float* %out) {
1108 store atomic float %in, float* %out seq_cst, align 4
; seq_cst atomic float store to a runtime-indexed address plus 4 elements
; (16 bytes). The CIVI checks expect flat_store_dword with no offset modifier;
; the GFX9 check expects the constant part as offset:16.
1112 ; GCN-LABEL: {{^}}atomic_store_f32_addr64_offset:
1113 ; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
1114 ; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
1115 define amdgpu_kernel void @atomic_store_f32_addr64_offset(float %in, float* %out, i64 %index) {
1117 %ptr = getelementptr float, float* %out, i64 %index
1118 %gep = getelementptr float, float* %ptr, i32 4
1119 store atomic float %in, float* %gep seq_cst, align 4
; seq_cst atomic float store through a runtime-indexed flat pointer. Every run
; line expects flat_store_dword with nothing after the data register.
1123 ; GCN-LABEL: {{^}}atomic_store_f32_addr64:
1124 ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
1125 define amdgpu_kernel void @atomic_store_f32_addr64(float %in, float* %out, i64 %index) {
1127 %ptr = getelementptr float, float* %out, i64 %index
1128 store atomic float %in, float* %ptr seq_cst, align 4