1 ; RUN: llc -march=amdgcn -mcpu=bonaire -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -amdgpu-atomic-optimizations=false -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; No-return `atomicrmw add` on a global i64 reached through a constant 4-element GEP:
; CI/VI and GFX9 are both expected to fold the displacement as offset:32 with nothing after it.
5 ; GCN-LABEL: {{^}}atomic_add_i64_offset:
6 ; CIVI: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
8 ; GFX9: global_atomic_add_x2 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:32{{$}}
9 define amdgpu_kernel void @atomic_add_i64_offset(i64 addrspace(1)* %out, i64 %in) {
11 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
12 %tmp0 = atomicrmw volatile add i64 addrspace(1)* %gep, i64 %in seq_cst
; Returning `atomicrmw add` through a constant 4-element GEP; the old value is stored to %out2.
; All targets expect offset:32 glc; CI/VI also check the result pair reaches buffer_store_dwordx2.
16 ; GCN-LABEL: {{^}}atomic_add_i64_ret_offset:
17 ; CIVI: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
18 ; CIVI: buffer_store_dwordx2 [[RET]]
20 ; GFX9: global_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:32 glc{{$}}
21 define amdgpu_kernel void @atomic_add_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
23 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
24 %tmp0 = atomicrmw volatile add i64 addrspace(1)* %gep, i64 %in seq_cst
25 store i64 %tmp0, i64 addrspace(1)* %out2
; No-return `atomicrmw add` at %out[%index + 4]: CI expects addr64 with offset:32, VI expects
; flat_atomic_add_x2 with no immediate offset, GFX9 expects global_atomic_add_x2 with offset:32.
29 ; GCN-LABEL: {{^}}atomic_add_i64_addr64_offset:
30 ; CI: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
31 ; VI: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}}
32 ; GFX9: global_atomic_add_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
33 define amdgpu_kernel void @atomic_add_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
35 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
36 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
37 %tmp0 = atomicrmw volatile add i64 addrspace(1)* %gep, i64 %in seq_cst
; Returning `atomicrmw add` at %out[%index + 4], result stored to %out2: CI expects addr64
; offset:32 glc, VI expects flat_atomic_add_x2 glc with no offset, GFX9 expects offset:32 glc.
; CI/VI also check the result pair reaches buffer_store_dwordx2.
41 ; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64_offset:
42 ; CI: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
43 ; VI: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
44 ; CIVI: buffer_store_dwordx2 [[RET]]
46 ; GFX9: global_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
47 define amdgpu_kernel void @atomic_add_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
49 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
50 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
51 %tmp0 = atomicrmw volatile add i64 addrspace(1)* %gep, i64 %in seq_cst
52 store i64 %tmp0, i64 addrspace(1)* %out2
; No-return `atomicrmw add` directly on %out: CI/VI expect buffer_atomic_add_x2 ending at the
; trailing 0 operand (no offset, no glc); GFX9 expects global_atomic_add_x2 ending at the s[..] pair.
; The buffer check uses the CIVI prefix, which the bonaire and tonga RUN lines enable; a prefix
; that no RUN line passes to FileCheck would leave this expectation unchecked.
56 ; GCN-LABEL: {{^}}atomic_add_i64:
57 ; CIVI: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
58 ; GFX9: global_atomic_add_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]$}}
59 define amdgpu_kernel void @atomic_add_i64(i64 addrspace(1)* %out, i64 %in) {
61 %tmp0 = atomicrmw volatile add i64 addrspace(1)* %out, i64 %in seq_cst
; Returning `atomicrmw add` directly on %out, result stored to %out2: CI/VI expect
; buffer_atomic_add_x2 with glc whose result pair reaches buffer_store_dwordx2; GFX9 expects
; global_atomic_add_x2 with glc and no offset.
65 ; GCN-LABEL: {{^}}atomic_add_i64_ret:
66 ; CIVI: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
67 ; CIVI: buffer_store_dwordx2 [[RET]]
69 ; GFX9: global_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
70 define amdgpu_kernel void @atomic_add_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
72 %tmp0 = atomicrmw volatile add i64 addrspace(1)* %out, i64 %in seq_cst
73 store i64 %tmp0, i64 addrspace(1)* %out2
; No-return `atomicrmw add` at %out[%index]: CI expects addr64, VI expects flat_atomic_add_x2,
; GFX9 expects global_atomic_add_x2; none of the patterns allows an offset or glc.
77 ; GCN-LABEL: {{^}}atomic_add_i64_addr64:
78 ; CI: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
79 ; VI: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
80 ; GFX9: global_atomic_add_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
81 define amdgpu_kernel void @atomic_add_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
83 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
84 %tmp0 = atomicrmw volatile add i64 addrspace(1)* %ptr, i64 %in seq_cst
; Returning `atomicrmw add` at %out[%index], result stored to %out2: CI expects addr64 glc,
; VI expects flat_atomic_add_x2 glc, GFX9 expects global_atomic_add_x2 glc; CI/VI also check
; the result pair reaches buffer_store_dwordx2.
88 ; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64:
89 ; CI: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
90 ; VI: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
91 ; CIVI: buffer_store_dwordx2 [[RET]]
93 ; GFX9: global_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
94 define amdgpu_kernel void @atomic_add_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
96 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
97 %tmp0 = atomicrmw volatile add i64 addrspace(1)* %ptr, i64 %in seq_cst
98 store i64 %tmp0, i64 addrspace(1)* %out2
; No-return `atomicrmw and` on a global i64 reached through a constant 4-element GEP:
; CI/VI and GFX9 are both expected to fold the displacement as offset:32 with nothing after it.
102 ; GCN-LABEL: {{^}}atomic_and_i64_offset:
103 ; CIVI: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
104 ; GFX9: global_atomic_and_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
105 define amdgpu_kernel void @atomic_and_i64_offset(i64 addrspace(1)* %out, i64 %in) {
107 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
108 %tmp0 = atomicrmw volatile and i64 addrspace(1)* %gep, i64 %in seq_cst
; Returning `atomicrmw and` through a constant 4-element GEP; the old value is stored to %out2.
; All targets expect offset:32 glc; CI/VI also check the result pair reaches buffer_store_dwordx2.
112 ; GCN-LABEL: {{^}}atomic_and_i64_ret_offset:
113 ; CIVI: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
114 ; CIVI: buffer_store_dwordx2 [[RET]]
116 ; GFX9: global_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
117 define amdgpu_kernel void @atomic_and_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
119 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
120 %tmp0 = atomicrmw volatile and i64 addrspace(1)* %gep, i64 %in seq_cst
121 store i64 %tmp0, i64 addrspace(1)* %out2
; No-return `atomicrmw and` at %out[%index + 4]: CI expects addr64 with offset:32, VI expects
; flat_atomic_and_x2 with no immediate offset, GFX9 expects global_atomic_and_x2 with offset:32.
125 ; GCN-LABEL: {{^}}atomic_and_i64_addr64_offset:
126 ; CI: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
127 ; VI: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
128 ; GFX9: global_atomic_and_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
129 define amdgpu_kernel void @atomic_and_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
131 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
132 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
133 %tmp0 = atomicrmw volatile and i64 addrspace(1)* %gep, i64 %in seq_cst
; Returning `atomicrmw and` at %out[%index + 4], result stored to %out2: CI expects addr64
; offset:32 glc, VI expects flat_atomic_and_x2 glc with no offset, GFX9 expects offset:32 glc.
; CI/VI also check the result pair reaches buffer_store_dwordx2.
137 ; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64_offset:
138 ; CI: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
139 ; VI: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
140 ; CIVI: buffer_store_dwordx2 [[RET]]
142 ; GFX9: global_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
143 define amdgpu_kernel void @atomic_and_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
145 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
146 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
147 %tmp0 = atomicrmw volatile and i64 addrspace(1)* %gep, i64 %in seq_cst
148 store i64 %tmp0, i64 addrspace(1)* %out2
; No-return `atomicrmw and` directly on %out: CI/VI expect buffer_atomic_and_x2 ending at the
; trailing 0 operand (no offset, no glc); GFX9 expects global_atomic_and_x2 ending at the s[..] pair.
152 ; GCN-LABEL: {{^}}atomic_and_i64:
153 ; CIVI: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
154 ; GFX9: global_atomic_and_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]$}}
155 define amdgpu_kernel void @atomic_and_i64(i64 addrspace(1)* %out, i64 %in) {
157 %tmp0 = atomicrmw volatile and i64 addrspace(1)* %out, i64 %in seq_cst
; Returning `atomicrmw and` directly on %out, result stored to %out2: CI/VI expect
; buffer_atomic_and_x2 with glc whose result pair reaches buffer_store_dwordx2; GFX9 expects
; global_atomic_and_x2 with glc and no offset.
161 ; GCN-LABEL: {{^}}atomic_and_i64_ret:
162 ; CIVI: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
163 ; CIVI: buffer_store_dwordx2 [[RET]]
165 ; GFX9: global_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
166 define amdgpu_kernel void @atomic_and_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
168 %tmp0 = atomicrmw volatile and i64 addrspace(1)* %out, i64 %in seq_cst
169 store i64 %tmp0, i64 addrspace(1)* %out2
; No-return `atomicrmw and` at %out[%index]: CI expects addr64, VI expects flat_atomic_and_x2,
; GFX9 expects global_atomic_and_x2; none of the patterns allows an offset or glc.
173 ; GCN-LABEL: {{^}}atomic_and_i64_addr64:
174 ; CI: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
175 ; VI: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
176 ; GFX9: global_atomic_and_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
177 define amdgpu_kernel void @atomic_and_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
179 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
180 %tmp0 = atomicrmw volatile and i64 addrspace(1)* %ptr, i64 %in seq_cst
; Returning `atomicrmw and` at %out[%index], result stored to %out2: CI expects addr64 glc,
; VI expects flat_atomic_and_x2 glc, GFX9 expects global_atomic_and_x2 glc; CI/VI also check
; the result pair reaches buffer_store_dwordx2.
184 ; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64:
185 ; CI: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
186 ; VI: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
187 ; CIVI: buffer_store_dwordx2 [[RET]]
189 ; GFX9: global_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
190 define amdgpu_kernel void @atomic_and_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
192 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
193 %tmp0 = atomicrmw volatile and i64 addrspace(1)* %ptr, i64 %in seq_cst
194 store i64 %tmp0, i64 addrspace(1)* %out2
; No-return `atomicrmw sub` on a global i64 reached through a constant 4-element GEP:
; CI/VI and GFX9 are both expected to fold the displacement as offset:32 with nothing after it.
198 ; GCN-LABEL: {{^}}atomic_sub_i64_offset:
199 ; CIVI: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
200 ; GFX9: global_atomic_sub_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
201 define amdgpu_kernel void @atomic_sub_i64_offset(i64 addrspace(1)* %out, i64 %in) {
203 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
204 %tmp0 = atomicrmw volatile sub i64 addrspace(1)* %gep, i64 %in seq_cst
; Returning `atomicrmw sub` through a constant 4-element GEP; the old value is stored to %out2.
; All targets expect offset:32 glc; CI/VI also check the result pair reaches buffer_store_dwordx2.
208 ; GCN-LABEL: {{^}}atomic_sub_i64_ret_offset:
209 ; CIVI: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
210 ; CIVI: buffer_store_dwordx2 [[RET]]
212 ; GFX9: global_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
213 define amdgpu_kernel void @atomic_sub_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
215 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
216 %tmp0 = atomicrmw volatile sub i64 addrspace(1)* %gep, i64 %in seq_cst
217 store i64 %tmp0, i64 addrspace(1)* %out2
; No-return `atomicrmw sub` at %out[%index + 4]: CI expects addr64 with offset:32, VI expects
; flat_atomic_sub_x2 with no immediate offset, GFX9 expects global_atomic_sub_x2 with offset:32.
221 ; GCN-LABEL: {{^}}atomic_sub_i64_addr64_offset:
222 ; CI: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
223 ; VI: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
224 ; GFX9: global_atomic_sub_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
225 define amdgpu_kernel void @atomic_sub_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
227 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
228 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
229 %tmp0 = atomicrmw volatile sub i64 addrspace(1)* %gep, i64 %in seq_cst
; Returning `atomicrmw sub` at %out[%index + 4], result stored to %out2: CI expects addr64
; offset:32 glc, VI expects flat_atomic_sub_x2 glc with no offset, GFX9 expects offset:32 glc.
; CI/VI also check the result pair reaches buffer_store_dwordx2.
233 ; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64_offset:
234 ; CI: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
235 ; VI: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
236 ; CIVI: buffer_store_dwordx2 [[RET]]
238 ; GFX9: global_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
239 define amdgpu_kernel void @atomic_sub_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
241 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
242 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
243 %tmp0 = atomicrmw volatile sub i64 addrspace(1)* %gep, i64 %in seq_cst
244 store i64 %tmp0, i64 addrspace(1)* %out2
; No-return `atomicrmw sub` directly on %out: CI/VI expect buffer_atomic_sub_x2 ending at the
; trailing 0 operand (no offset, no glc); GFX9 expects global_atomic_sub_x2 ending at the s[..] pair.
248 ; GCN-LABEL: {{^}}atomic_sub_i64:
249 ; CIVI: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
250 ; GFX9: global_atomic_sub_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]$}}
251 define amdgpu_kernel void @atomic_sub_i64(i64 addrspace(1)* %out, i64 %in) {
253 %tmp0 = atomicrmw volatile sub i64 addrspace(1)* %out, i64 %in seq_cst
; Returning `atomicrmw sub` directly on %out, result stored to %out2: CI/VI expect
; buffer_atomic_sub_x2 with glc whose result pair reaches buffer_store_dwordx2; GFX9 expects
; global_atomic_sub_x2 with glc and no offset.
257 ; GCN-LABEL: {{^}}atomic_sub_i64_ret:
258 ; CIVI: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
259 ; CIVI: buffer_store_dwordx2 [[RET]]
261 ; GFX9: global_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
262 define amdgpu_kernel void @atomic_sub_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
264 %tmp0 = atomicrmw volatile sub i64 addrspace(1)* %out, i64 %in seq_cst
265 store i64 %tmp0, i64 addrspace(1)* %out2
; No-return `atomicrmw sub` at %out[%index]: CI expects addr64, VI expects flat_atomic_sub_x2,
; GFX9 expects global_atomic_sub_x2; none of the patterns allows an offset or glc.
269 ; GCN-LABEL: {{^}}atomic_sub_i64_addr64:
270 ; CI: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
271 ; VI: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
272 ; GFX9: global_atomic_sub_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
273 define amdgpu_kernel void @atomic_sub_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
275 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
276 %tmp0 = atomicrmw volatile sub i64 addrspace(1)* %ptr, i64 %in seq_cst
; Returning `atomicrmw sub` at %out[%index], result stored to %out2: CI expects addr64 glc,
; VI expects flat_atomic_sub_x2 glc, GFX9 expects global_atomic_sub_x2 glc; CI/VI also check
; the result pair reaches buffer_store_dwordx2.
280 ; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64:
281 ; CI: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
282 ; VI: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
283 ; CIVI: buffer_store_dwordx2 [[RET]]
285 ; GFX9: global_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
286 define amdgpu_kernel void @atomic_sub_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
288 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
289 %tmp0 = atomicrmw volatile sub i64 addrspace(1)* %ptr, i64 %in seq_cst
290 store i64 %tmp0, i64 addrspace(1)* %out2
; No-return `atomicrmw max` through a constant 4-element GEP; the checks expect the smax_x2
; instruction on CI/VI and GFX9, with the displacement folded as offset:32 and nothing after it.
294 ; GCN-LABEL: {{^}}atomic_max_i64_offset:
295 ; CIVI: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
296 ; GFX9: global_atomic_smax_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
297 define amdgpu_kernel void @atomic_max_i64_offset(i64 addrspace(1)* %out, i64 %in) {
299 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
300 %tmp0 = atomicrmw volatile max i64 addrspace(1)* %gep, i64 %in seq_cst
; Returning `atomicrmw max` through a constant 4-element GEP; the old value is stored to %out2.
; All targets expect smax_x2 with offset:32 glc; CI/VI also check the result pair reaches
; buffer_store_dwordx2.
304 ; GCN-LABEL: {{^}}atomic_max_i64_ret_offset:
305 ; CIVI: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
306 ; CIVI: buffer_store_dwordx2 [[RET]]
308 ; GFX9: global_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
309 define amdgpu_kernel void @atomic_max_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
311 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
312 %tmp0 = atomicrmw volatile max i64 addrspace(1)* %gep, i64 %in seq_cst
313 store i64 %tmp0, i64 addrspace(1)* %out2
; No-return `atomicrmw max` at %out[%index + 4]: CI expects addr64 with offset:32, VI expects
; flat_atomic_smax_x2 with no immediate offset, GFX9 expects global_atomic_smax_x2 with offset:32.
317 ; GCN-LABEL: {{^}}atomic_max_i64_addr64_offset:
318 ; CI: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
319 ; VI: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
320 ; GFX9: global_atomic_smax_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
321 define amdgpu_kernel void @atomic_max_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
323 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
324 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
325 %tmp0 = atomicrmw volatile max i64 addrspace(1)* %gep, i64 %in seq_cst
; Returning `atomicrmw max` at %out[%index + 4], result stored to %out2: CI expects addr64
; offset:32 glc, VI expects flat_atomic_smax_x2 glc with no offset, GFX9 expects offset:32 glc.
; CI/VI also check the result pair reaches buffer_store_dwordx2.
329 ; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64_offset:
330 ; CI: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
331 ; VI: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
332 ; CIVI: buffer_store_dwordx2 [[RET]]
334 ; GFX9: global_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
335 define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
337 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
338 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
339 %tmp0 = atomicrmw volatile max i64 addrspace(1)* %gep, i64 %in seq_cst
340 store i64 %tmp0, i64 addrspace(1)* %out2
; No-return `atomicrmw max` directly on %out: CI/VI expect buffer_atomic_smax_x2 ending at the
; trailing 0 operand (no offset, no glc); GFX9 expects global_atomic_smax_x2 ending at the s[..] pair.
344 ; GCN-LABEL: {{^}}atomic_max_i64:
345 ; CIVI: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
346 ; GFX9: global_atomic_smax_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]$}}
347 define amdgpu_kernel void @atomic_max_i64(i64 addrspace(1)* %out, i64 %in) {
349 %tmp0 = atomicrmw volatile max i64 addrspace(1)* %out, i64 %in seq_cst
; Returning `atomicrmw max` directly on %out, result stored to %out2: CI/VI expect
; buffer_atomic_smax_x2 with glc whose result pair reaches buffer_store_dwordx2; GFX9 expects
; global_atomic_smax_x2 with glc and no offset.
353 ; GCN-LABEL: {{^}}atomic_max_i64_ret:
354 ; CIVI: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
355 ; CIVI: buffer_store_dwordx2 [[RET]]
357 ; GFX9: global_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
358 define amdgpu_kernel void @atomic_max_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
360 %tmp0 = atomicrmw volatile max i64 addrspace(1)* %out, i64 %in seq_cst
361 store i64 %tmp0, i64 addrspace(1)* %out2
; No-return `atomicrmw max` at %out[%index]: CI expects addr64, VI expects flat_atomic_smax_x2,
; GFX9 expects global_atomic_smax_x2; none of the patterns allows an offset or glc.
365 ; GCN-LABEL: {{^}}atomic_max_i64_addr64:
366 ; CI: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
367 ; VI: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
368 ; GFX9: global_atomic_smax_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
369 define amdgpu_kernel void @atomic_max_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
371 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
372 %tmp0 = atomicrmw volatile max i64 addrspace(1)* %ptr, i64 %in seq_cst
; Returning `atomicrmw max` at %out[%index], result stored to %out2: CI expects addr64 glc,
; VI expects flat_atomic_smax_x2 glc, GFX9 expects global_atomic_smax_x2 glc; CI/VI also check
; the result pair reaches buffer_store_dwordx2.
376 ; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64:
377 ; CI: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
378 ; VI: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
379 ; CIVI: buffer_store_dwordx2 [[RET]]
381 ; GFX9: global_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
382 define amdgpu_kernel void @atomic_max_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
384 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
385 %tmp0 = atomicrmw volatile max i64 addrspace(1)* %ptr, i64 %in seq_cst
386 store i64 %tmp0, i64 addrspace(1)* %out2
; No-return `atomicrmw umax` on a global i64 reached through a constant 4-element GEP:
; CI/VI and GFX9 are both expected to fold the displacement as offset:32 with nothing after it.
390 ; GCN-LABEL: {{^}}atomic_umax_i64_offset:
391 ; CIVI: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
392 ; GFX9: global_atomic_umax_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
393 define amdgpu_kernel void @atomic_umax_i64_offset(i64 addrspace(1)* %out, i64 %in) {
395 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
396 %tmp0 = atomicrmw volatile umax i64 addrspace(1)* %gep, i64 %in seq_cst
; Returning `atomicrmw umax` through a constant 4-element GEP; the old value is stored to %out2.
; All targets expect offset:32 glc; CI/VI also check the result pair reaches buffer_store_dwordx2.
400 ; GCN-LABEL: {{^}}atomic_umax_i64_ret_offset:
401 ; CIVI: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
402 ; CIVI: buffer_store_dwordx2 [[RET]]
404 ; GFX9: global_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
405 define amdgpu_kernel void @atomic_umax_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
407 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
408 %tmp0 = atomicrmw volatile umax i64 addrspace(1)* %gep, i64 %in seq_cst
409 store i64 %tmp0, i64 addrspace(1)* %out2
; No-return `atomicrmw umax` at %out[%index + 4]: CI expects addr64 with offset:32, VI expects
; flat_atomic_umax_x2 with no immediate offset, GFX9 expects global_atomic_umax_x2 with offset:32.
413 ; GCN-LABEL: {{^}}atomic_umax_i64_addr64_offset:
414 ; CI: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
415 ; VI: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
416 ; GFX9: global_atomic_umax_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
417 define amdgpu_kernel void @atomic_umax_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
419 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
420 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
421 %tmp0 = atomicrmw volatile umax i64 addrspace(1)* %gep, i64 %in seq_cst
; Returning `atomicrmw umax` at %out[%index + 4], result stored to %out2: CI expects addr64
; offset:32 glc, VI expects flat_atomic_umax_x2 glc with no offset, GFX9 expects offset:32 glc.
; CI/VI also check the result pair reaches buffer_store_dwordx2.
425 ; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64_offset:
426 ; CI: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
427 ; VI: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
428 ; CIVI: buffer_store_dwordx2 [[RET]]
430 ; GFX9: global_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
431 define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
433 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
434 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
435 %tmp0 = atomicrmw volatile umax i64 addrspace(1)* %gep, i64 %in seq_cst
436 store i64 %tmp0, i64 addrspace(1)* %out2
; No-return `atomicrmw umax` directly on %out: CI/VI expect buffer_atomic_umax_x2 ending at the
; trailing 0 operand (no offset, no glc); GFX9 expects global_atomic_umax_x2 ending at the s[..] pair.
440 ; GCN-LABEL: {{^}}atomic_umax_i64:
441 ; CIVI: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
442 ; GFX9: global_atomic_umax_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]$}}
443 define amdgpu_kernel void @atomic_umax_i64(i64 addrspace(1)* %out, i64 %in) {
445 %tmp0 = atomicrmw volatile umax i64 addrspace(1)* %out, i64 %in seq_cst
; Returning `atomicrmw umax` directly on %out, result stored to %out2: CI/VI expect
; buffer_atomic_umax_x2 with glc whose result pair reaches buffer_store_dwordx2; GFX9 expects
; global_atomic_umax_x2 with glc and no offset.
449 ; GCN-LABEL: {{^}}atomic_umax_i64_ret:
450 ; CIVI: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
451 ; CIVI: buffer_store_dwordx2 [[RET]]
453 ; GFX9: global_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
454 define amdgpu_kernel void @atomic_umax_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
456 %tmp0 = atomicrmw volatile umax i64 addrspace(1)* %out, i64 %in seq_cst
457 store i64 %tmp0, i64 addrspace(1)* %out2
; No-return `atomicrmw umax` at %out[%index]: CI expects addr64, VI expects flat_atomic_umax_x2,
; GFX9 expects global_atomic_umax_x2; none of the patterns allows an offset or glc.
461 ; GCN-LABEL: {{^}}atomic_umax_i64_addr64:
462 ; CI: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
463 ; VI: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
464 ; GFX9: global_atomic_umax_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
465 define amdgpu_kernel void @atomic_umax_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
467 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
468 %tmp0 = atomicrmw volatile umax i64 addrspace(1)* %ptr, i64 %in seq_cst
; Returning `atomicrmw umax` at %out[%index], result stored to %out2: CI expects addr64 glc,
; VI expects flat_atomic_umax_x2 glc, GFX9 expects global_atomic_umax_x2 glc; CI/VI also check
; the result pair reaches buffer_store_dwordx2.
472 ; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64:
473 ; CI: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
474 ; VI: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
475 ; CIVI: buffer_store_dwordx2 [[RET]]
477 ; GFX9: global_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
478 define amdgpu_kernel void @atomic_umax_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
480 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
481 %tmp0 = atomicrmw volatile umax i64 addrspace(1)* %ptr, i64 %in seq_cst
482 store i64 %tmp0, i64 addrspace(1)* %out2
; No-return `atomicrmw min` through a constant 4-element GEP; the checks expect the smin_x2
; instruction on CI/VI and GFX9, with the displacement folded as offset:32 and nothing after it.
486 ; GCN-LABEL: {{^}}atomic_min_i64_offset:
487 ; CIVI: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
488 ; GFX9: global_atomic_smin_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
489 define amdgpu_kernel void @atomic_min_i64_offset(i64 addrspace(1)* %out, i64 %in) {
491 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
492 %tmp0 = atomicrmw volatile min i64 addrspace(1)* %gep, i64 %in seq_cst
; Returning `atomicrmw min` through a constant 4-element GEP; the old value is stored to %out2.
; All targets expect smin_x2 with offset:32 glc; CI/VI also check the result pair reaches
; buffer_store_dwordx2.
496 ; GCN-LABEL: {{^}}atomic_min_i64_ret_offset:
497 ; CIVI: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
498 ; CIVI: buffer_store_dwordx2 [[RET]]
500 ; GFX9: global_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
501 define amdgpu_kernel void @atomic_min_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
503 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
504 %tmp0 = atomicrmw volatile min i64 addrspace(1)* %gep, i64 %in seq_cst
505 store i64 %tmp0, i64 addrspace(1)* %out2
; No-return `atomicrmw min` at %out[%index + 4]: CI expects addr64 with offset:32, VI expects
; flat_atomic_smin_x2 with no immediate offset, GFX9 expects global_atomic_smin_x2 with offset:32.
509 ; GCN-LABEL: {{^}}atomic_min_i64_addr64_offset:
510 ; CI: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
511 ; VI: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
512 ; GFX9: global_atomic_smin_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
513 define amdgpu_kernel void @atomic_min_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
515 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
516 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
517 %tmp0 = atomicrmw volatile min i64 addrspace(1)* %gep, i64 %in seq_cst
; Returning `atomicrmw min` at %out[%index + 4], result stored to %out2: CI expects addr64
; offset:32 glc, VI expects flat_atomic_smin_x2 glc with no offset, GFX9 expects offset:32 glc.
; CI/VI also check the result pair reaches buffer_store_dwordx2.
521 ; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64_offset:
522 ; CI: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
523 ; VI: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
524 ; CIVI: buffer_store_dwordx2 [[RET]]
526 ; GFX9: global_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
527 define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
529 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
530 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
531 %tmp0 = atomicrmw volatile min i64 addrspace(1)* %gep, i64 %in seq_cst
532 store i64 %tmp0, i64 addrspace(1)* %out2
; No-return `atomicrmw min` directly on %out: CI/VI expect buffer_atomic_smin_x2 ending at the
; trailing 0 operand (no offset, no glc); GFX9 expects global_atomic_smin_x2 ending at the s[..] pair.
536 ; GCN-LABEL: {{^}}atomic_min_i64:
537 ; CIVI: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
538 ; GFX9: global_atomic_smin_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]$}}
539 define amdgpu_kernel void @atomic_min_i64(i64 addrspace(1)* %out, i64 %in) {
541 %tmp0 = atomicrmw volatile min i64 addrspace(1)* %out, i64 %in seq_cst
; Returning `atomicrmw min` directly on %out, result stored to %out2: CI/VI expect
; buffer_atomic_smin_x2 with glc whose result pair reaches buffer_store_dwordx2; GFX9 expects
; global_atomic_smin_x2 with glc and no offset.
545 ; GCN-LABEL: {{^}}atomic_min_i64_ret:
546 ; CIVI: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
547 ; CIVI: buffer_store_dwordx2 [[RET]]
549 ; GFX9: global_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
550 define amdgpu_kernel void @atomic_min_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
552 %tmp0 = atomicrmw volatile min i64 addrspace(1)* %out, i64 %in seq_cst
553 store i64 %tmp0, i64 addrspace(1)* %out2
; No-return `atomicrmw min` at %out[%index]: CI expects addr64, VI expects flat_atomic_smin_x2,
; GFX9 expects global_atomic_smin_x2; none of the patterns allows an offset or glc.
557 ; GCN-LABEL: {{^}}atomic_min_i64_addr64:
558 ; CI: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
559 ; VI: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
560 ; GFX9: global_atomic_smin_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
561 define amdgpu_kernel void @atomic_min_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
563 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
564 %tmp0 = atomicrmw volatile min i64 addrspace(1)* %ptr, i64 %in seq_cst
; Returning `atomicrmw min` at %out[%index], result stored to %out2: CI expects addr64 glc,
; VI expects flat_atomic_smin_x2 glc, GFX9 expects global_atomic_smin_x2 glc; CI/VI also check
; the result pair reaches buffer_store_dwordx2.
568 ; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64:
569 ; CI: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
570 ; VI: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
571 ; CIVI: buffer_store_dwordx2 [[RET]]
573 ; GFX9: global_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
574 define amdgpu_kernel void @atomic_min_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
576 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
577 %tmp0 = atomicrmw volatile min i64 addrspace(1)* %ptr, i64 %in seq_cst
578 store i64 %tmp0, i64 addrspace(1)* %out2
; No-return `atomicrmw umin` on a global i64 reached through a constant 4-element GEP:
; CI/VI and GFX9 are both expected to fold the displacement as offset:32 with nothing after it.
582 ; GCN-LABEL: {{^}}atomic_umin_i64_offset:
583 ; CIVI: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
585 ; GFX9: global_atomic_umin_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
586 define amdgpu_kernel void @atomic_umin_i64_offset(i64 addrspace(1)* %out, i64 %in) {
588 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
589 %tmp0 = atomicrmw volatile umin i64 addrspace(1)* %gep, i64 %in seq_cst
; Returning `atomicrmw umin` through a constant 4-element GEP; the old value is stored to %out2.
; All targets expect offset:32 glc; CI/VI also check the result pair reaches buffer_store_dwordx2.
593 ; GCN-LABEL: {{^}}atomic_umin_i64_ret_offset:
594 ; CIVI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
595 ; CIVI: buffer_store_dwordx2 [[RET]]
597 ; GFX9: global_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
598 define amdgpu_kernel void @atomic_umin_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
600 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
601 %tmp0 = atomicrmw volatile umin i64 addrspace(1)* %gep, i64 %in seq_cst
602 store i64 %tmp0, i64 addrspace(1)* %out2
; No-return `atomicrmw umin` at %out[%index + 4]: CI expects addr64 with offset:32, VI expects
; flat_atomic_umin_x2 with no immediate offset, GFX9 expects global_atomic_umin_x2 with offset:32.
606 ; GCN-LABEL: {{^}}atomic_umin_i64_addr64_offset:
607 ; CI: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
608 ; VI: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
609 ; GFX9: global_atomic_umin_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
610 define amdgpu_kernel void @atomic_umin_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
612 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
613 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
614 %tmp0 = atomicrmw volatile umin i64 addrspace(1)* %gep, i64 %in seq_cst
; Returning `atomicrmw umin` at %out[%index + 4], result stored to %out2: CI expects addr64
; offset:32 glc, VI expects flat_atomic_umin_x2 glc with no offset, GFX9 expects offset:32 glc.
; CI/VI also check the result pair reaches buffer_store_dwordx2.
618 ; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64_offset:
619 ; CI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
620 ; VI: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
621 ; CIVI: buffer_store_dwordx2 [[RET]]
623 ; GFX9: global_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
624 define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
626 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
627 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
628 %tmp0 = atomicrmw volatile umin i64 addrspace(1)* %gep, i64 %in seq_cst
629 store i64 %tmp0, i64 addrspace(1)* %out2
; Non-returning i64 atomic umin directly on %out; no offset or glc is expected.
633 ; GCN-LABEL: {{^}}atomic_umin_i64:
634 ; CIVI: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
635 ; GFX9: global_atomic_umin_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]$}}
636 define amdgpu_kernel void @atomic_umin_i64(i64 addrspace(1)* %out, i64 %in) {
638 %tmp0 = atomicrmw volatile umin i64 addrspace(1)* %out, i64 %in seq_cst
; Returning i64 atomic umin directly on %out; the old value is stored to %out2.
; The buffer check is anchored at end of line, like the sibling returning
; checks, so an unexpected trailing modifier after glc is rejected.
642 ; GCN-LABEL: {{^}}atomic_umin_i64_ret:
643 ; CIVI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
644 ; CIVI: buffer_store_dwordx2 [[RET]]
646 ; GFX9: global_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
647 define amdgpu_kernel void @atomic_umin_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
649 %tmp0 = atomicrmw volatile umin i64 addrspace(1)* %out, i64 %in seq_cst
650 store i64 %tmp0, i64 addrspace(1)* %out2
; Non-returning i64 atomic umin at %out[%index] with no constant displacement.
654 ; GCN-LABEL: {{^}}atomic_umin_i64_addr64:
655 ; CI: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
656 ; VI: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
657 ; GFX9: global_atomic_umin_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
658 define amdgpu_kernel void @atomic_umin_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
660 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
661 %tmp0 = atomicrmw volatile umin i64 addrspace(1)* %ptr, i64 %in seq_cst
; Returning i64 atomic umin at %out[%index]; the old value is stored to %out2.
665 ; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64:
666 ; CI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
667 ; VI: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
668 ; CIVI: buffer_store_dwordx2 [[RET]]
670 ; GFX9: global_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
671 define amdgpu_kernel void @atomic_umin_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
673 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
674 %tmp0 = atomicrmw volatile umin i64 addrspace(1)* %ptr, i64 %in seq_cst
675 store i64 %tmp0, i64 addrspace(1)* %out2
; Non-returning i64 atomic or at %out + 4 elements (32 bytes); the displacement
; is expected as offset:32 and, with the result unused, no glc is expected.
679 ; GCN-LABEL: {{^}}atomic_or_i64_offset:
680 ; CIVI: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
681 ; GFX9: global_atomic_or_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
682 define amdgpu_kernel void @atomic_or_i64_offset(i64 addrspace(1)* %out, i64 %in) {
684 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
685 %tmp0 = atomicrmw volatile or i64 addrspace(1)* %gep, i64 %in seq_cst
; Returning i64 atomic or at %out + 4 elements; the old value is stored to
; %out2, so the checks expect glc and (CI/VI) a store of the returned pair.
689 ; GCN-LABEL: {{^}}atomic_or_i64_ret_offset:
690 ; CIVI: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
691 ; CIVI: buffer_store_dwordx2 [[RET]]
693 ; GFX9: global_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
694 define amdgpu_kernel void @atomic_or_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
696 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
697 %tmp0 = atomicrmw volatile or i64 addrspace(1)* %gep, i64 %in seq_cst
698 store i64 %tmp0, i64 addrspace(1)* %out2
; Non-returning i64 atomic or at %out[%index] + 4 elements. CI expects addr64
; with offset:32, VI expects the flat form with no offset operand, and GFX9
; expects the global form with offset:32.
702 ; GCN-LABEL: {{^}}atomic_or_i64_addr64_offset:
703 ; CI: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
704 ; VI: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
705 ; GFX9: global_atomic_or_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
706 define amdgpu_kernel void @atomic_or_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
708 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
709 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
710 %tmp0 = atomicrmw volatile or i64 addrspace(1)* %gep, i64 %in seq_cst
; Returning i64 atomic or at %out[%index] + 4 elements; the old value is stored
; to %out2, so glc is expected on every target.
714 ; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64_offset:
715 ; CI: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
716 ; VI: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
717 ; CIVI: buffer_store_dwordx2 [[RET]]
719 ; GFX9: global_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
720 define amdgpu_kernel void @atomic_or_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
722 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
723 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
724 %tmp0 = atomicrmw volatile or i64 addrspace(1)* %gep, i64 %in seq_cst
725 store i64 %tmp0, i64 addrspace(1)* %out2
; Non-returning i64 atomic or directly on %out; no offset or glc is expected.
729 ; GCN-LABEL: {{^}}atomic_or_i64:
730 ; CIVI: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
731 ; GFX9: global_atomic_or_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
732 define amdgpu_kernel void @atomic_or_i64(i64 addrspace(1)* %out, i64 %in) {
734 %tmp0 = atomicrmw volatile or i64 addrspace(1)* %out, i64 %in seq_cst
; Returning i64 atomic or directly on %out; the old value is stored to %out2.
; The buffer check is anchored at end of line, like the sibling returning
; checks, so an unexpected trailing modifier after glc is rejected.
738 ; GCN-LABEL: {{^}}atomic_or_i64_ret:
739 ; CIVI: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
740 ; CIVI: buffer_store_dwordx2 [[RET]]
742 ; GFX9: global_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
743 define amdgpu_kernel void @atomic_or_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
745 %tmp0 = atomicrmw volatile or i64 addrspace(1)* %out, i64 %in seq_cst
746 store i64 %tmp0, i64 addrspace(1)* %out2
; Non-returning i64 atomic or at %out[%index] with no constant displacement.
750 ; GCN-LABEL: {{^}}atomic_or_i64_addr64:
751 ; CI: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
752 ; VI: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
753 ; GFX9: global_atomic_or_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
754 define amdgpu_kernel void @atomic_or_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
756 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
757 %tmp0 = atomicrmw volatile or i64 addrspace(1)* %ptr, i64 %in seq_cst
; Returning i64 atomic or at %out[%index]; the old value is stored to %out2.
761 ; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64:
762 ; CI: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
763 ; VI: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
764 ; CIVI: buffer_store_dwordx2 [[RET]]
766 ; GFX9: global_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
767 define amdgpu_kernel void @atomic_or_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
769 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
770 %tmp0 = atomicrmw volatile or i64 addrspace(1)* %ptr, i64 %in seq_cst
771 store i64 %tmp0, i64 addrspace(1)* %out2
; Non-returning i64 atomic xchg at %out + 4 elements (32 bytes); the checks
; expect the swap_x2 instruction with the displacement as offset:32.
775 ; GCN-LABEL: {{^}}atomic_xchg_i64_offset:
776 ; CIVI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
778 ; GFX9: global_atomic_swap_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
779 define amdgpu_kernel void @atomic_xchg_i64_offset(i64 addrspace(1)* %out, i64 %in) {
781 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
782 %tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %gep, i64 %in seq_cst
; Same as the i64 offset exchange test but with a double operand; the checks
; expect the identical swap_x2 instructions with offset:32.
786 ; GCN-LABEL: {{^}}atomic_xchg_f64_offset:
787 ; CIVI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
789 ; GFX9: global_atomic_swap_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
790 define amdgpu_kernel void @atomic_xchg_f64_offset(double addrspace(1)* %out, double %in) {
792 %gep = getelementptr double, double addrspace(1)* %out, i64 4
793 %tmp0 = atomicrmw volatile xchg double addrspace(1)* %gep, double %in seq_cst
; Returning i64 atomic xchg at %out + 4 elements; the old value is stored to
; %out2, so the checks expect glc and (CI/VI) a store of the returned pair.
797 ; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:
798 ; CIVI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
799 ; CIVI: buffer_store_dwordx2 [[RET]]
801 ; GFX9: global_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
802 define amdgpu_kernel void @atomic_xchg_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
804 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
805 %tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %gep, i64 %in seq_cst
806 store i64 %tmp0, i64 addrspace(1)* %out2
; Non-returning i64 atomic xchg at %out[%index] + 4 elements. CI expects addr64
; with offset:32, VI expects the flat form with no offset operand, and GFX9
; expects the global form with offset:32.
810 ; GCN-LABEL: {{^}}atomic_xchg_i64_addr64_offset:
811 ; CI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
812 ; VI: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}}
813 ; GFX9: global_atomic_swap_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
814 define amdgpu_kernel void @atomic_xchg_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
816 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
817 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
818 %tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %gep, i64 %in seq_cst
; Returning i64 atomic xchg at %out[%index] + 4 elements; the old value is
; stored to %out2, so glc is expected on every target.
822 ; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64_offset:
823 ; CI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
824 ; VI: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
825 ; CIVI: buffer_store_dwordx2 [[RET]]
827 ; GFX9: global_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
828 define amdgpu_kernel void @atomic_xchg_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
830 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
831 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
832 %tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %gep, i64 %in seq_cst
833 store i64 %tmp0, i64 addrspace(1)* %out2
; Non-returning i64 atomic xchg directly on %out; no offset or glc is expected.
837 ; GCN-LABEL: {{^}}atomic_xchg_i64:
838 ; CIVI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
839 ; GFX9: global_atomic_swap_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
840 define amdgpu_kernel void @atomic_xchg_i64(i64 addrspace(1)* %out, i64 %in) {
842 %tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %out, i64 %in seq_cst
; Returning i64 atomic xchg directly on %out; the old value is stored to %out2.
; The buffer check is anchored at end of line, like the sibling returning
; checks, so an unexpected trailing modifier after glc is rejected.
846 ; GCN-LABEL: {{^}}atomic_xchg_i64_ret:
847 ; CIVI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
848 ; CIVI: buffer_store_dwordx2 [[RET]]
850 ; GFX9: global_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
851 define amdgpu_kernel void @atomic_xchg_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
853 %tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %out, i64 %in seq_cst
854 store i64 %tmp0, i64 addrspace(1)* %out2
; Non-returning i64 atomic xchg at %out[%index] with no constant displacement.
858 ; GCN-LABEL: {{^}}atomic_xchg_i64_addr64:
859 ; CI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
860 ; VI: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
861 ; GFX9: global_atomic_swap_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
862 define amdgpu_kernel void @atomic_xchg_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
864 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
865 %tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %ptr, i64 %in seq_cst
; Returning i64 atomic xchg at %out[%index]; the old value is stored to %out2.
869 ; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64:
870 ; CI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
871 ; VI: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
872 ; CIVI: buffer_store_dwordx2 [[RET]]
874 ; GFX9: global_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
875 define amdgpu_kernel void @atomic_xchg_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
877 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
878 %tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %ptr, i64 %in seq_cst
879 store i64 %tmp0, i64 addrspace(1)* %out2
; Non-returning i64 atomic xor at %out + 4 elements (32 bytes); the displacement
; is expected as offset:32 and, with the result unused, no glc is expected.
883 ; GCN-LABEL: {{^}}atomic_xor_i64_offset:
884 ; CIVI: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
885 ; GFX9: global_atomic_xor_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
886 define amdgpu_kernel void @atomic_xor_i64_offset(i64 addrspace(1)* %out, i64 %in) {
888 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
889 %tmp0 = atomicrmw volatile xor i64 addrspace(1)* %gep, i64 %in seq_cst
; Returning i64 atomic xor at %out + 4 elements; the old value is stored to
; %out2, so the checks expect glc and (CI/VI) a store of the returned pair.
893 ; GCN-LABEL: {{^}}atomic_xor_i64_ret_offset:
894 ; CIVI: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
895 ; CIVI: buffer_store_dwordx2 [[RET]]
897 ; GFX9: global_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
898 define amdgpu_kernel void @atomic_xor_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
900 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
901 %tmp0 = atomicrmw volatile xor i64 addrspace(1)* %gep, i64 %in seq_cst
902 store i64 %tmp0, i64 addrspace(1)* %out2
; Non-returning i64 atomic xor at %out[%index] + 4 elements. CI expects addr64
; with offset:32, VI expects the flat form with no offset operand, and GFX9
; expects the global form with offset:32.
906 ; GCN-LABEL: {{^}}atomic_xor_i64_addr64_offset:
907 ; CI: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
908 ; VI: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
909 ; GFX9: global_atomic_xor_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
910 define amdgpu_kernel void @atomic_xor_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
912 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
913 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
914 %tmp0 = atomicrmw volatile xor i64 addrspace(1)* %gep, i64 %in seq_cst
; Returning i64 atomic xor at %out[%index] + 4 elements; the old value is
; stored to %out2, so glc is expected on every target.
918 ; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64_offset:
919 ; CI: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
920 ; VI: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
921 ; CIVI: buffer_store_dwordx2 [[RET]]
923 ; GFX9: global_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
924 define amdgpu_kernel void @atomic_xor_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
926 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
927 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
928 %tmp0 = atomicrmw volatile xor i64 addrspace(1)* %gep, i64 %in seq_cst
929 store i64 %tmp0, i64 addrspace(1)* %out2
; Non-returning i64 atomic xor directly on %out; no offset or glc is expected.
933 ; GCN-LABEL: {{^}}atomic_xor_i64:
934 ; CIVI: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
935 ; GFX9: global_atomic_xor_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
936 define amdgpu_kernel void @atomic_xor_i64(i64 addrspace(1)* %out, i64 %in) {
938 %tmp0 = atomicrmw volatile xor i64 addrspace(1)* %out, i64 %in seq_cst
; Returning i64 atomic xor directly on %out; the old value is stored to %out2.
; The buffer check is anchored at end of line, like the sibling returning
; checks, so an unexpected trailing modifier after glc is rejected.
942 ; GCN-LABEL: {{^}}atomic_xor_i64_ret:
943 ; CIVI: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
944 ; CIVI: buffer_store_dwordx2 [[RET]]
946 ; GFX9: global_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
947 define amdgpu_kernel void @atomic_xor_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
949 %tmp0 = atomicrmw volatile xor i64 addrspace(1)* %out, i64 %in seq_cst
950 store i64 %tmp0, i64 addrspace(1)* %out2
; Non-returning i64 atomic xor at %out[%index] with no constant displacement.
954 ; GCN-LABEL: {{^}}atomic_xor_i64_addr64:
955 ; CI: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
956 ; VI: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
957 ; GFX9: global_atomic_xor_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
958 define amdgpu_kernel void @atomic_xor_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
960 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
961 %tmp0 = atomicrmw volatile xor i64 addrspace(1)* %ptr, i64 %in seq_cst
; Returning i64 atomic xor at %out[%index]; the old value is stored to %out2.
965 ; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64:
966 ; CI: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
967 ; VI: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
968 ; CIVI: buffer_store_dwordx2 [[RET]]
970 ; GFX9: global_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
971 define amdgpu_kernel void @atomic_xor_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
973 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
974 %tmp0 = atomicrmw volatile xor i64 addrspace(1)* %ptr, i64 %in seq_cst
975 store i64 %tmp0, i64 addrspace(1)* %out2
; Non-returning i64 cmpxchg (expected %old, new %in) at %out + 4 elements; the
; checks expect cmpswap_x2 with the 32-byte displacement as offset:32.
980 ; GCN-LABEL: {{^}}atomic_cmpxchg_i64_offset:
981 ; CIVI: buffer_atomic_cmpswap_x2 v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
982 ; GFX9: global_atomic_cmpswap_x2 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
983 define amdgpu_kernel void @atomic_cmpxchg_i64_offset(i64 addrspace(1)* %out, i64 %in, i64 %old) {
985 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
986 %val = cmpxchg volatile i64 addrspace(1)* %gep, i64 %old, i64 %in seq_cst seq_cst
; cmpxchg at %out + 9000 elements (72000 bytes = 0x11940). CI/VI are expected
; to put the whole displacement in an SGPR used as the buffer soffset operand;
; GFX9 is expected to split it into a 0x11000 VGPR base plus offset:2368.
990 ; GCN-LABEL: {{^}}atomic_cmpxchg_i64_soffset:
991 ; CIVI: s_mov_b32 [[SREG:s[0-9]+]], 0x11940
992 ; CIVI: buffer_atomic_cmpswap_x2 v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}}
994 ; GFX9: v_mov_b32_e32 [[VOFFSET:v[0-9]+]], 0x11000{{$}}
995 ; GFX9: global_atomic_cmpswap_x2 [[VOFFSET]], v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:2368{{$}}
996 define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(i64 addrspace(1)* %out, i64 %in, i64 %old) {
998 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 9000
999 %val = cmpxchg volatile i64 addrspace(1)* %gep, i64 %old, i64 %in seq_cst seq_cst
; Returning cmpxchg at %out + 4 elements; field 0 of the { i64, i1 } result is
; stored to %out2. The CI/VI checks capture only the low register index of the
; returned pair and require the store to start at that register.
1003 ; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_offset:
1004 ; CIVI: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
1005 ; CIVI: buffer_store_dwordx2 v{{\[}}[[RET]]:
1007 ; GFX9: global_atomic_cmpswap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
1008 define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %old) {
1010 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
1011 %val = cmpxchg volatile i64 addrspace(1)* %gep, i64 %old, i64 %in seq_cst seq_cst
1012 %extract0 = extractvalue { i64, i1 } %val, 0
1013 store i64 %extract0, i64 addrspace(1)* %out2
; Non-returning cmpxchg at %out[%index] + 4 elements. CI expects addr64 with
; offset:32, VI expects the flat form with no offset operand, and GFX9 expects
; the global form with offset:32.
1017 ; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64_offset:
1018 ; CI: buffer_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
1019 ; VI: flat_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
1020 ; GFX9: global_atomic_cmpswap_x2 v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}] offset:32{{$}}
1021 define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index, i64 %old) {
1023 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
1024 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
1025 %val = cmpxchg volatile i64 addrspace(1)* %gep, i64 %old, i64 %in seq_cst seq_cst
; Returning cmpxchg at %out[%index] + 4 elements; field 0 of the { i64, i1 }
; result is stored to %out2. The checks capture only the low register index of
; the returned pair.
1029 ; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64_offset:
1030 ; CI: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
1031 ; VI: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
1032 ; CIVI: buffer_store_dwordx2 v{{\[}}[[RET]]:
1034 ; GFX9: global_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] offset:32 glc{{$}}
1035 define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index, i64 %old) {
1037 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
1038 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
1039 %val = cmpxchg volatile i64 addrspace(1)* %gep, i64 %old, i64 %in seq_cst seq_cst
1040 %extract0 = extractvalue { i64, i1 } %val, 0
1041 store i64 %extract0, i64 addrspace(1)* %out2
; Non-returning cmpxchg directly on %out; no offset or glc is expected.
1045 ; GCN-LABEL: {{^}}atomic_cmpxchg_i64:
1046 ; CIVI: buffer_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
1047 ; GFX9: global_atomic_cmpswap_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]{{$}}
1048 define amdgpu_kernel void @atomic_cmpxchg_i64(i64 addrspace(1)* %out, i64 %in, i64 %old) {
1050 %val = cmpxchg volatile i64 addrspace(1)* %out, i64 %old, i64 %in seq_cst seq_cst
; Returning cmpxchg directly on %out; field 0 of the { i64, i1 } result is
; stored to %out2. The buffer check is anchored at end of line, like the
; sibling returning checks, so a trailing modifier after glc is rejected.
1054 ; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret:
1055 ; CIVI: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
1056 ; CIVI: buffer_store_dwordx2 v{{\[}}[[RET]]:
1058 ; GFX9: global_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+:[0-9]+}}] glc{{$}}
1059 define amdgpu_kernel void @atomic_cmpxchg_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %old) {
1061 %val = cmpxchg volatile i64 addrspace(1)* %out, i64 %old, i64 %in seq_cst seq_cst
1062 %extract0 = extractvalue { i64, i1 } %val, 0
1063 store i64 %extract0, i64 addrspace(1)* %out2
; Non-returning cmpxchg at %out[%index] with no constant displacement.
1067 ; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64:
1068 ; CI: buffer_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
1069 ; VI: flat_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
1070 ; GFX9: global_atomic_cmpswap_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]{{$}}
1071 define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index, i64 %old) {
1073 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
1074 %val = cmpxchg volatile i64 addrspace(1)* %ptr, i64 %old, i64 %in seq_cst seq_cst
; Returning cmpxchg at %out[%index]; field 0 of the { i64, i1 } result is
; stored to %out2. The checks capture only the low register index of the
; returned pair.
1078 ; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64:
1079 ; CI: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
1080 ; VI: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
1081 ; CIVI: buffer_store_dwordx2 v{{\[}}[[RET]]:
1083 ; GFX9: global_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] glc{{$}}
1084 define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index, i64 %old) {
1086 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
1087 %val = cmpxchg volatile i64 addrspace(1)* %ptr, i64 %old, i64 %in seq_cst seq_cst
1088 %extract0 = extractvalue { i64, i1 } %val, 0
1089 store i64 %extract0, i64 addrspace(1)* %out2
; seq_cst atomic i64 load from %in + 4 elements (32 bytes), stored to %out.
; The register-pair patterns accept multi-digit high registers so the checks
; do not depend on the loaded value landing in v[N:9] or lower.
1093 ; GCN-LABEL: {{^}}atomic_load_i64_offset:
1094 ; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
1095 ; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
1096 ; CIVI: buffer_store_dwordx2 [[RET]]
1098 ; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:32 glc{{$}}
1099 define amdgpu_kernel void @atomic_load_i64_offset(i64 addrspace(1)* %in, i64 addrspace(1)* %out) {
1101 %gep = getelementptr i64, i64 addrspace(1)* %in, i64 4
1102 %val = load atomic i64, i64 addrspace(1)* %gep seq_cst, align 8
1103 store i64 %val, i64 addrspace(1)* %out
; seq_cst atomic i64 load from %in - 4 elements (-32 bytes), stored to %out.
; CI is expected to materialize -32 as a 64-bit VGPR pair (0xffffffe0, -1) for
; addr64, VI to add it with scalar add/addc, and GFX9 to fold it as offset:-32.
; The register-pair patterns accept multi-digit high registers.
1107 ; GCN-LABEL: {{^}}atomic_load_i64_neg_offset:
1108 ; CI: v_mov_b32_e32 v[[LO:[0-9]+]], 0xffffffe0
1109 ; CI: v_mov_b32_e32 v[[HI:[0-9]+]], -1
1110 ; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]{{\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
1112 ; VI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0xffffffe0
1113 ; VI-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, -1
1114 ; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
1116 ; CIVI: buffer_store_dwordx2 [[RET]]
1118 ; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:-32 glc{{$}}
1119 define amdgpu_kernel void @atomic_load_i64_neg_offset(i64 addrspace(1)* %in, i64 addrspace(1)* %out) {
1121 %gep = getelementptr i64, i64 addrspace(1)* %in, i64 -4
1122 %val = load atomic i64, i64 addrspace(1)* %gep seq_cst, align 8
1123 store i64 %val, i64 addrspace(1)* %out
; seq_cst atomic i64 load directly from %in, stored to %out. The register-pair
; patterns accept multi-digit high registers, and the load checks are anchored
; at end of line like the other load tests in this file.
1127 ; GCN-LABEL: {{^}}atomic_load_i64:
1128 ; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
1129 ; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
1130 ; CIVI: buffer_store_dwordx2 [[RET]]
1132 ; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
1133 define amdgpu_kernel void @atomic_load_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %out) {
1135 %val = load atomic i64, i64 addrspace(1)* %in seq_cst, align 8
1136 store i64 %val, i64 addrspace(1)* %out
; seq_cst atomic i64 load from %in[%index] + 4 elements, stored to %out. CI
; expects addr64 with offset:32, VI the flat form with no offset operand, and
; GFX9 the global form with offset:32.
1140 ; GCN-LABEL: {{^}}atomic_load_i64_addr64_offset:
1141 ; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
1142 ; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
1143 ; CIVI: buffer_store_dwordx2 [[RET]]
1145 ; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:32 glc{{$}}
1146 define amdgpu_kernel void @atomic_load_i64_addr64_offset(i64 addrspace(1)* %in, i64 addrspace(1)* %out, i64 %index) {
1148 %ptr = getelementptr i64, i64 addrspace(1)* %in, i64 %index
1149 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
1150 %val = load atomic i64, i64 addrspace(1)* %gep seq_cst, align 8
1151 store i64 %val, i64 addrspace(1)* %out
; seq_cst atomic i64 load from %in[%index], stored to %out. The addr64 check
; accepts a multi-digit high register, matching the flat and global patterns
; of the same test.
1155 ; GCN-LABEL: {{^}}atomic_load_i64_addr64:
1156 ; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
1157 ; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
1158 ; CIVI: buffer_store_dwordx2 [[RET]]
1160 ; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
1161 define amdgpu_kernel void @atomic_load_i64_addr64(i64 addrspace(1)* %in, i64 addrspace(1)* %out, i64 %index) {
1163 %ptr = getelementptr i64, i64 addrspace(1)* %in, i64 %index
1164 %val = load atomic i64, i64 addrspace(1)* %ptr seq_cst, align 8
1165 store i64 %val, i64 addrspace(1)* %out
; Double-typed variant of the indexed load with a 4-element displacement; the
; checks expect the same dwordx2 load instructions as the i64 version.
1169 ; GCN-LABEL: {{^}}atomic_load_f64_addr64_offset:
1170 ; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
1171 ; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
1172 ; CIVI: buffer_store_dwordx2 [[RET]]
1174 ; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:32 glc{{$}}
1175 define amdgpu_kernel void @atomic_load_f64_addr64_offset(double addrspace(1)* %in, double addrspace(1)* %out, i64 %index) {
1177 %ptr = getelementptr double, double addrspace(1)* %in, i64 %index
1178 %gep = getelementptr double, double addrspace(1)* %ptr, i64 4
1179 %val = load atomic double, double addrspace(1)* %gep seq_cst, align 8
1180 store double %val, double addrspace(1)* %out
; seq_cst atomic i64 store of %in to %out + 4 elements (32 bytes). The data
; register pair is matched anonymously (nothing refers back to it) and the
; patterns accept multi-digit high registers.
1184 ; GCN-LABEL: {{^}}atomic_store_i64_offset:
1185 ; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
1186 ; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
1187 ; GFX9: global_store_dwordx2 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:32{{$}}
1188 define amdgpu_kernel void @atomic_store_i64_offset(i64 %in, i64 addrspace(1)* %out) {
1190 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
1191 store atomic i64 %in, i64 addrspace(1)* %gep seq_cst, align 8
; seq_cst atomic i64 store of %in directly to %out. The register-pair patterns
; accept multi-digit high registers, matching the indexed store tests.
1195 ; GCN-LABEL: {{^}}atomic_store_i64:
1196 ; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
1197 ; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
1198 ; GFX9: global_store_dwordx2 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}}
1199 define amdgpu_kernel void @atomic_store_i64(i64 %in, i64 addrspace(1)* %out) {
1201 store atomic i64 %in, i64 addrspace(1)* %out seq_cst, align 8
; seq_cst atomic i64 store of %in to %out[%index] + 4 elements. CI expects
; addr64 with offset:32, VI the flat form with no offset operand, and GFX9 the
; global form with offset:32.
1205 ; GCN-LABEL: {{^}}atomic_store_i64_addr64_offset:
1206 ; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
1207 ; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
1208 ; GFX9: global_store_dwordx2 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}] offset:32{{$}}
1209 define amdgpu_kernel void @atomic_store_i64_addr64_offset(i64 %in, i64 addrspace(1)* %out, i64 %index) {
1211 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
1212 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
1213 store atomic i64 %in, i64 addrspace(1)* %gep seq_cst, align 8
; seq_cst atomic i64 store of %in to %out[%index]. The addr64 check accepts a
; multi-digit high data register, matching the flat and global patterns of the
; same test.
1217 ; GCN-LABEL: {{^}}atomic_store_i64_addr64:
1218 ; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
1219 ; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
1220 ; GFX9: global_store_dwordx2 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}]{{$}}
1221 define amdgpu_kernel void @atomic_store_i64_addr64(i64 %in, i64 addrspace(1)* %out, i64 %index) {
1223 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
1224 store atomic i64 %in, i64 addrspace(1)* %ptr seq_cst, align 8
; Double-typed variant of the indexed store with a 4-element displacement; the
; checks expect the same dwordx2 store instructions as the i64 version.
1228 ; GCN-LABEL: {{^}}atomic_store_f64_addr64_offset:
1229 ; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
1230 ; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
1231 ; GFX9: global_store_dwordx2 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}] offset:32{{$}}
1232 define amdgpu_kernel void @atomic_store_f64_addr64_offset(double %in, double addrspace(1)* %out, i64 %index) {
1234 %ptr = getelementptr double, double addrspace(1)* %out, i64 %index
1235 %gep = getelementptr double, double addrspace(1)* %ptr, i64 4
1236 store atomic double %in, double addrspace(1)* %gep seq_cst, align 8