1 ; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX10WGP %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3,+cumode -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX10CU %s
6 ; GCN-LABEL: {{^}}system_one_as_monotonic_monotonic:
7 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
8 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
9 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
10 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
11 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
12 ; GFX8-NOT: buffer_wbinvl1_vol
13 ; GFX10-NOT: buffer_gl{{[01]}}_inv
14 ; GFX10: .amdhsa_kernel system_one_as_monotonic_monotonic
15 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
16 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
17 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in) with syncscope("one-as"), success ordering monotonic and
; failure ordering monotonic.
18 define amdgpu_kernel void @system_one_as_monotonic_monotonic(
19 i32* %out, i32 %in, i32 %old) {
21 %gep = getelementptr i32, i32* %out, i32 4
22 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic
26 ; GCN-LABEL: {{^}}system_one_as_acquire_monotonic:
27 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
28 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
29 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
30 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
31 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
32 ; GFX8-NEXT: buffer_wbinvl1_vol
33 ; GFX10-NEXT: buffer_gl0_inv
34 ; GFX10-NEXT: buffer_gl1_inv
35 ; GFX10: .amdhsa_kernel system_one_as_acquire_monotonic
36 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
37 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
38 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in) with syncscope("one-as"), success ordering acquire and
; failure ordering monotonic.
39 define amdgpu_kernel void @system_one_as_acquire_monotonic(
40 i32* %out, i32 %in, i32 %old) {
42 %gep = getelementptr i32, i32* %out, i32 4
43 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic
47 ; GCN-LABEL: {{^}}system_one_as_release_monotonic:
48 ; GCN: s_waitcnt vmcnt(0){{$}}
49 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
50 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
51 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
52 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
53 ; GFX8-NOT: buffer_wbinvl1_vol
54 ; GFX10-NOT: buffer_gl._inv
55 ; GFX10: .amdhsa_kernel system_one_as_release_monotonic
56 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
57 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
58 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in) with syncscope("one-as"), success ordering release and
; failure ordering monotonic.
59 define amdgpu_kernel void @system_one_as_release_monotonic(
60 i32* %out, i32 %in, i32 %old) {
62 %gep = getelementptr i32, i32* %out, i32 4
63 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") release monotonic
67 ; GCN-LABEL: {{^}}system_one_as_acq_rel_monotonic:
68 ; GCN: s_waitcnt vmcnt(0){{$}}
69 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
70 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
71 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
72 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
73 ; GFX8-NEXT: buffer_wbinvl1_vol
74 ; GFX10-NEXT: buffer_gl0_inv
75 ; GFX10-NEXT: buffer_gl1_inv
76 ; GFX10: .amdhsa_kernel system_one_as_acq_rel_monotonic
77 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
78 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
79 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in) with syncscope("one-as"), success ordering acq_rel and
; failure ordering monotonic.
80 define amdgpu_kernel void @system_one_as_acq_rel_monotonic(
81 i32* %out, i32 %in, i32 %old) {
83 %gep = getelementptr i32, i32* %out, i32 4
84 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic
88 ; GCN-LABEL: {{^}}system_one_as_seq_cst_monotonic:
89 ; GCN: s_waitcnt vmcnt(0){{$}}
90 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
91 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
92 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
93 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
94 ; GFX8-NEXT: buffer_wbinvl1_vol
95 ; GFX10-NEXT: buffer_gl0_inv
96 ; GFX10-NEXT: buffer_gl1_inv
97 ; GFX10: .amdhsa_kernel system_one_as_seq_cst_monotonic
98 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
99 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
100 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in) with syncscope("one-as"), success ordering seq_cst and
; failure ordering monotonic.
101 define amdgpu_kernel void @system_one_as_seq_cst_monotonic(
102 i32* %out, i32 %in, i32 %old) {
104 %gep = getelementptr i32, i32* %out, i32 4
105 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic
109 ; GCN-LABEL: {{^}}system_one_as_acquire_acquire:
110 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
111 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
112 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
113 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
114 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
115 ; GFX8-NEXT: buffer_wbinvl1_vol
116 ; GFX10-NEXT: buffer_gl0_inv
117 ; GFX10-NEXT: buffer_gl1_inv
118 ; GFX10: .amdhsa_kernel system_one_as_acquire_acquire
119 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
120 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
121 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in) with syncscope("one-as"), success ordering acquire and
; failure ordering acquire.
122 define amdgpu_kernel void @system_one_as_acquire_acquire(
123 i32* %out, i32 %in, i32 %old) {
125 %gep = getelementptr i32, i32* %out, i32 4
126 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire
130 ; GCN-LABEL: {{^}}system_one_as_release_acquire:
131 ; GCN: s_waitcnt vmcnt(0){{$}}
132 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
133 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
134 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
135 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
136 ; GFX8-NEXT: buffer_wbinvl1_vol
137 ; GFX10-NEXT: buffer_gl0_inv
138 ; GFX10-NEXT: buffer_gl1_inv
139 ; GFX10: .amdhsa_kernel system_one_as_release_acquire
140 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
141 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
142 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in) with syncscope("one-as"), success ordering release and
; failure ordering acquire.
143 define amdgpu_kernel void @system_one_as_release_acquire(
144 i32* %out, i32 %in, i32 %old) {
146 %gep = getelementptr i32, i32* %out, i32 4
147 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") release acquire
151 ; GCN-LABEL: {{^}}system_one_as_acq_rel_acquire:
152 ; GCN: s_waitcnt vmcnt(0){{$}}
153 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
154 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
155 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
156 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
157 ; GFX8-NEXT: buffer_wbinvl1_vol
158 ; GFX10-NEXT: buffer_gl0_inv
159 ; GFX10-NEXT: buffer_gl1_inv
160 ; GFX10: .amdhsa_kernel system_one_as_acq_rel_acquire
161 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
162 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
163 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in) with syncscope("one-as"), success ordering acq_rel and
; failure ordering acquire.
164 define amdgpu_kernel void @system_one_as_acq_rel_acquire(
165 i32* %out, i32 %in, i32 %old) {
167 %gep = getelementptr i32, i32* %out, i32 4
168 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire
172 ; GCN-LABEL: {{^}}system_one_as_seq_cst_acquire:
173 ; GCN: s_waitcnt vmcnt(0){{$}}
174 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
175 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
176 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
177 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
178 ; GFX8-NEXT: buffer_wbinvl1_vol
179 ; GFX10-NEXT: buffer_gl0_inv
180 ; GFX10-NEXT: buffer_gl1_inv
181 ; GFX10: .amdhsa_kernel system_one_as_seq_cst_acquire
182 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
183 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
184 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in) with syncscope("one-as"), success ordering seq_cst and
; failure ordering acquire.
185 define amdgpu_kernel void @system_one_as_seq_cst_acquire(
186 i32* %out, i32 %in, i32 %old) {
188 %gep = getelementptr i32, i32* %out, i32 4
189 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire
193 ; GCN-LABEL: {{^}}system_one_as_seq_cst_seq_cst:
194 ; GCN: s_waitcnt vmcnt(0){{$}}
195 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
196 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
197 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
198 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
199 ; GFX8-NEXT: buffer_wbinvl1_vol
200 ; GFX10-NEXT: buffer_gl0_inv
201 ; GFX10-NEXT: buffer_gl1_inv
202 ; GFX10: .amdhsa_kernel system_one_as_seq_cst_seq_cst
203 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
204 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
205 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in) with syncscope("one-as"), success ordering seq_cst and
; failure ordering seq_cst.
206 define amdgpu_kernel void @system_one_as_seq_cst_seq_cst(
207 i32* %out, i32 %in, i32 %old) {
209 %gep = getelementptr i32, i32* %out, i32 4
210 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst
214 ; GCN-LABEL: {{^}}singlethread_one_as_monotonic_monotonic:
215 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
216 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
217 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
218 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
219 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
220 ; GFX8-NOT: buffer_wbinvl1_vol
221 ; GFX10-NOT: buffer_gl{{[01]}}_inv
222 ; GFX10: .amdhsa_kernel singlethread_one_as_monotonic_monotonic
223 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
224 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
225 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in) with syncscope("singlethread-one-as"), success ordering
; monotonic and failure ordering monotonic.
226 define amdgpu_kernel void @singlethread_one_as_monotonic_monotonic(
227 i32* %out, i32 %in, i32 %old) {
229 %gep = getelementptr i32, i32* %out, i32 4
230 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic monotonic
234 ; GCN-LABEL: {{^}}singlethread_one_as_acquire_monotonic:
235 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
236 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
237 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
238 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
239 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
240 ; GFX8-NOT: buffer_wbinvl1_vol
241 ; GFX10-NOT: buffer_gl{{[01]}}_inv
242 ; GFX10: .amdhsa_kernel singlethread_one_as_acquire_monotonic
243 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
244 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
245 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in) with syncscope("singlethread-one-as"), success ordering
; acquire and failure ordering monotonic.
246 define amdgpu_kernel void @singlethread_one_as_acquire_monotonic(
247 i32* %out, i32 %in, i32 %old) {
249 %gep = getelementptr i32, i32* %out, i32 4
250 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire monotonic
; Test case for syncscope("singlethread-one-as") with release/monotonic
; orderings. The directives assert that no full vmcnt/vscnt wait appears
; before or after the atomic and that no cache invalidate follows it.
254 ; GCN-LABEL: {{^}}singlethread_one_as_release_monotonic:
255 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
256 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
257 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
258 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
259 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
260 ; GFX8-NOT: buffer_wbinvl1_vol
261 ; GCN-NOT: buffer_gl{{[01]}}_inv
262 ; GFX10: .amdhsa_kernel singlethread_one_as_release_monotonic
263 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
264 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
265 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in).
266 define amdgpu_kernel void @singlethread_one_as_release_monotonic(
267 i32* %out, i32 %in, i32 %old) {
269 %gep = getelementptr i32, i32* %out, i32 4
270 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release monotonic
; Test case for syncscope("singlethread-one-as") with acq_rel/monotonic
; orderings. The directives assert that no full vmcnt/vscnt wait appears
; before or after the atomic and that no cache invalidate follows it.
274 ; GCN-LABEL: {{^}}singlethread_one_as_acq_rel_monotonic:
275 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
276 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
277 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
278 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
279 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
280 ; GFX8-NOT: buffer_wbinvl1_vol
281 ; GFX10-NOT: buffer_gl{{[01]}}_inv
282 ; GFX10: .amdhsa_kernel singlethread_one_as_acq_rel_monotonic
283 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
284 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
285 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in).
286 define amdgpu_kernel void @singlethread_one_as_acq_rel_monotonic(
287 i32* %out, i32 %in, i32 %old) {
289 %gep = getelementptr i32, i32* %out, i32 4
290 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel monotonic
; Test case for syncscope("singlethread-one-as") with seq_cst/monotonic
; orderings. The directives assert that no full vmcnt/vscnt wait appears
; before or after the atomic and that no cache invalidate follows it.
294 ; GCN-LABEL: {{^}}singlethread_one_as_seq_cst_monotonic:
295 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
296 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
297 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
298 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
299 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
300 ; GFX8-NOT: buffer_wbinvl1_vol
301 ; GFX10-NOT: buffer_gl{{[01]}}_inv
302 ; GFX10: .amdhsa_kernel singlethread_one_as_seq_cst_monotonic
303 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
304 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
305 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in).
306 define amdgpu_kernel void @singlethread_one_as_seq_cst_monotonic(
307 i32* %out, i32 %in, i32 %old) {
309 %gep = getelementptr i32, i32* %out, i32 4
310 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst monotonic
; Test case for syncscope("singlethread-one-as") with acquire/acquire
; orderings. The directives assert that no full vmcnt/vscnt wait appears
; before or after the atomic and that no cache invalidate follows it.
314 ; GCN-LABEL: {{^}}singlethread_one_as_acquire_acquire:
315 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
316 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
317 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
318 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
319 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
320 ; GFX8-NOT: buffer_wbinvl1_vol
321 ; GFX10-NOT: buffer_gl{{[01]}}_inv
322 ; GFX10: .amdhsa_kernel singlethread_one_as_acquire_acquire
323 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
324 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
325 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in).
326 define amdgpu_kernel void @singlethread_one_as_acquire_acquire(
327 i32* %out, i32 %in, i32 %old) {
329 %gep = getelementptr i32, i32* %out, i32 4
330 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire acquire
; Test case for syncscope("singlethread-one-as") with release/acquire
; orderings. The directives assert that no full vmcnt/vscnt wait appears
; before or after the atomic and that no cache invalidate follows it.
334 ; GCN-LABEL: {{^}}singlethread_one_as_release_acquire:
335 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
336 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
337 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
338 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
339 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
340 ; GFX8-NOT: buffer_wbinvl1_vol
341 ; GFX10-NOT: buffer_gl{{[01]}}_inv
342 ; GFX10: .amdhsa_kernel singlethread_one_as_release_acquire
343 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
344 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
345 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in).
346 define amdgpu_kernel void @singlethread_one_as_release_acquire(
347 i32* %out, i32 %in, i32 %old) {
349 %gep = getelementptr i32, i32* %out, i32 4
350 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release acquire
; Test case for syncscope("singlethread-one-as") with acq_rel/acquire
; orderings. The directives assert that no full vmcnt/vscnt wait appears
; before or after the atomic and that no cache invalidate follows it.
354 ; GCN-LABEL: {{^}}singlethread_one_as_acq_rel_acquire:
355 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
356 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
357 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
358 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
359 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
360 ; GFX8-NOT: buffer_wbinvl1_vol
361 ; GFX10-NOT: buffer_gl{{[01]}}_inv
362 ; GFX10: .amdhsa_kernel singlethread_one_as_acq_rel_acquire
363 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
364 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
365 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in).
366 define amdgpu_kernel void @singlethread_one_as_acq_rel_acquire(
367 i32* %out, i32 %in, i32 %old) {
369 %gep = getelementptr i32, i32* %out, i32 4
370 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel acquire
; Test case for syncscope("singlethread-one-as") with seq_cst/acquire
; orderings. The directives assert that no full vmcnt/vscnt wait appears
; before or after the atomic and that no cache invalidate follows it.
374 ; GCN-LABEL: {{^}}singlethread_one_as_seq_cst_acquire:
375 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
376 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
377 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
378 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
379 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
380 ; GFX8-NOT: buffer_wbinvl1_vol
381 ; GFX10-NOT: buffer_gl{{[01]}}_inv
382 ; GFX10: .amdhsa_kernel singlethread_one_as_seq_cst_acquire
383 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
384 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
385 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in).
386 define amdgpu_kernel void @singlethread_one_as_seq_cst_acquire(
387 i32* %out, i32 %in, i32 %old) {
389 %gep = getelementptr i32, i32* %out, i32 4
390 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst acquire
; Test case for syncscope("singlethread-one-as") with seq_cst/seq_cst
; orderings. The directives assert that no full vmcnt/vscnt wait appears
; before or after the atomic and that no cache invalidate follows it.
394 ; GCN-LABEL: {{^}}singlethread_one_as_seq_cst_seq_cst:
395 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
396 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
397 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
398 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
399 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
400 ; GFX8-NOT: buffer_wbinvl1_vol
401 ; GFX10-NOT: buffer_gl{{[01]}}_inv
402 ; GFX10: .amdhsa_kernel singlethread_one_as_seq_cst_seq_cst
403 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
404 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
405 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in).
406 define amdgpu_kernel void @singlethread_one_as_seq_cst_seq_cst(
407 i32* %out, i32 %in, i32 %old) {
409 %gep = getelementptr i32, i32* %out, i32 4
410 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst seq_cst
; Test case for syncscope("agent-one-as") with monotonic/monotonic orderings.
; The directives assert that no full vmcnt/vscnt wait appears before or after
; the atomic and that no cache invalidate follows it.
414 ; GCN-LABEL: {{^}}agent_one_as_monotonic_monotonic:
415 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
416 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
417 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
418 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
419 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
420 ; GFX8-NOT: buffer_wbinvl1_vol
421 ; GFX10-NOT: buffer_gl{{[01]}}_inv
422 ; GFX10: .amdhsa_kernel agent_one_as_monotonic_monotonic
423 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
424 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
425 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in).
426 define amdgpu_kernel void @agent_one_as_monotonic_monotonic(
427 i32* %out, i32 %in, i32 %old) {
429 %gep = getelementptr i32, i32* %out, i32 4
430 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic
434 ; GCN-LABEL: {{^}}agent_one_as_acquire_monotonic:
435 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
436 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
437 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
438 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
439 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
440 ; GFX8-NEXT: buffer_wbinvl1_vol
441 ; GFX10-NEXT: buffer_gl0_inv
442 ; GFX10-NEXT: buffer_gl1_inv
443 ; GFX10: .amdhsa_kernel agent_one_as_acquire_monotonic
444 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
445 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
446 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in) with syncscope("agent-one-as"), success ordering acquire and
; failure ordering monotonic.
447 define amdgpu_kernel void @agent_one_as_acquire_monotonic(
448 i32* %out, i32 %in, i32 %old) {
450 %gep = getelementptr i32, i32* %out, i32 4
451 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic
455 ; GCN-LABEL: {{^}}agent_one_as_release_monotonic:
456 ; GCN: s_waitcnt vmcnt(0){{$}}
457 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
458 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
459 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
460 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
461 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
462 ; GFX10: .amdhsa_kernel agent_one_as_release_monotonic
463 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
464 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
465 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in) with syncscope("agent-one-as"), success ordering release and
; failure ordering monotonic.
466 define amdgpu_kernel void @agent_one_as_release_monotonic(
467 i32* %out, i32 %in, i32 %old) {
469 %gep = getelementptr i32, i32* %out, i32 4
470 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") release monotonic
474 ; GCN-LABEL: {{^}}agent_one_as_acq_rel_monotonic:
475 ; GCN: s_waitcnt vmcnt(0){{$}}
476 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
477 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
478 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
479 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
480 ; GFX8-NEXT: buffer_wbinvl1_vol
481 ; GFX10-NEXT: buffer_gl0_inv
482 ; GFX10-NEXT: buffer_gl1_inv
483 ; GFX10: .amdhsa_kernel agent_one_as_acq_rel_monotonic
484 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
485 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
486 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in) with syncscope("agent-one-as"), success ordering acq_rel and
; failure ordering monotonic.
487 define amdgpu_kernel void @agent_one_as_acq_rel_monotonic(
488 i32* %out, i32 %in, i32 %old) {
490 %gep = getelementptr i32, i32* %out, i32 4
491 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic
495 ; GCN-LABEL: {{^}}agent_one_as_seq_cst_monotonic:
496 ; GCN: s_waitcnt vmcnt(0){{$}}
497 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
498 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
499 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
500 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
501 ; GFX8-NEXT: buffer_wbinvl1_vol
502 ; GFX10-NEXT: buffer_gl0_inv
503 ; GFX10-NEXT: buffer_gl1_inv
504 ; GFX10: .amdhsa_kernel agent_one_as_seq_cst_monotonic
505 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
506 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
507 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in) with syncscope("agent-one-as"), success ordering seq_cst and
; failure ordering monotonic.
508 define amdgpu_kernel void @agent_one_as_seq_cst_monotonic(
509 i32* %out, i32 %in, i32 %old) {
511 %gep = getelementptr i32, i32* %out, i32 4
512 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic
516 ; GCN-LABEL: {{^}}agent_one_as_acquire_acquire:
517 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
518 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
519 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
520 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
521 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
522 ; GFX8-NEXT: buffer_wbinvl1_vol
523 ; GFX10-NEXT: buffer_gl0_inv
524 ; GFX10-NEXT: buffer_gl1_inv
525 ; GFX10: .amdhsa_kernel agent_one_as_acquire_acquire
526 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
527 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
528 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in) with syncscope("agent-one-as"), success ordering acquire and
; failure ordering acquire.
529 define amdgpu_kernel void @agent_one_as_acquire_acquire(
530 i32* %out, i32 %in, i32 %old) {
532 %gep = getelementptr i32, i32* %out, i32 4
533 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire
537 ; GCN-LABEL: {{^}}agent_one_as_release_acquire:
538 ; GCN: s_waitcnt vmcnt(0){{$}}
539 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
540 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
541 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
542 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
543 ; GFX8-NEXT: buffer_wbinvl1_vol
544 ; GFX10-NEXT: buffer_gl0_inv
545 ; GFX10-NEXT: buffer_gl1_inv
546 ; GFX10: .amdhsa_kernel agent_one_as_release_acquire
547 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
548 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
549 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in) with syncscope("agent-one-as"), success ordering release and
; failure ordering acquire.
550 define amdgpu_kernel void @agent_one_as_release_acquire(
551 i32* %out, i32 %in, i32 %old) {
553 %gep = getelementptr i32, i32* %out, i32 4
554 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire
558 ; GCN-LABEL: {{^}}agent_one_as_acq_rel_acquire:
559 ; GCN: s_waitcnt vmcnt(0){{$}}
560 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
561 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
562 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
563 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
564 ; GFX8-NEXT: buffer_wbinvl1_vol
565 ; GFX10-NEXT: buffer_gl0_inv
566 ; GFX10-NEXT: buffer_gl1_inv
567 ; GFX10: .amdhsa_kernel agent_one_as_acq_rel_acquire
568 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
569 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
570 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in) with syncscope("agent-one-as"), success ordering acq_rel and
; failure ordering acquire.
571 define amdgpu_kernel void @agent_one_as_acq_rel_acquire(
572 i32* %out, i32 %in, i32 %old) {
574 %gep = getelementptr i32, i32* %out, i32 4
575 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire
579 ; GCN-LABEL: {{^}}agent_one_as_seq_cst_acquire:
580 ; GCN: s_waitcnt vmcnt(0){{$}}
581 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
582 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
583 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
584 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
585 ; GFX8-NEXT: buffer_wbinvl1_vol
586 ; GFX10-NEXT: buffer_gl0_inv
587 ; GFX10-NEXT: buffer_gl1_inv
588 ; GFX10: .amdhsa_kernel agent_one_as_seq_cst_acquire
589 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
590 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
591 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in) with syncscope("agent-one-as"), success ordering seq_cst and
; failure ordering acquire.
592 define amdgpu_kernel void @agent_one_as_seq_cst_acquire(
593 i32* %out, i32 %in, i32 %old) {
595 %gep = getelementptr i32, i32* %out, i32 4
596 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire
600 ; GCN-LABEL: {{^}}agent_one_as_seq_cst_seq_cst:
601 ; GCN: s_waitcnt vmcnt(0){{$}}
602 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
603 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
604 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
605 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
606 ; GFX8-NEXT: buffer_wbinvl1_vol
607 ; GFX10-NEXT: buffer_gl0_inv
608 ; GFX10-NEXT: buffer_gl1_inv
609 ; GFX10: .amdhsa_kernel agent_one_as_seq_cst_seq_cst
610 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
611 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
612 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in) with syncscope("agent-one-as"), success ordering seq_cst and
; failure ordering seq_cst.
613 define amdgpu_kernel void @agent_one_as_seq_cst_seq_cst(
614 i32* %out, i32 %in, i32 %old) {
616 %gep = getelementptr i32, i32* %out, i32 4
617 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst
; Test case for syncscope("workgroup-one-as") with monotonic/monotonic
; orderings. The directives assert that no full vmcnt/vscnt wait appears
; before or after the atomic and that no cache invalidate follows it.
621 ; GCN-LABEL: {{^}}workgroup_one_as_monotonic_monotonic:
622 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
623 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
624 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
625 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
626 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
627 ; GFX8-NOT: buffer_wbinvl1_vol
628 ; GFX10-NOT: buffer_gl{{[01]}}_inv
629 ; GFX10: .amdhsa_kernel workgroup_one_as_monotonic_monotonic
630 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
631 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
632 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in).
633 define amdgpu_kernel void @workgroup_one_as_monotonic_monotonic(
634 i32* %out, i32 %in, i32 %old) {
636 %gep = getelementptr i32, i32* %out, i32 4
637 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic monotonic
641 ; GCN-LABEL: {{^}}workgroup_one_as_acquire_monotonic:
642 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
643 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
644 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
645 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
646 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
647 ; GFX10WGP-NEXT: buffer_gl0_inv
648 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
649 ; GFX10CU-NOT: buffer_gl0_inv
650 ; GFX8-NOT: buffer_wbinvl1_vol
651 ; GFX10: .amdhsa_kernel workgroup_one_as_acquire_monotonic
652 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
653 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
654 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in) with syncscope("workgroup-one-as"), success ordering acquire
; and failure ordering monotonic.
655 define amdgpu_kernel void @workgroup_one_as_acquire_monotonic(
656 i32* %out, i32 %in, i32 %old) {
658 %gep = getelementptr i32, i32* %out, i32 4
659 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire monotonic
663 ; GCN-LABEL: {{^}}workgroup_one_as_release_monotonic:
664 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
665 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
666 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
667 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
668 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
669 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
670 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
671 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
672 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
673 ; GFX10: .amdhsa_kernel workgroup_one_as_release_monotonic
674 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
675 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
676 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in) with syncscope("workgroup-one-as"), success ordering release
; and failure ordering monotonic.
677 define amdgpu_kernel void @workgroup_one_as_release_monotonic(
678 i32* %out, i32 %in, i32 %old) {
680 %gep = getelementptr i32, i32* %out, i32 4
681 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release monotonic
685 ; GCN-LABEL: {{^}}workgroup_one_as_acq_rel_monotonic:
686 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
687 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
688 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
689 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
690 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
691 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
692 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
693 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
694 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
695 ; GFX8-NOT: buffer_wbinvl1_vol
696 ; GFX10WGP-NEXT: buffer_gl0_inv
697 ; GFX10CU-NOT: buffer_gl0_inv
698 ; GFX10: .amdhsa_kernel workgroup_one_as_acq_rel_monotonic
699 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
700 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
701 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Test input kernel. Volatile cmpxchg on element 4 of %out (compare value %old,
; new value %in) with syncscope("workgroup-one-as"), success ordering acq_rel
; and failure ordering monotonic.
702 define amdgpu_kernel void @workgroup_one_as_acq_rel_monotonic(
703 i32* %out, i32 %in, i32 %old) {
705 %gep = getelementptr i32, i32* %out, i32 4
706 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel monotonic
; Non-returning flat cmpxchg at syncscope "workgroup-one-as", seq_cst/monotonic.
; gfx1010 without +cumode: s_waitcnt vmcnt(0) and s_waitcnt_vscnt before the
; atomic, then s_waitcnt_vscnt and buffer_gl0_inv directly after it.
; gfx1010 with +cumode: none of those waits and no buffer_gl0_inv.
; gfx803: no s_waitcnt vmcnt(0) and no buffer_wbinvl1_vol.
710 ; GCN-LABEL: {{^}}workgroup_one_as_seq_cst_monotonic:
711 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
712 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
713 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
714 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
715 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
716 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
717 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
718 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
719 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
720 ; GFX8-NOT: buffer_wbinvl1_vol
721 ; GFX10WGP-NEXT: buffer_gl0_inv
722 ; GFX10CU-NOT: buffer_gl0_inv
723 ; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst_monotonic
724 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
725 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
726 ; GFX10-NOT: .amdhsa_memory_ordered 0
727 define amdgpu_kernel void @workgroup_one_as_seq_cst_monotonic(
728 i32* %out, i32 %in, i32 %old) {
730 %gep = getelementptr i32, i32* %out, i32 4
731 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst monotonic
; Non-returning flat cmpxchg at syncscope "workgroup-one-as", acquire/acquire.
; No target may emit a zero-count wait before the atomic.
; gfx1010 without +cumode: s_waitcnt_vscnt and buffer_gl0_inv directly after it.
; gfx1010 with +cumode: neither of those two instructions.
; gfx803: no s_waitcnt vmcnt(0) and no buffer_wbinvl1_vol after the atomic.
735 ; GCN-LABEL: {{^}}workgroup_one_as_acquire_acquire:
736 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
737 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
738 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
739 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
740 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
741 ; GFX10WGP-NEXT: buffer_gl0_inv
742 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
743 ; GFX10CU-NOT: buffer_gl0_inv
744 ; GFX8-NOT: buffer_wbinvl1_vol
745 ; GFX10: .amdhsa_kernel workgroup_one_as_acquire_acquire
746 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
747 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
748 ; GFX10-NOT: .amdhsa_memory_ordered 0
749 define amdgpu_kernel void @workgroup_one_as_acquire_acquire(
750 i32* %out, i32 %in, i32 %old) {
752 %gep = getelementptr i32, i32* %out, i32 4
753 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire acquire
; Non-returning flat cmpxchg at syncscope "workgroup-one-as", release/acquire.
; gfx1010 without +cumode: s_waitcnt vmcnt(0) and s_waitcnt_vscnt before the
; atomic, then s_waitcnt_vscnt and buffer_gl0_inv directly after it.
; gfx1010 with +cumode: none of those waits and no buffer_gl0_inv.
; gfx803: no s_waitcnt vmcnt(0) and no buffer_wbinvl1_vol.
757 ; GCN-LABEL: {{^}}workgroup_one_as_release_acquire:
758 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
759 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
760 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
761 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
762 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
763 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
764 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
765 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
766 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
767 ; GFX8-NOT: buffer_wbinvl1_vol
768 ; GFX10WGP-NEXT: buffer_gl0_inv
769 ; GFX10CU-NOT: buffer_gl0_inv
770 ; GFX10: .amdhsa_kernel workgroup_one_as_release_acquire
771 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
772 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
773 ; GFX10-NOT: .amdhsa_memory_ordered 0
774 define amdgpu_kernel void @workgroup_one_as_release_acquire(
775 i32* %out, i32 %in, i32 %old) {
777 %gep = getelementptr i32, i32* %out, i32 4
778 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release acquire
; Non-returning flat cmpxchg at syncscope "workgroup-one-as", acq_rel/acquire.
; gfx1010 without +cumode: s_waitcnt vmcnt(0) and s_waitcnt_vscnt before the
; atomic, then s_waitcnt_vscnt and buffer_gl0_inv directly after it.
; gfx1010 with +cumode: none of those waits and no buffer_gl0_inv.
; gfx803: no s_waitcnt vmcnt(0) and no buffer_wbinvl1_vol.
782 ; GCN-LABEL: {{^}}workgroup_one_as_acq_rel_acquire:
783 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
784 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
785 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
786 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
787 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
788 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
789 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
790 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
791 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
792 ; GFX8-NOT: buffer_wbinvl1_vol
793 ; GFX10WGP-NEXT: buffer_gl0_inv
794 ; GFX10CU-NOT: buffer_gl0_inv
795 ; GFX10: .amdhsa_kernel workgroup_one_as_acq_rel_acquire
796 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
797 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
798 ; GFX10-NOT: .amdhsa_memory_ordered 0
799 define amdgpu_kernel void @workgroup_one_as_acq_rel_acquire(
800 i32* %out, i32 %in, i32 %old) {
802 %gep = getelementptr i32, i32* %out, i32 4
803 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel acquire
; Non-returning flat cmpxchg at syncscope "workgroup-one-as", seq_cst/acquire.
; gfx1010 without +cumode: s_waitcnt vmcnt(0) and s_waitcnt_vscnt before the
; atomic, then s_waitcnt_vscnt and buffer_gl0_inv directly after it.
; gfx1010 with +cumode: none of those waits and no buffer_gl0_inv.
; gfx803: no s_waitcnt vmcnt(0) and no buffer_wbinvl1_vol.
807 ; GCN-LABEL: {{^}}workgroup_one_as_seq_cst_acquire:
808 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
809 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
810 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
811 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
812 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
813 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
814 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
815 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
816 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
817 ; GFX8-NOT: buffer_wbinvl1_vol
818 ; GFX10WGP-NEXT: buffer_gl0_inv
819 ; GFX10CU-NOT: buffer_gl0_inv
820 ; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst_acquire
821 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
822 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
823 ; GFX10-NOT: .amdhsa_memory_ordered 0
824 define amdgpu_kernel void @workgroup_one_as_seq_cst_acquire(
825 i32* %out, i32 %in, i32 %old) {
827 %gep = getelementptr i32, i32* %out, i32 4
828 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst acquire
; Non-returning flat cmpxchg at syncscope "workgroup-one-as", seq_cst/seq_cst.
; gfx1010 without +cumode: s_waitcnt vmcnt(0) and s_waitcnt_vscnt before the
; atomic, then s_waitcnt_vscnt and buffer_gl0_inv directly after it.
; gfx1010 with +cumode: none of those waits and no buffer_gl0_inv.
; gfx803: no s_waitcnt vmcnt(0) and no buffer_wbinvl1_vol.
832 ; GCN-LABEL: {{^}}workgroup_one_as_seq_cst_seq_cst:
833 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
834 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
835 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
836 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
837 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
838 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
839 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
840 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
841 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
842 ; GFX8-NOT: buffer_wbinvl1_vol
843 ; GFX10WGP-NEXT: buffer_gl0_inv
844 ; GFX10CU-NOT: buffer_gl0_inv
845 ; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst_seq_cst
846 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
847 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
848 ; GFX10-NOT: .amdhsa_memory_ordered 0
849 define amdgpu_kernel void @workgroup_one_as_seq_cst_seq_cst(
850 i32* %out, i32 %in, i32 %old) {
852 %gep = getelementptr i32, i32* %out, i32 4
853 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst seq_cst
; Non-returning flat cmpxchg at syncscope "wavefront-one-as", monotonic/monotonic.
; No target may emit a zero-count wait (vmcnt / vscnt) around the atomic or a
; cache invalidate after it (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv or
; buffer_gl1_inv on gfx1010).
857 ; GCN-LABEL: {{^}}wavefront_one_as_monotonic_monotonic:
858 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
859 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
860 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
861 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
862 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
863 ; GFX8-NOT: buffer_wbinvl1_vol
864 ; GFX10-NOT: buffer_gl{{[01]}}_inv
865 ; GFX10: .amdhsa_kernel wavefront_one_as_monotonic_monotonic
866 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
867 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
868 ; GFX10-NOT: .amdhsa_memory_ordered 0
869 define amdgpu_kernel void @wavefront_one_as_monotonic_monotonic(
870 i32* %out, i32 %in, i32 %old) {
872 %gep = getelementptr i32, i32* %out, i32 4
873 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic monotonic
; Non-returning flat cmpxchg at syncscope "wavefront-one-as", acquire/monotonic.
; No target may emit a zero-count wait (vmcnt / vscnt) around the atomic or a
; cache invalidate after it (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv or
; buffer_gl1_inv on gfx1010).
877 ; GCN-LABEL: {{^}}wavefront_one_as_acquire_monotonic:
878 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
879 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
880 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
881 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
882 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
883 ; GFX8-NOT: buffer_wbinvl1_vol
884 ; GFX10-NOT: buffer_gl{{[01]}}_inv
885 ; GFX10: .amdhsa_kernel wavefront_one_as_acquire_monotonic
886 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
887 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
888 ; GFX10-NOT: .amdhsa_memory_ordered 0
889 define amdgpu_kernel void @wavefront_one_as_acquire_monotonic(
890 i32* %out, i32 %in, i32 %old) {
892 %gep = getelementptr i32, i32* %out, i32 4
893 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire monotonic
; Non-returning flat cmpxchg at syncscope "wavefront-one-as", release/monotonic.
; No target may emit a zero-count wait (vmcnt / vscnt) around the atomic or a
; cache invalidate after it (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv or
; buffer_gl1_inv on gfx1010).
897 ; GCN-LABEL: {{^}}wavefront_one_as_release_monotonic:
898 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
899 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
900 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
901 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
902 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
903 ; GFX8-NOT: buffer_wbinvl1_vol
904 ; GFX10-NOT: buffer_gl{{[01]}}_inv
905 ; GFX10: .amdhsa_kernel wavefront_one_as_release_monotonic
906 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
907 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
908 ; GFX10-NOT: .amdhsa_memory_ordered 0
909 define amdgpu_kernel void @wavefront_one_as_release_monotonic(
910 i32* %out, i32 %in, i32 %old) {
912 %gep = getelementptr i32, i32* %out, i32 4
913 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release monotonic
; Non-returning flat cmpxchg at syncscope "wavefront-one-as", acq_rel/monotonic.
; No target may emit a zero-count wait (vmcnt / vscnt) around the atomic or a
; cache invalidate after it (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv or
; buffer_gl1_inv on gfx1010).
917 ; GCN-LABEL: {{^}}wavefront_one_as_acq_rel_monotonic:
918 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
919 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
920 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
921 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
922 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
923 ; GFX8-NOT: buffer_wbinvl1_vol
924 ; GFX10-NOT: buffer_gl{{[01]}}_inv
925 ; GFX10: .amdhsa_kernel wavefront_one_as_acq_rel_monotonic
926 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
927 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
928 ; GFX10-NOT: .amdhsa_memory_ordered 0
929 define amdgpu_kernel void @wavefront_one_as_acq_rel_monotonic(
930 i32* %out, i32 %in, i32 %old) {
932 %gep = getelementptr i32, i32* %out, i32 4
933 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel monotonic
; Non-returning flat cmpxchg at syncscope "wavefront-one-as", seq_cst/monotonic.
; No target may emit a zero-count wait (vmcnt / vscnt) around the atomic or a
; cache invalidate after it (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv or
; buffer_gl1_inv on gfx1010).
937 ; GCN-LABEL: {{^}}wavefront_one_as_seq_cst_monotonic:
938 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
939 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
940 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
941 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
942 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
943 ; GFX8-NOT: buffer_wbinvl1_vol
944 ; GFX10-NOT: buffer_gl{{[01]}}_inv
945 ; GFX10: .amdhsa_kernel wavefront_one_as_seq_cst_monotonic
946 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
947 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
948 ; GFX10-NOT: .amdhsa_memory_ordered 0
949 define amdgpu_kernel void @wavefront_one_as_seq_cst_monotonic(
950 i32* %out, i32 %in, i32 %old) {
952 %gep = getelementptr i32, i32* %out, i32 4
953 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst monotonic
; Non-returning flat cmpxchg at syncscope "wavefront-one-as", acquire/acquire.
; No target may emit a zero-count wait (vmcnt / vscnt) around the atomic or a
; cache invalidate after it (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv or
; buffer_gl1_inv on gfx1010).
957 ; GCN-LABEL: {{^}}wavefront_one_as_acquire_acquire:
958 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
959 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
960 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
961 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
962 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
963 ; GFX8-NOT: buffer_wbinvl1_vol
964 ; GFX10-NOT: buffer_gl{{[01]}}_inv
965 ; GFX10: .amdhsa_kernel wavefront_one_as_acquire_acquire
966 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
967 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
968 ; GFX10-NOT: .amdhsa_memory_ordered 0
969 define amdgpu_kernel void @wavefront_one_as_acquire_acquire(
970 i32* %out, i32 %in, i32 %old) {
972 %gep = getelementptr i32, i32* %out, i32 4
973 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire acquire
; Non-returning flat cmpxchg at syncscope "wavefront-one-as", release/acquire.
; No target may emit a zero-count wait (vmcnt / vscnt) around the atomic or a
; cache invalidate after it (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv or
; buffer_gl1_inv on gfx1010).
977 ; GCN-LABEL: {{^}}wavefront_one_as_release_acquire:
978 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
979 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
980 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
981 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
982 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
983 ; GFX8-NOT: buffer_wbinvl1_vol
984 ; GFX10-NOT: buffer_gl{{[01]}}_inv
985 ; GFX10: .amdhsa_kernel wavefront_one_as_release_acquire
986 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
987 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
988 ; GFX10-NOT: .amdhsa_memory_ordered 0
989 define amdgpu_kernel void @wavefront_one_as_release_acquire(
990 i32* %out, i32 %in, i32 %old) {
992 %gep = getelementptr i32, i32* %out, i32 4
993 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release acquire
; Non-returning flat cmpxchg at syncscope "wavefront-one-as", acq_rel/acquire.
; No target may emit a zero-count wait (vmcnt / vscnt) around the atomic or a
; cache invalidate after it (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv or
; buffer_gl1_inv on gfx1010).
997 ; GCN-LABEL: {{^}}wavefront_one_as_acq_rel_acquire:
998 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
999 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1000 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1001 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1002 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1003 ; GFX8-NOT: buffer_wbinvl1_vol
1004 ; GFX10-NOT: buffer_gl{{[01]}}_inv
1005 ; GFX10: .amdhsa_kernel wavefront_one_as_acq_rel_acquire
1006 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1007 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1008 ; GFX10-NOT: .amdhsa_memory_ordered 0
1009 define amdgpu_kernel void @wavefront_one_as_acq_rel_acquire(
1010 i32* %out, i32 %in, i32 %old) {
1012 %gep = getelementptr i32, i32* %out, i32 4
1013 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel acquire
; Non-returning flat cmpxchg at syncscope "wavefront-one-as", seq_cst/acquire.
; No target may emit a zero-count wait (vmcnt / vscnt) around the atomic or a
; cache invalidate after it (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv or
; buffer_gl1_inv on gfx1010).
1017 ; GCN-LABEL: {{^}}wavefront_one_as_seq_cst_acquire:
1018 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1019 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1020 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1021 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1022 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1023 ; GFX8-NOT: buffer_wbinvl1_vol
1024 ; GFX10-NOT: buffer_gl{{[01]}}_inv
1025 ; GFX10: .amdhsa_kernel wavefront_one_as_seq_cst_acquire
1026 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1027 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1028 ; GFX10-NOT: .amdhsa_memory_ordered 0
1029 define amdgpu_kernel void @wavefront_one_as_seq_cst_acquire(
1030 i32* %out, i32 %in, i32 %old) {
1032 %gep = getelementptr i32, i32* %out, i32 4
1033 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst acquire
; Non-returning flat cmpxchg at syncscope "wavefront-one-as", seq_cst/seq_cst.
; No target may emit a zero-count wait (vmcnt / vscnt) around the atomic or a
; cache invalidate after it (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv or
; buffer_gl1_inv on gfx1010).
1037 ; GCN-LABEL: {{^}}wavefront_one_as_seq_cst_seq_cst:
1038 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1039 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1040 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1041 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1042 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1043 ; GFX8-NOT: buffer_wbinvl1_vol
1044 ; GFX10-NOT: buffer_gl{{[01]}}_inv
1045 ; GFX10: .amdhsa_kernel wavefront_one_as_seq_cst_seq_cst
1046 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1047 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1048 ; GFX10-NOT: .amdhsa_memory_ordered 0
1049 define amdgpu_kernel void @wavefront_one_as_seq_cst_seq_cst(
1050 i32* %out, i32 %in, i32 %old) {
1052 %gep = getelementptr i32, i32* %out, i32 4
1053 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst seq_cst
; Returning flat cmpxchg (glc; element 0 of the result is stored to %out) at
; syncscope "one-as", acquire/monotonic. No zero-count wait may precede the
; atomic; s_waitcnt vmcnt(0) and the cache invalidate must follow it directly
; (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv + buffer_gl1_inv on gfx1010).
1057 ; GCN-LABEL: {{^}}system_one_as_acquire_monotonic_ret:
1058 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1059 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1060 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1061 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1062 ; GFX8-NEXT: buffer_wbinvl1_vol
1063 ; GFX10-NEXT: buffer_gl0_inv
1064 ; GFX10-NEXT: buffer_gl1_inv
1065 ; GFX10: .amdhsa_kernel system_one_as_acquire_monotonic_ret
1066 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1067 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1068 ; GFX10-NOT: .amdhsa_memory_ordered 0
1069 define amdgpu_kernel void @system_one_as_acquire_monotonic_ret(
1070 i32* %out, i32 %in, i32 %old) {
1072 %gep = getelementptr i32, i32* %out, i32 4
1073 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic
1074 %val0 = extractvalue { i32, i1 } %val, 0
1075 store i32 %val0, i32* %out, align 4
; Returning flat cmpxchg (glc; element 0 of the result is stored to %out) at
; syncscope "one-as", acq_rel/monotonic. s_waitcnt vmcnt(0) (plus
; s_waitcnt_vscnt on gfx1010) must sit directly before the atomic;
; s_waitcnt vmcnt(0) and the cache invalidate must follow it directly
; (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv + buffer_gl1_inv on gfx1010).
1079 ; GCN-LABEL: {{^}}system_one_as_acq_rel_monotonic_ret:
1080 ; GCN: s_waitcnt vmcnt(0){{$}}
1081 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1082 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1083 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1084 ; GFX8-NEXT: buffer_wbinvl1_vol
1085 ; GFX10-NEXT: buffer_gl0_inv
1086 ; GFX10-NEXT: buffer_gl1_inv
1087 ; GFX10: .amdhsa_kernel system_one_as_acq_rel_monotonic_ret
1088 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1089 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1090 ; GFX10-NOT: .amdhsa_memory_ordered 0
1091 define amdgpu_kernel void @system_one_as_acq_rel_monotonic_ret(
1092 i32* %out, i32 %in, i32 %old) {
1094 %gep = getelementptr i32, i32* %out, i32 4
1095 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic
1096 %val0 = extractvalue { i32, i1 } %val, 0
1097 store i32 %val0, i32* %out, align 4
; Returning flat cmpxchg (glc; element 0 of the result is stored to %out) at
; syncscope "one-as", seq_cst/monotonic. s_waitcnt vmcnt(0) (plus
; s_waitcnt_vscnt on gfx1010) must sit directly before the atomic;
; s_waitcnt vmcnt(0) and the cache invalidate must follow it directly
; (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv + buffer_gl1_inv on gfx1010).
1101 ; GCN-LABEL: {{^}}system_one_as_seq_cst_monotonic_ret:
1102 ; GCN: s_waitcnt vmcnt(0){{$}}
1103 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1104 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1105 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1106 ; GFX8-NEXT: buffer_wbinvl1_vol
1107 ; GFX10-NEXT: buffer_gl0_inv
1108 ; GFX10-NEXT: buffer_gl1_inv
1109 ; GFX10: .amdhsa_kernel system_one_as_seq_cst_monotonic_ret
1110 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1111 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1112 ; GFX10-NOT: .amdhsa_memory_ordered 0
1113 define amdgpu_kernel void @system_one_as_seq_cst_monotonic_ret(
1114 i32* %out, i32 %in, i32 %old) {
1116 %gep = getelementptr i32, i32* %out, i32 4
1117 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic
1118 %val0 = extractvalue { i32, i1 } %val, 0
1119 store i32 %val0, i32* %out, align 4
; Returning flat cmpxchg (glc; element 0 of the result is stored to %out) at
; syncscope "one-as", acquire/acquire. No zero-count wait may precede the
; atomic; s_waitcnt vmcnt(0) and the cache invalidate must follow it directly
; (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv + buffer_gl1_inv on gfx1010).
1123 ; GCN-LABEL: {{^}}system_one_as_acquire_acquire_ret:
1124 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1125 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1126 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1127 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1128 ; GFX8-NEXT: buffer_wbinvl1_vol
1129 ; GFX10-NEXT: buffer_gl0_inv
1130 ; GFX10-NEXT: buffer_gl1_inv
1131 ; GFX10: .amdhsa_kernel system_one_as_acquire_acquire_ret
1132 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1133 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1134 ; GFX10-NOT: .amdhsa_memory_ordered 0
1135 define amdgpu_kernel void @system_one_as_acquire_acquire_ret(
1136 i32* %out, i32 %in, i32 %old) {
1138 %gep = getelementptr i32, i32* %out, i32 4
1139 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire
1140 %val0 = extractvalue { i32, i1 } %val, 0
1141 store i32 %val0, i32* %out, align 4
; Returning flat cmpxchg (glc; element 0 of the result is stored to %out) at
; syncscope "one-as", release/acquire. s_waitcnt vmcnt(0) (plus
; s_waitcnt_vscnt on gfx1010) must sit directly before the atomic;
; s_waitcnt vmcnt(0) and the cache invalidate must follow it directly
; (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv + buffer_gl1_inv on gfx1010).
1145 ; GCN-LABEL: {{^}}system_one_as_release_acquire_ret:
1146 ; GCN: s_waitcnt vmcnt(0){{$}}
1147 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1148 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1149 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1150 ; GFX8-NEXT: buffer_wbinvl1_vol
1151 ; GFX10-NEXT: buffer_gl0_inv
1152 ; GFX10-NEXT: buffer_gl1_inv
1153 ; GFX10: .amdhsa_kernel system_one_as_release_acquire_ret
1154 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1155 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1156 ; GFX10-NOT: .amdhsa_memory_ordered 0
1157 define amdgpu_kernel void @system_one_as_release_acquire_ret(
1158 i32* %out, i32 %in, i32 %old) {
1160 %gep = getelementptr i32, i32* %out, i32 4
1161 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") release acquire
1162 %val0 = extractvalue { i32, i1 } %val, 0
1163 store i32 %val0, i32* %out, align 4
; Returning flat cmpxchg (glc; element 0 of the result is stored to %out) at
; syncscope "one-as", acq_rel/acquire. s_waitcnt vmcnt(0) (plus
; s_waitcnt_vscnt on gfx1010) must sit directly before the atomic;
; s_waitcnt vmcnt(0) and the cache invalidate must follow it directly
; (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv + buffer_gl1_inv on gfx1010).
1167 ; GCN-LABEL: {{^}}system_one_as_acq_rel_acquire_ret:
1168 ; GCN: s_waitcnt vmcnt(0){{$}}
1169 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1170 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1171 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1172 ; GFX8-NEXT: buffer_wbinvl1_vol
1173 ; GFX10-NEXT: buffer_gl0_inv
1174 ; GFX10-NEXT: buffer_gl1_inv
1175 ; GFX10: .amdhsa_kernel system_one_as_acq_rel_acquire_ret
1176 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1177 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1178 ; GFX10-NOT: .amdhsa_memory_ordered 0
1179 define amdgpu_kernel void @system_one_as_acq_rel_acquire_ret(
1180 i32* %out, i32 %in, i32 %old) {
1182 %gep = getelementptr i32, i32* %out, i32 4
1183 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire
1184 %val0 = extractvalue { i32, i1 } %val, 0
1185 store i32 %val0, i32* %out, align 4
; Returning flat cmpxchg (glc; element 0 of the result is stored to %out) at
; syncscope "one-as", seq_cst/acquire. s_waitcnt vmcnt(0) (plus
; s_waitcnt_vscnt on gfx1010) must sit directly before the atomic;
; s_waitcnt vmcnt(0) and the cache invalidate must follow it directly
; (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv + buffer_gl1_inv on gfx1010).
1189 ; GCN-LABEL: {{^}}system_one_as_seq_cst_acquire_ret:
1190 ; GCN: s_waitcnt vmcnt(0){{$}}
1191 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1192 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1193 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1194 ; GFX8-NEXT: buffer_wbinvl1_vol
1195 ; GFX10-NEXT: buffer_gl0_inv
1196 ; GFX10-NEXT: buffer_gl1_inv
1197 ; GFX10: .amdhsa_kernel system_one_as_seq_cst_acquire_ret
1198 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1199 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1200 ; GFX10-NOT: .amdhsa_memory_ordered 0
1201 define amdgpu_kernel void @system_one_as_seq_cst_acquire_ret(
1202 i32* %out, i32 %in, i32 %old) {
1204 %gep = getelementptr i32, i32* %out, i32 4
1205 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire
1206 %val0 = extractvalue { i32, i1 } %val, 0
1207 store i32 %val0, i32* %out, align 4
; Returning flat cmpxchg (glc; element 0 of the result is stored to %out) at
; syncscope "one-as", seq_cst/seq_cst. s_waitcnt vmcnt(0) (plus
; s_waitcnt_vscnt on gfx1010) must sit directly before the atomic;
; s_waitcnt vmcnt(0) and the cache invalidate must follow it directly
; (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv + buffer_gl1_inv on gfx1010).
1211 ; GCN-LABEL: {{^}}system_one_as_seq_cst_seq_cst_ret:
1212 ; GCN: s_waitcnt vmcnt(0){{$}}
1213 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1214 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1215 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1216 ; GFX8-NEXT: buffer_wbinvl1_vol
1217 ; GFX10-NEXT: buffer_gl0_inv
1218 ; GFX10-NEXT: buffer_gl1_inv
1219 ; GFX10: .amdhsa_kernel system_one_as_seq_cst_seq_cst_ret
1220 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1221 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1222 ; GFX10-NOT: .amdhsa_memory_ordered 0
1223 define amdgpu_kernel void @system_one_as_seq_cst_seq_cst_ret(
1224 i32* %out, i32 %in, i32 %old) {
1226 %gep = getelementptr i32, i32* %out, i32 4
1227 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst
1228 %val0 = extractvalue { i32, i1 } %val, 0
1229 store i32 %val0, i32* %out, align 4
; Returning flat cmpxchg (glc; element 0 of the result is stored to %out) at
; syncscope "agent-one-as", acquire/monotonic. No zero-count wait may precede
; the atomic; s_waitcnt vmcnt(0) and the cache invalidate must follow it directly
; (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv + buffer_gl1_inv on gfx1010).
1233 ; GCN-LABEL: {{^}}agent_one_as_acquire_monotonic_ret:
1234 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1235 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1236 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1237 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1238 ; GFX8-NEXT: buffer_wbinvl1_vol
1239 ; GFX10-NEXT: buffer_gl0_inv
1240 ; GFX10-NEXT: buffer_gl1_inv
1241 ; GFX10: .amdhsa_kernel agent_one_as_acquire_monotonic_ret
1242 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1243 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1244 ; GFX10-NOT: .amdhsa_memory_ordered 0
1245 define amdgpu_kernel void @agent_one_as_acquire_monotonic_ret(
1246 i32* %out, i32 %in, i32 %old) {
1248 %gep = getelementptr i32, i32* %out, i32 4
1249 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic
1250 %val0 = extractvalue { i32, i1 } %val, 0
1251 store i32 %val0, i32* %out, align 4
; Returning flat cmpxchg (glc; element 0 of the result is stored to %out) at
; syncscope "agent-one-as", acq_rel/monotonic. s_waitcnt vmcnt(0) (plus
; s_waitcnt_vscnt on gfx1010) must sit directly before the atomic;
; s_waitcnt vmcnt(0) and the cache invalidate must follow it directly
; (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv + buffer_gl1_inv on gfx1010).
1255 ; GCN-LABEL: {{^}}agent_one_as_acq_rel_monotonic_ret:
1256 ; GCN: s_waitcnt vmcnt(0){{$}}
1257 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1258 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1259 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1260 ; GFX8-NEXT: buffer_wbinvl1_vol
1261 ; GFX10-NEXT: buffer_gl0_inv
1262 ; GFX10-NEXT: buffer_gl1_inv
1263 ; GFX10: .amdhsa_kernel agent_one_as_acq_rel_monotonic_ret
1264 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1265 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1266 ; GFX10-NOT: .amdhsa_memory_ordered 0
1267 define amdgpu_kernel void @agent_one_as_acq_rel_monotonic_ret(
1268 i32* %out, i32 %in, i32 %old) {
1270 %gep = getelementptr i32, i32* %out, i32 4
1271 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic
1272 %val0 = extractvalue { i32, i1 } %val, 0
1273 store i32 %val0, i32* %out, align 4
; Returning flat cmpxchg (glc; element 0 of the result is stored to %out) at
; syncscope "agent-one-as", seq_cst/monotonic. s_waitcnt vmcnt(0) (plus
; s_waitcnt_vscnt on gfx1010) must sit directly before the atomic;
; s_waitcnt vmcnt(0) and the cache invalidate must follow it directly
; (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv + buffer_gl1_inv on gfx1010).
1277 ; GCN-LABEL: {{^}}agent_one_as_seq_cst_monotonic_ret:
1278 ; GCN: s_waitcnt vmcnt(0){{$}}
1279 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1280 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1281 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1282 ; GFX8-NEXT: buffer_wbinvl1_vol
1283 ; GFX10-NEXT: buffer_gl0_inv
1284 ; GFX10-NEXT: buffer_gl1_inv
1285 ; GFX10: .amdhsa_kernel agent_one_as_seq_cst_monotonic_ret
1286 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1287 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1288 ; GFX10-NOT: .amdhsa_memory_ordered 0
1289 define amdgpu_kernel void @agent_one_as_seq_cst_monotonic_ret(
1290 i32* %out, i32 %in, i32 %old) {
1292 %gep = getelementptr i32, i32* %out, i32 4
1293 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic
1294 %val0 = extractvalue { i32, i1 } %val, 0
1295 store i32 %val0, i32* %out, align 4
; Returning flat cmpxchg (glc; element 0 of the result is stored to %out) at
; syncscope "agent-one-as", acquire/acquire. No zero-count wait may precede
; the atomic; s_waitcnt vmcnt(0) and the cache invalidate must follow it directly
; (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv + buffer_gl1_inv on gfx1010).
1299 ; GCN-LABEL: {{^}}agent_one_as_acquire_acquire_ret:
1300 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1301 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1302 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1303 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1304 ; GFX8-NEXT: buffer_wbinvl1_vol
1305 ; GFX10-NEXT: buffer_gl0_inv
1306 ; GFX10-NEXT: buffer_gl1_inv
1307 ; GFX10: .amdhsa_kernel agent_one_as_acquire_acquire_ret
1308 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1309 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1310 ; GFX10-NOT: .amdhsa_memory_ordered 0
1311 define amdgpu_kernel void @agent_one_as_acquire_acquire_ret(
1312 i32* %out, i32 %in, i32 %old) {
1314 %gep = getelementptr i32, i32* %out, i32 4
1315 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire
1316 %val0 = extractvalue { i32, i1 } %val, 0
1317 store i32 %val0, i32* %out, align 4
; Returning flat cmpxchg (glc; element 0 of the result is stored to %out) at
; syncscope "agent-one-as", release/acquire. s_waitcnt vmcnt(0) (plus
; s_waitcnt_vscnt on gfx1010) must sit directly before the atomic;
; s_waitcnt vmcnt(0) and the cache invalidate must follow it directly
; (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv + buffer_gl1_inv on gfx1010).
1321 ; GCN-LABEL: {{^}}agent_one_as_release_acquire_ret:
1322 ; GCN: s_waitcnt vmcnt(0){{$}}
1323 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1324 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1325 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1326 ; GFX8-NEXT: buffer_wbinvl1_vol
1327 ; GFX10-NEXT: buffer_gl0_inv
1328 ; GFX10-NEXT: buffer_gl1_inv
1329 ; GFX10: .amdhsa_kernel agent_one_as_release_acquire_ret
1330 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1331 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1332 ; GFX10-NOT: .amdhsa_memory_ordered 0
1333 define amdgpu_kernel void @agent_one_as_release_acquire_ret(
1334 i32* %out, i32 %in, i32 %old) {
1336 %gep = getelementptr i32, i32* %out, i32 4
1337 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire
1338 %val0 = extractvalue { i32, i1 } %val, 0
1339 store i32 %val0, i32* %out, align 4
; Returning flat cmpxchg (glc; element 0 of the result is stored to %out) at
; syncscope "agent-one-as", acq_rel/acquire. s_waitcnt vmcnt(0) (plus
; s_waitcnt_vscnt on gfx1010) must sit directly before the atomic;
; s_waitcnt vmcnt(0) and the cache invalidate must follow it directly
; (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv + buffer_gl1_inv on gfx1010).
1343 ; GCN-LABEL: {{^}}agent_one_as_acq_rel_acquire_ret:
1344 ; GCN: s_waitcnt vmcnt(0){{$}}
1345 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1346 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1347 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1348 ; GFX8-NEXT: buffer_wbinvl1_vol
1349 ; GFX10-NEXT: buffer_gl0_inv
1350 ; GFX10-NEXT: buffer_gl1_inv
1351 ; GFX10: .amdhsa_kernel agent_one_as_acq_rel_acquire_ret
1352 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1353 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1354 ; GFX10-NOT: .amdhsa_memory_ordered 0
1355 define amdgpu_kernel void @agent_one_as_acq_rel_acquire_ret(
1356 i32* %out, i32 %in, i32 %old) {
1358 %gep = getelementptr i32, i32* %out, i32 4
1359 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire
1360 %val0 = extractvalue { i32, i1 } %val, 0
1361 store i32 %val0, i32* %out, align 4
; Returning flat cmpxchg (glc; element 0 of the result is stored to %out) at
; syncscope "agent-one-as", seq_cst/acquire. s_waitcnt vmcnt(0) (plus
; s_waitcnt_vscnt on gfx1010) must sit directly before the atomic;
; s_waitcnt vmcnt(0) and the cache invalidate must follow it directly
; (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv + buffer_gl1_inv on gfx1010).
1365 ; GCN-LABEL: {{^}}agent_one_as_seq_cst_acquire_ret:
1366 ; GCN: s_waitcnt vmcnt(0){{$}}
1367 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1368 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1369 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1370 ; GFX8-NEXT: buffer_wbinvl1_vol
1371 ; GFX10-NEXT: buffer_gl0_inv
1372 ; GFX10-NEXT: buffer_gl1_inv
1373 ; GFX10: .amdhsa_kernel agent_one_as_seq_cst_acquire_ret
1374 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1375 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1376 ; GFX10-NOT: .amdhsa_memory_ordered 0
1377 define amdgpu_kernel void @agent_one_as_seq_cst_acquire_ret(
1378 i32* %out, i32 %in, i32 %old) {
1380 %gep = getelementptr i32, i32* %out, i32 4
1381 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire
1382 %val0 = extractvalue { i32, i1 } %val, 0
1383 store i32 %val0, i32* %out, align 4
; Returning flat cmpxchg (glc; element 0 of the result is stored to %out) at
; syncscope "agent-one-as", seq_cst/seq_cst. s_waitcnt vmcnt(0) (plus
; s_waitcnt_vscnt on gfx1010) must sit directly before the atomic;
; s_waitcnt vmcnt(0) and the cache invalidate must follow it directly
; (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv + buffer_gl1_inv on gfx1010).
1387 ; GCN-LABEL: {{^}}agent_one_as_seq_cst_seq_cst_ret:
1388 ; GCN: s_waitcnt vmcnt(0){{$}}
1389 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1390 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1391 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1392 ; GFX8-NEXT: buffer_wbinvl1_vol
1393 ; GFX10-NEXT: buffer_gl0_inv
1394 ; GFX10-NEXT: buffer_gl1_inv
1395 ; GFX10: .amdhsa_kernel agent_one_as_seq_cst_seq_cst_ret
1396 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1397 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1398 ; GFX10-NOT: .amdhsa_memory_ordered 0
1399 define amdgpu_kernel void @agent_one_as_seq_cst_seq_cst_ret(
1400 i32* %out, i32 %in, i32 %old) {
1402 %gep = getelementptr i32, i32* %out, i32 4
1403 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst
1404 %val0 = extractvalue { i32, i1 } %val, 0
1405 store i32 %val0, i32* %out, align 4
; Returning flat cmpxchg (glc; element 0 of the result is stored to %out) at
; syncscope "workgroup-one-as", acquire/monotonic. No zero-count wait may
; precede the atomic on any target.
; gfx1010 without +cumode: s_waitcnt vmcnt(0) and buffer_gl0_inv directly after.
; gfx1010 with +cumode: neither of those two instructions.
; gfx803: no s_waitcnt vmcnt(0) and no buffer_wbinvl1_vol after the atomic.
1409 ; GCN-LABEL: {{^}}workgroup_one_as_acquire_monotonic_ret:
1410 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1411 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1412 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1413 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1414 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
1415 ; GFX10WGP-NEXT: buffer_gl0_inv
1416 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1417 ; GFX10CU-NOT: buffer_gl0_inv
1418 ; GFX8-NOT: buffer_wbinvl1_vol
1419 ; GFX10: .amdhsa_kernel workgroup_one_as_acquire_monotonic_ret
1420 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1421 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1422 ; GFX10-NOT: .amdhsa_memory_ordered 0
1423 define amdgpu_kernel void @workgroup_one_as_acquire_monotonic_ret(
1424 i32* %out, i32 %in, i32 %old) {
1426 %gep = getelementptr i32, i32* %out, i32 4
1427 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire monotonic
1428 %val0 = extractvalue { i32, i1 } %val, 0
1429 store i32 %val0, i32* %out, align 4
; Returning flat cmpxchg (glc; element 0 of the result is stored to %out) at
; syncscope "workgroup-one-as", acq_rel/monotonic.
; gfx1010 without +cumode: s_waitcnt vmcnt(0) and s_waitcnt_vscnt before the
; atomic, then s_waitcnt vmcnt(0) and buffer_gl0_inv directly after it.
; gfx1010 with +cumode: none of those waits and no buffer_gl0_inv.
; gfx803: no s_waitcnt vmcnt(0) and no buffer_wbinvl1_vol.
1433 ; GCN-LABEL: {{^}}workgroup_one_as_acq_rel_monotonic_ret:
1434 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1435 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
1436 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1437 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1438 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
1439 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1440 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1441 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
1442 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1443 ; GFX8-NOT: buffer_wbinvl1_vol
1444 ; GFX10WGP-NEXT: buffer_gl0_inv
1445 ; GFX10CU-NOT: buffer_gl0_inv
1446 ; GFX10: .amdhsa_kernel workgroup_one_as_acq_rel_monotonic_ret
1447 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1448 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1449 ; GFX10-NOT: .amdhsa_memory_ordered 0
1450 define amdgpu_kernel void @workgroup_one_as_acq_rel_monotonic_ret(
1451 i32* %out, i32 %in, i32 %old) {
1453 %gep = getelementptr i32, i32* %out, i32 4
1454 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel monotonic
1455 %val0 = extractvalue { i32, i1 } %val, 0
1456 store i32 %val0, i32* %out, align 4
; Returning cmpxchg at syncscope("workgroup-one-as"), seq_cst on success /
; monotonic on failure. Only the gfx1010 run without +cumode is expected to
; bracket the atomic: `s_waitcnt vmcnt(0)` + `s_waitcnt_vscnt null, 0x0` ahead
; of it, then `s_waitcnt vmcnt(0)` + `buffer_gl0_inv` directly after it.
; The gfx803 and gfx1010 +cumode runs must show none of those.
1460 ; GCN-LABEL: {{^}}workgroup_one_as_seq_cst_monotonic_ret:
1461 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1462 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
1463 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1464 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1465 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
1466 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1467 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1468 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
1469 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1470 ; GFX8-NOT: buffer_wbinvl1_vol
1471 ; GFX10WGP-NEXT: buffer_gl0_inv
1472 ; GFX10CU-NOT: buffer_gl0_inv
1473 ; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst_monotonic_ret
1474 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1475 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1476 ; GFX10-NOT: .amdhsa_memory_ordered 0
1477 define amdgpu_kernel void @workgroup_one_as_seq_cst_monotonic_ret(
1478 i32* %out, i32 %in, i32 %old) {
1480 %gep = getelementptr i32, i32* %out, i32 4
1481 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst monotonic
1482 %val0 = extractvalue { i32, i1 } %val, 0
1483 store i32 %val0, i32* %out, align 4
; Returning cmpxchg at syncscope("workgroup-one-as"), acquire on both success
; and failure. Expected shape: no `s_waitcnt vmcnt(0)` or zero-count
; vmcnt/vscnt wait ahead of the atomic; only the gfx1010 run without +cumode
; follows it directly with `s_waitcnt vmcnt(0)` and `buffer_gl0_inv`. The
; gfx803 and gfx1010 +cumode runs must show neither wait nor invalidate.
1487 ; GCN-LABEL: {{^}}workgroup_one_as_acquire_acquire_ret:
1488 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1489 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1490 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1491 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1492 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
1493 ; GFX10WGP-NEXT: buffer_gl0_inv
1494 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1495 ; GFX10CU-NOT: buffer_gl0_inv
1496 ; GFX8-NOT: buffer_wbinvl1_vol
1497 ; GFX10: .amdhsa_kernel workgroup_one_as_acquire_acquire_ret
1498 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1499 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1500 ; GFX10-NOT: .amdhsa_memory_ordered 0
1501 define amdgpu_kernel void @workgroup_one_as_acquire_acquire_ret(
1502 i32* %out, i32 %in, i32 %old) {
1504 %gep = getelementptr i32, i32* %out, i32 4
1505 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire acquire
1506 %val0 = extractvalue { i32, i1 } %val, 0
1507 store i32 %val0, i32* %out, align 4
; Returning cmpxchg at syncscope("workgroup-one-as"), release on success /
; acquire on failure. Only the gfx1010 run without +cumode is expected to
; bracket the atomic: `s_waitcnt vmcnt(0)` + `s_waitcnt_vscnt null, 0x0` ahead
; of it, then `s_waitcnt vmcnt(0)` + `buffer_gl0_inv` directly after it.
; The gfx803 and gfx1010 +cumode runs must show none of those.
1511 ; GCN-LABEL: {{^}}workgroup_one_as_release_acquire_ret:
1512 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1513 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
1514 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1515 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1516 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
1517 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1518 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1519 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
1520 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1521 ; GFX8-NOT: buffer_wbinvl1_vol
1522 ; GFX10WGP-NEXT: buffer_gl0_inv
1523 ; GFX10CU-NOT: buffer_gl0_inv
1524 ; GFX10: .amdhsa_kernel workgroup_one_as_release_acquire_ret
1525 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1526 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1527 ; GFX10-NOT: .amdhsa_memory_ordered 0
1528 define amdgpu_kernel void @workgroup_one_as_release_acquire_ret(
1529 i32* %out, i32 %in, i32 %old) {
1531 %gep = getelementptr i32, i32* %out, i32 4
1532 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release acquire
1533 %val0 = extractvalue { i32, i1 } %val, 0
1534 store i32 %val0, i32* %out, align 4
; Returning cmpxchg at syncscope("workgroup-one-as"), acq_rel on success /
; acquire on failure. Only the gfx1010 run without +cumode is expected to
; bracket the atomic: `s_waitcnt vmcnt(0)` + `s_waitcnt_vscnt null, 0x0` ahead
; of it, then `s_waitcnt vmcnt(0)` + `buffer_gl0_inv` directly after it (the
; invalidate is pinned to the very next line, as in the sibling tests).
; The gfx803 and gfx1010 +cumode runs must show none of those.
1538 ; GCN-LABEL: {{^}}workgroup_one_as_acq_rel_acquire_ret:
1539 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1540 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
1541 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1542 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1543 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
1544 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1545 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1546 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
1547 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1548 ; GFX8-NOT: buffer_wbinvl1_vol
1549 ; GFX10WGP-NEXT: buffer_gl0_inv
1550 ; GFX10CU-NOT: buffer_gl0_inv
1551 ; GFX10: .amdhsa_kernel workgroup_one_as_acq_rel_acquire_ret
1552 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1553 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1554 ; GFX10-NOT: .amdhsa_memory_ordered 0
1555 define amdgpu_kernel void @workgroup_one_as_acq_rel_acquire_ret(
1556 i32* %out, i32 %in, i32 %old) {
1558 %gep = getelementptr i32, i32* %out, i32 4
1559 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel acquire
1560 %val0 = extractvalue { i32, i1 } %val, 0
1561 store i32 %val0, i32* %out, align 4
; Returning cmpxchg at syncscope("workgroup-one-as"), seq_cst on success /
; acquire on failure. Only the gfx1010 run without +cumode is expected to
; bracket the atomic: `s_waitcnt vmcnt(0)` + `s_waitcnt_vscnt null, 0x0` ahead
; of it, then `s_waitcnt vmcnt(0)` + `buffer_gl0_inv` directly after it.
; The gfx803 and gfx1010 +cumode runs must show none of those.
1565 ; GCN-LABEL: {{^}}workgroup_one_as_seq_cst_acquire_ret:
1566 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1567 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
1568 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1569 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1570 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
1571 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1572 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1573 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
1574 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1575 ; GFX8-NOT: buffer_wbinvl1_vol
1576 ; GFX10WGP-NEXT: buffer_gl0_inv
1577 ; GFX10CU-NOT: buffer_gl0_inv
1578 ; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst_acquire_ret
1579 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1580 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1581 ; GFX10-NOT: .amdhsa_memory_ordered 0
1582 define amdgpu_kernel void @workgroup_one_as_seq_cst_acquire_ret(
1583 i32* %out, i32 %in, i32 %old) {
1585 %gep = getelementptr i32, i32* %out, i32 4
1586 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst acquire
1587 %val0 = extractvalue { i32, i1 } %val, 0
1588 store i32 %val0, i32* %out, align 4
; Returning cmpxchg at syncscope("workgroup-one-as"), seq_cst on both success
; and failure. Only the gfx1010 run without +cumode is expected to bracket the
; atomic: `s_waitcnt vmcnt(0)` + `s_waitcnt_vscnt null, 0x0` ahead of it, then
; `s_waitcnt vmcnt(0)` + `buffer_gl0_inv` directly after it (the invalidate is
; pinned to the very next line, as in the sibling tests).
; The gfx803 and gfx1010 +cumode runs must show none of those.
1592 ; GCN-LABEL: {{^}}workgroup_one_as_seq_cst_seq_cst_ret:
1593 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1594 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
1595 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1596 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1597 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
1598 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1599 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1600 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
1601 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1602 ; GFX8-NOT: buffer_wbinvl1_vol
1603 ; GFX10WGP-NEXT: buffer_gl0_inv
1604 ; GFX10CU-NOT: buffer_gl0_inv
1605 ; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst_seq_cst_ret
1606 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1607 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1608 ; GFX10-NOT: .amdhsa_memory_ordered 0
1609 define amdgpu_kernel void @workgroup_one_as_seq_cst_seq_cst_ret(
1610 i32* %out, i32 %in, i32 %old) {
1612 %gep = getelementptr i32, i32* %out, i32 4
1613 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst seq_cst
1614 %val0 = extractvalue { i32, i1 } %val, 0
1615 store i32 %val0, i32* %out, align 4
; cmpxchg with no syncscope (default, system scope), monotonic on both success
; and failure. The non-returning instruction form (no destination VGPR, no
; `glc`) is expected, with no `s_waitcnt vmcnt(0)`, no zero-count vmcnt/vscnt
; wait and no cache invalidate around it on any run.
1619 ; GCN-LABEL: {{^}}system_monotonic_monotonic:
1620 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1621 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1622 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1623 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1624 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1625 ; GFX8-NOT: buffer_wbinvl1_vol
1626 ; GFX10-NOT: buffer_gl{{[01]}}_inv
1627 ; GFX10: .amdhsa_kernel system_monotonic_monotonic
1628 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1629 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1630 ; GFX10-NOT: .amdhsa_memory_ordered 0
1631 define amdgpu_kernel void @system_monotonic_monotonic(
1632 i32* %out, i32 %in, i32 %old) {
1634 %gep = getelementptr i32, i32* %out, i32 4
1635 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in monotonic monotonic
; cmpxchg at the default (system) scope, acquire on success / monotonic on
; failure; non-returning instruction form expected. No `s_waitcnt vmcnt(0)`
; may precede the atomic. Directly after it: on gfx803
; `s_waitcnt vmcnt(0) lgkmcnt(0)` then `buffer_wbinvl1_vol`; on gfx1010
; `s_waitcnt lgkmcnt(0)`, `s_waitcnt_vscnt null, 0x0`, `buffer_gl0_inv`,
; `buffer_gl1_inv`.
1639 ; GCN-LABEL: {{^}}system_acquire_monotonic:
1640 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1641 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1642 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1643 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1644 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
1645 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1646 ; GFX8-NEXT: buffer_wbinvl1_vol
1647 ; GFX10-NEXT: buffer_gl0_inv
1648 ; GFX10-NEXT: buffer_gl1_inv
1649 ; GFX10: .amdhsa_kernel system_acquire_monotonic
1650 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1651 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1652 ; GFX10-NOT: .amdhsa_memory_ordered 0
1653 define amdgpu_kernel void @system_acquire_monotonic(
1654 i32* %out, i32 %in, i32 %old) {
1656 %gep = getelementptr i32, i32* %out, i32 4
1657 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire monotonic
; cmpxchg at the default (system) scope, release on success / monotonic on
; failure; non-returning instruction form expected. A wait must sit directly
; ahead of the atomic (`s_waitcnt vmcnt(0) lgkmcnt(0)` on gfx803;
; `s_waitcnt lgkmcnt(0)` + `s_waitcnt_vscnt null, 0x0` on gfx1010). Nothing may
; follow it: no `s_waitcnt vmcnt(0)`, no zero-count vmcnt/vscnt wait and no
; cache invalidate (`buffer_wbinvl1_vol`, `buffer_gl0_inv`, `buffer_gl1_inv`).
1661 ; GCN-LABEL: {{^}}system_release_monotonic:
1662 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1663 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
1664 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
1665 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1666 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1667 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1668 ; GFX8-NOT: buffer_wbinvl1_vol
1669 ; GFX10-NOT: buffer_gl{{[01]}}_inv
1670 ; GFX10: .amdhsa_kernel system_release_monotonic
1671 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1672 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1673 ; GFX10-NOT: .amdhsa_memory_ordered 0
1674 define amdgpu_kernel void @system_release_monotonic(
1675 i32* %out, i32 %in, i32 %old) {
1677 %gep = getelementptr i32, i32* %out, i32 4
1678 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release monotonic
; cmpxchg at the default (system) scope, acq_rel on success / monotonic on
; failure; non-returning instruction form expected. The atomic must be
; bracketed: a wait directly ahead of it and, directly after it, another wait
; followed by the cache invalidate(s) (`buffer_wbinvl1_vol` on gfx803,
; `buffer_gl0_inv` + `buffer_gl1_inv` on gfx1010).
1682 ; GCN-LABEL: {{^}}system_acq_rel_monotonic:
1683 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1684 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
1685 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
1686 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1687 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1688 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
1689 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1690 ; GFX8-NEXT: buffer_wbinvl1_vol
1691 ; GFX10-NEXT: buffer_gl0_inv
1692 ; GFX10-NEXT: buffer_gl1_inv
1693 ; GFX10: .amdhsa_kernel system_acq_rel_monotonic
1694 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1695 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1696 ; GFX10-NOT: .amdhsa_memory_ordered 0
1697 define amdgpu_kernel void @system_acq_rel_monotonic(
1698 i32* %out, i32 %in, i32 %old) {
1700 %gep = getelementptr i32, i32* %out, i32 4
1701 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel monotonic
; cmpxchg at the default (system) scope, seq_cst on success / monotonic on
; failure; non-returning instruction form expected. The atomic must be
; bracketed: a wait directly ahead of it and, directly after it, another wait
; followed by the cache invalidate(s) (`buffer_wbinvl1_vol` on gfx803,
; `buffer_gl0_inv` + `buffer_gl1_inv` on gfx1010).
1705 ; GCN-LABEL: {{^}}system_seq_cst_monotonic:
1706 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1707 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
1708 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
1709 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1710 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1711 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
1712 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1713 ; GFX8-NEXT: buffer_wbinvl1_vol
1714 ; GFX10-NEXT: buffer_gl0_inv
1715 ; GFX10-NEXT: buffer_gl1_inv
1716 ; GFX10: .amdhsa_kernel system_seq_cst_monotonic
1717 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1718 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1719 ; GFX10-NOT: .amdhsa_memory_ordered 0
1720 define amdgpu_kernel void @system_seq_cst_monotonic(
1721 i32* %out, i32 %in, i32 %old) {
1723 %gep = getelementptr i32, i32* %out, i32 4
1724 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst monotonic
; cmpxchg at the default (system) scope, acquire on both success and failure;
; non-returning instruction form expected. No `s_waitcnt vmcnt(0)` may precede
; the atomic. Directly after it: on gfx803 `s_waitcnt vmcnt(0) lgkmcnt(0)` then
; `buffer_wbinvl1_vol`; on gfx1010 `s_waitcnt lgkmcnt(0)`,
; `s_waitcnt_vscnt null, 0x0`, `buffer_gl0_inv`, `buffer_gl1_inv`.
1728 ; GCN-LABEL: {{^}}system_acquire_acquire:
1729 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1730 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1731 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1732 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1733 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
1734 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1735 ; GFX8-NEXT: buffer_wbinvl1_vol
1736 ; GFX10-NEXT: buffer_gl0_inv
1737 ; GFX10-NEXT: buffer_gl1_inv
1738 ; GFX10: .amdhsa_kernel system_acquire_acquire
1739 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1740 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1741 ; GFX10-NOT: .amdhsa_memory_ordered 0
1742 define amdgpu_kernel void @system_acquire_acquire(
1743 i32* %out, i32 %in, i32 %old) {
1745 %gep = getelementptr i32, i32* %out, i32 4
1746 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire acquire
; cmpxchg at the default (system) scope, release on success / acquire on
; failure; non-returning instruction form expected. The atomic must be
; bracketed: a wait directly ahead of it and, directly after it, another wait
; followed by the cache invalidate(s) (`buffer_wbinvl1_vol` on gfx803,
; `buffer_gl0_inv` + `buffer_gl1_inv` on gfx1010).
1750 ; GCN-LABEL: {{^}}system_release_acquire:
1751 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1752 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
1753 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
1754 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1755 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1756 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
1757 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1758 ; GFX8-NEXT: buffer_wbinvl1_vol
1759 ; GFX10-NEXT: buffer_gl0_inv
1760 ; GFX10-NEXT: buffer_gl1_inv
1761 ; GFX10: .amdhsa_kernel system_release_acquire
1762 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1763 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1764 ; GFX10-NOT: .amdhsa_memory_ordered 0
1765 define amdgpu_kernel void @system_release_acquire(
1766 i32* %out, i32 %in, i32 %old) {
1768 %gep = getelementptr i32, i32* %out, i32 4
1769 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release acquire
; cmpxchg at the default (system) scope, acq_rel on success / acquire on
; failure; non-returning instruction form expected. The atomic must be
; bracketed: a wait directly ahead of it and, directly after it, another wait
; followed by the cache invalidate(s) (`buffer_wbinvl1_vol` on gfx803,
; `buffer_gl0_inv` + `buffer_gl1_inv` on gfx1010).
1773 ; GCN-LABEL: {{^}}system_acq_rel_acquire:
1774 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1775 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
1776 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
1777 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1778 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1779 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
1780 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1781 ; GFX8-NEXT: buffer_wbinvl1_vol
1782 ; GFX10-NEXT: buffer_gl0_inv
1783 ; GFX10-NEXT: buffer_gl1_inv
1784 ; GFX10: .amdhsa_kernel system_acq_rel_acquire
1785 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1786 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1787 ; GFX10-NOT: .amdhsa_memory_ordered 0
1788 define amdgpu_kernel void @system_acq_rel_acquire(
1789 i32* %out, i32 %in, i32 %old) {
1791 %gep = getelementptr i32, i32* %out, i32 4
1792 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel acquire
; cmpxchg at the default (system) scope, seq_cst on success / acquire on
; failure; non-returning instruction form expected. The atomic must be
; bracketed: a wait directly ahead of it and, directly after it, another wait
; followed by the cache invalidate(s) (`buffer_wbinvl1_vol` on gfx803,
; `buffer_gl0_inv` + `buffer_gl1_inv` on gfx1010).
1796 ; GCN-LABEL: {{^}}system_seq_cst_acquire:
1797 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1798 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
1799 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
1800 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1801 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1802 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
1803 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1804 ; GFX8-NEXT: buffer_wbinvl1_vol
1805 ; GFX10-NEXT: buffer_gl0_inv
1806 ; GFX10-NEXT: buffer_gl1_inv
1807 ; GFX10: .amdhsa_kernel system_seq_cst_acquire
1808 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1809 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1810 ; GFX10-NOT: .amdhsa_memory_ordered 0
1811 define amdgpu_kernel void @system_seq_cst_acquire(
1812 i32* %out, i32 %in, i32 %old) {
1814 %gep = getelementptr i32, i32* %out, i32 4
1815 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst acquire
; cmpxchg at the default (system) scope, seq_cst on both success and failure;
; non-returning instruction form expected. The atomic must be bracketed: a
; wait directly ahead of it and, directly after it, another wait followed by
; the cache invalidate(s) (`buffer_wbinvl1_vol` on gfx803, `buffer_gl0_inv` +
; `buffer_gl1_inv` on gfx1010).
1819 ; GCN-LABEL: {{^}}system_seq_cst_seq_cst:
1820 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1821 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
1822 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
1823 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1824 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1825 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
1826 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1827 ; GFX8-NEXT: buffer_wbinvl1_vol
1828 ; GFX10-NEXT: buffer_gl0_inv
1829 ; GFX10-NEXT: buffer_gl1_inv
1830 ; GFX10: .amdhsa_kernel system_seq_cst_seq_cst
1831 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1832 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1833 ; GFX10-NOT: .amdhsa_memory_ordered 0
1834 define amdgpu_kernel void @system_seq_cst_seq_cst(
1835 i32* %out, i32 %in, i32 %old) {
1837 %gep = getelementptr i32, i32* %out, i32 4
1838 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
; cmpxchg at syncscope("singlethread"), monotonic on both success and failure.
; Expected: the bare non-returning atomic, with no `s_waitcnt vmcnt(0)`, no
; zero-count vmcnt/vscnt wait on gfx1010, and no cache invalidate on any run.
1842 ; GCN-LABEL: {{^}}singlethread_monotonic_monotonic:
1843 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1844 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1845 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1846 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1847 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1848 ; GFX8-NOT: buffer_wbinvl1_vol
1849 ; GFX10-NOT: buffer_gl{{[01]}}_inv
1850 ; GFX10: .amdhsa_kernel singlethread_monotonic_monotonic
1851 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1852 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1853 ; GFX10-NOT: .amdhsa_memory_ordered 0
1854 define amdgpu_kernel void @singlethread_monotonic_monotonic(
1855 i32* %out, i32 %in, i32 %old) {
1857 %gep = getelementptr i32, i32* %out, i32 4
1858 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic
; cmpxchg at syncscope("singlethread"), acquire on success / monotonic on
; failure. Expected: the bare non-returning atomic, with no
; `s_waitcnt vmcnt(0)`, no zero-count vmcnt/vscnt wait on gfx1010, and no cache
; invalidate on any run.
1862 ; GCN-LABEL: {{^}}singlethread_acquire_monotonic:
1863 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1864 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1865 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1866 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1867 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1868 ; GFX8-NOT: buffer_wbinvl1_vol
1869 ; GFX10-NOT: buffer_gl{{[01]}}_inv
1870 ; GFX10: .amdhsa_kernel singlethread_acquire_monotonic
1871 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1872 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1873 ; GFX10-NOT: .amdhsa_memory_ordered 0
1874 define amdgpu_kernel void @singlethread_acquire_monotonic(
1875 i32* %out, i32 %in, i32 %old) {
1877 %gep = getelementptr i32, i32* %out, i32 4
1878 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic
; cmpxchg at syncscope("singlethread"), release on success / monotonic on
; failure. Expected: the bare non-returning atomic, with no
; `s_waitcnt vmcnt(0)`, no zero-count vmcnt/vscnt wait on gfx1010, and no cache
; invalidate (`buffer_wbinvl1_vol` on gfx803, `buffer_gl0_inv` /
; `buffer_gl1_inv` on any run).
1882 ; GCN-LABEL: {{^}}singlethread_release_monotonic:
1883 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1884 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1885 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1886 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1887 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1888 ; GFX8-NOT: buffer_wbinvl1_vol
1889 ; GCN-NOT: buffer_gl{{[01]}}_inv
1890 ; GFX10: .amdhsa_kernel singlethread_release_monotonic
1891 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1892 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1893 ; GFX10-NOT: .amdhsa_memory_ordered 0
1894 define amdgpu_kernel void @singlethread_release_monotonic(
1895 i32* %out, i32 %in, i32 %old) {
1897 %gep = getelementptr i32, i32* %out, i32 4
1898 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic
; cmpxchg at syncscope("singlethread"), acq_rel on success / monotonic on
; failure. Expected: the bare non-returning atomic, with no
; `s_waitcnt vmcnt(0)`, no zero-count vmcnt/vscnt wait on gfx1010, and no cache
; invalidate (`buffer_wbinvl1_vol` on gfx803, `buffer_gl0_inv` /
; `buffer_gl1_inv` on gfx1010).
1902 ; GCN-LABEL: {{^}}singlethread_acq_rel_monotonic:
1903 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1904 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1905 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1906 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1907 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1908 ; GFX8-NOT: buffer_wbinvl1_vol
1909 ; GFX10-NOT: buffer_gl{{[01]}}_inv
1910 ; GFX10: .amdhsa_kernel singlethread_acq_rel_monotonic
1911 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1912 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1913 ; GFX10-NOT: .amdhsa_memory_ordered 0
1914 define amdgpu_kernel void @singlethread_acq_rel_monotonic(
1915 i32* %out, i32 %in, i32 %old) {
1917 %gep = getelementptr i32, i32* %out, i32 4
1918 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic
; cmpxchg at syncscope("singlethread"), seq_cst on success / monotonic on
; failure. Expected: the bare non-returning atomic, with no
; `s_waitcnt vmcnt(0)`, no zero-count vmcnt/vscnt wait on gfx1010, and no cache
; invalidate (`buffer_wbinvl1_vol` on gfx803, `buffer_gl0_inv` /
; `buffer_gl1_inv` on gfx1010).
1922 ; GCN-LABEL: {{^}}singlethread_seq_cst_monotonic:
1923 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1924 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1925 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1926 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1927 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1928 ; GFX8-NOT: buffer_wbinvl1_vol
1929 ; GFX10-NOT: buffer_gl{{[01]}}_inv
1930 ; GFX10: .amdhsa_kernel singlethread_seq_cst_monotonic
1931 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1932 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1933 ; GFX10-NOT: .amdhsa_memory_ordered 0
1934 define amdgpu_kernel void @singlethread_seq_cst_monotonic(
1935 i32* %out, i32 %in, i32 %old) {
1937 %gep = getelementptr i32, i32* %out, i32 4
1938 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic
; cmpxchg at syncscope("singlethread"), acquire on both success and failure.
; Expected: the bare non-returning atomic, with no `s_waitcnt vmcnt(0)`, no
; zero-count vmcnt/vscnt wait on gfx1010, and no cache invalidate
; (`buffer_wbinvl1_vol` on gfx803, `buffer_gl0_inv` / `buffer_gl1_inv` on
; gfx1010).
1942 ; GCN-LABEL: {{^}}singlethread_acquire_acquire:
1943 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1944 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1945 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1946 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1947 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1948 ; GFX8-NOT: buffer_wbinvl1_vol
1949 ; GFX10-NOT: buffer_gl{{[01]}}_inv
1950 ; GFX10: .amdhsa_kernel singlethread_acquire_acquire
1951 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1952 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1953 ; GFX10-NOT: .amdhsa_memory_ordered 0
1954 define amdgpu_kernel void @singlethread_acquire_acquire(
1955 i32* %out, i32 %in, i32 %old) {
1957 %gep = getelementptr i32, i32* %out, i32 4
1958 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire
; cmpxchg at syncscope("singlethread"), release on success / acquire on
; failure. Expected: the bare non-returning atomic, with no
; `s_waitcnt vmcnt(0)`, no zero-count vmcnt/vscnt wait on gfx1010, and no cache
; invalidate (`buffer_wbinvl1_vol` on gfx803, `buffer_gl0_inv` /
; `buffer_gl1_inv` on gfx1010).
1962 ; GCN-LABEL: {{^}}singlethread_release_acquire:
1963 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1964 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1965 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1966 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1967 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1968 ; GFX8-NOT: buffer_wbinvl1_vol
1969 ; GFX10-NOT: buffer_gl{{[01]}}_inv
1970 ; GFX10: .amdhsa_kernel singlethread_release_acquire
1971 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1972 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1973 ; GFX10-NOT: .amdhsa_memory_ordered 0
1974 define amdgpu_kernel void @singlethread_release_acquire(
1975 i32* %out, i32 %in, i32 %old) {
1977 %gep = getelementptr i32, i32* %out, i32 4
1978 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release acquire
; cmpxchg at syncscope("singlethread"), acq_rel on success / acquire on
; failure. Expected: the bare non-returning atomic, with no
; `s_waitcnt vmcnt(0)`, no zero-count vmcnt/vscnt wait on gfx1010, and no cache
; invalidate (`buffer_wbinvl1_vol` on gfx803, `buffer_gl0_inv` /
; `buffer_gl1_inv` on gfx1010).
1982 ; GCN-LABEL: {{^}}singlethread_acq_rel_acquire:
1983 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1984 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1985 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1986 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1987 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1988 ; GFX8-NOT: buffer_wbinvl1_vol
1989 ; GFX10-NOT: buffer_gl{{[01]}}_inv
1990 ; GFX10: .amdhsa_kernel singlethread_acq_rel_acquire
1991 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1992 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1993 ; GFX10-NOT: .amdhsa_memory_ordered 0
1994 define amdgpu_kernel void @singlethread_acq_rel_acquire(
1995 i32* %out, i32 %in, i32 %old) {
1997 %gep = getelementptr i32, i32* %out, i32 4
1998 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire
; cmpxchg at syncscope("singlethread"), seq_cst on success / acquire on
; failure. Expected: the bare non-returning atomic, with no
; `s_waitcnt vmcnt(0)`, no zero-count vmcnt/vscnt wait on gfx1010, and no cache
; invalidate (`buffer_wbinvl1_vol` on gfx803, `buffer_gl0_inv` /
; `buffer_gl1_inv` on gfx1010).
2002 ; GCN-LABEL: {{^}}singlethread_seq_cst_acquire:
2003 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2004 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2005 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2006 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2007 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2008 ; GFX8-NOT: buffer_wbinvl1_vol
2009 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2010 ; GFX10: .amdhsa_kernel singlethread_seq_cst_acquire
2011 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2012 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2013 ; GFX10-NOT: .amdhsa_memory_ordered 0
2014 define amdgpu_kernel void @singlethread_seq_cst_acquire(
2015 i32* %out, i32 %in, i32 %old) {
2017 %gep = getelementptr i32, i32* %out, i32 4
2018 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire
; cmpxchg at syncscope("singlethread"), seq_cst on both success and failure.
; Expected: the bare non-returning atomic, with no `s_waitcnt vmcnt(0)`, no
; zero-count vmcnt/vscnt wait on gfx1010, and no cache invalidate
; (`buffer_wbinvl1_vol` on gfx803, `buffer_gl0_inv` / `buffer_gl1_inv` on
; gfx1010).
2022 ; GCN-LABEL: {{^}}singlethread_seq_cst_seq_cst:
2023 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2024 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2025 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2026 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2027 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2028 ; GFX8-NOT: buffer_wbinvl1_vol
2029 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2030 ; GFX10: .amdhsa_kernel singlethread_seq_cst_seq_cst
2031 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2032 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2033 ; GFX10-NOT: .amdhsa_memory_ordered 0
2034 define amdgpu_kernel void @singlethread_seq_cst_seq_cst(
2035 i32* %out, i32 %in, i32 %old) {
2037 %gep = getelementptr i32, i32* %out, i32 4
2038 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst
; cmpxchg at syncscope("agent"), monotonic on both success and failure.
; Expected: the bare non-returning atomic, with no `s_waitcnt vmcnt(0)`, no
; zero-count vmcnt/vscnt wait on gfx1010, and no cache invalidate
; (`buffer_wbinvl1_vol` on gfx803, `buffer_gl0_inv` / `buffer_gl1_inv` on
; gfx1010).
2042 ; GCN-LABEL: {{^}}agent_monotonic_monotonic:
2043 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2044 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2045 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2046 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2047 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2048 ; GFX8-NOT: buffer_wbinvl1_vol
2049 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2050 ; GFX10: .amdhsa_kernel agent_monotonic_monotonic
2051 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2052 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2053 ; GFX10-NOT: .amdhsa_memory_ordered 0
2054 define amdgpu_kernel void @agent_monotonic_monotonic(
2055 i32* %out, i32 %in, i32 %old) {
2057 %gep = getelementptr i32, i32* %out, i32 4
2058 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic
; cmpxchg at syncscope("agent"), acquire on success / monotonic on failure;
; non-returning instruction form expected. No `s_waitcnt vmcnt(0)` or
; zero-count vmcnt/vscnt wait may precede the atomic. Directly after it: on
; gfx803 `s_waitcnt vmcnt(0) lgkmcnt(0)` then `buffer_wbinvl1_vol`; on gfx1010
; `s_waitcnt lgkmcnt(0)`, `s_waitcnt_vscnt null, 0x0`, `buffer_gl0_inv`,
; `buffer_gl1_inv`.
2062 ; GCN-LABEL: {{^}}agent_acquire_monotonic:
2063 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2064 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2065 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2066 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2067 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
2068 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2069 ; GFX8-NEXT: buffer_wbinvl1_vol
2070 ; GFX10-NEXT: buffer_gl0_inv
2071 ; GFX10-NEXT: buffer_gl1_inv
2072 ; GFX10: .amdhsa_kernel agent_acquire_monotonic
2073 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2074 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2075 ; GFX10-NOT: .amdhsa_memory_ordered 0
2076 define amdgpu_kernel void @agent_acquire_monotonic(
2077 i32* %out, i32 %in, i32 %old) {
2079 %gep = getelementptr i32, i32* %out, i32 4
2080 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic
; cmpxchg at syncscope("agent"), release on success / monotonic on failure;
; non-returning instruction form expected. A wait must sit directly ahead of
; the atomic (`s_waitcnt vmcnt(0) lgkmcnt(0)` on gfx803; `s_waitcnt lgkmcnt(0)`
; + `s_waitcnt_vscnt null, 0x0` on gfx1010). Nothing may follow it: no
; `s_waitcnt vmcnt(0)`, no zero-count vmcnt/vscnt wait and no
; `buffer_wbinvl1_vol` / `buffer_gl?_inv` invalidate.
2084 ; GCN-LABEL: {{^}}agent_release_monotonic:
2085 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2086 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
2087 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
2088 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2089 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2090 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2091 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
2092 ; GFX10: .amdhsa_kernel agent_release_monotonic
2093 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2094 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2095 ; GFX10-NOT: .amdhsa_memory_ordered 0
2096 define amdgpu_kernel void @agent_release_monotonic(
2097 i32* %out, i32 %in, i32 %old) {
2099 %gep = getelementptr i32, i32* %out, i32 4
2100 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release monotonic
; cmpxchg at syncscope("agent"), acq_rel on success / monotonic on failure;
; non-returning instruction form expected. The atomic must be bracketed: a
; wait directly ahead of it and, directly after it, another wait followed by
; the cache invalidate(s) (`buffer_wbinvl1_vol` on gfx803, `buffer_gl0_inv` +
; `buffer_gl1_inv` on gfx1010).
2104 ; GCN-LABEL: {{^}}agent_acq_rel_monotonic:
2105 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2106 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
2107 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
2108 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2109 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2110 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
2111 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2112 ; GFX8-NEXT: buffer_wbinvl1_vol
2113 ; GFX10-NEXT: buffer_gl0_inv
2114 ; GFX10-NEXT: buffer_gl1_inv
2115 ; GFX10: .amdhsa_kernel agent_acq_rel_monotonic
2116 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2117 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2118 ; GFX10-NOT: .amdhsa_memory_ordered 0
2119 define amdgpu_kernel void @agent_acq_rel_monotonic(
2120 i32* %out, i32 %in, i32 %old) {
2122 %gep = getelementptr i32, i32* %out, i32 4
2123 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic
; cmpxchg at syncscope("agent"), seq_cst on success / monotonic on failure;
; non-returning instruction form expected. The atomic must be bracketed: a
; wait directly ahead of it and, directly after it, another wait followed by
; the cache invalidate(s) (`buffer_wbinvl1_vol` on gfx803, `buffer_gl0_inv` +
; `buffer_gl1_inv` on gfx1010).
2127 ; GCN-LABEL: {{^}}agent_seq_cst_monotonic:
2128 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2129 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
2130 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
2131 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2132 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2133 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
2134 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2135 ; GFX8-NEXT: buffer_wbinvl1_vol
2136 ; GFX10-NEXT: buffer_gl0_inv
2137 ; GFX10-NEXT: buffer_gl1_inv
2138 ; GFX10: .amdhsa_kernel agent_seq_cst_monotonic
2139 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2140 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2141 ; GFX10-NOT: .amdhsa_memory_ordered 0
2142 define amdgpu_kernel void @agent_seq_cst_monotonic(
2143 i32* %out, i32 %in, i32 %old) {
2145 %gep = getelementptr i32, i32* %out, i32 4
2146 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic
; cmpxchg at syncscope("agent"), acquire on both success and failure;
; non-returning instruction form expected. No `s_waitcnt vmcnt(0)` or
; zero-count vmcnt/vscnt wait may precede the atomic. Directly after it: on
; gfx803 `s_waitcnt vmcnt(0) lgkmcnt(0)` then `buffer_wbinvl1_vol`; on gfx1010
; `s_waitcnt lgkmcnt(0)`, `s_waitcnt_vscnt null, 0x0`, `buffer_gl0_inv`,
; `buffer_gl1_inv`.
2150 ; GCN-LABEL: {{^}}agent_acquire_acquire:
2151 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2152 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2153 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2154 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2155 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
2156 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2157 ; GFX8-NEXT: buffer_wbinvl1_vol
2158 ; GFX10-NEXT: buffer_gl0_inv
2159 ; GFX10-NEXT: buffer_gl1_inv
2160 ; GFX10: .amdhsa_kernel agent_acquire_acquire
2161 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2162 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2163 ; GFX10-NOT: .amdhsa_memory_ordered 0
2164 define amdgpu_kernel void @agent_acquire_acquire(
2165 i32* %out, i32 %in, i32 %old) {
2167 %gep = getelementptr i32, i32* %out, i32 4
2168 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire acquire
; cmpxchg at syncscope("agent"), release on success / acquire on failure;
; non-returning instruction form expected. The atomic must be bracketed: a
; wait directly ahead of it and, directly after it, another wait followed by
; the cache invalidate(s) (`buffer_wbinvl1_vol` on gfx803, `buffer_gl0_inv` +
; `buffer_gl1_inv` on gfx1010).
2172 ; GCN-LABEL: {{^}}agent_release_acquire:
2173 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2174 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
2175 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
2176 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2177 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2178 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
2179 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2180 ; GFX8-NEXT: buffer_wbinvl1_vol
2181 ; GFX10-NEXT: buffer_gl0_inv
2182 ; GFX10-NEXT: buffer_gl1_inv
2183 ; GFX10: .amdhsa_kernel agent_release_acquire
2184 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2185 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2186 ; GFX10-NOT: .amdhsa_memory_ordered 0
2187 define amdgpu_kernel void @agent_release_acquire(
2188 i32* %out, i32 %in, i32 %old) {
2190 %gep = getelementptr i32, i32* %out, i32 4
2191 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release acquire
; Non-returning flat cmpxchg, syncscope("agent"), success=acq_rel, failure=acquire.
; A wait must sit directly before the atomic, and a wait plus cache invalidation
; must directly follow it on both GFX8 and GFX10.
2195 ; GCN-LABEL: {{^}}agent_acq_rel_acquire:
2196 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2197 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
2198 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
2199 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2200 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2201 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
2202 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2203 ; GFX8-NEXT: buffer_wbinvl1_vol
2204 ; GFX10-NEXT: buffer_gl0_inv
2205 ; GFX10-NEXT: buffer_gl1_inv
2206 ; GFX10: .amdhsa_kernel agent_acq_rel_acquire
2207 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2208 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2209 ; GFX10-NOT: .amdhsa_memory_ordered 0
2210 define amdgpu_kernel void @agent_acq_rel_acquire(
2211 i32* %out, i32 %in, i32 %old) {
2213 %gep = getelementptr i32, i32* %out, i32 4
2214 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire
; Non-returning flat cmpxchg, syncscope("agent"), success=seq_cst, failure=acquire.
; A wait must sit directly before the atomic, and a wait plus cache invalidation
; must directly follow it on both GFX8 and GFX10.
2218 ; GCN-LABEL: {{^}}agent_seq_cst_acquire:
2219 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2220 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
2221 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
2222 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2223 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2224 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
2225 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2226 ; GFX8-NEXT: buffer_wbinvl1_vol
2227 ; GFX10-NEXT: buffer_gl0_inv
2228 ; GFX10-NEXT: buffer_gl1_inv
2229 ; GFX10: .amdhsa_kernel agent_seq_cst_acquire
2230 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2231 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2232 ; GFX10-NOT: .amdhsa_memory_ordered 0
2233 define amdgpu_kernel void @agent_seq_cst_acquire(
2234 i32* %out, i32 %in, i32 %old) {
2236 %gep = getelementptr i32, i32* %out, i32 4
2237 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire
; Non-returning flat cmpxchg, syncscope("agent"), success=seq_cst, failure=seq_cst.
; A wait must sit directly before the atomic, and a wait plus cache invalidation
; must directly follow it on both GFX8 and GFX10.
2241 ; GCN-LABEL: {{^}}agent_seq_cst_seq_cst:
2242 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2243 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
2244 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
2245 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2246 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2247 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
2248 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2249 ; GFX8-NEXT: buffer_wbinvl1_vol
2250 ; GFX10-NEXT: buffer_gl0_inv
2251 ; GFX10-NEXT: buffer_gl1_inv
2252 ; GFX10: .amdhsa_kernel agent_seq_cst_seq_cst
2253 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2254 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2255 ; GFX10-NOT: .amdhsa_memory_ordered 0
2256 define amdgpu_kernel void @agent_seq_cst_seq_cst(
2257 i32* %out, i32 %in, i32 %old) {
2259 %gep = getelementptr i32, i32* %out, i32 4
2260 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
; Non-returning flat cmpxchg, syncscope("workgroup"), success=monotonic, failure=monotonic.
; No zero-count waits and no cache invalidates are expected around the atomic.
; The GFX10 invalidate pattern must match buffer_gl0_inv / buffer_gl1_inv exactly;
; text outside the double-brace regex is matched literally by FileCheck.
2264 ; GCN-LABEL: {{^}}workgroup_monotonic_monotonic:
2265 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2266 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2267 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2268 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2269 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2270 ; GFX8-NOT: buffer_wbinvl1_vol
2271 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2272 ; GFX10: .amdhsa_kernel workgroup_monotonic_monotonic
2273 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2274 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2275 ; GFX10-NOT: .amdhsa_memory_ordered 0
2276 define amdgpu_kernel void @workgroup_monotonic_monotonic(
2277 i32* %out, i32 %in, i32 %old) {
2279 %gep = getelementptr i32, i32* %out, i32 4
2280 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic
; Non-returning flat cmpxchg, syncscope("workgroup"), success=acquire, failure=monotonic.
; Only the GFX10 WGP-mode run expects waits and buffer_gl0_inv directly after the
; atomic; GFX8 and the GFX10 CU-mode run must not show the listed wait or invalidate.
2284 ; GCN-LABEL: {{^}}workgroup_acquire_monotonic:
2285 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2286 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2287 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2288 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2289 ; GFX10WGP-NEXT: s_waitcnt lgkmcnt(0){{$}}
2290 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2291 ; GFX10WGP-NEXT: buffer_gl0_inv
2292 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2293 ; GFX10CU-NOT: buffer_gl0_inv
2294 ; GFX8-NOT: buffer_wbinvl1_vol
2295 ; GFX10: .amdhsa_kernel workgroup_acquire_monotonic
2296 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2297 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2298 ; GFX10-NOT: .amdhsa_memory_ordered 0
2299 define amdgpu_kernel void @workgroup_acquire_monotonic(
2300 i32* %out, i32 %in, i32 %old) {
2302 %gep = getelementptr i32, i32* %out, i32 4
2303 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic
; Non-returning flat cmpxchg, syncscope("workgroup"), success=release, failure=monotonic.
; Only the GFX10 WGP-mode run expects waits ahead of the atomic; after the atomic
; no zero-count wait or cache invalidate is expected on any target.
2307 ; GCN-LABEL: {{^}}workgroup_release_monotonic:
2308 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2309 ; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2310 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2311 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
2312 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2313 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2314 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2315 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2316 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
2317 ; GFX10: .amdhsa_kernel workgroup_release_monotonic
2318 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2319 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2320 ; GFX10-NOT: .amdhsa_memory_ordered 0
2321 define amdgpu_kernel void @workgroup_release_monotonic(
2322 i32* %out, i32 %in, i32 %old) {
2324 %gep = getelementptr i32, i32* %out, i32 4
2325 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic
; Non-returning flat cmpxchg, syncscope("workgroup"), success=acq_rel, failure=monotonic.
; Only the GFX10 WGP-mode run expects waits ahead of the atomic and waits plus
; buffer_gl0_inv directly after it; GFX8 and GFX10 CU mode must not show them.
2329 ; GCN-LABEL: {{^}}workgroup_acq_rel_monotonic:
2330 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2331 ; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2332 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2333 ; GFX10CU-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2334 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2335 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2336 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2337 ; GFX10WGP-NEXT: s_waitcnt lgkmcnt(0){{$}}
2338 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2339 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2340 ; GFX8-NOT: buffer_wbinvl1_vol
2341 ; GFX10WGP-NEXT: buffer_gl0_inv
2342 ; GFX10CU-NOT: buffer_gl0_inv
2343 ; GFX10: .amdhsa_kernel workgroup_acq_rel_monotonic
2344 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2345 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2346 ; GFX10-NOT: .amdhsa_memory_ordered 0
2347 define amdgpu_kernel void @workgroup_acq_rel_monotonic(
2348 i32* %out, i32 %in, i32 %old) {
2350 %gep = getelementptr i32, i32* %out, i32 4
2351 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic
; Non-returning flat cmpxchg, syncscope("workgroup"), success=seq_cst, failure=monotonic.
; Only the GFX10 WGP-mode run expects waits ahead of the atomic and waits plus
; buffer_gl0_inv directly after it; GFX8 and GFX10 CU mode must not show them.
2355 ; GCN-LABEL: {{^}}workgroup_seq_cst_monotonic:
2356 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2357 ; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2358 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2359 ; GFX10CU-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2360 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2361 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2362 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2363 ; GFX10WGP-NEXT: s_waitcnt lgkmcnt(0){{$}}
2364 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2365 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2366 ; GFX8-NOT: buffer_wbinvl1_vol
2367 ; GFX10WGP-NEXT: buffer_gl0_inv
2368 ; GFX10CU-NOT: buffer_gl0_inv
2369 ; GFX10: .amdhsa_kernel workgroup_seq_cst_monotonic
2370 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2371 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2372 ; GFX10-NOT: .amdhsa_memory_ordered 0
2373 define amdgpu_kernel void @workgroup_seq_cst_monotonic(
2374 i32* %out, i32 %in, i32 %old) {
2376 %gep = getelementptr i32, i32* %out, i32 4
2377 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic
; Non-returning flat cmpxchg, syncscope("workgroup"), success=acquire, failure=acquire.
; Only the GFX10 WGP-mode run expects waits and buffer_gl0_inv directly after the
; atomic; GFX8 and the GFX10 CU-mode run must not show the listed wait or invalidate.
2381 ; GCN-LABEL: {{^}}workgroup_acquire_acquire:
2382 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2383 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2384 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2385 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2386 ; GFX10WGP-NEXT: s_waitcnt lgkmcnt(0){{$}}
2387 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2388 ; GFX10WGP-NEXT: buffer_gl0_inv
2389 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2390 ; GFX10CU-NOT: buffer_gl0_inv
2391 ; GFX8-NOT: buffer_wbinvl1_vol
2392 ; GFX10: .amdhsa_kernel workgroup_acquire_acquire
2393 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2394 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2395 ; GFX10-NOT: .amdhsa_memory_ordered 0
2396 define amdgpu_kernel void @workgroup_acquire_acquire(
2397 i32* %out, i32 %in, i32 %old) {
2399 %gep = getelementptr i32, i32* %out, i32 4
2400 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire
; Non-returning flat cmpxchg, syncscope("workgroup"), success=release, failure=acquire.
; Only the GFX10 WGP-mode run expects waits ahead of the atomic and waits plus
; buffer_gl0_inv directly after it; GFX8 and GFX10 CU mode must not show them.
2404 ; GCN-LABEL: {{^}}workgroup_release_acquire:
2405 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2406 ; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2407 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2408 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
2409 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2410 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2411 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2412 ; GFX10WGP-NEXT: s_waitcnt lgkmcnt(0){{$}}
2413 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2414 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2415 ; GFX8-NOT: buffer_wbinvl1_vol
2416 ; GFX10WGP-NEXT: buffer_gl0_inv
2417 ; GFX10CU-NOT: buffer_gl0_inv
2418 ; GFX10: .amdhsa_kernel workgroup_release_acquire
2419 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2420 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2421 ; GFX10-NOT: .amdhsa_memory_ordered 0
2422 define amdgpu_kernel void @workgroup_release_acquire(
2423 i32* %out, i32 %in, i32 %old) {
2425 %gep = getelementptr i32, i32* %out, i32 4
2426 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") release acquire
; Non-returning flat cmpxchg, syncscope("workgroup"), success=acq_rel, failure=acquire.
; Only the GFX10 WGP-mode run expects waits ahead of the atomic and waits plus
; buffer_gl0_inv directly after it; GFX8 and GFX10 CU mode must not show them.
; The WGP-mode invalidate is pinned to the line right after the vscnt wait.
2430 ; GCN-LABEL: {{^}}workgroup_acq_rel_acquire:
2431 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2432 ; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2433 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2434 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
2435 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2436 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2437 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2438 ; GFX10WGP-NEXT: s_waitcnt lgkmcnt(0){{$}}
2439 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2440 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2441 ; GFX8-NOT: buffer_wbinvl1_vol
2442 ; GFX10WGP-NEXT: buffer_gl0_inv
2443 ; GFX10CU-NOT: buffer_gl0_inv
2444 ; GFX10: .amdhsa_kernel workgroup_acq_rel_acquire
2445 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2446 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2447 ; GFX10-NOT: .amdhsa_memory_ordered 0
2448 define amdgpu_kernel void @workgroup_acq_rel_acquire(
2449 i32* %out, i32 %in, i32 %old) {
2451 %gep = getelementptr i32, i32* %out, i32 4
2452 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire
; Non-returning flat cmpxchg, syncscope("workgroup"), success=seq_cst, failure=acquire.
; Only the GFX10 WGP-mode run expects waits ahead of the atomic and waits plus
; buffer_gl0_inv directly after it; GFX8 and GFX10 CU mode must not show them.
2456 ; GCN-LABEL: {{^}}workgroup_seq_cst_acquire:
2457 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2458 ; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2459 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2460 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
2461 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2462 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2463 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2464 ; GFX10WGP-NEXT: s_waitcnt lgkmcnt(0){{$}}
2465 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2466 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2467 ; GFX8-NOT: buffer_wbinvl1_vol
2468 ; GFX10WGP-NEXT: buffer_gl0_inv
2469 ; GFX10CU-NOT: buffer_gl0_inv
2470 ; GFX10: .amdhsa_kernel workgroup_seq_cst_acquire
2471 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2472 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2473 ; GFX10-NOT: .amdhsa_memory_ordered 0
2474 define amdgpu_kernel void @workgroup_seq_cst_acquire(
2475 i32* %out, i32 %in, i32 %old) {
2477 %gep = getelementptr i32, i32* %out, i32 4
2478 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire
; Non-returning flat cmpxchg, syncscope("workgroup"), success=seq_cst, failure=seq_cst.
; Only the GFX10 WGP-mode run expects waits ahead of the atomic and waits plus
; buffer_gl0_inv directly after it; GFX8 and GFX10 CU mode must not show them.
; The WGP-mode invalidate is pinned to the line right after the vscnt wait.
2482 ; GCN-LABEL: {{^}}workgroup_seq_cst_seq_cst:
2483 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2484 ; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2485 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2486 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
2487 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2488 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2489 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2490 ; GFX10WGP-NEXT: s_waitcnt lgkmcnt(0){{$}}
2491 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2492 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2493 ; GFX8-NOT: buffer_wbinvl1_vol
2494 ; GFX10WGP-NEXT: buffer_gl0_inv
2495 ; GFX10CU-NOT: buffer_gl0_inv
2496 ; GFX10: .amdhsa_kernel workgroup_seq_cst_seq_cst
2497 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2498 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2499 ; GFX10-NOT: .amdhsa_memory_ordered 0
2500 define amdgpu_kernel void @workgroup_seq_cst_seq_cst(
2501 i32* %out, i32 %in, i32 %old) {
2503 %gep = getelementptr i32, i32* %out, i32 4
2504 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst
; Non-returning flat cmpxchg, syncscope("wavefront"), success=monotonic, failure=monotonic.
; No zero-count waits and no cache invalidates are expected around the atomic.
; The GFX10 invalidate pattern must match buffer_gl0_inv / buffer_gl1_inv exactly;
; text outside the double-brace regex is matched literally by FileCheck.
2508 ; GCN-LABEL: {{^}}wavefront_monotonic_monotonic:
2509 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2510 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2511 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2512 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2513 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2514 ; GFX8-NOT: buffer_wbinvl1_vol
2515 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2516 ; GFX10: .amdhsa_kernel wavefront_monotonic_monotonic
2517 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2518 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2519 ; GFX10-NOT: .amdhsa_memory_ordered 0
2520 define amdgpu_kernel void @wavefront_monotonic_monotonic(
2521 i32* %out, i32 %in, i32 %old) {
2523 %gep = getelementptr i32, i32* %out, i32 4
2524 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic
; Non-returning flat cmpxchg, syncscope("wavefront"), success=acquire, failure=monotonic.
; No zero-count waits and no cache invalidates are expected around the atomic.
; The GFX10 invalidate pattern must match buffer_gl0_inv / buffer_gl1_inv exactly;
; text outside the double-brace regex is matched literally by FileCheck.
2528 ; GCN-LABEL: {{^}}wavefront_acquire_monotonic:
2529 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2530 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2531 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2532 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2533 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2534 ; GFX8-NOT: buffer_wbinvl1_vol
2535 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2536 ; GFX10: .amdhsa_kernel wavefront_acquire_monotonic
2537 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2538 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2539 ; GFX10-NOT: .amdhsa_memory_ordered 0
2540 define amdgpu_kernel void @wavefront_acquire_monotonic(
2541 i32* %out, i32 %in, i32 %old) {
2543 %gep = getelementptr i32, i32* %out, i32 4
2544 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic
; Non-returning flat cmpxchg, syncscope("wavefront"), success=release, failure=monotonic.
; No zero-count waits and no cache invalidates are expected around the atomic.
; The GFX10 invalidate pattern must match buffer_gl0_inv / buffer_gl1_inv exactly;
; text outside the double-brace regex is matched literally by FileCheck.
2548 ; GCN-LABEL: {{^}}wavefront_release_monotonic:
2549 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2550 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2551 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2552 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2553 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2554 ; GFX8-NOT: buffer_wbinvl1_vol
2555 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2556 ; GFX10: .amdhsa_kernel wavefront_release_monotonic
2557 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2558 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2559 ; GFX10-NOT: .amdhsa_memory_ordered 0
2560 define amdgpu_kernel void @wavefront_release_monotonic(
2561 i32* %out, i32 %in, i32 %old) {
2563 %gep = getelementptr i32, i32* %out, i32 4
2564 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic
; Non-returning flat cmpxchg, syncscope("wavefront"), success=acq_rel, failure=monotonic.
; No zero-count waits and no cache invalidates are expected around the atomic.
; The GFX10 invalidate pattern must match buffer_gl0_inv / buffer_gl1_inv exactly;
; text outside the double-brace regex is matched literally by FileCheck.
2568 ; GCN-LABEL: {{^}}wavefront_acq_rel_monotonic:
2569 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2570 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2571 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2572 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2573 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2574 ; GFX8-NOT: buffer_wbinvl1_vol
2575 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2576 ; GFX10: .amdhsa_kernel wavefront_acq_rel_monotonic
2577 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2578 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2579 ; GFX10-NOT: .amdhsa_memory_ordered 0
2580 define amdgpu_kernel void @wavefront_acq_rel_monotonic(
2581 i32* %out, i32 %in, i32 %old) {
2583 %gep = getelementptr i32, i32* %out, i32 4
2584 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic
; Non-returning flat cmpxchg, syncscope("wavefront"), success=seq_cst, failure=monotonic.
; No zero-count waits and no cache invalidates are expected around the atomic.
; The GFX10 invalidate pattern must match buffer_gl0_inv / buffer_gl1_inv exactly;
; text outside the double-brace regex is matched literally by FileCheck.
2588 ; GCN-LABEL: {{^}}wavefront_seq_cst_monotonic:
2589 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2590 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2591 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2592 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2593 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2594 ; GFX8-NOT: buffer_wbinvl1_vol
2595 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2596 ; GFX10: .amdhsa_kernel wavefront_seq_cst_monotonic
2597 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2598 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2599 ; GFX10-NOT: .amdhsa_memory_ordered 0
2600 define amdgpu_kernel void @wavefront_seq_cst_monotonic(
2601 i32* %out, i32 %in, i32 %old) {
2603 %gep = getelementptr i32, i32* %out, i32 4
2604 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic
; Non-returning flat cmpxchg, syncscope("wavefront"), success=acquire, failure=acquire.
; No zero-count waits and no cache invalidates are expected around the atomic.
; The GFX10 invalidate pattern must match buffer_gl0_inv / buffer_gl1_inv exactly;
; text outside the double-brace regex is matched literally by FileCheck.
2608 ; GCN-LABEL: {{^}}wavefront_acquire_acquire:
2609 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2610 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2611 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2612 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2613 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2614 ; GFX8-NOT: buffer_wbinvl1_vol
2615 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2616 ; GFX10: .amdhsa_kernel wavefront_acquire_acquire
2617 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2618 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2619 ; GFX10-NOT: .amdhsa_memory_ordered 0
2620 define amdgpu_kernel void @wavefront_acquire_acquire(
2621 i32* %out, i32 %in, i32 %old) {
2623 %gep = getelementptr i32, i32* %out, i32 4
2624 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire
; Non-returning flat cmpxchg, syncscope("wavefront"), success=release, failure=acquire.
; No zero-count waits and no cache invalidates are expected around the atomic.
; The GFX10 invalidate pattern must match buffer_gl0_inv / buffer_gl1_inv exactly;
; text outside the double-brace regex is matched literally by FileCheck.
2628 ; GCN-LABEL: {{^}}wavefront_release_acquire:
2629 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2630 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2631 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2632 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2633 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2634 ; GFX8-NOT: buffer_wbinvl1_vol
2635 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2636 ; GFX10: .amdhsa_kernel wavefront_release_acquire
2637 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2638 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2639 ; GFX10-NOT: .amdhsa_memory_ordered 0
2640 define amdgpu_kernel void @wavefront_release_acquire(
2641 i32* %out, i32 %in, i32 %old) {
2643 %gep = getelementptr i32, i32* %out, i32 4
2644 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") release acquire
; Non-returning flat cmpxchg, syncscope("wavefront"), success=acq_rel, failure=acquire.
; No zero-count waits and no cache invalidates are expected around the atomic.
; The GFX10 invalidate pattern must match buffer_gl0_inv / buffer_gl1_inv exactly;
; text outside the double-brace regex is matched literally by FileCheck.
2648 ; GCN-LABEL: {{^}}wavefront_acq_rel_acquire:
2649 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2650 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2651 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2652 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2653 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2654 ; GFX8-NOT: buffer_wbinvl1_vol
2655 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2656 ; GFX10: .amdhsa_kernel wavefront_acq_rel_acquire
2657 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2658 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2659 ; GFX10-NOT: .amdhsa_memory_ordered 0
2660 define amdgpu_kernel void @wavefront_acq_rel_acquire(
2661 i32* %out, i32 %in, i32 %old) {
2663 %gep = getelementptr i32, i32* %out, i32 4
2664 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire
; Non-returning flat cmpxchg, syncscope("wavefront"), success=seq_cst, failure=acquire.
; No zero-count waits and no cache invalidates are expected around the atomic.
; The GFX10 invalidate pattern must match buffer_gl0_inv / buffer_gl1_inv exactly;
; text outside the double-brace regex is matched literally by FileCheck.
2668 ; GCN-LABEL: {{^}}wavefront_seq_cst_acquire:
2669 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2670 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2671 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2672 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2673 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2674 ; GFX8-NOT: buffer_wbinvl1_vol
2675 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2676 ; GFX10: .amdhsa_kernel wavefront_seq_cst_acquire
2677 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2678 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2679 ; GFX10-NOT: .amdhsa_memory_ordered 0
2680 define amdgpu_kernel void @wavefront_seq_cst_acquire(
2681 i32* %out, i32 %in, i32 %old) {
2683 %gep = getelementptr i32, i32* %out, i32 4
2684 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire
; Non-returning flat cmpxchg, syncscope("wavefront"), success=seq_cst, failure=seq_cst.
; No zero-count waits and no cache invalidates are expected around the atomic.
; The GFX10 invalidate pattern must match buffer_gl0_inv / buffer_gl1_inv exactly;
; text outside the double-brace regex is matched literally by FileCheck.
2688 ; GCN-LABEL: {{^}}wavefront_seq_cst_seq_cst:
2689 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2690 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2691 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2692 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2693 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2694 ; GFX8-NOT: buffer_wbinvl1_vol
2695 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2696 ; GFX10: .amdhsa_kernel wavefront_seq_cst_seq_cst
2697 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2698 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2699 ; GFX10-NOT: .amdhsa_memory_ordered 0
2700 define amdgpu_kernel void @wavefront_seq_cst_seq_cst(
2701 i32* %out, i32 %in, i32 %old) {
2703 %gep = getelementptr i32, i32* %out, i32 4
2704 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst
; Returning flat cmpxchg (glc form; the old value is extracted and stored to %out),
; no syncscope given, success=acquire, failure=monotonic.
; No zero-count wait may precede the atomic; the wait and the cache invalidation
; must follow it on consecutive lines.
2708 ; GCN-LABEL: {{^}}system_acquire_monotonic_ret:
2709 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2710 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2711 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
2712 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2713 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2714 ; GFX8-NEXT: buffer_wbinvl1_vol
2715 ; GFX10-NEXT: buffer_gl0_inv
2716 ; GFX10-NEXT: buffer_gl1_inv
2717 ; GFX10: .amdhsa_kernel system_acquire_monotonic_ret
2718 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2719 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2720 ; GFX10-NOT: .amdhsa_memory_ordered 0
2721 define amdgpu_kernel void @system_acquire_monotonic_ret(
2722 i32* %out, i32 %in, i32 %old) {
2724 %gep = getelementptr i32, i32* %out, i32 4
2725 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire monotonic
2726 %val0 = extractvalue { i32, i1 } %val, 0
2727 store i32 %val0, i32* %out, align 4
; Returning flat cmpxchg (glc form; the old value is extracted and stored to %out),
; no syncscope given, success=acq_rel, failure=monotonic.
; A wait plus cache invalidation must directly follow the atomic.
; NOTE(review): the pre-atomic checks here only require an lgkmcnt wait somewhere
; before the atomic (no -NEXT adjacency) - confirm whether that looseness is intended.
2731 ; GCN-LABEL: {{^}}system_acq_rel_monotonic_ret:
2732 ; GCN: s_waitcnt lgkmcnt(0){{$}}
2733 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
2734 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
2735 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2736 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2737 ; GFX8-NEXT: buffer_wbinvl1_vol
2738 ; GFX10-NEXT: buffer_gl0_inv
2739 ; GFX10-NEXT: buffer_gl1_inv
2740 ; GFX10: .amdhsa_kernel system_acq_rel_monotonic_ret
2741 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2742 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2743 ; GFX10-NOT: .amdhsa_memory_ordered 0
2744 define amdgpu_kernel void @system_acq_rel_monotonic_ret(
2745 i32* %out, i32 %in, i32 %old) {
2747 %gep = getelementptr i32, i32* %out, i32 4
2748 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel monotonic
2749 %val0 = extractvalue { i32, i1 } %val, 0
2750 store i32 %val0, i32* %out, align 4
; Returning flat cmpxchg (glc form; the old value is extracted and stored to %out),
; no syncscope given, success=seq_cst, failure=monotonic.
; A wait plus cache invalidation must directly follow the atomic.
; NOTE(review): the pre-atomic checks here only require an lgkmcnt wait somewhere
; before the atomic (no -NEXT adjacency) - confirm whether that looseness is intended.
2754 ; GCN-LABEL: {{^}}system_seq_cst_monotonic_ret:
2755 ; GCN: s_waitcnt lgkmcnt(0){{$}}
2756 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
2757 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
2758 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2759 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2760 ; GFX8-NEXT: buffer_wbinvl1_vol
2761 ; GFX10-NEXT: buffer_gl0_inv
2762 ; GFX10-NEXT: buffer_gl1_inv
2763 ; GFX10: .amdhsa_kernel system_seq_cst_monotonic_ret
2764 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2765 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2766 ; GFX10-NOT: .amdhsa_memory_ordered 0
2767 define amdgpu_kernel void @system_seq_cst_monotonic_ret(
2768 i32* %out, i32 %in, i32 %old) {
2770 %gep = getelementptr i32, i32* %out, i32 4
2771 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst monotonic
2772 %val0 = extractvalue { i32, i1 } %val, 0
2773 store i32 %val0, i32* %out, align 4
; Returning flat cmpxchg (glc form; the old value is extracted and stored to %out),
; no syncscope given, success=acquire, failure=acquire.
; No zero-count wait may precede the atomic; a wait plus cache invalidation must
; directly follow it.
2777 ; GCN-LABEL: {{^}}system_acquire_acquire_ret:
2778 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2779 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2780 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
2781 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2782 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2783 ; GFX8-NEXT: buffer_wbinvl1_vol
2784 ; GFX10-NEXT: buffer_gl0_inv
2785 ; GFX10-NEXT: buffer_gl1_inv
2786 ; GFX10: .amdhsa_kernel system_acquire_acquire_ret
2787 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2788 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2789 ; GFX10-NOT: .amdhsa_memory_ordered 0
2790 define amdgpu_kernel void @system_acquire_acquire_ret(
2791 i32* %out, i32 %in, i32 %old) {
2793 %gep = getelementptr i32, i32* %out, i32 4
2794 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire acquire
2795 %val0 = extractvalue { i32, i1 } %val, 0
2796 store i32 %val0, i32* %out, align 4
; Returning flat cmpxchg (glc form; the old value is extracted and stored to %out),
; no syncscope given, success=release, failure=acquire.
; A wait must sit directly before the atomic, and a wait plus cache invalidation
; must directly follow it.
2800 ; GCN-LABEL: {{^}}system_release_acquire_ret:
2801 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2802 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
2803 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
2804 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
2805 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2806 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2807 ; GFX8-NEXT: buffer_wbinvl1_vol
2808 ; GFX10-NEXT: buffer_gl0_inv
2809 ; GFX10-NEXT: buffer_gl1_inv
2810 ; GFX10: .amdhsa_kernel system_release_acquire_ret
2811 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2812 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2813 ; GFX10-NOT: .amdhsa_memory_ordered 0
2814 define amdgpu_kernel void @system_release_acquire_ret(
2815 i32* %out, i32 %in, i32 %old) {
2817 %gep = getelementptr i32, i32* %out, i32 4
2818 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release acquire
2819 %val0 = extractvalue { i32, i1 } %val, 0
2820 store i32 %val0, i32* %out, align 4
; Returning flat cmpxchg (glc form; the old value is extracted and stored to %out),
; no syncscope given, success=acq_rel, failure=acquire.
; A wait must sit directly before the atomic, and a wait plus cache invalidation
; must directly follow it.
2824 ; GCN-LABEL: {{^}}system_acq_rel_acquire_ret:
2825 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2826 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
2827 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
2828 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
2829 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2830 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2831 ; GFX8-NEXT: buffer_wbinvl1_vol
2832 ; GFX10-NEXT: buffer_gl0_inv
2833 ; GFX10-NEXT: buffer_gl1_inv
2834 ; GFX10: .amdhsa_kernel system_acq_rel_acquire_ret
2835 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2836 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2837 ; GFX10-NOT: .amdhsa_memory_ordered 0
2838 define amdgpu_kernel void @system_acq_rel_acquire_ret(
2839 i32* %out, i32 %in, i32 %old) {
2841 %gep = getelementptr i32, i32* %out, i32 4
2842 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel acquire
2843 %val0 = extractvalue { i32, i1 } %val, 0
2844 store i32 %val0, i32* %out, align 4
; Returning flat cmpxchg (glc form; the old value is extracted and stored to %out),
; no syncscope given, success=seq_cst, failure=acquire.
; A wait must sit directly before the atomic, and a wait plus cache invalidation
; must directly follow it.
2848 ; GCN-LABEL: {{^}}system_seq_cst_acquire_ret:
2849 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2850 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
2851 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
2852 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
2853 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2854 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2855 ; GFX8-NEXT: buffer_wbinvl1_vol
2856 ; GFX10-NEXT: buffer_gl0_inv
2857 ; GFX10-NEXT: buffer_gl1_inv
2858 ; GFX10: .amdhsa_kernel system_seq_cst_acquire_ret
2859 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2860 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2861 ; GFX10-NOT: .amdhsa_memory_ordered 0
2862 define amdgpu_kernel void @system_seq_cst_acquire_ret(
2863 i32* %out, i32 %in, i32 %old) {
2865 %gep = getelementptr i32, i32* %out, i32 4
2866 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst acquire
2867 %val0 = extractvalue { i32, i1 } %val, 0
2868 store i32 %val0, i32* %out, align 4
; Returning flat cmpxchg (glc form; the old value is extracted and stored to %out),
; no syncscope given, success=seq_cst, failure=seq_cst.
; A wait must sit directly before the atomic, and a wait plus cache invalidation
; must directly follow it.
2872 ; GCN-LABEL: {{^}}system_seq_cst_seq_cst_ret:
2873 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2874 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
2875 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
2876 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
2877 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2878 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2879 ; GFX8-NEXT: buffer_wbinvl1_vol
2880 ; GFX10-NEXT: buffer_gl0_inv
2881 ; GFX10-NEXT: buffer_gl1_inv
2882 ; GFX10: .amdhsa_kernel system_seq_cst_seq_cst_ret
2883 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2884 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2885 ; GFX10-NOT: .amdhsa_memory_ordered 0
2886 define amdgpu_kernel void @system_seq_cst_seq_cst_ret(
2887 i32* %out, i32 %in, i32 %old) {
2889 %gep = getelementptr i32, i32* %out, i32 4
2890 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
2891 %val0 = extractvalue { i32, i1 } %val, 0
2892 store i32 %val0, i32* %out, align 4
; Returning flat cmpxchg (glc form; the old value is extracted and stored to %out),
; syncscope("agent"), success=acquire, failure=monotonic.
; No zero-count wait may precede the atomic; a wait plus cache invalidation must
; directly follow it.
2896 ; GCN-LABEL: {{^}}agent_acquire_monotonic_ret:
2897 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2898 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2899 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
2900 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2901 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2902 ; GFX8-NEXT: buffer_wbinvl1_vol
2903 ; GFX10-NEXT: buffer_gl0_inv
2904 ; GFX10-NEXT: buffer_gl1_inv
2905 ; GFX10: .amdhsa_kernel agent_acquire_monotonic_ret
2906 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2907 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2908 ; GFX10-NOT: .amdhsa_memory_ordered 0
2909 define amdgpu_kernel void @agent_acquire_monotonic_ret(
2910 i32* %out, i32 %in, i32 %old) {
2912 %gep = getelementptr i32, i32* %out, i32 4
2913 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic
2914 %val0 = extractvalue { i32, i1 } %val, 0
2915 store i32 %val0, i32* %out, align 4
; Flat cmpxchg at syncscope("agent"), acq_rel success / monotonic failure
; ordering; the returned value is used.
; Expected code: a zero-count wait directly before "flat_atomic_cmpswap ... glc"
; (one combined s_waitcnt on gfx803; s_waitcnt lgkmcnt(0) and then
; s_waitcnt_vscnt null, 0x0 on gfx1010), followed directly by a combined
; vmcnt/lgkmcnt wait and the cache invalidate(s).
2919 ; GCN-LABEL: {{^}}agent_acq_rel_monotonic_ret:
2920 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2921 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
2922 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
2923 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
2924 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2925 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2926 ; GFX8-NEXT: buffer_wbinvl1_vol
2927 ; GFX10-NEXT: buffer_gl0_inv
2928 ; GFX10-NEXT: buffer_gl1_inv
; Kernel descriptor checks (gfx1010 only): workgroup_processor_mode 0 is
; expected only when +cumode is set, and memory_ordered 0 must not be emitted.
2929 ; GFX10: .amdhsa_kernel agent_acq_rel_monotonic_ret
2930 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2931 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2932 ; GFX10-NOT: .amdhsa_memory_ordered 0
2933 define amdgpu_kernel void @agent_acq_rel_monotonic_ret(
2934 i32* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 four elements past %out, then store field 0 of the
; cmpxchg result back to %out so the atomic's return value is live.
2936 %gep = getelementptr i32, i32* %out, i32 4
2937 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic
2938 %val0 = extractvalue { i32, i1 } %val, 0
2939 store i32 %val0, i32* %out, align 4
; Flat cmpxchg at syncscope("agent"), seq_cst success / monotonic failure
; ordering; the returned value is used.
; Expected code: a zero-count wait directly before "flat_atomic_cmpswap ... glc"
; (one combined s_waitcnt on gfx803; s_waitcnt lgkmcnt(0) and then
; s_waitcnt_vscnt null, 0x0 on gfx1010), followed directly by a combined
; vmcnt/lgkmcnt wait and the cache invalidate(s).
2943 ; GCN-LABEL: {{^}}agent_seq_cst_monotonic_ret:
2944 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2945 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
2946 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
2947 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
2948 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2949 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2950 ; GFX8-NEXT: buffer_wbinvl1_vol
2951 ; GFX10-NEXT: buffer_gl0_inv
2952 ; GFX10-NEXT: buffer_gl1_inv
; Kernel descriptor checks (gfx1010 only): workgroup_processor_mode 0 is
; expected only when +cumode is set, and memory_ordered 0 must not be emitted.
2953 ; GFX10: .amdhsa_kernel agent_seq_cst_monotonic_ret
2954 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2955 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2956 ; GFX10-NOT: .amdhsa_memory_ordered 0
2957 define amdgpu_kernel void @agent_seq_cst_monotonic_ret(
2958 i32* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 four elements past %out, then store field 0 of the
; cmpxchg result back to %out so the atomic's return value is live.
2960 %gep = getelementptr i32, i32* %out, i32 4
2961 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic
2962 %val0 = extractvalue { i32, i1 } %val, 0
2963 store i32 %val0, i32* %out, align 4
; Flat cmpxchg at syncscope("agent"), acquire success / acquire failure
; ordering; the returned value is used.
; Expected code: no bare "s_waitcnt vmcnt(0)" and no zero-count
; s_waitcnt_vmcnt/s_waitcnt_vscnt between the label and the atomic; directly
; after "flat_atomic_cmpswap ... glc" a combined vmcnt/lgkmcnt wait and the
; cache invalidate(s) (buffer_wbinvl1_vol on gfx803; buffer_gl0_inv then
; buffer_gl1_inv on gfx1010).
2967 ; GCN-LABEL: {{^}}agent_acquire_acquire_ret:
2968 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2969 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2970 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
2971 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2972 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2973 ; GFX8-NEXT: buffer_wbinvl1_vol
2974 ; GFX10-NEXT: buffer_gl0_inv
2975 ; GFX10-NEXT: buffer_gl1_inv
; Kernel descriptor checks (gfx1010 only): workgroup_processor_mode 0 is
; expected only when +cumode is set, and memory_ordered 0 must not be emitted.
2976 ; GFX10: .amdhsa_kernel agent_acquire_acquire_ret
2977 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2978 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2979 ; GFX10-NOT: .amdhsa_memory_ordered 0
2980 define amdgpu_kernel void @agent_acquire_acquire_ret(
2981 i32* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 four elements past %out, then store field 0 of the
; cmpxchg result back to %out so the atomic's return value is live.
2983 %gep = getelementptr i32, i32* %out, i32 4
2984 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire acquire
2985 %val0 = extractvalue { i32, i1 } %val, 0
2986 store i32 %val0, i32* %out, align 4
; Flat cmpxchg at syncscope("agent"), release success / acquire failure
; ordering; the returned value is used.
; Expected code: a zero-count wait directly before "flat_atomic_cmpswap ... glc"
; (one combined s_waitcnt on gfx803; s_waitcnt lgkmcnt(0) and then
; s_waitcnt_vscnt null, 0x0 on gfx1010), followed directly by a combined
; vmcnt/lgkmcnt wait and the cache invalidate(s).
2990 ; GCN-LABEL: {{^}}agent_release_acquire_ret:
2991 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2992 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
2993 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
2994 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
2995 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2996 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2997 ; GFX8-NEXT: buffer_wbinvl1_vol
2998 ; GFX10-NEXT: buffer_gl0_inv
2999 ; GFX10-NEXT: buffer_gl1_inv
; Kernel descriptor checks (gfx1010 only): workgroup_processor_mode 0 is
; expected only when +cumode is set, and memory_ordered 0 must not be emitted.
3000 ; GFX10: .amdhsa_kernel agent_release_acquire_ret
3001 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
3002 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
3003 ; GFX10-NOT: .amdhsa_memory_ordered 0
3004 define amdgpu_kernel void @agent_release_acquire_ret(
3005 i32* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 four elements past %out, then store field 0 of the
; cmpxchg result back to %out so the atomic's return value is live.
3007 %gep = getelementptr i32, i32* %out, i32 4
3008 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release acquire
3009 %val0 = extractvalue { i32, i1 } %val, 0
3010 store i32 %val0, i32* %out, align 4
; Flat cmpxchg at syncscope("agent"), acq_rel success / acquire failure
; ordering; the returned value is used.
; Expected code: a zero-count wait directly before "flat_atomic_cmpswap ... glc"
; (one combined s_waitcnt on gfx803; s_waitcnt lgkmcnt(0) and then
; s_waitcnt_vscnt null, 0x0 on gfx1010), followed directly by a combined
; vmcnt/lgkmcnt wait and the cache invalidate(s).
3014 ; GCN-LABEL: {{^}}agent_acq_rel_acquire_ret:
3015 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
3016 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
3017 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
3018 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
3019 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
3020 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
3021 ; GFX8-NEXT: buffer_wbinvl1_vol
3022 ; GFX10-NEXT: buffer_gl0_inv
3023 ; GFX10-NEXT: buffer_gl1_inv
; Kernel descriptor checks (gfx1010 only): workgroup_processor_mode 0 is
; expected only when +cumode is set, and memory_ordered 0 must not be emitted.
3024 ; GFX10: .amdhsa_kernel agent_acq_rel_acquire_ret
3025 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
3026 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
3027 ; GFX10-NOT: .amdhsa_memory_ordered 0
3028 define amdgpu_kernel void @agent_acq_rel_acquire_ret(
3029 i32* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 four elements past %out, then store field 0 of the
; cmpxchg result back to %out so the atomic's return value is live.
3031 %gep = getelementptr i32, i32* %out, i32 4
3032 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire
3033 %val0 = extractvalue { i32, i1 } %val, 0
3034 store i32 %val0, i32* %out, align 4
; Flat cmpxchg at syncscope("agent"), seq_cst success / acquire failure
; ordering; the returned value is used.
; Expected code: a zero-count wait directly before "flat_atomic_cmpswap ... glc"
; (one combined s_waitcnt on gfx803; s_waitcnt lgkmcnt(0) and then
; s_waitcnt_vscnt null, 0x0 on gfx1010), followed directly by a combined
; vmcnt/lgkmcnt wait and the cache invalidate(s).
3038 ; GCN-LABEL: {{^}}agent_seq_cst_acquire_ret:
3039 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
3040 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
3041 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
3042 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
3043 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
3044 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
3045 ; GFX8-NEXT: buffer_wbinvl1_vol
3046 ; GFX10-NEXT: buffer_gl0_inv
3047 ; GFX10-NEXT: buffer_gl1_inv
; Kernel descriptor checks (gfx1010 only): workgroup_processor_mode 0 is
; expected only when +cumode is set, and memory_ordered 0 must not be emitted.
3048 ; GFX10: .amdhsa_kernel agent_seq_cst_acquire_ret
3049 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
3050 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
3051 ; GFX10-NOT: .amdhsa_memory_ordered 0
3052 define amdgpu_kernel void @agent_seq_cst_acquire_ret(
3053 i32* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 four elements past %out, then store field 0 of the
; cmpxchg result back to %out so the atomic's return value is live.
3055 %gep = getelementptr i32, i32* %out, i32 4
3056 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire
3057 %val0 = extractvalue { i32, i1 } %val, 0
3058 store i32 %val0, i32* %out, align 4
; Flat cmpxchg at syncscope("agent"), seq_cst success / seq_cst failure
; ordering; the returned value is used.
; Expected code: a zero-count wait directly before "flat_atomic_cmpswap ... glc"
; (one combined s_waitcnt on gfx803; s_waitcnt lgkmcnt(0) and then
; s_waitcnt_vscnt null, 0x0 on gfx1010), followed directly by a combined
; vmcnt/lgkmcnt wait and the cache invalidate(s).
3062 ; GCN-LABEL: {{^}}agent_seq_cst_seq_cst_ret:
3063 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
3064 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
3065 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
3066 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
3067 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
3068 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
3069 ; GFX8-NEXT: buffer_wbinvl1_vol
3070 ; GFX10-NEXT: buffer_gl0_inv
3071 ; GFX10-NEXT: buffer_gl1_inv
; Kernel descriptor checks (gfx1010 only): workgroup_processor_mode 0 is
; expected only when +cumode is set, and memory_ordered 0 must not be emitted.
3072 ; GFX10: .amdhsa_kernel agent_seq_cst_seq_cst_ret
3073 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
3074 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
3075 ; GFX10-NOT: .amdhsa_memory_ordered 0
3076 define amdgpu_kernel void @agent_seq_cst_seq_cst_ret(
3077 i32* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 four elements past %out, then store field 0 of the
; cmpxchg result back to %out so the atomic's return value is live.
3079 %gep = getelementptr i32, i32* %out, i32 4
3080 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
3081 %val0 = extractvalue { i32, i1 } %val, 0
3082 store i32 %val0, i32* %out, align 4
; Flat cmpxchg at syncscope("workgroup"), acquire success / monotonic failure
; ordering; the returned value is used.
; Expected code: no bare "s_waitcnt vmcnt(0)" and no zero-count
; s_waitcnt_vmcnt/s_waitcnt_vscnt between the label and the atomic. After
; "flat_atomic_cmpswap ... glc" the three configurations differ:
;   - gfx803: a bare s_waitcnt vmcnt(0), and no buffer_wbinvl1_vol;
;   - gfx1010 without +cumode: a combined vmcnt/lgkmcnt wait immediately
;     followed by buffer_gl0_inv;
;   - gfx1010 with +cumode: a bare s_waitcnt vmcnt(0), and no buffer_gl0_inv.
3086 ; GCN-LABEL: {{^}}workgroup_acquire_monotonic_ret:
3087 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
3088 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
3089 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
3090 ; GFX8: s_waitcnt vmcnt(0){{$}}
3091 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
3092 ; GFX10WGP-NEXT: buffer_gl0_inv
3093 ; GFX10CU: s_waitcnt vmcnt(0){{$}}
3094 ; GFX10CU-NOT: buffer_gl0_inv
3095 ; GFX8-NOT: buffer_wbinvl1_vol
; Kernel descriptor checks (gfx1010 only): workgroup_processor_mode 0 is
; expected only when +cumode is set, and memory_ordered 0 must not be emitted.
3096 ; GFX10: .amdhsa_kernel workgroup_acquire_monotonic_ret
3097 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
3098 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
3099 ; GFX10-NOT: .amdhsa_memory_ordered 0
3100 define amdgpu_kernel void @workgroup_acquire_monotonic_ret(
3101 i32* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 four elements past %out, then store field 0 of the
; cmpxchg result back to %out so the atomic's return value is live.
3103 %gep = getelementptr i32, i32* %out, i32 4
3104 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic
3105 %val0 = extractvalue { i32, i1 } %val, 0
3106 store i32 %val0, i32* %out, align 4
; Flat cmpxchg at syncscope("workgroup"), acq_rel success / monotonic failure
; ordering; the returned value is used.
; Expected code, per configuration:
;   - gfx803: no bare s_waitcnt vmcnt(0) before the atomic, a bare
;     s_waitcnt vmcnt(0) after it, and no buffer_wbinvl1_vol;
;   - gfx1010 without +cumode: a combined vmcnt/lgkmcnt wait plus
;     s_waitcnt_vscnt null, 0x0 before the atomic, then a combined wait and
;     buffer_gl0_inv directly after it;
;   - gfx1010 with +cumode: neither of those waits before the atomic, a bare
;     s_waitcnt vmcnt(0) after it, and no buffer_gl0_inv.
3110 ; GCN-LABEL: {{^}}workgroup_acq_rel_monotonic_ret:
3111 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
3112 ; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
3113 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
3114 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
3115 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
3116 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
3117 ; GFX8: s_waitcnt vmcnt(0){{$}}
3118 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
3119 ; GFX10CU: s_waitcnt vmcnt(0){{$}}
3120 ; GFX8-NOT: buffer_wbinvl1_vol
3121 ; GFX10WGP-NEXT: buffer_gl0_inv
3122 ; GFX10CU-NOT: buffer_gl0_inv
; Kernel descriptor checks (gfx1010 only): workgroup_processor_mode 0 is
; expected only when +cumode is set, and memory_ordered 0 must not be emitted.
3123 ; GFX10: .amdhsa_kernel workgroup_acq_rel_monotonic_ret
3124 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
3125 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
3126 ; GFX10-NOT: .amdhsa_memory_ordered 0
3127 define amdgpu_kernel void @workgroup_acq_rel_monotonic_ret(
3128 i32* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 four elements past %out, then store field 0 of the
; cmpxchg result back to %out so the atomic's return value is live.
3130 %gep = getelementptr i32, i32* %out, i32 4
3131 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic
3132 %val0 = extractvalue { i32, i1 } %val, 0
3133 store i32 %val0, i32* %out, align 4
; Flat cmpxchg at syncscope("workgroup"), seq_cst success / monotonic failure
; ordering; the returned value is used.
; Expected code, per configuration:
;   - gfx803: no bare s_waitcnt vmcnt(0) before the atomic, a bare
;     s_waitcnt vmcnt(0) after it, and no buffer_wbinvl1_vol;
;   - gfx1010 without +cumode: a combined vmcnt/lgkmcnt wait plus
;     s_waitcnt_vscnt null, 0x0 before the atomic, then a combined wait and
;     buffer_gl0_inv directly after it;
;   - gfx1010 with +cumode: neither of those waits before the atomic, a bare
;     s_waitcnt vmcnt(0) after it, and no buffer_gl0_inv.
3137 ; GCN-LABEL: {{^}}workgroup_seq_cst_monotonic_ret:
3138 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
3139 ; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
3140 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
3141 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
3142 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
3143 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
3144 ; GFX8: s_waitcnt vmcnt(0){{$}}
3145 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
3146 ; GFX10CU: s_waitcnt vmcnt(0){{$}}
3147 ; GFX8-NOT: buffer_wbinvl1_vol
3148 ; GFX10WGP-NEXT: buffer_gl0_inv
3149 ; GFX10CU-NOT: buffer_gl0_inv
; Kernel descriptor checks (gfx1010 only): workgroup_processor_mode 0 is
; expected only when +cumode is set, and memory_ordered 0 must not be emitted.
3150 ; GFX10: .amdhsa_kernel workgroup_seq_cst_monotonic_ret
3151 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
3152 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
3153 ; GFX10-NOT: .amdhsa_memory_ordered 0
3154 define amdgpu_kernel void @workgroup_seq_cst_monotonic_ret(
3155 i32* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 four elements past %out, then store field 0 of the
; cmpxchg result back to %out so the atomic's return value is live.
3157 %gep = getelementptr i32, i32* %out, i32 4
3158 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic
3159 %val0 = extractvalue { i32, i1 } %val, 0
3160 store i32 %val0, i32* %out, align 4
; Flat cmpxchg at syncscope("workgroup"), acquire success / acquire failure
; ordering; the returned value is used.
; Expected code: no bare "s_waitcnt vmcnt(0)" and no zero-count
; s_waitcnt_vmcnt/s_waitcnt_vscnt between the label and the atomic. After
; "flat_atomic_cmpswap ... glc" the three configurations differ:
;   - gfx803: a bare s_waitcnt vmcnt(0), and no buffer_wbinvl1_vol;
;   - gfx1010 without +cumode: a combined vmcnt/lgkmcnt wait immediately
;     followed by buffer_gl0_inv;
;   - gfx1010 with +cumode: a bare s_waitcnt vmcnt(0), and no buffer_gl0_inv.
3164 ; GCN-LABEL: {{^}}workgroup_acquire_acquire_ret:
3165 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
3166 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
3167 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
3168 ; GFX8: s_waitcnt vmcnt(0){{$}}
3169 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
3170 ; GFX10WGP-NEXT: buffer_gl0_inv
3171 ; GFX10CU: s_waitcnt vmcnt(0){{$}}
3172 ; GFX10CU-NOT: buffer_gl0_inv
3173 ; GFX8-NOT: buffer_wbinvl1_vol
; Kernel descriptor checks (gfx1010 only): workgroup_processor_mode 0 is
; expected only when +cumode is set, and memory_ordered 0 must not be emitted.
3174 ; GFX10: .amdhsa_kernel workgroup_acquire_acquire_ret
3175 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
3176 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
3177 ; GFX10-NOT: .amdhsa_memory_ordered 0
3178 define amdgpu_kernel void @workgroup_acquire_acquire_ret(
3179 i32* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 four elements past %out, then store field 0 of the
; cmpxchg result back to %out so the atomic's return value is live.
3181 %gep = getelementptr i32, i32* %out, i32 4
3182 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire
3183 %val0 = extractvalue { i32, i1 } %val, 0
3184 store i32 %val0, i32* %out, align 4
; Flat cmpxchg at syncscope("workgroup"), release success / acquire failure
; ordering; the returned value is used.
; Unlike the neighbouring tests, the atomic itself is matched separately for
; gfx803 and for gfx1010. Expected code, per configuration:
;   - gfx803: s_waitcnt lgkmcnt(0), the atomic, a bare s_waitcnt vmcnt(0), and
;     no buffer_wbinvl1_vol;
;   - gfx1010 without +cumode: a combined vmcnt/lgkmcnt wait plus
;     s_waitcnt_vscnt null, 0x0 before the atomic, then a combined wait and
;     buffer_gl0_inv directly after it;
;   - gfx1010 with +cumode: neither of those waits before the atomic, a bare
;     s_waitcnt vmcnt(0) after it, and no buffer_gl0_inv.
3188 ; GCN-LABEL: {{^}}workgroup_release_acquire_ret:
3189 ; GFX8: s_waitcnt lgkmcnt(0){{$}}
3190 ; GFX8: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
3191 ; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
3192 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
3193 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
3194 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
3195 ; GFX10: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
3196 ; GFX8: s_waitcnt vmcnt(0){{$}}
3197 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
3198 ; GFX10CU: s_waitcnt vmcnt(0){{$}}
3199 ; GFX8-NOT: buffer_wbinvl1_vol
3200 ; GFX10WGP-NEXT: buffer_gl0_inv
3201 ; GFX10CU-NOT: buffer_gl0_inv
; Kernel descriptor checks (gfx1010 only): workgroup_processor_mode 0 is
; expected only when +cumode is set, and memory_ordered 0 must not be emitted.
3202 ; GFX10: .amdhsa_kernel workgroup_release_acquire_ret
3203 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
3204 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
3205 ; GFX10-NOT: .amdhsa_memory_ordered 0
3206 define amdgpu_kernel void @workgroup_release_acquire_ret(
3207 i32* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 four elements past %out, then store field 0 of the
; cmpxchg result back to %out so the atomic's return value is live.
3209 %gep = getelementptr i32, i32* %out, i32 4
3210 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") release acquire
3211 %val0 = extractvalue { i32, i1 } %val, 0
3212 store i32 %val0, i32* %out, align 4
; Flat cmpxchg at syncscope("workgroup"), acq_rel success / acquire failure
; ordering; the returned value is used.
; Expected code, per configuration:
;   - gfx803: s_waitcnt lgkmcnt(0) before the atomic and no buffer_wbinvl1_vol
;     after it;
;   - gfx1010 without +cumode: a combined vmcnt/lgkmcnt wait plus
;     s_waitcnt_vscnt null, 0x0 before the atomic, then a combined wait and
;     buffer_gl0_inv directly after it;
;   - gfx1010 with +cumode: neither of those waits before the atomic, a bare
;     s_waitcnt vmcnt(0) after it, and no buffer_gl0_inv.
3216 ; GCN-LABEL: {{^}}workgroup_acq_rel_acquire_ret:
3217 ; GFX8: s_waitcnt lgkmcnt(0){{$}}
3218 ; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
3219 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
3220 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
3221 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
3222 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
3223 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
3224 ; GFX10CU: s_waitcnt vmcnt(0){{$}}
3225 ; GFX8-NOT: buffer_wbinvl1_vol
; The invalidate must sit on the line right after the post-atomic wait, the
; same strictness the other workgroup-scope tests in this file apply.
3226 ; GFX10WGP-NEXT: buffer_gl0_inv
3227 ; GFX10CU-NOT: buffer_gl0_inv
; Kernel descriptor checks (gfx1010 only): workgroup_processor_mode 0 is
; expected only when +cumode is set, and memory_ordered 0 must not be emitted.
3228 ; GFX10: .amdhsa_kernel workgroup_acq_rel_acquire_ret
3229 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
3230 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
3231 ; GFX10-NOT: .amdhsa_memory_ordered 0
3232 define amdgpu_kernel void @workgroup_acq_rel_acquire_ret(
3233 i32* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 four elements past %out, then store field 0 of the
; cmpxchg result back to %out so the atomic's return value is live.
3235 %gep = getelementptr i32, i32* %out, i32 4
3236 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire
3237 %val0 = extractvalue { i32, i1 } %val, 0
3238 store i32 %val0, i32* %out, align 4
; Flat cmpxchg at syncscope("workgroup"), seq_cst success / acquire failure
; ordering; the returned value is used.
; Expected code, per configuration:
;   - gfx803: s_waitcnt lgkmcnt(0) before the atomic and no buffer_wbinvl1_vol
;     after it;
;   - gfx1010 without +cumode: a combined vmcnt/lgkmcnt wait plus
;     s_waitcnt_vscnt null, 0x0 before the atomic, then a combined wait and
;     buffer_gl0_inv directly after it;
;   - gfx1010 with +cumode: neither of those waits before the atomic, a bare
;     s_waitcnt vmcnt(0) after it, and no buffer_gl0_inv.
3242 ; GCN-LABEL: {{^}}workgroup_seq_cst_acquire_ret:
3243 ; GFX8: s_waitcnt lgkmcnt(0){{$}}
3244 ; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
3245 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
3246 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
3247 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
3248 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
3249 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
3250 ; GFX10CU: s_waitcnt vmcnt(0){{$}}
3251 ; GFX8-NOT: buffer_wbinvl1_vol
3252 ; GFX10WGP-NEXT: buffer_gl0_inv
3253 ; GFX10CU-NOT: buffer_gl0_inv
; Kernel descriptor checks (gfx1010 only): workgroup_processor_mode 0 is
; expected only when +cumode is set, and memory_ordered 0 must not be emitted.
3254 ; GFX10: .amdhsa_kernel workgroup_seq_cst_acquire_ret
3255 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
3256 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
3257 ; GFX10-NOT: .amdhsa_memory_ordered 0
3258 define amdgpu_kernel void @workgroup_seq_cst_acquire_ret(
3259 i32* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 four elements past %out, then store field 0 of the
; cmpxchg result back to %out so the atomic's return value is live.
3261 %gep = getelementptr i32, i32* %out, i32 4
3262 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire
3263 %val0 = extractvalue { i32, i1 } %val, 0
3264 store i32 %val0, i32* %out, align 4
; Flat cmpxchg at syncscope("workgroup"), seq_cst success / seq_cst failure
; ordering; the returned value is used.
; Expected code, per configuration:
;   - gfx803: s_waitcnt lgkmcnt(0) before the atomic and no buffer_wbinvl1_vol
;     after it;
;   - gfx1010 without +cumode: a combined vmcnt/lgkmcnt wait plus
;     s_waitcnt_vscnt null, 0x0 before the atomic, then a combined wait and
;     buffer_gl0_inv directly after it;
;   - gfx1010 with +cumode: neither of those waits before the atomic, a bare
;     s_waitcnt vmcnt(0) after it, and no buffer_gl0_inv.
3268 ; GCN-LABEL: {{^}}workgroup_seq_cst_seq_cst_ret:
3269 ; GFX8: s_waitcnt lgkmcnt(0){{$}}
3270 ; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
3271 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
3272 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
3273 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
3274 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
3275 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
3276 ; GFX10CU: s_waitcnt vmcnt(0){{$}}
3277 ; GFX8-NOT: buffer_wbinvl1_vol
; The invalidate must sit on the line right after the post-atomic wait, the
; same strictness the other workgroup-scope tests in this file apply.
3278 ; GFX10WGP-NEXT: buffer_gl0_inv
3279 ; GFX10CU-NOT: buffer_gl0_inv
; Kernel descriptor checks (gfx1010 only): workgroup_processor_mode 0 is
; expected only when +cumode is set, and memory_ordered 0 must not be emitted.
3280 ; GFX10: .amdhsa_kernel workgroup_seq_cst_seq_cst_ret
3281 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
3282 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
3283 ; GFX10-NOT: .amdhsa_memory_ordered 0
3284 define amdgpu_kernel void @workgroup_seq_cst_seq_cst_ret(
3285 i32* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 four elements past %out, then store field 0 of the
; cmpxchg result back to %out so the atomic's return value is live.
3287 %gep = getelementptr i32, i32* %out, i32 4
3288 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst
3289 %val0 = extractvalue { i32, i1 } %val, 0
3290 store i32 %val0, i32* %out, align 4