1 ; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck %s
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck %s
; Test: volatile cmpxchg with no explicit syncscope (the test name calls this
; "system"); success ordering monotonic, failure ordering monotonic.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
4 ; CHECK-LABEL: {{^}}system_monotonic_monotonic
5 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
6 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
7 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
8 ; CHECK-NOT: buffer_wbinvl1_vol
9 define amdgpu_kernel void @system_monotonic_monotonic(
10 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
12 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
13 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in monotonic monotonic
; Test: volatile cmpxchg with no explicit syncscope (the test name calls this
; "system"); success ordering acquire, failure ordering monotonic.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before it;
; "s_waitcnt vmcnt(0)" and then buffer_wbinvl1_vol on the two lines after it.
17 ; CHECK-LABEL: {{^}}system_acquire_monotonic
18 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
19 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
20 ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
21 ; CHECK-NEXT: buffer_wbinvl1_vol
22 define amdgpu_kernel void @system_acquire_monotonic(
23 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
25 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
26 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in acquire monotonic
; Test: volatile cmpxchg with no explicit syncscope (the test name calls this
; "system"); success ordering release, failure ordering monotonic.
; Expected around flat_atomic_cmpswap: "s_waitcnt vmcnt(0)" on the line right
; before it; no "s_waitcnt vmcnt(0)" or buffer_wbinvl1_vol after it.
30 ; CHECK-LABEL: {{^}}system_release_monotonic
31 ; CHECK: s_waitcnt vmcnt(0){{$}}
32 ; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
33 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
34 ; CHECK-NOT: buffer_wbinvl1_vol
35 define amdgpu_kernel void @system_release_monotonic(
36 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
38 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
39 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in release monotonic
; Test: volatile cmpxchg with no explicit syncscope (the test name calls this
; "system"); success ordering acq_rel, failure ordering monotonic.
; Expected around flat_atomic_cmpswap: "s_waitcnt vmcnt(0)" on the line right
; before it; "s_waitcnt vmcnt(0)" and then buffer_wbinvl1_vol on the two lines
; after it.
43 ; CHECK-LABEL: {{^}}system_acq_rel_monotonic
44 ; CHECK: s_waitcnt vmcnt(0){{$}}
45 ; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
46 ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
47 ; CHECK-NEXT: buffer_wbinvl1_vol
48 define amdgpu_kernel void @system_acq_rel_monotonic(
49 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
51 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
52 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in acq_rel monotonic
; Test: volatile cmpxchg with no explicit syncscope (the test name calls this
; "system"); success ordering seq_cst, failure ordering monotonic.
; Expected around flat_atomic_cmpswap: "s_waitcnt vmcnt(0)" on the line right
; before it; "s_waitcnt vmcnt(0)" and then buffer_wbinvl1_vol on the two lines
; after it.
56 ; CHECK-LABEL: {{^}}system_seq_cst_monotonic
57 ; CHECK: s_waitcnt vmcnt(0){{$}}
58 ; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
59 ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
60 ; CHECK-NEXT: buffer_wbinvl1_vol
61 define amdgpu_kernel void @system_seq_cst_monotonic(
62 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
64 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
65 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst monotonic
; Test: volatile cmpxchg with no explicit syncscope (the test name calls this
; "system"); success ordering acquire, failure ordering acquire.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before it;
; "s_waitcnt vmcnt(0)" and then buffer_wbinvl1_vol on the two lines after it.
69 ; CHECK-LABEL: {{^}}system_acquire_acquire
70 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
71 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
72 ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
73 ; CHECK-NEXT: buffer_wbinvl1_vol
74 define amdgpu_kernel void @system_acquire_acquire(
75 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
77 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
78 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in acquire acquire
; Test: volatile cmpxchg with no explicit syncscope (the test name calls this
; "system"); success ordering release, failure ordering acquire.
; Expected around flat_atomic_cmpswap: "s_waitcnt vmcnt(0)" on the line right
; before it; "s_waitcnt vmcnt(0)" and then buffer_wbinvl1_vol on the two lines
; after it.
82 ; CHECK-LABEL: {{^}}system_release_acquire
83 ; CHECK: s_waitcnt vmcnt(0){{$}}
84 ; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
85 ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
86 ; CHECK-NEXT: buffer_wbinvl1_vol
87 define amdgpu_kernel void @system_release_acquire(
88 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
90 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
91 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in release acquire
; Test: volatile cmpxchg with no explicit syncscope (the test name calls this
; "system"); success ordering acq_rel, failure ordering acquire.
; Expected around flat_atomic_cmpswap: "s_waitcnt vmcnt(0)" on the line right
; before it; "s_waitcnt vmcnt(0)" and then buffer_wbinvl1_vol on the two lines
; after it.
95 ; CHECK-LABEL: {{^}}system_acq_rel_acquire
96 ; CHECK: s_waitcnt vmcnt(0){{$}}
97 ; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
98 ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
99 ; CHECK-NEXT: buffer_wbinvl1_vol
100 define amdgpu_kernel void @system_acq_rel_acquire(
101 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
103 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
104 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in acq_rel acquire
; Test: volatile cmpxchg with no explicit syncscope (the test name calls this
; "system"); success ordering seq_cst, failure ordering acquire.
; Expected around flat_atomic_cmpswap: "s_waitcnt vmcnt(0)" on the line right
; before it; "s_waitcnt vmcnt(0)" and then buffer_wbinvl1_vol on the two lines
; after it.
108 ; CHECK-LABEL: {{^}}system_seq_cst_acquire
109 ; CHECK: s_waitcnt vmcnt(0){{$}}
110 ; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
111 ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
112 ; CHECK-NEXT: buffer_wbinvl1_vol
113 define amdgpu_kernel void @system_seq_cst_acquire(
114 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
116 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
117 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst acquire
; Test: volatile cmpxchg with no explicit syncscope (the test name calls this
; "system"); success ordering seq_cst, failure ordering seq_cst.
; Expected around flat_atomic_cmpswap: "s_waitcnt vmcnt(0)" on the line right
; before it; "s_waitcnt vmcnt(0)" and then buffer_wbinvl1_vol on the two lines
; after it.
121 ; CHECK-LABEL: {{^}}system_seq_cst_seq_cst
122 ; CHECK: s_waitcnt vmcnt(0){{$}}
123 ; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
124 ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
125 ; CHECK-NEXT: buffer_wbinvl1_vol
126 define amdgpu_kernel void @system_seq_cst_seq_cst(
127 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
129 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
130 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst seq_cst
; Test: volatile cmpxchg with syncscope("singlethread"); success ordering
; monotonic, failure ordering monotonic.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
134 ; CHECK-LABEL: {{^}}singlethread_monotonic_monotonic
135 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
136 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
137 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
138 ; CHECK-NOT: buffer_wbinvl1_vol
139 define amdgpu_kernel void @singlethread_monotonic_monotonic(
140 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
142 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
143 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic
; Test: volatile cmpxchg with syncscope("singlethread"); success ordering
; acquire, failure ordering monotonic.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
147 ; CHECK-LABEL: {{^}}singlethread_acquire_monotonic
148 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
149 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
150 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
151 ; CHECK-NOT: buffer_wbinvl1_vol
152 define amdgpu_kernel void @singlethread_acquire_monotonic(
153 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
155 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
156 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic
; Test: volatile cmpxchg with syncscope("singlethread"); success ordering
; release, failure ordering monotonic.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
160 ; CHECK-LABEL: {{^}}singlethread_release_monotonic
161 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
162 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
163 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
164 ; CHECK-NOT: buffer_wbinvl1_vol
165 define amdgpu_kernel void @singlethread_release_monotonic(
166 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
168 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
169 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic
; Test: volatile cmpxchg with syncscope("singlethread"); success ordering
; acq_rel, failure ordering monotonic.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
173 ; CHECK-LABEL: {{^}}singlethread_acq_rel_monotonic
174 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
175 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
176 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
177 ; CHECK-NOT: buffer_wbinvl1_vol
178 define amdgpu_kernel void @singlethread_acq_rel_monotonic(
179 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
181 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
182 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic
; Test: volatile cmpxchg with syncscope("singlethread"); success ordering
; seq_cst, failure ordering monotonic.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
186 ; CHECK-LABEL: {{^}}singlethread_seq_cst_monotonic
187 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
188 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
189 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
190 ; CHECK-NOT: buffer_wbinvl1_vol
191 define amdgpu_kernel void @singlethread_seq_cst_monotonic(
192 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
194 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
195 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic
; Test: volatile cmpxchg with syncscope("singlethread"); success ordering
; acquire, failure ordering acquire.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
199 ; CHECK-LABEL: {{^}}singlethread_acquire_acquire
200 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
201 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
202 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
203 ; CHECK-NOT: buffer_wbinvl1_vol
204 define amdgpu_kernel void @singlethread_acquire_acquire(
205 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
207 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
208 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire
; Test: volatile cmpxchg with syncscope("singlethread"); success ordering
; release, failure ordering acquire.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
212 ; CHECK-LABEL: {{^}}singlethread_release_acquire
213 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
214 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
215 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
216 ; CHECK-NOT: buffer_wbinvl1_vol
217 define amdgpu_kernel void @singlethread_release_acquire(
218 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
220 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
221 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") release acquire
; Test: volatile cmpxchg with syncscope("singlethread"); success ordering
; acq_rel, failure ordering acquire.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
225 ; CHECK-LABEL: {{^}}singlethread_acq_rel_acquire
226 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
227 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
228 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
229 ; CHECK-NOT: buffer_wbinvl1_vol
230 define amdgpu_kernel void @singlethread_acq_rel_acquire(
231 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
233 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
234 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire
; Test: volatile cmpxchg with syncscope("singlethread"); success ordering
; seq_cst, failure ordering acquire.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
238 ; CHECK-LABEL: {{^}}singlethread_seq_cst_acquire
239 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
240 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
241 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
242 ; CHECK-NOT: buffer_wbinvl1_vol
243 define amdgpu_kernel void @singlethread_seq_cst_acquire(
244 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
246 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
247 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire
; Test: volatile cmpxchg with syncscope("singlethread"); success ordering
; seq_cst, failure ordering seq_cst.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
251 ; CHECK-LABEL: {{^}}singlethread_seq_cst_seq_cst
252 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
253 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
254 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
255 ; CHECK-NOT: buffer_wbinvl1_vol
256 define amdgpu_kernel void @singlethread_seq_cst_seq_cst(
257 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
259 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
260 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst
; Test: volatile cmpxchg with syncscope("agent"); success ordering monotonic,
; failure ordering monotonic.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
264 ; CHECK-LABEL: {{^}}agent_monotonic_monotonic
265 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
266 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
267 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
268 ; CHECK-NOT: buffer_wbinvl1_vol
269 define amdgpu_kernel void @agent_monotonic_monotonic(
270 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
272 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
273 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic
; Test: volatile cmpxchg with syncscope("agent"); success ordering acquire,
; failure ordering monotonic.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before it;
; "s_waitcnt vmcnt(0)" and then buffer_wbinvl1_vol on the two lines after it.
277 ; CHECK-LABEL: {{^}}agent_acquire_monotonic
278 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
279 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
280 ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
281 ; CHECK-NEXT: buffer_wbinvl1_vol
282 define amdgpu_kernel void @agent_acquire_monotonic(
283 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
285 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
286 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic
; Test: volatile cmpxchg with syncscope("agent"); success ordering release,
; failure ordering monotonic.
; Expected around flat_atomic_cmpswap: "s_waitcnt vmcnt(0)" on the line right
; before it; no "s_waitcnt vmcnt(0)" or buffer_wbinvl1_vol after it.
290 ; CHECK-LABEL: {{^}}agent_release_monotonic
291 ; CHECK: s_waitcnt vmcnt(0){{$}}
292 ; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
293 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
294 ; CHECK-NOT: buffer_wbinvl1_vol
295 define amdgpu_kernel void @agent_release_monotonic(
296 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
298 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
299 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") release monotonic
; Test: volatile cmpxchg with syncscope("agent"); success ordering acq_rel,
; failure ordering monotonic.
; Expected around flat_atomic_cmpswap: "s_waitcnt vmcnt(0)" on the line right
; before it; "s_waitcnt vmcnt(0)" and then buffer_wbinvl1_vol on the two lines
; after it.
303 ; CHECK-LABEL: {{^}}agent_acq_rel_monotonic
304 ; CHECK: s_waitcnt vmcnt(0){{$}}
305 ; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
306 ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
307 ; CHECK-NEXT: buffer_wbinvl1_vol
308 define amdgpu_kernel void @agent_acq_rel_monotonic(
309 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
311 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
312 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic
; Test: volatile cmpxchg with syncscope("agent"); success ordering seq_cst,
; failure ordering monotonic.
; Expected around flat_atomic_cmpswap: "s_waitcnt vmcnt(0)" on the line right
; before it; "s_waitcnt vmcnt(0)" and then buffer_wbinvl1_vol on the two lines
; after it.
316 ; CHECK-LABEL: {{^}}agent_seq_cst_monotonic
317 ; CHECK: s_waitcnt vmcnt(0){{$}}
318 ; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
319 ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
320 ; CHECK-NEXT: buffer_wbinvl1_vol
321 define amdgpu_kernel void @agent_seq_cst_monotonic(
322 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
324 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
325 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic
; Test: volatile cmpxchg with syncscope("agent"); success ordering acquire,
; failure ordering acquire.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before it;
; "s_waitcnt vmcnt(0)" and then buffer_wbinvl1_vol on the two lines after it.
329 ; CHECK-LABEL: {{^}}agent_acquire_acquire
330 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
331 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
332 ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
333 ; CHECK-NEXT: buffer_wbinvl1_vol
334 define amdgpu_kernel void @agent_acquire_acquire(
335 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
337 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
338 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") acquire acquire
; Test: volatile cmpxchg with syncscope("agent"); success ordering release,
; failure ordering acquire.
; Expected around flat_atomic_cmpswap: "s_waitcnt vmcnt(0)" on the line right
; before it; "s_waitcnt vmcnt(0)" and then buffer_wbinvl1_vol on the two lines
; after it.
342 ; CHECK-LABEL: {{^}}agent_release_acquire
343 ; CHECK: s_waitcnt vmcnt(0){{$}}
344 ; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
345 ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
346 ; CHECK-NEXT: buffer_wbinvl1_vol
347 define amdgpu_kernel void @agent_release_acquire(
348 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
350 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
351 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") release acquire
; Test: volatile cmpxchg with syncscope("agent"); success ordering acq_rel,
; failure ordering acquire.
; Expected around flat_atomic_cmpswap: "s_waitcnt vmcnt(0)" on the line right
; before it; "s_waitcnt vmcnt(0)" and then buffer_wbinvl1_vol on the two lines
; after it.
355 ; CHECK-LABEL: {{^}}agent_acq_rel_acquire
356 ; CHECK: s_waitcnt vmcnt(0){{$}}
357 ; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
358 ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
359 ; CHECK-NEXT: buffer_wbinvl1_vol
360 define amdgpu_kernel void @agent_acq_rel_acquire(
361 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
363 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
364 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire
; Test: volatile cmpxchg with syncscope("agent"); success ordering seq_cst,
; failure ordering acquire.
; Expected around flat_atomic_cmpswap: "s_waitcnt vmcnt(0)" on the line right
; before it; "s_waitcnt vmcnt(0)" and then buffer_wbinvl1_vol on the two lines
; after it.
368 ; CHECK-LABEL: {{^}}agent_seq_cst_acquire
369 ; CHECK: s_waitcnt vmcnt(0){{$}}
370 ; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
371 ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
372 ; CHECK-NEXT: buffer_wbinvl1_vol
373 define amdgpu_kernel void @agent_seq_cst_acquire(
374 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
376 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
377 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire
; Test: volatile cmpxchg with syncscope("agent"); success ordering seq_cst,
; failure ordering seq_cst.
; Expected around flat_atomic_cmpswap: "s_waitcnt vmcnt(0)" on the line right
; before it; "s_waitcnt vmcnt(0)" and then buffer_wbinvl1_vol on the two lines
; after it.
381 ; CHECK-LABEL: {{^}}agent_seq_cst_seq_cst
382 ; CHECK: s_waitcnt vmcnt(0){{$}}
383 ; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
384 ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
385 ; CHECK-NEXT: buffer_wbinvl1_vol
386 define amdgpu_kernel void @agent_seq_cst_seq_cst(
387 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
389 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
390 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
; Test: volatile cmpxchg with syncscope("workgroup"); success ordering
; monotonic, failure ordering monotonic.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
394 ; CHECK-LABEL: {{^}}workgroup_monotonic_monotonic
395 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
396 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
397 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
398 ; CHECK-NOT: buffer_wbinvl1_vol
399 define amdgpu_kernel void @workgroup_monotonic_monotonic(
400 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
402 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
403 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic
; Test: volatile cmpxchg with syncscope("workgroup"); success ordering
; acquire, failure ordering monotonic.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
407 ; CHECK-LABEL: {{^}}workgroup_acquire_monotonic
408 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
409 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
410 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
411 ; CHECK-NOT: buffer_wbinvl1_vol
412 define amdgpu_kernel void @workgroup_acquire_monotonic(
413 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
415 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
416 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic
; Test: volatile cmpxchg with syncscope("workgroup"); success ordering
; release, failure ordering monotonic.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
420 ; CHECK-LABEL: {{^}}workgroup_release_monotonic
421 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
422 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
423 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
424 ; CHECK-NOT: buffer_wbinvl1_vol
425 define amdgpu_kernel void @workgroup_release_monotonic(
426 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
428 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
429 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic
; Test: volatile cmpxchg with syncscope("workgroup"); success ordering
; acq_rel, failure ordering monotonic.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
433 ; CHECK-LABEL: {{^}}workgroup_acq_rel_monotonic
434 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
435 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
436 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
437 ; CHECK-NOT: buffer_wbinvl1_vol
438 define amdgpu_kernel void @workgroup_acq_rel_monotonic(
439 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
441 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
442 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic
; Test: volatile cmpxchg with syncscope("workgroup"); success ordering
; seq_cst, failure ordering monotonic.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
446 ; CHECK-LABEL: {{^}}workgroup_seq_cst_monotonic
447 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
448 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
449 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
450 ; CHECK-NOT: buffer_wbinvl1_vol
451 define amdgpu_kernel void @workgroup_seq_cst_monotonic(
452 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
454 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
455 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic
; Test: volatile cmpxchg with syncscope("workgroup"); success ordering
; acquire, failure ordering acquire.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
459 ; CHECK-LABEL: {{^}}workgroup_acquire_acquire
460 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
461 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
462 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
463 ; CHECK-NOT: buffer_wbinvl1_vol
464 define amdgpu_kernel void @workgroup_acquire_acquire(
465 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
467 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
468 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire
; Test: volatile cmpxchg with syncscope("workgroup"); success ordering
; release, failure ordering acquire.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
472 ; CHECK-LABEL: {{^}}workgroup_release_acquire
473 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
474 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
475 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
476 ; CHECK-NOT: buffer_wbinvl1_vol
477 define amdgpu_kernel void @workgroup_release_acquire(
478 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
480 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
481 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") release acquire
; Test: volatile cmpxchg with syncscope("workgroup"); success ordering
; acq_rel, failure ordering acquire.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
485 ; CHECK-LABEL: {{^}}workgroup_acq_rel_acquire
486 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
487 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
488 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
489 ; CHECK-NOT: buffer_wbinvl1_vol
490 define amdgpu_kernel void @workgroup_acq_rel_acquire(
491 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
493 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
494 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire
; Test: volatile cmpxchg with syncscope("workgroup"); success ordering
; seq_cst, failure ordering acquire.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
498 ; CHECK-LABEL: {{^}}workgroup_seq_cst_acquire
499 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
500 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
501 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
502 ; CHECK-NOT: buffer_wbinvl1_vol
503 define amdgpu_kernel void @workgroup_seq_cst_acquire(
504 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
506 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
507 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire
; Test: volatile cmpxchg with syncscope("workgroup"); success ordering
; seq_cst, failure ordering seq_cst.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
511 ; CHECK-LABEL: {{^}}workgroup_seq_cst_seq_cst
512 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
513 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
514 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
515 ; CHECK-NOT: buffer_wbinvl1_vol
516 define amdgpu_kernel void @workgroup_seq_cst_seq_cst(
517 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
519 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
520 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst
; Test: volatile cmpxchg with syncscope("wavefront"); success ordering
; monotonic, failure ordering monotonic.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
524 ; CHECK-LABEL: {{^}}wavefront_monotonic_monotonic
525 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
526 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
527 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
528 ; CHECK-NOT: buffer_wbinvl1_vol
529 define amdgpu_kernel void @wavefront_monotonic_monotonic(
530 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
532 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
533 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic
; Test: volatile cmpxchg with syncscope("wavefront"); success ordering
; acquire, failure ordering monotonic.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
537 ; CHECK-LABEL: {{^}}wavefront_acquire_monotonic
538 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
539 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
540 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
541 ; CHECK-NOT: buffer_wbinvl1_vol
542 define amdgpu_kernel void @wavefront_acquire_monotonic(
543 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
545 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
546 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic
; Test: volatile cmpxchg with syncscope("wavefront"); success ordering
; release, failure ordering monotonic.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
550 ; CHECK-LABEL: {{^}}wavefront_release_monotonic
551 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
552 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
553 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
554 ; CHECK-NOT: buffer_wbinvl1_vol
555 define amdgpu_kernel void @wavefront_release_monotonic(
556 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
558 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
559 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic
; Test: volatile cmpxchg with syncscope("wavefront"); success ordering
; acq_rel, failure ordering monotonic.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
563 ; CHECK-LABEL: {{^}}wavefront_acq_rel_monotonic
564 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
565 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
566 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
567 ; CHECK-NOT: buffer_wbinvl1_vol
568 define amdgpu_kernel void @wavefront_acq_rel_monotonic(
569 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
571 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
572 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic
; Test: volatile cmpxchg with syncscope("wavefront"); success ordering
; seq_cst, failure ordering monotonic.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
576 ; CHECK-LABEL: {{^}}wavefront_seq_cst_monotonic
577 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
578 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
579 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
580 ; CHECK-NOT: buffer_wbinvl1_vol
581 define amdgpu_kernel void @wavefront_seq_cst_monotonic(
582 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
584 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
585 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic
; Test: volatile cmpxchg with syncscope("wavefront"); success ordering
; acquire, failure ordering acquire.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
589 ; CHECK-LABEL: {{^}}wavefront_acquire_acquire
590 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
591 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
592 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
593 ; CHECK-NOT: buffer_wbinvl1_vol
594 define amdgpu_kernel void @wavefront_acquire_acquire(
595 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
597 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
598 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire
; Test: volatile cmpxchg with syncscope("wavefront"); success ordering
; release, failure ordering acquire.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
602 ; CHECK-LABEL: {{^}}wavefront_release_acquire
603 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
604 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
605 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
606 ; CHECK-NOT: buffer_wbinvl1_vol
607 define amdgpu_kernel void @wavefront_release_acquire(
608 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
610 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
611 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") release acquire
; Test: volatile cmpxchg with syncscope("wavefront"); success ordering
; acq_rel, failure ordering acquire.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
615 ; CHECK-LABEL: {{^}}wavefront_acq_rel_acquire
616 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
617 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
618 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
619 ; CHECK-NOT: buffer_wbinvl1_vol
620 define amdgpu_kernel void @wavefront_acq_rel_acquire(
621 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
623 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
624 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire
; Test: volatile cmpxchg with syncscope("wavefront"); success ordering
; seq_cst, failure ordering acquire.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
628 ; CHECK-LABEL: {{^}}wavefront_seq_cst_acquire
629 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
630 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
631 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
632 ; CHECK-NOT: buffer_wbinvl1_vol
633 define amdgpu_kernel void @wavefront_seq_cst_acquire(
634 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
636 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
637 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire
; Test: volatile cmpxchg with syncscope("wavefront"); success ordering
; seq_cst, failure ordering seq_cst.
; Expected around flat_atomic_cmpswap: no "s_waitcnt vmcnt(0)" before or after
; it, and no buffer_wbinvl1_vol after it.
641 ; CHECK-LABEL: {{^}}wavefront_seq_cst_seq_cst
642 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
643 ; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
644 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
645 ; CHECK-NOT: buffer_wbinvl1_vol
646 define amdgpu_kernel void @wavefront_seq_cst_seq_cst(
647 i32 addrspace(4)* %out, i32 %in, i32 %old) {
; Compare-and-swap the i32 at element index 4 from %out (compare %old, new %in).
649 %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
650 %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst