[InstCombine] Signed saturation patterns
[llvm-complete.git] / test / CodeGen / AMDGPU / memory-legalizer-atomic-rmw.ll
blob21612e85f3e638dfce42507a58c3fef09d69c10f
1 ; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX10WGP %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3,+cumode -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX10CU %s
; Volatile xchg with syncscope("one-as") and monotonic ordering; the result is unused.
; Expects a flat_atomic_swap with no `s_waitcnt vmcnt(0)` or zero-count
; s_waitcnt_v[ms]cnt before or after it, and no buffer_wbinvl1_vol /
; buffer_gl?_inv after it.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
6 ; GCN-LABEL: {{^}}system_one_as_monotonic:
7 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
8 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
9 ; GCN:       flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
10 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
11 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
12 ; GCN-NOT:   buffer_{{wbinvl1_vol|gl._inv}}
13 ; GFX10:         .amdhsa_kernel system_one_as_monotonic
14 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
15 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
16 ; GFX10-NOT:     .amdhsa_memory_ordered 0
17 define amdgpu_kernel void @system_one_as_monotonic(
18     i32* %out, i32 %in) {
19 entry:
20   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") monotonic
21   ret void
; Volatile xchg with syncscope("one-as") and acquire ordering; the result is unused.
; No `s_waitcnt vmcnt(0)` or zero-count s_waitcnt_v[ms]cnt may precede the
; flat_atomic_swap. Directly after it, gfx803 expects `s_waitcnt vmcnt(0)` then
; buffer_wbinvl1_vol; gfx1010 expects `s_waitcnt_vscnt null, 0x0` then
; buffer_gl0_inv and buffer_gl1_inv.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
24 ; GCN-LABEL: {{^}}system_one_as_acquire:
25 ; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
26 ; GCN-NOT:    s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
27 ; GCN:        flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
28 ; GFX8-NEXT:  s_waitcnt vmcnt(0){{$}}
29 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
30 ; GFX8-NEXT:  buffer_wbinvl1_vol
31 ; GFX10-NEXT: buffer_gl0_inv
32 ; GFX10-NEXT: buffer_gl1_inv
33 ; GFX10:         .amdhsa_kernel system_one_as_acquire
34 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
35 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
36 ; GFX10-NOT:     .amdhsa_memory_ordered 0
37 define amdgpu_kernel void @system_one_as_acquire(
38     i32* %out, i32 %in) {
39 entry:
40   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") acquire
41   ret void
; Volatile xchg with syncscope("one-as") and release ordering; the result is unused.
; `s_waitcnt vmcnt(0)` (followed on gfx1010 by `s_waitcnt_vscnt null, 0x0`) must sit
; directly before the flat_atomic_swap. After the swap no `s_waitcnt vmcnt(0)`,
; zero-count s_waitcnt_v[ms]cnt, buffer_wbinvl1_vol or buffer_gl?_inv is expected.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
44 ; GCN-LABEL: {{^}}system_one_as_release:
45 ; GCN:        s_waitcnt vmcnt(0){{$}}
46 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
47 ; GCN-NEXT:   flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
48 ; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
49 ; GCN-NOT:    s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
50 ; GCN-NOT:    buffer_{{wbinvl1_vol|gl._inv}}
51 ; GFX10:         .amdhsa_kernel system_one_as_release
52 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
53 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
54 ; GFX10-NOT:     .amdhsa_memory_ordered 0
55 define amdgpu_kernel void @system_one_as_release(
56     i32* %out, i32 %in) {
57 entry:
58   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") release
59   ret void
; Volatile xchg with syncscope("one-as") and acq_rel ordering; the result is unused.
; Before the flat_atomic_swap: `s_waitcnt vmcnt(0)` (plus `s_waitcnt_vscnt null, 0x0`
; on gfx1010). Directly after it: gfx803 expects `s_waitcnt vmcnt(0)` then
; buffer_wbinvl1_vol; gfx1010 expects `s_waitcnt_vscnt null, 0x0` then
; buffer_gl0_inv and buffer_gl1_inv.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
62 ; GCN-LABEL: {{^}}system_one_as_acq_rel:
63 ; GCN:         s_waitcnt vmcnt(0){{$}}
64 ; GFX10-NEXT:  s_waitcnt_vscnt null, 0x0{{$}}
65 ; GCN-NEXT:    flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
66 ; GFX8-NEXT:   s_waitcnt vmcnt(0){{$}}
67 ; GFX10-NEXT:  s_waitcnt_vscnt null, 0x0{{$}}
68 ; GFX8-NEXT:   buffer_wbinvl1_vol
69 ; GFX10-NEXT:  buffer_gl0_inv
70 ; GFX10-NEXT:  buffer_gl1_inv
71 ; GFX10:         .amdhsa_kernel system_one_as_acq_rel
72 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
73 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
74 ; GFX10-NOT:     .amdhsa_memory_ordered 0
75 define amdgpu_kernel void @system_one_as_acq_rel(
76     i32* %out, i32 %in) {
77 entry:
78   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") acq_rel
79   ret void
; Volatile xchg with syncscope("one-as") and seq_cst ordering; the result is unused.
; The expected sequence is the same as for system_one_as_acq_rel: a
; `s_waitcnt vmcnt(0)` (plus `s_waitcnt_vscnt null, 0x0` on gfx1010) before the
; flat_atomic_swap, and a wait followed by buffer_wbinvl1_vol (gfx803) or
; buffer_gl0_inv + buffer_gl1_inv (gfx1010) directly after it.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
82 ; GCN-LABEL: {{^}}system_one_as_seq_cst:
83 ; GCN:        s_waitcnt vmcnt(0){{$}}
84 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
85 ; GCN-NEXT:   flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
86 ; GFX8-NEXT:  s_waitcnt vmcnt(0){{$}}
87 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
88 ; GFX8-NEXT:  buffer_wbinvl1_vol
89 ; GFX10-NEXT: buffer_gl0_inv
90 ; GFX10-NEXT: buffer_gl1_inv
91 ; GFX10:         .amdhsa_kernel system_one_as_seq_cst
92 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
93 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
94 ; GFX10-NOT:     .amdhsa_memory_ordered 0
95 define amdgpu_kernel void @system_one_as_seq_cst(
96     i32* %out, i32 %in) {
97 entry:
98   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") seq_cst
99   ret void
; Volatile xchg with syncscope("singlethread-one-as") and monotonic ordering; the
; result is unused.
; Expects a flat_atomic_swap with no `s_waitcnt vmcnt(0)` or zero-count
; s_waitcnt_v[ms]cnt before or after it, and no buffer_wbinvl1_vol /
; buffer_gl?_inv after it.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
102 ; GCN-LABEL: {{^}}singlethread_one_as_monotonic:
103 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
104 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
105 ; GCN:       flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
106 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
107 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
108 ; GCN-NOT:   buffer_{{wbinvl1_vol|gl._inv}}
109 ; GFX10:         .amdhsa_kernel singlethread_one_as_monotonic
110 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
111 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
112 ; GFX10-NOT:     .amdhsa_memory_ordered 0
113 define amdgpu_kernel void @singlethread_one_as_monotonic(
114     i32* %out, i32 %in) {
115 entry:
116   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") monotonic
117   ret void
; Volatile xchg with syncscope("singlethread-one-as") and acquire ordering; the
; result is unused.
; Despite the acquire ordering, the checks are the same as for the monotonic case:
; a flat_atomic_swap with no `s_waitcnt vmcnt(0)` or zero-count s_waitcnt_v[ms]cnt
; before or after it, and no buffer_wbinvl1_vol / buffer_gl?_inv after it.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
120 ; GCN-LABEL: {{^}}singlethread_one_as_acquire:
121 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
122 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
123 ; GCN:       flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
124 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
125 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
126 ; GCN-NOT:   buffer_{{wbinvl1_vol|gl._inv}}
127 ; GFX10:         .amdhsa_kernel singlethread_one_as_acquire
128 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
129 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
130 ; GFX10-NOT:     .amdhsa_memory_ordered 0
131 define amdgpu_kernel void @singlethread_one_as_acquire(
132     i32* %out, i32 %in) {
133 entry:
134   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") acquire
135   ret void
; Volatile xchg with syncscope("singlethread-one-as") and release ordering; the
; result is unused.
; Despite the release ordering, the checks are the same as for the monotonic case:
; a flat_atomic_swap with no `s_waitcnt vmcnt(0)` or zero-count s_waitcnt_v[ms]cnt
; before or after it, and no buffer_wbinvl1_vol / buffer_gl?_inv after it.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
138 ; GCN-LABEL: {{^}}singlethread_one_as_release:
139 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
140 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
141 ; GCN:       flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
142 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
143 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
144 ; GCN-NOT:   buffer_{{wbinvl1_vol|gl._inv}}
145 ; GFX10:         .amdhsa_kernel singlethread_one_as_release
146 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
147 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
148 ; GFX10-NOT:     .amdhsa_memory_ordered 0
149 define amdgpu_kernel void @singlethread_one_as_release(
150     i32* %out, i32 %in) {
151 entry:
152   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") release
153   ret void
; Volatile xchg with syncscope("singlethread-one-as") and acq_rel ordering; the
; result is unused.
; Despite the acq_rel ordering, the checks are the same as for the monotonic case:
; a flat_atomic_swap with no `s_waitcnt vmcnt(0)` or zero-count s_waitcnt_v[ms]cnt
; before or after it, and no buffer_wbinvl1_vol / buffer_gl?_inv after it.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
156 ; GCN-LABEL: {{^}}singlethread_one_as_acq_rel:
157 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
158 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
159 ; GCN:       flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
160 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
161 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
162 ; GCN-NOT:   buffer_{{wbinvl1_vol|gl._inv}}
163 ; GFX10:         .amdhsa_kernel singlethread_one_as_acq_rel
164 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
165 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
166 ; GFX10-NOT:     .amdhsa_memory_ordered 0
167 define amdgpu_kernel void @singlethread_one_as_acq_rel(
168     i32* %out, i32 %in) {
169 entry:
170   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") acq_rel
171   ret void
; Volatile xchg with syncscope("singlethread-one-as") and seq_cst ordering; the
; result is unused.
; Despite the seq_cst ordering, the checks are the same as for the monotonic case:
; a flat_atomic_swap with no `s_waitcnt vmcnt(0)` or zero-count s_waitcnt_v[ms]cnt
; before or after it, and no buffer_wbinvl1_vol / buffer_gl?_inv after it.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
174 ; GCN-LABEL: {{^}}singlethread_one_as_seq_cst:
175 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
176 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
177 ; GCN:       flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
178 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
179 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
180 ; GCN-NOT:   buffer_{{wbinvl1_vol|gl._inv}}
181 ; GFX10:         .amdhsa_kernel singlethread_one_as_seq_cst
182 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
183 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
184 ; GFX10-NOT:     .amdhsa_memory_ordered 0
185 define amdgpu_kernel void @singlethread_one_as_seq_cst(
186     i32* %out, i32 %in) {
187 entry:
188   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") seq_cst
189   ret void
; Volatile xchg with syncscope("agent-one-as") and monotonic ordering; the result
; is unused.
; Expects a flat_atomic_swap with no `s_waitcnt vmcnt(0)` or zero-count
; s_waitcnt_v[ms]cnt before or after it, and no buffer_wbinvl1_vol /
; buffer_gl?_inv after it.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
192 ; GCN-LABEL: {{^}}agent_one_as_monotonic:
193 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
194 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
195 ; GCN:       flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
196 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
197 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
198 ; GCN-NOT:   buffer_{{wbinvl1_vol|gl._inv}}
199 ; GFX10:         .amdhsa_kernel agent_one_as_monotonic
200 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
201 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
202 ; GFX10-NOT:     .amdhsa_memory_ordered 0
203 define amdgpu_kernel void @agent_one_as_monotonic(
204     i32* %out, i32 %in) {
205 entry:
206   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") monotonic
207   ret void
; Volatile xchg with syncscope("agent-one-as") and acquire ordering; the result is
; unused.
; No `s_waitcnt vmcnt(0)` or zero-count s_waitcnt_v[ms]cnt may precede the
; flat_atomic_swap. Directly after it, gfx803 expects `s_waitcnt vmcnt(0)` then
; buffer_wbinvl1_vol; gfx1010 expects `s_waitcnt_vscnt null, 0x0` then
; buffer_gl0_inv and buffer_gl1_inv.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
210 ; GCN-LABEL: {{^}}agent_one_as_acquire:
211 ; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
212 ; GCN-NOT:    s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
213 ; GCN:        flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
214 ; GFX8-NEXT:  s_waitcnt vmcnt(0){{$}}
215 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
216 ; GFX8-NEXT:  buffer_wbinvl1_vol
217 ; GFX10-NEXT: buffer_gl0_inv
218 ; GFX10-NEXT: buffer_gl1_inv
219 ; GFX10:         .amdhsa_kernel agent_one_as_acquire
220 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
221 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
222 ; GFX10-NOT:     .amdhsa_memory_ordered 0
223 define amdgpu_kernel void @agent_one_as_acquire(
224     i32* %out, i32 %in) {
225 entry:
226   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") acquire
227   ret void
; Volatile xchg with syncscope("agent-one-as") and release ordering; the result is
; unused.
; `s_waitcnt vmcnt(0)` (followed on gfx1010 by `s_waitcnt_vscnt null, 0x0`) must sit
; directly before the flat_atomic_swap. After the swap no `s_waitcnt vmcnt(0)`,
; zero-count s_waitcnt_v[ms]cnt, buffer_wbinvl1_vol or buffer_gl?_inv is expected.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
230 ; GCN-LABEL: {{^}}agent_one_as_release:
231 ; GCN:        s_waitcnt vmcnt(0){{$}}
232 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
233 ; GCN-NEXT:   flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
234 ; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
235 ; GCN-NOT:    s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
236 ; GCN-NOT:    buffer_{{wbinvl1_vol|gl._inv}}
237 ; GFX10:         .amdhsa_kernel agent_one_as_release
238 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
239 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
240 ; GFX10-NOT:     .amdhsa_memory_ordered 0
241 define amdgpu_kernel void @agent_one_as_release(
242     i32* %out, i32 %in) {
243 entry:
244   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") release
245   ret void
; Volatile xchg with syncscope("agent-one-as") and acq_rel ordering; the result is
; unused.
; Before the flat_atomic_swap: `s_waitcnt vmcnt(0)` (plus `s_waitcnt_vscnt null, 0x0`
; on gfx1010). Directly after it: gfx803 expects `s_waitcnt vmcnt(0)` then
; buffer_wbinvl1_vol; gfx1010 expects `s_waitcnt_vscnt null, 0x0` then
; buffer_gl0_inv and buffer_gl1_inv.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
248 ; GCN-LABEL: {{^}}agent_one_as_acq_rel:
249 ; GCN:        s_waitcnt vmcnt(0){{$}}
250 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
251 ; GCN-NEXT:   flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
252 ; GFX8-NEXT:  s_waitcnt vmcnt(0){{$}}
253 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
254 ; GFX8-NEXT:  buffer_wbinvl1_vol
255 ; GFX10-NEXT: buffer_gl0_inv
256 ; GFX10-NEXT: buffer_gl1_inv
257 ; GFX10:         .amdhsa_kernel agent_one_as_acq_rel
258 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
259 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
260 ; GFX10-NOT:     .amdhsa_memory_ordered 0
261 define amdgpu_kernel void @agent_one_as_acq_rel(
262     i32* %out, i32 %in) {
263 entry:
264   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") acq_rel
265   ret void
; Volatile xchg with syncscope("agent-one-as") and seq_cst ordering; the result is
; unused.
; The expected sequence is the same as for agent_one_as_acq_rel: a
; `s_waitcnt vmcnt(0)` (plus `s_waitcnt_vscnt null, 0x0` on gfx1010) before the
; flat_atomic_swap, and a wait followed by buffer_wbinvl1_vol (gfx803) or
; buffer_gl0_inv + buffer_gl1_inv (gfx1010) directly after it.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
268 ; GCN-LABEL: {{^}}agent_one_as_seq_cst:
269 ; GCN:        s_waitcnt vmcnt(0){{$}}
270 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
271 ; GCN-NEXT:   flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
272 ; GFX8-NEXT:  s_waitcnt vmcnt(0){{$}}
273 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
274 ; GFX8-NEXT:  buffer_wbinvl1_vol
275 ; GFX10-NEXT: buffer_gl0_inv
276 ; GFX10-NEXT: buffer_gl1_inv
277 ; GFX10:         .amdhsa_kernel agent_one_as_seq_cst
278 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
279 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
280 ; GFX10-NOT:     .amdhsa_memory_ordered 0
281 define amdgpu_kernel void @agent_one_as_seq_cst(
282     i32* %out, i32 %in) {
283 entry:
284   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") seq_cst
285   ret void
; Volatile xchg with syncscope("workgroup-one-as") and monotonic ordering; the
; result is unused.
; Expects a flat_atomic_swap with no `s_waitcnt vmcnt(0)` or zero-count
; s_waitcnt_v[ms]cnt before or after it, and no buffer_wbinvl1_vol /
; buffer_gl?_inv after it.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
288 ; GCN-LABEL: {{^}}workgroup_one_as_monotonic:
289 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
290 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
291 ; GCN:       flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
292 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
293 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
294 ; GCN-NOT:   buffer_{{wbinvl1_vol|gl._inv}}
295 ; GFX10:         .amdhsa_kernel workgroup_one_as_monotonic
296 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
297 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
298 ; GFX10-NOT:     .amdhsa_memory_ordered 0
299 define amdgpu_kernel void @workgroup_one_as_monotonic(
300     i32* %out, i32 %in) {
301 entry:
302   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") monotonic
303   ret void
; Volatile xchg with syncscope("workgroup-one-as") and acquire ordering; the result
; is unused.
; No `s_waitcnt vmcnt(0)` or zero-count s_waitcnt_v[ms]cnt may precede the
; flat_atomic_swap on any run. What follows the swap differs per run:
;   - gfx803: no `s_waitcnt vmcnt(0)` and no buffer_wbinvl1_vol;
;   - gfx1010 without +cumode: `s_waitcnt_vscnt null, 0x0` then buffer_gl0_inv,
;     on consecutive lines;
;   - gfx1010 with +cumode: neither of those two instructions.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
306 ; GCN-LABEL:     {{^}}workgroup_one_as_acquire:
307 ; GCN-NOT:       s_waitcnt vmcnt(0){{$}}
308 ; GCN-NOT:       s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
309 ; GCN:           flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
310 ; GFX8-NOT:      s_waitcnt vmcnt(0){{$}}
311 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
312 ; GFX10CU-NOT:   s_waitcnt_vscnt null, 0x0{{$}}
313 ; GFX8-NOT:      buffer_wbinvl1_vol
314 ; GFX10WGP-NEXT: buffer_gl0_inv
315 ; GFX10CU-NOT:   buffer_gl0_inv
316 ; GFX10:         .amdhsa_kernel workgroup_one_as_acquire
317 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
318 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
319 ; GFX10-NOT:     .amdhsa_memory_ordered 0
320 define amdgpu_kernel void @workgroup_one_as_acquire(
321     i32* %out, i32 %in) {
322 entry:
323   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") acquire
324   ret void
; Volatile xchg with syncscope("workgroup-one-as") and release ordering; the result
; is unused.
; What precedes the flat_atomic_swap differs per run:
;   - gfx803: no `s_waitcnt vmcnt(0)`;
;   - gfx1010 without +cumode: `s_waitcnt vmcnt(0)` immediately followed by
;     `s_waitcnt_vscnt null, 0x0`;
;   - gfx1010 with +cumode: neither of those two waits.
; After the swap no run expects `s_waitcnt vmcnt(0)`, a zero-count
; s_waitcnt_v[ms]cnt, buffer_wbinvl1_vol or buffer_gl?_inv.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
327 ; GCN-LABEL:     {{^}}workgroup_one_as_release:
328 ; GFX8-NOT:      s_waitcnt vmcnt(0){{$}}
329 ; GFX10WGP:      s_waitcnt vmcnt(0){{$}}
330 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
331 ; GFX10CU-NOT:   s_waitcnt vmcnt(0){{$}}
332 ; GFX10CU-NOT:   s_waitcnt_vscnt null, 0x0{{$}}
333 ; GCN:           flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
334 ; GCN-NOT:       s_waitcnt vmcnt(0){{$}}
335 ; GCN-NOT:       s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
336 ; GCN-NOT:       buffer_{{wbinvl1_vol|gl._inv}}
337 ; GFX10:         .amdhsa_kernel workgroup_one_as_release
338 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
339 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
340 ; GFX10-NOT:     .amdhsa_memory_ordered 0
341 define amdgpu_kernel void @workgroup_one_as_release(
342     i32* %out, i32 %in) {
343 entry:
344   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") release
345   ret void
; Volatile xchg with syncscope("workgroup-one-as") and acq_rel ordering; the result
; is unused.
; Only the gfx1010 run without +cumode expects synchronization around the
; flat_atomic_swap: `s_waitcnt vmcnt(0)` + `s_waitcnt_vscnt null, 0x0` before it,
; and `s_waitcnt_vscnt null, 0x0` + buffer_gl0_inv on the lines directly after it.
; The gfx803 run must show no `s_waitcnt vmcnt(0)` on either side and no
; buffer_wbinvl1_vol; the +cumode run must show none of the waits and no
; buffer_gl0_inv.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
348 ; GCN-LABEL:     {{^}}workgroup_one_as_acq_rel:
349 ; GFX8-NOT:      s_waitcnt vmcnt(0){{$}}
350 ; GFX10WGP:      s_waitcnt vmcnt(0){{$}}
351 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
352 ; GFX10CU-NOT:   s_waitcnt vmcnt(0){{$}}
353 ; GFX10CU-NOT:   s_waitcnt_vscnt null, 0x0{{$}}
354 ; GCN:           flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
355 ; GFX8-NOT:      s_waitcnt vmcnt(0){{$}}
356 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
357 ; GFX10CU-NOT:   s_waitcnt_vscnt null, 0x0{{$}}
358 ; GFX8-NOT:      buffer_wbinvl1_vol
359 ; GFX10WGP-NEXT: buffer_gl0_inv
360 ; GFX10CU-NOT:   buffer_gl0_inv
361 ; GFX10:         .amdhsa_kernel workgroup_one_as_acq_rel
362 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
363 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
364 ; GFX10-NOT:     .amdhsa_memory_ordered 0
365 define amdgpu_kernel void @workgroup_one_as_acq_rel(
366     i32* %out, i32 %in) {
367 entry:
368   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") acq_rel
369   ret void
; Volatile xchg with syncscope("workgroup-one-as") and seq_cst ordering; the result
; is unused.
; Only the gfx1010 run without +cumode expects synchronization around the
; flat_atomic_swap: `s_waitcnt vmcnt(0)` + `s_waitcnt_vscnt null, 0x0` before it,
; and `s_waitcnt_vscnt null, 0x0` + buffer_gl0_inv on the lines directly after it.
; The gfx803 and +cumode runs must show none of these.
; NOTE(review): the not-check on `s_waitcnt vmcnt(0)` right after the swap uses the
; prefix shared by all runs, whereas workgroup_one_as_acq_rel restricts the same
; check to gfx803 - confirm the difference is intentional.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
372 ; GCN-LABEL:     {{^}}workgroup_one_as_seq_cst:
373 ; GFX8-NOT:      s_waitcnt vmcnt(0){{$}}
374 ; GFX10WGP:      s_waitcnt vmcnt(0){{$}}
375 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
376 ; GFX10CU-NOT:   s_waitcnt vmcnt(0){{$}}
377 ; GFX10CU-NOT:   s_waitcnt_vscnt null, 0x0{{$}}
378 ; GCN:           flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
379 ; GCN-NOT:       s_waitcnt vmcnt(0){{$}}
380 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
381 ; GFX10CU-NOT:   s_waitcnt_vscnt null, 0x0{{$}}
382 ; GFX8-NOT:      buffer_wbinvl1_vol
383 ; GFX10WGP-NEXT: buffer_gl0_inv
384 ; GFX10CU-NOT:   buffer_gl0_inv
385 ; GFX10:         .amdhsa_kernel workgroup_one_as_seq_cst
386 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
387 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
388 ; GFX10-NOT:     .amdhsa_memory_ordered 0
389 define amdgpu_kernel void @workgroup_one_as_seq_cst(
390     i32* %out, i32 %in) {
391 entry:
392   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") seq_cst
393   ret void
; Volatile xchg with syncscope("wavefront-one-as") and monotonic ordering; the
; result is unused.
; Expects a flat_atomic_swap with no `s_waitcnt vmcnt(0)` or zero-count
; s_waitcnt_v[ms]cnt before or after it, and no buffer_wbinvl1_vol /
; buffer_gl?_inv after it.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
396 ; GCN-LABEL: {{^}}wavefront_one_as_monotonic:
397 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
398 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
399 ; GCN:       flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
400 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
401 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
402 ; GCN-NOT:   buffer_{{wbinvl1_vol|gl._inv}}
403 ; GFX10:         .amdhsa_kernel wavefront_one_as_monotonic
404 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
405 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
406 ; GFX10-NOT:     .amdhsa_memory_ordered 0
407 define amdgpu_kernel void @wavefront_one_as_monotonic(
408     i32* %out, i32 %in) {
409 entry:
410   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") monotonic
411   ret void
; Volatile xchg with syncscope("wavefront-one-as") and acquire ordering; the result
; is unused.
; Despite the acquire ordering, the checks are the same as for the monotonic case:
; a flat_atomic_swap with no `s_waitcnt vmcnt(0)` or zero-count s_waitcnt_v[ms]cnt
; before or after it, and no buffer_wbinvl1_vol / buffer_gl?_inv after it.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
414 ; GCN-LABEL: {{^}}wavefront_one_as_acquire:
415 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
416 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
417 ; GCN:       flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
418 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
419 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
420 ; GCN-NOT:   buffer_{{wbinvl1_vol|gl._inv}}
421 ; GFX10:         .amdhsa_kernel wavefront_one_as_acquire
422 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
423 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
424 ; GFX10-NOT:     .amdhsa_memory_ordered 0
425 define amdgpu_kernel void @wavefront_one_as_acquire(
426     i32* %out, i32 %in) {
427 entry:
428   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") acquire
429   ret void
; Volatile xchg with syncscope("wavefront-one-as") and release ordering; the result
; is unused.
; Despite the release ordering, the checks are the same as for the monotonic case:
; a flat_atomic_swap with no `s_waitcnt vmcnt(0)` or zero-count s_waitcnt_v[ms]cnt
; before or after it, and no buffer_wbinvl1_vol / buffer_gl?_inv after it.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
432 ; GCN-LABEL: {{^}}wavefront_one_as_release:
433 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
434 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
435 ; GCN:       flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
436 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
437 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
438 ; GCN-NOT:   buffer_{{wbinvl1_vol|gl._inv}}
439 ; GFX10:         .amdhsa_kernel wavefront_one_as_release
440 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
441 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
442 ; GFX10-NOT:     .amdhsa_memory_ordered 0
443 define amdgpu_kernel void @wavefront_one_as_release(
444     i32* %out, i32 %in) {
445 entry:
446   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") release
447   ret void
; Volatile xchg with syncscope("wavefront-one-as") and acq_rel ordering; the result
; is unused.
; Despite the acq_rel ordering, the checks are the same as for the monotonic case:
; a flat_atomic_swap with no `s_waitcnt vmcnt(0)` or zero-count s_waitcnt_v[ms]cnt
; before or after it, and no buffer_wbinvl1_vol / buffer_gl?_inv after it.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
450 ; GCN-LABEL: {{^}}wavefront_one_as_acq_rel:
451 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
452 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
453 ; GCN:       flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
454 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
455 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
456 ; GCN-NOT:   buffer_{{wbinvl1_vol|gl._inv}}
457 ; GFX10:         .amdhsa_kernel wavefront_one_as_acq_rel
458 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
459 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
460 ; GFX10-NOT:     .amdhsa_memory_ordered 0
461 define amdgpu_kernel void @wavefront_one_as_acq_rel(
462     i32* %out, i32 %in) {
463 entry:
464   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") acq_rel
465   ret void
; Volatile xchg with syncscope("wavefront-one-as") and seq_cst ordering; the result
; is unused.
; Despite the seq_cst ordering, the checks are the same as for the monotonic case:
; a flat_atomic_swap with no `s_waitcnt vmcnt(0)` or zero-count s_waitcnt_v[ms]cnt
; before or after it, and no buffer_wbinvl1_vol / buffer_gl?_inv after it.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
468 ; GCN-LABEL: {{^}}wavefront_one_as_seq_cst:
469 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
470 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
471 ; GCN:       flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
472 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
473 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
474 ; GCN-NOT:   buffer_{{wbinvl1_vol|gl._inv}}
475 ; GFX10:         .amdhsa_kernel wavefront_one_as_seq_cst
476 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
477 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
478 ; GFX10-NOT:     .amdhsa_memory_ordered 0
479 define amdgpu_kernel void @wavefront_one_as_seq_cst(
480     i32* %out, i32 %in) {
481 entry:
482   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") seq_cst
483   ret void
; Returning variant: volatile xchg with syncscope("one-as") and acquire ordering
; whose result is stored back through %out.
; The flat_atomic_swap is expected in its value-returning form with the `glc`
; modifier, with no `s_waitcnt vmcnt(0)` or zero-count s_waitcnt_v[ms]cnt before
; it. Directly after it every run expects `s_waitcnt vmcnt(0)`, then
; buffer_wbinvl1_vol on gfx803 or buffer_gl0_inv + buffer_gl1_inv on gfx1010.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
486 ; GCN-LABEL: {{^}}system_one_as_acquire_ret:
487 ; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
488 ; GCN-NOT:    s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
489 ; GCN:        flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
490 ; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
491 ; GFX8-NEXT:  buffer_wbinvl1_vol
492 ; GFX10-NEXT: buffer_gl0_inv
493 ; GFX10-NEXT: buffer_gl1_inv
494 ; GFX10:         .amdhsa_kernel system_one_as_acquire_ret
495 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
496 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
497 ; GFX10-NOT:     .amdhsa_memory_ordered 0
498 define amdgpu_kernel void @system_one_as_acquire_ret(
499     i32* %out, i32 %in) {
500 entry:
501   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") acquire
502   store i32 %val, i32* %out, align 4
503   ret void
; Returning variant: volatile xchg with syncscope("one-as") and acq_rel ordering
; whose result is stored back through %out.
; Before the value-returning `glc` flat_atomic_swap: `s_waitcnt vmcnt(0)` (plus
; `s_waitcnt_vscnt null, 0x0` on gfx1010). Directly after it every run expects
; `s_waitcnt vmcnt(0)`, then buffer_wbinvl1_vol on gfx803 or
; buffer_gl0_inv + buffer_gl1_inv on gfx1010.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
506 ; GCN-LABEL: {{^}}system_one_as_acq_rel_ret:
507 ; GCN:         s_waitcnt vmcnt(0){{$}}
508 ; GFX10-NEXT:  s_waitcnt_vscnt null, 0x0{{$}}
509 ; GCN-NEXT:    flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
510 ; GCN-NEXT:    s_waitcnt vmcnt(0){{$}}
511 ; GFX8-NEXT:   buffer_wbinvl1_vol
512 ; GFX10-NEXT:  buffer_gl0_inv
513 ; GFX10-NEXT:  buffer_gl1_inv
514 ; GFX10:         .amdhsa_kernel system_one_as_acq_rel_ret
515 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
516 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
517 ; GFX10-NOT:     .amdhsa_memory_ordered 0
518 define amdgpu_kernel void @system_one_as_acq_rel_ret(
519     i32* %out, i32 %in) {
520 entry:
521   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") acq_rel
522   store i32 %val, i32* %out, align 4
523   ret void
; Returning variant: volatile xchg with syncscope("one-as") and seq_cst ordering
; whose result is stored back through %out.
; Before the value-returning `glc` flat_atomic_swap: `s_waitcnt vmcnt(0)` (plus
; `s_waitcnt_vscnt null, 0x0` on gfx1010). Directly after it every run expects
; `s_waitcnt vmcnt(0)`, then buffer_wbinvl1_vol on gfx803 or
; buffer_gl0_inv + buffer_gl1_inv on gfx1010.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
526 ; GCN-LABEL: {{^}}system_one_as_seq_cst_ret:
527 ; GCN:        s_waitcnt vmcnt(0){{$}}
528 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
529 ; GCN-NEXT:   flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
530 ; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
531 ; GFX8-NEXT:  buffer_wbinvl1_vol
532 ; GFX10-NEXT: buffer_gl0_inv
533 ; GFX10-NEXT: buffer_gl1_inv
534 ; GFX10:         .amdhsa_kernel system_one_as_seq_cst_ret
535 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
536 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
537 ; GFX10-NOT:     .amdhsa_memory_ordered 0
538 define amdgpu_kernel void @system_one_as_seq_cst_ret(
539     i32* %out, i32 %in) {
540 entry:
541   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") seq_cst
542   store i32 %val, i32* %out, align 4
543   ret void
; Returning variant: volatile xchg with syncscope("agent-one-as") and acquire
; ordering whose result is stored back through %out.
; The flat_atomic_swap is expected in its value-returning form with the `glc`
; modifier, with no `s_waitcnt vmcnt(0)` or zero-count s_waitcnt_v[ms]cnt before
; it. Directly after it every run expects `s_waitcnt vmcnt(0)`, then
; buffer_wbinvl1_vol on gfx803 or buffer_gl0_inv + buffer_gl1_inv on gfx1010.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
546 ; GCN-LABEL: {{^}}agent_one_as_acquire_ret:
547 ; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
548 ; GCN-NOT:    s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
549 ; GCN:        flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
550 ; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
551 ; GFX8-NEXT:  buffer_wbinvl1_vol
552 ; GFX10-NEXT: buffer_gl0_inv
553 ; GFX10-NEXT: buffer_gl1_inv
554 ; GFX10:         .amdhsa_kernel agent_one_as_acquire_ret
555 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
556 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
557 ; GFX10-NOT:     .amdhsa_memory_ordered 0
558 define amdgpu_kernel void @agent_one_as_acquire_ret(
559     i32* %out, i32 %in) {
560 entry:
561   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") acquire
562   store i32 %val, i32* %out, align 4
563   ret void
; Returning variant: volatile xchg with syncscope("agent-one-as") and acq_rel
; ordering whose result is stored back through %out.
; Before the value-returning `glc` flat_atomic_swap: `s_waitcnt vmcnt(0)` (plus
; `s_waitcnt_vscnt null, 0x0` on gfx1010). Directly after it every run expects
; `s_waitcnt vmcnt(0)`, then buffer_wbinvl1_vol on gfx803 or
; buffer_gl0_inv + buffer_gl1_inv on gfx1010.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
566 ; GCN-LABEL: {{^}}agent_one_as_acq_rel_ret:
567 ; GCN:        s_waitcnt vmcnt(0){{$}}
568 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
569 ; GCN-NEXT:   flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
570 ; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
571 ; GFX8-NEXT:  buffer_wbinvl1_vol
572 ; GFX10-NEXT: buffer_gl0_inv
573 ; GFX10-NEXT: buffer_gl1_inv
574 ; GFX10:         .amdhsa_kernel agent_one_as_acq_rel_ret
575 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
576 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
577 ; GFX10-NOT:     .amdhsa_memory_ordered 0
578 define amdgpu_kernel void @agent_one_as_acq_rel_ret(
579     i32* %out, i32 %in) {
580 entry:
581   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") acq_rel
582   store i32 %val, i32* %out, align 4
583   ret void
; Returning variant: volatile xchg with syncscope("agent-one-as") and seq_cst
; ordering whose result is stored back through %out.
; Before the value-returning `glc` flat_atomic_swap: `s_waitcnt vmcnt(0)` (plus
; `s_waitcnt_vscnt null, 0x0` on gfx1010). Directly after it every run expects
; `s_waitcnt vmcnt(0)`, then buffer_wbinvl1_vol on gfx803 or
; buffer_gl0_inv + buffer_gl1_inv on gfx1010.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
586 ; GCN-LABEL: {{^}}agent_one_as_seq_cst_ret:
587 ; GCN:        s_waitcnt vmcnt(0){{$}}
588 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
589 ; GCN-NEXT:   flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
590 ; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
591 ; GFX8-NEXT:  buffer_wbinvl1_vol
592 ; GFX10-NEXT: buffer_gl0_inv
593 ; GFX10-NEXT: buffer_gl1_inv
594 ; GFX10:         .amdhsa_kernel agent_one_as_seq_cst_ret
595 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
596 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
597 ; GFX10-NOT:     .amdhsa_memory_ordered 0
598 define amdgpu_kernel void @agent_one_as_seq_cst_ret(
599     i32* %out, i32 %in) {
600 entry:
601   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") seq_cst
602   store i32 %val, i32* %out, align 4
603   ret void
; Returning variant: volatile xchg with syncscope("workgroup-one-as") and acquire
; ordering whose result is stored back through %out.
; No `s_waitcnt vmcnt(0)` or zero-count s_waitcnt_v[ms]cnt may precede the
; value-returning `glc` flat_atomic_swap on any run. What follows differs per run:
;   - gfx803: no bare `s_waitcnt vmcnt(0)` and no buffer_wbinvl1_vol;
;   - gfx1010 without +cumode: `s_waitcnt vmcnt(0)` then buffer_gl0_inv, on
;     consecutive lines;
;   - gfx1010 with +cumode: neither of those two instructions.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
606 ; GCN-LABEL:     {{^}}workgroup_one_as_acquire_ret:
607 ; GCN-NOT:       s_waitcnt vmcnt(0){{$}}
608 ; GCN-NOT:       s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
609 ; GCN:           flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
610 ; GFX8-NOT:      s_waitcnt vmcnt(0){{$}}
611 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
612 ; GFX10CU-NOT:   s_waitcnt vmcnt(0){{$}}
613 ; GFX8-NOT:      buffer_wbinvl1_vol
614 ; GFX10WGP-NEXT: buffer_gl0_inv
615 ; GFX10CU-NOT:   buffer_gl0_inv
616 ; GFX10:         .amdhsa_kernel workgroup_one_as_acquire_ret
617 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
618 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
619 ; GFX10-NOT:     .amdhsa_memory_ordered 0
620 define amdgpu_kernel void @workgroup_one_as_acquire_ret(
621     i32* %out, i32 %in) {
622 entry:
623   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") acquire
624   store i32 %val, i32* %out, align 4
625   ret void
; Returning variant: volatile xchg with syncscope("workgroup-one-as") and acq_rel
; ordering whose result is stored back through %out.
; Only the gfx1010 run without +cumode expects synchronization around the
; value-returning `glc` flat_atomic_swap: `s_waitcnt vmcnt(0)` +
; `s_waitcnt_vscnt null, 0x0` before it, and `s_waitcnt vmcnt(0)` +
; buffer_gl0_inv on the lines directly after it.
; The gfx803 run must show no bare `s_waitcnt vmcnt(0)` on either side and no
; buffer_wbinvl1_vol; the +cumode run must show none of the waits and no
; buffer_gl0_inv.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
628 ; GCN-LABEL:     {{^}}workgroup_one_as_acq_rel_ret:
629 ; GFX8-NOT:      s_waitcnt vmcnt(0){{$}}
630 ; GFX10WGP:      s_waitcnt vmcnt(0){{$}}
631 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
632 ; GFX10CU-NOT:   s_waitcnt vmcnt(0){{$}}
633 ; GFX10CU-NOT:   s_waitcnt_vscnt null, 0x0{{$}}
634 ; GCN:           flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
635 ; GFX8-NOT:      s_waitcnt vmcnt(0){{$}}
636 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
637 ; GFX10CU-NOT:   s_waitcnt vmcnt(0){{$}}
638 ; GFX8-NOT:      buffer_wbinvl1_vol
639 ; GFX10WGP-NEXT: buffer_gl0_inv
640 ; GFX10CU-NOT:   buffer_gl0_inv
641 ; GFX10:         .amdhsa_kernel workgroup_one_as_acq_rel_ret
642 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
643 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
644 ; GFX10-NOT:     .amdhsa_memory_ordered 0
645 define amdgpu_kernel void @workgroup_one_as_acq_rel_ret(
646     i32* %out, i32 %in) {
647 entry:
648   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") acq_rel
649   store i32 %val, i32* %out, align 4
650   ret void
; Returning variant: volatile xchg with syncscope("workgroup-one-as") and seq_cst
; ordering whose result is stored back through %out.
; Only the gfx1010 run without +cumode expects synchronization around the
; value-returning `glc` flat_atomic_swap: `s_waitcnt vmcnt(0)` +
; `s_waitcnt_vscnt null, 0x0` before it, and `s_waitcnt vmcnt(0)` +
; buffer_gl0_inv on the lines directly after it.
; NOTE(review): the not-check on `s_waitcnt vmcnt(0)` right after the swap uses the
; prefix shared by all runs, yet the run without +cumode expects that very
; instruction on the next line. workgroup_one_as_acq_rel_ret restricts the same
; not-check to gfx803 - confirm which form is intended.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
653 ; GCN-LABEL:     {{^}}workgroup_one_as_seq_cst_ret:
654 ; GFX8-NOT:      s_waitcnt vmcnt(0){{$}}
655 ; GFX10WGP:      s_waitcnt vmcnt(0){{$}}
656 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
657 ; GFX10CU-NOT:   s_waitcnt vmcnt(0){{$}}
658 ; GFX10CU-NOT:   s_waitcnt_vscnt null, 0x0{{$}}
659 ; GCN:           flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
660 ; GCN-NOT:       s_waitcnt vmcnt(0){{$}}
661 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
662 ; GFX10CU-NOT:   s_waitcnt vmcnt(0){{$}}
663 ; GFX8-NOT:      buffer_wbinvl1_vol
664 ; GFX10WGP-NEXT: buffer_gl0_inv
665 ; GFX10CU-NOT:   buffer_gl0_inv
666 ; GFX10:         .amdhsa_kernel workgroup_one_as_seq_cst_ret
667 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
668 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
669 ; GFX10-NOT:     .amdhsa_memory_ordered 0
670 define amdgpu_kernel void @workgroup_one_as_seq_cst_ret(
671     i32* %out, i32 %in) {
672 entry:
673   %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") seq_cst
674   store i32 %val, i32* %out, align 4
675   ret void
; Volatile xchg with no explicit syncscope and monotonic ordering; the result is
; unused.
; Expects a flat_atomic_swap with no `s_waitcnt vmcnt(0)` or zero-count
; s_waitcnt_v[ms]cnt before or after it, and no buffer_wbinvl1_vol /
; buffer_gl?_inv after it.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
678 ; GCN-LABEL: {{^}}system_monotonic:
679 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
680 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
681 ; GCN:       flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
682 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
683 ; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
684 ; GCN-NOT:   buffer_{{wbinvl1_vol|gl._inv}}
685 ; GFX10:         .amdhsa_kernel system_monotonic
686 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
687 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
688 ; GFX10-NOT:     .amdhsa_memory_ordered 0
689 define amdgpu_kernel void @system_monotonic(
690     i32* %out, i32 %in) {
691 entry:
692   %val = atomicrmw volatile xchg i32* %out, i32 %in monotonic
693   ret void
; Volatile xchg with no explicit syncscope and acquire ordering; the result is
; unused.
; No bare `s_waitcnt vmcnt(0)` or zero-count s_waitcnt_v[ms]cnt may precede the
; flat_atomic_swap. Unlike the "one-as" variants, the waits after the swap also
; cover lgkmcnt: gfx803 expects `s_waitcnt vmcnt(0) lgkmcnt(0)` then
; buffer_wbinvl1_vol; gfx1010 expects `s_waitcnt lgkmcnt(0)`,
; `s_waitcnt_vscnt null, 0x0`, buffer_gl0_inv and buffer_gl1_inv, in that order.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
696 ; GCN-LABEL: {{^}}system_acquire:
697 ; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
698 ; GCN-NOT:    s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
699 ; GCN:        flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
700 ; GFX8-NEXT:  s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
701 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
702 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
703 ; GFX8-NEXT:  buffer_wbinvl1_vol
704 ; GFX10-NEXT: buffer_gl0_inv
705 ; GFX10-NEXT: buffer_gl1_inv
706 ; GFX10:         .amdhsa_kernel system_acquire
707 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
708 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
709 ; GFX10-NOT:     .amdhsa_memory_ordered 0
710 define amdgpu_kernel void @system_acquire(
711     i32* %out, i32 %in) {
712 entry:
713   %val = atomicrmw volatile xchg i32* %out, i32 %in acquire
714   ret void
; Volatile xchg with no explicit syncscope and release ordering; the result is
; unused.
; Before the flat_atomic_swap, gfx803 expects `s_waitcnt vmcnt(0) lgkmcnt(0)` and
; gfx1010 expects `s_waitcnt lgkmcnt(0)` followed (not necessarily adjacently) by
; `s_waitcnt_vscnt null, 0x0`; the swap must be on the line right after the last
; of these. After the swap no bare `s_waitcnt vmcnt(0)`, zero-count
; s_waitcnt_v[ms]cnt, buffer_wbinvl1_vol or buffer_gl?_inv is expected.
; The gfx1010 runs also check the kernel directives: `.amdhsa_workgroup_processor_mode 0`
; is expected only with +cumode, and `.amdhsa_memory_ordered 0` is not expected.
717 ; GCN-LABEL: {{^}}system_release:
718 ; GFX8:       s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
719 ; GFX10:      s_waitcnt lgkmcnt(0){{$}}
720 ; GFX10:      s_waitcnt_vscnt null, 0x0{{$}}
721 ; GCN-NEXT:   flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
722 ; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
723 ; GCN-NOT:    s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
724 ; GCN-NOT:    buffer_{{wbinvl1_vol|gl._inv}}
725 ; GFX10:         .amdhsa_kernel system_release
726 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
727 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
728 ; GFX10-NOT:     .amdhsa_memory_ordered 0
729 define amdgpu_kernel void @system_release(
730     i32* %out, i32 %in) {
731 entry:
732   %val = atomicrmw volatile xchg i32* %out, i32 %in release
733   ret void
; atomicrmw xchg on an i32* pointer, acq_rel ordering, no syncscope, result
; unused. Expects a wait immediately before flat_atomic_swap and, directly
; after it, another wait followed by cache invalidate (buffer_wbinvl1_vol on
; GFX8, buffer_gl0_inv and buffer_gl1_inv on GFX10).
; GCN-LABEL: {{^}}system_acq_rel:
; GFX8:       s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10:      s_waitcnt lgkmcnt(0){{$}}
; GFX10:      s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT:    flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX8-NEXT:  s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT:   buffer_wbinvl1_vol
; GFX10-NEXT:  buffer_gl0_inv
; GFX10-NEXT:  buffer_gl1_inv
; GFX10:         .amdhsa_kernel system_acq_rel
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_acq_rel(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in acq_rel
  ret void
}
; atomicrmw xchg on an i32* pointer, seq_cst ordering, no syncscope, result
; unused. Expects a wait immediately before flat_atomic_swap and, directly
; after it, another wait followed by cache invalidate (buffer_wbinvl1_vol on
; GFX8, buffer_gl0_inv and buffer_gl1_inv on GFX10).
; GCN-LABEL: {{^}}system_seq_cst:
; GFX8:       s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10:      s_waitcnt lgkmcnt(0){{$}}
; GFX10:      s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT:   flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX8-NEXT:  s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT:  buffer_wbinvl1_vol
; GFX10-NEXT: buffer_gl0_inv
; GFX10-NEXT: buffer_gl1_inv
; GFX10:         .amdhsa_kernel system_seq_cst
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_seq_cst(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst
  ret void
}
; atomicrmw xchg on an i32* pointer, syncscope("singlethread") monotonic,
; result unused. Expects a bare flat_atomic_swap: no s_waitcnt vmcnt(0) or
; zero-count s_waitcnt_v[ms]cnt on either side and no cache invalidate after.
; GCN-LABEL: {{^}}singlethread_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN:       flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN-NOT:   buffer_{{wbinvl1_vol|gl._inv}}
; GFX10:         .amdhsa_kernel singlethread_monotonic
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_monotonic(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") monotonic
  ret void
}
; atomicrmw xchg on an i32* pointer, syncscope("singlethread") acquire,
; result unused. Expects a bare flat_atomic_swap: no s_waitcnt vmcnt(0) or
; zero-count s_waitcnt_v[ms]cnt on either side and no cache invalidate after.
; GCN-LABEL: {{^}}singlethread_acquire:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN:       flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN-NOT:   buffer_{{wbinvl1_vol|gl._inv}}
; GFX10:         .amdhsa_kernel singlethread_acquire
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_acquire(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") acquire
  ret void
}
; atomicrmw xchg on an i32* pointer, syncscope("singlethread") release,
; result unused. Expects a bare flat_atomic_swap: no s_waitcnt vmcnt(0) or
; zero-count s_waitcnt_v[ms]cnt on either side and no cache invalidate after.
; GCN-LABEL: {{^}}singlethread_release:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN:       flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN-NOT:   buffer_{{wbinvl1_vol|gl._inv}}
; GFX10:         .amdhsa_kernel singlethread_release
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_release(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") release
  ret void
}
; atomicrmw xchg on an i32* pointer, syncscope("singlethread") acq_rel,
; result unused. Expects a bare flat_atomic_swap: no s_waitcnt vmcnt(0) or
; zero-count s_waitcnt_v[ms]cnt on either side and no cache invalidate after.
; GCN-LABEL: {{^}}singlethread_acq_rel:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN:       flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN-NOT:   buffer_{{wbinvl1_vol|gl._inv}}
; GFX10:         .amdhsa_kernel singlethread_acq_rel
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_acq_rel(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") acq_rel
  ret void
}
; atomicrmw xchg on an i32* pointer, syncscope("singlethread") seq_cst,
; result unused. Expects a bare flat_atomic_swap: no s_waitcnt vmcnt(0) or
; zero-count s_waitcnt_v[ms]cnt on either side and no cache invalidate after.
; GCN-LABEL: {{^}}singlethread_seq_cst:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN:       flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN-NOT:   buffer_{{wbinvl1_vol|gl._inv}}
; GFX10:         .amdhsa_kernel singlethread_seq_cst
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_seq_cst(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") seq_cst
  ret void
}
; atomicrmw xchg on an i32* pointer, syncscope("agent") monotonic, result
; unused. Expects a bare flat_atomic_swap: no s_waitcnt vmcnt(0) or zero-count
; s_waitcnt_v[ms]cnt on either side and no cache invalidate after.
; GCN-LABEL: {{^}}agent_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN:       flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN-NOT:   buffer_{{wbinvl1_vol|gl._inv}}
; GFX10:         .amdhsa_kernel agent_monotonic
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_monotonic(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") monotonic
  ret void
}
; atomicrmw xchg on an i32* pointer, syncscope("agent") acquire, result
; unused. Expects no s_waitcnt vmcnt(0) or zero-count s_waitcnt_v[ms]cnt before
; flat_atomic_swap, and a wait plus cache invalidate (buffer_wbinvl1_vol on
; GFX8, buffer_gl0_inv and buffer_gl1_inv on GFX10) directly after it.
; GCN-LABEL: {{^}}agent_acquire:
; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
; GCN-NOT:    s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN:        flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX8-NEXT:  s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT:  buffer_wbinvl1_vol
; GFX10-NEXT: buffer_gl0_inv
; GFX10-NEXT: buffer_gl1_inv
; GFX10:         .amdhsa_kernel agent_acquire
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_acquire(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") acquire
  ret void
}
; atomicrmw xchg on an i32* pointer, syncscope("agent") release, result
; unused. Expects a wait (s_waitcnt vmcnt(0) lgkmcnt(0) on GFX8; s_waitcnt
; lgkmcnt(0) then s_waitcnt_vscnt null, 0x0 on GFX10) immediately before
; flat_atomic_swap, and no zero-count wait or cache invalidate after it.
; GCN-LABEL: {{^}}agent_release:
; GFX8:       s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10:      s_waitcnt lgkmcnt(0){{$}}
; GFX10:      s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT:   flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
; GCN-NOT:    s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN-NOT:    buffer_{{wbinvl1_vol|gl._inv}}
; GFX10:         .amdhsa_kernel agent_release
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_release(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") release
  ret void
}
; atomicrmw xchg on an i32* pointer, syncscope("agent") acq_rel, result
; unused. Expects a wait immediately before flat_atomic_swap and, directly
; after it, another wait followed by cache invalidate (buffer_wbinvl1_vol on
; GFX8, buffer_gl0_inv and buffer_gl1_inv on GFX10).
; GCN-LABEL: {{^}}agent_acq_rel:
; GFX8:       s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10:      s_waitcnt lgkmcnt(0){{$}}
; GFX10:      s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT:   flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX8-NEXT:  s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT:  buffer_wbinvl1_vol
; GFX10-NEXT: buffer_gl0_inv
; GFX10-NEXT: buffer_gl1_inv
; GFX10:         .amdhsa_kernel agent_acq_rel
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_acq_rel(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") acq_rel
  ret void
}
; atomicrmw xchg on an i32* pointer, syncscope("agent") seq_cst, result
; unused. Expects a wait immediately before flat_atomic_swap and, directly
; after it, another wait followed by cache invalidate (buffer_wbinvl1_vol on
; GFX8, buffer_gl0_inv and buffer_gl1_inv on GFX10).
; GCN-LABEL: {{^}}agent_seq_cst:
; GFX8:       s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10:      s_waitcnt lgkmcnt(0){{$}}
; GFX10:      s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT:   flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX8-NEXT:  s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT:  buffer_wbinvl1_vol
; GFX10-NEXT: buffer_gl0_inv
; GFX10-NEXT: buffer_gl1_inv
; GFX10:         .amdhsa_kernel agent_seq_cst
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_seq_cst(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") seq_cst
  ret void
}
; atomicrmw xchg on an i32* pointer, syncscope("workgroup") monotonic, result
; unused. Expects a bare flat_atomic_swap: no s_waitcnt vmcnt(0) or zero-count
; s_waitcnt_v[ms]cnt on either side and no cache invalidate after.
; GCN-LABEL: {{^}}workgroup_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN:       flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN-NOT:   buffer_{{wbinvl1_vol|gl._inv}}
; GFX10:         .amdhsa_kernel workgroup_monotonic
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_monotonic(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") monotonic
  ret void
}
; atomicrmw xchg on an i32* pointer, syncscope("workgroup") acquire, result
; unused. Only the gfx1010 run without +cumode (GFX10WGP prefix) expects
; s_waitcnt_vscnt null, 0x0 and buffer_gl0_inv after flat_atomic_swap; the
; GFX8 and +cumode runs expect neither a wait nor an invalidate there.
; GCN-LABEL:     {{^}}workgroup_acquire:
; GCN-NOT:       s_waitcnt vmcnt(0){{$}}
; GCN-NOT:       s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN:           flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX8-NOT:      s_waitcnt vmcnt(0){{$}}
; GFX10WGP:      s_waitcnt_vscnt null, 0x0{{$}}
; GFX10CU-NOT:   s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NOT:      buffer_wbinvl1_vol
; GFX10WGP-NEXT: buffer_gl0_inv
; GFX10CU-NOT:   buffer_gl0_inv
; GFX10:         .amdhsa_kernel workgroup_acquire
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_acquire(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") acquire
  ret void
}
; atomicrmw xchg on an i32* pointer, syncscope("workgroup") release, result
; unused. Only the gfx1010 run without +cumode (GFX10WGP prefix) expects
; s_waitcnt vmcnt(0) lgkmcnt(0) followed by s_waitcnt_vscnt null, 0x0 before
; flat_atomic_swap; the GFX8 and +cumode runs expect no such waits. No run
; expects a zero-count wait or cache invalidate after the swap.
; GCN-LABEL:     {{^}}workgroup_release:
; GFX8-NOT:      s_waitcnt vmcnt(0){{$}}
; GFX10WGP:      s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10CU-NOT:   s_waitcnt vmcnt(0){{$}}
; GFX10CU-NOT:   s_waitcnt_vscnt null, 0x0{{$}}
; GCN:           flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT:       s_waitcnt vmcnt(0){{$}}
; GCN-NOT:       s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN-NOT:       buffer_{{wbinvl1_vol|gl._inv}}
; GFX10:         .amdhsa_kernel workgroup_release
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_release(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") release
  ret void
}
; atomicrmw xchg on an i32* pointer, syncscope("workgroup") acq_rel, result
; unused. Only the gfx1010 run without +cumode (GFX10WGP prefix) expects waits
; before flat_atomic_swap and s_waitcnt_vscnt null, 0x0 plus buffer_gl0_inv
; after it; the GFX8 and +cumode runs expect none of these.
; GCN-LABEL:     {{^}}workgroup_acq_rel:
; GFX8-NOT:      s_waitcnt vmcnt(0){{$}}
; GFX10WGP:      s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10WGP:      s_waitcnt_vscnt null, 0x0{{$}}
; GFX10CU-NOT:   s_waitcnt vmcnt(0){{$}}
; GFX10CU-NOT:   s_waitcnt_vscnt null, 0x0{{$}}
; GCN:           flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX8-NOT:      s_waitcnt vmcnt(0){{$}}
; GFX10WGP:      s_waitcnt_vscnt null, 0x0{{$}}
; GFX10CU-NOT:   s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NOT:      buffer_wbinvl1_vol
; GFX10WGP-NEXT: buffer_gl0_inv
; GFX10CU-NOT:   buffer_gl0_inv
; GFX10:         .amdhsa_kernel workgroup_acq_rel
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_acq_rel(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") acq_rel
  ret void
}
; atomicrmw xchg on an i32* pointer, syncscope("workgroup") seq_cst, result
; unused. Only the gfx1010 run without +cumode (GFX10WGP prefix) expects waits
; before flat_atomic_swap and s_waitcnt_vscnt null, 0x0 plus buffer_gl0_inv
; after it; the GFX8 and +cumode runs expect none of these.
; GCN-LABEL:     {{^}}workgroup_seq_cst:
; GFX8-NOT:      s_waitcnt vmcnt(0){{$}}
; GFX10WGP:      s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10CU-NOT:   s_waitcnt vmcnt(0){{$}}
; GFX10CU-NOT:   s_waitcnt_vscnt null, 0x0{{$}}
; GCN:           flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT:       s_waitcnt vmcnt(0){{$}}
; GFX10WGP:      s_waitcnt_vscnt null, 0x0{{$}}
; GFX10CU-NOT:   s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NOT:      buffer_wbinvl1_vol
; GFX10WGP-NEXT: buffer_gl0_inv
; GFX10CU-NOT:   buffer_gl0_inv
; GFX10:         .amdhsa_kernel workgroup_seq_cst
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_seq_cst(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") seq_cst
  ret void
}
; atomicrmw xchg on an i32* pointer, syncscope("wavefront") monotonic, result
; unused. Expects a bare flat_atomic_swap: no s_waitcnt vmcnt(0) or zero-count
; s_waitcnt_v[ms]cnt on either side and no cache invalidate after.
; GCN-LABEL: {{^}}wavefront_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN:       flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN-NOT:   buffer_{{wbinvl1_vol|gl._inv}}
; GFX10:         .amdhsa_kernel wavefront_monotonic
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_monotonic(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") monotonic
  ret void
}
; atomicrmw xchg on an i32* pointer, syncscope("wavefront") acquire, result
; unused. Expects a bare flat_atomic_swap: no s_waitcnt vmcnt(0) or zero-count
; s_waitcnt_v[ms]cnt on either side and no cache invalidate after.
; GCN-LABEL: {{^}}wavefront_acquire:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN:       flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN-NOT:   buffer_{{wbinvl1_vol|gl._inv}}
; GFX10:         .amdhsa_kernel wavefront_acquire
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_acquire(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") acquire
  ret void
}
; atomicrmw xchg on an i32* pointer, syncscope("wavefront") release, result
; unused. Expects a bare flat_atomic_swap: no s_waitcnt vmcnt(0) or zero-count
; s_waitcnt_v[ms]cnt on either side and no cache invalidate after.
; GCN-LABEL: {{^}}wavefront_release:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN:       flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN-NOT:   buffer_{{wbinvl1_vol|gl._inv}}
; GFX10:         .amdhsa_kernel wavefront_release
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_release(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") release
  ret void
}
; atomicrmw xchg on an i32* pointer, syncscope("wavefront") acq_rel, result
; unused. Expects a bare flat_atomic_swap: no s_waitcnt vmcnt(0) or zero-count
; s_waitcnt_v[ms]cnt on either side and no cache invalidate after.
; GCN-LABEL: {{^}}wavefront_acq_rel:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN:       flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN-NOT:   buffer_{{wbinvl1_vol|gl._inv}}
; GFX10:         .amdhsa_kernel wavefront_acq_rel
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_acq_rel(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") acq_rel
  ret void
}
; atomicrmw xchg on an i32* pointer, syncscope("wavefront") seq_cst, result
; unused. Expects a bare flat_atomic_swap: no s_waitcnt vmcnt(0) or zero-count
; s_waitcnt_v[ms]cnt on either side and no cache invalidate after.
; GCN-LABEL: {{^}}wavefront_seq_cst:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN:       flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN-NOT:   buffer_{{wbinvl1_vol|gl._inv}}
; GFX10:         .amdhsa_kernel wavefront_seq_cst
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_seq_cst(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") seq_cst
  ret void
}
; atomicrmw xchg on an i32* pointer, acquire ordering, no syncscope; the old
; value is stored back to %out. Expects the returning (glc) flat_atomic_swap
; with no zero-count wait before it, followed directly by s_waitcnt vmcnt(0)
; lgkmcnt(0) and cache invalidate (buffer_wbinvl1_vol on GFX8, buffer_gl0_inv
; and buffer_gl1_inv on GFX10).
; GCN-LABEL: {{^}}system_acquire_ret:
; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
; GCN-NOT:    s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN:        flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX8-NEXT:  s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT:  buffer_wbinvl1_vol
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10-NEXT: buffer_gl0_inv
; GFX10-NEXT: buffer_gl1_inv
; GFX10:         .amdhsa_kernel system_acquire_ret
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_acquire_ret(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in acquire
  store i32 %val, i32* %out, align 4
  ret void
}
; atomicrmw xchg on an i32* pointer, acq_rel ordering, no syncscope; the old
; value is stored back to %out. Expects a wait immediately before the
; returning (glc) flat_atomic_swap and, directly after it, s_waitcnt vmcnt(0)
; lgkmcnt(0) followed by cache invalidate (buffer_wbinvl1_vol on GFX8,
; buffer_gl0_inv and buffer_gl1_inv on GFX10).
; GCN-LABEL: {{^}}system_acq_rel_ret:
; GFX8:        s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10:       s_waitcnt lgkmcnt(0){{$}}
; GFX10:       s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT:    flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX8-NEXT:   s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT:   buffer_wbinvl1_vol
; GFX10-NEXT:  s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10-NEXT:  buffer_gl0_inv
; GFX10-NEXT:  buffer_gl1_inv
; GFX10:         .amdhsa_kernel system_acq_rel_ret
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_acq_rel_ret(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in acq_rel
  store i32 %val, i32* %out, align 4
  ret void
}
; atomicrmw xchg on an i32* pointer, seq_cst ordering, no syncscope; the old
; value is stored back to %out. Expects a wait immediately before the
; returning (glc) flat_atomic_swap and, directly after it, s_waitcnt vmcnt(0)
; lgkmcnt(0) followed by cache invalidate (buffer_wbinvl1_vol on GFX8,
; buffer_gl0_inv and buffer_gl1_inv on GFX10).
; GCN-LABEL: {{^}}system_seq_cst_ret:
; GFX8:        s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10:       s_waitcnt lgkmcnt(0){{$}}
; GFX10:       s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT:   flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX8-NEXT:   s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT:   buffer_wbinvl1_vol
; GFX10-NEXT:  s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10-NEXT:  buffer_gl0_inv
; GFX10-NEXT:  buffer_gl1_inv
; GFX10:         .amdhsa_kernel system_seq_cst_ret
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_seq_cst_ret(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out, align 4
  ret void
}
; atomicrmw xchg on an i32* pointer, syncscope("agent") acquire; the old value
; is stored back to %out. Expects the returning (glc) flat_atomic_swap with no
; zero-count wait before it, followed directly by s_waitcnt vmcnt(0)
; lgkmcnt(0) and cache invalidate (buffer_wbinvl1_vol on GFX8, buffer_gl0_inv
; and buffer_gl1_inv on GFX10).
; GCN-LABEL: {{^}}agent_acquire_ret:
; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
; GCN-NOT:    s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN:        flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX8-NEXT:   s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT:   buffer_wbinvl1_vol
; GFX10-NEXT:  s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10-NEXT:  buffer_gl0_inv
; GFX10-NEXT:  buffer_gl1_inv
; GFX10:         .amdhsa_kernel agent_acquire_ret
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_acquire_ret(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") acquire
  store i32 %val, i32* %out, align 4
  ret void
}
; atomicrmw xchg on an i32* pointer, syncscope("agent") acq_rel; the old value
; is stored back to %out. Expects a wait immediately before the returning
; (glc) flat_atomic_swap and, directly after it, s_waitcnt vmcnt(0) lgkmcnt(0)
; followed by cache invalidate (buffer_wbinvl1_vol on GFX8, buffer_gl0_inv and
; buffer_gl1_inv on GFX10).
; GCN-LABEL: {{^}}agent_acq_rel_ret:
; GFX8:        s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10:       s_waitcnt lgkmcnt(0){{$}}
; GFX10:       s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT:   flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX8-NEXT:   s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT:   buffer_wbinvl1_vol
; GFX10-NEXT:  s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10-NEXT:  buffer_gl0_inv
; GFX10-NEXT:  buffer_gl1_inv
; GFX10:         .amdhsa_kernel agent_acq_rel_ret
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_acq_rel_ret(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") acq_rel
  store i32 %val, i32* %out, align 4
  ret void
}
; atomicrmw xchg on an i32* pointer, syncscope("agent") seq_cst; the old value
; is stored back to %out. Expects a wait immediately before the returning
; (glc) flat_atomic_swap and, directly after it, s_waitcnt vmcnt(0) lgkmcnt(0)
; followed by cache invalidate (buffer_wbinvl1_vol on GFX8, buffer_gl0_inv and
; buffer_gl1_inv on GFX10).
; GCN-LABEL: {{^}}agent_seq_cst_ret:
; GFX8:        s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10:       s_waitcnt lgkmcnt(0){{$}}
; GFX10:       s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT:   flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX8-NEXT:   s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT:   buffer_wbinvl1_vol
; GFX10-NEXT:  s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10-NEXT:  buffer_gl0_inv
; GFX10-NEXT:  buffer_gl1_inv
; GFX10:         .amdhsa_kernel agent_seq_cst_ret
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_seq_cst_ret(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") seq_cst
  store i32 %val, i32* %out, align 4
  ret void
}
; atomicrmw xchg on an i32* pointer, syncscope("workgroup") acquire; the old
; value is stored back to %out. Only the gfx1010 run without +cumode (GFX10WGP
; prefix) expects s_waitcnt vmcnt(0) lgkmcnt(0) and buffer_gl0_inv directly
; after the returning (glc) flat_atomic_swap; the GFX8 and +cumode runs expect
; neither.
; GCN-LABEL:     {{^}}workgroup_acquire_ret:
; GCN-NOT:       s_waitcnt vmcnt(0){{$}}
; GCN-NOT:       s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN:           flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX8-NOT:      s_waitcnt vmcnt(0){{$}}
; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10CU-NOT:   s_waitcnt vmcnt(0){{$}}
; GFX8-NOT:      buffer_wbinvl1_vol
; GFX10WGP-NEXT: buffer_gl0_inv
; GFX10CU-NOT:   buffer_gl0_inv
; GFX10:         .amdhsa_kernel workgroup_acquire_ret
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_acquire_ret(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") acquire
  store i32 %val, i32* %out, align 4
  ret void
}
; atomicrmw xchg on an i32* pointer, syncscope("workgroup") acq_rel; the old
; value is stored back to %out. Only the gfx1010 run without +cumode (GFX10WGP
; prefix) expects waits before the returning (glc) flat_atomic_swap and
; s_waitcnt vmcnt(0) lgkmcnt(0) plus buffer_gl0_inv directly after it; the
; GFX8 and +cumode runs expect none of these.
; GCN-LABEL:     {{^}}workgroup_acq_rel_ret:
; GFX8-NOT:      s_waitcnt vmcnt(0){{$}}
; GFX10WGP:      s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10CU-NOT:   s_waitcnt vmcnt(0){{$}}
; GFX10CU-NOT:   s_waitcnt_vscnt null, 0x0{{$}}
; GCN:           flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX8-NOT:      s_waitcnt vmcnt(0){{$}}
; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10CU-NOT:   s_waitcnt vmcnt(0){{$}}
; GFX8-NOT:      buffer_wbinvl1_vol
; GFX10WGP-NEXT: buffer_gl0_inv
; GFX10CU-NOT:   buffer_gl0_inv
; GFX10:         .amdhsa_kernel workgroup_acq_rel_ret
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_acq_rel_ret(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") acq_rel
  store i32 %val, i32* %out, align 4
  ret void
}
; atomicrmw xchg on an i32* pointer, syncscope("workgroup") seq_cst; the old
; value is stored back to %out. Only the gfx1010 run without +cumode (GFX10WGP
; prefix) expects waits before the returning (glc) flat_atomic_swap and
; s_waitcnt vmcnt(0) lgkmcnt(0) plus buffer_gl0_inv directly after it; the
; GFX8 and +cumode runs expect none of these.
; GCN-LABEL:     {{^}}workgroup_seq_cst_ret:
; GFX8-NOT:      s_waitcnt vmcnt(0){{$}}
; GFX10WGP:      s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10CU-NOT:   s_waitcnt vmcnt(0){{$}}
; GFX10CU-NOT:   s_waitcnt_vscnt null, 0x0{{$}}
; GCN:           flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN-NOT:       s_waitcnt vmcnt(0){{$}}
; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10CU-NOT:   s_waitcnt vmcnt(0){{$}}
; GFX8-NOT:      buffer_wbinvl1_vol
; GFX10WGP-NEXT: buffer_gl0_inv
; GFX10CU-NOT:   buffer_gl0_inv
; GFX10:         .amdhsa_kernel workgroup_seq_cst_ret
; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
; GFX10CU:       .amdhsa_workgroup_processor_mode 0
; GFX10-NOT:     .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_seq_cst_ret(
    i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") seq_cst
  store i32 %val, i32* %out, align 4
  ret void
}