1 ; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX10WGP %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3,+cumode -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX10CU %s
6 ; GCN-LABEL: {{^}}system_one_as_monotonic:
7 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
8 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
9 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
10 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
11 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
12 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
13 ; GFX10: .amdhsa_kernel system_one_as_monotonic
14 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
15 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
16 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel performing a volatile atomicrmw xchg of %in at address %out with
; syncscope "one-as" and monotonic ordering; %val has no visible use here.
; NOTE(review): the parameter list is not visible in this excerpt; %out and
; %in are presumably the kernel arguments - confirm against the full file.
16 define amdgpu_kernel void @system_one_as_monotonic(
20 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") monotonic
24 ; GCN-LABEL: {{^}}system_one_as_acquire:
25 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
26 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
27 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
28 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
29 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
30 ; GFX8-NEXT: buffer_wbinvl1_vol
31 ; GFX10-NEXT: buffer_gl0_inv
32 ; GFX10-NEXT: buffer_gl1_inv
33 ; GFX10: .amdhsa_kernel system_one_as_acquire
34 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
35 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
36 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel performing a volatile atomicrmw xchg of %in at address %out with
; syncscope "one-as" and acquire ordering; %val has no visible use here.
; NOTE(review): the parameter list is not visible in this excerpt; %out and
; %in are presumably the kernel arguments - confirm against the full file.
37 define amdgpu_kernel void @system_one_as_acquire(
40 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") acquire
44 ; GCN-LABEL: {{^}}system_one_as_release:
45 ; GCN: s_waitcnt vmcnt(0){{$}}
46 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
47 ; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
48 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
49 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
50 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
51 ; GFX10: .amdhsa_kernel system_one_as_release
52 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
53 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
54 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel performing a volatile atomicrmw xchg of %in at address %out with
; syncscope "one-as" and release ordering; %val has no visible use here.
; NOTE(review): the parameter list is not visible in this excerpt; %out and
; %in are presumably the kernel arguments - confirm against the full file.
55 define amdgpu_kernel void @system_one_as_release(
58 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") release
62 ; GCN-LABEL: {{^}}system_one_as_acq_rel:
63 ; GCN: s_waitcnt vmcnt(0){{$}}
64 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
65 ; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
66 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
67 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
68 ; GFX8-NEXT: buffer_wbinvl1_vol
69 ; GFX10-NEXT: buffer_gl0_inv
70 ; GFX10-NEXT: buffer_gl1_inv
71 ; GFX10: .amdhsa_kernel system_one_as_acq_rel
72 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
73 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
74 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel performing a volatile atomicrmw xchg of %in at address %out with
; syncscope "one-as" and acq_rel ordering; %val has no visible use here.
; NOTE(review): the parameter list is not visible in this excerpt; %out and
; %in are presumably the kernel arguments - confirm against the full file.
75 define amdgpu_kernel void @system_one_as_acq_rel(
78 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") acq_rel
82 ; GCN-LABEL: {{^}}system_one_as_seq_cst:
83 ; GCN: s_waitcnt vmcnt(0){{$}}
84 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
85 ; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
86 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
87 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
88 ; GFX8-NEXT: buffer_wbinvl1_vol
89 ; GFX10-NEXT: buffer_gl0_inv
90 ; GFX10-NEXT: buffer_gl1_inv
91 ; GFX10: .amdhsa_kernel system_one_as_seq_cst
92 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
93 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
94 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel performing a volatile atomicrmw xchg of %in at address %out with
; syncscope "one-as" and seq_cst ordering; %val has no visible use here.
; NOTE(review): the parameter list is not visible in this excerpt; %out and
; %in are presumably the kernel arguments - confirm against the full file.
95 define amdgpu_kernel void @system_one_as_seq_cst(
98 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") seq_cst
102 ; GCN-LABEL: {{^}}singlethread_one_as_monotonic:
103 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
104 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
105 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
106 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
107 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
108 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
109 ; GFX10: .amdhsa_kernel singlethread_one_as_monotonic
110 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
111 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
112 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel taking a pointer %out and a value %in; performs a volatile
; atomicrmw xchg of %in at %out with syncscope "singlethread-one-as" and
; monotonic ordering. %val has no visible use in this excerpt.
113 define amdgpu_kernel void @singlethread_one_as_monotonic(
114 i32* %out, i32 %in) {
116 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") monotonic
120 ; GCN-LABEL: {{^}}singlethread_one_as_acquire:
121 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
122 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
123 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
124 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
125 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
126 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
127 ; GFX10: .amdhsa_kernel singlethread_one_as_acquire
128 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
129 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
130 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel taking a pointer %out and a value %in; performs a volatile
; atomicrmw xchg of %in at %out with syncscope "singlethread-one-as" and
; acquire ordering. %val has no visible use in this excerpt.
131 define amdgpu_kernel void @singlethread_one_as_acquire(
132 i32* %out, i32 %in) {
134 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") acquire
138 ; GCN-LABEL: {{^}}singlethread_one_as_release:
139 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
140 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
141 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
142 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
143 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
144 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
145 ; GFX10: .amdhsa_kernel singlethread_one_as_release
146 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
147 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
148 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel taking a pointer %out and a value %in; performs a volatile
; atomicrmw xchg of %in at %out with syncscope "singlethread-one-as" and
; release ordering. %val has no visible use in this excerpt.
149 define amdgpu_kernel void @singlethread_one_as_release(
150 i32* %out, i32 %in) {
152 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") release
156 ; GCN-LABEL: {{^}}singlethread_one_as_acq_rel:
157 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
158 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
159 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
160 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
161 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
162 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
163 ; GFX10: .amdhsa_kernel singlethread_one_as_acq_rel
164 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
165 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
166 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel taking a pointer %out and a value %in; performs a volatile
; atomicrmw xchg of %in at %out with syncscope "singlethread-one-as" and
; acq_rel ordering. %val has no visible use in this excerpt.
167 define amdgpu_kernel void @singlethread_one_as_acq_rel(
168 i32* %out, i32 %in) {
170 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") acq_rel
174 ; GCN-LABEL: {{^}}singlethread_one_as_seq_cst:
175 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
176 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
177 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
178 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
179 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
180 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
181 ; GFX10: .amdhsa_kernel singlethread_one_as_seq_cst
182 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
183 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
184 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel taking a pointer %out and a value %in; performs a volatile
; atomicrmw xchg of %in at %out with syncscope "singlethread-one-as" and
; seq_cst ordering. %val has no visible use in this excerpt.
185 define amdgpu_kernel void @singlethread_one_as_seq_cst(
186 i32* %out, i32 %in) {
188 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") seq_cst
192 ; GCN-LABEL: {{^}}agent_one_as_monotonic:
193 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
194 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
195 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
196 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
197 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
198 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
199 ; GFX10: .amdhsa_kernel agent_one_as_monotonic
200 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
201 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
202 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel taking a pointer %out and a value %in; performs a volatile
; atomicrmw xchg of %in at %out with syncscope "agent-one-as" and
; monotonic ordering. %val has no visible use in this excerpt.
203 define amdgpu_kernel void @agent_one_as_monotonic(
204 i32* %out, i32 %in) {
206 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") monotonic
210 ; GCN-LABEL: {{^}}agent_one_as_acquire:
211 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
212 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
213 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
214 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
215 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
216 ; GFX8-NEXT: buffer_wbinvl1_vol
217 ; GFX10-NEXT: buffer_gl0_inv
218 ; GFX10-NEXT: buffer_gl1_inv
219 ; GFX10: .amdhsa_kernel agent_one_as_acquire
220 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
221 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
222 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel taking a pointer %out and a value %in; performs a volatile
; atomicrmw xchg of %in at %out with syncscope "agent-one-as" and
; acquire ordering. %val has no visible use in this excerpt.
223 define amdgpu_kernel void @agent_one_as_acquire(
224 i32* %out, i32 %in) {
226 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") acquire
230 ; GCN-LABEL: {{^}}agent_one_as_release:
231 ; GCN: s_waitcnt vmcnt(0){{$}}
232 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
233 ; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
234 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
235 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
236 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
237 ; GFX10: .amdhsa_kernel agent_one_as_release
238 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
239 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
240 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel taking a pointer %out and a value %in; performs a volatile
; atomicrmw xchg of %in at %out with syncscope "agent-one-as" and
; release ordering. %val has no visible use in this excerpt.
241 define amdgpu_kernel void @agent_one_as_release(
242 i32* %out, i32 %in) {
244 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") release
248 ; GCN-LABEL: {{^}}agent_one_as_acq_rel:
249 ; GCN: s_waitcnt vmcnt(0){{$}}
250 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
251 ; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
252 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
253 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
254 ; GFX8-NEXT: buffer_wbinvl1_vol
255 ; GFX10-NEXT: buffer_gl0_inv
256 ; GFX10-NEXT: buffer_gl1_inv
257 ; GFX10: .amdhsa_kernel agent_one_as_acq_rel
258 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
259 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
260 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel taking a pointer %out and a value %in; performs a volatile
; atomicrmw xchg of %in at %out with syncscope "agent-one-as" and
; acq_rel ordering. %val has no visible use in this excerpt.
261 define amdgpu_kernel void @agent_one_as_acq_rel(
262 i32* %out, i32 %in) {
264 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") acq_rel
268 ; GCN-LABEL: {{^}}agent_one_as_seq_cst:
269 ; GCN: s_waitcnt vmcnt(0){{$}}
270 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
271 ; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
272 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
273 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
274 ; GFX8-NEXT: buffer_wbinvl1_vol
275 ; GFX10-NEXT: buffer_gl0_inv
276 ; GFX10-NEXT: buffer_gl1_inv
277 ; GFX10: .amdhsa_kernel agent_one_as_seq_cst
278 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
279 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
280 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel taking a pointer %out and a value %in; performs a volatile
; atomicrmw xchg of %in at %out with syncscope "agent-one-as" and
; seq_cst ordering. %val has no visible use in this excerpt.
281 define amdgpu_kernel void @agent_one_as_seq_cst(
282 i32* %out, i32 %in) {
284 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") seq_cst
288 ; GCN-LABEL: {{^}}workgroup_one_as_monotonic:
289 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
290 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
291 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
292 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
293 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
294 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
295 ; GFX10: .amdhsa_kernel workgroup_one_as_monotonic
296 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
297 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
298 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel taking a pointer %out and a value %in; performs a volatile
; atomicrmw xchg of %in at %out with syncscope "workgroup-one-as" and
; monotonic ordering. %val has no visible use in this excerpt.
299 define amdgpu_kernel void @workgroup_one_as_monotonic(
300 i32* %out, i32 %in) {
302 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") monotonic
306 ; GCN-LABEL: {{^}}workgroup_one_as_acquire:
307 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
308 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
309 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
310 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
311 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
312 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
313 ; GFX8-NOT: buffer_wbinvl1_vol
314 ; GFX10WGP-NEXT: buffer_gl0_inv
315 ; GFX10CU-NOT: buffer_gl0_inv
316 ; GFX10: .amdhsa_kernel workgroup_one_as_acquire
317 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
318 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
319 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel taking a pointer %out and a value %in; performs a volatile
; atomicrmw xchg of %in at %out with syncscope "workgroup-one-as" and
; acquire ordering. %val has no visible use in this excerpt.
320 define amdgpu_kernel void @workgroup_one_as_acquire(
321 i32* %out, i32 %in) {
323 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") acquire
327 ; GCN-LABEL: {{^}}workgroup_one_as_release:
328 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
329 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
330 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
331 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
332 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
333 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
334 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
335 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
336 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
337 ; GFX10: .amdhsa_kernel workgroup_one_as_release
338 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
339 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
340 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel taking a pointer %out and a value %in; performs a volatile
; atomicrmw xchg of %in at %out with syncscope "workgroup-one-as" and
; release ordering. %val has no visible use in this excerpt.
341 define amdgpu_kernel void @workgroup_one_as_release(
342 i32* %out, i32 %in) {
344 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") release
348 ; GCN-LABEL: {{^}}workgroup_one_as_acq_rel:
349 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
350 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
351 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
352 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
353 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
354 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
355 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
356 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
357 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
358 ; GFX8-NOT: buffer_wbinvl1_vol
359 ; GFX10WGP-NEXT: buffer_gl0_inv
360 ; GFX10CU-NOT: buffer_gl0_inv
361 ; GFX10: .amdhsa_kernel workgroup_one_as_acq_rel
362 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
363 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
364 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel taking a pointer %out and a value %in; performs a volatile
; atomicrmw xchg of %in at %out with syncscope "workgroup-one-as" and
; acq_rel ordering. %val has no visible use in this excerpt.
365 define amdgpu_kernel void @workgroup_one_as_acq_rel(
366 i32* %out, i32 %in) {
368 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") acq_rel
372 ; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
373 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
374 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
375 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
376 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
377 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
378 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
379 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
380 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
381 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
382 ; GFX8-NOT: buffer_wbinvl1_vol
383 ; GFX10WGP-NEXT: buffer_gl0_inv
384 ; GFX10CU-NOT: buffer_gl0_inv
385 ; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst
386 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
387 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
388 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel taking a pointer %out and a value %in; performs a volatile
; atomicrmw xchg of %in at %out with syncscope "workgroup-one-as" and
; seq_cst ordering. %val has no visible use in this excerpt.
389 define amdgpu_kernel void @workgroup_one_as_seq_cst(
390 i32* %out, i32 %in) {
392 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") seq_cst
396 ; GCN-LABEL: {{^}}wavefront_one_as_monotonic:
397 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
398 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
399 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
400 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
401 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
402 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
403 ; GFX10: .amdhsa_kernel wavefront_one_as_monotonic
404 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
405 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
406 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel taking a pointer %out and a value %in; performs a volatile
; atomicrmw xchg of %in at %out with syncscope "wavefront-one-as" and
; monotonic ordering. %val has no visible use in this excerpt.
407 define amdgpu_kernel void @wavefront_one_as_monotonic(
408 i32* %out, i32 %in) {
410 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") monotonic
414 ; GCN-LABEL: {{^}}wavefront_one_as_acquire:
415 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
416 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
417 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
418 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
419 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
420 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
421 ; GFX10: .amdhsa_kernel wavefront_one_as_acquire
422 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
423 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
424 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel taking a pointer %out and a value %in; performs a volatile
; atomicrmw xchg of %in at %out with syncscope "wavefront-one-as" and
; acquire ordering. %val has no visible use in this excerpt.
425 define amdgpu_kernel void @wavefront_one_as_acquire(
426 i32* %out, i32 %in) {
428 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") acquire
432 ; GCN-LABEL: {{^}}wavefront_one_as_release:
433 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
434 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
435 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
436 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
437 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
438 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
439 ; GFX10: .amdhsa_kernel wavefront_one_as_release
440 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
441 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
442 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel taking a pointer %out and a value %in; performs a volatile
; atomicrmw xchg of %in at %out with syncscope "wavefront-one-as" and
; release ordering. %val has no visible use in this excerpt.
443 define amdgpu_kernel void @wavefront_one_as_release(
444 i32* %out, i32 %in) {
446 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") release
450 ; GCN-LABEL: {{^}}wavefront_one_as_acq_rel:
451 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
452 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
453 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
454 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
455 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
456 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
457 ; GFX10: .amdhsa_kernel wavefront_one_as_acq_rel
458 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
459 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
460 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel taking a pointer %out and a value %in; performs a volatile
; atomicrmw xchg of %in at %out with syncscope "wavefront-one-as" and
; acq_rel ordering. %val has no visible use in this excerpt.
461 define amdgpu_kernel void @wavefront_one_as_acq_rel(
462 i32* %out, i32 %in) {
464 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") acq_rel
468 ; GCN-LABEL: {{^}}wavefront_one_as_seq_cst:
469 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
470 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
471 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
472 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
473 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
474 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
475 ; GFX10: .amdhsa_kernel wavefront_one_as_seq_cst
476 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
477 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
478 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel taking a pointer %out and a value %in; performs a volatile
; atomicrmw xchg of %in at %out with syncscope "wavefront-one-as" and
; seq_cst ordering. %val has no visible use in this excerpt.
479 define amdgpu_kernel void @wavefront_one_as_seq_cst(
480 i32* %out, i32 %in) {
482 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") seq_cst
486 ; GCN-LABEL: {{^}}system_one_as_acquire_ret:
487 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
488 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
489 ; GCN: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
490 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
491 ; GFX8-NEXT: buffer_wbinvl1_vol
492 ; GFX10-NEXT: buffer_gl0_inv
493 ; GFX10-NEXT: buffer_gl1_inv
494 ; GFX10: .amdhsa_kernel system_one_as_acquire_ret
495 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
496 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
497 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Returning variant: volatile atomicrmw xchg of %in at %out with syncscope
; "one-as" and acquire ordering; the previous value (%val) is then written
; back through %out with a plain 4-byte-aligned store, so the result is used.
498 define amdgpu_kernel void @system_one_as_acquire_ret(
499 i32* %out, i32 %in) {
501 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") acquire
502 store i32 %val, i32* %out, align 4
506 ; GCN-LABEL: {{^}}system_one_as_acq_rel_ret:
507 ; GCN: s_waitcnt vmcnt(0){{$}}
508 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
509 ; GCN-NEXT: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
510 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
511 ; GFX8-NEXT: buffer_wbinvl1_vol
512 ; GFX10-NEXT: buffer_gl0_inv
513 ; GFX10-NEXT: buffer_gl1_inv
514 ; GFX10: .amdhsa_kernel system_one_as_acq_rel_ret
515 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
516 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
517 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Returning variant: volatile atomicrmw xchg of %in at %out with syncscope
; "one-as" and acq_rel ordering; the previous value (%val) is then written
; back through %out with a plain 4-byte-aligned store, so the result is used.
518 define amdgpu_kernel void @system_one_as_acq_rel_ret(
519 i32* %out, i32 %in) {
521 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") acq_rel
522 store i32 %val, i32* %out, align 4
526 ; GCN-LABEL: {{^}}system_one_as_seq_cst_ret:
527 ; GCN: s_waitcnt vmcnt(0){{$}}
528 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
529 ; GCN-NEXT: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
530 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
531 ; GFX8-NEXT: buffer_wbinvl1_vol
532 ; GFX10-NEXT: buffer_gl0_inv
533 ; GFX10-NEXT: buffer_gl1_inv
534 ; GFX10: .amdhsa_kernel system_one_as_seq_cst_ret
535 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
536 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
537 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Returning variant: volatile atomicrmw xchg of %in at %out with syncscope
; "one-as" and seq_cst ordering; the previous value (%val) is then written
; back through %out with a plain 4-byte-aligned store, so the result is used.
538 define amdgpu_kernel void @system_one_as_seq_cst_ret(
539 i32* %out, i32 %in) {
541 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") seq_cst
542 store i32 %val, i32* %out, align 4
546 ; GCN-LABEL: {{^}}agent_one_as_acquire_ret:
547 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
548 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
549 ; GCN: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
550 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
551 ; GFX8-NEXT: buffer_wbinvl1_vol
552 ; GFX10-NEXT: buffer_gl0_inv
553 ; GFX10-NEXT: buffer_gl1_inv
554 ; GFX10: .amdhsa_kernel agent_one_as_acquire_ret
555 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
556 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
557 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Returning variant: volatile atomicrmw xchg of %in at %out with syncscope
; "agent-one-as" and acquire ordering; the previous value (%val) is then
; written back through %out with a plain 4-byte-aligned store.
558 define amdgpu_kernel void @agent_one_as_acquire_ret(
559 i32* %out, i32 %in) {
561 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") acquire
562 store i32 %val, i32* %out, align 4
566 ; GCN-LABEL: {{^}}agent_one_as_acq_rel_ret:
567 ; GCN: s_waitcnt vmcnt(0){{$}}
568 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
569 ; GCN-NEXT: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
570 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
571 ; GFX8-NEXT: buffer_wbinvl1_vol
572 ; GFX10-NEXT: buffer_gl0_inv
573 ; GFX10-NEXT: buffer_gl1_inv
574 ; GFX10: .amdhsa_kernel agent_one_as_acq_rel_ret
575 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
576 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
577 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Returning variant: volatile atomicrmw xchg of %in at %out with syncscope
; "agent-one-as" and acq_rel ordering; the previous value (%val) is then
; written back through %out with a plain 4-byte-aligned store.
578 define amdgpu_kernel void @agent_one_as_acq_rel_ret(
579 i32* %out, i32 %in) {
581 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") acq_rel
582 store i32 %val, i32* %out, align 4
586 ; GCN-LABEL: {{^}}agent_one_as_seq_cst_ret:
587 ; GCN: s_waitcnt vmcnt(0){{$}}
588 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
589 ; GCN-NEXT: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
590 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
591 ; GFX8-NEXT: buffer_wbinvl1_vol
592 ; GFX10-NEXT: buffer_gl0_inv
593 ; GFX10-NEXT: buffer_gl1_inv
594 ; GFX10: .amdhsa_kernel agent_one_as_seq_cst_ret
595 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
596 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
597 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Returning variant: volatile atomicrmw xchg of %in at %out with syncscope
; "agent-one-as" and seq_cst ordering; the previous value (%val) is then
; written back through %out with a plain 4-byte-aligned store.
598 define amdgpu_kernel void @agent_one_as_seq_cst_ret(
599 i32* %out, i32 %in) {
601 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") seq_cst
602 store i32 %val, i32* %out, align 4
606 ; GCN-LABEL: {{^}}workgroup_one_as_acquire_ret:
607 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
608 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
609 ; GCN: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
610 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
611 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
612 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
613 ; GFX8-NOT: buffer_wbinvl1_vol
614 ; GFX10WGP-NEXT: buffer_gl0_inv
615 ; GFX10CU-NOT: buffer_gl0_inv
616 ; GFX10: .amdhsa_kernel workgroup_one_as_acquire_ret
617 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
618 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
619 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Returning variant: volatile atomicrmw xchg of %in at %out with syncscope
; "workgroup-one-as" and acquire ordering; the previous value (%val) is then
; written back through %out with a plain 4-byte-aligned store.
620 define amdgpu_kernel void @workgroup_one_as_acquire_ret(
621 i32* %out, i32 %in) {
623 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") acquire
624 store i32 %val, i32* %out, align 4
628 ; GCN-LABEL: {{^}}workgroup_one_as_acq_rel_ret:
629 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
630 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
631 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
632 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
633 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
634 ; GCN: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
635 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
636 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
637 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
638 ; GFX8-NOT: buffer_wbinvl1_vol
639 ; GFX10WGP-NEXT: buffer_gl0_inv
640 ; GFX10CU-NOT: buffer_gl0_inv
641 ; GFX10: .amdhsa_kernel workgroup_one_as_acq_rel_ret
642 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
643 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
644 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Returning variant: volatile atomicrmw xchg of %in at %out with syncscope
; "workgroup-one-as" and acq_rel ordering; the previous value (%val) is then
; written back through %out with a plain 4-byte-aligned store.
645 define amdgpu_kernel void @workgroup_one_as_acq_rel_ret(
646 i32* %out, i32 %in) {
648 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") acq_rel
649 store i32 %val, i32* %out, align 4
653 ; GCN-LABEL: {{^}}workgroup_one_as_seq_cst_ret:
654 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
655 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
656 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
657 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
658 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
659 ; GCN: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
660 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
661 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
662 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
663 ; GFX8-NOT: buffer_wbinvl1_vol
664 ; GFX10WGP-NEXT: buffer_gl0_inv
665 ; GFX10CU-NOT: buffer_gl0_inv
666 ; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst_ret
667 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
668 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
669 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Returning variant: volatile atomicrmw xchg of %in at %out with syncscope
; "workgroup-one-as" and seq_cst ordering; the previous value (%val) is then
; written back through %out with a plain 4-byte-aligned store.
670 define amdgpu_kernel void @workgroup_one_as_seq_cst_ret(
671 i32* %out, i32 %in) {
673 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") seq_cst
674 store i32 %val, i32* %out, align 4
678 ; GCN-LABEL: {{^}}system_monotonic:
679 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
680 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
681 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
682 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
683 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
684 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
685 ; GFX10: .amdhsa_kernel system_monotonic
686 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
687 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
688 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel taking a pointer %out and a value %in; performs a volatile
; atomicrmw xchg of %in at %out with monotonic ordering and no explicit
; syncscope (the function name labels this the system-scope case).
; %val has no visible use in this excerpt.
689 define amdgpu_kernel void @system_monotonic(
690 i32* %out, i32 %in) {
692 %val = atomicrmw volatile xchg i32* %out, i32 %in monotonic
696 ; GCN-LABEL: {{^}}system_acquire:
697 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
698 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
699 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
700 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
701 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
702 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
703 ; GFX8-NEXT: buffer_wbinvl1_vol
704 ; GFX10-NEXT: buffer_gl0_inv
705 ; GFX10-NEXT: buffer_gl1_inv
706 ; GFX10: .amdhsa_kernel system_acquire
707 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
708 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
709 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel taking a pointer %out and a value %in; performs a volatile
; atomicrmw xchg of %in at %out with acquire ordering and no explicit
; syncscope (the function name labels this the system-scope case).
; %val has no visible use in this excerpt.
710 define amdgpu_kernel void @system_acquire(
711 i32* %out, i32 %in) {
713 %val = atomicrmw volatile xchg i32* %out, i32 %in acquire
717 ; GCN-LABEL: {{^}}system_release:
718 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
719 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
720 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
721 ; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
722 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
723 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
724 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
725 ; GFX10: .amdhsa_kernel system_release
726 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
727 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
728 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel taking a pointer %out and a value %in; performs a volatile
; atomicrmw xchg of %in at %out with release ordering and no explicit
; syncscope (the function name labels this the system-scope case).
; %val has no visible use in this excerpt.
729 define amdgpu_kernel void @system_release(
730 i32* %out, i32 %in) {
732 %val = atomicrmw volatile xchg i32* %out, i32 %in release
736 ; GCN-LABEL: {{^}}system_acq_rel:
737 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
738 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
739 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
740 ; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
741 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
742 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
743 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
744 ; GFX8-NEXT: buffer_wbinvl1_vol
745 ; GFX10-NEXT: buffer_gl0_inv
746 ; GFX10-NEXT: buffer_gl1_inv
747 ; GFX10: .amdhsa_kernel system_acq_rel
748 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
749 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
750 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel taking a pointer %out and a value %in; performs a volatile
; atomicrmw xchg of %in at %out with acq_rel ordering and no explicit
; syncscope (the function name labels this the system-scope case).
; %val has no visible use in this excerpt.
751 define amdgpu_kernel void @system_acq_rel(
752 i32* %out, i32 %in) {
754 %val = atomicrmw volatile xchg i32* %out, i32 %in acq_rel
758 ; GCN-LABEL: {{^}}system_seq_cst:
759 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
760 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
761 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
762 ; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
763 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
764 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
765 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
766 ; GFX8-NEXT: buffer_wbinvl1_vol
767 ; GFX10-NEXT: buffer_gl0_inv
768 ; GFX10-NEXT: buffer_gl1_inv
769 ; GFX10: .amdhsa_kernel system_seq_cst
770 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
771 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
772 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel taking a pointer %out and a value %in; performs a volatile
; atomicrmw xchg of %in at %out with seq_cst ordering and no explicit
; syncscope (the function name labels this the system-scope case).
; %val has no visible use in this excerpt.
773 define amdgpu_kernel void @system_seq_cst(
774 i32* %out, i32 %in) {
776 %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst
780 ; GCN-LABEL: {{^}}singlethread_monotonic:
781 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
782 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
783 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
784 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
785 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
786 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
787 ; GFX10: .amdhsa_kernel singlethread_monotonic
788 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
789 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
790 ; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel taking a pointer %out and a value %in; performs a volatile
; atomicrmw xchg of %in at %out with syncscope "singlethread" and
; monotonic ordering. %val has no visible use in this excerpt.
791 define amdgpu_kernel void @singlethread_monotonic(
792 i32* %out, i32 %in) {
794 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") monotonic
; acquire swap at syncscope("singlethread"); the swapped-out value is not used.
; All runs expect the bare flat_atomic_swap with no `s_waitcnt vmcnt(0)` and no
; zero-count s_waitcnt_vmcnt/s_waitcnt_vscnt on either side, and no
; buffer_wbinvl1_vol / buffer_gl*_inv after it.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
798 ; GCN-LABEL: {{^}}singlethread_acquire:
799 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
800 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
801 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
802 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
803 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
804 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
805 ; GFX10: .amdhsa_kernel singlethread_acquire
806 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
807 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
808 ; GFX10-NOT: .amdhsa_memory_ordered 0
809 define amdgpu_kernel void @singlethread_acquire(
810 i32* %out, i32 %in) {
812 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") acquire
; release swap at syncscope("singlethread"); the swapped-out value is not used.
; All runs expect the bare flat_atomic_swap with no `s_waitcnt vmcnt(0)` and no
; zero-count s_waitcnt_vmcnt/s_waitcnt_vscnt on either side, and no
; buffer_wbinvl1_vol / buffer_gl*_inv after it.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
816 ; GCN-LABEL: {{^}}singlethread_release:
817 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
818 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
819 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
820 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
821 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
822 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
823 ; GFX10: .amdhsa_kernel singlethread_release
824 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
825 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
826 ; GFX10-NOT: .amdhsa_memory_ordered 0
827 define amdgpu_kernel void @singlethread_release(
828 i32* %out, i32 %in) {
830 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") release
; acq_rel swap at syncscope("singlethread"); the swapped-out value is not used.
; All runs expect the bare flat_atomic_swap with no `s_waitcnt vmcnt(0)` and no
; zero-count s_waitcnt_vmcnt/s_waitcnt_vscnt on either side, and no
; buffer_wbinvl1_vol / buffer_gl*_inv after it.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
834 ; GCN-LABEL: {{^}}singlethread_acq_rel:
835 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
836 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
837 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
838 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
839 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
840 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
841 ; GFX10: .amdhsa_kernel singlethread_acq_rel
842 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
843 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
844 ; GFX10-NOT: .amdhsa_memory_ordered 0
845 define amdgpu_kernel void @singlethread_acq_rel(
846 i32* %out, i32 %in) {
848 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") acq_rel
; seq_cst swap at syncscope("singlethread"); the swapped-out value is not used.
; All runs expect the bare flat_atomic_swap with no `s_waitcnt vmcnt(0)` and no
; zero-count s_waitcnt_vmcnt/s_waitcnt_vscnt on either side, and no
; buffer_wbinvl1_vol / buffer_gl*_inv after it.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
852 ; GCN-LABEL: {{^}}singlethread_seq_cst:
853 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
854 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
855 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
856 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
857 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
858 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
859 ; GFX10: .amdhsa_kernel singlethread_seq_cst
860 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
861 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
862 ; GFX10-NOT: .amdhsa_memory_ordered 0
863 define amdgpu_kernel void @singlethread_seq_cst(
864 i32* %out, i32 %in) {
866 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") seq_cst
; monotonic swap at syncscope("agent"); the swapped-out value is not used.
; All runs expect the bare flat_atomic_swap with no `s_waitcnt vmcnt(0)` and no
; zero-count s_waitcnt_vmcnt/s_waitcnt_vscnt on either side, and no
; buffer_wbinvl1_vol / buffer_gl*_inv after it.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
870 ; GCN-LABEL: {{^}}agent_monotonic:
871 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
872 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
873 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
874 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
875 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
876 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
877 ; GFX10: .amdhsa_kernel agent_monotonic
878 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
879 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
880 ; GFX10-NOT: .amdhsa_memory_ordered 0
881 define amdgpu_kernel void @agent_monotonic(
882 i32* %out, i32 %in) {
884 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") monotonic
; acquire swap at syncscope("agent"); the swapped-out value is not used.
; No zero-count vmcnt/vscnt wait may precede the swap. Directly after it, the gfx803
; run expects `s_waitcnt vmcnt(0) lgkmcnt(0)` then buffer_wbinvl1_vol; the gfx1010
; runs expect an lgkmcnt(0) wait, a zero vscnt wait, buffer_gl0_inv and buffer_gl1_inv.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
888 ; GCN-LABEL: {{^}}agent_acquire:
889 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
890 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
891 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
892 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
893 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
894 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
895 ; GFX8-NEXT: buffer_wbinvl1_vol
896 ; GFX10-NEXT: buffer_gl0_inv
897 ; GFX10-NEXT: buffer_gl1_inv
898 ; GFX10: .amdhsa_kernel agent_acquire
899 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
900 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
901 ; GFX10-NOT: .amdhsa_memory_ordered 0
902 define amdgpu_kernel void @agent_acquire(
903 i32* %out, i32 %in) {
905 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") acquire
; release swap at syncscope("agent"); the swapped-out value is not used.
; Before the swap the gfx803 run expects `s_waitcnt vmcnt(0) lgkmcnt(0)`; the gfx1010
; runs expect an lgkmcnt(0) wait and a zero vscnt wait. The swap must be on the very
; next line. After it, no zero-count vmcnt/vscnt wait and no cache invalidate.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
909 ; GCN-LABEL: {{^}}agent_release:
910 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
911 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
912 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
913 ; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
914 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
915 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
916 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
917 ; GFX10: .amdhsa_kernel agent_release
918 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
919 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
920 ; GFX10-NOT: .amdhsa_memory_ordered 0
921 define amdgpu_kernel void @agent_release(
922 i32* %out, i32 %in) {
924 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") release
; acq_rel swap at syncscope("agent"); the swapped-out value is not used.
; gfx803 run expects `s_waitcnt vmcnt(0) lgkmcnt(0)` on the line before and the line
; after the swap, then buffer_wbinvl1_vol. gfx1010 runs expect an lgkmcnt(0) wait and
; a zero vscnt wait on both sides of the swap, then buffer_gl0_inv and buffer_gl1_inv.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
928 ; GCN-LABEL: {{^}}agent_acq_rel:
929 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
930 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
931 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
932 ; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
933 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
934 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
935 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
936 ; GFX8-NEXT: buffer_wbinvl1_vol
937 ; GFX10-NEXT: buffer_gl0_inv
938 ; GFX10-NEXT: buffer_gl1_inv
939 ; GFX10: .amdhsa_kernel agent_acq_rel
940 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
941 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
942 ; GFX10-NOT: .amdhsa_memory_ordered 0
943 define amdgpu_kernel void @agent_acq_rel(
944 i32* %out, i32 %in) {
946 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") acq_rel
; seq_cst swap at syncscope("agent"); the swapped-out value is not used.
; gfx803 run expects `s_waitcnt vmcnt(0) lgkmcnt(0)` on the line before and the line
; after the swap, then buffer_wbinvl1_vol. gfx1010 runs expect an lgkmcnt(0) wait and
; a zero vscnt wait on both sides of the swap, then buffer_gl0_inv and buffer_gl1_inv.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
950 ; GCN-LABEL: {{^}}agent_seq_cst:
951 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
952 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
953 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
954 ; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
955 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
956 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
957 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
958 ; GFX8-NEXT: buffer_wbinvl1_vol
959 ; GFX10-NEXT: buffer_gl0_inv
960 ; GFX10-NEXT: buffer_gl1_inv
961 ; GFX10: .amdhsa_kernel agent_seq_cst
962 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
963 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
964 ; GFX10-NOT: .amdhsa_memory_ordered 0
965 define amdgpu_kernel void @agent_seq_cst(
966 i32* %out, i32 %in) {
968 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") seq_cst
; monotonic swap at syncscope("workgroup"); the swapped-out value is not used.
; All runs expect the bare flat_atomic_swap with no `s_waitcnt vmcnt(0)` and no
; zero-count s_waitcnt_vmcnt/s_waitcnt_vscnt on either side, and no
; buffer_wbinvl1_vol / buffer_gl*_inv after it.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
972 ; GCN-LABEL: {{^}}workgroup_monotonic:
973 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
974 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
975 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
976 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
977 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
978 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
979 ; GFX10: .amdhsa_kernel workgroup_monotonic
980 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
981 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
982 ; GFX10-NOT: .amdhsa_memory_ordered 0
983 define amdgpu_kernel void @workgroup_monotonic(
984 i32* %out, i32 %in) {
986 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") monotonic
; acquire swap at syncscope("workgroup"); the swapped-out value is not used.
; The expectations differ per run. After the swap, only the gfx1010 run without
; +cumode expects a zero vscnt wait followed on the next line by buffer_gl0_inv.
; The +cumode run forbids both, and the gfx803 run forbids `s_waitcnt vmcnt(0)` and
; buffer_wbinvl1_vol.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
990 ; GCN-LABEL: {{^}}workgroup_acquire:
991 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
992 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
993 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
994 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
995 ; GFX10WGP: s_waitcnt_vscnt null, 0x0{{$}}
996 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
997 ; GFX8-NOT: buffer_wbinvl1_vol
998 ; GFX10WGP-NEXT: buffer_gl0_inv
999 ; GFX10CU-NOT: buffer_gl0_inv
1000 ; GFX10: .amdhsa_kernel workgroup_acquire
1001 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1002 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1003 ; GFX10-NOT: .amdhsa_memory_ordered 0
1004 define amdgpu_kernel void @workgroup_acquire(
1005 i32* %out, i32 %in) {
1007 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") acquire
; release swap at syncscope("workgroup"); the swapped-out value is not used.
; Before the swap, only the gfx1010 run without +cumode expects
; `s_waitcnt vmcnt(0) lgkmcnt(0)` immediately followed by a zero vscnt wait; the
; +cumode and gfx803 runs forbid the corresponding waits. After the swap, no run may
; show a zero-count vmcnt/vscnt wait or a cache invalidate.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
1011 ; GCN-LABEL: {{^}}workgroup_release:
1012 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1013 ; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1014 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1015 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1016 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
1017 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
1018 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1019 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1020 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
1021 ; GFX10: .amdhsa_kernel workgroup_release
1022 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1023 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1024 ; GFX10-NOT: .amdhsa_memory_ordered 0
1025 define amdgpu_kernel void @workgroup_release(
1026 i32* %out, i32 %in) {
1028 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") release
; acq_rel swap at syncscope("workgroup"); the swapped-out value is not used.
; Before the swap, only the gfx1010 run without +cumode expects
; `s_waitcnt vmcnt(0) lgkmcnt(0)` immediately followed by a zero vscnt wait. The
; second wait is matched on the next line, as in the workgroup release and seq_cst
; tests, so nothing may be scheduled between the two waits.
; After the swap, that same run expects a zero vscnt wait followed on the next line
; by buffer_gl0_inv. The +cumode and gfx803 runs forbid the corresponding waits and
; invalidates on both sides.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
1032 ; GCN-LABEL: {{^}}workgroup_acq_rel:
1033 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1034 ; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1035 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1036 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1037 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
1038 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
1039 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1040 ; GFX10WGP: s_waitcnt_vscnt null, 0x0{{$}}
1041 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
1042 ; GFX8-NOT: buffer_wbinvl1_vol
1043 ; GFX10WGP-NEXT: buffer_gl0_inv
1044 ; GFX10CU-NOT: buffer_gl0_inv
1045 ; GFX10: .amdhsa_kernel workgroup_acq_rel
1046 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1047 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1048 ; GFX10-NOT: .amdhsa_memory_ordered 0
1049 define amdgpu_kernel void @workgroup_acq_rel(
1050 i32* %out, i32 %in) {
1052 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") acq_rel
; seq_cst swap at syncscope("workgroup"); the swapped-out value is not used.
; Before the swap, only the gfx1010 run without +cumode expects
; `s_waitcnt vmcnt(0) lgkmcnt(0)` immediately followed by a zero vscnt wait. After
; the swap, that same run expects a zero vscnt wait followed on the next line by
; buffer_gl0_inv. The +cumode and gfx803 runs forbid the corresponding waits and
; invalidates on both sides.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
1056 ; GCN-LABEL: {{^}}workgroup_seq_cst:
1057 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1058 ; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1059 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1060 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1061 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
1062 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
1063 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1064 ; GFX10WGP: s_waitcnt_vscnt null, 0x0{{$}}
1065 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
1066 ; GFX8-NOT: buffer_wbinvl1_vol
1067 ; GFX10WGP-NEXT: buffer_gl0_inv
1068 ; GFX10CU-NOT: buffer_gl0_inv
1069 ; GFX10: .amdhsa_kernel workgroup_seq_cst
1070 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1071 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1072 ; GFX10-NOT: .amdhsa_memory_ordered 0
1073 define amdgpu_kernel void @workgroup_seq_cst(
1074 i32* %out, i32 %in) {
1076 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") seq_cst
; monotonic swap at syncscope("wavefront"); the swapped-out value is not used.
; All runs expect the bare flat_atomic_swap with no `s_waitcnt vmcnt(0)` and no
; zero-count s_waitcnt_vmcnt/s_waitcnt_vscnt on either side, and no
; buffer_wbinvl1_vol / buffer_gl*_inv after it.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
1080 ; GCN-LABEL: {{^}}wavefront_monotonic:
1081 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1082 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1083 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
1084 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1085 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1086 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
1087 ; GFX10: .amdhsa_kernel wavefront_monotonic
1088 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1089 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1090 ; GFX10-NOT: .amdhsa_memory_ordered 0
1091 define amdgpu_kernel void @wavefront_monotonic(
1092 i32* %out, i32 %in) {
1094 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") monotonic
; acquire swap at syncscope("wavefront"); the swapped-out value is not used.
; All runs expect the bare flat_atomic_swap with no `s_waitcnt vmcnt(0)` and no
; zero-count s_waitcnt_vmcnt/s_waitcnt_vscnt on either side, and no
; buffer_wbinvl1_vol / buffer_gl*_inv after it.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
1098 ; GCN-LABEL: {{^}}wavefront_acquire:
1099 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1100 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1101 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
1102 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1103 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1104 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
1105 ; GFX10: .amdhsa_kernel wavefront_acquire
1106 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1107 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1108 ; GFX10-NOT: .amdhsa_memory_ordered 0
1109 define amdgpu_kernel void @wavefront_acquire(
1110 i32* %out, i32 %in) {
1112 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") acquire
; release swap at syncscope("wavefront"); the swapped-out value is not used.
; All runs expect the bare flat_atomic_swap with no `s_waitcnt vmcnt(0)` and no
; zero-count s_waitcnt_vmcnt/s_waitcnt_vscnt on either side, and no
; buffer_wbinvl1_vol / buffer_gl*_inv after it.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
1116 ; GCN-LABEL: {{^}}wavefront_release:
1117 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1118 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1119 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
1120 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1121 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1122 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
1123 ; GFX10: .amdhsa_kernel wavefront_release
1124 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1125 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1126 ; GFX10-NOT: .amdhsa_memory_ordered 0
1127 define amdgpu_kernel void @wavefront_release(
1128 i32* %out, i32 %in) {
1130 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") release
; acq_rel swap at syncscope("wavefront"); the swapped-out value is not used.
; All runs expect the bare flat_atomic_swap with no `s_waitcnt vmcnt(0)` and no
; zero-count s_waitcnt_vmcnt/s_waitcnt_vscnt on either side, and no
; buffer_wbinvl1_vol / buffer_gl*_inv after it.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
1134 ; GCN-LABEL: {{^}}wavefront_acq_rel:
1135 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1136 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1137 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
1138 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1139 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1140 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
1141 ; GFX10: .amdhsa_kernel wavefront_acq_rel
1142 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1143 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1144 ; GFX10-NOT: .amdhsa_memory_ordered 0
1145 define amdgpu_kernel void @wavefront_acq_rel(
1146 i32* %out, i32 %in) {
1148 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") acq_rel
; seq_cst swap at syncscope("wavefront"); the swapped-out value is not used.
; All runs expect the bare flat_atomic_swap with no `s_waitcnt vmcnt(0)` and no
; zero-count s_waitcnt_vmcnt/s_waitcnt_vscnt on either side, and no
; buffer_wbinvl1_vol / buffer_gl*_inv after it.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
1152 ; GCN-LABEL: {{^}}wavefront_seq_cst:
1153 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1154 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1155 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
1156 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1157 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1158 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
1159 ; GFX10: .amdhsa_kernel wavefront_seq_cst
1160 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1161 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1162 ; GFX10-NOT: .amdhsa_memory_ordered 0
1163 define amdgpu_kernel void @wavefront_seq_cst(
1164 i32* %out, i32 %in) {
1166 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") seq_cst
; acquire swap with no explicit syncscope whose result is stored back through %out,
; so the expected instruction has a destination VGPR and the glc modifier.
; No zero-count vmcnt/vscnt wait may precede the swap. Directly after it, every run
; expects `s_waitcnt vmcnt(0) lgkmcnt(0)`, then buffer_wbinvl1_vol on gfx803 or
; buffer_gl0_inv and buffer_gl1_inv on gfx1010.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
1170 ; GCN-LABEL: {{^}}system_acquire_ret:
1171 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1172 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1173 ; GCN: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
1174 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1175 ; GFX8-NEXT: buffer_wbinvl1_vol
1176 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1177 ; GFX10-NEXT: buffer_gl0_inv
1178 ; GFX10-NEXT: buffer_gl1_inv
1179 ; GFX10: .amdhsa_kernel system_acquire_ret
1180 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1181 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1182 ; GFX10-NOT: .amdhsa_memory_ordered 0
1183 define amdgpu_kernel void @system_acquire_ret(
1184 i32* %out, i32 %in) {
1186 %val = atomicrmw volatile xchg i32* %out, i32 %in acquire
1187 store i32 %val, i32* %out, align 4
; acq_rel swap with no explicit syncscope whose result is stored back through %out,
; so the expected instruction has a destination VGPR and the glc modifier.
; Before the swap the gfx803 run expects `s_waitcnt vmcnt(0) lgkmcnt(0)`; the gfx1010
; runs expect an lgkmcnt(0) wait and a zero vscnt wait. Directly after the swap every
; run expects `s_waitcnt vmcnt(0) lgkmcnt(0)`, then buffer_wbinvl1_vol on gfx803 or
; buffer_gl0_inv and buffer_gl1_inv on gfx1010.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
1191 ; GCN-LABEL: {{^}}system_acq_rel_ret:
1192 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1193 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
1194 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
1195 ; GCN-NEXT: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
1196 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1197 ; GFX8-NEXT: buffer_wbinvl1_vol
1198 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1199 ; GFX10-NEXT: buffer_gl0_inv
1200 ; GFX10-NEXT: buffer_gl1_inv
1201 ; GFX10: .amdhsa_kernel system_acq_rel_ret
1202 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1203 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1204 ; GFX10-NOT: .amdhsa_memory_ordered 0
1205 define amdgpu_kernel void @system_acq_rel_ret(
1206 i32* %out, i32 %in) {
1208 %val = atomicrmw volatile xchg i32* %out, i32 %in acq_rel
1209 store i32 %val, i32* %out, align 4
; seq_cst swap with no explicit syncscope whose result is stored back through %out,
; so the expected instruction has a destination VGPR and the glc modifier.
; Before the swap the gfx803 run expects `s_waitcnt vmcnt(0) lgkmcnt(0)`; the gfx1010
; runs expect an lgkmcnt(0) wait and a zero vscnt wait. Directly after the swap every
; run expects `s_waitcnt vmcnt(0) lgkmcnt(0)`, then buffer_wbinvl1_vol on gfx803 or
; buffer_gl0_inv and buffer_gl1_inv on gfx1010.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
1213 ; GCN-LABEL: {{^}}system_seq_cst_ret:
1214 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1215 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
1216 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
1217 ; GCN-NEXT: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
1218 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1219 ; GFX8-NEXT: buffer_wbinvl1_vol
1220 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1221 ; GFX10-NEXT: buffer_gl0_inv
1222 ; GFX10-NEXT: buffer_gl1_inv
1223 ; GFX10: .amdhsa_kernel system_seq_cst_ret
1224 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1225 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1226 ; GFX10-NOT: .amdhsa_memory_ordered 0
1227 define amdgpu_kernel void @system_seq_cst_ret(
1228 i32* %out, i32 %in) {
1230 %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst
1231 store i32 %val, i32* %out, align 4
; acquire swap at syncscope("agent") whose result is stored back through %out, so
; the expected instruction has a destination VGPR and the glc modifier.
; No zero-count vmcnt/vscnt wait may precede the swap. Directly after it, every run
; expects `s_waitcnt vmcnt(0) lgkmcnt(0)`, then buffer_wbinvl1_vol on gfx803 or
; buffer_gl0_inv and buffer_gl1_inv on gfx1010.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
1235 ; GCN-LABEL: {{^}}agent_acquire_ret:
1236 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1237 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1238 ; GCN: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
1239 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1240 ; GFX8-NEXT: buffer_wbinvl1_vol
1241 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1242 ; GFX10-NEXT: buffer_gl0_inv
1243 ; GFX10-NEXT: buffer_gl1_inv
1244 ; GFX10: .amdhsa_kernel agent_acquire_ret
1245 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1246 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1247 ; GFX10-NOT: .amdhsa_memory_ordered 0
1248 define amdgpu_kernel void @agent_acquire_ret(
1249 i32* %out, i32 %in) {
1251 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") acquire
1252 store i32 %val, i32* %out, align 4
; acq_rel swap at syncscope("agent") whose result is stored back through %out, so
; the expected instruction has a destination VGPR and the glc modifier.
; Before the swap the gfx803 run expects `s_waitcnt vmcnt(0) lgkmcnt(0)`; the gfx1010
; runs expect an lgkmcnt(0) wait and a zero vscnt wait. Directly after the swap every
; run expects `s_waitcnt vmcnt(0) lgkmcnt(0)`, then buffer_wbinvl1_vol on gfx803 or
; buffer_gl0_inv and buffer_gl1_inv on gfx1010.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
1256 ; GCN-LABEL: {{^}}agent_acq_rel_ret:
1257 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1258 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
1259 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
1260 ; GCN-NEXT: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
1261 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1262 ; GFX8-NEXT: buffer_wbinvl1_vol
1263 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1264 ; GFX10-NEXT: buffer_gl0_inv
1265 ; GFX10-NEXT: buffer_gl1_inv
1266 ; GFX10: .amdhsa_kernel agent_acq_rel_ret
1267 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1268 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1269 ; GFX10-NOT: .amdhsa_memory_ordered 0
1270 define amdgpu_kernel void @agent_acq_rel_ret(
1271 i32* %out, i32 %in) {
1273 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") acq_rel
1274 store i32 %val, i32* %out, align 4
; seq_cst swap at syncscope("agent") whose result is stored back through %out, so
; the expected instruction has a destination VGPR and the glc modifier.
; Before the swap the gfx803 run expects `s_waitcnt vmcnt(0) lgkmcnt(0)`; the gfx1010
; runs expect an lgkmcnt(0) wait and a zero vscnt wait. Directly after the swap every
; run expects `s_waitcnt vmcnt(0) lgkmcnt(0)`, then buffer_wbinvl1_vol on gfx803 or
; buffer_gl0_inv and buffer_gl1_inv on gfx1010.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
1278 ; GCN-LABEL: {{^}}agent_seq_cst_ret:
1279 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1280 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
1281 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
1282 ; GCN-NEXT: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
1283 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1284 ; GFX8-NEXT: buffer_wbinvl1_vol
1285 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1286 ; GFX10-NEXT: buffer_gl0_inv
1287 ; GFX10-NEXT: buffer_gl1_inv
1288 ; GFX10: .amdhsa_kernel agent_seq_cst_ret
1289 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1290 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1291 ; GFX10-NOT: .amdhsa_memory_ordered 0
1292 define amdgpu_kernel void @agent_seq_cst_ret(
1293 i32* %out, i32 %in) {
1295 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") seq_cst
1296 store i32 %val, i32* %out, align 4
; acquire swap at syncscope("workgroup") whose result is stored back through %out,
; so the expected instruction has a destination VGPR and the glc modifier.
; No zero-count vmcnt/vscnt wait may precede the swap. Directly after it, only the
; gfx1010 run without +cumode expects `s_waitcnt vmcnt(0) lgkmcnt(0)` and then
; buffer_gl0_inv. The +cumode run forbids `s_waitcnt vmcnt(0)` and buffer_gl0_inv,
; and the gfx803 run forbids `s_waitcnt vmcnt(0)` and buffer_wbinvl1_vol.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
1300 ; GCN-LABEL: {{^}}workgroup_acquire_ret:
1301 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1302 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1303 ; GCN: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
1304 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1305 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1306 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1307 ; GFX8-NOT: buffer_wbinvl1_vol
1308 ; GFX10WGP-NEXT: buffer_gl0_inv
1309 ; GFX10CU-NOT: buffer_gl0_inv
1310 ; GFX10: .amdhsa_kernel workgroup_acquire_ret
1311 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1312 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1313 ; GFX10-NOT: .amdhsa_memory_ordered 0
1314 define amdgpu_kernel void @workgroup_acquire_ret(
1315 i32* %out, i32 %in) {
1317 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") acquire
1318 store i32 %val, i32* %out, align 4
; acq_rel swap at syncscope("workgroup") whose result is stored back through %out,
; so the expected instruction has a destination VGPR and the glc modifier.
; Before the swap, only the gfx1010 run without +cumode expects
; `s_waitcnt vmcnt(0) lgkmcnt(0)` immediately followed by a zero vscnt wait. Directly
; after the swap, that same run expects `s_waitcnt vmcnt(0) lgkmcnt(0)` and then
; buffer_gl0_inv. The +cumode and gfx803 runs forbid the corresponding waits and
; invalidates on both sides.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
1322 ; GCN-LABEL: {{^}}workgroup_acq_rel_ret:
1323 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1324 ; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1325 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1326 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1327 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
1328 ; GCN: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
1329 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1330 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1331 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1332 ; GFX8-NOT: buffer_wbinvl1_vol
1333 ; GFX10WGP-NEXT: buffer_gl0_inv
1334 ; GFX10CU-NOT: buffer_gl0_inv
1335 ; GFX10: .amdhsa_kernel workgroup_acq_rel_ret
1336 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1337 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1338 ; GFX10-NOT: .amdhsa_memory_ordered 0
1339 define amdgpu_kernel void @workgroup_acq_rel_ret(
1340 i32* %out, i32 %in) {
1342 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") acq_rel
1343 store i32 %val, i32* %out, align 4
; seq_cst swap at syncscope("workgroup") whose result is stored back through %out,
; so the expected instruction has a destination VGPR and the glc modifier.
; Before the swap, only the gfx1010 run without +cumode expects
; `s_waitcnt vmcnt(0) lgkmcnt(0)` immediately followed by a zero vscnt wait. Directly
; after the swap, that same run expects `s_waitcnt vmcnt(0) lgkmcnt(0)` and then
; buffer_gl0_inv. The +cumode and gfx803 runs forbid the corresponding waits and
; invalidates on both sides.
; Kernel descriptor on gfx1010 must show workgroup_processor_mode 0 only with +cumode
; and must not show memory_ordered 0.
1347 ; GCN-LABEL: {{^}}workgroup_seq_cst_ret:
1348 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1349 ; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1350 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1351 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1352 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
1353 ; GCN: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
1354 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1355 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1356 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1357 ; GFX8-NOT: buffer_wbinvl1_vol
1358 ; GFX10WGP-NEXT: buffer_gl0_inv
1359 ; GFX10CU-NOT: buffer_gl0_inv
1360 ; GFX10: .amdhsa_kernel workgroup_seq_cst_ret
1361 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1362 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1363 ; GFX10-NOT: .amdhsa_memory_ordered 0
1364 define amdgpu_kernel void @workgroup_seq_cst_ret(
1365 i32* %out, i32 %in) {
1367 %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") seq_cst
1368 store i32 %val, i32* %out, align 4