Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / test / CodeGen / AMDGPU / GlobalISel / memory-legalizer-atomic-fence.ll
blob0fbfe4cb6f35f130c0f5e04f0287a387afc34462
1 ; RUN: llc -global-isel -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX6,GFX68 %s
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX8,GFX68 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX8,GFX68 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX10,GFX10WGP %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX10,GFX10CU %s
6 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX10,GFX10WGP %s
7 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX10,GFX10CU %s
9 ; FUNC-LABEL: {{^}}system_one_as_acquire:
10 ; GCN:        %bb.0
11 ; GCN-NOT:    ATOMIC_FENCE
12 ; GFX6:       s_waitcnt vmcnt(0){{$}}
13 ; GFX6-NEXT:  buffer_wbinvl1{{$}}
14 ; GFX8:       s_waitcnt vmcnt(0){{$}}
15 ; GFX8-NEXT:  buffer_wbinvl1_vol{{$}}
16 ; GFX10:      s_waitcnt vmcnt(0){{$}}
17 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
18 ; GFX10-NEXT: buffer_gl0_inv{{$}}
19 ; GFX10-NEXT: buffer_gl1_inv{{$}}
20 ; GCN:        s_endpgm
21 ; GFX10:         .amdhsa_kernel system_one_as_acquire
22 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
23 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
24 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("one-as") acquire`.
; The directives above expect `s_waitcnt vmcnt(0)` immediately followed by the
; per-target instructions (buffer_wbinvl1 on gfx600, buffer_wbinvl1_vol on
; gfx803, s_waitcnt_vscnt + buffer_gl0_inv + buffer_gl1_inv on gfx1010/gfx1100)
; and no ATOMIC_FENCE text before s_endpgm.
25 define amdgpu_kernel void @system_one_as_acquire() {
26 entry:
27   fence syncscope("one-as") acquire
28   ret void
31 ; FUNC-LABEL: {{^}}system_one_as_release:
32 ; GCN:        %bb.0
33 ; GCN-NOT:    ATOMIC_FENCE
34 ; GCN:        s_waitcnt vmcnt(0){{$}}
35 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
36 ; GCN:        s_endpgm
37 ; GFX10:         .amdhsa_kernel system_one_as_release
38 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
39 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
40 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("one-as") release`.
; The directives above expect `s_waitcnt vmcnt(0)` on every run line, followed
; directly by `s_waitcnt_vscnt null, 0x0` on gfx1010/gfx1100; they name no
; buffer_* instruction for this case.
41 define amdgpu_kernel void @system_one_as_release() {
42 entry:
43   fence syncscope("one-as") release
44   ret void
47 ; FUNC-LABEL: {{^}}system_one_as_acq_rel:
48 ; GCN:        %bb.0
49 ; GCN-NOT:    ATOMIC_FENCE
50 ; GCN:        s_waitcnt vmcnt(0){{$}}
51 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
52 ; GFX6:       buffer_wbinvl1{{$}}
53 ; GFX8:       buffer_wbinvl1_vol{{$}}
54 ; GFX10-NEXT: buffer_gl0_inv{{$}}
55 ; GFX10-NEXT: buffer_gl1_inv{{$}}
56 ; GCN:        s_endpgm
57 ; GFX10:         .amdhsa_kernel system_one_as_acq_rel
58 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
59 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
60 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("one-as") acq_rel`.
; The directives above expect `s_waitcnt vmcnt(0)` and then buffer_wbinvl1
; (gfx600), buffer_wbinvl1_vol (gfx803), or s_waitcnt_vscnt + buffer_gl0_inv +
; buffer_gl1_inv on consecutive lines (gfx1010/gfx1100).
61 define amdgpu_kernel void @system_one_as_acq_rel() {
62 entry:
63   fence syncscope("one-as") acq_rel
64   ret void
67 ; FUNC-LABEL: {{^}}system_one_as_seq_cst:
68 ; GCN:        %bb.0
69 ; GCN-NOT:    ATOMIC_FENCE
70 ; GCN:        s_waitcnt vmcnt(0){{$}}
71 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
72 ; GFX6:       buffer_wbinvl1{{$}}
73 ; GFX8:       buffer_wbinvl1_vol{{$}}
74 ; GFX10-NEXT: buffer_gl0_inv{{$}}
75 ; GFX10-NEXT: buffer_gl1_inv{{$}}
76 ; GCN:        s_endpgm
77 ; GFX10:         .amdhsa_kernel system_one_as_seq_cst
78 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
79 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
80 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("one-as") seq_cst`.
; The directives above expect `s_waitcnt vmcnt(0)` and then buffer_wbinvl1
; (gfx600), buffer_wbinvl1_vol (gfx803), or s_waitcnt_vscnt + buffer_gl0_inv +
; buffer_gl1_inv on consecutive lines (gfx1010/gfx1100).
81 define amdgpu_kernel void @system_one_as_seq_cst() {
82 entry:
83   fence syncscope("one-as") seq_cst
84   ret void
87 ; FUNC-LABEL: {{^}}singlethread_one_as_acquire:
88 ; GCN:        %bb.0
89 ; GCN-NOT:    ATOMIC_FENCE
90 ; GCN:        s_endpgm
91 ; GFX10:         .amdhsa_kernel singlethread_one_as_acquire
92 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
93 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
94 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("singlethread-one-as") acquire`.
; The directives above only require that no ATOMIC_FENCE text appears between
; %bb.0 and s_endpgm; they name no wait or buffer instruction for this case.
95 define amdgpu_kernel void @singlethread_one_as_acquire() {
96 entry:
97   fence syncscope("singlethread-one-as") acquire
98   ret void
101 ; FUNC-LABEL: {{^}}singlethread_one_as_release:
102 ; GCN:        %bb.0
103 ; GCN-NOT:    ATOMIC_FENCE
104 ; GCN:        s_endpgm
105 ; GFX10:         .amdhsa_kernel singlethread_one_as_release
106 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
107 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
108 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("singlethread-one-as") release`.
; The directives above only require that no ATOMIC_FENCE text appears between
; %bb.0 and s_endpgm; they name no wait or buffer instruction for this case.
109 define amdgpu_kernel void @singlethread_one_as_release() {
110 entry:
111   fence syncscope("singlethread-one-as") release
112   ret void
115 ; FUNC-LABEL: {{^}}singlethread_one_as_acq_rel:
116 ; GCN:        %bb.0
117 ; GCN-NOT:    ATOMIC_FENCE
118 ; GCN:        s_endpgm
119 ; GFX10:         .amdhsa_kernel singlethread_one_as_acq_rel
120 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
121 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
122 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("singlethread-one-as") acq_rel`.
; The directives above only require that no ATOMIC_FENCE text appears between
; %bb.0 and s_endpgm; they name no wait or buffer instruction for this case.
123 define amdgpu_kernel void @singlethread_one_as_acq_rel() {
124 entry:
125   fence syncscope("singlethread-one-as") acq_rel
126   ret void
129 ; FUNC-LABEL: {{^}}singlethread_one_as_seq_cst:
130 ; GCN:        %bb.0
131 ; GCN-NOT:    ATOMIC_FENCE
132 ; GCN:        s_endpgm
133 ; GFX10:         .amdhsa_kernel singlethread_one_as_seq_cst
134 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
135 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
136 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("singlethread-one-as") seq_cst`.
; The directives above only require that no ATOMIC_FENCE text appears between
; %bb.0 and s_endpgm; they name no wait or buffer instruction for this case.
137 define amdgpu_kernel void @singlethread_one_as_seq_cst() {
138 entry:
139   fence syncscope("singlethread-one-as") seq_cst
140   ret void
143 ; FUNC-LABEL: {{^}}agent_one_as_acquire:
144 ; GCN:        %bb.0
145 ; GCN-NOT:    ATOMIC_FENCE
146 ; GFX6:       s_waitcnt vmcnt(0){{$}}
147 ; GFX6-NEXT:  buffer_wbinvl1{{$}}
148 ; GFX8:       s_waitcnt vmcnt(0){{$}}
149 ; GFX8-NEXT:  buffer_wbinvl1_vol{{$}}
150 ; GFX10:      s_waitcnt vmcnt(0){{$}}
151 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
152 ; GFX10-NEXT: buffer_gl0_inv{{$}}
153 ; GFX10-NEXT: buffer_gl1_inv{{$}}
154 ; GCN:        s_endpgm
155 ; GFX10:         .amdhsa_kernel agent_one_as_acquire
156 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
157 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
158 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("agent-one-as") acquire`.
; The directives above expect `s_waitcnt vmcnt(0)` immediately followed by the
; per-target instructions (buffer_wbinvl1 on gfx600, buffer_wbinvl1_vol on
; gfx803, s_waitcnt_vscnt + buffer_gl0_inv + buffer_gl1_inv on gfx1010/gfx1100)
; and no ATOMIC_FENCE text before s_endpgm.
159 define amdgpu_kernel void @agent_one_as_acquire() {
160 entry:
161   fence syncscope("agent-one-as") acquire
162   ret void
165 ; FUNC-LABEL: {{^}}agent_one_as_release:
166 ; GCN:        %bb.0
167 ; GCN-NOT:    ATOMIC_FENCE
168 ; GCN:        s_waitcnt vmcnt(0){{$}}
169 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
170 ; GCN:        s_endpgm
171 ; GFX10:         .amdhsa_kernel agent_one_as_release
172 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
173 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
174 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("agent-one-as") release`.
; The directives above expect `s_waitcnt vmcnt(0)` on every run line, followed
; directly by `s_waitcnt_vscnt null, 0x0` on gfx1010/gfx1100; they name no
; buffer_* instruction for this case.
175 define amdgpu_kernel void @agent_one_as_release() {
176 entry:
177   fence syncscope("agent-one-as") release
178   ret void
181 ; FUNC-LABEL: {{^}}agent_one_as_acq_rel:
182 ; GCN:        %bb.0
183 ; GCN-NOT:    ATOMIC_FENCE
184 ; GCN:        s_waitcnt vmcnt(0){{$}}
185 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
186 ; GFX6:       buffer_wbinvl1{{$}}
187 ; GFX8:       buffer_wbinvl1_vol{{$}}
188 ; GFX10-NEXT: buffer_gl0_inv{{$}}
189 ; GFX10-NEXT: buffer_gl1_inv{{$}}
190 ; GCN:        s_endpgm
191 ; GFX10:         .amdhsa_kernel agent_one_as_acq_rel
192 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
193 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
194 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("agent-one-as") acq_rel`.
; The directives above expect `s_waitcnt vmcnt(0)` and then buffer_wbinvl1
; (gfx600), buffer_wbinvl1_vol (gfx803), or s_waitcnt_vscnt + buffer_gl0_inv +
; buffer_gl1_inv on consecutive lines (gfx1010/gfx1100).
195 define amdgpu_kernel void @agent_one_as_acq_rel() {
196 entry:
197   fence syncscope("agent-one-as") acq_rel
198   ret void
201 ; FUNC-LABEL: {{^}}agent_one_as_seq_cst:
202 ; GCN:        %bb.0
203 ; GCN-NOT:    ATOMIC_FENCE
204 ; GCN:        s_waitcnt vmcnt(0){{$}}
205 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
206 ; GFX6:       buffer_wbinvl1{{$}}
207 ; GFX8:       buffer_wbinvl1_vol{{$}}
208 ; GFX10-NEXT: buffer_gl0_inv{{$}}
209 ; GFX10-NEXT: buffer_gl1_inv{{$}}
210 ; GCN:        s_endpgm
211 ; GFX10:         .amdhsa_kernel agent_one_as_seq_cst
212 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
213 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
214 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("agent-one-as") seq_cst`.
; The directives above expect `s_waitcnt vmcnt(0)` and then buffer_wbinvl1
; (gfx600), buffer_wbinvl1_vol (gfx803), or s_waitcnt_vscnt + buffer_gl0_inv +
; buffer_gl1_inv on consecutive lines (gfx1010/gfx1100).
215 define amdgpu_kernel void @agent_one_as_seq_cst() {
216 entry:
217   fence syncscope("agent-one-as") seq_cst
218   ret void
221 ; FUNC-LABEL:    {{^}}workgroup_one_as_acquire:
222 ; GCN:           %bb.0
223 ; GFX68-NOT:     s_waitcnt vmcnt(0){{$}}
224 ; GFX10WGP:      s_waitcnt vmcnt(0){{$}}
225 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
226 ; GFX10CU-NOT:   s_waitcnt vmcnt(0){{$}}
227 ; GFX10CU-NOT:   s_waitcnt_vscnt null, 0x0{{$}}
228 ; GFX10WGP-NEXT: buffer_gl0_inv{{$}}
229 ; GFX10CU-NOT:   buffer_gl0_inv{{$}}
230 ; GCN-NOT:       ATOMIC_FENCE
231 ; GCN:           s_endpgm
232 ; GFX10:         .amdhsa_kernel workgroup_one_as_acquire
233 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
234 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
235 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("workgroup-one-as") acquire`.
; Per the directives above, only the gfx1010/gfx1100 run lines without +cumode
; expect `s_waitcnt vmcnt(0)`, `s_waitcnt_vscnt null, 0x0` and `buffer_gl0_inv`;
; the +cumode and gfx600/gfx803 run lines reject the vmcnt(0) wait.
236 define amdgpu_kernel void @workgroup_one_as_acquire() {
237 entry:
238   fence syncscope("workgroup-one-as") acquire
239   ret void
242 ; FUNC-LABEL:    {{^}}workgroup_one_as_release:
243 ; GCN:           %bb.0
244 ; GFX68-NOT:     s_waitcnt vmcnt(0){{$}}
245 ; GFX10WGP:      s_waitcnt vmcnt(0){{$}}
246 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
247 ; GFX10CU-NOT:   s_waitcnt vmcnt(0){{$}}
248 ; GFX10CU-NOT:   s_waitcnt_vscnt null, 0x0{{$}}
249 ; GFX10-NOT:     buffer_gl0_inv
250 ; GCN-NOT:       ATOMIC_FENCE
251 ; GCN:           s_endpgm
252 ; GFX10:         .amdhsa_kernel workgroup_one_as_release
253 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
254 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
255 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("workgroup-one-as") release`.
; Per the directives above, only the gfx1010/gfx1100 run lines without +cumode
; expect `s_waitcnt vmcnt(0)` and `s_waitcnt_vscnt null, 0x0`; the +cumode and
; gfx600/gfx803 run lines reject the vmcnt(0) wait, and every gfx1010/gfx1100
; run line rejects buffer_gl0_inv.
256 define amdgpu_kernel void @workgroup_one_as_release() {
257 entry:
258   fence syncscope("workgroup-one-as") release
259   ret void
262 ; FUNC-LABEL:    {{^}}workgroup_one_as_acq_rel:
263 ; GCN:           %bb.0
264 ; GFX68-NOT:     s_waitcnt vmcnt(0){{$}}
265 ; GFX10WGP:      s_waitcnt vmcnt(0){{$}}
266 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
267 ; GFX10WGP-NEXT: buffer_gl0_inv{{$}}
268 ; GFX10CU-NOT:   s_waitcnt vmcnt(0){{$}}
269 ; GFX10CU-NOT:   s_waitcnt_vscnt null, 0x0{{$}}
270 ; GFX10CU-NOT:   buffer_gl0_inv{{$}}
271 ; GCN-NOT:       ATOMIC_FENCE
272 ; GCN:           s_endpgm
273 ; GFX10:         .amdhsa_kernel workgroup_one_as_acq_rel
274 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
275 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
276 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("workgroup-one-as") acq_rel`.
; Per the directives above, only the gfx1010/gfx1100 run lines without +cumode
; expect `s_waitcnt vmcnt(0)`, `s_waitcnt_vscnt null, 0x0` and `buffer_gl0_inv`;
; the +cumode and gfx600/gfx803 run lines reject the vmcnt(0) wait.
277 define amdgpu_kernel void @workgroup_one_as_acq_rel() {
278 entry:
279   fence syncscope("workgroup-one-as") acq_rel
280   ret void
283 ; FUNC-LABEL:    {{^}}workgroup_one_as_seq_cst:
284 ; GCN:           %bb.0
285 ; GFX68-NOT:     s_waitcnt vmcnt(0){{$}}
286 ; GFX10WGP:      s_waitcnt vmcnt(0){{$}}
287 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
288 ; GFX10WGP-NEXT: buffer_gl0_inv{{$}}
289 ; GFX10CU-NOT:   s_waitcnt vmcnt(0){{$}}
290 ; GFX10CU-NOT:   s_waitcnt_vscnt null, 0x0{{$}}
291 ; GFX10CU-NOT:   buffer_gl0_inv{{$}}
292 ; GCN-NOT:       ATOMIC_FENCE
293 ; GCN:           s_endpgm
294 ; GFX10:         .amdhsa_kernel workgroup_one_as_seq_cst
295 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
296 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
297 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("workgroup-one-as") seq_cst`.
; Per the directives above, only the gfx1010/gfx1100 run lines without +cumode
; expect `s_waitcnt vmcnt(0)`, `s_waitcnt_vscnt null, 0x0` and `buffer_gl0_inv`;
; the +cumode and gfx600/gfx803 run lines reject the vmcnt(0) wait.
298 define amdgpu_kernel void @workgroup_one_as_seq_cst() {
299 entry:
300   fence syncscope("workgroup-one-as") seq_cst
301   ret void
304 ; FUNC-LABEL: {{^}}wavefront_one_as_acquire:
305 ; GCN:        %bb.0
306 ; GCN-NOT:    ATOMIC_FENCE
307 ; GCN:        s_endpgm
308 ; GFX10:         .amdhsa_kernel wavefront_one_as_acquire
309 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
310 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
311 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("wavefront-one-as") acquire`.
; The directives above only require that no ATOMIC_FENCE text appears between
; %bb.0 and s_endpgm; they name no wait or buffer instruction for this case.
312 define amdgpu_kernel void @wavefront_one_as_acquire() {
313 entry:
314   fence syncscope("wavefront-one-as") acquire
315   ret void
318 ; FUNC-LABEL: {{^}}wavefront_one_as_release:
319 ; GCN:        %bb.0
320 ; GCN-NOT:    ATOMIC_FENCE
321 ; GCN:        s_endpgm
322 ; GFX10:         .amdhsa_kernel wavefront_one_as_release
323 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
324 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
325 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("wavefront-one-as") release`.
; The directives above only require that no ATOMIC_FENCE text appears between
; %bb.0 and s_endpgm; they name no wait or buffer instruction for this case.
326 define amdgpu_kernel void @wavefront_one_as_release() {
327 entry:
328   fence syncscope("wavefront-one-as") release
329   ret void
332 ; FUNC-LABEL: {{^}}wavefront_one_as_acq_rel:
333 ; GCN:        %bb.0
334 ; GCN-NOT:    ATOMIC_FENCE
335 ; GCN:        s_endpgm
336 ; GFX10:         .amdhsa_kernel wavefront_one_as_acq_rel
337 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
338 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
339 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("wavefront-one-as") acq_rel`.
; The directives above only require that no ATOMIC_FENCE text appears between
; %bb.0 and s_endpgm; they name no wait or buffer instruction for this case.
340 define amdgpu_kernel void @wavefront_one_as_acq_rel() {
341 entry:
342   fence syncscope("wavefront-one-as") acq_rel
343   ret void
346 ; FUNC-LABEL: {{^}}wavefront_one_as_seq_cst:
347 ; GCN:        %bb.0
348 ; GCN-NOT:    ATOMIC_FENCE
349 ; GCN:        s_endpgm
350 ; GFX10:         .amdhsa_kernel wavefront_one_as_seq_cst
351 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
352 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
353 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("wavefront-one-as") seq_cst`.
; The directives above only require that no ATOMIC_FENCE text appears between
; %bb.0 and s_endpgm; they name no wait or buffer instruction for this case.
354 define amdgpu_kernel void @wavefront_one_as_seq_cst() {
355 entry:
356   fence syncscope("wavefront-one-as") seq_cst
357   ret void
360 ; FUNC-LABEL: {{^}}system_acquire:
361 ; GCN:        %bb.0
362 ; GCN-NOT:    ATOMIC_FENCE
363 ; GFX6:       s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
364 ; GFX6-NEXT:  buffer_wbinvl1{{$}}
365 ; GFX8:       s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
366 ; GFX8-NEXT:  buffer_wbinvl1_vol{{$}}
367 ; GFX10:      s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
368 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
369 ; GFX10-NEXT: buffer_gl0_inv{{$}}
370 ; GFX10-NEXT: buffer_gl1_inv{{$}}
371 ; GCN:        s_endpgm
372 ; GFX10:         .amdhsa_kernel system_acquire
373 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
374 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
375 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence acquire` (no syncscope argument).
; The directives above expect `s_waitcnt vmcnt(0) lgkmcnt(0)` immediately
; followed by buffer_wbinvl1 (gfx600), buffer_wbinvl1_vol (gfx803), or
; s_waitcnt_vscnt + buffer_gl0_inv + buffer_gl1_inv (gfx1010/gfx1100).
376 define amdgpu_kernel void @system_acquire() {
377 entry:
378   fence acquire
379   ret void
382 ; FUNC-LABEL: {{^}}system_release:
383 ; GCN:        %bb.0
384 ; GCN-NOT:    ATOMIC_FENCE
385 ; GFX6:       s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
386 ; GFX8:       s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
387 ; GFX10:      s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
388 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
389 ; GCN:        s_endpgm
390 ; GFX10:         .amdhsa_kernel system_release
391 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
392 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
393 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence release` (no syncscope argument).
; The directives above expect `s_waitcnt vmcnt(0) lgkmcnt(0)` on every run
; line, followed directly by `s_waitcnt_vscnt null, 0x0` on gfx1010/gfx1100;
; they name no buffer_* instruction for this case.
394 define amdgpu_kernel void @system_release() {
395 entry:
396   fence release
397   ret void
400 ; FUNC-LABEL: {{^}}system_acq_rel:
401 ; GCN:        %bb.0
402 ; GCN-NOT:    ATOMIC_FENCE
403 ; GFX6:       s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
404 ; GFX8:       s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
405 ; GFX10:      s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
406 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
407 ; GFX6:       buffer_wbinvl1{{$}}
408 ; GFX8:       buffer_wbinvl1_vol{{$}}
409 ; GFX10-NEXT: buffer_gl0_inv{{$}}
410 ; GFX10-NEXT: buffer_gl1_inv{{$}}
411 ; GCN:        s_endpgm
412 ; GFX10:         .amdhsa_kernel system_acq_rel
413 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
414 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
415 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence acq_rel` (no syncscope argument).
; The directives above expect `s_waitcnt vmcnt(0) lgkmcnt(0)` and then
; buffer_wbinvl1 (gfx600), buffer_wbinvl1_vol (gfx803), or s_waitcnt_vscnt +
; buffer_gl0_inv + buffer_gl1_inv on consecutive lines (gfx1010/gfx1100).
416 define amdgpu_kernel void @system_acq_rel() {
417 entry:
418   fence acq_rel
419   ret void
422 ; FUNC-LABEL: {{^}}system_seq_cst:
423 ; GCN:        %bb.0
424 ; GCN-NOT:    ATOMIC_FENCE
425 ; GFX6:       s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
426 ; GFX8:       s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
427 ; GFX10:      s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
428 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
429 ; GFX6:       buffer_wbinvl1{{$}}
430 ; GFX8:       buffer_wbinvl1_vol{{$}}
431 ; GFX10-NEXT: buffer_gl0_inv{{$}}
432 ; GFX10-NEXT: buffer_gl1_inv{{$}}
433 ; GCN:        s_endpgm
434 ; GFX10:         .amdhsa_kernel system_seq_cst
435 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
436 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
437 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence seq_cst` (no syncscope argument).
; The directives above expect `s_waitcnt vmcnt(0) lgkmcnt(0)` and then
; buffer_wbinvl1 (gfx600), buffer_wbinvl1_vol (gfx803), or s_waitcnt_vscnt +
; buffer_gl0_inv + buffer_gl1_inv on consecutive lines (gfx1010/gfx1100).
438 define amdgpu_kernel void @system_seq_cst() {
439 entry:
440   fence seq_cst
441   ret void
444 ; FUNC-LABEL: {{^}}singlethread_acquire:
445 ; GCN:        %bb.0
446 ; GCN-NOT:    ATOMIC_FENCE
447 ; GCN:        s_endpgm
448 ; GFX10:         .amdhsa_kernel singlethread_acquire
449 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
450 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
451 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("singlethread") acquire`.
; The directives above only require that no ATOMIC_FENCE text appears between
; %bb.0 and s_endpgm; they name no wait or buffer instruction for this case.
452 define amdgpu_kernel void @singlethread_acquire() {
453 entry:
454   fence syncscope("singlethread") acquire
455   ret void
458 ; FUNC-LABEL: {{^}}singlethread_release:
459 ; GCN:        %bb.0
460 ; GCN-NOT:    ATOMIC_FENCE
461 ; GCN:        s_endpgm
462 ; GFX10:         .amdhsa_kernel singlethread_release
463 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
464 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
465 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("singlethread") release`.
; The directives above only require that no ATOMIC_FENCE text appears between
; %bb.0 and s_endpgm; they name no wait or buffer instruction for this case.
466 define amdgpu_kernel void @singlethread_release() {
467 entry:
468   fence syncscope("singlethread") release
469   ret void
472 ; FUNC-LABEL: {{^}}singlethread_acq_rel:
473 ; GCN:        %bb.0
474 ; GCN-NOT:    ATOMIC_FENCE
475 ; GCN:        s_endpgm
476 ; GFX10:         .amdhsa_kernel singlethread_acq_rel
477 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
478 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
479 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("singlethread") acq_rel`.
; The directives above only require that no ATOMIC_FENCE text appears between
; %bb.0 and s_endpgm; they name no wait or buffer instruction for this case.
480 define amdgpu_kernel void @singlethread_acq_rel() {
481 entry:
482   fence syncscope("singlethread") acq_rel
483   ret void
486 ; FUNC-LABEL: {{^}}singlethread_seq_cst:
487 ; GCN:        %bb.0
488 ; GCN-NOT:    ATOMIC_FENCE
489 ; GCN:        s_endpgm
490 ; GFX10:         .amdhsa_kernel singlethread_seq_cst
491 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
492 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
493 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("singlethread") seq_cst`.
; The directives above only require that no ATOMIC_FENCE text appears between
; %bb.0 and s_endpgm; they name no wait or buffer instruction for this case.
494 define amdgpu_kernel void @singlethread_seq_cst() {
495 entry:
496   fence syncscope("singlethread") seq_cst
497   ret void
500 ; FUNC-LABEL: {{^}}agent_acquire:
501 ; GCN:        %bb.0
502 ; GCN-NOT:    ATOMIC_FENCE
503 ; GFX6:       s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
504 ; GFX6-NEXT:  buffer_wbinvl1{{$}}
505 ; GFX8:       s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
506 ; GFX8-NEXT:  buffer_wbinvl1_vol{{$}}
507 ; GFX10:      s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
508 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
509 ; GFX10-NEXT: buffer_gl0_inv{{$}}
510 ; GFX10-NEXT: buffer_gl1_inv{{$}}
511 ; GCN:        s_endpgm
512 ; GFX10:         .amdhsa_kernel agent_acquire
513 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
514 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
515 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("agent") acquire`.
; The directives above expect `s_waitcnt vmcnt(0) lgkmcnt(0)` immediately
; followed by buffer_wbinvl1 (gfx600), buffer_wbinvl1_vol (gfx803), or
; s_waitcnt_vscnt + buffer_gl0_inv + buffer_gl1_inv (gfx1010/gfx1100).
516 define amdgpu_kernel void @agent_acquire() {
517 entry:
518   fence syncscope("agent") acquire
519   ret void
522 ; FUNC-LABEL: {{^}}agent_release:
523 ; GCN:        %bb.0
524 ; GCN-NOT:    ATOMIC_FENCE
525 ; GFX6:       s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
526 ; GFX8:       s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
527 ; GFX10:      s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
528 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
529 ; GCN:        s_endpgm
530 ; GFX10:         .amdhsa_kernel agent_release
531 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
532 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
533 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("agent") release`.
; The directives above expect `s_waitcnt vmcnt(0) lgkmcnt(0)` on every run
; line, followed directly by `s_waitcnt_vscnt null, 0x0` on gfx1010/gfx1100;
; they name no buffer_* instruction for this case.
534 define amdgpu_kernel void @agent_release() {
535 entry:
536   fence syncscope("agent") release
537   ret void
540 ; FUNC-LABEL: {{^}}agent_acq_rel:
541 ; GCN:        %bb.0
542 ; GCN-NOT:    ATOMIC_FENCE
543 ; GFX6:       s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
544 ; GFX8:       s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
545 ; GFX10:      s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
546 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
547 ; GFX6:       buffer_wbinvl1{{$}}
548 ; GFX8:       buffer_wbinvl1_vol{{$}}
549 ; GFX10-NEXT: buffer_gl0_inv{{$}}
550 ; GFX10-NEXT: buffer_gl1_inv{{$}}
551 ; GCN:        s_endpgm
552 ; GFX10:         .amdhsa_kernel agent_acq_rel
553 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
554 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
555 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("agent") acq_rel`.
; The directives above expect `s_waitcnt vmcnt(0) lgkmcnt(0)` and then
; buffer_wbinvl1 (gfx600), buffer_wbinvl1_vol (gfx803), or s_waitcnt_vscnt +
; buffer_gl0_inv + buffer_gl1_inv on consecutive lines (gfx1010/gfx1100).
556 define amdgpu_kernel void @agent_acq_rel() {
557 entry:
558   fence syncscope("agent") acq_rel
559   ret void
562 ; FUNC-LABEL: {{^}}agent_seq_cst:
563 ; GCN:        %bb.0
564 ; GCN-NOT:    ATOMIC_FENCE
565 ; GFX6:       s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
566 ; GFX8:       s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
567 ; GFX10:      s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
568 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
569 ; GFX6:       buffer_wbinvl1{{$}}
570 ; GFX8:       buffer_wbinvl1_vol{{$}}
571 ; GFX10-NEXT: buffer_gl0_inv{{$}}
572 ; GFX10-NEXT: buffer_gl1_inv{{$}}
573 ; GCN:        s_endpgm
574 ; GFX10:         .amdhsa_kernel agent_seq_cst
575 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
576 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
577 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("agent") seq_cst`.
; The directives above expect `s_waitcnt vmcnt(0) lgkmcnt(0)` and then
; buffer_wbinvl1 (gfx600), buffer_wbinvl1_vol (gfx803), or s_waitcnt_vscnt +
; buffer_gl0_inv + buffer_gl1_inv on consecutive lines (gfx1010/gfx1100).
578 define amdgpu_kernel void @agent_seq_cst() {
579 entry:
580   fence syncscope("agent") seq_cst
581   ret void
584 ; FUNC-LABEL:    {{^}}workgroup_acquire:
585 ; GCN:           %bb.0
586 ; GFX68-NOT:     s_waitcnt vmcnt(0){{$}}
587 ; GFX10WGP:      s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
588 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
589 ; GFX10CU-NOT:   s_waitcnt vmcnt(0){{$}}
590 ; GFX10CU-NOT:   s_waitcnt_vscnt null, 0x0{{$}}
591 ; GFX10WGP-NEXT: buffer_gl0_inv{{$}}
592 ; GFX10CU-NOT:   buffer_gl0_inv{{$}}
593 ; GCN-NOT:       ATOMIC_FENCE
594 ; GCN:           s_endpgm
595 ; GFX10:         .amdhsa_kernel workgroup_acquire
596 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
597 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
598 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("workgroup") acquire`.
; Per the directives above, only the gfx1010/gfx1100 run lines without +cumode
; expect `s_waitcnt vmcnt(0) lgkmcnt(0)`, `s_waitcnt_vscnt null, 0x0` and
; `buffer_gl0_inv`; the +cumode and gfx600/gfx803 run lines reject a line that
; ends in `s_waitcnt vmcnt(0)`.
599 define amdgpu_kernel void @workgroup_acquire() {
600 entry:
601   fence syncscope("workgroup") acquire
602   ret void
605 ; FUNC-LABEL:    {{^}}workgroup_release:
606 ; GCN:           %bb.0
607 ; GFX68-NOT:     s_waitcnt vmcnt(0){{$}}
608 ; GFX10WGP:      s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
609 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
610 ; GFX10CU-NOT:   s_waitcnt vmcnt(0){{$}}
611 ; GFX10CU-NOT:   s_waitcnt_vscnt null, 0x0{{$}}
612 ; GFX10-NOT:     buffer_gl0_inv
613 ; GCN-NOT:       ATOMIC_FENCE
614 ; GCN:           s_endpgm
615 ; GFX10:         .amdhsa_kernel workgroup_release
616 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
617 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
618 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("workgroup") release`.
; Per the directives above, only the gfx1010/gfx1100 run lines without +cumode
; expect `s_waitcnt vmcnt(0) lgkmcnt(0)` and `s_waitcnt_vscnt null, 0x0`; the
; +cumode and gfx600/gfx803 run lines reject a line that ends in
; `s_waitcnt vmcnt(0)`, and every gfx1010/gfx1100 run line rejects
; buffer_gl0_inv.
619 define amdgpu_kernel void @workgroup_release() {
620 entry:
621   fence syncscope("workgroup") release
622   ret void
625 ; FUNC-LABEL:    {{^}}workgroup_acq_rel:
626 ; GCN:           %bb.0
627 ; GFX68-NOT:     s_waitcnt vmcnt(0){{$}}
628 ; GFX10WGP:      s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
629 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
630 ; GFX10WGP-NEXT: buffer_gl0_inv{{$}}
631 ; GFX10CU-NOT:   s_waitcnt vmcnt(0){{$}}
632 ; GFX10CU-NOT:   s_waitcnt_vscnt null, 0x0{{$}}
633 ; GFX10CU-NOT:   buffer_gl0_inv{{$}}
634 ; GCN-NOT:       ATOMIC_FENCE
635 ; GCN:           s_endpgm
636 ; GFX10:         .amdhsa_kernel workgroup_acq_rel
637 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
638 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
639 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("workgroup") acq_rel`.
; Per the directives above, only the gfx1010/gfx1100 run lines without +cumode
; expect `s_waitcnt vmcnt(0) lgkmcnt(0)`, `s_waitcnt_vscnt null, 0x0` and
; `buffer_gl0_inv`; the +cumode and gfx600/gfx803 run lines reject a line that
; ends in `s_waitcnt vmcnt(0)`.
640 define amdgpu_kernel void @workgroup_acq_rel() {
641 entry:
642   fence syncscope("workgroup") acq_rel
643   ret void
646 ; FUNC-LABEL:    {{^}}workgroup_seq_cst:
647 ; GCN:           %bb.0
648 ; GFX68-NOT:     s_waitcnt vmcnt(0){{$}}
649 ; GFX10WGP:      s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
650 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
651 ; GFX10WGP-NEXT: buffer_gl0_inv{{$}}
652 ; GFX10CU-NOT:   s_waitcnt vmcnt(0){{$}}
653 ; GFX10CU-NOT:   s_waitcnt_vscnt null, 0x0{{$}}
654 ; GFX10CU-NOT:   buffer_gl0_inv{{$}}
655 ; GCN-NOT:       ATOMIC_FENCE
656 ; GCN:           s_endpgm
657 ; GFX10:         .amdhsa_kernel workgroup_seq_cst
658 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
659 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
660 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("workgroup") seq_cst`.
; Per the directives above, only the gfx1010/gfx1100 run lines without +cumode
; expect `s_waitcnt vmcnt(0) lgkmcnt(0)`, `s_waitcnt_vscnt null, 0x0` and
; `buffer_gl0_inv`; the +cumode and gfx600/gfx803 run lines reject a line that
; ends in `s_waitcnt vmcnt(0)`.
661 define amdgpu_kernel void @workgroup_seq_cst() {
662 entry:
663   fence syncscope("workgroup") seq_cst
664   ret void
667 ; FUNC-LABEL: {{^}}wavefront_acquire:
668 ; GCN:        %bb.0
669 ; GCN-NOT:    ATOMIC_FENCE
670 ; GCN:        s_endpgm
671 ; GFX10:         .amdhsa_kernel wavefront_acquire
672 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
673 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
674 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("wavefront") acquire`.
; The directives above only require that no ATOMIC_FENCE text appears between
; %bb.0 and s_endpgm; they name no wait or buffer instruction for this case.
675 define amdgpu_kernel void @wavefront_acquire() {
676 entry:
677   fence syncscope("wavefront") acquire
678   ret void
681 ; FUNC-LABEL: {{^}}wavefront_release:
682 ; GCN:        %bb.0
683 ; GCN-NOT:    ATOMIC_FENCE
684 ; GCN:        s_endpgm
685 ; GFX10:         .amdhsa_kernel wavefront_release
686 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
687 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
688 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("wavefront") release`.
; The directives above only require that no ATOMIC_FENCE text appears between
; %bb.0 and s_endpgm; they name no wait or buffer instruction for this case.
689 define amdgpu_kernel void @wavefront_release() {
690 entry:
691   fence syncscope("wavefront") release
692   ret void
695 ; FUNC-LABEL: {{^}}wavefront_acq_rel:
696 ; GCN:        %bb.0
697 ; GCN-NOT:    ATOMIC_FENCE
698 ; GCN:        s_endpgm
699 ; GFX10:         .amdhsa_kernel wavefront_acq_rel
700 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
701 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
702 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("wavefront") acq_rel`.
; The directives above only require that no ATOMIC_FENCE text appears between
; %bb.0 and s_endpgm; they name no wait or buffer instruction for this case.
703 define amdgpu_kernel void @wavefront_acq_rel() {
704 entry:
705   fence syncscope("wavefront") acq_rel
706   ret void
709 ; FUNC-LABEL: {{^}}wavefront_seq_cst:
710 ; GCN:        %bb.0
711 ; GCN-NOT:    ATOMIC_FENCE
712 ; GCN:        s_endpgm
713 ; GFX10:         .amdhsa_kernel wavefront_seq_cst
714 ; GFX10WGP-NOT:  .amdhsa_workgroup_processor_mode 0
715 ; GFX10CU:       .amdhsa_workgroup_processor_mode 0
716 ; GFX10-NOT:     .amdhsa_memory_ordered 0
; Kernel whose whole body is `fence syncscope("wavefront") seq_cst`.
; The directives above only require that no ATOMIC_FENCE text appears between
; %bb.0 and s_endpgm; they name no wait or buffer instruction for this case.
717 define amdgpu_kernel void @wavefront_seq_cst() {
718 entry:
719   fence syncscope("wavefront") seq_cst
720   ret void