1 ; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck %s
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck %s
; Atomic i32 load with unordered ordering and no explicit syncscope; the
; loaded value is stored to %out. Expected code: a plain flat_load_dword
; (nothing after the address operand), no "s_waitcnt vmcnt(0)" between the
; function label and the load, and neither that wait nor buffer_wbinvl1_vol
; between the load and the flat_store_dword of the loaded register.
4 ; CHECK-LABEL: {{^}}system_unordered
5 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
6 ; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
7 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
8 ; CHECK-NOT: buffer_wbinvl1_vol
9 ; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
10 define amdgpu_kernel void @system_unordered(
11 i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
13 %val = load atomic i32, i32 addrspace(4)* %in unordered, align 4
14 store i32 %val, i32 addrspace(4)* %out
; Atomic i32 load with monotonic ordering and no explicit syncscope; the
; loaded value is stored to %out. Expected code: a plain flat_load_dword
; (nothing after the address operand), no "s_waitcnt vmcnt(0)" between the
; function label and the load, and neither that wait nor buffer_wbinvl1_vol
; between the load and the flat_store_dword of the loaded register.
18 ; CHECK-LABEL: {{^}}system_monotonic
19 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
20 ; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
21 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
22 ; CHECK-NOT: buffer_wbinvl1_vol
23 ; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
24 define amdgpu_kernel void @system_monotonic(
25 i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
27 %val = load atomic i32, i32 addrspace(4)* %in monotonic, align 4
28 store i32 %val, i32 addrspace(4)* %out
; Atomic i32 load with acquire ordering and no explicit syncscope; the
; loaded value is stored to %out. Expected code: no "s_waitcnt vmcnt(0)"
; between the function label and the load, a flat_load_dword carrying the
; glc modifier, and on the two lines directly after it "s_waitcnt vmcnt(0)"
; followed by buffer_wbinvl1_vol, before the loaded register is written
; with flat_store_dword.
32 ; CHECK-LABEL: {{^}}system_acquire
33 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
34 ; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
35 ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
36 ; CHECK-NEXT: buffer_wbinvl1_vol
37 ; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
38 define amdgpu_kernel void @system_acquire(
39 i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
41 %val = load atomic i32, i32 addrspace(4)* %in acquire, align 4
42 store i32 %val, i32 addrspace(4)* %out
; Atomic i32 load with seq_cst ordering and no explicit syncscope; the
; loaded value is stored to %out. Expected code: "s_waitcnt vmcnt(0)" on the
; line directly before a flat_load_dword carrying the glc modifier, then on
; the two lines directly after the load another "s_waitcnt vmcnt(0)"
; followed by buffer_wbinvl1_vol, before the loaded register is written
; with flat_store_dword.
46 ; CHECK-LABEL: {{^}}system_seq_cst
47 ; CHECK: s_waitcnt vmcnt(0){{$}}
48 ; CHECK-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
49 ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
50 ; CHECK-NEXT: buffer_wbinvl1_vol
51 ; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
52 define amdgpu_kernel void @system_seq_cst(
53 i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
55 %val = load atomic i32, i32 addrspace(4)* %in seq_cst, align 4
56 store i32 %val, i32 addrspace(4)* %out
; Atomic i32 load with unordered ordering at syncscope("singlethread"); the
; loaded value is stored to %out. Expected code: a plain flat_load_dword
; (nothing after the address operand), no "s_waitcnt vmcnt(0)" between the
; function label and the load, and neither that wait nor buffer_wbinvl1_vol
; between the load and the flat_store_dword of the loaded register.
60 ; CHECK-LABEL: {{^}}singlethread_unordered
61 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
62 ; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
63 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
64 ; CHECK-NOT: buffer_wbinvl1_vol
65 ; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
66 define amdgpu_kernel void @singlethread_unordered(
67 i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
69 %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") unordered, align 4
70 store i32 %val, i32 addrspace(4)* %out
; Atomic i32 load with monotonic ordering at syncscope("singlethread"); the
; loaded value is stored to %out. Expected code: a plain flat_load_dword
; (nothing after the address operand), no "s_waitcnt vmcnt(0)" between the
; function label and the load, and neither that wait nor buffer_wbinvl1_vol
; between the load and the flat_store_dword of the loaded register.
74 ; CHECK-LABEL: {{^}}singlethread_monotonic
75 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
76 ; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
77 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
78 ; CHECK-NOT: buffer_wbinvl1_vol
79 ; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
80 define amdgpu_kernel void @singlethread_monotonic(
81 i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
83 %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") monotonic, align 4
84 store i32 %val, i32 addrspace(4)* %out
; Atomic i32 load with acquire ordering at syncscope("singlethread"); the
; loaded value is stored to %out. Even with acquire ordering, this scope
; expects a plain flat_load_dword (no glc after the address operand), no
; "s_waitcnt vmcnt(0)" between the function label and the load, and neither
; that wait nor buffer_wbinvl1_vol between the load and the flat_store_dword
; of the loaded register.
88 ; CHECK-LABEL: {{^}}singlethread_acquire
89 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
90 ; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
91 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
92 ; CHECK-NOT: buffer_wbinvl1_vol
93 ; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
94 define amdgpu_kernel void @singlethread_acquire(
95 i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
97 %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") acquire, align 4
98 store i32 %val, i32 addrspace(4)* %out
; Atomic i32 load with seq_cst ordering at syncscope("singlethread"); the
; loaded value is stored to %out. Even with seq_cst ordering, this scope
; expects a plain flat_load_dword (no glc after the address operand), no
; "s_waitcnt vmcnt(0)" between the function label and the load, and neither
; that wait nor buffer_wbinvl1_vol between the load and the flat_store_dword
; of the loaded register.
102 ; CHECK-LABEL: {{^}}singlethread_seq_cst
103 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
104 ; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
105 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
106 ; CHECK-NOT: buffer_wbinvl1_vol
107 ; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
108 define amdgpu_kernel void @singlethread_seq_cst(
109 i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
111 %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") seq_cst, align 4
112 store i32 %val, i32 addrspace(4)* %out
; Atomic i32 load with unordered ordering at syncscope("agent"); the loaded
; value is stored to %out. Expected code: a plain flat_load_dword (nothing
; after the address operand), no "s_waitcnt vmcnt(0)" between the function
; label and the load, and neither that wait nor buffer_wbinvl1_vol between
; the load and the flat_store_dword of the loaded register.
116 ; CHECK-LABEL: {{^}}agent_unordered
117 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
118 ; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
119 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
120 ; CHECK-NOT: buffer_wbinvl1_vol
121 ; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
122 define amdgpu_kernel void @agent_unordered(
123 i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
125 %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") unordered, align 4
126 store i32 %val, i32 addrspace(4)* %out
; Atomic i32 load with monotonic ordering at syncscope("agent"); the loaded
; value is stored to %out. Expected code: a plain flat_load_dword (nothing
; after the address operand), no "s_waitcnt vmcnt(0)" between the function
; label and the load, and neither that wait nor buffer_wbinvl1_vol between
; the load and the flat_store_dword of the loaded register.
130 ; CHECK-LABEL: {{^}}agent_monotonic
131 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
132 ; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
133 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
134 ; CHECK-NOT: buffer_wbinvl1_vol
135 ; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
136 define amdgpu_kernel void @agent_monotonic(
137 i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
139 %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") monotonic, align 4
140 store i32 %val, i32 addrspace(4)* %out
; Atomic i32 load with acquire ordering at syncscope("agent"); the loaded
; value is stored to %out. Expected code: no "s_waitcnt vmcnt(0)" between
; the function label and the load, a flat_load_dword carrying the glc
; modifier, and on the two lines directly after it "s_waitcnt vmcnt(0)"
; followed by buffer_wbinvl1_vol, before the loaded register is written
; with flat_store_dword.
144 ; CHECK-LABEL: {{^}}agent_acquire
145 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
146 ; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
147 ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
148 ; CHECK-NEXT: buffer_wbinvl1_vol
149 ; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
150 define amdgpu_kernel void @agent_acquire(
151 i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
153 %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") acquire, align 4
154 store i32 %val, i32 addrspace(4)* %out
; Atomic i32 load with seq_cst ordering at syncscope("agent"); the loaded
; value is stored to %out. Expected code: "s_waitcnt vmcnt(0)" on the line
; directly before a flat_load_dword carrying the glc modifier, then on the
; two lines directly after the load another "s_waitcnt vmcnt(0)" followed by
; buffer_wbinvl1_vol, before the loaded register is written with
; flat_store_dword.
158 ; CHECK-LABEL: {{^}}agent_seq_cst
159 ; CHECK: s_waitcnt vmcnt(0){{$}}
160 ; CHECK-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
161 ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
162 ; CHECK-NEXT: buffer_wbinvl1_vol
163 ; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
164 define amdgpu_kernel void @agent_seq_cst(
165 i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
167 %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") seq_cst, align 4
168 store i32 %val, i32 addrspace(4)* %out
; Atomic i32 load with unordered ordering at syncscope("workgroup"); the
; loaded value is stored to %out. Expected code: a plain flat_load_dword
; (nothing after the address operand), no "s_waitcnt vmcnt(0)" between the
; function label and the load, and neither that wait nor buffer_wbinvl1_vol
; between the load and the flat_store_dword of the loaded register.
172 ; CHECK-LABEL: {{^}}workgroup_unordered
173 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
174 ; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
175 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
176 ; CHECK-NOT: buffer_wbinvl1_vol
177 ; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
178 define amdgpu_kernel void @workgroup_unordered(
179 i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
181 %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") unordered, align 4
182 store i32 %val, i32 addrspace(4)* %out
; Atomic i32 load with monotonic ordering at syncscope("workgroup"); the
; loaded value is stored to %out. Expected code: a plain flat_load_dword
; (nothing after the address operand), no "s_waitcnt vmcnt(0)" between the
; function label and the load, and neither that wait nor buffer_wbinvl1_vol
; between the load and the flat_store_dword of the loaded register.
186 ; CHECK-LABEL: {{^}}workgroup_monotonic
187 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
188 ; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
189 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
190 ; CHECK-NOT: buffer_wbinvl1_vol
191 ; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
192 define amdgpu_kernel void @workgroup_monotonic(
193 i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
195 %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") monotonic, align 4
196 store i32 %val, i32 addrspace(4)* %out
; Atomic i32 load with acquire ordering at syncscope("workgroup"); the
; loaded value is stored to %out. Even with acquire ordering, this scope
; expects a plain flat_load_dword (no glc after the address operand), no
; "s_waitcnt vmcnt(0)" between the function label and the load, and neither
; that wait nor buffer_wbinvl1_vol between the load and the flat_store_dword
; of the loaded register.
200 ; CHECK-LABEL: {{^}}workgroup_acquire
201 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
202 ; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
203 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
204 ; CHECK-NOT: buffer_wbinvl1_vol
205 ; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
206 define amdgpu_kernel void @workgroup_acquire(
207 i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
209 %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") acquire, align 4
210 store i32 %val, i32 addrspace(4)* %out
; Atomic i32 load with seq_cst ordering at syncscope("workgroup"); the
; loaded value is stored to %out. Even with seq_cst ordering, this scope
; expects a plain flat_load_dword (no glc after the address operand), no
; "s_waitcnt vmcnt(0)" between the function label and the load, and neither
; that wait nor buffer_wbinvl1_vol between the load and the flat_store_dword
; of the loaded register.
214 ; CHECK-LABEL: {{^}}workgroup_seq_cst
215 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
216 ; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
217 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
218 ; CHECK-NOT: buffer_wbinvl1_vol
219 ; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
220 define amdgpu_kernel void @workgroup_seq_cst(
221 i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
223 %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") seq_cst, align 4
224 store i32 %val, i32 addrspace(4)* %out
; Atomic i32 load with unordered ordering at syncscope("wavefront"); the
; loaded value is stored to %out. Expected code: a plain flat_load_dword
; (nothing after the address operand), no "s_waitcnt vmcnt(0)" between the
; function label and the load, and neither that wait nor buffer_wbinvl1_vol
; between the load and the flat_store_dword of the loaded register.
228 ; CHECK-LABEL: {{^}}wavefront_unordered
229 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
230 ; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
231 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
232 ; CHECK-NOT: buffer_wbinvl1_vol
233 ; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
234 define amdgpu_kernel void @wavefront_unordered(
235 i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
237 %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") unordered, align 4
238 store i32 %val, i32 addrspace(4)* %out
; Atomic i32 load with monotonic ordering at syncscope("wavefront"); the
; loaded value is stored to %out. Expected code: a plain flat_load_dword
; (nothing after the address operand), no "s_waitcnt vmcnt(0)" between the
; function label and the load, and neither that wait nor buffer_wbinvl1_vol
; between the load and the flat_store_dword of the loaded register.
242 ; CHECK-LABEL: {{^}}wavefront_monotonic
243 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
244 ; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
245 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
246 ; CHECK-NOT: buffer_wbinvl1_vol
247 ; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
248 define amdgpu_kernel void @wavefront_monotonic(
249 i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
251 %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") monotonic, align 4
252 store i32 %val, i32 addrspace(4)* %out
; Atomic i32 load with acquire ordering at syncscope("wavefront"); the
; loaded value is stored to %out. Even with acquire ordering, this scope
; expects a plain flat_load_dword (no glc after the address operand), no
; "s_waitcnt vmcnt(0)" between the function label and the load, and neither
; that wait nor buffer_wbinvl1_vol between the load and the flat_store_dword
; of the loaded register.
256 ; CHECK-LABEL: {{^}}wavefront_acquire
257 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
258 ; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
259 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
260 ; CHECK-NOT: buffer_wbinvl1_vol
261 ; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
262 define amdgpu_kernel void @wavefront_acquire(
263 i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
265 %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") acquire, align 4
266 store i32 %val, i32 addrspace(4)* %out
; Atomic i32 load with seq_cst ordering at syncscope("wavefront"); the
; loaded value is stored to %out. Even with seq_cst ordering, this scope
; expects a plain flat_load_dword (no glc after the address operand), no
; "s_waitcnt vmcnt(0)" between the function label and the load, and neither
; that wait nor buffer_wbinvl1_vol between the load and the flat_store_dword
; of the loaded register.
270 ; CHECK-LABEL: {{^}}wavefront_seq_cst
271 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
272 ; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
273 ; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
274 ; CHECK-NOT: buffer_wbinvl1_vol
275 ; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
276 define amdgpu_kernel void @wavefront_seq_cst(
277 i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
279 %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") seq_cst, align 4
280 store i32 %val, i32 addrspace(4)* %out