1 ; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s
3 ; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
6 declare i32 @llvm.amdgcn.workitem.id.x()
; Unordered atomic i32 store through a generic pointer with no explicit
; syncscope. The checks require that no bare `s_waitcnt vmcnt(0)` line appears
; between the function label and the `flat_store_dword`, and that the store
; carries no trailing modifiers.
8 ; GCN-LABEL: {{^}}system_unordered:
9 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
10 ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
11 define amdgpu_kernel void @system_unordered(
14 store atomic i32 %in, i32* %out unordered, align 4
; Monotonic atomic i32 store through a generic pointer with no explicit
; syncscope. As with the unordered case, the checks forbid a bare
; `s_waitcnt vmcnt(0)` line ahead of the `flat_store_dword`.
18 ; GCN-LABEL: {{^}}system_monotonic:
19 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
20 ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
21 define amdgpu_kernel void @system_monotonic(
24 store atomic i32 %in, i32* %out monotonic, align 4
; Release atomic i32 store through a generic pointer with no explicit
; syncscope. The checks require a bare `s_waitcnt vmcnt(0)` line with the
; `flat_store_dword` on the very next output line.
28 ; GCN-LABEL: {{^}}system_release:
29 ; GCN: s_waitcnt vmcnt(0){{$}}
30 ; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
31 define amdgpu_kernel void @system_release(
34 store atomic i32 %in, i32* %out release, align 4
; Sequentially consistent atomic i32 store through a generic pointer with no
; explicit syncscope. The expected output matches the release case: a bare
; `s_waitcnt vmcnt(0)` line directly followed by the `flat_store_dword`.
38 ; GCN-LABEL: {{^}}system_seq_cst:
39 ; GCN: s_waitcnt vmcnt(0){{$}}
40 ; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
41 define amdgpu_kernel void @system_seq_cst(
44 store atomic i32 %in, i32* %out seq_cst, align 4
; Unordered atomic i32 store with syncscope("singlethread"). The checks forbid
; a bare `s_waitcnt vmcnt(0)` line between the label and the
; `flat_store_dword`.
48 ; GCN-LABEL: {{^}}singlethread_unordered:
49 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
50 ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
51 define amdgpu_kernel void @singlethread_unordered(
54 store atomic i32 %in, i32* %out syncscope("singlethread") unordered, align 4
; Monotonic atomic i32 store with syncscope("singlethread"). The checks forbid
; a bare `s_waitcnt vmcnt(0)` line between the label and the
; `flat_store_dword`.
58 ; GCN-LABEL: {{^}}singlethread_monotonic:
59 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
60 ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
61 define amdgpu_kernel void @singlethread_monotonic(
64 store atomic i32 %in, i32* %out syncscope("singlethread") monotonic, align 4
; Release atomic i32 store with syncscope("singlethread"). Unlike the release
; store that has no explicit syncscope, the checks here forbid a bare
; `s_waitcnt vmcnt(0)` line ahead of the `flat_store_dword`.
68 ; GCN-LABEL: {{^}}singlethread_release:
69 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
70 ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
71 define amdgpu_kernel void @singlethread_release(
74 store atomic i32 %in, i32* %out syncscope("singlethread") release, align 4
; Sequentially consistent atomic i32 store with syncscope("singlethread").
; The checks forbid a bare `s_waitcnt vmcnt(0)` line ahead of the
; `flat_store_dword`.
78 ; GCN-LABEL: {{^}}singlethread_seq_cst:
79 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
80 ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
81 define amdgpu_kernel void @singlethread_seq_cst(
84 store atomic i32 %in, i32* %out syncscope("singlethread") seq_cst, align 4
; Unordered atomic i32 store with syncscope("agent"). The checks forbid a bare
; `s_waitcnt vmcnt(0)` line between the label and the `flat_store_dword`.
88 ; GCN-LABEL: {{^}}agent_unordered:
89 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
90 ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
91 define amdgpu_kernel void @agent_unordered(
94 store atomic i32 %in, i32* %out syncscope("agent") unordered, align 4
; Monotonic atomic i32 store of %in to the generic pointer %out with
; syncscope("agent"). The checks forbid a bare `s_waitcnt vmcnt(0)` line
; between the label and the `flat_store_dword`.
98 ; GCN-LABEL: {{^}}agent_monotonic:
99 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
100 ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
101 define amdgpu_kernel void @agent_monotonic(
102 i32 %in, i32* %out) {
104 store atomic i32 %in, i32* %out syncscope("agent") monotonic, align 4
; Release atomic i32 store of %in to the generic pointer %out with
; syncscope("agent"). The checks require a bare `s_waitcnt vmcnt(0)` line with
; the `flat_store_dword` on the very next output line.
108 ; GCN-LABEL: {{^}}agent_release:
109 ; GCN: s_waitcnt vmcnt(0){{$}}
110 ; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
111 define amdgpu_kernel void @agent_release(
112 i32 %in, i32* %out) {
114 store atomic i32 %in, i32* %out syncscope("agent") release, align 4
; Sequentially consistent atomic i32 store of %in to the generic pointer %out
; with syncscope("agent"). The expected output matches the agent release case:
; a bare `s_waitcnt vmcnt(0)` line directly followed by the `flat_store_dword`.
118 ; GCN-LABEL: {{^}}agent_seq_cst:
119 ; GCN: s_waitcnt vmcnt(0){{$}}
120 ; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
121 define amdgpu_kernel void @agent_seq_cst(
122 i32 %in, i32* %out) {
124 store atomic i32 %in, i32* %out syncscope("agent") seq_cst, align 4
; Unordered atomic i32 store of %in to the generic pointer %out with
; syncscope("workgroup"). The checks forbid a bare `s_waitcnt vmcnt(0)` line
; between the label and the `flat_store_dword`.
128 ; GCN-LABEL: {{^}}workgroup_unordered:
129 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
130 ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
131 define amdgpu_kernel void @workgroup_unordered(
132 i32 %in, i32* %out) {
134 store atomic i32 %in, i32* %out syncscope("workgroup") unordered, align 4
; Monotonic atomic i32 store of %in to the generic pointer %out with
; syncscope("workgroup"). The checks forbid a bare `s_waitcnt vmcnt(0)` line
; between the label and the `flat_store_dword`.
138 ; GCN-LABEL: {{^}}workgroup_monotonic:
139 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
140 ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
141 define amdgpu_kernel void @workgroup_monotonic(
142 i32 %in, i32* %out) {
144 store atomic i32 %in, i32* %out syncscope("workgroup") monotonic, align 4
; Release atomic i32 store of %in to the generic pointer %out with
; syncscope("workgroup"). The negative check forbids a bare
; `s_waitcnt vmcnt(0)` line ahead of the `flat_store_dword`; it uses the
; combined gfx8/gfx9 prefix, which every invocation at the top of this file
; enables.
148 ; GCN-LABEL: {{^}}workgroup_release:
149 ; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
150 ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
151 define amdgpu_kernel void @workgroup_release(
152 i32 %in, i32* %out) {
154 store atomic i32 %in, i32* %out syncscope("workgroup") release, align 4
; Sequentially consistent atomic i32 store of %in to the generic pointer %out
; with syncscope("workgroup"). As in the workgroup release case, the negative
; check (combined gfx8/gfx9 prefix) forbids a bare `s_waitcnt vmcnt(0)` line
; ahead of the `flat_store_dword`.
158 ; GCN-LABEL: {{^}}workgroup_seq_cst:
159 ; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
160 ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
161 define amdgpu_kernel void @workgroup_seq_cst(
162 i32 %in, i32* %out) {
164 store atomic i32 %in, i32* %out syncscope("workgroup") seq_cst, align 4
; Unordered atomic i32 store of %in to the generic pointer %out with
; syncscope("wavefront"). The checks forbid a bare `s_waitcnt vmcnt(0)` line
; between the label and the `flat_store_dword`.
168 ; GCN-LABEL: {{^}}wavefront_unordered:
169 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
170 ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
171 define amdgpu_kernel void @wavefront_unordered(
172 i32 %in, i32* %out) {
174 store atomic i32 %in, i32* %out syncscope("wavefront") unordered, align 4
; Monotonic atomic i32 store of %in to the generic pointer %out with
; syncscope("wavefront"). The checks forbid a bare `s_waitcnt vmcnt(0)` line
; between the label and the `flat_store_dword`.
178 ; GCN-LABEL: {{^}}wavefront_monotonic:
179 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
180 ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
181 define amdgpu_kernel void @wavefront_monotonic(
182 i32 %in, i32* %out) {
184 store atomic i32 %in, i32* %out syncscope("wavefront") monotonic, align 4
; Release atomic i32 store of %in to the generic pointer %out with
; syncscope("wavefront"). The checks forbid a bare `s_waitcnt vmcnt(0)` line
; between the label and the `flat_store_dword`.
188 ; GCN-LABEL: {{^}}wavefront_release:
189 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
190 ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
191 define amdgpu_kernel void @wavefront_release(
192 i32 %in, i32* %out) {
194 store atomic i32 %in, i32* %out syncscope("wavefront") release, align 4
; Sequentially consistent atomic i32 store of %in to the generic pointer %out
; with syncscope("wavefront"). The checks forbid a bare `s_waitcnt vmcnt(0)`
; line between the label and the `flat_store_dword`.
198 ; GCN-LABEL: {{^}}wavefront_seq_cst:
199 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
200 ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
201 define amdgpu_kernel void @wavefront_seq_cst(
202 i32 %in, i32* %out) {
204 store atomic i32 %in, i32* %out syncscope("wavefront") seq_cst, align 4
; Loads an i32 from the generic pointer %in and stores it, tagged with
; !nontemporal metadata, directly to the addrspace(5) pointer %out. The
; expected instruction is a `buffer_store_dword` in `offen` form ending in the
; `glc slc` modifiers.
208 ; GCN-LABEL: {{^}}nontemporal_private_0:
209 ; GFX89: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
210 define amdgpu_kernel void @nontemporal_private_0(
211 i32* %in, i32 addrspace(5)* %out) {
213 %val = load i32, i32* %in, align 4
214 store i32 %val, i32 addrspace(5)* %out, !nontemporal !0
; Variant of the addrspace(5) nontemporal store in which the destination is
; %out indexed by the workitem id x. The expected instruction is again a
; `buffer_store_dword` in `offen` form ending in `glc slc`.
218 ; GCN-LABEL: {{^}}nontemporal_private_1:
219 ; GFX89: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
220 define amdgpu_kernel void @nontemporal_private_1(
221 i32* %in, i32 addrspace(5)* %out) {
223 %tid = call i32 @llvm.amdgcn.workitem.id.x()
224 %val = load i32, i32* %in, align 4
225 %out.gep = getelementptr inbounds i32, i32 addrspace(5)* %out, i32 %tid
226 store i32 %val, i32 addrspace(5)* %out.gep, !nontemporal !0
; Loads an i32 from the generic pointer %in and stores it, tagged with
; !nontemporal metadata, directly to the addrspace(1) pointer %out. The gfx803
; invocations expect a `flat_store_dword` ending in `glc slc`; the gfx900
; invocations expect a `global_store_dword` with an `off` operand followed by
; `glc slc`.
230 ; GCN-LABEL: {{^}}nontemporal_global_0:
231 ; GFX8: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
232 ; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc slc{{$}}
233 define amdgpu_kernel void @nontemporal_global_0(
234 i32* %in, i32 addrspace(1)* %out) {
236 %val = load i32, i32* %in, align 4
237 store i32 %val, i32 addrspace(1)* %out, !nontemporal !0
; Variant of the addrspace(1) nontemporal store in which the destination is
; %out indexed by the workitem id x. The gfx803 expectation is unchanged
; (`flat_store_dword ... glc slc`); the gfx900 expectation is a
; `global_store_dword` whose third operand is an SGPR pair rather than `off`,
; still ending in `glc slc`.
241 ; GCN-LABEL: {{^}}nontemporal_global_1:
242 ; GFX8: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
243 ; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
244 define amdgpu_kernel void @nontemporal_global_1(
245 i32* %in, i32 addrspace(1)* %out) {
247 %tid = call i32 @llvm.amdgcn.workitem.id.x()
248 %val = load i32, i32* %in, align 4
249 %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
250 store i32 %val, i32 addrspace(1)* %out.gep, !nontemporal !0
; Loads an i32 from the generic pointer %in and stores it, tagged with
; !nontemporal metadata, directly to the addrspace(3) pointer %out. The
; expected instruction is a `ds_write_b32` with nothing after its two register
; operands, i.e. no extra modifiers are accepted on that line.
254 ; GCN-LABEL: {{^}}nontemporal_local_0:
255 ; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
256 define amdgpu_kernel void @nontemporal_local_0(
257 i32* %in, i32 addrspace(3)* %out) {
259 %val = load i32, i32* %in, align 4
260 store i32 %val, i32 addrspace(3)* %out, !nontemporal !0
; Variant of the addrspace(3) nontemporal store in which the destination is
; %out indexed by the workitem id x. The expected instruction is again a
; `ds_write_b32` with nothing after its two register operands.
264 ; GCN-LABEL: {{^}}nontemporal_local_1:
265 ; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
266 define amdgpu_kernel void @nontemporal_local_1(
267 i32* %in, i32 addrspace(3)* %out) {
269 %tid = call i32 @llvm.amdgcn.workitem.id.x()
270 %val = load i32, i32* %in, align 4
271 %out.gep = getelementptr inbounds i32, i32 addrspace(3)* %out, i32 %tid
272 store i32 %val, i32 addrspace(3)* %out.gep, !nontemporal !0
; Loads an i32 from the generic pointer %in and stores it, tagged with
; !nontemporal metadata, directly to the generic pointer %out. The expected
; instruction is a `flat_store_dword` ending in the `glc slc` modifiers.
276 ; GCN-LABEL: {{^}}nontemporal_flat_0:
277 ; GFX89: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
278 define amdgpu_kernel void @nontemporal_flat_0(
279 i32* %in, i32* %out) {
281 %val = load i32, i32* %in, align 4
282 store i32 %val, i32* %out, !nontemporal !0
; Variant of the generic-pointer nontemporal store in which the destination is
; %out indexed by the workitem id x. The expected instruction is again a
; `flat_store_dword` ending in `glc slc`.
286 ; GCN-LABEL: {{^}}nontemporal_flat_1:
287 ; GFX89: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
288 define amdgpu_kernel void @nontemporal_flat_1(
289 i32* %in, i32* %out) {
291 %tid = call i32 @llvm.amdgcn.workitem.id.x()
292 %val = load i32, i32* %in, align 4
293 %out.gep = getelementptr inbounds i32, i32* %out, i32 %tid
294 store i32 %val, i32* %out.gep, !nontemporal !0