1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-amd- -O0 -mcpu=gfx600 < %s | FileCheck --check-prefixes=GFX6 %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx700 < %s | FileCheck --check-prefixes=GFX7 %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10-WGP %s
5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX10-CU %s
6 ; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s
7 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s
8 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
9 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
10 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
; Volatile load, uniform address: reads an i32 from %in with a volatile load and
; writes it to %out with an ordinary (non-volatile) store.
; In the expected output below, the load is emitted with "glc" for the gfx600
; and gfx700 runs, "glc dlc" for gfx1010/gfx1100, and "scope:SCOPE_SYS" for
; gfx1200; a wait on the load (s_waitcnt vmcnt(0) / s_wait_loadcnt 0x0) is
; expected before the store is issued.
12 define amdgpu_kernel void @global_volatile_load_0(
13 ; GFX6-LABEL: global_volatile_load_0:
14 ; GFX6: ; %bb.0: ; %entry
15 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x9
16 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0xb
17 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
18 ; GFX6-NEXT: s_mov_b32 s2, s5
19 ; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
20 ; GFX6-NEXT: s_mov_b32 s8, 0xf000
21 ; GFX6-NEXT: s_mov_b32 s9, -1
22 ; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
23 ; GFX6-NEXT: s_mov_b32 s5, s2
24 ; GFX6-NEXT: s_mov_b32 s6, s9
25 ; GFX6-NEXT: s_mov_b32 s7, s8
26 ; GFX6-NEXT: s_mov_b32 s10, s1
27 ; GFX6-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
28 ; GFX6-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
29 ; GFX6-NEXT: s_mov_b32 s1, s10
30 ; GFX6-NEXT: s_mov_b32 s2, s9
31 ; GFX6-NEXT: s_mov_b32 s3, s8
32 ; GFX6-NEXT: buffer_load_dword v0, off, s[4:7], 0 glc
33 ; GFX6-NEXT: s_waitcnt vmcnt(0)
34 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
37 ; GFX7-LABEL: global_volatile_load_0:
38 ; GFX7: ; %bb.0: ; %entry
39 ; GFX7-NEXT: s_mov_b64 s[4:5], s[6:7]
40 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
41 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
42 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
43 ; GFX7-NEXT: v_mov_b32_e32 v0, s6
44 ; GFX7-NEXT: v_mov_b32_e32 v1, s7
45 ; GFX7-NEXT: flat_load_dword v2, v[0:1] glc
46 ; GFX7-NEXT: s_waitcnt vmcnt(0)
47 ; GFX7-NEXT: v_mov_b32_e32 v0, s4
48 ; GFX7-NEXT: v_mov_b32_e32 v1, s5
49 ; GFX7-NEXT: flat_store_dword v[0:1], v2
52 ; GFX10-WGP-LABEL: global_volatile_load_0:
53 ; GFX10-WGP: ; %bb.0: ; %entry
54 ; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[6:7]
55 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
56 ; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
57 ; GFX10-WGP-NEXT: s_nop 0
58 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x8
59 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
60 ; GFX10-WGP-NEXT: global_load_dword v1, v0, s[6:7] glc dlc
61 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
62 ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5]
63 ; GFX10-WGP-NEXT: s_endpgm
65 ; GFX10-CU-LABEL: global_volatile_load_0:
66 ; GFX10-CU: ; %bb.0: ; %entry
67 ; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[6:7]
68 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
69 ; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
70 ; GFX10-CU-NEXT: s_nop 0
71 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x8
72 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
73 ; GFX10-CU-NEXT: global_load_dword v1, v0, s[6:7] glc dlc
74 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
75 ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5]
76 ; GFX10-CU-NEXT: s_endpgm
78 ; SKIP-CACHE-INV-LABEL: global_volatile_load_0:
79 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
80 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
81 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x2
82 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
83 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s5
84 ; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
85 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000
86 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s9, -1
87 ; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
88 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s2
89 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9
90 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8
91 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s10, s1
92 ; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
93 ; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
94 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s10
95 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s9
96 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s8
97 ; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0 glc
98 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
99 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
100 ; SKIP-CACHE-INV-NEXT: s_endpgm
102 ; GFX11-WGP-LABEL: global_volatile_load_0:
103 ; GFX11-WGP: ; %bb.0: ; %entry
104 ; GFX11-WGP-NEXT: s_mov_b64 s[0:1], s[2:3]
105 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
106 ; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
107 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
108 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
109 ; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
110 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
111 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
112 ; GFX11-WGP-NEXT: s_endpgm
114 ; GFX11-CU-LABEL: global_volatile_load_0:
115 ; GFX11-CU: ; %bb.0: ; %entry
116 ; GFX11-CU-NEXT: s_mov_b64 s[0:1], s[2:3]
117 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
118 ; GFX11-CU-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
119 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
120 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
121 ; GFX11-CU-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
122 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
123 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
124 ; GFX11-CU-NEXT: s_endpgm
126 ; GFX12-WGP-LABEL: global_volatile_load_0:
127 ; GFX12-WGP: ; %bb.0: ; %entry
128 ; GFX12-WGP-NEXT: s_mov_b64 s[0:1], s[2:3]
129 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
130 ; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
131 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
132 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
133 ; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
134 ; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
135 ; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
136 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
137 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
138 ; GFX12-WGP-NEXT: s_endpgm
140 ; GFX12-CU-LABEL: global_volatile_load_0:
141 ; GFX12-CU: ; %bb.0: ; %entry
142 ; GFX12-CU-NEXT: s_mov_b64 s[0:1], s[2:3]
143 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
144 ; GFX12-CU-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
145 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
146 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
147 ; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
148 ; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
149 ; GFX12-CU-NEXT: s_wait_samplecnt 0x0
150 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
151 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
152 ; GFX12-CU-NEXT: s_endpgm
153 ptr addrspace(1) %in, ptr addrspace(1) %out) {
155 %val = load volatile i32, ptr addrspace(1) %in, align 4
156 store i32 %val, ptr addrspace(1) %out
; Volatile load, per-workitem address: reads an i32 with a volatile load from
; %in indexed by llvm.amdgcn.workitem.id.x and writes it to %out with an
; ordinary (non-volatile) store.
; In the expected output below, the workitem id in v0 is shifted left by 2 to
; form the address offset; the gfx1100 and gfx1200 runs additionally mask the id
; with 0x3ff before the shift. The load is emitted with "glc" for the gfx600
; and gfx700 runs, "glc dlc" for gfx1010/gfx1100, and "scope:SCOPE_SYS" for
; gfx1200, and a wait on the load is expected before the store is issued.
160 define amdgpu_kernel void @global_volatile_load_1(
161 ; GFX6-LABEL: global_volatile_load_1:
162 ; GFX6: ; %bb.0: ; %entry
163 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x9
164 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0xb
165 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
166 ; GFX6-NEXT: s_mov_b32 s8, s1
167 ; GFX6-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
168 ; GFX6-NEXT: s_mov_b32 s6, 0xf000
169 ; GFX6-NEXT: s_mov_b32 s7, -1
170 ; GFX6-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
171 ; GFX6-NEXT: s_mov_b32 s1, s8
172 ; GFX6-NEXT: s_mov_b32 s2, s7
173 ; GFX6-NEXT: s_mov_b32 s3, s6
174 ; GFX6-NEXT: s_mov_b32 s8, 0
175 ; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9
176 ; GFX6-NEXT: s_mov_b32 s9, s6
177 ; GFX6-NEXT: ; kill: def $sgpr4_sgpr5 killed $sgpr4_sgpr5 def $sgpr4_sgpr5_sgpr6_sgpr7
178 ; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9]
179 ; GFX6-NEXT: s_mov_b32 s8, 2
180 ; GFX6-NEXT: v_lshlrev_b32_e64 v0, s8, v0
181 ; GFX6-NEXT: s_mov_b32 s8, 0
182 ; GFX6-NEXT: ; implicit-def: $sgpr8
183 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
184 ; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
185 ; GFX6-NEXT: v_mov_b32_e32 v1, v2
186 ; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 glc
187 ; GFX6-NEXT: s_waitcnt vmcnt(0)
188 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
189 ; GFX6-NEXT: s_endpgm
191 ; GFX7-LABEL: global_volatile_load_1:
192 ; GFX7: ; %bb.0: ; %entry
193 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[6:7], 0x0
194 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x2
195 ; GFX7-NEXT: s_mov_b32 s6, 2
196 ; GFX7-NEXT: v_lshlrev_b32_e64 v1, s6, v0
197 ; GFX7-NEXT: s_mov_b32 s6, 0
198 ; GFX7-NEXT: ; implicit-def: $sgpr6
199 ; GFX7-NEXT: v_mov_b32_e32 v0, 0
200 ; GFX7-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
201 ; GFX7-NEXT: v_mov_b32_e32 v2, v0
202 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
203 ; GFX7-NEXT: s_mov_b32 s6, s8
204 ; GFX7-NEXT: v_mov_b32_e32 v0, v1
205 ; GFX7-NEXT: s_mov_b32 s8, s9
206 ; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $vgpr1_vgpr2 killed $exec
207 ; GFX7-NEXT: v_add_i32_e64 v0, s[6:7], s6, v0
208 ; GFX7-NEXT: v_mov_b32_e32 v1, s8
209 ; GFX7-NEXT: v_addc_u32_e64 v2, s[6:7], v1, v2, s[6:7]
210 ; GFX7-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
211 ; GFX7-NEXT: v_mov_b32_e32 v1, v2
212 ; GFX7-NEXT: flat_load_dword v2, v[0:1] glc
213 ; GFX7-NEXT: s_waitcnt vmcnt(0)
214 ; GFX7-NEXT: v_mov_b32_e32 v0, s4
215 ; GFX7-NEXT: v_mov_b32_e32 v1, s5
216 ; GFX7-NEXT: flat_store_dword v[0:1], v2
217 ; GFX7-NEXT: s_endpgm
219 ; GFX10-WGP-LABEL: global_volatile_load_1:
220 ; GFX10-WGP: ; %bb.0: ; %entry
221 ; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[6:7]
222 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, v0
223 ; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
224 ; GFX10-WGP-NEXT: s_nop 0
225 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x8
226 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
227 ; GFX10-WGP-NEXT: s_mov_b32 s8, 2
228 ; GFX10-WGP-NEXT: v_lshlrev_b32_e64 v1, s8, v1
229 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
230 ; GFX10-WGP-NEXT: global_load_dword v1, v1, s[6:7] glc dlc
231 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
232 ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5]
233 ; GFX10-WGP-NEXT: s_endpgm
235 ; GFX10-CU-LABEL: global_volatile_load_1:
236 ; GFX10-CU: ; %bb.0: ; %entry
237 ; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[6:7]
238 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, v0
239 ; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
240 ; GFX10-CU-NEXT: s_nop 0
241 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x8
242 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
243 ; GFX10-CU-NEXT: s_mov_b32 s8, 2
244 ; GFX10-CU-NEXT: v_lshlrev_b32_e64 v1, s8, v1
245 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
246 ; GFX10-CU-NEXT: global_load_dword v1, v1, s[6:7] glc dlc
247 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
248 ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5]
249 ; GFX10-CU-NEXT: s_endpgm
251 ; SKIP-CACHE-INV-LABEL: global_volatile_load_1:
252 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
253 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
254 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x2
255 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
256 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1
257 ; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
258 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
259 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1
260 ; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
261 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8
262 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7
263 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6
264 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0
265 ; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9
266 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s9, s6
267 ; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4_sgpr5 killed $sgpr4_sgpr5 def $sgpr4_sgpr5_sgpr6_sgpr7
268 ; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], s[8:9]
269 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 2
270 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e64 v0, s8, v0
271 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0
272 ; SKIP-CACHE-INV-NEXT: ; implicit-def: $sgpr8
273 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, 0
274 ; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
275 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2
276 ; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 glc
277 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
278 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
279 ; SKIP-CACHE-INV-NEXT: s_endpgm
281 ; GFX11-WGP-LABEL: global_volatile_load_1:
282 ; GFX11-WGP: ; %bb.0: ; %entry
283 ; GFX11-WGP-NEXT: s_mov_b64 s[0:1], s[2:3]
284 ; GFX11-WGP-NEXT: v_mov_b32_e32 v1, v0
285 ; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
286 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
287 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
288 ; GFX11-WGP-NEXT: s_mov_b32 s4, 0x3ff
289 ; GFX11-WGP-NEXT: v_and_b32_e64 v1, v1, s4
290 ; GFX11-WGP-NEXT: s_mov_b32 s4, 2
291 ; GFX11-WGP-NEXT: v_lshlrev_b32_e64 v1, s4, v1
292 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
293 ; GFX11-WGP-NEXT: global_load_b32 v1, v1, s[2:3] glc dlc
294 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
295 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
296 ; GFX11-WGP-NEXT: s_endpgm
298 ; GFX11-CU-LABEL: global_volatile_load_1:
299 ; GFX11-CU: ; %bb.0: ; %entry
300 ; GFX11-CU-NEXT: s_mov_b64 s[0:1], s[2:3]
301 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, v0
302 ; GFX11-CU-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
303 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
304 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
305 ; GFX11-CU-NEXT: s_mov_b32 s4, 0x3ff
306 ; GFX11-CU-NEXT: v_and_b32_e64 v1, v1, s4
307 ; GFX11-CU-NEXT: s_mov_b32 s4, 2
308 ; GFX11-CU-NEXT: v_lshlrev_b32_e64 v1, s4, v1
309 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
310 ; GFX11-CU-NEXT: global_load_b32 v1, v1, s[2:3] glc dlc
311 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
312 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
313 ; GFX11-CU-NEXT: s_endpgm
315 ; GFX12-WGP-LABEL: global_volatile_load_1:
316 ; GFX12-WGP: ; %bb.0: ; %entry
317 ; GFX12-WGP-NEXT: s_mov_b64 s[0:1], s[2:3]
318 ; GFX12-WGP-NEXT: v_mov_b32_e32 v1, v0
319 ; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
320 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
321 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
322 ; GFX12-WGP-NEXT: s_mov_b32 s4, 0x3ff
323 ; GFX12-WGP-NEXT: v_and_b32_e64 v1, v1, s4
324 ; GFX12-WGP-NEXT: s_mov_b32 s4, 2
325 ; GFX12-WGP-NEXT: v_lshlrev_b32_e64 v1, s4, v1
326 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
327 ; GFX12-WGP-NEXT: global_load_b32 v1, v1, s[2:3] scope:SCOPE_SYS
328 ; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
329 ; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
330 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
331 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
332 ; GFX12-WGP-NEXT: s_endpgm
334 ; GFX12-CU-LABEL: global_volatile_load_1:
335 ; GFX12-CU: ; %bb.0: ; %entry
336 ; GFX12-CU-NEXT: s_mov_b64 s[0:1], s[2:3]
337 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, v0
338 ; GFX12-CU-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
339 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
340 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
341 ; GFX12-CU-NEXT: s_mov_b32 s4, 0x3ff
342 ; GFX12-CU-NEXT: v_and_b32_e64 v1, v1, s4
343 ; GFX12-CU-NEXT: s_mov_b32 s4, 2
344 ; GFX12-CU-NEXT: v_lshlrev_b32_e64 v1, s4, v1
345 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
346 ; GFX12-CU-NEXT: global_load_b32 v1, v1, s[2:3] scope:SCOPE_SYS
347 ; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
348 ; GFX12-CU-NEXT: s_wait_samplecnt 0x0
349 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
350 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
351 ; GFX12-CU-NEXT: s_endpgm
352 ptr addrspace(1) %in, ptr addrspace(1) %out) {
354 %tid = call i32 @llvm.amdgcn.workitem.id.x()
355 %val.gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
356 %val = load volatile i32, ptr addrspace(1) %val.gep, align 4
357 store i32 %val, ptr addrspace(1) %out
; Volatile store, uniform address: reads an i32 from %in with an ordinary
; (non-volatile) load and writes it to %out with a volatile store.
; In the expected output below, the store carries no cache-policy modifier for
; the gfx600, gfx700 and gfx1010 runs, "dlc" for gfx1100, and "scope:SCOPE_SYS"
; for gfx1200. Every run expects a wait after the store and before s_endpgm
; (s_waitcnt vmcnt(0), s_waitcnt_vscnt null, 0x0, or s_wait_storecnt 0x0
; depending on the target). The non-volatile load is expected as a scalar
; s_load_dword / s_load_b32.
361 define amdgpu_kernel void @global_volatile_store_0(
362 ; GFX6-LABEL: global_volatile_store_0:
363 ; GFX6: ; %bb.0: ; %entry
364 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x9
365 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0xb
366 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
367 ; GFX6-NEXT: s_mov_b32 s8, s1
368 ; GFX6-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
369 ; GFX6-NEXT: s_mov_b32 s6, 0xf000
370 ; GFX6-NEXT: s_mov_b32 s7, -1
371 ; GFX6-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
372 ; GFX6-NEXT: s_mov_b32 s1, s8
373 ; GFX6-NEXT: s_mov_b32 s2, s7
374 ; GFX6-NEXT: s_mov_b32 s3, s6
375 ; GFX6-NEXT: s_load_dword s4, s[4:5], 0x0
376 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
377 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
378 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
379 ; GFX6-NEXT: s_waitcnt vmcnt(0)
380 ; GFX6-NEXT: s_endpgm
382 ; GFX7-LABEL: global_volatile_store_0:
383 ; GFX7: ; %bb.0: ; %entry
384 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
385 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x2
386 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
387 ; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0
388 ; GFX7-NEXT: v_mov_b32_e32 v0, s6
389 ; GFX7-NEXT: v_mov_b32_e32 v1, s7
390 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
391 ; GFX7-NEXT: v_mov_b32_e32 v2, s4
392 ; GFX7-NEXT: flat_store_dword v[0:1], v2
393 ; GFX7-NEXT: s_waitcnt vmcnt(0)
394 ; GFX7-NEXT: s_endpgm
396 ; GFX10-WGP-LABEL: global_volatile_store_0:
397 ; GFX10-WGP: ; %bb.0: ; %entry
398 ; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[6:7]
399 ; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
400 ; GFX10-WGP-NEXT: s_nop 0
401 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x8
402 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
403 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
404 ; GFX10-WGP-NEXT: s_load_dword s6, s[6:7], 0x0
405 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
406 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
407 ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5]
408 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
409 ; GFX10-WGP-NEXT: s_endpgm
411 ; GFX10-CU-LABEL: global_volatile_store_0:
412 ; GFX10-CU: ; %bb.0: ; %entry
413 ; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[6:7]
414 ; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
415 ; GFX10-CU-NEXT: s_nop 0
416 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x8
417 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
418 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
419 ; GFX10-CU-NEXT: s_load_dword s6, s[6:7], 0x0
420 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
421 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
422 ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5]
423 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
424 ; GFX10-CU-NEXT: s_endpgm
426 ; SKIP-CACHE-INV-LABEL: global_volatile_store_0:
427 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
428 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
429 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x2
430 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
431 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1
432 ; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
433 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
434 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1
435 ; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
436 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8
437 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7
438 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6
439 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x0
440 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
441 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4
442 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
443 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
444 ; SKIP-CACHE-INV-NEXT: s_endpgm
446 ; GFX11-WGP-LABEL: global_volatile_store_0:
447 ; GFX11-WGP: ; %bb.0: ; %entry
448 ; GFX11-WGP-NEXT: s_mov_b64 s[0:1], s[2:3]
449 ; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
450 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
451 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
452 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
453 ; GFX11-WGP-NEXT: s_load_b32 s2, s[2:3], 0x0
454 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
455 ; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
456 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] dlc
457 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
458 ; GFX11-WGP-NEXT: s_endpgm
460 ; GFX11-CU-LABEL: global_volatile_store_0:
461 ; GFX11-CU: ; %bb.0: ; %entry
462 ; GFX11-CU-NEXT: s_mov_b64 s[0:1], s[2:3]
463 ; GFX11-CU-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
464 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
465 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
466 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
467 ; GFX11-CU-NEXT: s_load_b32 s2, s[2:3], 0x0
468 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
469 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
470 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] dlc
471 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
472 ; GFX11-CU-NEXT: s_endpgm
474 ; GFX12-WGP-LABEL: global_volatile_store_0:
475 ; GFX12-WGP: ; %bb.0: ; %entry
476 ; GFX12-WGP-NEXT: s_mov_b64 s[0:1], s[2:3]
477 ; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
478 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
479 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
480 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
481 ; GFX12-WGP-NEXT: s_load_b32 s2, s[2:3], 0x0
482 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
483 ; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
484 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
485 ; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
486 ; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
487 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
488 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0
489 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
490 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0
491 ; GFX12-WGP-NEXT: s_endpgm
493 ; GFX12-CU-LABEL: global_volatile_store_0:
494 ; GFX12-CU: ; %bb.0: ; %entry
495 ; GFX12-CU-NEXT: s_mov_b64 s[0:1], s[2:3]
496 ; GFX12-CU-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
497 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
498 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
499 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
500 ; GFX12-CU-NEXT: s_load_b32 s2, s[2:3], 0x0
501 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
502 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
503 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
504 ; GFX12-CU-NEXT: s_wait_samplecnt 0x0
505 ; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
506 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
507 ; GFX12-CU-NEXT: s_wait_storecnt 0x0
508 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
509 ; GFX12-CU-NEXT: s_wait_storecnt 0x0
510 ; GFX12-CU-NEXT: s_endpgm
511 ptr addrspace(1) %in, ptr addrspace(1) %out) {
513 %val = load i32, ptr addrspace(1) %in, align 4
514 store volatile i32 %val, ptr addrspace(1) %out
; Volatile store, per-workitem address: reads an i32 from %in with an ordinary
; (non-volatile) load and writes it with a volatile store to %out indexed by
; llvm.amdgcn.workitem.id.x.
; In the expected output below, the workitem id in v0 is shifted left by 2 to
; form the address offset; the gfx1100 and gfx1200 runs additionally mask the id
; with 0x3ff before the shift. The store carries no cache-policy modifier for
; the gfx600, gfx700 and gfx1010 runs, "dlc" for gfx1100, and "scope:SCOPE_SYS"
; for gfx1200, and every run expects a wait after the store and before
; s_endpgm.
518 define amdgpu_kernel void @global_volatile_store_1(
519 ; GFX6-LABEL: global_volatile_store_1:
520 ; GFX6: ; %bb.0: ; %entry
521 ; GFX6-NEXT: s_mov_b64 s[0:1], s[2:3]
522 ; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
523 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
524 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
525 ; GFX6-NEXT: s_load_dword s4, s[2:3], 0x0
526 ; GFX6-NEXT: s_mov_b32 s2, 0xf000
527 ; GFX6-NEXT: s_mov_b32 s6, 0
528 ; GFX6-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
529 ; GFX6-NEXT: s_mov_b32 s7, s2
530 ; GFX6-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
531 ; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7]
532 ; GFX6-NEXT: s_mov_b32 s5, 2
533 ; GFX6-NEXT: v_lshlrev_b32_e64 v1, s5, v0
534 ; GFX6-NEXT: s_mov_b32 s5, 0
535 ; GFX6-NEXT: ; implicit-def: $sgpr5
536 ; GFX6-NEXT: v_mov_b32_e32 v0, 0
537 ; GFX6-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
538 ; GFX6-NEXT: v_mov_b32_e32 v2, v0
539 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
540 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
541 ; GFX6-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
542 ; GFX6-NEXT: s_waitcnt vmcnt(0)
543 ; GFX6-NEXT: s_endpgm
545 ; GFX7-LABEL: global_volatile_store_1:
546 ; GFX7: ; %bb.0: ; %entry
547 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
548 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[6:7], 0x2
549 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
550 ; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0
551 ; GFX7-NEXT: s_mov_b32 s5, 2
552 ; GFX7-NEXT: v_lshlrev_b32_e64 v1, s5, v0
553 ; GFX7-NEXT: s_mov_b32 s5, 0
554 ; GFX7-NEXT: ; implicit-def: $sgpr5
555 ; GFX7-NEXT: v_mov_b32_e32 v0, 0
556 ; GFX7-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
557 ; GFX7-NEXT: v_mov_b32_e32 v2, v0
558 ; GFX7-NEXT: s_mov_b32 s6, s8
559 ; GFX7-NEXT: v_mov_b32_e32 v0, v1
560 ; GFX7-NEXT: s_mov_b32 s5, s9
561 ; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $vgpr1_vgpr2 killed $exec
562 ; GFX7-NEXT: v_add_i32_e64 v0, s[6:7], s6, v0
563 ; GFX7-NEXT: v_mov_b32_e32 v1, s5
564 ; GFX7-NEXT: v_addc_u32_e64 v2, s[6:7], v1, v2, s[6:7]
565 ; GFX7-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
566 ; GFX7-NEXT: v_mov_b32_e32 v1, v2
567 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
568 ; GFX7-NEXT: v_mov_b32_e32 v2, s4
569 ; GFX7-NEXT: flat_store_dword v[0:1], v2
570 ; GFX7-NEXT: s_waitcnt vmcnt(0)
571 ; GFX7-NEXT: s_endpgm
573 ; GFX10-WGP-LABEL: global_volatile_store_1:
574 ; GFX10-WGP: ; %bb.0: ; %entry
575 ; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[6:7]
576 ; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
577 ; GFX10-WGP-NEXT: s_nop 0
578 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x8
579 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
580 ; GFX10-WGP-NEXT: s_load_dword s6, s[6:7], 0x0
581 ; GFX10-WGP-NEXT: s_mov_b32 s7, 2
582 ; GFX10-WGP-NEXT: v_lshlrev_b32_e64 v0, s7, v0
583 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
584 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
585 ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5]
586 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
587 ; GFX10-WGP-NEXT: s_endpgm
589 ; GFX10-CU-LABEL: global_volatile_store_1:
590 ; GFX10-CU: ; %bb.0: ; %entry
591 ; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[6:7]
592 ; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
593 ; GFX10-CU-NEXT: s_nop 0
594 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x8
595 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
596 ; GFX10-CU-NEXT: s_load_dword s6, s[6:7], 0x0
597 ; GFX10-CU-NEXT: s_mov_b32 s7, 2
598 ; GFX10-CU-NEXT: v_lshlrev_b32_e64 v0, s7, v0
599 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
600 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
601 ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5]
602 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
603 ; GFX10-CU-NEXT: s_endpgm
605 ; SKIP-CACHE-INV-LABEL: global_volatile_store_1:
606 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
607 ; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[2:3]
608 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
609 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
610 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
611 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x0
612 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, 0xf000
613 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0
614 ; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
615 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s2
616 ; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
617 ; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[6:7]
618 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 2
619 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e64 v1, s5, v0
620 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0
621 ; SKIP-CACHE-INV-NEXT: ; implicit-def: $sgpr5
622 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, 0
623 ; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
624 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, v0
625 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
626 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4
627 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
628 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
629 ; SKIP-CACHE-INV-NEXT: s_endpgm
631 ; GFX11-WGP-LABEL: global_volatile_store_1:
632 ; GFX11-WGP: ; %bb.0: ; %entry
633 ; GFX11-WGP-NEXT: s_mov_b64 s[0:1], s[2:3]
634 ; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
635 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
636 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
637 ; GFX11-WGP-NEXT: s_load_b32 s2, s[2:3], 0x0
638 ; GFX11-WGP-NEXT: s_mov_b32 s3, 0x3ff
639 ; GFX11-WGP-NEXT: v_and_b32_e64 v0, v0, s3
640 ; GFX11-WGP-NEXT: s_mov_b32 s3, 2
641 ; GFX11-WGP-NEXT: v_lshlrev_b32_e64 v0, s3, v0
642 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
643 ; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
644 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] dlc
645 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
646 ; GFX11-WGP-NEXT: s_endpgm
648 ; GFX11-CU-LABEL: global_volatile_store_1:
649 ; GFX11-CU: ; %bb.0: ; %entry
650 ; GFX11-CU-NEXT: s_mov_b64 s[0:1], s[2:3]
651 ; GFX11-CU-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
652 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
653 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
654 ; GFX11-CU-NEXT: s_load_b32 s2, s[2:3], 0x0
655 ; GFX11-CU-NEXT: s_mov_b32 s3, 0x3ff
656 ; GFX11-CU-NEXT: v_and_b32_e64 v0, v0, s3
657 ; GFX11-CU-NEXT: s_mov_b32 s3, 2
658 ; GFX11-CU-NEXT: v_lshlrev_b32_e64 v0, s3, v0
659 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
660 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
661 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] dlc
662 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
663 ; GFX11-CU-NEXT: s_endpgm
665 ; GFX12-WGP-LABEL: global_volatile_store_1:
666 ; GFX12-WGP: ; %bb.0: ; %entry
667 ; GFX12-WGP-NEXT: s_mov_b64 s[0:1], s[2:3]
668 ; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
669 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
670 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
671 ; GFX12-WGP-NEXT: s_load_b32 s2, s[2:3], 0x0
672 ; GFX12-WGP-NEXT: s_mov_b32 s3, 0x3ff
673 ; GFX12-WGP-NEXT: v_and_b32_e64 v0, v0, s3
674 ; GFX12-WGP-NEXT: s_mov_b32 s3, 2
675 ; GFX12-WGP-NEXT: v_lshlrev_b32_e64 v0, s3, v0
676 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
677 ; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
678 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
679 ; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
680 ; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
681 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
682 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0
683 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
684 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0
685 ; GFX12-WGP-NEXT: s_endpgm
687 ; GFX12-CU-LABEL: global_volatile_store_1:
688 ; GFX12-CU: ; %bb.0: ; %entry
689 ; GFX12-CU-NEXT: s_mov_b64 s[0:1], s[2:3]
690 ; GFX12-CU-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
691 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
692 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
693 ; GFX12-CU-NEXT: s_load_b32 s2, s[2:3], 0x0
694 ; GFX12-CU-NEXT: s_mov_b32 s3, 0x3ff
695 ; GFX12-CU-NEXT: v_and_b32_e64 v0, v0, s3
696 ; GFX12-CU-NEXT: s_mov_b32 s3, 2
697 ; GFX12-CU-NEXT: v_lshlrev_b32_e64 v0, s3, v0
698 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
699 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
700 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
701 ; GFX12-CU-NEXT: s_wait_samplecnt 0x0
702 ; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
703 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
704 ; GFX12-CU-NEXT: s_wait_storecnt 0x0
705 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
706 ; GFX12-CU-NEXT: s_wait_storecnt 0x0
707 ; GFX12-CU-NEXT: s_endpgm
708 ptr addrspace(1) %in, ptr addrspace(1) %out) {
710 %tid = call i32 @llvm.amdgcn.workitem.id.x()
711 %val = load i32, ptr addrspace(1) %in, align 4
712 %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %tid
713 store volatile i32 %val, ptr addrspace(1) %out.gep
; Test: atomic volatile acquire load at syncscope("workgroup") from global
; memory (addrspace(1)); the loaded value is then written to %out with a plain
; (non-atomic, non-volatile) store.
;
; What the autogenerated checks below pin down around the load:
;   - GFX6, GFX7 and SKIP-CACHE-INV runs: an ordinary load (no glc) followed by
;     "s_waitcnt vmcnt(0)"; no cache-invalidate instruction is expected.
;   - GFX10 and GFX11 WGP runs: the load carries glc and is followed by
;     "s_waitcnt vmcnt(0)" and "buffer_gl0_inv".
;   - GFX10 and GFX11 CU runs (-mattr=+cumode): a plain load followed only by
;     "s_waitcnt vmcnt(0)".
;   - GFX12 WGP run: the load carries scope:SCOPE_SE and is followed by waits
;     on bvhcnt, samplecnt and loadcnt, then "global_inv scope:SCOPE_SE".
;   - GFX12 CU run: a plain load followed only by "s_wait_loadcnt 0x0".
;
; The check lines are produced by utils/update_llc_test_checks.py (see the
; note at the top of the file); regenerate them instead of editing by hand.
717 define amdgpu_kernel void @global_volatile_workgroup_acquire_load(
718 ; GFX6-LABEL: global_volatile_workgroup_acquire_load:
719 ; GFX6: ; %bb.0: ; %entry
720 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x9
721 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0xb
722 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
723 ; GFX6-NEXT: s_mov_b32 s2, s5
724 ; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
725 ; GFX6-NEXT: s_mov_b32 s8, 0xf000
726 ; GFX6-NEXT: s_mov_b32 s9, -1
727 ; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
728 ; GFX6-NEXT: s_mov_b32 s5, s2
729 ; GFX6-NEXT: s_mov_b32 s6, s9
730 ; GFX6-NEXT: s_mov_b32 s7, s8
731 ; GFX6-NEXT: s_mov_b32 s10, s1
732 ; GFX6-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
733 ; GFX6-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
734 ; GFX6-NEXT: s_mov_b32 s1, s10
735 ; GFX6-NEXT: s_mov_b32 s2, s9
736 ; GFX6-NEXT: s_mov_b32 s3, s8
737 ; GFX6-NEXT: buffer_load_dword v0, off, s[4:7], 0
738 ; GFX6-NEXT: s_waitcnt vmcnt(0)
739 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
740 ; GFX6-NEXT: s_endpgm
742 ; GFX7-LABEL: global_volatile_workgroup_acquire_load:
743 ; GFX7: ; %bb.0: ; %entry
744 ; GFX7-NEXT: s_mov_b64 s[4:5], s[6:7]
745 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
746 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
747 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
748 ; GFX7-NEXT: v_mov_b32_e32 v0, s6
749 ; GFX7-NEXT: v_mov_b32_e32 v1, s7
750 ; GFX7-NEXT: flat_load_dword v2, v[0:1]
751 ; GFX7-NEXT: v_mov_b32_e32 v0, s4
752 ; GFX7-NEXT: v_mov_b32_e32 v1, s5
753 ; GFX7-NEXT: s_waitcnt vmcnt(0)
754 ; GFX7-NEXT: flat_store_dword v[0:1], v2
755 ; GFX7-NEXT: s_endpgm
757 ; GFX10-WGP-LABEL: global_volatile_workgroup_acquire_load:
758 ; GFX10-WGP: ; %bb.0: ; %entry
759 ; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[6:7]
760 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
761 ; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
762 ; GFX10-WGP-NEXT: s_nop 0
763 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x8
764 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
765 ; GFX10-WGP-NEXT: global_load_dword v1, v0, s[6:7] glc
766 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
767 ; GFX10-WGP-NEXT: buffer_gl0_inv
768 ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5]
769 ; GFX10-WGP-NEXT: s_endpgm
771 ; GFX10-CU-LABEL: global_volatile_workgroup_acquire_load:
772 ; GFX10-CU: ; %bb.0: ; %entry
773 ; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[6:7]
774 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
775 ; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
776 ; GFX10-CU-NEXT: s_nop 0
777 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x8
778 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
779 ; GFX10-CU-NEXT: global_load_dword v1, v0, s[6:7]
780 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
781 ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5]
782 ; GFX10-CU-NEXT: s_endpgm
784 ; SKIP-CACHE-INV-LABEL: global_volatile_workgroup_acquire_load:
785 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
786 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
787 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x2
788 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
789 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s5
790 ; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
791 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000
792 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s9, -1
793 ; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
794 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s2
795 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9
796 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8
797 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s10, s1
798 ; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
799 ; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
800 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s10
801 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s9
802 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s8
803 ; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0
804 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
805 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
806 ; SKIP-CACHE-INV-NEXT: s_endpgm
808 ; GFX11-WGP-LABEL: global_volatile_workgroup_acquire_load:
809 ; GFX11-WGP: ; %bb.0: ; %entry
810 ; GFX11-WGP-NEXT: s_mov_b64 s[0:1], s[2:3]
811 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
812 ; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
813 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
814 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
815 ; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[2:3] glc
816 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
817 ; GFX11-WGP-NEXT: buffer_gl0_inv
818 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
819 ; GFX11-WGP-NEXT: s_endpgm
821 ; GFX11-CU-LABEL: global_volatile_workgroup_acquire_load:
822 ; GFX11-CU: ; %bb.0: ; %entry
823 ; GFX11-CU-NEXT: s_mov_b64 s[0:1], s[2:3]
824 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
825 ; GFX11-CU-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
826 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
827 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
828 ; GFX11-CU-NEXT: global_load_b32 v1, v0, s[2:3]
829 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
830 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
831 ; GFX11-CU-NEXT: s_endpgm
833 ; GFX12-WGP-LABEL: global_volatile_workgroup_acquire_load:
834 ; GFX12-WGP: ; %bb.0: ; %entry
835 ; GFX12-WGP-NEXT: s_mov_b64 s[0:1], s[2:3]
836 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
837 ; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
838 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
839 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
840 ; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SE
841 ; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
842 ; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
843 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
844 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
845 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
846 ; GFX12-WGP-NEXT: s_endpgm
848 ; GFX12-CU-LABEL: global_volatile_workgroup_acquire_load:
849 ; GFX12-CU: ; %bb.0: ; %entry
850 ; GFX12-CU-NEXT: s_mov_b64 s[0:1], s[2:3]
851 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
852 ; GFX12-CU-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
853 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
854 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
855 ; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3]
856 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
857 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
858 ; GFX12-CU-NEXT: s_endpgm
859 ptr addrspace(1) %in, ptr addrspace(1) %out) {
; Operation under test: the load is atomic, volatile, acquire-ordered and
; scoped to the workgroup. The store that follows has no ordering of its own.
861 %val = load atomic volatile i32, ptr addrspace(1) %in syncscope("workgroup") acquire, align 4
862 store i32 %val, ptr addrspace(1) %out
; Test: atomic volatile release store of the i32 kernel argument %in to global
; memory (addrspace(1)) at syncscope("workgroup").
;
; What the autogenerated checks below pin down immediately before the store:
;   - GFX6, GFX7 and SKIP-CACHE-INV runs: "s_waitcnt lgkmcnt(0)", then a plain
;     store.
;   - GFX10 and GFX11 WGP runs: "s_waitcnt vmcnt(0) lgkmcnt(0)" followed by
;     "s_waitcnt_vscnt null, 0x0", then a plain store.
;   - GFX10 and GFX11 CU runs (-mattr=+cumode): only "s_waitcnt lgkmcnt(0)".
;   - GFX12 WGP run: "global_wb scope:SCOPE_SE", then waits on bvhcnt,
;     samplecnt, storecnt and loadcnt_dscnt; the store itself carries
;     scope:SCOPE_SE.
;   - GFX12 CU run: "s_wait_dscnt 0x0", then a plain store.
; In every run the store is directly followed by s_endpgm, i.e. no wait is
; expected after it.
;
; The check lines are produced by utils/update_llc_test_checks.py (see the
; note at the top of the file); regenerate them instead of editing by hand.
866 define amdgpu_kernel void @global_volatile_workgroup_release_store(
867 ; GFX6-LABEL: global_volatile_workgroup_release_store:
868 ; GFX6: ; %bb.0: ; %entry
869 ; GFX6-NEXT: s_load_dword s4, s[2:3], 0x9
870 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0xb
871 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
872 ; GFX6-NEXT: s_mov_b32 s7, s1
873 ; GFX6-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
874 ; GFX6-NEXT: s_mov_b32 s5, 0xf000
875 ; GFX6-NEXT: s_mov_b32 s6, -1
876 ; GFX6-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
877 ; GFX6-NEXT: s_mov_b32 s1, s7
878 ; GFX6-NEXT: s_mov_b32 s2, s6
879 ; GFX6-NEXT: s_mov_b32 s3, s5
880 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
881 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
882 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
883 ; GFX6-NEXT: s_endpgm
885 ; GFX7-LABEL: global_volatile_workgroup_release_store:
886 ; GFX7: ; %bb.0: ; %entry
887 ; GFX7-NEXT: s_load_dword s4, s[6:7], 0x0
888 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x2
889 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
890 ; GFX7-NEXT: v_mov_b32_e32 v0, s6
891 ; GFX7-NEXT: v_mov_b32_e32 v1, s7
892 ; GFX7-NEXT: v_mov_b32_e32 v2, s4
893 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
894 ; GFX7-NEXT: flat_store_dword v[0:1], v2
895 ; GFX7-NEXT: s_endpgm
897 ; GFX10-WGP-LABEL: global_volatile_workgroup_release_store:
898 ; GFX10-WGP: ; %bb.0: ; %entry
899 ; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[6:7]
900 ; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0x0
901 ; GFX10-WGP-NEXT: s_nop 0
902 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x8
903 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
904 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
905 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
906 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
907 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
908 ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5]
909 ; GFX10-WGP-NEXT: s_endpgm
911 ; GFX10-CU-LABEL: global_volatile_workgroup_release_store:
912 ; GFX10-CU: ; %bb.0: ; %entry
913 ; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[6:7]
914 ; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0x0
915 ; GFX10-CU-NEXT: s_nop 0
916 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x8
917 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
918 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
919 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
920 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
921 ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5]
922 ; GFX10-CU-NEXT: s_endpgm
924 ; SKIP-CACHE-INV-LABEL: global_volatile_workgroup_release_store:
925 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
926 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x0
927 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x2
928 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
929 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1
930 ; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
931 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000
932 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
933 ; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
934 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7
935 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6
936 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5
937 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4
938 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
939 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
940 ; SKIP-CACHE-INV-NEXT: s_endpgm
942 ; GFX11-WGP-LABEL: global_volatile_workgroup_release_store:
943 ; GFX11-WGP: ; %bb.0: ; %entry
944 ; GFX11-WGP-NEXT: s_mov_b64 s[0:1], s[2:3]
945 ; GFX11-WGP-NEXT: s_load_b32 s2, s[0:1], 0x0
946 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
947 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
948 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
949 ; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
950 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
951 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
952 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
953 ; GFX11-WGP-NEXT: s_endpgm
955 ; GFX11-CU-LABEL: global_volatile_workgroup_release_store:
956 ; GFX11-CU: ; %bb.0: ; %entry
957 ; GFX11-CU-NEXT: s_mov_b64 s[0:1], s[2:3]
958 ; GFX11-CU-NEXT: s_load_b32 s2, s[0:1], 0x0
959 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
960 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
961 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
962 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
963 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
964 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
965 ; GFX11-CU-NEXT: s_endpgm
967 ; GFX12-WGP-LABEL: global_volatile_workgroup_release_store:
968 ; GFX12-WGP: ; %bb.0: ; %entry
969 ; GFX12-WGP-NEXT: s_mov_b64 s[0:1], s[2:3]
970 ; GFX12-WGP-NEXT: s_load_b32 s2, s[0:1], 0x0
971 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
972 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
973 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
974 ; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
975 ; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
976 ; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
977 ; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
978 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0
979 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
980 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
981 ; GFX12-WGP-NEXT: s_endpgm
983 ; GFX12-CU-LABEL: global_volatile_workgroup_release_store:
984 ; GFX12-CU: ; %bb.0: ; %entry
985 ; GFX12-CU-NEXT: s_mov_b64 s[0:1], s[2:3]
986 ; GFX12-CU-NEXT: s_load_b32 s2, s[0:1], 0x0
987 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
988 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
989 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
990 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
991 ; GFX12-CU-NEXT: s_wait_dscnt 0x0
992 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
993 ; GFX12-CU-NEXT: s_endpgm
994 i32 %in, ptr addrspace(1) %out) {
; Operation under test: the store is atomic, volatile, release-ordered and
; scoped to the workgroup; %in is passed by value as a kernel argument.
996 store atomic volatile i32 %in, ptr addrspace(1) %out syncscope("workgroup") release, align 4
; Declaration of the work-item id (X) intrinsic. Kernels in this file such as
; @global_volatile_store_1 call it to obtain %tid, which indexes %out.
1000 declare i32 @llvm.amdgcn.workitem.id.x()