1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -amdgpu-atomic-optimizer-strategy=None < %s | FileCheck -check-prefix=GFX6 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -amdgpu-atomic-optimizer-strategy=None < %s | FileCheck -check-prefix=GFX7 %s
5 ; Test end to end matching of addressing modes when MUBUF is used for global memory.
; Baseline store: writes i32 0 through an `inreg` addrspace(1) pointer with no
; offset. The checks expect the incoming pointer pair s[2:3] to be copied into
; s[0:1] of the s[0:3] operand and a buffer_store_dword with `off` and soffset 0.
8 define amdgpu_ps void @mubuf_store_sgpr_ptr(ptr addrspace(1) inreg %ptr) {
9 ; GFX6-LABEL: mubuf_store_sgpr_ptr:
11 ; GFX6-NEXT: s_mov_b32 s0, s2
12 ; GFX6-NEXT: s_mov_b32 s1, s3
13 ; GFX6-NEXT: v_mov_b32_e32 v0, 0
14 ; GFX6-NEXT: s_mov_b32 s2, -1
15 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
16 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
19 ; GFX7-LABEL: mubuf_store_sgpr_ptr:
21 ; GFX7-NEXT: s_mov_b32 s0, s2
22 ; GFX7-NEXT: s_mov_b32 s1, s3
23 ; GFX7-NEXT: v_mov_b32_e32 v0, 0
24 ; GFX7-NEXT: s_mov_b32 s2, -1
25 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
26 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
28 store i32 0, ptr addrspace(1) %ptr
; Store at i32 element index 4095 (4095 * 4 = 16380 = 0x3ffc bytes) from an
; `inreg` pointer. The checks expect the byte offset to be materialized in s4
; and passed as the scalar offset operand of buffer_store_dword.
32 define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095(ptr addrspace(1) inreg %ptr) {
33 ; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4095:
35 ; GFX6-NEXT: s_mov_b32 s0, s2
36 ; GFX6-NEXT: s_mov_b32 s1, s3
37 ; GFX6-NEXT: v_mov_b32_e32 v0, 0
38 ; GFX6-NEXT: s_mov_b32 s2, -1
39 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
40 ; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
41 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], s4
44 ; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4095:
46 ; GFX7-NEXT: s_mov_b32 s0, s2
47 ; GFX7-NEXT: s_mov_b32 s1, s3
48 ; GFX7-NEXT: v_mov_b32_e32 v0, 0
49 ; GFX7-NEXT: s_mov_b32 s2, -1
50 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
51 ; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
52 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], s4
54 %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4095
55 store i32 0, ptr addrspace(1) %gep
; Store at i32 element index 2^32, i.e. byte offset 2^34 (low dword 0, high
; dword 4). The checks expect that 64-bit constant to be built in s[4:5],
; copied to v[0:1], and used as the addr64 address of buffer_store_dword.
59 define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967296(ptr addrspace(1) inreg %ptr) {
60 ; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4294967296:
62 ; GFX6-NEXT: s_mov_b32 s4, 0
63 ; GFX6-NEXT: s_mov_b32 s5, 4
64 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
65 ; GFX6-NEXT: s_mov_b32 s0, s2
66 ; GFX6-NEXT: s_mov_b32 s1, s3
67 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
68 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
69 ; GFX6-NEXT: s_mov_b32 s2, s4
70 ; GFX6-NEXT: v_mov_b32_e32 v1, s5
71 ; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
74 ; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4294967296:
76 ; GFX7-NEXT: s_mov_b32 s4, 0
77 ; GFX7-NEXT: s_mov_b32 s5, 4
78 ; GFX7-NEXT: v_mov_b32_e32 v0, s4
79 ; GFX7-NEXT: s_mov_b32 s0, s2
80 ; GFX7-NEXT: s_mov_b32 s1, s3
81 ; GFX7-NEXT: v_mov_b32_e32 v2, 0
82 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
83 ; GFX7-NEXT: s_mov_b32 s2, s4
84 ; GFX7-NEXT: v_mov_b32_e32 v1, s5
85 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
87 %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967296
88 store i32 0, ptr addrspace(1) %gep
; Store at i32 element index 2^32 + 1, i.e. byte offset 2^34 + 4 (low dword 4,
; high dword 4). The checks expect s4 = 4 to be reused for s5, the pair copied
; to v[0:1], and an addr64 buffer_store_dword.
92 define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967297(ptr addrspace(1) inreg %ptr) {
93 ; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4294967297:
95 ; GFX6-NEXT: s_mov_b32 s4, 4
96 ; GFX6-NEXT: s_mov_b32 s5, s4
97 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
98 ; GFX6-NEXT: s_mov_b32 s0, s2
99 ; GFX6-NEXT: s_mov_b32 s1, s3
100 ; GFX6-NEXT: s_mov_b32 s2, 0
101 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
102 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
103 ; GFX6-NEXT: v_mov_b32_e32 v1, s5
104 ; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
105 ; GFX6-NEXT: s_endpgm
107 ; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4294967297:
109 ; GFX7-NEXT: s_mov_b32 s4, 4
110 ; GFX7-NEXT: s_mov_b32 s5, s4
111 ; GFX7-NEXT: v_mov_b32_e32 v0, s4
112 ; GFX7-NEXT: s_mov_b32 s0, s2
113 ; GFX7-NEXT: s_mov_b32 s1, s3
114 ; GFX7-NEXT: s_mov_b32 s2, 0
115 ; GFX7-NEXT: v_mov_b32_e32 v2, 0
116 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
117 ; GFX7-NEXT: v_mov_b32_e32 v1, s5
118 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
119 ; GFX7-NEXT: s_endpgm
120 %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967297
121 store i32 0, ptr addrspace(1) %gep
; Store at i32 element index 4096 (4096 * 4 = 16384 = 0x4000 bytes) from an
; `inreg` pointer. As in the 4095 case, the checks expect the byte offset in s4
; as the scalar offset operand.
125 define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4096(ptr addrspace(1) inreg %ptr) {
126 ; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4096:
128 ; GFX6-NEXT: s_mov_b32 s0, s2
129 ; GFX6-NEXT: s_mov_b32 s1, s3
130 ; GFX6-NEXT: v_mov_b32_e32 v0, 0
131 ; GFX6-NEXT: s_mov_b32 s2, -1
132 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
133 ; GFX6-NEXT: s_movk_i32 s4, 0x4000
134 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], s4
135 ; GFX6-NEXT: s_endpgm
137 ; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4096:
139 ; GFX7-NEXT: s_mov_b32 s0, s2
140 ; GFX7-NEXT: s_mov_b32 s1, s3
141 ; GFX7-NEXT: v_mov_b32_e32 v0, 0
142 ; GFX7-NEXT: s_mov_b32 s2, -1
143 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
144 ; GFX7-NEXT: s_movk_i32 s4, 0x4000
145 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], s4
146 ; GFX7-NEXT: s_endpgm
147 %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4096
148 store i32 0, ptr addrspace(1) %gep
; Store at i32 element index 4095 (0x3ffc bytes) where the pointer is not
; `inreg`. The checks expect the pointer in v[0:1] as the addr64 address,
; s[0:1] zeroed, and the byte offset in s4 as the scalar offset operand.
152 define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4095(ptr addrspace(1) %ptr) {
153 ; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4095:
155 ; GFX6-NEXT: s_mov_b32 s2, 0
156 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
157 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
158 ; GFX6-NEXT: s_mov_b64 s[0:1], 0
159 ; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
160 ; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
161 ; GFX6-NEXT: s_endpgm
163 ; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4095:
165 ; GFX7-NEXT: s_mov_b32 s2, 0
166 ; GFX7-NEXT: v_mov_b32_e32 v2, 0
167 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
168 ; GFX7-NEXT: s_mov_b64 s[0:1], 0
169 ; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
170 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
171 ; GFX7-NEXT: s_endpgm
172 %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4095
173 store i32 0, ptr addrspace(1) %gep
; Store at i32 element index 2^32 (byte offset 2^34: low dword 0, high dword 4)
; where the pointer is not `inreg`. The checks expect the constant offset in
; s[0:1] of the s[0:3] operand and the pointer in v[0:1] as the addr64 address.
177 define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967296(ptr addrspace(1) %ptr) {
178 ; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4294967296:
180 ; GFX6-NEXT: s_mov_b32 s0, 0
181 ; GFX6-NEXT: s_mov_b32 s1, 4
182 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
183 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
184 ; GFX6-NEXT: s_mov_b32 s2, s0
185 ; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
186 ; GFX6-NEXT: s_endpgm
188 ; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4294967296:
190 ; GFX7-NEXT: s_mov_b32 s0, 0
191 ; GFX7-NEXT: s_mov_b32 s1, 4
192 ; GFX7-NEXT: v_mov_b32_e32 v2, 0
193 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
194 ; GFX7-NEXT: s_mov_b32 s2, s0
195 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
196 ; GFX7-NEXT: s_endpgm
197 %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967296
198 store i32 0, ptr addrspace(1) %gep
; Store at i32 element index 2^32 + 1 (byte offset 2^34 + 4: low dword 4, high
; dword 4) where the pointer is not `inreg`. The checks expect s0 = 4 reused
; for s1 and the pointer in v[0:1] as the addr64 address.
202 define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967297(ptr addrspace(1) %ptr) {
203 ; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4294967297:
205 ; GFX6-NEXT: s_mov_b32 s0, 4
206 ; GFX6-NEXT: s_mov_b32 s1, s0
207 ; GFX6-NEXT: s_mov_b32 s2, 0
208 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
209 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
210 ; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
211 ; GFX6-NEXT: s_endpgm
213 ; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4294967297:
215 ; GFX7-NEXT: s_mov_b32 s0, 4
216 ; GFX7-NEXT: s_mov_b32 s1, s0
217 ; GFX7-NEXT: s_mov_b32 s2, 0
218 ; GFX7-NEXT: v_mov_b32_e32 v2, 0
219 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
220 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
221 ; GFX7-NEXT: s_endpgm
222 %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967297
223 store i32 0, ptr addrspace(1) %gep
; Store at i32 element index 4096 (0x4000 bytes) where the pointer is not
; `inreg`. The checks expect the pointer in v[0:1] as the addr64 address,
; s[0:1] zeroed, and the byte offset in s4 as the scalar offset operand.
227 define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4096(ptr addrspace(1) %ptr) {
228 ; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4096:
230 ; GFX6-NEXT: s_mov_b32 s2, 0
231 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
232 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
233 ; GFX6-NEXT: s_mov_b64 s[0:1], 0
234 ; GFX6-NEXT: s_movk_i32 s4, 0x4000
235 ; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
236 ; GFX6-NEXT: s_endpgm
238 ; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4096:
240 ; GFX7-NEXT: s_mov_b32 s2, 0
241 ; GFX7-NEXT: v_mov_b32_e32 v2, 0
242 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
243 ; GFX7-NEXT: s_mov_b64 s[0:1], 0
244 ; GFX7-NEXT: s_movk_i32 s4, 0x4000
245 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
246 ; GFX7-NEXT: s_endpgm
247 %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4096
248 store i32 0, ptr addrspace(1) %gep
; Store with both the pointer and a variable i32 element index passed `inreg`.
; The checks expect the index to be sign-extended (s_ashr_i32 by 31) and scaled
; by 4 (s_lshl_b64 by 2) in s[4:5], copied to v[0:1], and used as the addr64
; address, with the pointer copied into s[0:1].
252 define amdgpu_ps void @mubuf_store_sgpr_ptr_sgpr_offset(ptr addrspace(1) inreg %ptr, i32 inreg %soffset) {
253 ; GFX6-LABEL: mubuf_store_sgpr_ptr_sgpr_offset:
255 ; GFX6-NEXT: s_ashr_i32 s5, s4, 31
256 ; GFX6-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
257 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
258 ; GFX6-NEXT: s_mov_b32 s0, s2
259 ; GFX6-NEXT: s_mov_b32 s1, s3
260 ; GFX6-NEXT: s_mov_b32 s2, 0
261 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
262 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
263 ; GFX6-NEXT: v_mov_b32_e32 v1, s5
264 ; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
265 ; GFX6-NEXT: s_endpgm
267 ; GFX7-LABEL: mubuf_store_sgpr_ptr_sgpr_offset:
269 ; GFX7-NEXT: s_ashr_i32 s5, s4, 31
270 ; GFX7-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
271 ; GFX7-NEXT: v_mov_b32_e32 v0, s4
272 ; GFX7-NEXT: s_mov_b32 s0, s2
273 ; GFX7-NEXT: s_mov_b32 s1, s3
274 ; GFX7-NEXT: s_mov_b32 s2, 0
275 ; GFX7-NEXT: v_mov_b32_e32 v2, 0
276 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
277 ; GFX7-NEXT: v_mov_b32_e32 v1, s5
278 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
279 ; GFX7-NEXT: s_endpgm
280 %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 %soffset
281 store i32 0, ptr addrspace(1) %gep
; Store with a non-`inreg` pointer and an `inreg` variable i32 element index.
; The checks expect the sign-extended, 4x-scaled index to be computed directly
; into s[0:1] of the s[0:3] operand, with the pointer in v[0:1] as the addr64
; address.
285 define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset(ptr addrspace(1) %ptr, i32 inreg %soffset) {
286 ; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset:
288 ; GFX6-NEXT: s_ashr_i32 s3, s2, 31
289 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[2:3], 2
290 ; GFX6-NEXT: s_mov_b32 s2, 0
291 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
292 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
293 ; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
294 ; GFX6-NEXT: s_endpgm
296 ; GFX7-LABEL: mubuf_store_vgpr_ptr_sgpr_offset:
298 ; GFX7-NEXT: s_ashr_i32 s3, s2, 31
299 ; GFX7-NEXT: s_lshl_b64 s[0:1], s[2:3], 2
300 ; GFX7-NEXT: s_mov_b32 s2, 0
301 ; GFX7-NEXT: v_mov_b32_e32 v2, 0
302 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
303 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
304 ; GFX7-NEXT: s_endpgm
305 %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 %soffset
306 store i32 0, ptr addrspace(1) %gep
; Store with a variable `inreg` index applied first, then a constant 256
; elements (256 * 4 = 1024 bytes). The checks expect the constant part to be
; folded into the instruction as `offset:1024`, with the scaled variable index
; in s[0:1].
310 define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset_offset256(ptr addrspace(1) %ptr, i32 inreg %soffset) {
311 ; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset_offset256:
313 ; GFX6-NEXT: s_ashr_i32 s3, s2, 31
314 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[2:3], 2
315 ; GFX6-NEXT: s_mov_b32 s2, 0
316 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
317 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
318 ; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024
319 ; GFX6-NEXT: s_endpgm
321 ; GFX7-LABEL: mubuf_store_vgpr_ptr_sgpr_offset_offset256:
323 ; GFX7-NEXT: s_ashr_i32 s3, s2, 31
324 ; GFX7-NEXT: s_lshl_b64 s[0:1], s[2:3], 2
325 ; GFX7-NEXT: s_mov_b32 s2, 0
326 ; GFX7-NEXT: v_mov_b32_e32 v2, 0
327 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
328 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024
329 ; GFX7-NEXT: s_endpgm
330 %gep0 = getelementptr i32, ptr addrspace(1) %ptr, i32 %soffset
331 %gep1 = getelementptr i32, ptr addrspace(1) %gep0, i32 256
332 store i32 0, ptr addrspace(1) %gep1
; Same address as the sgpr_offset_offset256 store test, but with the GEPs in
; the opposite order: constant 256 elements first, then the variable `inreg`
; index. The checks expect identical code, including `offset:1024`.
336 define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset256_offset(ptr addrspace(1) %ptr, i32 inreg %soffset) {
337 ; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset256_offset:
339 ; GFX6-NEXT: s_ashr_i32 s3, s2, 31
340 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[2:3], 2
341 ; GFX6-NEXT: s_mov_b32 s2, 0
342 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
343 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
344 ; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024
345 ; GFX6-NEXT: s_endpgm
347 ; GFX7-LABEL: mubuf_store_vgpr_ptr_sgpr_offset256_offset:
349 ; GFX7-NEXT: s_ashr_i32 s3, s2, 31
350 ; GFX7-NEXT: s_lshl_b64 s[0:1], s[2:3], 2
351 ; GFX7-NEXT: s_mov_b32 s2, 0
352 ; GFX7-NEXT: v_mov_b32_e32 v2, 0
353 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
354 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024
355 ; GFX7-NEXT: s_endpgm
356 %gep0 = getelementptr i32, ptr addrspace(1) %ptr, i32 256
357 %gep1 = getelementptr i32, ptr addrspace(1) %gep0, i32 %soffset
358 store i32 0, ptr addrspace(1) %gep1
; Store with an `inreg` pointer and a non-`inreg` variable i32 element index.
; The checks expect the index to be sign-extended and scaled by 4 with vector
; instructions into v[0:1] (the addr64 address) and the pointer copied into
; s[0:1].
362 define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset(ptr addrspace(1) inreg %ptr, i32 %voffset) {
363 ; GFX6-LABEL: mubuf_store_sgpr_ptr_vgpr_offset:
365 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
366 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
367 ; GFX6-NEXT: s_mov_b32 s0, s2
368 ; GFX6-NEXT: s_mov_b32 s1, s3
369 ; GFX6-NEXT: s_mov_b32 s2, 0
370 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
371 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
372 ; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
373 ; GFX6-NEXT: s_endpgm
375 ; GFX7-LABEL: mubuf_store_sgpr_ptr_vgpr_offset:
377 ; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
378 ; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
379 ; GFX7-NEXT: s_mov_b32 s0, s2
380 ; GFX7-NEXT: s_mov_b32 s1, s3
381 ; GFX7-NEXT: s_mov_b32 s2, 0
382 ; GFX7-NEXT: v_mov_b32_e32 v2, 0
383 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
384 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
385 ; GFX7-NEXT: s_endpgm
386 %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 %voffset
387 store i32 0, ptr addrspace(1) %gep
; Store with an `inreg` pointer, a non-`inreg` variable index applied first,
; then a constant 4095 elements (0x3ffc bytes). The checks expect the scaled
; variable index in v[0:1] (addr64) and the constant byte offset in s4 as the
; scalar offset operand.
391 define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset_offset4095(ptr addrspace(1) inreg %ptr, i32 %voffset) {
392 ; GFX6-LABEL: mubuf_store_sgpr_ptr_vgpr_offset_offset4095:
394 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
395 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
396 ; GFX6-NEXT: s_mov_b32 s0, s2
397 ; GFX6-NEXT: s_mov_b32 s1, s3
398 ; GFX6-NEXT: s_mov_b32 s2, 0
399 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
400 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
401 ; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
402 ; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
403 ; GFX6-NEXT: s_endpgm
405 ; GFX7-LABEL: mubuf_store_sgpr_ptr_vgpr_offset_offset4095:
407 ; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
408 ; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
409 ; GFX7-NEXT: s_mov_b32 s0, s2
410 ; GFX7-NEXT: s_mov_b32 s1, s3
411 ; GFX7-NEXT: s_mov_b32 s2, 0
412 ; GFX7-NEXT: v_mov_b32_e32 v2, 0
413 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
414 ; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
415 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
416 ; GFX7-NEXT: s_endpgm
417 %gep0 = getelementptr i32, ptr addrspace(1) %ptr, i32 %voffset
418 %gep1 = getelementptr i32, ptr addrspace(1) %gep0, i32 4095
419 store i32 0, ptr addrspace(1) %gep1
; Same address as the vgpr_offset_offset4095 store test, but with the GEPs in
; the opposite order: constant 4095 elements first, then the variable index.
; The checks expect identical code, with 0x3ffc in s4.
422 define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095_vgpr_offset(ptr addrspace(1) inreg %ptr, i32 %voffset) {
423 ; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4095_vgpr_offset:
425 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
426 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
427 ; GFX6-NEXT: s_mov_b32 s0, s2
428 ; GFX6-NEXT: s_mov_b32 s1, s3
429 ; GFX6-NEXT: s_mov_b32 s2, 0
430 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
431 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
432 ; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
433 ; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
434 ; GFX6-NEXT: s_endpgm
436 ; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4095_vgpr_offset:
438 ; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
439 ; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
440 ; GFX7-NEXT: s_mov_b32 s0, s2
441 ; GFX7-NEXT: s_mov_b32 s1, s3
442 ; GFX7-NEXT: s_mov_b32 s2, 0
443 ; GFX7-NEXT: v_mov_b32_e32 v2, 0
444 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
445 ; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
446 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
447 ; GFX7-NEXT: s_endpgm
448 %gep0 = getelementptr i32, ptr addrspace(1) %ptr, i32 4095
449 %gep1 = getelementptr i32, ptr addrspace(1) %gep0, i32 %voffset
450 store i32 0, ptr addrspace(1) %gep1
; Baseline load: volatile float load through an `inreg` addrspace(1) pointer
; with no offset. The checks expect buffer_load_dword into v0 with `off`,
; soffset 0 and `glc`, followed by s_waitcnt vmcnt(0).
454 define amdgpu_ps float @mubuf_load_sgpr_ptr(ptr addrspace(1) inreg %ptr) {
455 ; GFX6-LABEL: mubuf_load_sgpr_ptr:
457 ; GFX6-NEXT: s_mov_b32 s0, s2
458 ; GFX6-NEXT: s_mov_b32 s1, s3
459 ; GFX6-NEXT: s_mov_b32 s2, -1
460 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
461 ; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
462 ; GFX6-NEXT: s_waitcnt vmcnt(0)
463 ; GFX6-NEXT: ; return to shader part epilog
465 ; GFX7-LABEL: mubuf_load_sgpr_ptr:
467 ; GFX7-NEXT: s_mov_b32 s0, s2
468 ; GFX7-NEXT: s_mov_b32 s1, s3
469 ; GFX7-NEXT: s_mov_b32 s2, -1
470 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
471 ; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
472 ; GFX7-NEXT: s_waitcnt vmcnt(0)
473 ; GFX7-NEXT: ; return to shader part epilog
474 %val = load volatile float, ptr addrspace(1) %ptr
; Volatile float load at element index 4095 (4095 * 4 = 0x3ffc bytes) from an
; `inreg` pointer. The checks expect the byte offset in s4 as the scalar offset
; operand of buffer_load_dword.
478 define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095(ptr addrspace(1) inreg %ptr) {
479 ; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4095:
481 ; GFX6-NEXT: s_mov_b32 s0, s2
482 ; GFX6-NEXT: s_mov_b32 s1, s3
483 ; GFX6-NEXT: s_mov_b32 s2, -1
484 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
485 ; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
486 ; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], s4 glc
487 ; GFX6-NEXT: s_waitcnt vmcnt(0)
488 ; GFX6-NEXT: ; return to shader part epilog
490 ; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4095:
492 ; GFX7-NEXT: s_mov_b32 s0, s2
493 ; GFX7-NEXT: s_mov_b32 s1, s3
494 ; GFX7-NEXT: s_mov_b32 s2, -1
495 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
496 ; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
497 ; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s4 glc
498 ; GFX7-NEXT: s_waitcnt vmcnt(0)
499 ; GFX7-NEXT: ; return to shader part epilog
500 %gep = getelementptr float, ptr addrspace(1) %ptr, i64 4095
501 %val = load volatile float, ptr addrspace(1) %gep
; Volatile float load at element index 2^32 (byte offset 2^34: low dword 0,
; high dword 4) from an `inreg` pointer. The checks expect the constant built
; in s[4:5], copied to v[0:1], and used as the addr64 address.
505 define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967296(ptr addrspace(1) inreg %ptr) {
506 ; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4294967296:
508 ; GFX6-NEXT: s_mov_b32 s4, 0
509 ; GFX6-NEXT: s_mov_b32 s5, 4
510 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
511 ; GFX6-NEXT: s_mov_b32 s0, s2
512 ; GFX6-NEXT: s_mov_b32 s1, s3
513 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
514 ; GFX6-NEXT: s_mov_b32 s2, s4
515 ; GFX6-NEXT: v_mov_b32_e32 v1, s5
516 ; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
517 ; GFX6-NEXT: s_waitcnt vmcnt(0)
518 ; GFX6-NEXT: ; return to shader part epilog
520 ; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4294967296:
522 ; GFX7-NEXT: s_mov_b32 s4, 0
523 ; GFX7-NEXT: s_mov_b32 s5, 4
524 ; GFX7-NEXT: v_mov_b32_e32 v0, s4
525 ; GFX7-NEXT: s_mov_b32 s0, s2
526 ; GFX7-NEXT: s_mov_b32 s1, s3
527 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
528 ; GFX7-NEXT: s_mov_b32 s2, s4
529 ; GFX7-NEXT: v_mov_b32_e32 v1, s5
530 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
531 ; GFX7-NEXT: s_waitcnt vmcnt(0)
532 ; GFX7-NEXT: ; return to shader part epilog
533 %gep = getelementptr float, ptr addrspace(1) %ptr, i64 4294967296
534 %val = load volatile float, ptr addrspace(1) %gep
; Volatile float load at element index 2^32 + 1 (byte offset 2^34 + 4: low
; dword 4, high dword 4) from an `inreg` pointer. The checks expect s4 = 4
; reused for s5, the pair copied to v[0:1], and an addr64 buffer_load_dword.
538 define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967297(ptr addrspace(1) inreg %ptr) {
539 ; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4294967297:
541 ; GFX6-NEXT: s_mov_b32 s4, 4
542 ; GFX6-NEXT: s_mov_b32 s5, s4
543 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
544 ; GFX6-NEXT: s_mov_b32 s0, s2
545 ; GFX6-NEXT: s_mov_b32 s1, s3
546 ; GFX6-NEXT: s_mov_b32 s2, 0
547 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
548 ; GFX6-NEXT: v_mov_b32_e32 v1, s5
549 ; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
550 ; GFX6-NEXT: s_waitcnt vmcnt(0)
551 ; GFX6-NEXT: ; return to shader part epilog
553 ; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4294967297:
555 ; GFX7-NEXT: s_mov_b32 s4, 4
556 ; GFX7-NEXT: s_mov_b32 s5, s4
557 ; GFX7-NEXT: v_mov_b32_e32 v0, s4
558 ; GFX7-NEXT: s_mov_b32 s0, s2
559 ; GFX7-NEXT: s_mov_b32 s1, s3
560 ; GFX7-NEXT: s_mov_b32 s2, 0
561 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
562 ; GFX7-NEXT: v_mov_b32_e32 v1, s5
563 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
564 ; GFX7-NEXT: s_waitcnt vmcnt(0)
565 ; GFX7-NEXT: ; return to shader part epilog
566 %gep = getelementptr float, ptr addrspace(1) %ptr, i64 4294967297
567 %val = load volatile float, ptr addrspace(1) %gep
; Volatile float load at element index 4096 (4096 * 4 = 0x4000 bytes) from an
; `inreg` pointer. The checks expect the byte offset in s4 as the scalar offset
; operand of buffer_load_dword.
571 define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4096(ptr addrspace(1) inreg %ptr) {
572 ; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4096:
574 ; GFX6-NEXT: s_mov_b32 s0, s2
575 ; GFX6-NEXT: s_mov_b32 s1, s3
576 ; GFX6-NEXT: s_mov_b32 s2, -1
577 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
578 ; GFX6-NEXT: s_movk_i32 s4, 0x4000
579 ; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], s4 glc
580 ; GFX6-NEXT: s_waitcnt vmcnt(0)
581 ; GFX6-NEXT: ; return to shader part epilog
583 ; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4096:
585 ; GFX7-NEXT: s_mov_b32 s0, s2
586 ; GFX7-NEXT: s_mov_b32 s1, s3
587 ; GFX7-NEXT: s_mov_b32 s2, -1
588 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
589 ; GFX7-NEXT: s_movk_i32 s4, 0x4000
590 ; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s4 glc
591 ; GFX7-NEXT: s_waitcnt vmcnt(0)
592 ; GFX7-NEXT: ; return to shader part epilog
593 %gep = getelementptr float, ptr addrspace(1) %ptr, i64 4096
594 %val = load volatile float, ptr addrspace(1) %gep
; Volatile float load at element index 4095 (0x3ffc bytes) where the pointer is
; not `inreg`. The checks expect the pointer in v[0:1] as the addr64 address,
; s[0:1] zeroed, and the byte offset in s4 as the scalar offset operand.
598 define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4095(ptr addrspace(1) %ptr) {
599 ; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4095:
601 ; GFX6-NEXT: s_mov_b32 s2, 0
602 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
603 ; GFX6-NEXT: s_mov_b64 s[0:1], 0
604 ; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
605 ; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
606 ; GFX6-NEXT: s_waitcnt vmcnt(0)
607 ; GFX6-NEXT: ; return to shader part epilog
609 ; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4095:
611 ; GFX7-NEXT: s_mov_b32 s2, 0
612 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
613 ; GFX7-NEXT: s_mov_b64 s[0:1], 0
614 ; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
615 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
616 ; GFX7-NEXT: s_waitcnt vmcnt(0)
617 ; GFX7-NEXT: ; return to shader part epilog
618 %gep = getelementptr float, ptr addrspace(1) %ptr, i64 4095
619 %val = load volatile float, ptr addrspace(1) %gep
; Volatile float load at element index 2^32 (byte offset 2^34: low dword 0,
; high dword 4) where the pointer is not `inreg`. The checks expect the
; constant offset in s[0:1] and the pointer in v[0:1] as the addr64 address.
623 define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967296(ptr addrspace(1) %ptr) {
624 ; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4294967296:
626 ; GFX6-NEXT: s_mov_b32 s0, 0
627 ; GFX6-NEXT: s_mov_b32 s1, 4
628 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
629 ; GFX6-NEXT: s_mov_b32 s2, s0
630 ; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
631 ; GFX6-NEXT: s_waitcnt vmcnt(0)
632 ; GFX6-NEXT: ; return to shader part epilog
634 ; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4294967296:
636 ; GFX7-NEXT: s_mov_b32 s0, 0
637 ; GFX7-NEXT: s_mov_b32 s1, 4
638 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
639 ; GFX7-NEXT: s_mov_b32 s2, s0
640 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
641 ; GFX7-NEXT: s_waitcnt vmcnt(0)
642 ; GFX7-NEXT: ; return to shader part epilog
643 %gep = getelementptr float, ptr addrspace(1) %ptr, i64 4294967296
644 %val = load volatile float, ptr addrspace(1) %gep
; Volatile float load at element index 2^32 + 1 (byte offset 2^34 + 4: low
; dword 4, high dword 4) where the pointer is not `inreg`. The checks expect
; s0 = 4 reused for s1 and the pointer in v[0:1] as the addr64 address.
648 define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967297(ptr addrspace(1) %ptr) {
649 ; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4294967297:
651 ; GFX6-NEXT: s_mov_b32 s0, 4
652 ; GFX6-NEXT: s_mov_b32 s1, s0
653 ; GFX6-NEXT: s_mov_b32 s2, 0
654 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
655 ; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
656 ; GFX6-NEXT: s_waitcnt vmcnt(0)
657 ; GFX6-NEXT: ; return to shader part epilog
659 ; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4294967297:
661 ; GFX7-NEXT: s_mov_b32 s0, 4
662 ; GFX7-NEXT: s_mov_b32 s1, s0
663 ; GFX7-NEXT: s_mov_b32 s2, 0
664 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
665 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
666 ; GFX7-NEXT: s_waitcnt vmcnt(0)
667 ; GFX7-NEXT: ; return to shader part epilog
668 %gep = getelementptr float, ptr addrspace(1) %ptr, i64 4294967297
669 %val = load volatile float, ptr addrspace(1) %gep
; Volatile float load at element index 4096 (0x4000 bytes) where the pointer is
; not `inreg`. The checks expect the pointer in v[0:1] as the addr64 address,
; s[0:1] zeroed, and the byte offset in s4 as the scalar offset operand.
673 define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4096(ptr addrspace(1) %ptr) {
674 ; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4096:
676 ; GFX6-NEXT: s_mov_b32 s2, 0
677 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
678 ; GFX6-NEXT: s_mov_b64 s[0:1], 0
679 ; GFX6-NEXT: s_movk_i32 s4, 0x4000
680 ; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
681 ; GFX6-NEXT: s_waitcnt vmcnt(0)
682 ; GFX6-NEXT: ; return to shader part epilog
684 ; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4096:
686 ; GFX7-NEXT: s_mov_b32 s2, 0
687 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
688 ; GFX7-NEXT: s_mov_b64 s[0:1], 0
689 ; GFX7-NEXT: s_movk_i32 s4, 0x4000
690 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
691 ; GFX7-NEXT: s_waitcnt vmcnt(0)
692 ; GFX7-NEXT: ; return to shader part epilog
693 %gep = getelementptr float, ptr addrspace(1) %ptr, i64 4096
694 %val = load volatile float, ptr addrspace(1) %gep
; Volatile float load with both the pointer and a variable i32 element index
; passed `inreg`. The checks expect the index sign-extended and scaled by 4 in
; s[4:5], copied to v[0:1] as the addr64 address, with the pointer in s[0:1].
698 define amdgpu_ps float @mubuf_load_sgpr_ptr_sgpr_offset(ptr addrspace(1) inreg %ptr, i32 inreg %soffset) {
699 ; GFX6-LABEL: mubuf_load_sgpr_ptr_sgpr_offset:
701 ; GFX6-NEXT: s_ashr_i32 s5, s4, 31
702 ; GFX6-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
703 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
704 ; GFX6-NEXT: s_mov_b32 s0, s2
705 ; GFX6-NEXT: s_mov_b32 s1, s3
706 ; GFX6-NEXT: s_mov_b32 s2, 0
707 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
708 ; GFX6-NEXT: v_mov_b32_e32 v1, s5
709 ; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
710 ; GFX6-NEXT: s_waitcnt vmcnt(0)
711 ; GFX6-NEXT: ; return to shader part epilog
713 ; GFX7-LABEL: mubuf_load_sgpr_ptr_sgpr_offset:
715 ; GFX7-NEXT: s_ashr_i32 s5, s4, 31
716 ; GFX7-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
717 ; GFX7-NEXT: v_mov_b32_e32 v0, s4
718 ; GFX7-NEXT: s_mov_b32 s0, s2
719 ; GFX7-NEXT: s_mov_b32 s1, s3
720 ; GFX7-NEXT: s_mov_b32 s2, 0
721 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
722 ; GFX7-NEXT: v_mov_b32_e32 v1, s5
723 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
724 ; GFX7-NEXT: s_waitcnt vmcnt(0)
725 ; GFX7-NEXT: ; return to shader part epilog
726 %gep = getelementptr float, ptr addrspace(1) %ptr, i32 %soffset
727 %val = load volatile float, ptr addrspace(1) %gep
; Volatile float load with a non-`inreg` pointer and an `inreg` variable index.
; The checks expect the sign-extended, 4x-scaled index computed directly into
; s[0:1] and the pointer in v[0:1] as the addr64 address.
731 define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset(ptr addrspace(1) %ptr, i32 inreg %soffset) {
732 ; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset:
734 ; GFX6-NEXT: s_ashr_i32 s3, s2, 31
735 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[2:3], 2
736 ; GFX6-NEXT: s_mov_b32 s2, 0
737 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
738 ; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
739 ; GFX6-NEXT: s_waitcnt vmcnt(0)
740 ; GFX6-NEXT: ; return to shader part epilog
742 ; GFX7-LABEL: mubuf_load_vgpr_ptr_sgpr_offset:
744 ; GFX7-NEXT: s_ashr_i32 s3, s2, 31
745 ; GFX7-NEXT: s_lshl_b64 s[0:1], s[2:3], 2
746 ; GFX7-NEXT: s_mov_b32 s2, 0
747 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
748 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
749 ; GFX7-NEXT: s_waitcnt vmcnt(0)
750 ; GFX7-NEXT: ; return to shader part epilog
751 %gep = getelementptr float, ptr addrspace(1) %ptr, i32 %soffset
752 %val = load volatile float, ptr addrspace(1) %gep
; Volatile float load with a variable `inreg` index applied first, then a
; constant 256 elements (256 * 4 = 1024 bytes). The checks expect the constant
; part folded into the instruction as `offset:1024`.
756 define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset_offset256(ptr addrspace(1) %ptr, i32 inreg %soffset) {
757 ; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset_offset256:
759 ; GFX6-NEXT: s_ashr_i32 s3, s2, 31
760 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[2:3], 2
761 ; GFX6-NEXT: s_mov_b32 s2, 0
762 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
763 ; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024 glc
764 ; GFX6-NEXT: s_waitcnt vmcnt(0)
765 ; GFX6-NEXT: ; return to shader part epilog
767 ; GFX7-LABEL: mubuf_load_vgpr_ptr_sgpr_offset_offset256:
769 ; GFX7-NEXT: s_ashr_i32 s3, s2, 31
770 ; GFX7-NEXT: s_lshl_b64 s[0:1], s[2:3], 2
771 ; GFX7-NEXT: s_mov_b32 s2, 0
772 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
773 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024 glc
774 ; GFX7-NEXT: s_waitcnt vmcnt(0)
775 ; GFX7-NEXT: ; return to shader part epilog
776 %gep0 = getelementptr float, ptr addrspace(1) %ptr, i32 %soffset
777 %gep1 = getelementptr float, ptr addrspace(1) %gep0, i32 256
778 %val = load volatile float, ptr addrspace(1) %gep1
; Same address as the sgpr_offset_offset256 load test, but with the GEPs in the
; opposite order: constant 256 elements (here an i64 index) first, then the
; variable `inreg` index. The checks expect identical code, with `offset:1024`.
782 define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset256_offset(ptr addrspace(1) %ptr, i32 inreg %soffset) {
783 ; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset256_offset:
785 ; GFX6-NEXT: s_ashr_i32 s3, s2, 31
786 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[2:3], 2
787 ; GFX6-NEXT: s_mov_b32 s2, 0
788 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
789 ; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024 glc
790 ; GFX6-NEXT: s_waitcnt vmcnt(0)
791 ; GFX6-NEXT: ; return to shader part epilog
793 ; GFX7-LABEL: mubuf_load_vgpr_ptr_sgpr_offset256_offset:
795 ; GFX7-NEXT: s_ashr_i32 s3, s2, 31
796 ; GFX7-NEXT: s_lshl_b64 s[0:1], s[2:3], 2
797 ; GFX7-NEXT: s_mov_b32 s2, 0
798 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
799 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024 glc
800 ; GFX7-NEXT: s_waitcnt vmcnt(0)
801 ; GFX7-NEXT: ; return to shader part epilog
802 %gep0 = getelementptr float, ptr addrspace(1) %ptr, i64 256
803 %gep1 = getelementptr float, ptr addrspace(1) %gep0, i32 %soffset
804 %val = load volatile float, ptr addrspace(1) %gep1
; Volatile float load with an `inreg` pointer and a non-`inreg` variable index.
; The checks expect the index sign-extended and scaled by 4 with vector
; instructions into v[0:1] (the addr64 address) and the pointer in s[0:1].
808 define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset(ptr addrspace(1) inreg %ptr, i32 %voffset) {
809 ; GFX6-LABEL: mubuf_load_sgpr_ptr_vgpr_offset:
811 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
812 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
813 ; GFX6-NEXT: s_mov_b32 s0, s2
814 ; GFX6-NEXT: s_mov_b32 s1, s3
815 ; GFX6-NEXT: s_mov_b32 s2, 0
816 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
817 ; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
818 ; GFX6-NEXT: s_waitcnt vmcnt(0)
819 ; GFX6-NEXT: ; return to shader part epilog
821 ; GFX7-LABEL: mubuf_load_sgpr_ptr_vgpr_offset:
823 ; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
824 ; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
825 ; GFX7-NEXT: s_mov_b32 s0, s2
826 ; GFX7-NEXT: s_mov_b32 s1, s3
827 ; GFX7-NEXT: s_mov_b32 s2, 0
828 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
829 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
830 ; GFX7-NEXT: s_waitcnt vmcnt(0)
831 ; GFX7-NEXT: ; return to shader part epilog
832 %gep = getelementptr float, ptr addrspace(1) %ptr, i32 %voffset
833 %val = load volatile float, ptr addrspace(1) %gep
; Volatile float load with an `inreg` pointer, a non-`inreg` variable index
; applied first, then a constant 4095 elements (0x3ffc bytes). The checks
; expect the scaled variable index in v[0:1] (addr64) and the constant byte
; offset in s4 as the scalar offset operand.
837 define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset_offset4095(ptr addrspace(1) inreg %ptr, i32 %voffset) {
838 ; GFX6-LABEL: mubuf_load_sgpr_ptr_vgpr_offset_offset4095:
840 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
841 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
842 ; GFX6-NEXT: s_mov_b32 s0, s2
843 ; GFX6-NEXT: s_mov_b32 s1, s3
844 ; GFX6-NEXT: s_mov_b32 s2, 0
845 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
846 ; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
847 ; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
848 ; GFX6-NEXT: s_waitcnt vmcnt(0)
849 ; GFX6-NEXT: ; return to shader part epilog
851 ; GFX7-LABEL: mubuf_load_sgpr_ptr_vgpr_offset_offset4095:
853 ; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
854 ; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
855 ; GFX7-NEXT: s_mov_b32 s0, s2
856 ; GFX7-NEXT: s_mov_b32 s1, s3
857 ; GFX7-NEXT: s_mov_b32 s2, 0
858 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
859 ; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
860 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
861 ; GFX7-NEXT: s_waitcnt vmcnt(0)
862 ; GFX7-NEXT: ; return to shader part epilog
863 %gep0 = getelementptr float, ptr addrspace(1) %ptr, i32 %voffset
864 %gep1 = getelementptr float, ptr addrspace(1) %gep0, i64 4095
865 %val = load volatile float, ptr addrspace(1) %gep1
; Same address as the vgpr_offset_offset4095 load test, but with the GEPs in
; the opposite order: constant 4095 elements first, then the variable index.
; The checks expect identical code, with 0x3ffc in s4.
868 define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095_vgpr_offset(ptr addrspace(1) inreg %ptr, i32 %voffset) {
869 ; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4095_vgpr_offset:
871 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
872 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
873 ; GFX6-NEXT: s_mov_b32 s0, s2
874 ; GFX6-NEXT: s_mov_b32 s1, s3
875 ; GFX6-NEXT: s_mov_b32 s2, 0
876 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
877 ; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
878 ; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
879 ; GFX6-NEXT: s_waitcnt vmcnt(0)
880 ; GFX6-NEXT: ; return to shader part epilog
882 ; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4095_vgpr_offset:
884 ; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
885 ; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
886 ; GFX7-NEXT: s_mov_b32 s0, s2
887 ; GFX7-NEXT: s_mov_b32 s1, s3
888 ; GFX7-NEXT: s_mov_b32 s2, 0
889 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
890 ; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
891 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
892 ; GFX7-NEXT: s_waitcnt vmcnt(0)
893 ; GFX7-NEXT: ; return to shader part epilog
894 %gep0 = getelementptr float, ptr addrspace(1) %ptr, i64 4095
895 %gep1 = getelementptr float, ptr addrspace(1) %gep0, i32 %voffset
896 %val = load volatile float, ptr addrspace(1) %gep1
; Agent-scope seq_cst atomic add of the constant 2 at element offset 4095 from
; an SGPR (inreg) base pointer; the old value is bitcast to float to match the
; function's return type.
; Expected on both GFX6 and GFX7 - no addr64 ("off" vaddr), the byte offset
; 0x3ffc (4095 * 4) in s4 as soffset, and the returning (glc) atomic bracketed
; by s_waitcnt before and s_waitcnt vmcnt(0) + buffer_wbinvl1 after.
; The GFX6 checks additionally expect s_waitcnt expcnt(0) before the epilog.
900 define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4095(ptr addrspace(1) inreg %ptr) {
901 ; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4095:
903 ; GFX6-NEXT: s_mov_b32 s0, s2
904 ; GFX6-NEXT: s_mov_b32 s1, s3
905 ; GFX6-NEXT: v_mov_b32_e32 v0, 2
906 ; GFX6-NEXT: s_mov_b32 s2, -1
907 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
908 ; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
909 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
910 ; GFX6-NEXT: buffer_atomic_add v0, off, s[0:3], s4 glc
911 ; GFX6-NEXT: s_waitcnt vmcnt(0)
912 ; GFX6-NEXT: buffer_wbinvl1
913 ; GFX6-NEXT: s_waitcnt expcnt(0)
914 ; GFX6-NEXT: ; return to shader part epilog
916 ; GFX7-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4095:
918 ; GFX7-NEXT: s_mov_b32 s0, s2
919 ; GFX7-NEXT: s_mov_b32 s1, s3
920 ; GFX7-NEXT: v_mov_b32_e32 v0, 2
921 ; GFX7-NEXT: s_mov_b32 s2, -1
922 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
923 ; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
924 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
925 ; GFX7-NEXT: buffer_atomic_add v0, off, s[0:3], s4 glc
926 ; GFX7-NEXT: s_waitcnt vmcnt(0)
927 ; GFX7-NEXT: buffer_wbinvl1
928 ; GFX7-NEXT: ; return to shader part epilog
929 %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4095
930 %result = atomicrmw add ptr addrspace(1) %gep, i32 2 syncscope("agent") seq_cst
931 %cast = bitcast i32 %result to float
; Agent-scope seq_cst atomic add of the constant 2 at element offset 2^32
; (byte offset 2^34) from an SGPR (inreg) base pointer; the byte offset does
; not fit in 32 bits.
; Expected on both GFX6 and GFX7 - the 64-bit constant is built as low dword 0
; (s4) and high dword 4 (s5), copied into the VGPR pair v[1:2] and used as the
; addr64 vaddr with a zero soffset, while the pointer sits in s[0:1] of the
; resource. The GFX6 checks additionally expect s_waitcnt expcnt(0) at the end.
935 define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4294967296(ptr addrspace(1) inreg %ptr) {
936 ; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4294967296:
938 ; GFX6-NEXT: s_mov_b32 s4, 0
939 ; GFX6-NEXT: s_mov_b32 s5, 4
940 ; GFX6-NEXT: v_mov_b32_e32 v1, s4
941 ; GFX6-NEXT: s_mov_b32 s0, s2
942 ; GFX6-NEXT: s_mov_b32 s1, s3
943 ; GFX6-NEXT: v_mov_b32_e32 v0, 2
944 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
945 ; GFX6-NEXT: s_mov_b32 s2, s4
946 ; GFX6-NEXT: v_mov_b32_e32 v2, s5
947 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
948 ; GFX6-NEXT: buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc
949 ; GFX6-NEXT: s_waitcnt vmcnt(0)
950 ; GFX6-NEXT: buffer_wbinvl1
951 ; GFX6-NEXT: s_waitcnt expcnt(0)
952 ; GFX6-NEXT: ; return to shader part epilog
954 ; GFX7-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4294967296:
956 ; GFX7-NEXT: s_mov_b32 s4, 0
957 ; GFX7-NEXT: s_mov_b32 s5, 4
958 ; GFX7-NEXT: v_mov_b32_e32 v1, s4
959 ; GFX7-NEXT: s_mov_b32 s0, s2
960 ; GFX7-NEXT: s_mov_b32 s1, s3
961 ; GFX7-NEXT: v_mov_b32_e32 v0, 2
962 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
963 ; GFX7-NEXT: s_mov_b32 s2, s4
964 ; GFX7-NEXT: v_mov_b32_e32 v2, s5
965 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
966 ; GFX7-NEXT: buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc
967 ; GFX7-NEXT: s_waitcnt vmcnt(0)
968 ; GFX7-NEXT: buffer_wbinvl1
969 ; GFX7-NEXT: ; return to shader part epilog
970 %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967296
971 %result = atomicrmw add ptr addrspace(1) %gep, i32 2 syncscope("agent") seq_cst
972 %cast = bitcast i32 %result to float
; Agent-scope seq_cst atomic add of the constant 2 at element offset 4095 from
; a VGPR base pointer (no inreg on %ptr).
; Expected on both GFX6 and GFX7 - the pointer v[0:1] is used directly as the
; addr64 vaddr, the resource base s[0:1] is zero, and the byte offset 0x3ffc
; (4095 * 4) is passed in s4 as soffset. The data/result register is v2, so
; the checks expect a final copy into v0. The GFX6 checks additionally expect
; s_waitcnt expcnt(0) before the epilog.
976 define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4095(ptr addrspace(1) %ptr) {
977 ; GFX6-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4095:
979 ; GFX6-NEXT: v_mov_b32_e32 v2, 2
980 ; GFX6-NEXT: s_mov_b32 s2, 0
981 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
982 ; GFX6-NEXT: s_mov_b64 s[0:1], 0
983 ; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
984 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
985 ; GFX6-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], s4 addr64 glc
986 ; GFX6-NEXT: s_waitcnt vmcnt(0)
987 ; GFX6-NEXT: buffer_wbinvl1
988 ; GFX6-NEXT: v_mov_b32_e32 v0, v2
989 ; GFX6-NEXT: s_waitcnt expcnt(0)
990 ; GFX6-NEXT: ; return to shader part epilog
992 ; GFX7-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4095:
994 ; GFX7-NEXT: v_mov_b32_e32 v2, 2
995 ; GFX7-NEXT: s_mov_b32 s2, 0
996 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
997 ; GFX7-NEXT: s_mov_b64 s[0:1], 0
998 ; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
999 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1000 ; GFX7-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], s4 addr64 glc
1001 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1002 ; GFX7-NEXT: buffer_wbinvl1
1003 ; GFX7-NEXT: v_mov_b32_e32 v0, v2
1004 ; GFX7-NEXT: ; return to shader part epilog
1005 %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4095
1006 %result = atomicrmw add ptr addrspace(1) %gep, i32 2 syncscope("agent") seq_cst
1007 %cast = bitcast i32 %result to float
; Agent-scope seq_cst atomic add of the constant 2 at element offset 2^32
; (byte offset 2^34) from a VGPR base pointer; the byte offset does not fit in
; 32 bits.
; Expected on both GFX6 and GFX7 - the 64-bit constant (low dword 0 in s0,
; high dword 4 in s1) is placed in s[0:1] of the resource, the pointer v[0:1]
; is the addr64 vaddr, and soffset is 0. The data/result register is v2, so
; the checks expect a final copy into v0. The GFX6 checks additionally expect
; s_waitcnt expcnt(0) before the epilog.
1011 define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(ptr addrspace(1) %ptr) {
1012 ; GFX6-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4294967296:
1014 ; GFX6-NEXT: s_mov_b32 s0, 0
1015 ; GFX6-NEXT: s_mov_b32 s1, 4
1016 ; GFX6-NEXT: v_mov_b32_e32 v2, 2
1017 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
1018 ; GFX6-NEXT: s_mov_b32 s2, s0
1019 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1020 ; GFX6-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc
1021 ; GFX6-NEXT: s_waitcnt vmcnt(0)
1022 ; GFX6-NEXT: buffer_wbinvl1
1023 ; GFX6-NEXT: v_mov_b32_e32 v0, v2
1024 ; GFX6-NEXT: s_waitcnt expcnt(0)
1025 ; GFX6-NEXT: ; return to shader part epilog
1027 ; GFX7-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4294967296:
1029 ; GFX7-NEXT: s_mov_b32 s0, 0
1030 ; GFX7-NEXT: s_mov_b32 s1, 4
1031 ; GFX7-NEXT: v_mov_b32_e32 v2, 2
1032 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
1033 ; GFX7-NEXT: s_mov_b32 s2, s0
1034 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1035 ; GFX7-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc
1036 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1037 ; GFX7-NEXT: buffer_wbinvl1
1038 ; GFX7-NEXT: v_mov_b32_e32 v0, v2
1039 ; GFX7-NEXT: ; return to shader part epilog
1040 %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967296
1041 %result = atomicrmw add ptr addrspace(1) %gep, i32 2 syncscope("agent") seq_cst
1042 %cast = bitcast i32 %result to float
; Agent-scope seq_cst atomic add of the constant 2 through an SGPR (inreg)
; base pointer indexed by a 32-bit VGPR element index, with no constant offset.
; Expected on both GFX6 and GFX7 - the index is sign-extended and scaled by 4
; (v_ashrrev_i32 + v_lshl_b64) into the addr64 vaddr pair v[0:1], the pointer
; is copied into s[0:1] of the resource, and soffset is 0. The data/result
; register is v2, so the checks expect a final copy into v0. The GFX6 checks
; additionally expect s_waitcnt expcnt(0) before the epilog.
1046 define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_vgpr_offset(ptr addrspace(1) inreg %ptr, i32 %voffset) {
1047 ; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_vgpr_offset:
1049 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1050 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
1051 ; GFX6-NEXT: s_mov_b32 s0, s2
1052 ; GFX6-NEXT: s_mov_b32 s1, s3
1053 ; GFX6-NEXT: v_mov_b32_e32 v2, 2
1054 ; GFX6-NEXT: s_mov_b32 s2, 0
1055 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
1056 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1057 ; GFX6-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc
1058 ; GFX6-NEXT: s_waitcnt vmcnt(0)
1059 ; GFX6-NEXT: buffer_wbinvl1
1060 ; GFX6-NEXT: v_mov_b32_e32 v0, v2
1061 ; GFX6-NEXT: s_waitcnt expcnt(0)
1062 ; GFX6-NEXT: ; return to shader part epilog
1064 ; GFX7-LABEL: mubuf_atomicrmw_sgpr_ptr_vgpr_offset:
1066 ; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1067 ; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
1068 ; GFX7-NEXT: s_mov_b32 s0, s2
1069 ; GFX7-NEXT: s_mov_b32 s1, s3
1070 ; GFX7-NEXT: v_mov_b32_e32 v2, 2
1071 ; GFX7-NEXT: s_mov_b32 s2, 0
1072 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
1073 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1074 ; GFX7-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc
1075 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1076 ; GFX7-NEXT: buffer_wbinvl1
1077 ; GFX7-NEXT: v_mov_b32_e32 v0, v2
1078 ; GFX7-NEXT: ; return to shader part epilog
1079 %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 %voffset
1080 %result = atomicrmw add ptr addrspace(1) %gep, i32 2 syncscope("agent") seq_cst
1081 %cast = bitcast i32 %result to float
; Agent-scope seq_cst/seq_cst cmpxchg at element offset 4095 from an SGPR
; (inreg) base pointer; element 0 of the result struct (the loaded value) is
; extracted and bitcast to float.
; Expected on both GFX6 and GFX7 - no addr64 ("off" vaddr) and the byte offset
; 0x3ffc (4095 * 4) in s4 as soffset. buffer_atomic_cmpswap takes its data as
; the register pair v[1:2], so v0 is first copied into v2; the result is then
; copied from v1 into v0. The GFX6 checks additionally expect
; s_waitcnt expcnt(0) before the epilog.
1085 define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4095(ptr addrspace(1) inreg %ptr, i32 %old, i32 %in) {
1086 ; GFX6-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4095:
1088 ; GFX6-NEXT: s_mov_b32 s0, s2
1089 ; GFX6-NEXT: s_mov_b32 s1, s3
1090 ; GFX6-NEXT: v_mov_b32_e32 v2, v0
1091 ; GFX6-NEXT: s_mov_b32 s2, -1
1092 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
1093 ; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
1094 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1095 ; GFX6-NEXT: buffer_atomic_cmpswap v[1:2], off, s[0:3], s4 glc
1096 ; GFX6-NEXT: s_waitcnt vmcnt(0)
1097 ; GFX6-NEXT: buffer_wbinvl1
1098 ; GFX6-NEXT: v_mov_b32_e32 v0, v1
1099 ; GFX6-NEXT: s_waitcnt expcnt(0)
1100 ; GFX6-NEXT: ; return to shader part epilog
1102 ; GFX7-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4095:
1104 ; GFX7-NEXT: s_mov_b32 s0, s2
1105 ; GFX7-NEXT: s_mov_b32 s1, s3
1106 ; GFX7-NEXT: v_mov_b32_e32 v2, v0
1107 ; GFX7-NEXT: s_mov_b32 s2, -1
1108 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
1109 ; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
1110 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1111 ; GFX7-NEXT: buffer_atomic_cmpswap v[1:2], off, s[0:3], s4 glc
1112 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1113 ; GFX7-NEXT: buffer_wbinvl1
1114 ; GFX7-NEXT: v_mov_b32_e32 v0, v1
1115 ; GFX7-NEXT: ; return to shader part epilog
1116 %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4095
1117 %result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
1118 %result = extractvalue { i32, i1 } %result.struct, 0
1119 %cast = bitcast i32 %result to float
; Agent-scope seq_cst/seq_cst cmpxchg at element offset 2^32 (byte offset
; 2^34) from an SGPR (inreg) base pointer; the byte offset does not fit in
; 32 bits.
; Expected on both GFX6 and GFX7 - the 64-bit constant (low dword 0 in s4,
; high dword 4 in s5) is copied into the VGPR pair v[3:4] and used as the
; addr64 vaddr with a zero soffset, while the pointer sits in s[0:1] of the
; resource. The cmpswap data pair is v[1:2] (v0 is copied into v2) and the
; result is copied from v1 into v0. The GFX6 checks additionally expect
; s_waitcnt expcnt(0) before the epilog.
1123 define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4294967296(ptr addrspace(1) inreg %ptr, i32 %old, i32 %in) {
1124 ; GFX6-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4294967296:
1126 ; GFX6-NEXT: s_mov_b32 s4, 0
1127 ; GFX6-NEXT: s_mov_b32 s5, 4
1128 ; GFX6-NEXT: v_mov_b32_e32 v3, s4
1129 ; GFX6-NEXT: s_mov_b32 s0, s2
1130 ; GFX6-NEXT: s_mov_b32 s1, s3
1131 ; GFX6-NEXT: v_mov_b32_e32 v2, v0
1132 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
1133 ; GFX6-NEXT: s_mov_b32 s2, s4
1134 ; GFX6-NEXT: v_mov_b32_e32 v4, s5
1135 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1136 ; GFX6-NEXT: buffer_atomic_cmpswap v[1:2], v[3:4], s[0:3], 0 addr64 glc
1137 ; GFX6-NEXT: s_waitcnt vmcnt(0)
1138 ; GFX6-NEXT: buffer_wbinvl1
1139 ; GFX6-NEXT: v_mov_b32_e32 v0, v1
1140 ; GFX6-NEXT: s_waitcnt expcnt(0)
1141 ; GFX6-NEXT: ; return to shader part epilog
1143 ; GFX7-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4294967296:
1145 ; GFX7-NEXT: s_mov_b32 s4, 0
1146 ; GFX7-NEXT: s_mov_b32 s5, 4
1147 ; GFX7-NEXT: v_mov_b32_e32 v3, s4
1148 ; GFX7-NEXT: s_mov_b32 s0, s2
1149 ; GFX7-NEXT: s_mov_b32 s1, s3
1150 ; GFX7-NEXT: v_mov_b32_e32 v2, v0
1151 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
1152 ; GFX7-NEXT: s_mov_b32 s2, s4
1153 ; GFX7-NEXT: v_mov_b32_e32 v4, s5
1154 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1155 ; GFX7-NEXT: buffer_atomic_cmpswap v[1:2], v[3:4], s[0:3], 0 addr64 glc
1156 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1157 ; GFX7-NEXT: buffer_wbinvl1
1158 ; GFX7-NEXT: v_mov_b32_e32 v0, v1
1159 ; GFX7-NEXT: ; return to shader part epilog
1160 %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967296
1161 %result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
1162 %result = extractvalue { i32, i1 } %result.struct, 0
1163 %cast = bitcast i32 %result to float
; Agent-scope seq_cst/seq_cst cmpxchg at element offset 4095 from a VGPR base
; pointer (no inreg on %ptr).
; Expected on both GFX6 and GFX7 - the pointer v[0:1] is used directly as the
; addr64 vaddr, the resource base s[0:1] is zero, and the byte offset 0x3ffc
; (4095 * 4) is passed in s4 as soffset. The cmpswap data pair is v[3:4]
; (v2 is copied into v4) and the result is copied from v3 into v0. The GFX6
; checks additionally expect s_waitcnt expcnt(0) before the epilog.
1167 define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4095(ptr addrspace(1) %ptr, i32 %old, i32 %in) {
1168 ; GFX6-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4095:
1170 ; GFX6-NEXT: v_mov_b32_e32 v4, v2
1171 ; GFX6-NEXT: s_mov_b32 s2, 0
1172 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
1173 ; GFX6-NEXT: s_mov_b64 s[0:1], 0
1174 ; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
1175 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1176 ; GFX6-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], s4 addr64 glc
1177 ; GFX6-NEXT: s_waitcnt vmcnt(0)
1178 ; GFX6-NEXT: buffer_wbinvl1
1179 ; GFX6-NEXT: v_mov_b32_e32 v0, v3
1180 ; GFX6-NEXT: s_waitcnt expcnt(0)
1181 ; GFX6-NEXT: ; return to shader part epilog
1183 ; GFX7-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4095:
1185 ; GFX7-NEXT: v_mov_b32_e32 v4, v2
1186 ; GFX7-NEXT: s_mov_b32 s2, 0
1187 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
1188 ; GFX7-NEXT: s_mov_b64 s[0:1], 0
1189 ; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
1190 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1191 ; GFX7-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], s4 addr64 glc
1192 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1193 ; GFX7-NEXT: buffer_wbinvl1
1194 ; GFX7-NEXT: v_mov_b32_e32 v0, v3
1195 ; GFX7-NEXT: ; return to shader part epilog
1196 %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4095
1197 %result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
1198 %result = extractvalue { i32, i1 } %result.struct, 0
1199 %cast = bitcast i32 %result to float
; Agent-scope seq_cst/seq_cst cmpxchg at element offset 2^32 (byte offset
; 2^34) from a VGPR base pointer; the byte offset does not fit in 32 bits.
; Expected on both GFX6 and GFX7 - the 64-bit constant (low dword 0 in s0,
; high dword 4 in s1) is placed in s[0:1] of the resource, the pointer v[0:1]
; is the addr64 vaddr, and soffset is 0. The cmpswap data pair is v[3:4]
; (v2 is copied into v4) and the result is copied from v3 into v0. The GFX6
; checks additionally expect s_waitcnt expcnt(0) before the epilog.
1203 define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4294967296(ptr addrspace(1) %ptr, i32 %old, i32 %in) {
1204 ; GFX6-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4294967296:
1206 ; GFX6-NEXT: s_mov_b32 s0, 0
1207 ; GFX6-NEXT: v_mov_b32_e32 v4, v2
1208 ; GFX6-NEXT: s_mov_b32 s1, 4
1209 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
1210 ; GFX6-NEXT: s_mov_b32 s2, s0
1211 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1212 ; GFX6-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], 0 addr64 glc
1213 ; GFX6-NEXT: s_waitcnt vmcnt(0)
1214 ; GFX6-NEXT: buffer_wbinvl1
1215 ; GFX6-NEXT: v_mov_b32_e32 v0, v3
1216 ; GFX6-NEXT: s_waitcnt expcnt(0)
1217 ; GFX6-NEXT: ; return to shader part epilog
1219 ; GFX7-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4294967296:
1221 ; GFX7-NEXT: s_mov_b32 s0, 0
1222 ; GFX7-NEXT: v_mov_b32_e32 v4, v2
1223 ; GFX7-NEXT: s_mov_b32 s1, 4
1224 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
1225 ; GFX7-NEXT: s_mov_b32 s2, s0
1226 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1227 ; GFX7-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], 0 addr64 glc
1228 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1229 ; GFX7-NEXT: buffer_wbinvl1
1230 ; GFX7-NEXT: v_mov_b32_e32 v0, v3
1231 ; GFX7-NEXT: ; return to shader part epilog
1232 %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967296
1233 %result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
1234 %result = extractvalue { i32, i1 } %result.struct, 0
1235 %cast = bitcast i32 %result to float
; Agent-scope seq_cst/seq_cst cmpxchg through an SGPR (inreg) base pointer
; indexed by a 32-bit VGPR element index, with no constant offset.
; Expected on both GFX6 and GFX7 - v1 is first copied to v3 so that v1 can
; hold the sign bits of the index; the index is then scaled by 4 into the
; addr64 vaddr pair v[0:1], the pointer is copied into s[0:1] of the resource,
; and soffset is 0. The cmpswap data pair is v[2:3] and the result is copied
; from v2 into v0. The GFX6 checks additionally expect s_waitcnt expcnt(0)
; before the epilog.
1239 define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_vgpr_offset(ptr addrspace(1) inreg %ptr, i32 %voffset, i32 %old, i32 %in) {
1240 ; GFX6-LABEL: mubuf_cmpxchg_sgpr_ptr_vgpr_offset:
1242 ; GFX6-NEXT: v_mov_b32_e32 v3, v1
1243 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1244 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
1245 ; GFX6-NEXT: s_mov_b32 s0, s2
1246 ; GFX6-NEXT: s_mov_b32 s1, s3
1247 ; GFX6-NEXT: s_mov_b32 s2, 0
1248 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
1249 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1250 ; GFX6-NEXT: buffer_atomic_cmpswap v[2:3], v[0:1], s[0:3], 0 addr64 glc
1251 ; GFX6-NEXT: s_waitcnt vmcnt(0)
1252 ; GFX6-NEXT: buffer_wbinvl1
1253 ; GFX6-NEXT: v_mov_b32_e32 v0, v2
1254 ; GFX6-NEXT: s_waitcnt expcnt(0)
1255 ; GFX6-NEXT: ; return to shader part epilog
1257 ; GFX7-LABEL: mubuf_cmpxchg_sgpr_ptr_vgpr_offset:
1259 ; GFX7-NEXT: v_mov_b32_e32 v3, v1
1260 ; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1261 ; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
1262 ; GFX7-NEXT: s_mov_b32 s0, s2
1263 ; GFX7-NEXT: s_mov_b32 s1, s3
1264 ; GFX7-NEXT: s_mov_b32 s2, 0
1265 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
1266 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1267 ; GFX7-NEXT: buffer_atomic_cmpswap v[2:3], v[0:1], s[0:3], 0 addr64 glc
1268 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1269 ; GFX7-NEXT: buffer_wbinvl1
1270 ; GFX7-NEXT: v_mov_b32_e32 v0, v2
1271 ; GFX7-NEXT: ; return to shader part epilog
1272 %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 %voffset
1273 %result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
1274 %result = extractvalue { i32, i1 } %result.struct, 0
1275 %cast = bitcast i32 %result to float