1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
3 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
4 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX11 %s
6 ; Test using saddr addressing mode of global_*store_* flat instructions.
; i8 store to %sbase + zext(%voffset), where %voffset is itself loaded from
; global memory. The checks expect the load, a wait on vmcnt(0), and then a
; single saddr-form byte store: offset in v0, data in v2, base in s[2:3].
; The gfx1100 checks include the "s_nop 0" ahead of the VGPR-dealloc s_sendmsg,
; as in every other gfx1100 check block of this file; the checks are NEXT-style,
; so the sequence must not skip an emitted instruction.
8 define amdgpu_ps void @global_store_saddr_i8_zext_vgpr(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr, i8 %data) {
9 ; GCN-LABEL: global_store_saddr_i8_zext_vgpr:
11 ; GCN-NEXT: global_load_dword v0, v[0:1], off
12 ; GCN-NEXT: s_waitcnt vmcnt(0)
13 ; GCN-NEXT: global_store_byte v0, v2, s[2:3]
16 ; GFX11-LABEL: global_store_saddr_i8_zext_vgpr:
18 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off
19 ; GFX11-NEXT: s_waitcnt vmcnt(0)
20 ; GFX11-NEXT: global_store_b8 v0, v2, s[2:3]
21 ; GFX11-NEXT: s_nop 0
22 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
23 ; GFX11-NEXT: s_endpgm
24 %voffset = load i32, ptr addrspace(1) %voffset.ptr
25 %zext.offset = zext i32 %voffset to i64
26 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
27 store i8 %data, ptr addrspace(1) %gep0
31 ; Maximum positive offset on gfx10
; i8 store to %sbase + zext(%voffset) + 2047, with %voffset loaded from global
; memory. The checks expect the constant to fold into the store's immediate
; field (offset:2047) while the base stays in s[2:3] and the offset in v0.
; The gfx1100 checks include the "s_nop 0" ahead of the VGPR-dealloc s_sendmsg,
; as in every other gfx1100 check block of this file; the checks are NEXT-style,
; so the sequence must not skip an emitted instruction.
32 define amdgpu_ps void @global_store_saddr_i8_zext_vgpr_offset_2047(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr, i8 %data) {
33 ; GCN-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
35 ; GCN-NEXT: global_load_dword v0, v[0:1], off
36 ; GCN-NEXT: s_waitcnt vmcnt(0)
37 ; GCN-NEXT: global_store_byte v0, v2, s[2:3] offset:2047
40 ; GFX11-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
42 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off
43 ; GFX11-NEXT: s_waitcnt vmcnt(0)
44 ; GFX11-NEXT: global_store_b8 v0, v2, s[2:3] offset:2047
45 ; GFX11-NEXT: s_nop 0
46 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
47 ; GFX11-NEXT: s_endpgm
48 %voffset = load i32, ptr addrspace(1) %voffset.ptr
49 %zext.offset = zext i32 %voffset to i64
50 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
51 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2047
52 store i8 %data, ptr addrspace(1) %gep1
56 ; Maximum negative offset on gfx10
; i8 store to %sbase + zext(%voffset) - 2048, with %voffset loaded from global
; memory. The checks expect the constant to fold into the store's immediate
; field (offset:-2048) while the base stays in s[2:3] and the offset in v0.
; The gfx1100 checks include the "s_nop 0" ahead of the VGPR-dealloc s_sendmsg,
; as in every other gfx1100 check block of this file; the checks are NEXT-style,
; so the sequence must not skip an emitted instruction.
57 define amdgpu_ps void @global_store_saddr_i8_zext_vgpr_offset_neg2048(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr, i8 %data) {
58 ; GCN-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
60 ; GCN-NEXT: global_load_dword v0, v[0:1], off
61 ; GCN-NEXT: s_waitcnt vmcnt(0)
62 ; GCN-NEXT: global_store_byte v0, v2, s[2:3] offset:-2048
65 ; GFX11-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
67 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off
68 ; GFX11-NEXT: s_waitcnt vmcnt(0)
69 ; GFX11-NEXT: global_store_b8 v0, v2, s[2:3] offset:-2048
70 ; GFX11-NEXT: s_nop 0
71 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
72 ; GFX11-NEXT: s_endpgm
73 %voffset = load i32, ptr addrspace(1) %voffset.ptr
74 %zext.offset = zext i32 %voffset to i64
75 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
76 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2048
77 store i8 %data, ptr addrspace(1) %gep1
81 ; --------------------------------------------------------------------------------
82 ; Uniformity edge cases
83 ; --------------------------------------------------------------------------------
; Addrspace(3) global that holds a global (addrspace 1) pointer. The
; uniform-pointer tests load their store base from this variable instead of
; taking it as an inreg argument.
85 @ptr.in.lds = internal addrspace(3) global ptr addrspace(1) undef
87 ; Base pointer is uniform, but also in VGPRs
; i8 store to %sbase + zext(%voffset), where %sbase is loaded from @ptr.in.lds.
; The checks expect the loaded pointer (v[2:3]) to be moved into s[0:1] with a
; pair of v_readfirstlane_b32 and then used as the SGPR base of a saddr-form
; byte store with offset v0 and data v1.
; The gfx900 checks carry an "s_nop 4" between the v_readfirstlane_b32 writes of
; s[0:1] and the store that reads them (presumably the VALU-writes-SGPR to
; VMEM-read hazard padding; confirm against llc output), and end with s_endpgm
; like the gfx900 block of the immoffset variant of this test.
88 define amdgpu_ps void @global_store_saddr_uniform_ptr_in_vgprs(i32 %voffset, i8 %data) {
89 ; GFX9-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
91 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
92 ; GFX9-NEXT: ds_read_b64 v[2:3], v2
93 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
94 ; GFX9-NEXT: v_readfirstlane_b32 s0, v2
95 ; GFX9-NEXT: v_readfirstlane_b32 s1, v3
96 ; GFX9-NEXT: s_nop 4
97 ; GFX9-NEXT: global_store_byte v0, v1, s[0:1]
98 ; GFX9-NEXT: s_endpgm
100 ; GFX10-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
102 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
103 ; GFX10-NEXT: ds_read_b64 v[2:3], v2
104 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
105 ; GFX10-NEXT: v_readfirstlane_b32 s0, v2
106 ; GFX10-NEXT: v_readfirstlane_b32 s1, v3
107 ; GFX10-NEXT: global_store_byte v0, v1, s[0:1]
108 ; GFX10-NEXT: s_endpgm
110 ; GFX11-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
112 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
113 ; GFX11-NEXT: ds_load_b64 v[2:3], v2
114 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
115 ; GFX11-NEXT: v_readfirstlane_b32 s0, v2
116 ; GFX11-NEXT: v_readfirstlane_b32 s1, v3
117 ; GFX11-NEXT: global_store_b8 v0, v1, s[0:1]
118 ; GFX11-NEXT: s_nop 0
119 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
120 ; GFX11-NEXT: s_endpgm
121 %sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds
122 %zext.offset = zext i32 %voffset to i64
123 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
124 store i8 %data, ptr addrspace(1) %gep0
128 ; Base pointer is uniform, but also in VGPRs, with imm offset
; i8 store to %sbase + zext(%voffset) - 120, where %sbase is loaded from
; @ptr.in.lds. The checks expect the loaded pointer (v[2:3]) to be moved into
; s[0:1] with a pair of v_readfirstlane_b32, and the constant to fold into the
; store's immediate field (offset:-120).
; The gfx900 checks carry an "s_nop 4" between the v_readfirstlane_b32 writes of
; s[0:1] and the store that reads them (presumably the VALU-writes-SGPR to
; VMEM-read hazard padding; confirm against llc output).
129 define amdgpu_ps void @global_store_saddr_uniform_ptr_in_vgprs_immoffset(i32 %voffset, i8 %data) {
130 ; GFX9-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
132 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
133 ; GFX9-NEXT: ds_read_b64 v[2:3], v2
134 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
135 ; GFX9-NEXT: v_readfirstlane_b32 s0, v2
136 ; GFX9-NEXT: v_readfirstlane_b32 s1, v3
137 ; GFX9-NEXT: s_nop 4
138 ; GFX9-NEXT: global_store_byte v0, v1, s[0:1] offset:-120
139 ; GFX9-NEXT: s_endpgm
141 ; GFX10-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
143 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
144 ; GFX10-NEXT: ds_read_b64 v[2:3], v2
145 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
146 ; GFX10-NEXT: v_readfirstlane_b32 s0, v2
147 ; GFX10-NEXT: v_readfirstlane_b32 s1, v3
148 ; GFX10-NEXT: global_store_byte v0, v1, s[0:1] offset:-120
149 ; GFX10-NEXT: s_endpgm
151 ; GFX11-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
153 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
154 ; GFX11-NEXT: ds_load_b64 v[2:3], v2
155 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
156 ; GFX11-NEXT: v_readfirstlane_b32 s0, v2
157 ; GFX11-NEXT: v_readfirstlane_b32 s1, v3
158 ; GFX11-NEXT: global_store_b8 v0, v1, s[0:1] offset:-120
159 ; GFX11-NEXT: s_nop 0
160 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
161 ; GFX11-NEXT: s_endpgm
162 %sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds
163 %zext.offset = zext i32 %voffset to i64
164 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
165 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -120
166 store i8 %data, ptr addrspace(1) %gep1
170 ; --------------------------------------------------------------------------------
171 ; Stress various type stores
172 ; --------------------------------------------------------------------------------
; Stores an i16 to %sbase + zext(%voffset). Both check blocks expect a single
; 16-bit store (global_store_short / global_store_b16) with the offset in v0,
; the data in v1 and the base in s[2:3].
174 define amdgpu_ps void @global_store_saddr_i16_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, i16 %data) {
175 ; GCN-LABEL: global_store_saddr_i16_zext_vgpr:
177 ; GCN-NEXT: global_store_short v0, v1, s[2:3]
180 ; GFX11-LABEL: global_store_saddr_i16_zext_vgpr:
182 ; GFX11-NEXT: global_store_b16 v0, v1, s[2:3]
183 ; GFX11-NEXT: s_nop 0
184 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
185 ; GFX11-NEXT: s_endpgm
186 %zext.offset = zext i32 %voffset to i64
187 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
188 store i16 %data, ptr addrspace(1) %gep0
; Stores an i16 to %sbase + zext(%voffset) - 128. The checks expect the
; constant to fold into the store's immediate field (offset:-128) while the
; base stays in s[2:3] and the variable offset in v0.
192 define amdgpu_ps void @global_store_saddr_i16_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i16 %data) {
193 ; GCN-LABEL: global_store_saddr_i16_zext_vgpr_offset_neg128:
195 ; GCN-NEXT: global_store_short v0, v1, s[2:3] offset:-128
198 ; GFX11-LABEL: global_store_saddr_i16_zext_vgpr_offset_neg128:
200 ; GFX11-NEXT: global_store_b16 v0, v1, s[2:3] offset:-128
201 ; GFX11-NEXT: s_nop 0
202 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
203 ; GFX11-NEXT: s_endpgm
204 %zext.offset = zext i32 %voffset to i64
205 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
206 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
207 store i16 %data, ptr addrspace(1) %gep1
; Stores a half to %sbase + zext(%voffset). The expected output is the same
; 16-bit store as for i16: offset in v0, data in v1, base in s[2:3].
211 define amdgpu_ps void @global_store_saddr_f16_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, half %data) {
212 ; GCN-LABEL: global_store_saddr_f16_zext_vgpr:
214 ; GCN-NEXT: global_store_short v0, v1, s[2:3]
217 ; GFX11-LABEL: global_store_saddr_f16_zext_vgpr:
219 ; GFX11-NEXT: global_store_b16 v0, v1, s[2:3]
220 ; GFX11-NEXT: s_nop 0
221 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
222 ; GFX11-NEXT: s_endpgm
223 %zext.offset = zext i32 %voffset to i64
224 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
225 store half %data, ptr addrspace(1) %gep0
; Stores a half to %sbase + zext(%voffset) - 128. The checks expect a 16-bit
; store with the constant folded into the immediate field (offset:-128), the
; base in s[2:3] and the variable offset in v0.
229 define amdgpu_ps void @global_store_saddr_f16_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, half %data) {
230 ; GCN-LABEL: global_store_saddr_f16_zext_vgpr_offset_neg128:
232 ; GCN-NEXT: global_store_short v0, v1, s[2:3] offset:-128
235 ; GFX11-LABEL: global_store_saddr_f16_zext_vgpr_offset_neg128:
237 ; GFX11-NEXT: global_store_b16 v0, v1, s[2:3] offset:-128
238 ; GFX11-NEXT: s_nop 0
239 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
240 ; GFX11-NEXT: s_endpgm
241 %zext.offset = zext i32 %voffset to i64
242 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
243 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
244 store half %data, ptr addrspace(1) %gep1
; Stores an i32 to %sbase + zext(%voffset). Both check blocks expect a single
; 32-bit store (global_store_dword / global_store_b32) with the offset in v0,
; the data in v1 and the base in s[2:3].
248 define amdgpu_ps void @global_store_saddr_i32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
249 ; GCN-LABEL: global_store_saddr_i32_zext_vgpr:
251 ; GCN-NEXT: global_store_dword v0, v1, s[2:3]
254 ; GFX11-LABEL: global_store_saddr_i32_zext_vgpr:
256 ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
257 ; GFX11-NEXT: s_nop 0
258 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
259 ; GFX11-NEXT: s_endpgm
260 %zext.offset = zext i32 %voffset to i64
261 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
262 store i32 %data, ptr addrspace(1) %gep0
; Stores an i32 to %sbase + zext(%voffset) - 128. The checks expect the
; constant to fold into the store's immediate field (offset:-128) while the
; base stays in s[2:3] and the variable offset in v0.
266 define amdgpu_ps void @global_store_saddr_i32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
267 ; GCN-LABEL: global_store_saddr_i32_zext_vgpr_offset_neg128:
269 ; GCN-NEXT: global_store_dword v0, v1, s[2:3] offset:-128
272 ; GFX11-LABEL: global_store_saddr_i32_zext_vgpr_offset_neg128:
274 ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3] offset:-128
275 ; GFX11-NEXT: s_nop 0
276 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
277 ; GFX11-NEXT: s_endpgm
278 %zext.offset = zext i32 %voffset to i64
279 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
280 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
281 store i32 %data, ptr addrspace(1) %gep1
; Stores a float to %sbase + zext(%voffset). The expected output is the same
; 32-bit store as for i32: offset in v0, data in v1, base in s[2:3].
285 define amdgpu_ps void @global_store_saddr_f32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, float %data) {
286 ; GCN-LABEL: global_store_saddr_f32_zext_vgpr:
288 ; GCN-NEXT: global_store_dword v0, v1, s[2:3]
291 ; GFX11-LABEL: global_store_saddr_f32_zext_vgpr:
293 ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
294 ; GFX11-NEXT: s_nop 0
295 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
296 ; GFX11-NEXT: s_endpgm
297 %zext.offset = zext i32 %voffset to i64
298 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
299 store float %data, ptr addrspace(1) %gep0
; Stores a float to %sbase + zext(%voffset) - 128. The checks expect a 32-bit
; store with the constant folded into the immediate field (offset:-128), the
; base in s[2:3] and the variable offset in v0.
303 define amdgpu_ps void @global_store_saddr_f32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, float %data) {
304 ; GCN-LABEL: global_store_saddr_f32_zext_vgpr_offset_neg128:
306 ; GCN-NEXT: global_store_dword v0, v1, s[2:3] offset:-128
309 ; GFX11-LABEL: global_store_saddr_f32_zext_vgpr_offset_neg128:
311 ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3] offset:-128
312 ; GFX11-NEXT: s_nop 0
313 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
314 ; GFX11-NEXT: s_endpgm
315 %zext.offset = zext i32 %voffset to i64
316 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
317 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
318 store float %data, ptr addrspace(1) %gep1
; Stores an addrspace(3) pointer value to %sbase + zext(%voffset). The checks
; expect it to be written with one 32-bit store: offset in v0, data in v1,
; base in s[2:3].
322 define amdgpu_ps void @global_store_saddr_p3_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, ptr addrspace(3) %data) {
323 ; GCN-LABEL: global_store_saddr_p3_zext_vgpr:
325 ; GCN-NEXT: global_store_dword v0, v1, s[2:3]
328 ; GFX11-LABEL: global_store_saddr_p3_zext_vgpr:
330 ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
331 ; GFX11-NEXT: s_nop 0
332 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
333 ; GFX11-NEXT: s_endpgm
334 %zext.offset = zext i32 %voffset to i64
335 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
336 store ptr addrspace(3) %data, ptr addrspace(1) %gep0
; Stores an addrspace(3) pointer value to %sbase + zext(%voffset) - 128. The
; checks expect one 32-bit store with the constant folded into the immediate
; field (offset:-128), the base in s[2:3] and the variable offset in v0.
340 define amdgpu_ps void @global_store_saddr_p3_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, ptr addrspace(3) %data) {
341 ; GCN-LABEL: global_store_saddr_p3_zext_vgpr_offset_neg128:
343 ; GCN-NEXT: global_store_dword v0, v1, s[2:3] offset:-128
346 ; GFX11-LABEL: global_store_saddr_p3_zext_vgpr_offset_neg128:
348 ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3] offset:-128
349 ; GFX11-NEXT: s_nop 0
350 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
351 ; GFX11-NEXT: s_endpgm
352 %zext.offset = zext i32 %voffset to i64
353 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
354 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
355 store ptr addrspace(3) %data, ptr addrspace(1) %gep1
; Stores an i64 to %sbase + zext(%voffset). Both check blocks expect a single
; 64-bit store (global_store_dwordx2 / global_store_b64) with the offset in v0,
; the data in v[1:2] and the base in s[2:3].
359 define amdgpu_ps void @global_store_saddr_i64_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
360 ; GCN-LABEL: global_store_saddr_i64_zext_vgpr:
362 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
365 ; GFX11-LABEL: global_store_saddr_i64_zext_vgpr:
367 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
368 ; GFX11-NEXT: s_nop 0
369 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
370 ; GFX11-NEXT: s_endpgm
371 %zext.offset = zext i32 %voffset to i64
372 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
373 store i64 %data, ptr addrspace(1) %gep0
; Stores an i64 to %sbase + zext(%voffset) - 128. The checks expect one 64-bit
; store with the constant folded into the immediate field (offset:-128), the
; base in s[2:3] and the variable offset in v0.
377 define amdgpu_ps void @global_store_saddr_i64_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
378 ; GCN-LABEL: global_store_saddr_i64_zext_vgpr_offset_neg128:
380 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
383 ; GFX11-LABEL: global_store_saddr_i64_zext_vgpr_offset_neg128:
385 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
386 ; GFX11-NEXT: s_nop 0
387 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
388 ; GFX11-NEXT: s_endpgm
389 %zext.offset = zext i32 %voffset to i64
390 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
391 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
392 store i64 %data, ptr addrspace(1) %gep1
; Stores a double to %sbase + zext(%voffset). The expected output is the same
; 64-bit store as for i64: offset in v0, data in v[1:2], base in s[2:3].
396 define amdgpu_ps void @global_store_saddr_f64_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, double %data) {
397 ; GCN-LABEL: global_store_saddr_f64_zext_vgpr:
399 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
402 ; GFX11-LABEL: global_store_saddr_f64_zext_vgpr:
404 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
405 ; GFX11-NEXT: s_nop 0
406 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
407 ; GFX11-NEXT: s_endpgm
408 %zext.offset = zext i32 %voffset to i64
409 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
410 store double %data, ptr addrspace(1) %gep0
; Stores a double to %sbase + zext(%voffset) - 128. The checks expect one
; 64-bit store with the constant folded into the immediate field (offset:-128),
; the base in s[2:3] and the variable offset in v0.
414 define amdgpu_ps void @global_store_saddr_f64_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, double %data) {
415 ; GCN-LABEL: global_store_saddr_f64_zext_vgpr_offset_neg128:
417 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
420 ; GFX11-LABEL: global_store_saddr_f64_zext_vgpr_offset_neg128:
422 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
423 ; GFX11-NEXT: s_nop 0
424 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
425 ; GFX11-NEXT: s_endpgm
426 %zext.offset = zext i32 %voffset to i64
427 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
428 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
429 store double %data, ptr addrspace(1) %gep1
; Stores a <2 x i32> to %sbase + zext(%voffset). The checks expect the vector
; to be written with one 64-bit store: offset in v0, data in v[1:2], base in
; s[2:3].
433 define amdgpu_ps void @global_store_saddr_v2i32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x i32> %data) {
434 ; GCN-LABEL: global_store_saddr_v2i32_zext_vgpr:
436 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
439 ; GFX11-LABEL: global_store_saddr_v2i32_zext_vgpr:
441 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
442 ; GFX11-NEXT: s_nop 0
443 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
444 ; GFX11-NEXT: s_endpgm
445 %zext.offset = zext i32 %voffset to i64
446 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
447 store <2 x i32> %data, ptr addrspace(1) %gep0
; Stores a <2 x i32> to %sbase + zext(%voffset) - 128. The checks expect one
; 64-bit store with the constant folded into the immediate field (offset:-128),
; the base in s[2:3] and the variable offset in v0.
451 define amdgpu_ps void @global_store_saddr_v2i32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x i32> %data) {
452 ; GCN-LABEL: global_store_saddr_v2i32_zext_vgpr_offset_neg128:
454 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
457 ; GFX11-LABEL: global_store_saddr_v2i32_zext_vgpr_offset_neg128:
459 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
460 ; GFX11-NEXT: s_nop 0
461 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
462 ; GFX11-NEXT: s_endpgm
463 %zext.offset = zext i32 %voffset to i64
464 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
465 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
466 store <2 x i32> %data, ptr addrspace(1) %gep1
; Stores a <2 x float> to %sbase + zext(%voffset). The checks expect the vector
; to be written with one 64-bit store: offset in v0, data in v[1:2], base in
; s[2:3].
470 define amdgpu_ps void @global_store_saddr_v2f32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x float> %data) {
471 ; GCN-LABEL: global_store_saddr_v2f32_zext_vgpr:
473 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
476 ; GFX11-LABEL: global_store_saddr_v2f32_zext_vgpr:
478 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
479 ; GFX11-NEXT: s_nop 0
480 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
481 ; GFX11-NEXT: s_endpgm
482 %zext.offset = zext i32 %voffset to i64
483 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
484 store <2 x float> %data, ptr addrspace(1) %gep0
; Stores a <2 x float> to %sbase + zext(%voffset) - 128. The checks expect one
; 64-bit store with the constant folded into the immediate field (offset:-128),
; the base in s[2:3] and the variable offset in v0.
488 define amdgpu_ps void @global_store_saddr_v2f32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x float> %data) {
489 ; GCN-LABEL: global_store_saddr_v2f32_zext_vgpr_offset_neg128:
491 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
494 ; GFX11-LABEL: global_store_saddr_v2f32_zext_vgpr_offset_neg128:
496 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
497 ; GFX11-NEXT: s_nop 0
498 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
499 ; GFX11-NEXT: s_endpgm
500 %zext.offset = zext i32 %voffset to i64
501 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
502 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
503 store <2 x float> %data, ptr addrspace(1) %gep1
; Stores a <4 x i16> to %sbase + zext(%voffset). The checks expect the vector
; to occupy v[1:2] and to be written with one 64-bit store using offset v0 and
; base s[2:3].
507 define amdgpu_ps void @global_store_saddr_v4i16_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x i16> %data) {
508 ; GCN-LABEL: global_store_saddr_v4i16_zext_vgpr:
510 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
513 ; GFX11-LABEL: global_store_saddr_v4i16_zext_vgpr:
515 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
516 ; GFX11-NEXT: s_nop 0
517 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
518 ; GFX11-NEXT: s_endpgm
519 %zext.offset = zext i32 %voffset to i64
520 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
521 store <4 x i16> %data, ptr addrspace(1) %gep0
; Stores a <4 x i16> to %sbase + zext(%voffset) - 128. The checks expect one
; 64-bit store with the constant folded into the immediate field (offset:-128),
; the base in s[2:3] and the variable offset in v0.
525 define amdgpu_ps void @global_store_saddr_v4i16_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x i16> %data) {
526 ; GCN-LABEL: global_store_saddr_v4i16_zext_vgpr_offset_neg128:
528 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
531 ; GFX11-LABEL: global_store_saddr_v4i16_zext_vgpr_offset_neg128:
533 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
534 ; GFX11-NEXT: s_nop 0
535 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
536 ; GFX11-NEXT: s_endpgm
537 %zext.offset = zext i32 %voffset to i64
538 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
539 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
540 store <4 x i16> %data, ptr addrspace(1) %gep1
; Stores a <4 x half> to %sbase + zext(%voffset). The checks expect the vector
; to occupy v[1:2] and to be written with one 64-bit store using offset v0 and
; base s[2:3].
544 define amdgpu_ps void @global_store_saddr_v4f16_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x half> %data) {
545 ; GCN-LABEL: global_store_saddr_v4f16_zext_vgpr:
547 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
550 ; GFX11-LABEL: global_store_saddr_v4f16_zext_vgpr:
552 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
553 ; GFX11-NEXT: s_nop 0
554 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
555 ; GFX11-NEXT: s_endpgm
556 %zext.offset = zext i32 %voffset to i64
557 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
558 store <4 x half> %data, ptr addrspace(1) %gep0
; Stores a <4 x half> to %sbase + zext(%voffset) - 128. The checks expect one
; 64-bit store with the constant folded into the immediate field (offset:-128),
; the base in s[2:3] and the variable offset in v0.
562 define amdgpu_ps void @global_store_saddr_v4f16_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x half> %data) {
563 ; GCN-LABEL: global_store_saddr_v4f16_zext_vgpr_offset_neg128:
565 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
568 ; GFX11-LABEL: global_store_saddr_v4f16_zext_vgpr_offset_neg128:
570 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
571 ; GFX11-NEXT: s_nop 0
572 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
573 ; GFX11-NEXT: s_endpgm
574 %zext.offset = zext i32 %voffset to i64
575 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
576 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
577 store <4 x half> %data, ptr addrspace(1) %gep1
; Stores an addrspace(1) pointer value to %sbase + zext(%voffset). The checks
; expect it to be written with one 64-bit store: offset in v0, data in v[1:2],
; base in s[2:3].
581 define amdgpu_ps void @global_store_saddr_p1_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, ptr addrspace(1) %data) {
582 ; GCN-LABEL: global_store_saddr_p1_zext_vgpr:
584 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
587 ; GFX11-LABEL: global_store_saddr_p1_zext_vgpr:
589 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
590 ; GFX11-NEXT: s_nop 0
591 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
592 ; GFX11-NEXT: s_endpgm
593 %zext.offset = zext i32 %voffset to i64
594 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
595 store ptr addrspace(1) %data, ptr addrspace(1) %gep0
; Stores an addrspace(1) pointer value to %sbase + zext(%voffset) - 128. The
; checks expect one 64-bit store with the constant folded into the immediate
; field (offset:-128), the base in s[2:3] and the variable offset in v0.
599 define amdgpu_ps void @global_store_saddr_p1_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, ptr addrspace(1) %data) {
600 ; GCN-LABEL: global_store_saddr_p1_zext_vgpr_offset_neg128:
602 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
605 ; GFX11-LABEL: global_store_saddr_p1_zext_vgpr_offset_neg128:
607 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
608 ; GFX11-NEXT: s_nop 0
609 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
610 ; GFX11-NEXT: s_endpgm
611 %zext.offset = zext i32 %voffset to i64
612 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
613 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
614 store ptr addrspace(1) %data, ptr addrspace(1) %gep1
; Stores a <3 x i32> to %sbase + zext(%voffset). Both check blocks expect a
; single 96-bit store (global_store_dwordx3 / global_store_b96) with the offset
; in v0, the data in v[1:3] and the base in s[2:3].
618 define amdgpu_ps void @global_store_saddr_v3i32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <3 x i32> %data) {
619 ; GCN-LABEL: global_store_saddr_v3i32_zext_vgpr:
621 ; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3]
624 ; GFX11-LABEL: global_store_saddr_v3i32_zext_vgpr:
626 ; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3]
627 ; GFX11-NEXT: s_nop 0
628 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
629 ; GFX11-NEXT: s_endpgm
630 %zext.offset = zext i32 %voffset to i64
631 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
632 store <3 x i32> %data, ptr addrspace(1) %gep0
; Stores a <3 x i32> to %sbase + zext(%voffset) - 128. The checks expect one
; 96-bit store with the constant folded into the immediate field (offset:-128),
; the base in s[2:3] and the variable offset in v0.
636 define amdgpu_ps void @global_store_saddr_v3i32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <3 x i32> %data) {
637 ; GCN-LABEL: global_store_saddr_v3i32_zext_vgpr_offset_neg128:
639 ; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3] offset:-128
642 ; GFX11-LABEL: global_store_saddr_v3i32_zext_vgpr_offset_neg128:
644 ; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3] offset:-128
645 ; GFX11-NEXT: s_nop 0
646 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
647 ; GFX11-NEXT: s_endpgm
648 %zext.offset = zext i32 %voffset to i64
649 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
650 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
651 store <3 x i32> %data, ptr addrspace(1) %gep1
; Stores a <3 x float> to %sbase + zext(%voffset). The expected output is the
; same 96-bit store as for <3 x i32>: offset in v0, data in v[1:3], base in
; s[2:3].
655 define amdgpu_ps void @global_store_saddr_v3f32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <3 x float> %data) {
656 ; GCN-LABEL: global_store_saddr_v3f32_zext_vgpr:
658 ; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3]
661 ; GFX11-LABEL: global_store_saddr_v3f32_zext_vgpr:
663 ; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3]
664 ; GFX11-NEXT: s_nop 0
665 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
666 ; GFX11-NEXT: s_endpgm
667 %zext.offset = zext i32 %voffset to i64
668 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
669 store <3 x float> %data, ptr addrspace(1) %gep0
; Stores a <3 x float> to %sbase + zext(%voffset) - 128. The checks expect one
; 96-bit store with the constant folded into the immediate field (offset:-128),
; the base in s[2:3] and the variable offset in v0.
673 define amdgpu_ps void @global_store_saddr_v3f32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <3 x float> %data) {
674 ; GCN-LABEL: global_store_saddr_v3f32_zext_vgpr_offset_neg128:
676 ; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3] offset:-128
679 ; GFX11-LABEL: global_store_saddr_v3f32_zext_vgpr_offset_neg128:
681 ; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3] offset:-128
682 ; GFX11-NEXT: s_nop 0
683 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
684 ; GFX11-NEXT: s_endpgm
685 %zext.offset = zext i32 %voffset to i64
686 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
687 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
688 store <3 x float> %data, ptr addrspace(1) %gep1
; Stores a <6 x i16> to %sbase + zext(%voffset). The checks expect the vector
; to occupy v[1:3] and to be written with one 96-bit store using offset v0 and
; base s[2:3].
692 define amdgpu_ps void @global_store_saddr_v6i16_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <6 x i16> %data) {
693 ; GCN-LABEL: global_store_saddr_v6i16_zext_vgpr:
695 ; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3]
698 ; GFX11-LABEL: global_store_saddr_v6i16_zext_vgpr:
700 ; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3]
701 ; GFX11-NEXT: s_nop 0
702 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
703 ; GFX11-NEXT: s_endpgm
704 %zext.offset = zext i32 %voffset to i64
705 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
706 store <6 x i16> %data, ptr addrspace(1) %gep0
; Stores a <6 x i16> to %sbase + zext(%voffset) - 128. The checks expect one
; 96-bit store with the constant folded into the immediate field (offset:-128),
; the base in s[2:3] and the variable offset in v0.
710 define amdgpu_ps void @global_store_saddr_v6i16_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <6 x i16> %data) {
711 ; GCN-LABEL: global_store_saddr_v6i16_zext_vgpr_offset_neg128:
713 ; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3] offset:-128
716 ; GFX11-LABEL: global_store_saddr_v6i16_zext_vgpr_offset_neg128:
718 ; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3] offset:-128
719 ; GFX11-NEXT: s_nop 0
720 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
721 ; GFX11-NEXT: s_endpgm
722 %zext.offset = zext i32 %voffset to i64
723 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
724 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
725 store <6 x i16> %data, ptr addrspace(1) %gep1
; Stores a <6 x half> to %sbase + zext(%voffset). The checks expect the vector
; to occupy v[1:3] and to be written with one 96-bit store using offset v0 and
; base s[2:3].
729 define amdgpu_ps void @global_store_saddr_v6f16_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <6 x half> %data) {
730 ; GCN-LABEL: global_store_saddr_v6f16_zext_vgpr:
732 ; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3]
735 ; GFX11-LABEL: global_store_saddr_v6f16_zext_vgpr:
737 ; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3]
738 ; GFX11-NEXT: s_nop 0
739 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
740 ; GFX11-NEXT: s_endpgm
741 %zext.offset = zext i32 %voffset to i64
742 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
743 store <6 x half> %data, ptr addrspace(1) %gep0
; Stores a <6 x half> to %sbase + zext(%voffset) - 128. The checks expect one
; 96-bit store with the constant folded into the immediate field (offset:-128),
; the base in s[2:3] and the variable offset in v0.
747 define amdgpu_ps void @global_store_saddr_v6f16_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <6 x half> %data) {
748 ; GCN-LABEL: global_store_saddr_v6f16_zext_vgpr_offset_neg128:
750 ; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3] offset:-128
753 ; GFX11-LABEL: global_store_saddr_v6f16_zext_vgpr_offset_neg128:
755 ; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3] offset:-128
756 ; GFX11-NEXT: s_nop 0
757 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
758 ; GFX11-NEXT: s_endpgm
759 %zext.offset = zext i32 %voffset to i64
760 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
761 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
762 store <6 x half> %data, ptr addrspace(1) %gep1
; Stores a <4 x i32> to %sbase + zext(%voffset). Both check blocks expect a
; single 128-bit store (global_store_dwordx4 / global_store_b128) with the
; offset in v0, the data in v[1:4] and the base in s[2:3].
766 define amdgpu_ps void @global_store_saddr_v4i32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x i32> %data) {
767 ; GCN-LABEL: global_store_saddr_v4i32_zext_vgpr:
769 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3]
772 ; GFX11-LABEL: global_store_saddr_v4i32_zext_vgpr:
774 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3]
775 ; GFX11-NEXT: s_nop 0
776 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
777 ; GFX11-NEXT: s_endpgm
778 %zext.offset = zext i32 %voffset to i64
779 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
780 store <4 x i32> %data, ptr addrspace(1) %gep0
; Stores a <4 x i32> to %sbase + zext(%voffset) - 128. The checks expect one
; 128-bit store with the constant folded into the immediate field
; (offset:-128), the base in s[2:3] and the variable offset in v0.
784 define amdgpu_ps void @global_store_saddr_v4i32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x i32> %data) {
785 ; GCN-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
787 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
790 ; GFX11-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
792 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
793 ; GFX11-NEXT: s_nop 0
794 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
795 ; GFX11-NEXT: s_endpgm
796 %zext.offset = zext i32 %voffset to i64
797 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
798 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
799 store <4 x i32> %data, ptr addrspace(1) %gep1
; Stores a <4 x float> to %sbase + zext(%voffset). The expected output is the
; same 128-bit store as for <4 x i32>: offset in v0, data in v[1:4], base in
; s[2:3].
803 define amdgpu_ps void @global_store_saddr_v4f32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x float> %data) {
804 ; GCN-LABEL: global_store_saddr_v4f32_zext_vgpr:
806 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3]
809 ; GFX11-LABEL: global_store_saddr_v4f32_zext_vgpr:
811 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3]
812 ; GFX11-NEXT: s_nop 0
813 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
814 ; GFX11-NEXT: s_endpgm
815 %zext.offset = zext i32 %voffset to i64
816 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
817 store <4 x float> %data, ptr addrspace(1) %gep0
; Stores a <4 x float> to %sbase + zext(%voffset) - 128. The checks expect one
; 128-bit store with the constant folded into the immediate field
; (offset:-128), the base in s[2:3] and the variable offset in v0.
821 define amdgpu_ps void @global_store_saddr_v4f32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x float> %data) {
822 ; GCN-LABEL: global_store_saddr_v4f32_zext_vgpr_offset_neg128:
824 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
827 ; GFX11-LABEL: global_store_saddr_v4f32_zext_vgpr_offset_neg128:
829 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
830 ; GFX11-NEXT: s_nop 0
831 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
832 ; GFX11-NEXT: s_endpgm
833 %zext.offset = zext i32 %voffset to i64
834 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
835 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
836 store <4 x float> %data, ptr addrspace(1) %gep1
; Stores a <2 x i64> to %sbase + zext(%voffset). The checks expect the vector
; to occupy v[1:4] and to be written with one 128-bit store using offset v0 and
; base s[2:3].
840 define amdgpu_ps void @global_store_saddr_v2i64_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x i64> %data) {
841 ; GCN-LABEL: global_store_saddr_v2i64_zext_vgpr:
843 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3]
846 ; GFX11-LABEL: global_store_saddr_v2i64_zext_vgpr:
848 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3]
849 ; GFX11-NEXT: s_nop 0
850 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
851 ; GFX11-NEXT: s_endpgm
852 %zext.offset = zext i32 %voffset to i64
853 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
854 store <2 x i64> %data, ptr addrspace(1) %gep0
; Stores <2 x i64> %data to %sbase + zext(%voffset) - 128 bytes. The checks
; expect the constant -128 GEP to show up as the offset:-128 field of the
; 128-bit store.
858 define amdgpu_ps void @global_store_saddr_v2i64_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x i64> %data) {
859 ; GCN-LABEL: global_store_saddr_v2i64_zext_vgpr_offset_neg128:
861 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
864 ; GFX11-LABEL: global_store_saddr_v2i64_zext_vgpr_offset_neg128:
866 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
867 ; GFX11-NEXT: s_nop 0
868 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
869 ; GFX11-NEXT: s_endpgm
870 %zext.offset = zext i32 %voffset to i64
871 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
872 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
873 store <2 x i64> %data, ptr addrspace(1) %gep1
; Stores <2 x double> %data to %sbase + zext(%voffset), where %sbase is an inreg
; argument. The checks expect a single 128-bit store (global_store_dwordx4 under
; the GCN prefix, global_store_b128 under GFX11) with s[2:3] as an operand.
877 define amdgpu_ps void @global_store_saddr_v2f64_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x double> %data) {
878 ; GCN-LABEL: global_store_saddr_v2f64_zext_vgpr:
880 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3]
883 ; GFX11-LABEL: global_store_saddr_v2f64_zext_vgpr:
885 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3]
886 ; GFX11-NEXT: s_nop 0
887 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
888 ; GFX11-NEXT: s_endpgm
889 %zext.offset = zext i32 %voffset to i64
890 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
891 store <2 x double> %data, ptr addrspace(1) %gep0
; Stores <2 x double> %data to %sbase + zext(%voffset) - 128 bytes. The checks
; expect the constant -128 GEP to show up as the offset:-128 field of the
; 128-bit store.
895 define amdgpu_ps void @global_store_saddr_v2f64_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x double> %data) {
896 ; GCN-LABEL: global_store_saddr_v2f64_zext_vgpr_offset_neg128:
898 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
901 ; GFX11-LABEL: global_store_saddr_v2f64_zext_vgpr_offset_neg128:
903 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
904 ; GFX11-NEXT: s_nop 0
905 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
906 ; GFX11-NEXT: s_endpgm
907 %zext.offset = zext i32 %voffset to i64
908 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
909 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
910 store <2 x double> %data, ptr addrspace(1) %gep1
; Stores <8 x i16> %data to %sbase + zext(%voffset), where %sbase is an inreg
; argument. The checks expect a single 128-bit store (global_store_dwordx4 under
; the GCN prefix, global_store_b128 under GFX11) with s[2:3] as an operand.
914 define amdgpu_ps void @global_store_saddr_v8i16_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <8 x i16> %data) {
915 ; GCN-LABEL: global_store_saddr_v8i16_zext_vgpr:
917 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3]
920 ; GFX11-LABEL: global_store_saddr_v8i16_zext_vgpr:
922 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3]
923 ; GFX11-NEXT: s_nop 0
924 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
925 ; GFX11-NEXT: s_endpgm
926 %zext.offset = zext i32 %voffset to i64
927 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
928 store <8 x i16> %data, ptr addrspace(1) %gep0
; Stores <8 x i16> %data to %sbase + zext(%voffset) - 128 bytes. The checks
; expect the constant -128 GEP to show up as the offset:-128 field of the
; 128-bit store.
932 define amdgpu_ps void @global_store_saddr_v8i16_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <8 x i16> %data) {
933 ; GCN-LABEL: global_store_saddr_v8i16_zext_vgpr_offset_neg128:
935 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
938 ; GFX11-LABEL: global_store_saddr_v8i16_zext_vgpr_offset_neg128:
940 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
941 ; GFX11-NEXT: s_nop 0
942 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
943 ; GFX11-NEXT: s_endpgm
944 %zext.offset = zext i32 %voffset to i64
945 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
946 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
947 store <8 x i16> %data, ptr addrspace(1) %gep1
; Stores <8 x half> %data to %sbase + zext(%voffset), where %sbase is an inreg
; argument. The checks expect a single 128-bit store (global_store_dwordx4 under
; the GCN prefix, global_store_b128 under GFX11) with s[2:3] as an operand.
951 define amdgpu_ps void @global_store_saddr_v8f16_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <8 x half> %data) {
952 ; GCN-LABEL: global_store_saddr_v8f16_zext_vgpr:
954 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3]
957 ; GFX11-LABEL: global_store_saddr_v8f16_zext_vgpr:
959 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3]
960 ; GFX11-NEXT: s_nop 0
961 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
962 ; GFX11-NEXT: s_endpgm
963 %zext.offset = zext i32 %voffset to i64
964 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
965 store <8 x half> %data, ptr addrspace(1) %gep0
; Stores <8 x half> %data to %sbase + zext(%voffset) - 128 bytes. The checks
; expect the constant -128 GEP to show up as the offset:-128 field of the
; 128-bit store.
969 define amdgpu_ps void @global_store_saddr_v8f16_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <8 x half> %data) {
970 ; GCN-LABEL: global_store_saddr_v8f16_zext_vgpr_offset_neg128:
972 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
975 ; GFX11-LABEL: global_store_saddr_v8f16_zext_vgpr_offset_neg128:
977 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
978 ; GFX11-NEXT: s_nop 0
979 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
980 ; GFX11-NEXT: s_endpgm
981 %zext.offset = zext i32 %voffset to i64
982 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
983 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
984 store <8 x half> %data, ptr addrspace(1) %gep1
; Stores a vector of two addrspace(1) pointers to %sbase + zext(%voffset), where
; %sbase is an inreg argument. The checks expect a single 128-bit store
; (global_store_dwordx4 under the GCN prefix, global_store_b128 under GFX11)
; with s[2:3] as an operand.
988 define amdgpu_ps void @global_store_saddr_v2p1_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x ptr addrspace(1)> %data) {
989 ; GCN-LABEL: global_store_saddr_v2p1_zext_vgpr:
991 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3]
994 ; GFX11-LABEL: global_store_saddr_v2p1_zext_vgpr:
996 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3]
997 ; GFX11-NEXT: s_nop 0
998 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
999 ; GFX11-NEXT: s_endpgm
1000 %zext.offset = zext i32 %voffset to i64
1001 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1002 store <2 x ptr addrspace(1)> %data, ptr addrspace(1) %gep0
; Stores a vector of two addrspace(1) pointers to %sbase + zext(%voffset) - 128
; bytes. The checks expect the constant -128 GEP to show up as the offset:-128
; field of the 128-bit store.
1006 define amdgpu_ps void @global_store_saddr_v2p1_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x ptr addrspace(1)> %data) {
1007 ; GCN-LABEL: global_store_saddr_v2p1_zext_vgpr_offset_neg128:
1009 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
1010 ; GCN-NEXT: s_endpgm
1012 ; GFX11-LABEL: global_store_saddr_v2p1_zext_vgpr_offset_neg128:
1014 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
1015 ; GFX11-NEXT: s_nop 0
1016 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1017 ; GFX11-NEXT: s_endpgm
1018 %zext.offset = zext i32 %voffset to i64
1019 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1020 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1021 store <2 x ptr addrspace(1)> %data, ptr addrspace(1) %gep1
; Stores a vector of four addrspace(3) pointers to %sbase + zext(%voffset),
; where %sbase is an inreg argument. The checks expect a single 128-bit store
; (global_store_dwordx4 under the GCN prefix, global_store_b128 under GFX11)
; with s[2:3] as an operand.
1025 define amdgpu_ps void @global_store_saddr_v4p3_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x ptr addrspace(3)> %data) {
1026 ; GCN-LABEL: global_store_saddr_v4p3_zext_vgpr:
1028 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3]
1029 ; GCN-NEXT: s_endpgm
1031 ; GFX11-LABEL: global_store_saddr_v4p3_zext_vgpr:
1033 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3]
1034 ; GFX11-NEXT: s_nop 0
1035 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1036 ; GFX11-NEXT: s_endpgm
1037 %zext.offset = zext i32 %voffset to i64
1038 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1039 store <4 x ptr addrspace(3)> %data, ptr addrspace(1) %gep0
; Stores a vector of four addrspace(3) pointers to %sbase + zext(%voffset) - 128
; bytes. The checks expect the constant -128 GEP to show up as the offset:-128
; field of the 128-bit store.
1043 define amdgpu_ps void @global_store_saddr_v4p3_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x ptr addrspace(3)> %data) {
1044 ; GCN-LABEL: global_store_saddr_v4p3_zext_vgpr_offset_neg128:
1046 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
1047 ; GCN-NEXT: s_endpgm
1049 ; GFX11-LABEL: global_store_saddr_v4p3_zext_vgpr_offset_neg128:
1051 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
1052 ; GFX11-NEXT: s_nop 0
1053 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1054 ; GFX11-NEXT: s_endpgm
1055 %zext.offset = zext i32 %voffset to i64
1056 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1057 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1058 store <4 x ptr addrspace(3)> %data, ptr addrspace(1) %gep1
1062 ; --------------------------------------------------------------------------------
; Atomic store
1064 ; --------------------------------------------------------------------------------
; seq_cst atomic i32 store (align 4) to %sbase + zext(%voffset). GFX9 and GFX10
; are checked under separate prefixes: every target expects
; s_waitcnt vmcnt(0) lgkmcnt(0) before the store, and GFX10/GFX11 additionally
; expect s_waitcnt_vscnt null, 0x0.
1066 define amdgpu_ps void @atomic_global_store_saddr_i32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
1067 ; GFX9-LABEL: atomic_global_store_saddr_i32_zext_vgpr:
1069 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1070 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3]
1071 ; GFX9-NEXT: s_endpgm
1073 ; GFX10-LABEL: atomic_global_store_saddr_i32_zext_vgpr:
1075 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1076 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1077 ; GFX10-NEXT: global_store_dword v0, v1, s[2:3]
1078 ; GFX10-NEXT: s_endpgm
1080 ; GFX11-LABEL: atomic_global_store_saddr_i32_zext_vgpr:
1082 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1083 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1084 ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
1085 ; GFX11-NEXT: s_nop 0
1086 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1087 ; GFX11-NEXT: s_endpgm
1088 %zext.offset = zext i32 %voffset to i64
1089 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1090 store atomic i32 %data, ptr addrspace(1) %gep0 seq_cst, align 4
; seq_cst atomic i32 store (align 4) to %sbase + zext(%voffset) - 128 bytes.
; The checks expect the same wait instructions as the zero-offset atomic store
; form, with the constant -128 GEP appearing as offset:-128 on the store.
1094 define amdgpu_ps void @atomic_global_store_saddr_i32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
1095 ; GFX9-LABEL: atomic_global_store_saddr_i32_zext_vgpr_offset_neg128:
1097 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1098 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3] offset:-128
1099 ; GFX9-NEXT: s_endpgm
1101 ; GFX10-LABEL: atomic_global_store_saddr_i32_zext_vgpr_offset_neg128:
1103 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1104 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1105 ; GFX10-NEXT: global_store_dword v0, v1, s[2:3] offset:-128
1106 ; GFX10-NEXT: s_endpgm
1108 ; GFX11-LABEL: atomic_global_store_saddr_i32_zext_vgpr_offset_neg128:
1110 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1111 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1112 ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3] offset:-128
1113 ; GFX11-NEXT: s_nop 0
1114 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1115 ; GFX11-NEXT: s_endpgm
1116 %zext.offset = zext i32 %voffset to i64
1117 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1118 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1119 store atomic i32 %data, ptr addrspace(1) %gep1 seq_cst, align 4
; seq_cst atomic i64 store (align 8) to %sbase + zext(%voffset). GFX9 and GFX10
; are checked under separate prefixes: every target expects
; s_waitcnt vmcnt(0) lgkmcnt(0) before the 64-bit store, and GFX10/GFX11
; additionally expect s_waitcnt_vscnt null, 0x0.
1123 define amdgpu_ps void @atomic_global_store_saddr_i64_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
1124 ; GFX9-LABEL: atomic_global_store_saddr_i64_zext_vgpr:
1126 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1127 ; GFX9-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
1128 ; GFX9-NEXT: s_endpgm
1130 ; GFX10-LABEL: atomic_global_store_saddr_i64_zext_vgpr:
1132 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1133 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1134 ; GFX10-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
1135 ; GFX10-NEXT: s_endpgm
1137 ; GFX11-LABEL: atomic_global_store_saddr_i64_zext_vgpr:
1139 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1140 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1141 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
1142 ; GFX11-NEXT: s_nop 0
1143 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1144 ; GFX11-NEXT: s_endpgm
1145 %zext.offset = zext i32 %voffset to i64
1146 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1147 store atomic i64 %data, ptr addrspace(1) %gep0 seq_cst, align 8
; seq_cst atomic i64 store (align 8) to %sbase + zext(%voffset) - 128 bytes.
; The checks expect the same wait instructions as the zero-offset atomic store
; form, with the constant -128 GEP appearing as offset:-128 on the store.
1151 define amdgpu_ps void @atomic_global_store_saddr_i64_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
1152 ; GFX9-LABEL: atomic_global_store_saddr_i64_zext_vgpr_offset_neg128:
1154 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1155 ; GFX9-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
1156 ; GFX9-NEXT: s_endpgm
1158 ; GFX10-LABEL: atomic_global_store_saddr_i64_zext_vgpr_offset_neg128:
1160 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1161 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1162 ; GFX10-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
1163 ; GFX10-NEXT: s_endpgm
1165 ; GFX11-LABEL: atomic_global_store_saddr_i64_zext_vgpr_offset_neg128:
1167 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1168 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1169 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
1170 ; GFX11-NEXT: s_nop 0
1171 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1172 ; GFX11-NEXT: s_endpgm
1173 %zext.offset = zext i32 %voffset to i64
1174 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1175 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1176 store atomic i64 %data, ptr addrspace(1) %gep1 seq_cst, align 8
1180 ; --------------------------------------------------------------------------------
1181 ; D16 HI store (hi 16)
1182 ; --------------------------------------------------------------------------------
; Extracts element 1 of <2 x i16> %data and stores it as an i16 to
; %sbase + zext(%voffset). The checks expect a d16_hi store
; (global_store_short_d16_hi under GCN, global_store_d16_hi_b16 under GFX11)
; with v1 as the data operand and no preceding instruction for the extract.
1184 define amdgpu_ps void @global_store_saddr_i16_d16hi_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x i16> %data) {
1185 ; GCN-LABEL: global_store_saddr_i16_d16hi_zext_vgpr:
1187 ; GCN-NEXT: global_store_short_d16_hi v0, v1, s[2:3]
1188 ; GCN-NEXT: s_endpgm
1190 ; GFX11-LABEL: global_store_saddr_i16_d16hi_zext_vgpr:
1192 ; GFX11-NEXT: global_store_d16_hi_b16 v0, v1, s[2:3]
1193 ; GFX11-NEXT: s_nop 0
1194 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1195 ; GFX11-NEXT: s_endpgm
1196 %zext.offset = zext i32 %voffset to i64
1197 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1198 %data.hi = extractelement <2 x i16> %data, i32 1
1199 store i16 %data.hi, ptr addrspace(1) %gep0
; Extracts element 1 of <2 x i16> %data and stores it as an i16 to
; %sbase + zext(%voffset) - 128 bytes. The checks expect a d16_hi store with the
; constant -128 GEP appearing as its offset:-128 field.
1203 define amdgpu_ps void @global_store_saddr_i16_d16hi_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x i16> %data) {
1204 ; GCN-LABEL: global_store_saddr_i16_d16hi_zext_vgpr_offset_neg128:
1206 ; GCN-NEXT: global_store_short_d16_hi v0, v1, s[2:3] offset:-128
1207 ; GCN-NEXT: s_endpgm
1209 ; GFX11-LABEL: global_store_saddr_i16_d16hi_zext_vgpr_offset_neg128:
1211 ; GFX11-NEXT: global_store_d16_hi_b16 v0, v1, s[2:3] offset:-128
1212 ; GFX11-NEXT: s_nop 0
1213 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1214 ; GFX11-NEXT: s_endpgm
1215 %zext.offset = zext i32 %voffset to i64
1216 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1217 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1218 %data.hi = extractelement <2 x i16> %data, i32 1
1219 store i16 %data.hi, ptr addrspace(1) %gep1
; Extracts element 1 of <2 x i16> %data, truncates it to i8 and stores that byte
; to %sbase + zext(%voffset). The checks expect a byte-sized d16_hi store
; (global_store_byte_d16_hi under GCN, global_store_d16_hi_b8 under GFX11) with
; v1 as the data operand.
1223 define amdgpu_ps void @global_store_saddr_i16_d16hi_trunci8_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x i16> %data) {
1224 ; GCN-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr:
1226 ; GCN-NEXT: global_store_byte_d16_hi v0, v1, s[2:3]
1227 ; GCN-NEXT: s_endpgm
1229 ; GFX11-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr:
1231 ; GFX11-NEXT: global_store_d16_hi_b8 v0, v1, s[2:3]
1232 ; GFX11-NEXT: s_nop 0
1233 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1234 ; GFX11-NEXT: s_endpgm
1235 %zext.offset = zext i32 %voffset to i64
1236 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1237 %data.hi = extractelement <2 x i16> %data, i32 1
1238 %data.hi.trunc = trunc i16 %data.hi to i8
1239 store i8 %data.hi.trunc, ptr addrspace(1) %gep0
; Extracts element 1 of <2 x i16> %data, truncates it to i8 and stores that byte
; to %sbase + zext(%voffset) - 128 bytes. The checks expect a byte-sized d16_hi
; store with the constant -128 GEP appearing as its offset:-128 field.
1243 define amdgpu_ps void @global_store_saddr_i16_d16hi_trunci8_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x i16> %data) {
1244 ; GCN-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr_offset_neg128:
1246 ; GCN-NEXT: global_store_byte_d16_hi v0, v1, s[2:3] offset:-128
1247 ; GCN-NEXT: s_endpgm
1249 ; GFX11-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr_offset_neg128:
1251 ; GFX11-NEXT: global_store_d16_hi_b8 v0, v1, s[2:3] offset:-128
1252 ; GFX11-NEXT: s_nop 0
1253 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1254 ; GFX11-NEXT: s_endpgm
1255 %zext.offset = zext i32 %voffset to i64
1256 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1257 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1258 %data.hi = extractelement <2 x i16> %data, i32 1
1259 %data.hi.trunc = trunc i16 %data.hi to i8
1260 store i8 %data.hi.trunc, ptr addrspace(1) %gep1