1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
3 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
4 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX11 %s
5 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12 %s
7 ; Test using saddr addressing mode of global_*store_* flat instructions.
; Stores an i8 through %sbase plus a zero-extended 32-bit offset that is first
; loaded from %voffset.ptr. The checks expect a wait on the offset load and then
; a byte store that uses the SGPR base s[2:3] with the loaded offset in v0 and
; no immediate offset.
9 define amdgpu_ps void @global_store_saddr_i8_zext_vgpr(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr, i8 %data) {
10 ; GCN-LABEL: global_store_saddr_i8_zext_vgpr:
12 ; GCN-NEXT: global_load_dword v0, v[0:1], off
13 ; GCN-NEXT: s_waitcnt vmcnt(0)
14 ; GCN-NEXT: global_store_byte v0, v2, s[2:3]
17 ; GFX11-LABEL: global_store_saddr_i8_zext_vgpr:
19 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off
20 ; GFX11-NEXT: s_waitcnt vmcnt(0)
21 ; GFX11-NEXT: global_store_b8 v0, v2, s[2:3]
22 ; GFX11-NEXT: s_endpgm
24 ; GFX12-LABEL: global_store_saddr_i8_zext_vgpr:
26 ; GFX12-NEXT: global_load_b32 v0, v[0:1], off
27 ; GFX12-NEXT: s_wait_loadcnt 0x0
28 ; GFX12-NEXT: global_store_b8 v0, v2, s[2:3]
29 ; GFX12-NEXT: s_endpgm
30 %voffset = load i32, ptr addrspace(1) %voffset.ptr
31 %zext.offset = zext i32 %voffset to i64
32 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
33 store i8 %data, ptr addrspace(1) %gep0
37 ; Maximum positive immediate offset on gfx10 (2047)
; Stores an i8 at %sbase + zext(loaded offset) + 2047. The checks expect the
; constant 2047 to be folded into the store's immediate offset field on every
; tested target, keeping the SGPR base s[2:3] and the VGPR offset v0.
38 define amdgpu_ps void @global_store_saddr_i8_zext_vgpr_offset_2047(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr, i8 %data) {
39 ; GCN-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
41 ; GCN-NEXT: global_load_dword v0, v[0:1], off
42 ; GCN-NEXT: s_waitcnt vmcnt(0)
43 ; GCN-NEXT: global_store_byte v0, v2, s[2:3] offset:2047
46 ; GFX11-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
48 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off
49 ; GFX11-NEXT: s_waitcnt vmcnt(0)
50 ; GFX11-NEXT: global_store_b8 v0, v2, s[2:3] offset:2047
51 ; GFX11-NEXT: s_endpgm
53 ; GFX12-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
55 ; GFX12-NEXT: global_load_b32 v0, v[0:1], off
56 ; GFX12-NEXT: s_wait_loadcnt 0x0
57 ; GFX12-NEXT: global_store_b8 v0, v2, s[2:3] offset:2047
58 ; GFX12-NEXT: s_endpgm
59 %voffset = load i32, ptr addrspace(1) %voffset.ptr
60 %zext.offset = zext i32 %voffset to i64
61 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
62 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2047
63 store i8 %data, ptr addrspace(1) %gep1
67 ; Most negative immediate offset on gfx10 (-2048)
; Stores an i8 at %sbase + zext(loaded offset) - 2048. The checks expect the
; constant -2048 to be folded into the store's immediate offset field on every
; tested target, keeping the SGPR base s[2:3] and the VGPR offset v0.
68 define amdgpu_ps void @global_store_saddr_i8_zext_vgpr_offset_neg2048(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr, i8 %data) {
69 ; GCN-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
71 ; GCN-NEXT: global_load_dword v0, v[0:1], off
72 ; GCN-NEXT: s_waitcnt vmcnt(0)
73 ; GCN-NEXT: global_store_byte v0, v2, s[2:3] offset:-2048
76 ; GFX11-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
78 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off
79 ; GFX11-NEXT: s_waitcnt vmcnt(0)
80 ; GFX11-NEXT: global_store_b8 v0, v2, s[2:3] offset:-2048
81 ; GFX11-NEXT: s_endpgm
83 ; GFX12-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
85 ; GFX12-NEXT: global_load_b32 v0, v[0:1], off
86 ; GFX12-NEXT: s_wait_loadcnt 0x0
87 ; GFX12-NEXT: global_store_b8 v0, v2, s[2:3] offset:-2048
88 ; GFX12-NEXT: s_endpgm
89 %voffset = load i32, ptr addrspace(1) %voffset.ptr
90 %zext.offset = zext i32 %voffset to i64
91 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
92 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2048
93 store i8 %data, ptr addrspace(1) %gep1
97 ; --------------------------------------------------------------------------------
98 ; Uniformity edge cases
99 ; --------------------------------------------------------------------------------
; LDS (addrspace 3) slot holding a global (addrspace 1) pointer. The tests that
; load their base pointer from here receive it in VGPRs. The initializer value
; is irrelevant to the tests, so use poison rather than the deprecated undef.
101 @ptr.in.lds = internal addrspace(3) global ptr addrspace(1) poison
103 ; Base pointer is uniform, but also in VGPRs
; Stores an i8 at a base pointer loaded from @ptr.in.lds plus zext(%voffset).
; The LDS load leaves the base in v[2:3]; the checks expect it to be copied to
; s[0:1] with v_readfirstlane_b32 so the store can use the SGPR-base form with
; the offset in v0 and the data in v1.
104 define amdgpu_ps void @global_store_saddr_uniform_ptr_in_vgprs(i32 %voffset, i8 %data) {
105 ; GFX9-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
107 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
108 ; GFX9-NEXT: ds_read_b64 v[2:3], v2
109 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
110 ; GFX9-NEXT: v_readfirstlane_b32 s0, v2
111 ; GFX9-NEXT: v_readfirstlane_b32 s1, v3
113 ; GFX9-NEXT: global_store_byte v0, v1, s[0:1]
114 ; GFX9-NEXT: s_endpgm
116 ; GFX10-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
118 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
119 ; GFX10-NEXT: ds_read_b64 v[2:3], v2
120 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
121 ; GFX10-NEXT: v_readfirstlane_b32 s0, v2
122 ; GFX10-NEXT: v_readfirstlane_b32 s1, v3
123 ; GFX10-NEXT: global_store_byte v0, v1, s[0:1]
124 ; GFX10-NEXT: s_endpgm
126 ; GFX11-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
128 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
129 ; GFX11-NEXT: ds_load_b64 v[2:3], v2
130 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
131 ; GFX11-NEXT: v_readfirstlane_b32 s0, v2
132 ; GFX11-NEXT: v_readfirstlane_b32 s1, v3
133 ; GFX11-NEXT: global_store_b8 v0, v1, s[0:1]
134 ; GFX11-NEXT: s_endpgm
136 ; GFX12-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
138 ; GFX12-NEXT: v_mov_b32_e32 v2, 0
139 ; GFX12-NEXT: ds_load_b64 v[2:3], v2
140 ; GFX12-NEXT: s_wait_dscnt 0x0
141 ; GFX12-NEXT: v_readfirstlane_b32 s0, v2
142 ; GFX12-NEXT: v_readfirstlane_b32 s1, v3
143 ; GFX12-NEXT: global_store_b8 v0, v1, s[0:1]
144 ; GFX12-NEXT: s_endpgm
145 %sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds
146 %zext.offset = zext i32 %voffset to i64
147 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
148 store i8 %data, ptr addrspace(1) %gep0
152 ; Base pointer is uniform, but also in VGPRs, with imm offset
; Stores an i8 at a base pointer loaded from @ptr.in.lds plus zext(%voffset)
; minus 120. The checks expect the base to be moved from v[2:3] to s[0:1] with
; v_readfirstlane_b32 and the constant -120 to be folded into the store's
; immediate offset field.
153 define amdgpu_ps void @global_store_saddr_uniform_ptr_in_vgprs_immoffset(i32 %voffset, i8 %data) {
154 ; GFX9-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
156 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
157 ; GFX9-NEXT: ds_read_b64 v[2:3], v2
158 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
159 ; GFX9-NEXT: v_readfirstlane_b32 s0, v2
160 ; GFX9-NEXT: v_readfirstlane_b32 s1, v3
162 ; GFX9-NEXT: global_store_byte v0, v1, s[0:1] offset:-120
163 ; GFX9-NEXT: s_endpgm
165 ; GFX10-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
167 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
168 ; GFX10-NEXT: ds_read_b64 v[2:3], v2
169 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
170 ; GFX10-NEXT: v_readfirstlane_b32 s0, v2
171 ; GFX10-NEXT: v_readfirstlane_b32 s1, v3
172 ; GFX10-NEXT: global_store_byte v0, v1, s[0:1] offset:-120
173 ; GFX10-NEXT: s_endpgm
175 ; GFX11-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
177 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
178 ; GFX11-NEXT: ds_load_b64 v[2:3], v2
179 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
180 ; GFX11-NEXT: v_readfirstlane_b32 s0, v2
181 ; GFX11-NEXT: v_readfirstlane_b32 s1, v3
182 ; GFX11-NEXT: global_store_b8 v0, v1, s[0:1] offset:-120
183 ; GFX11-NEXT: s_endpgm
185 ; GFX12-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
187 ; GFX12-NEXT: v_mov_b32_e32 v2, 0
188 ; GFX12-NEXT: ds_load_b64 v[2:3], v2
189 ; GFX12-NEXT: s_wait_dscnt 0x0
190 ; GFX12-NEXT: v_readfirstlane_b32 s0, v2
191 ; GFX12-NEXT: v_readfirstlane_b32 s1, v3
192 ; GFX12-NEXT: global_store_b8 v0, v1, s[0:1] offset:-120
193 ; GFX12-NEXT: s_endpgm
194 %sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds
195 %zext.offset = zext i32 %voffset to i64
196 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
197 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -120
198 store i8 %data, ptr addrspace(1) %gep1
202 ; --------------------------------------------------------------------------------
203 ; Stress various type stores
204 ; --------------------------------------------------------------------------------
; Stores i16 %data at %sbase + zext(%voffset). The checks expect a single
; 16-bit store (global_store_short on gfx9/gfx10, global_store_b16 on
; gfx11/gfx12) using the SGPR base s[2:3], the offset in v0 and the data in v1.
206 define amdgpu_ps void @global_store_saddr_i16_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, i16 %data) {
207 ; GCN-LABEL: global_store_saddr_i16_zext_vgpr:
209 ; GCN-NEXT: global_store_short v0, v1, s[2:3]
212 ; GFX11-LABEL: global_store_saddr_i16_zext_vgpr:
214 ; GFX11-NEXT: global_store_b16 v0, v1, s[2:3]
215 ; GFX11-NEXT: s_endpgm
217 ; GFX12-LABEL: global_store_saddr_i16_zext_vgpr:
219 ; GFX12-NEXT: global_store_b16 v0, v1, s[2:3]
220 ; GFX12-NEXT: s_endpgm
221 %zext.offset = zext i32 %voffset to i64
222 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
223 store i16 %data, ptr addrspace(1) %gep0
; Stores i16 %data at %sbase + zext(%voffset) - 128. The checks expect the
; constant -128 to be folded into the 16-bit store's immediate offset field
; while keeping the SGPR base s[2:3] and the VGPR offset v0.
227 define amdgpu_ps void @global_store_saddr_i16_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i16 %data) {
228 ; GCN-LABEL: global_store_saddr_i16_zext_vgpr_offset_neg128:
230 ; GCN-NEXT: global_store_short v0, v1, s[2:3] offset:-128
233 ; GFX11-LABEL: global_store_saddr_i16_zext_vgpr_offset_neg128:
235 ; GFX11-NEXT: global_store_b16 v0, v1, s[2:3] offset:-128
236 ; GFX11-NEXT: s_endpgm
238 ; GFX12-LABEL: global_store_saddr_i16_zext_vgpr_offset_neg128:
240 ; GFX12-NEXT: global_store_b16 v0, v1, s[2:3] offset:-128
241 ; GFX12-NEXT: s_endpgm
242 %zext.offset = zext i32 %voffset to i64
243 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
244 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
245 store i16 %data, ptr addrspace(1) %gep1
; Stores half %data at %sbase + zext(%voffset). The checks expect the same
; 16-bit store instruction as for an integer (global_store_short on
; gfx9/gfx10, global_store_b16 on gfx11/gfx12) with SGPR base s[2:3], offset
; in v0 and data in v1.
249 define amdgpu_ps void @global_store_saddr_f16_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, half %data) {
250 ; GCN-LABEL: global_store_saddr_f16_zext_vgpr:
252 ; GCN-NEXT: global_store_short v0, v1, s[2:3]
255 ; GFX11-LABEL: global_store_saddr_f16_zext_vgpr:
257 ; GFX11-NEXT: global_store_b16 v0, v1, s[2:3]
258 ; GFX11-NEXT: s_endpgm
260 ; GFX12-LABEL: global_store_saddr_f16_zext_vgpr:
262 ; GFX12-NEXT: global_store_b16 v0, v1, s[2:3]
263 ; GFX12-NEXT: s_endpgm
264 %zext.offset = zext i32 %voffset to i64
265 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
266 store half %data, ptr addrspace(1) %gep0
; Stores half %data at %sbase + zext(%voffset) - 128. The checks expect the
; constant -128 to be folded into the 16-bit store's immediate offset field
; while keeping the SGPR base s[2:3] and the VGPR offset v0.
270 define amdgpu_ps void @global_store_saddr_f16_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, half %data) {
271 ; GCN-LABEL: global_store_saddr_f16_zext_vgpr_offset_neg128:
273 ; GCN-NEXT: global_store_short v0, v1, s[2:3] offset:-128
276 ; GFX11-LABEL: global_store_saddr_f16_zext_vgpr_offset_neg128:
278 ; GFX11-NEXT: global_store_b16 v0, v1, s[2:3] offset:-128
279 ; GFX11-NEXT: s_endpgm
281 ; GFX12-LABEL: global_store_saddr_f16_zext_vgpr_offset_neg128:
283 ; GFX12-NEXT: global_store_b16 v0, v1, s[2:3] offset:-128
284 ; GFX12-NEXT: s_endpgm
285 %zext.offset = zext i32 %voffset to i64
286 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
287 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
288 store half %data, ptr addrspace(1) %gep1
; Stores i32 %data at %sbase + zext(%voffset). The checks expect a single
; 32-bit store (global_store_dword on gfx9/gfx10, global_store_b32 on
; gfx11/gfx12) using the SGPR base s[2:3], the offset in v0 and the data in v1.
292 define amdgpu_ps void @global_store_saddr_i32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
293 ; GCN-LABEL: global_store_saddr_i32_zext_vgpr:
295 ; GCN-NEXT: global_store_dword v0, v1, s[2:3]
298 ; GFX11-LABEL: global_store_saddr_i32_zext_vgpr:
300 ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
301 ; GFX11-NEXT: s_endpgm
303 ; GFX12-LABEL: global_store_saddr_i32_zext_vgpr:
305 ; GFX12-NEXT: global_store_b32 v0, v1, s[2:3]
306 ; GFX12-NEXT: s_endpgm
307 %zext.offset = zext i32 %voffset to i64
308 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
309 store i32 %data, ptr addrspace(1) %gep0
; Stores i32 %data at %sbase + zext(%voffset) - 128. The checks expect the
; constant -128 to be folded into the 32-bit store's immediate offset field
; while keeping the SGPR base s[2:3] and the VGPR offset v0.
313 define amdgpu_ps void @global_store_saddr_i32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
314 ; GCN-LABEL: global_store_saddr_i32_zext_vgpr_offset_neg128:
316 ; GCN-NEXT: global_store_dword v0, v1, s[2:3] offset:-128
319 ; GFX11-LABEL: global_store_saddr_i32_zext_vgpr_offset_neg128:
321 ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3] offset:-128
322 ; GFX11-NEXT: s_endpgm
324 ; GFX12-LABEL: global_store_saddr_i32_zext_vgpr_offset_neg128:
326 ; GFX12-NEXT: global_store_b32 v0, v1, s[2:3] offset:-128
327 ; GFX12-NEXT: s_endpgm
328 %zext.offset = zext i32 %voffset to i64
329 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
330 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
331 store i32 %data, ptr addrspace(1) %gep1
; Stores float %data at %sbase + zext(%voffset). The checks expect the same
; 32-bit store instruction as for an integer (global_store_dword on
; gfx9/gfx10, global_store_b32 on gfx11/gfx12) with SGPR base s[2:3], offset
; in v0 and data in v1.
335 define amdgpu_ps void @global_store_saddr_f32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, float %data) {
336 ; GCN-LABEL: global_store_saddr_f32_zext_vgpr:
338 ; GCN-NEXT: global_store_dword v0, v1, s[2:3]
341 ; GFX11-LABEL: global_store_saddr_f32_zext_vgpr:
343 ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
344 ; GFX11-NEXT: s_endpgm
346 ; GFX12-LABEL: global_store_saddr_f32_zext_vgpr:
348 ; GFX12-NEXT: global_store_b32 v0, v1, s[2:3]
349 ; GFX12-NEXT: s_endpgm
350 %zext.offset = zext i32 %voffset to i64
351 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
352 store float %data, ptr addrspace(1) %gep0
; Stores float %data at %sbase + zext(%voffset) - 128. The checks expect the
; constant -128 to be folded into the 32-bit store's immediate offset field
; while keeping the SGPR base s[2:3] and the VGPR offset v0.
356 define amdgpu_ps void @global_store_saddr_f32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, float %data) {
357 ; GCN-LABEL: global_store_saddr_f32_zext_vgpr_offset_neg128:
359 ; GCN-NEXT: global_store_dword v0, v1, s[2:3] offset:-128
362 ; GFX11-LABEL: global_store_saddr_f32_zext_vgpr_offset_neg128:
364 ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3] offset:-128
365 ; GFX11-NEXT: s_endpgm
367 ; GFX12-LABEL: global_store_saddr_f32_zext_vgpr_offset_neg128:
369 ; GFX12-NEXT: global_store_b32 v0, v1, s[2:3] offset:-128
370 ; GFX12-NEXT: s_endpgm
371 %zext.offset = zext i32 %voffset to i64
372 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
373 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
374 store float %data, ptr addrspace(1) %gep1
; Stores an addrspace(3) pointer at %sbase + zext(%voffset). The checks expect
; it to be stored as one 32-bit value (global_store_dword on gfx9/gfx10,
; global_store_b32 on gfx11/gfx12) with SGPR base s[2:3], offset in v0 and
; data in v1.
378 define amdgpu_ps void @global_store_saddr_p3_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, ptr addrspace(3) %data) {
379 ; GCN-LABEL: global_store_saddr_p3_zext_vgpr:
381 ; GCN-NEXT: global_store_dword v0, v1, s[2:3]
384 ; GFX11-LABEL: global_store_saddr_p3_zext_vgpr:
386 ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
387 ; GFX11-NEXT: s_endpgm
389 ; GFX12-LABEL: global_store_saddr_p3_zext_vgpr:
391 ; GFX12-NEXT: global_store_b32 v0, v1, s[2:3]
392 ; GFX12-NEXT: s_endpgm
393 %zext.offset = zext i32 %voffset to i64
394 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
395 store ptr addrspace(3) %data, ptr addrspace(1) %gep0
; Stores an addrspace(3) pointer at %sbase + zext(%voffset) - 128. The checks
; expect the constant -128 to be folded into the 32-bit store's immediate
; offset field while keeping the SGPR base s[2:3] and the VGPR offset v0.
399 define amdgpu_ps void @global_store_saddr_p3_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, ptr addrspace(3) %data) {
400 ; GCN-LABEL: global_store_saddr_p3_zext_vgpr_offset_neg128:
402 ; GCN-NEXT: global_store_dword v0, v1, s[2:3] offset:-128
405 ; GFX11-LABEL: global_store_saddr_p3_zext_vgpr_offset_neg128:
407 ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3] offset:-128
408 ; GFX11-NEXT: s_endpgm
410 ; GFX12-LABEL: global_store_saddr_p3_zext_vgpr_offset_neg128:
412 ; GFX12-NEXT: global_store_b32 v0, v1, s[2:3] offset:-128
413 ; GFX12-NEXT: s_endpgm
414 %zext.offset = zext i32 %voffset to i64
415 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
416 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
417 store ptr addrspace(3) %data, ptr addrspace(1) %gep1
; Stores i64 %data at %sbase + zext(%voffset). The checks expect a single
; 64-bit store (global_store_dwordx2 on gfx9/gfx10, global_store_b64 on
; gfx11/gfx12) using the SGPR base s[2:3], the offset in v0 and the data in
; v[1:2].
421 define amdgpu_ps void @global_store_saddr_i64_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
422 ; GCN-LABEL: global_store_saddr_i64_zext_vgpr:
424 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
427 ; GFX11-LABEL: global_store_saddr_i64_zext_vgpr:
429 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
430 ; GFX11-NEXT: s_endpgm
432 ; GFX12-LABEL: global_store_saddr_i64_zext_vgpr:
434 ; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3]
435 ; GFX12-NEXT: s_endpgm
436 %zext.offset = zext i32 %voffset to i64
437 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
438 store i64 %data, ptr addrspace(1) %gep0
; Stores i64 %data at %sbase + zext(%voffset) - 128. The checks expect the
; constant -128 to be folded into the 64-bit store's immediate offset field
; while keeping the SGPR base s[2:3] and the VGPR offset v0.
442 define amdgpu_ps void @global_store_saddr_i64_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
443 ; GCN-LABEL: global_store_saddr_i64_zext_vgpr_offset_neg128:
445 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
448 ; GFX11-LABEL: global_store_saddr_i64_zext_vgpr_offset_neg128:
450 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
451 ; GFX11-NEXT: s_endpgm
453 ; GFX12-LABEL: global_store_saddr_i64_zext_vgpr_offset_neg128:
455 ; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
456 ; GFX12-NEXT: s_endpgm
457 %zext.offset = zext i32 %voffset to i64
458 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
459 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
460 store i64 %data, ptr addrspace(1) %gep1
; Stores double %data at %sbase + zext(%voffset). The checks expect a single
; 64-bit store (global_store_dwordx2 on gfx9/gfx10, global_store_b64 on
; gfx11/gfx12) using the SGPR base s[2:3], the offset in v0 and the data in
; v[1:2].
464 define amdgpu_ps void @global_store_saddr_f64_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, double %data) {
465 ; GCN-LABEL: global_store_saddr_f64_zext_vgpr:
467 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
470 ; GFX11-LABEL: global_store_saddr_f64_zext_vgpr:
472 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
473 ; GFX11-NEXT: s_endpgm
475 ; GFX12-LABEL: global_store_saddr_f64_zext_vgpr:
477 ; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3]
478 ; GFX12-NEXT: s_endpgm
479 %zext.offset = zext i32 %voffset to i64
480 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
481 store double %data, ptr addrspace(1) %gep0
; Stores double %data at %sbase + zext(%voffset) - 128. The checks expect the
; constant -128 to be folded into the 64-bit store's immediate offset field
; while keeping the SGPR base s[2:3] and the VGPR offset v0.
485 define amdgpu_ps void @global_store_saddr_f64_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, double %data) {
486 ; GCN-LABEL: global_store_saddr_f64_zext_vgpr_offset_neg128:
488 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
491 ; GFX11-LABEL: global_store_saddr_f64_zext_vgpr_offset_neg128:
493 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
494 ; GFX11-NEXT: s_endpgm
496 ; GFX12-LABEL: global_store_saddr_f64_zext_vgpr_offset_neg128:
498 ; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
499 ; GFX12-NEXT: s_endpgm
500 %zext.offset = zext i32 %voffset to i64
501 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
502 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
503 store double %data, ptr addrspace(1) %gep1
; Stores <2 x i32> %data at %sbase + zext(%voffset). The checks expect the
; vector to be written with one 64-bit store (global_store_dwordx2 on
; gfx9/gfx10, global_store_b64 on gfx11/gfx12) with SGPR base s[2:3], offset
; in v0 and data in v[1:2].
507 define amdgpu_ps void @global_store_saddr_v2i32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x i32> %data) {
508 ; GCN-LABEL: global_store_saddr_v2i32_zext_vgpr:
510 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
513 ; GFX11-LABEL: global_store_saddr_v2i32_zext_vgpr:
515 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
516 ; GFX11-NEXT: s_endpgm
518 ; GFX12-LABEL: global_store_saddr_v2i32_zext_vgpr:
520 ; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3]
521 ; GFX12-NEXT: s_endpgm
522 %zext.offset = zext i32 %voffset to i64
523 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
524 store <2 x i32> %data, ptr addrspace(1) %gep0
; Stores <2 x i32> %data at %sbase + zext(%voffset) - 128. The checks expect
; the constant -128 to be folded into the 64-bit store's immediate offset
; field while keeping the SGPR base s[2:3] and the VGPR offset v0.
528 define amdgpu_ps void @global_store_saddr_v2i32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x i32> %data) {
529 ; GCN-LABEL: global_store_saddr_v2i32_zext_vgpr_offset_neg128:
531 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
534 ; GFX11-LABEL: global_store_saddr_v2i32_zext_vgpr_offset_neg128:
536 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
537 ; GFX11-NEXT: s_endpgm
539 ; GFX12-LABEL: global_store_saddr_v2i32_zext_vgpr_offset_neg128:
541 ; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
542 ; GFX12-NEXT: s_endpgm
543 %zext.offset = zext i32 %voffset to i64
544 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
545 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
546 store <2 x i32> %data, ptr addrspace(1) %gep1
; Stores <2 x float> %data at %sbase + zext(%voffset). The checks expect the
; vector to be written with one 64-bit store (global_store_dwordx2 on
; gfx9/gfx10, global_store_b64 on gfx11/gfx12) with SGPR base s[2:3], offset
; in v0 and data in v[1:2].
550 define amdgpu_ps void @global_store_saddr_v2f32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x float> %data) {
551 ; GCN-LABEL: global_store_saddr_v2f32_zext_vgpr:
553 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
556 ; GFX11-LABEL: global_store_saddr_v2f32_zext_vgpr:
558 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
559 ; GFX11-NEXT: s_endpgm
561 ; GFX12-LABEL: global_store_saddr_v2f32_zext_vgpr:
563 ; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3]
564 ; GFX12-NEXT: s_endpgm
565 %zext.offset = zext i32 %voffset to i64
566 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
567 store <2 x float> %data, ptr addrspace(1) %gep0
; Stores <2 x float> %data at %sbase + zext(%voffset) - 128. The checks expect
; the constant -128 to be folded into the 64-bit store's immediate offset
; field while keeping the SGPR base s[2:3] and the VGPR offset v0.
571 define amdgpu_ps void @global_store_saddr_v2f32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x float> %data) {
572 ; GCN-LABEL: global_store_saddr_v2f32_zext_vgpr_offset_neg128:
574 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
577 ; GFX11-LABEL: global_store_saddr_v2f32_zext_vgpr_offset_neg128:
579 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
580 ; GFX11-NEXT: s_endpgm
582 ; GFX12-LABEL: global_store_saddr_v2f32_zext_vgpr_offset_neg128:
584 ; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
585 ; GFX12-NEXT: s_endpgm
586 %zext.offset = zext i32 %voffset to i64
587 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
588 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
589 store <2 x float> %data, ptr addrspace(1) %gep1
; Stores <4 x i16> %data at %sbase + zext(%voffset). The checks expect the
; vector to be written with one 64-bit store (global_store_dwordx2 on
; gfx9/gfx10, global_store_b64 on gfx11/gfx12) with SGPR base s[2:3], offset
; in v0 and data in v[1:2].
593 define amdgpu_ps void @global_store_saddr_v4i16_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x i16> %data) {
594 ; GCN-LABEL: global_store_saddr_v4i16_zext_vgpr:
596 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
599 ; GFX11-LABEL: global_store_saddr_v4i16_zext_vgpr:
601 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
602 ; GFX11-NEXT: s_endpgm
604 ; GFX12-LABEL: global_store_saddr_v4i16_zext_vgpr:
606 ; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3]
607 ; GFX12-NEXT: s_endpgm
608 %zext.offset = zext i32 %voffset to i64
609 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
610 store <4 x i16> %data, ptr addrspace(1) %gep0
; Stores <4 x i16> %data at %sbase + zext(%voffset) - 128. The checks expect
; the constant -128 to be folded into the 64-bit store's immediate offset
; field while keeping the SGPR base s[2:3] and the VGPR offset v0.
614 define amdgpu_ps void @global_store_saddr_v4i16_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x i16> %data) {
615 ; GCN-LABEL: global_store_saddr_v4i16_zext_vgpr_offset_neg128:
617 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
620 ; GFX11-LABEL: global_store_saddr_v4i16_zext_vgpr_offset_neg128:
622 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
623 ; GFX11-NEXT: s_endpgm
625 ; GFX12-LABEL: global_store_saddr_v4i16_zext_vgpr_offset_neg128:
627 ; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
628 ; GFX12-NEXT: s_endpgm
629 %zext.offset = zext i32 %voffset to i64
630 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
631 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
632 store <4 x i16> %data, ptr addrspace(1) %gep1
; Stores <4 x half> %data at %sbase + zext(%voffset). The checks expect the
; vector to be written with one 64-bit store (global_store_dwordx2 on
; gfx9/gfx10, global_store_b64 on gfx11/gfx12) with SGPR base s[2:3], offset
; in v0 and data in v[1:2].
636 define amdgpu_ps void @global_store_saddr_v4f16_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x half> %data) {
637 ; GCN-LABEL: global_store_saddr_v4f16_zext_vgpr:
639 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
642 ; GFX11-LABEL: global_store_saddr_v4f16_zext_vgpr:
644 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
645 ; GFX11-NEXT: s_endpgm
647 ; GFX12-LABEL: global_store_saddr_v4f16_zext_vgpr:
649 ; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3]
650 ; GFX12-NEXT: s_endpgm
651 %zext.offset = zext i32 %voffset to i64
652 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
653 store <4 x half> %data, ptr addrspace(1) %gep0
; Stores <4 x half> %data at %sbase + zext(%voffset) - 128. The checks expect
; the constant -128 to be folded into the 64-bit store's immediate offset
; field while keeping the SGPR base s[2:3] and the VGPR offset v0.
657 define amdgpu_ps void @global_store_saddr_v4f16_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x half> %data) {
658 ; GCN-LABEL: global_store_saddr_v4f16_zext_vgpr_offset_neg128:
660 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
663 ; GFX11-LABEL: global_store_saddr_v4f16_zext_vgpr_offset_neg128:
665 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
666 ; GFX11-NEXT: s_endpgm
668 ; GFX12-LABEL: global_store_saddr_v4f16_zext_vgpr_offset_neg128:
670 ; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
671 ; GFX12-NEXT: s_endpgm
672 %zext.offset = zext i32 %voffset to i64
673 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
674 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
675 store <4 x half> %data, ptr addrspace(1) %gep1
; Stores an addrspace(1) pointer at %sbase + zext(%voffset). The checks expect
; it to be stored as one 64-bit value (global_store_dwordx2 on gfx9/gfx10,
; global_store_b64 on gfx11/gfx12) with SGPR base s[2:3], offset in v0 and
; data in v[1:2].
679 define amdgpu_ps void @global_store_saddr_p1_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, ptr addrspace(1) %data) {
680 ; GCN-LABEL: global_store_saddr_p1_zext_vgpr:
682 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
685 ; GFX11-LABEL: global_store_saddr_p1_zext_vgpr:
687 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
688 ; GFX11-NEXT: s_endpgm
690 ; GFX12-LABEL: global_store_saddr_p1_zext_vgpr:
692 ; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3]
693 ; GFX12-NEXT: s_endpgm
694 %zext.offset = zext i32 %voffset to i64
695 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
696 store ptr addrspace(1) %data, ptr addrspace(1) %gep0
; Stores an addrspace(1) pointer at %sbase + zext(%voffset) - 128. The checks
; expect the constant -128 to be folded into the 64-bit store's immediate
; offset field while keeping the SGPR base s[2:3] and the VGPR offset v0.
700 define amdgpu_ps void @global_store_saddr_p1_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, ptr addrspace(1) %data) {
701 ; GCN-LABEL: global_store_saddr_p1_zext_vgpr_offset_neg128:
703 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
706 ; GFX11-LABEL: global_store_saddr_p1_zext_vgpr_offset_neg128:
708 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
709 ; GFX11-NEXT: s_endpgm
711 ; GFX12-LABEL: global_store_saddr_p1_zext_vgpr_offset_neg128:
713 ; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
714 ; GFX12-NEXT: s_endpgm
715 %zext.offset = zext i32 %voffset to i64
716 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
717 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
718 store ptr addrspace(1) %data, ptr addrspace(1) %gep1
; Stores <3 x i32> %data at %sbase + zext(%voffset). The checks expect the
; vector to be written with one 96-bit store (global_store_dwordx3 on
; gfx9/gfx10, global_store_b96 on gfx11/gfx12) with SGPR base s[2:3], offset
; in v0 and data in v[1:3].
722 define amdgpu_ps void @global_store_saddr_v3i32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <3 x i32> %data) {
723 ; GCN-LABEL: global_store_saddr_v3i32_zext_vgpr:
725 ; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3]
728 ; GFX11-LABEL: global_store_saddr_v3i32_zext_vgpr:
730 ; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3]
731 ; GFX11-NEXT: s_endpgm
733 ; GFX12-LABEL: global_store_saddr_v3i32_zext_vgpr:
735 ; GFX12-NEXT: global_store_b96 v0, v[1:3], s[2:3]
736 ; GFX12-NEXT: s_endpgm
737 %zext.offset = zext i32 %voffset to i64
738 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
739 store <3 x i32> %data, ptr addrspace(1) %gep0
; Stores <3 x i32> %data at %sbase + zext(%voffset) - 128. The checks expect
; the constant -128 to be folded into the 96-bit store's immediate offset
; field while keeping the SGPR base s[2:3] and the VGPR offset v0.
743 define amdgpu_ps void @global_store_saddr_v3i32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <3 x i32> %data) {
744 ; GCN-LABEL: global_store_saddr_v3i32_zext_vgpr_offset_neg128:
746 ; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3] offset:-128
749 ; GFX11-LABEL: global_store_saddr_v3i32_zext_vgpr_offset_neg128:
751 ; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3] offset:-128
752 ; GFX11-NEXT: s_endpgm
754 ; GFX12-LABEL: global_store_saddr_v3i32_zext_vgpr_offset_neg128:
756 ; GFX12-NEXT: global_store_b96 v0, v[1:3], s[2:3] offset:-128
757 ; GFX12-NEXT: s_endpgm
758 %zext.offset = zext i32 %voffset to i64
759 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
760 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
761 store <3 x i32> %data, ptr addrspace(1) %gep1
; Stores <3 x float> %data at %sbase + zext(%voffset). The checks expect the
; vector to be written with one 96-bit store (global_store_dwordx3 on
; gfx9/gfx10, global_store_b96 on gfx11/gfx12) with SGPR base s[2:3], offset
; in v0 and data in v[1:3].
765 define amdgpu_ps void @global_store_saddr_v3f32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <3 x float> %data) {
766 ; GCN-LABEL: global_store_saddr_v3f32_zext_vgpr:
768 ; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3]
771 ; GFX11-LABEL: global_store_saddr_v3f32_zext_vgpr:
773 ; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3]
774 ; GFX11-NEXT: s_endpgm
776 ; GFX12-LABEL: global_store_saddr_v3f32_zext_vgpr:
778 ; GFX12-NEXT: global_store_b96 v0, v[1:3], s[2:3]
779 ; GFX12-NEXT: s_endpgm
780 %zext.offset = zext i32 %voffset to i64
781 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
782 store <3 x float> %data, ptr addrspace(1) %gep0
; Stores <3 x float> %data at %sbase + zext(%voffset) - 128. The checks expect
; the constant -128 to be folded into the 96-bit store's immediate offset
; field while keeping the SGPR base s[2:3] and the VGPR offset v0.
786 define amdgpu_ps void @global_store_saddr_v3f32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <3 x float> %data) {
787 ; GCN-LABEL: global_store_saddr_v3f32_zext_vgpr_offset_neg128:
789 ; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3] offset:-128
792 ; GFX11-LABEL: global_store_saddr_v3f32_zext_vgpr_offset_neg128:
794 ; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3] offset:-128
795 ; GFX11-NEXT: s_endpgm
797 ; GFX12-LABEL: global_store_saddr_v3f32_zext_vgpr_offset_neg128:
799 ; GFX12-NEXT: global_store_b96 v0, v[1:3], s[2:3] offset:-128
800 ; GFX12-NEXT: s_endpgm
801 %zext.offset = zext i32 %voffset to i64
802 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
803 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
804 store <3 x float> %data, ptr addrspace(1) %gep1
; Stores <6 x i16> %data at %sbase + zext(%voffset). The checks expect the
; vector to be written with one 96-bit store (global_store_dwordx3 on
; gfx9/gfx10, global_store_b96 on gfx11/gfx12) with SGPR base s[2:3], offset
; in v0 and data in v[1:3].
808 define amdgpu_ps void @global_store_saddr_v6i16_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <6 x i16> %data) {
809 ; GCN-LABEL: global_store_saddr_v6i16_zext_vgpr:
811 ; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3]
814 ; GFX11-LABEL: global_store_saddr_v6i16_zext_vgpr:
816 ; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3]
817 ; GFX11-NEXT: s_endpgm
819 ; GFX12-LABEL: global_store_saddr_v6i16_zext_vgpr:
821 ; GFX12-NEXT: global_store_b96 v0, v[1:3], s[2:3]
822 ; GFX12-NEXT: s_endpgm
823 %zext.offset = zext i32 %voffset to i64
824 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
825 store <6 x i16> %data, ptr addrspace(1) %gep0
; Stores <6 x i16> %data at %sbase + zext(%voffset) - 128. The checks expect
; the constant -128 to be folded into the 96-bit store's immediate offset
; field while keeping the SGPR base s[2:3] and the VGPR offset v0.
829 define amdgpu_ps void @global_store_saddr_v6i16_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <6 x i16> %data) {
830 ; GCN-LABEL: global_store_saddr_v6i16_zext_vgpr_offset_neg128:
832 ; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3] offset:-128
835 ; GFX11-LABEL: global_store_saddr_v6i16_zext_vgpr_offset_neg128:
837 ; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3] offset:-128
838 ; GFX11-NEXT: s_endpgm
840 ; GFX12-LABEL: global_store_saddr_v6i16_zext_vgpr_offset_neg128:
842 ; GFX12-NEXT: global_store_b96 v0, v[1:3], s[2:3] offset:-128
843 ; GFX12-NEXT: s_endpgm
844 %zext.offset = zext i32 %voffset to i64
845 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
846 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
847 store <6 x i16> %data, ptr addrspace(1) %gep1
; Stores <6 x half> %data at %sbase + zext(%voffset). The checks expect the
; vector to be written with one 96-bit store (global_store_dwordx3 on
; gfx9/gfx10, global_store_b96 on gfx11/gfx12) with SGPR base s[2:3], offset
; in v0 and data in v[1:3].
851 define amdgpu_ps void @global_store_saddr_v6f16_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <6 x half> %data) {
852 ; GCN-LABEL: global_store_saddr_v6f16_zext_vgpr:
854 ; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3]
857 ; GFX11-LABEL: global_store_saddr_v6f16_zext_vgpr:
859 ; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3]
860 ; GFX11-NEXT: s_endpgm
862 ; GFX12-LABEL: global_store_saddr_v6f16_zext_vgpr:
864 ; GFX12-NEXT: global_store_b96 v0, v[1:3], s[2:3]
865 ; GFX12-NEXT: s_endpgm
866 %zext.offset = zext i32 %voffset to i64
867 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
868 store <6 x half> %data, ptr addrspace(1) %gep0
; Stores <6 x half> %data at %sbase + zext(%voffset) - 128. The checks expect
; the constant -128 to be folded into the 96-bit store's immediate offset
; field while keeping the SGPR base s[2:3] and the VGPR offset v0.
872 define amdgpu_ps void @global_store_saddr_v6f16_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <6 x half> %data) {
873 ; GCN-LABEL: global_store_saddr_v6f16_zext_vgpr_offset_neg128:
875 ; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3] offset:-128
878 ; GFX11-LABEL: global_store_saddr_v6f16_zext_vgpr_offset_neg128:
880 ; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3] offset:-128
881 ; GFX11-NEXT: s_endpgm
883 ; GFX12-LABEL: global_store_saddr_v6f16_zext_vgpr_offset_neg128:
885 ; GFX12-NEXT: global_store_b96 v0, v[1:3], s[2:3] offset:-128
886 ; GFX12-NEXT: s_endpgm
887 %zext.offset = zext i32 %voffset to i64
888 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
889 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
890 store <6 x half> %data, ptr addrspace(1) %gep1
; Stores a <4 x i32> value to %sbase + zext(%voffset) with no constant offset.
; The checks expect a single saddr-form store of v[1:4] that uses v0 as the
; offset and s[2:3] as the base: global_store_dwordx4 on gfx900/gfx1010 and
; global_store_b128 on gfx1100/gfx1200.
894 define amdgpu_ps void @global_store_saddr_v4i32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x i32> %data) {
895 ; GCN-LABEL: global_store_saddr_v4i32_zext_vgpr:
897 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3]
900 ; GFX11-LABEL: global_store_saddr_v4i32_zext_vgpr:
902 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3]
903 ; GFX11-NEXT: s_endpgm
905 ; GFX12-LABEL: global_store_saddr_v4i32_zext_vgpr:
907 ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3]
908 ; GFX12-NEXT: s_endpgm
909 %zext.offset = zext i32 %voffset to i64
910 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
911 store <4 x i32> %data, ptr addrspace(1) %gep0
; Stores a <4 x i32> value to %sbase + zext(%voffset) - 128.
; The second GEP adds a constant -128 bytes; the checks expect it to appear as
; the immediate `offset:-128` on a single saddr-form store of v[1:4]
; (global_store_dwordx4 on gfx900/gfx1010, global_store_b128 on gfx1100/gfx1200).
915 define amdgpu_ps void @global_store_saddr_v4i32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x i32> %data) {
916 ; GCN-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
918 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
921 ; GFX11-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
923 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
924 ; GFX11-NEXT: s_endpgm
926 ; GFX12-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
928 ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
929 ; GFX12-NEXT: s_endpgm
930 %zext.offset = zext i32 %voffset to i64
931 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
932 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
933 store <4 x i32> %data, ptr addrspace(1) %gep1
; Stores a <4 x float> value to %sbase + zext(%voffset) with no constant offset.
; The checks expect a single saddr-form store of v[1:4] that uses v0 as the
; offset and s[2:3] as the base: global_store_dwordx4 on gfx900/gfx1010 and
; global_store_b128 on gfx1100/gfx1200.
937 define amdgpu_ps void @global_store_saddr_v4f32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x float> %data) {
938 ; GCN-LABEL: global_store_saddr_v4f32_zext_vgpr:
940 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3]
943 ; GFX11-LABEL: global_store_saddr_v4f32_zext_vgpr:
945 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3]
946 ; GFX11-NEXT: s_endpgm
948 ; GFX12-LABEL: global_store_saddr_v4f32_zext_vgpr:
950 ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3]
951 ; GFX12-NEXT: s_endpgm
952 %zext.offset = zext i32 %voffset to i64
953 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
954 store <4 x float> %data, ptr addrspace(1) %gep0
; Stores a <4 x float> value to %sbase + zext(%voffset) - 128.
; The second GEP adds a constant -128 bytes; the checks expect it to appear as
; the immediate `offset:-128` on a single saddr-form store of v[1:4]
; (global_store_dwordx4 on gfx900/gfx1010, global_store_b128 on gfx1100/gfx1200).
958 define amdgpu_ps void @global_store_saddr_v4f32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x float> %data) {
959 ; GCN-LABEL: global_store_saddr_v4f32_zext_vgpr_offset_neg128:
961 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
964 ; GFX11-LABEL: global_store_saddr_v4f32_zext_vgpr_offset_neg128:
966 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
967 ; GFX11-NEXT: s_endpgm
969 ; GFX12-LABEL: global_store_saddr_v4f32_zext_vgpr_offset_neg128:
971 ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
972 ; GFX12-NEXT: s_endpgm
973 %zext.offset = zext i32 %voffset to i64
974 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
975 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
976 store <4 x float> %data, ptr addrspace(1) %gep1
; Stores a <2 x i64> value to %sbase + zext(%voffset) with no constant offset.
; The checks expect a single saddr-form store of v[1:4] that uses v0 as the
; offset and s[2:3] as the base: global_store_dwordx4 on gfx900/gfx1010 and
; global_store_b128 on gfx1100/gfx1200.
980 define amdgpu_ps void @global_store_saddr_v2i64_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x i64> %data) {
981 ; GCN-LABEL: global_store_saddr_v2i64_zext_vgpr:
983 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3]
986 ; GFX11-LABEL: global_store_saddr_v2i64_zext_vgpr:
988 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3]
989 ; GFX11-NEXT: s_endpgm
991 ; GFX12-LABEL: global_store_saddr_v2i64_zext_vgpr:
993 ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3]
994 ; GFX12-NEXT: s_endpgm
995 %zext.offset = zext i32 %voffset to i64
996 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
997 store <2 x i64> %data, ptr addrspace(1) %gep0
; Stores a <2 x i64> value to %sbase + zext(%voffset) - 128.
; The second GEP adds a constant -128 bytes; the checks expect it to appear as
; the immediate `offset:-128` on a single saddr-form store of v[1:4]
; (global_store_dwordx4 on gfx900/gfx1010, global_store_b128 on gfx1100/gfx1200).
1001 define amdgpu_ps void @global_store_saddr_v2i64_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x i64> %data) {
1002 ; GCN-LABEL: global_store_saddr_v2i64_zext_vgpr_offset_neg128:
1004 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
1005 ; GCN-NEXT: s_endpgm
1007 ; GFX11-LABEL: global_store_saddr_v2i64_zext_vgpr_offset_neg128:
1009 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
1010 ; GFX11-NEXT: s_endpgm
1012 ; GFX12-LABEL: global_store_saddr_v2i64_zext_vgpr_offset_neg128:
1014 ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
1015 ; GFX12-NEXT: s_endpgm
1016 %zext.offset = zext i32 %voffset to i64
1017 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1018 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1019 store <2 x i64> %data, ptr addrspace(1) %gep1
; Stores a <2 x double> value to %sbase + zext(%voffset) with no constant offset.
; The checks expect a single saddr-form store of v[1:4] that uses v0 as the
; offset and s[2:3] as the base, followed by s_endpgm: global_store_dwordx4 on
; gfx900/gfx1010 and global_store_b128 on gfx1100/gfx1200.
1023 define amdgpu_ps void @global_store_saddr_v2f64_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x double> %data) {
1024 ; GCN-LABEL: global_store_saddr_v2f64_zext_vgpr:
1026 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3]
1027 ; GCN-NEXT: s_endpgm
1029 ; GFX11-LABEL: global_store_saddr_v2f64_zext_vgpr:
1031 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3]
1032 ; GFX11-NEXT: s_endpgm
1034 ; GFX12-LABEL: global_store_saddr_v2f64_zext_vgpr:
1036 ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3]
1037 ; GFX12-NEXT: s_endpgm
1038 %zext.offset = zext i32 %voffset to i64
1039 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1040 store <2 x double> %data, ptr addrspace(1) %gep0
; Stores a <2 x double> value to %sbase + zext(%voffset) - 128.
; The second GEP adds a constant -128 bytes; the checks expect it to appear as
; the immediate `offset:-128` on a single saddr-form store of v[1:4]
; (global_store_dwordx4 on gfx900/gfx1010, global_store_b128 on gfx1100/gfx1200).
1044 define amdgpu_ps void @global_store_saddr_v2f64_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x double> %data) {
1045 ; GCN-LABEL: global_store_saddr_v2f64_zext_vgpr_offset_neg128:
1047 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
1048 ; GCN-NEXT: s_endpgm
1050 ; GFX11-LABEL: global_store_saddr_v2f64_zext_vgpr_offset_neg128:
1052 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
1053 ; GFX11-NEXT: s_endpgm
1055 ; GFX12-LABEL: global_store_saddr_v2f64_zext_vgpr_offset_neg128:
1057 ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
1058 ; GFX12-NEXT: s_endpgm
1059 %zext.offset = zext i32 %voffset to i64
1060 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1061 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1062 store <2 x double> %data, ptr addrspace(1) %gep1
; Stores an <8 x i16> value to %sbase + zext(%voffset) with no constant offset.
; The checks expect a single saddr-form store of v[1:4] that uses v0 as the
; offset and s[2:3] as the base, followed by s_endpgm: global_store_dwordx4 on
; gfx900/gfx1010 and global_store_b128 on gfx1100/gfx1200.
1066 define amdgpu_ps void @global_store_saddr_v8i16_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <8 x i16> %data) {
1067 ; GCN-LABEL: global_store_saddr_v8i16_zext_vgpr:
1069 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3]
1070 ; GCN-NEXT: s_endpgm
1072 ; GFX11-LABEL: global_store_saddr_v8i16_zext_vgpr:
1074 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3]
1075 ; GFX11-NEXT: s_endpgm
1077 ; GFX12-LABEL: global_store_saddr_v8i16_zext_vgpr:
1079 ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3]
1080 ; GFX12-NEXT: s_endpgm
1081 %zext.offset = zext i32 %voffset to i64
1082 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1083 store <8 x i16> %data, ptr addrspace(1) %gep0
; Stores an <8 x i16> value to %sbase + zext(%voffset) - 128.
; The second GEP adds a constant -128 bytes; the checks expect it to appear as
; the immediate `offset:-128` on a single saddr-form store of v[1:4]
; (global_store_dwordx4 on gfx900/gfx1010, global_store_b128 on gfx1100/gfx1200).
1087 define amdgpu_ps void @global_store_saddr_v8i16_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <8 x i16> %data) {
1088 ; GCN-LABEL: global_store_saddr_v8i16_zext_vgpr_offset_neg128:
1090 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
1091 ; GCN-NEXT: s_endpgm
1093 ; GFX11-LABEL: global_store_saddr_v8i16_zext_vgpr_offset_neg128:
1095 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
1096 ; GFX11-NEXT: s_endpgm
1098 ; GFX12-LABEL: global_store_saddr_v8i16_zext_vgpr_offset_neg128:
1100 ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
1101 ; GFX12-NEXT: s_endpgm
1102 %zext.offset = zext i32 %voffset to i64
1103 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1104 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1105 store <8 x i16> %data, ptr addrspace(1) %gep1
; Stores an <8 x half> value to %sbase + zext(%voffset) with no constant offset.
; The checks expect a single saddr-form store of v[1:4] that uses v0 as the
; offset and s[2:3] as the base, followed by s_endpgm: global_store_dwordx4 on
; gfx900/gfx1010 and global_store_b128 on gfx1100/gfx1200.
1109 define amdgpu_ps void @global_store_saddr_v8f16_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <8 x half> %data) {
1110 ; GCN-LABEL: global_store_saddr_v8f16_zext_vgpr:
1112 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3]
1113 ; GCN-NEXT: s_endpgm
1115 ; GFX11-LABEL: global_store_saddr_v8f16_zext_vgpr:
1117 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3]
1118 ; GFX11-NEXT: s_endpgm
1120 ; GFX12-LABEL: global_store_saddr_v8f16_zext_vgpr:
1122 ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3]
1123 ; GFX12-NEXT: s_endpgm
1124 %zext.offset = zext i32 %voffset to i64
1125 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1126 store <8 x half> %data, ptr addrspace(1) %gep0
; Stores an <8 x half> value to %sbase + zext(%voffset) - 128.
; The second GEP adds a constant -128 bytes; the checks expect it to appear as
; the immediate `offset:-128` on a single saddr-form store of v[1:4]
; (global_store_dwordx4 on gfx900/gfx1010, global_store_b128 on gfx1100/gfx1200).
1130 define amdgpu_ps void @global_store_saddr_v8f16_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <8 x half> %data) {
1131 ; GCN-LABEL: global_store_saddr_v8f16_zext_vgpr_offset_neg128:
1133 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
1134 ; GCN-NEXT: s_endpgm
1136 ; GFX11-LABEL: global_store_saddr_v8f16_zext_vgpr_offset_neg128:
1138 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
1139 ; GFX11-NEXT: s_endpgm
1141 ; GFX12-LABEL: global_store_saddr_v8f16_zext_vgpr_offset_neg128:
1143 ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
1144 ; GFX12-NEXT: s_endpgm
1145 %zext.offset = zext i32 %voffset to i64
1146 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1147 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1148 store <8 x half> %data, ptr addrspace(1) %gep1
; Stores a vector of two addrspace(1) pointers to %sbase + zext(%voffset) with
; no constant offset. The checks expect the pointer vector to be stored from
; v[1:4] by a single saddr-form store (global_store_dwordx4 on gfx900/gfx1010,
; global_store_b128 on gfx1100/gfx1200) with v0 as offset and s[2:3] as base.
1152 define amdgpu_ps void @global_store_saddr_v2p1_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x ptr addrspace(1)> %data) {
1153 ; GCN-LABEL: global_store_saddr_v2p1_zext_vgpr:
1155 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3]
1156 ; GCN-NEXT: s_endpgm
1158 ; GFX11-LABEL: global_store_saddr_v2p1_zext_vgpr:
1160 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3]
1161 ; GFX11-NEXT: s_endpgm
1163 ; GFX12-LABEL: global_store_saddr_v2p1_zext_vgpr:
1165 ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3]
1166 ; GFX12-NEXT: s_endpgm
1167 %zext.offset = zext i32 %voffset to i64
1168 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1169 store <2 x ptr addrspace(1)> %data, ptr addrspace(1) %gep0
; Stores a vector of two addrspace(1) pointers to %sbase + zext(%voffset) - 128.
; The second GEP adds a constant -128 bytes; the checks expect it to appear as
; the immediate `offset:-128` on a single saddr-form store of v[1:4]
; (global_store_dwordx4 on gfx900/gfx1010, global_store_b128 on gfx1100/gfx1200).
1173 define amdgpu_ps void @global_store_saddr_v2p1_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x ptr addrspace(1)> %data) {
1174 ; GCN-LABEL: global_store_saddr_v2p1_zext_vgpr_offset_neg128:
1176 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
1177 ; GCN-NEXT: s_endpgm
1179 ; GFX11-LABEL: global_store_saddr_v2p1_zext_vgpr_offset_neg128:
1181 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
1182 ; GFX11-NEXT: s_endpgm
1184 ; GFX12-LABEL: global_store_saddr_v2p1_zext_vgpr_offset_neg128:
1186 ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
1187 ; GFX12-NEXT: s_endpgm
1188 %zext.offset = zext i32 %voffset to i64
1189 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1190 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1191 store <2 x ptr addrspace(1)> %data, ptr addrspace(1) %gep1
; Stores a vector of four addrspace(3) pointers to %sbase + zext(%voffset) with
; no constant offset. The checks expect the pointer vector to be stored from
; v[1:4] by a single saddr-form store (global_store_dwordx4 on gfx900/gfx1010,
; global_store_b128 on gfx1100/gfx1200) with v0 as offset and s[2:3] as base.
1195 define amdgpu_ps void @global_store_saddr_v4p3_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x ptr addrspace(3)> %data) {
1196 ; GCN-LABEL: global_store_saddr_v4p3_zext_vgpr:
1198 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3]
1199 ; GCN-NEXT: s_endpgm
1201 ; GFX11-LABEL: global_store_saddr_v4p3_zext_vgpr:
1203 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3]
1204 ; GFX11-NEXT: s_endpgm
1206 ; GFX12-LABEL: global_store_saddr_v4p3_zext_vgpr:
1208 ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3]
1209 ; GFX12-NEXT: s_endpgm
1210 %zext.offset = zext i32 %voffset to i64
1211 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1212 store <4 x ptr addrspace(3)> %data, ptr addrspace(1) %gep0
; Stores a vector of four addrspace(3) pointers to %sbase + zext(%voffset) - 128.
; The second GEP adds a constant -128 bytes; the checks expect it to appear as
; the immediate `offset:-128` on a single saddr-form store of v[1:4]
; (global_store_dwordx4 on gfx900/gfx1010, global_store_b128 on gfx1100/gfx1200).
1216 define amdgpu_ps void @global_store_saddr_v4p3_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x ptr addrspace(3)> %data) {
1217 ; GCN-LABEL: global_store_saddr_v4p3_zext_vgpr_offset_neg128:
1219 ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
1220 ; GCN-NEXT: s_endpgm
1222 ; GFX11-LABEL: global_store_saddr_v4p3_zext_vgpr_offset_neg128:
1224 ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
1225 ; GFX11-NEXT: s_endpgm
1227 ; GFX12-LABEL: global_store_saddr_v4p3_zext_vgpr_offset_neg128:
1229 ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
1230 ; GFX12-NEXT: s_endpgm
1231 %zext.offset = zext i32 %voffset to i64
1232 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1233 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1234 store <4 x ptr addrspace(3)> %data, ptr addrspace(1) %gep1
1238 ; --------------------------------------------------------------------------------
1240 ; --------------------------------------------------------------------------------
; seq_cst atomic i32 store (align 4) to %sbase + zext(%voffset).
; The checks still expect the saddr form (v0 offset, s[2:3] base):
; global_store_dword on gfx900/gfx1010 and global_store_b32 on gfx1100.
; On gfx1200 the store carries scope:SCOPE_SYS and is preceded by a
; `global_wb scope:SCOPE_SYS` instruction.
1242 define amdgpu_ps void @atomic_global_store_saddr_i32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
1243 ; GCN-LABEL: atomic_global_store_saddr_i32_zext_vgpr:
1245 ; GCN-NEXT: global_store_dword v0, v1, s[2:3]
1246 ; GCN-NEXT: s_endpgm
1248 ; GFX11-LABEL: atomic_global_store_saddr_i32_zext_vgpr:
1250 ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
1251 ; GFX11-NEXT: s_endpgm
1253 ; GFX12-LABEL: atomic_global_store_saddr_i32_zext_vgpr:
1255 ; GFX12-NEXT: global_wb scope:SCOPE_SYS
1256 ; GFX12-NEXT: global_store_b32 v0, v1, s[2:3] scope:SCOPE_SYS
1257 ; GFX12-NEXT: s_endpgm
1258 %zext.offset = zext i32 %voffset to i64
1259 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1260 store atomic i32 %data, ptr addrspace(1) %gep0 seq_cst, align 4
; seq_cst atomic i32 store (align 4) to %sbase + zext(%voffset) - 128.
; The checks expect the constant -128 to appear as the immediate `offset:-128`
; on the saddr-form store (global_store_dword on gfx900/gfx1010,
; global_store_b32 on gfx1100). On gfx1200 the store also carries
; scope:SCOPE_SYS and is preceded by a `global_wb scope:SCOPE_SYS` instruction.
1264 define amdgpu_ps void @atomic_global_store_saddr_i32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
1265 ; GCN-LABEL: atomic_global_store_saddr_i32_zext_vgpr_offset_neg128:
1267 ; GCN-NEXT: global_store_dword v0, v1, s[2:3] offset:-128
1268 ; GCN-NEXT: s_endpgm
1270 ; GFX11-LABEL: atomic_global_store_saddr_i32_zext_vgpr_offset_neg128:
1272 ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3] offset:-128
1273 ; GFX11-NEXT: s_endpgm
1275 ; GFX12-LABEL: atomic_global_store_saddr_i32_zext_vgpr_offset_neg128:
1277 ; GFX12-NEXT: global_wb scope:SCOPE_SYS
1278 ; GFX12-NEXT: global_store_b32 v0, v1, s[2:3] offset:-128 scope:SCOPE_SYS
1279 ; GFX12-NEXT: s_endpgm
1280 %zext.offset = zext i32 %voffset to i64
1281 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1282 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1283 store atomic i32 %data, ptr addrspace(1) %gep1 seq_cst, align 4
; seq_cst atomic i64 store (align 8) to %sbase + zext(%voffset).
; The checks still expect the saddr form (v0 offset, s[2:3] base) storing
; v[1:2]: global_store_dwordx2 on gfx900/gfx1010 and global_store_b64 on
; gfx1100. On gfx1200 the store carries scope:SCOPE_SYS and is preceded by a
; `global_wb scope:SCOPE_SYS` instruction.
1287 define amdgpu_ps void @atomic_global_store_saddr_i64_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
1288 ; GCN-LABEL: atomic_global_store_saddr_i64_zext_vgpr:
1290 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
1291 ; GCN-NEXT: s_endpgm
1293 ; GFX11-LABEL: atomic_global_store_saddr_i64_zext_vgpr:
1295 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
1296 ; GFX11-NEXT: s_endpgm
1298 ; GFX12-LABEL: atomic_global_store_saddr_i64_zext_vgpr:
1300 ; GFX12-NEXT: global_wb scope:SCOPE_SYS
1301 ; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3] scope:SCOPE_SYS
1302 ; GFX12-NEXT: s_endpgm
1303 %zext.offset = zext i32 %voffset to i64
1304 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1305 store atomic i64 %data, ptr addrspace(1) %gep0 seq_cst, align 8
; seq_cst atomic i64 store (align 8) to %sbase + zext(%voffset) - 128.
; The checks expect the constant -128 to appear as the immediate `offset:-128`
; on the saddr-form store of v[1:2] (global_store_dwordx2 on gfx900/gfx1010,
; global_store_b64 on gfx1100). On gfx1200 the store also carries
; scope:SCOPE_SYS and is preceded by a `global_wb scope:SCOPE_SYS` instruction.
1309 define amdgpu_ps void @atomic_global_store_saddr_i64_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
1310 ; GCN-LABEL: atomic_global_store_saddr_i64_zext_vgpr_offset_neg128:
1312 ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
1313 ; GCN-NEXT: s_endpgm
1315 ; GFX11-LABEL: atomic_global_store_saddr_i64_zext_vgpr_offset_neg128:
1317 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
1318 ; GFX11-NEXT: s_endpgm
1320 ; GFX12-LABEL: atomic_global_store_saddr_i64_zext_vgpr_offset_neg128:
1322 ; GFX12-NEXT: global_wb scope:SCOPE_SYS
1323 ; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128 scope:SCOPE_SYS
1324 ; GFX12-NEXT: s_endpgm
1325 %zext.offset = zext i32 %voffset to i64
1326 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1327 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1328 store atomic i64 %data, ptr addrspace(1) %gep1 seq_cst, align 8
1332 ; --------------------------------------------------------------------------------
1333 ; D16 HI stores (the stored value is element 1 of a <2 x i16> argument)
1334 ; --------------------------------------------------------------------------------
; Extracts element 1 of the <2 x i16> argument and stores it as an i16 to
; %sbase + zext(%voffset). The checks expect the extract to be folded into a
; d16_hi store that reads v1 directly: global_store_short_d16_hi on
; gfx900/gfx1010 and global_store_d16_hi_b16 on gfx1100/gfx1200, in saddr form
; (v0 offset, s[2:3] base).
1336 define amdgpu_ps void @global_store_saddr_i16_d16hi_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x i16> %data) {
1337 ; GCN-LABEL: global_store_saddr_i16_d16hi_zext_vgpr:
1339 ; GCN-NEXT: global_store_short_d16_hi v0, v1, s[2:3]
1340 ; GCN-NEXT: s_endpgm
1342 ; GFX11-LABEL: global_store_saddr_i16_d16hi_zext_vgpr:
1344 ; GFX11-NEXT: global_store_d16_hi_b16 v0, v1, s[2:3]
1345 ; GFX11-NEXT: s_endpgm
1347 ; GFX12-LABEL: global_store_saddr_i16_d16hi_zext_vgpr:
1349 ; GFX12-NEXT: global_store_d16_hi_b16 v0, v1, s[2:3]
1350 ; GFX12-NEXT: s_endpgm
1351 %zext.offset = zext i32 %voffset to i64
1352 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1353 %data.hi = extractelement <2 x i16> %data, i32 1
1354 store i16 %data.hi, ptr addrspace(1) %gep0
; Extracts element 1 of the <2 x i16> argument and stores it as an i16 to
; %sbase + zext(%voffset) - 128. The checks expect a d16_hi store reading v1
; (global_store_short_d16_hi on gfx900/gfx1010, global_store_d16_hi_b16 on
; gfx1100/gfx1200) in saddr form, with the constant -128 as the immediate
; `offset:-128`.
1358 define amdgpu_ps void @global_store_saddr_i16_d16hi_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x i16> %data) {
1359 ; GCN-LABEL: global_store_saddr_i16_d16hi_zext_vgpr_offset_neg128:
1361 ; GCN-NEXT: global_store_short_d16_hi v0, v1, s[2:3] offset:-128
1362 ; GCN-NEXT: s_endpgm
1364 ; GFX11-LABEL: global_store_saddr_i16_d16hi_zext_vgpr_offset_neg128:
1366 ; GFX11-NEXT: global_store_d16_hi_b16 v0, v1, s[2:3] offset:-128
1367 ; GFX11-NEXT: s_endpgm
1369 ; GFX12-LABEL: global_store_saddr_i16_d16hi_zext_vgpr_offset_neg128:
1371 ; GFX12-NEXT: global_store_d16_hi_b16 v0, v1, s[2:3] offset:-128
1372 ; GFX12-NEXT: s_endpgm
1373 %zext.offset = zext i32 %voffset to i64
1374 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1375 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1376 %data.hi = extractelement <2 x i16> %data, i32 1
1377 store i16 %data.hi, ptr addrspace(1) %gep1
; Extracts element 1 of the <2 x i16> argument, truncates it to i8 and stores
; the byte to %sbase + zext(%voffset). The checks expect both the extract and
; the truncate to be folded into a byte-sized d16_hi store reading v1:
; global_store_byte_d16_hi on gfx900/gfx1010 and global_store_d16_hi_b8 on
; gfx1100/gfx1200, in saddr form (v0 offset, s[2:3] base).
1381 define amdgpu_ps void @global_store_saddr_i16_d16hi_trunci8_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x i16> %data) {
1382 ; GCN-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr:
1384 ; GCN-NEXT: global_store_byte_d16_hi v0, v1, s[2:3]
1385 ; GCN-NEXT: s_endpgm
1387 ; GFX11-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr:
1389 ; GFX11-NEXT: global_store_d16_hi_b8 v0, v1, s[2:3]
1390 ; GFX11-NEXT: s_endpgm
1392 ; GFX12-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr:
1394 ; GFX12-NEXT: global_store_d16_hi_b8 v0, v1, s[2:3]
1395 ; GFX12-NEXT: s_endpgm
1396 %zext.offset = zext i32 %voffset to i64
1397 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1398 %data.hi = extractelement <2 x i16> %data, i32 1
1399 %data.hi.trunc = trunc i16 %data.hi to i8
1400 store i8 %data.hi.trunc, ptr addrspace(1) %gep0
; Extracts element 1 of the <2 x i16> argument, truncates it to i8 and stores
; the byte to %sbase + zext(%voffset) - 128. The checks expect a byte-sized
; d16_hi store reading v1 (global_store_byte_d16_hi on gfx900/gfx1010,
; global_store_d16_hi_b8 on gfx1100/gfx1200) in saddr form, with the constant
; -128 as the immediate `offset:-128`.
1404 define amdgpu_ps void @global_store_saddr_i16_d16hi_trunci8_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x i16> %data) {
1405 ; GCN-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr_offset_neg128:
1407 ; GCN-NEXT: global_store_byte_d16_hi v0, v1, s[2:3] offset:-128
1408 ; GCN-NEXT: s_endpgm
1410 ; GFX11-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr_offset_neg128:
1412 ; GFX11-NEXT: global_store_d16_hi_b8 v0, v1, s[2:3] offset:-128
1413 ; GFX11-NEXT: s_endpgm
1415 ; GFX12-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr_offset_neg128:
1417 ; GFX12-NEXT: global_store_d16_hi_b8 v0, v1, s[2:3] offset:-128
1418 ; GFX12-NEXT: s_endpgm
1419 %zext.offset = zext i32 %voffset to i64
1420 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1421 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1422 %data.hi = extractelement <2 x i16> %data, i32 1
1423 %data.hi.trunc = trunc i16 %data.hi to i8
1424 store i8 %data.hi.trunc, ptr addrspace(1) %gep1