; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10 %s

; Test using the saddr addressing mode of global_*store_* flat instructions.
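; In the saddr form the 64-bit base address lives in an SGPR pair and the
; 32-bit offset in a single VGPR, optionally combined with a signed immediate
; offset, e.g. "global_store_byte v0, v2, s[2:3] offset:-2048".
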
define amdgpu_ps void @global_store_saddr_i8_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 addrspace(1)* %voffset.ptr, i8 %data) {
; GCN-LABEL: global_store_saddr_i8_zext_vgpr:
; GCN-NEXT: global_load_dword v0, v[0:1], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: global_store_byte v0, v2, s[2:3]
  %voffset = load i32, i32 addrspace(1)* %voffset.ptr
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  store i8 %data, i8 addrspace(1)* %gep0
  ret void
}

; Maximum positive offset on gfx10
define amdgpu_ps void @global_store_saddr_i8_zext_vgpr_offset_2047(i8 addrspace(1)* inreg %sbase, i32 addrspace(1)* %voffset.ptr, i8 %data) {
; GCN-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
; GCN-NEXT: global_load_dword v0, v[0:1], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: global_store_byte v0, v2, s[2:3] offset:2047
  %voffset = load i32, i32 addrspace(1)* %voffset.ptr
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 2047
  store i8 %data, i8 addrspace(1)* %gep1
  ret void
}

; Maximum negative offset on gfx10
define amdgpu_ps void @global_store_saddr_i8_zext_vgpr_offset_neg2048(i8 addrspace(1)* inreg %sbase, i32 addrspace(1)* %voffset.ptr, i8 %data) {
; GCN-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
; GCN-NEXT: global_load_dword v0, v[0:1], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: global_store_byte v0, v2, s[2:3] offset:-2048
  %voffset = load i32, i32 addrspace(1)* %voffset.ptr
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -2048
  store i8 %data, i8 addrspace(1)* %gep1
  ret void
}

; --------------------------------------------------------------------------------
; Uniformity edge cases
; --------------------------------------------------------------------------------

@ptr.in.lds = internal addrspace(3) global i8 addrspace(1)* undef

; Base pointer is uniform, but also in VGPRs
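; The pointer is loaded from LDS into VGPRs, but it is uniform across the
; wave, so it is expected to be moved into SGPRs with v_readfirstlane and
; still use the saddr form of the store.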
define amdgpu_ps void @global_store_saddr_uniform_ptr_in_vgprs(i32 %voffset, i8 %data) {
; GFX9-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: ds_read_b64 v[2:3], v2
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v2
; GFX9-NEXT: v_readfirstlane_b32 s1, v3
; GFX9-NEXT: global_store_byte v0, v1, s[0:1]
; GFX10-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: ds_read_b64 v[2:3], v2
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v2
; GFX10-NEXT: v_readfirstlane_b32 s1, v3
; GFX10-NEXT: global_store_byte v0, v1, s[0:1]
; GFX10-NEXT: s_endpgm
  %sbase = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(3)* @ptr.in.lds
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  store i8 %data, i8 addrspace(1)* %gep0
  ret void
}

; Base pointer is uniform, but also in VGPRs, with imm offset
define amdgpu_ps void @global_store_saddr_uniform_ptr_in_vgprs_immoffset(i32 %voffset, i8 %data) {
; GFX9-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: ds_read_b64 v[2:3], v2
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v2
; GFX9-NEXT: v_readfirstlane_b32 s1, v3
; GFX9-NEXT: global_store_byte v0, v1, s[0:1] offset:-120
; GFX10-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: ds_read_b64 v[2:3], v2
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v2
; GFX10-NEXT: v_readfirstlane_b32 s1, v3
; GFX10-NEXT: global_store_byte v0, v1, s[0:1] offset:-120
; GFX10-NEXT: s_endpgm
  %sbase = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(3)* @ptr.in.lds
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -120
  store i8 %data, i8 addrspace(1)* %gep1
  ret void
}

; --------------------------------------------------------------------------------
; Stress various type stores
; --------------------------------------------------------------------------------
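; The store width selects the mnemonic: byte/short for sub-dword types, dword
; for 32-bit values, and dwordx2/dwordx3/dwordx4 for 64-, 96- and 128-bit
; values. Pointer and floating-point types use the same instructions as
; same-sized integers.
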
define amdgpu_ps void @global_store_saddr_i16_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, i16 %data) {
; GCN-LABEL: global_store_saddr_i16_zext_vgpr:
; GCN-NEXT: global_store_short v0, v1, s[2:3]
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to i16 addrspace(1)*
  store i16 %data, i16 addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_i16_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, i16 %data) {
; GCN-LABEL: global_store_saddr_i16_zext_vgpr_offset_neg128:
; GCN-NEXT: global_store_short v0, v1, s[2:3] offset:-128
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to i16 addrspace(1)*
  store i16 %data, i16 addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_f16_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, half %data) {
; GCN-LABEL: global_store_saddr_f16_zext_vgpr:
; GCN-NEXT: global_store_short v0, v1, s[2:3]
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to half addrspace(1)*
  store half %data, half addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_f16_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, half %data) {
; GCN-LABEL: global_store_saddr_f16_zext_vgpr_offset_neg128:
; GCN-NEXT: global_store_short v0, v1, s[2:3] offset:-128
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to half addrspace(1)*
  store half %data, half addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_i32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, i32 %data) {
; GCN-LABEL: global_store_saddr_i32_zext_vgpr:
; GCN-NEXT: global_store_dword v0, v1, s[2:3]
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to i32 addrspace(1)*
  store i32 %data, i32 addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_i32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, i32 %data) {
; GCN-LABEL: global_store_saddr_i32_zext_vgpr_offset_neg128:
; GCN-NEXT: global_store_dword v0, v1, s[2:3] offset:-128
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to i32 addrspace(1)*
  store i32 %data, i32 addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_f32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, float %data) {
; GCN-LABEL: global_store_saddr_f32_zext_vgpr:
; GCN-NEXT: global_store_dword v0, v1, s[2:3]
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to float addrspace(1)*
  store float %data, float addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_f32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, float %data) {
; GCN-LABEL: global_store_saddr_f32_zext_vgpr_offset_neg128:
; GCN-NEXT: global_store_dword v0, v1, s[2:3] offset:-128
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to float addrspace(1)*
  store float %data, float addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_p3_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, i8 addrspace(3)* %data) {
; GCN-LABEL: global_store_saddr_p3_zext_vgpr:
; GCN-NEXT: global_store_dword v0, v1, s[2:3]
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to i8 addrspace(3)* addrspace(1)*
  store i8 addrspace(3)* %data, i8 addrspace(3)* addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_p3_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, i8 addrspace(3)* %data) {
; GCN-LABEL: global_store_saddr_p3_zext_vgpr_offset_neg128:
; GCN-NEXT: global_store_dword v0, v1, s[2:3] offset:-128
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to i8 addrspace(3)* addrspace(1)*
  store i8 addrspace(3)* %data, i8 addrspace(3)* addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_i64_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, i64 %data) {
; GCN-LABEL: global_store_saddr_i64_zext_vgpr:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to i64 addrspace(1)*
  store i64 %data, i64 addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_i64_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, i64 %data) {
; GCN-LABEL: global_store_saddr_i64_zext_vgpr_offset_neg128:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to i64 addrspace(1)*
  store i64 %data, i64 addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_f64_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, double %data) {
; GCN-LABEL: global_store_saddr_f64_zext_vgpr:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to double addrspace(1)*
  store double %data, double addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_f64_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, double %data) {
; GCN-LABEL: global_store_saddr_f64_zext_vgpr_offset_neg128:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to double addrspace(1)*
  store double %data, double addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v2i32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i32> %data) {
; GCN-LABEL: global_store_saddr_v2i32_zext_vgpr:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <2 x i32> addrspace(1)*
  store <2 x i32> %data, <2 x i32> addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v2i32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i32> %data) {
; GCN-LABEL: global_store_saddr_v2i32_zext_vgpr_offset_neg128:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <2 x i32> addrspace(1)*
  store <2 x i32> %data, <2 x i32> addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v2f32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x float> %data) {
; GCN-LABEL: global_store_saddr_v2f32_zext_vgpr:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <2 x float> addrspace(1)*
  store <2 x float> %data, <2 x float> addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v2f32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x float> %data) {
; GCN-LABEL: global_store_saddr_v2f32_zext_vgpr_offset_neg128:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <2 x float> addrspace(1)*
  store <2 x float> %data, <2 x float> addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v4i16_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x i16> %data) {
; GCN-LABEL: global_store_saddr_v4i16_zext_vgpr:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <4 x i16> addrspace(1)*
  store <4 x i16> %data, <4 x i16> addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v4i16_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x i16> %data) {
; GCN-LABEL: global_store_saddr_v4i16_zext_vgpr_offset_neg128:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <4 x i16> addrspace(1)*
  store <4 x i16> %data, <4 x i16> addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v4f16_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x half> %data) {
; GCN-LABEL: global_store_saddr_v4f16_zext_vgpr:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <4 x half> addrspace(1)*
  store <4 x half> %data, <4 x half> addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v4f16_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x half> %data) {
; GCN-LABEL: global_store_saddr_v4f16_zext_vgpr_offset_neg128:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <4 x half> addrspace(1)*
  store <4 x half> %data, <4 x half> addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_p1_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, i8 addrspace(1)* %data) {
; GCN-LABEL: global_store_saddr_p1_zext_vgpr:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to i8 addrspace(1)* addrspace(1)*
  store i8 addrspace(1)* %data, i8 addrspace(1)* addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_p1_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, i8 addrspace(1)* %data) {
; GCN-LABEL: global_store_saddr_p1_zext_vgpr_offset_neg128:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to i8 addrspace(1)* addrspace(1)*
  store i8 addrspace(1)* %data, i8 addrspace(1)* addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v3i32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <3 x i32> %data) {
; GCN-LABEL: global_store_saddr_v3i32_zext_vgpr:
; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3]
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <3 x i32> addrspace(1)*
  store <3 x i32> %data, <3 x i32> addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v3i32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <3 x i32> %data) {
; GCN-LABEL: global_store_saddr_v3i32_zext_vgpr_offset_neg128:
; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3] offset:-128
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <3 x i32> addrspace(1)*
  store <3 x i32> %data, <3 x i32> addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v3f32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <3 x float> %data) {
; GCN-LABEL: global_store_saddr_v3f32_zext_vgpr:
; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3]
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <3 x float> addrspace(1)*
  store <3 x float> %data, <3 x float> addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v3f32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <3 x float> %data) {
; GCN-LABEL: global_store_saddr_v3f32_zext_vgpr_offset_neg128:
; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3] offset:-128
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <3 x float> addrspace(1)*
  store <3 x float> %data, <3 x float> addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v6i16_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <6 x i16> %data) {
; GCN-LABEL: global_store_saddr_v6i16_zext_vgpr:
; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3]
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <6 x i16> addrspace(1)*
  store <6 x i16> %data, <6 x i16> addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v6i16_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <6 x i16> %data) {
; GCN-LABEL: global_store_saddr_v6i16_zext_vgpr_offset_neg128:
; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3] offset:-128
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <6 x i16> addrspace(1)*
  store <6 x i16> %data, <6 x i16> addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v6f16_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <6 x half> %data) {
; GCN-LABEL: global_store_saddr_v6f16_zext_vgpr:
; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3]
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <6 x half> addrspace(1)*
  store <6 x half> %data, <6 x half> addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v6f16_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <6 x half> %data) {
; GCN-LABEL: global_store_saddr_v6f16_zext_vgpr_offset_neg128:
; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3] offset:-128
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <6 x half> addrspace(1)*
  store <6 x half> %data, <6 x half> addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v4i32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x i32> %data) {
; GCN-LABEL: global_store_saddr_v4i32_zext_vgpr:
; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3]
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <4 x i32> addrspace(1)*
  store <4 x i32> %data, <4 x i32> addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v4i32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x i32> %data) {
; GCN-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <4 x i32> addrspace(1)*
  store <4 x i32> %data, <4 x i32> addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v4f32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x float> %data) {
; GCN-LABEL: global_store_saddr_v4f32_zext_vgpr:
; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3]
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <4 x float> addrspace(1)*
  store <4 x float> %data, <4 x float> addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v4f32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x float> %data) {
; GCN-LABEL: global_store_saddr_v4f32_zext_vgpr_offset_neg128:
; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <4 x float> addrspace(1)*
  store <4 x float> %data, <4 x float> addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v2i64_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i64> %data) {
; GCN-LABEL: global_store_saddr_v2i64_zext_vgpr:
; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3]
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <2 x i64> addrspace(1)*
  store <2 x i64> %data, <2 x i64> addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v2i64_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i64> %data) {
; GCN-LABEL: global_store_saddr_v2i64_zext_vgpr_offset_neg128:
; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <2 x i64> addrspace(1)*
  store <2 x i64> %data, <2 x i64> addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v2f64_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x double> %data) {
; GCN-LABEL: global_store_saddr_v2f64_zext_vgpr:
; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3]
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <2 x double> addrspace(1)*
  store <2 x double> %data, <2 x double> addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v2f64_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x double> %data) {
; GCN-LABEL: global_store_saddr_v2f64_zext_vgpr_offset_neg128:
; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <2 x double> addrspace(1)*
  store <2 x double> %data, <2 x double> addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v8i16_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <8 x i16> %data) {
; GCN-LABEL: global_store_saddr_v8i16_zext_vgpr:
; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3]
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <8 x i16> addrspace(1)*
  store <8 x i16> %data, <8 x i16> addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v8i16_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <8 x i16> %data) {
; GCN-LABEL: global_store_saddr_v8i16_zext_vgpr_offset_neg128:
; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <8 x i16> addrspace(1)*
  store <8 x i16> %data, <8 x i16> addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v8f16_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <8 x half> %data) {
; GCN-LABEL: global_store_saddr_v8f16_zext_vgpr:
; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3]
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <8 x half> addrspace(1)*
  store <8 x half> %data, <8 x half> addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v8f16_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <8 x half> %data) {
; GCN-LABEL: global_store_saddr_v8f16_zext_vgpr_offset_neg128:
; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <8 x half> addrspace(1)*
  store <8 x half> %data, <8 x half> addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v2p1_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i8 addrspace(1)*> %data) {
; GCN-LABEL: global_store_saddr_v2p1_zext_vgpr:
; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3]
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <2 x i8 addrspace(1)*> addrspace(1)*
  store <2 x i8 addrspace(1)*> %data, <2 x i8 addrspace(1)*> addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v2p1_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i8 addrspace(1)*> %data) {
; GCN-LABEL: global_store_saddr_v2p1_zext_vgpr_offset_neg128:
; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <2 x i8 addrspace(1)*> addrspace(1)*
  store <2 x i8 addrspace(1)*> %data, <2 x i8 addrspace(1)*> addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v4p3_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x i8 addrspace(3)*> %data) {
; GCN-LABEL: global_store_saddr_v4p3_zext_vgpr:
; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3]
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <4 x i8 addrspace(3)*> addrspace(1)*
  store <4 x i8 addrspace(3)*> %data, <4 x i8 addrspace(3)*> addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v4p3_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x i8 addrspace(3)*> %data) {
; GCN-LABEL: global_store_saddr_v4p3_zext_vgpr_offset_neg128:
; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <4 x i8 addrspace(3)*> addrspace(1)*
  store <4 x i8 addrspace(3)*> %data, <4 x i8 addrspace(3)*> addrspace(1)* %gep1.cast
  ret void
}

; --------------------------------------------------------------------------------
; Atomic store
; --------------------------------------------------------------------------------
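; A seq_cst atomic store must wait on all outstanding memory operations first
; (s_waitcnt on gfx9, plus s_waitcnt_vscnt on gfx10), but should still select
; the saddr form of the store.
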
define amdgpu_ps void @atomic_global_store_saddr_i32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, i32 %data) {
; GFX9-LABEL: atomic_global_store_saddr_i32_zext_vgpr:
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_store_dword v0, v1, s[2:3]
; GFX9-NEXT: s_endpgm
; GFX10-LABEL: atomic_global_store_saddr_i32_zext_vgpr:
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_store_dword v0, v1, s[2:3]
; GFX10-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to i32 addrspace(1)*
  store atomic i32 %data, i32 addrspace(1)* %gep0.cast seq_cst, align 4
  ret void
}

define amdgpu_ps void @atomic_global_store_saddr_i32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, i32 %data) {
; GFX9-LABEL: atomic_global_store_saddr_i32_zext_vgpr_offset_neg128:
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_store_dword v0, v1, s[2:3] offset:-128
; GFX9-NEXT: s_endpgm
; GFX10-LABEL: atomic_global_store_saddr_i32_zext_vgpr_offset_neg128:
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_store_dword v0, v1, s[2:3] offset:-128
; GFX10-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to i32 addrspace(1)*
  store atomic i32 %data, i32 addrspace(1)* %gep1.cast seq_cst, align 4
  ret void
}

define amdgpu_ps void @atomic_global_store_saddr_i64_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, i64 %data) {
; GFX9-LABEL: atomic_global_store_saddr_i64_zext_vgpr:
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
; GFX9-NEXT: s_endpgm
; GFX10-LABEL: atomic_global_store_saddr_i64_zext_vgpr:
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
; GFX10-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to i64 addrspace(1)*
  store atomic i64 %data, i64 addrspace(1)* %gep0.cast seq_cst, align 8
  ret void
}

define amdgpu_ps void @atomic_global_store_saddr_i64_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, i64 %data) {
; GFX9-LABEL: atomic_global_store_saddr_i64_zext_vgpr_offset_neg128:
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
; GFX9-NEXT: s_endpgm
; GFX10-LABEL: atomic_global_store_saddr_i64_zext_vgpr_offset_neg128:
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
; GFX10-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to i64 addrspace(1)*
  store atomic i64 %data, i64 addrspace(1)* %gep1.cast seq_cst, align 8
  ret void
}

; --------------------------------------------------------------------------------
; D16 HI store (hi 16)
; --------------------------------------------------------------------------------
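; Storing the high half of a <2 x i16> (or its truncation to i8) should select
; the *_d16_hi store variants.
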
define amdgpu_ps void @global_store_saddr_i16_d16hi_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i16> %data) {
; GCN-LABEL: global_store_saddr_i16_d16hi_zext_vgpr:
; GCN-NEXT: global_store_short_d16_hi v0, v1, s[2:3]
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to i16 addrspace(1)*
  %data.hi = extractelement <2 x i16> %data, i32 1
  store i16 %data.hi, i16 addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_i16_d16hi_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i16> %data) {
; GCN-LABEL: global_store_saddr_i16_d16hi_zext_vgpr_offset_neg128:
; GCN-NEXT: global_store_short_d16_hi v0, v1, s[2:3] offset:-128
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to i16 addrspace(1)*
  %data.hi = extractelement <2 x i16> %data, i32 1
  store i16 %data.hi, i16 addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_i16_d16hi_trunci8_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i16> %data) {
; GCN-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr:
; GCN-NEXT: global_store_byte_d16_hi v0, v1, s[2:3]
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %data.hi = extractelement <2 x i16> %data, i32 1
  %data.hi.trunc = trunc i16 %data.hi to i8
  store i8 %data.hi.trunc, i8 addrspace(1)* %gep0
  ret void
}

define amdgpu_ps void @global_store_saddr_i16_d16hi_trunci8_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i16> %data) {
; GCN-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr_offset_neg128:
; GCN-NEXT: global_store_byte_d16_hi v0, v1, s[2:3] offset:-128
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %data.hi = extractelement <2 x i16> %data, i32 1
  %data.hi.trunc = trunc i16 %data.hi to i8
  store i8 %data.hi.trunc, i8 addrspace(1)* %gep1
  ret void
}