1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
2 ; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
; Test: load an i16 from an addrspace(3) pointer and insert it into element 0
; of an otherwise-undef <2 x i16>. The gfx900 checks expect the load to be
; selected as ds_read_u16_d16 v0, v0.
4 ; GCN-LABEL: {{^}}load_local_lo_v2i16_undeflo:
6 ; GFX9-NEXT: ds_read_u16_d16 v0, v0
8 ; GFX9-NEXT: s_setpc_b64
11 define <2 x i16> @load_local_lo_v2i16_undeflo(i16 addrspace(3)* %in) #0 {
13 %load = load i16, i16 addrspace(3)* %in
14 %build = insertelement <2 x i16> undef, i16 %load, i32 0
; Test: addrspace(3) i16 load inserted into element 0 of a <2 x i16> that was
; first built from the i16 argument %reg.
; NOTE(review): both insertelement instructions below write index 0, so %reg
; is overwritten by the loaded value and never reaches the result. Confirm
; this is intended; the expected output loads straight into v0.
18 ; GCN-LABEL: {{^}}load_local_lo_v2i16_reglo:
20 ; GFX9-NEXT: ds_read_u16_d16 v0, v0
21 ; GFX9-NEXT: s_waitcnt
22 ; GFX9-NEXT: s_setpc_b64
25 define <2 x i16> @load_local_lo_v2i16_reglo(i16 addrspace(3)* %in, i16 %reg) #0 {
27 %load = load i16, i16 addrspace(3)* %in
28 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
29 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
33 ; Show that we get reasonable regalloc without physreg constraints.
; The vector is stored to an undef addrspace(1) pointer instead of being
; returned, so the result is not tied to a return register.
; NOTE(review): both insertelement instructions below write index 0, so %reg
; is overwritten by the loaded value. Confirm this is intended.
34 ; GCN-LABEL: {{^}}load_local_lo_v2i16_reglo_vreg:
36 ; GFX9-NEXT: ds_read_u16_d16 v0, v0
37 ; GFX9-NEXT: s_waitcnt
38 ; GFX9-NEXT: global_store_dword v[0:1], v0, off{{$}}
39 ; GFX9-NEXT: s_waitcnt
40 ; GFX9-NEXT: s_setpc_b64
43 define void @load_local_lo_v2i16_reglo_vreg(i16 addrspace(3)* %in, i16 %reg) #0 {
45 %load = load i16, i16 addrspace(3)* %in
46 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
47 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
48 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: addrspace(3) i16 load inserted into element 0 of a zeroinitializer
; <2 x i16>. The gfx900 checks expect the zero to be materialised in v1 first,
; the d16 load to write into v1, and the result to be copied to v0.
52 ; GCN-LABEL: {{^}}load_local_lo_v2i16_zerolo:
54 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
55 ; GFX9-NEXT: ds_read_u16_d16 v1, v0
56 ; GFX9-NEXT: s_waitcnt
57 ; GFX9-NEXT: v_mov_b32_e32 v0, v1
58 ; GFX9-NEXT: s_setpc_b64
61 define <2 x i16> @load_local_lo_v2i16_zerolo(i16 addrspace(3)* %in) #0 {
63 %load = load i16, i16 addrspace(3)* %in
64 %build = insertelement <2 x i16> zeroinitializer, i16 %load, i32 0
; Test: addrspace(3) half load inserted into element 0 of the constant vector
; <half 0.0, half 2.0>. The gfx900 checks expect the constant to be placed in
; v1 with a single v_mov of 2.0 before the d16 load writes into v1.
68 ; GCN-LABEL: {{^}}load_local_lo_v2f16_fpimm:
70 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
71 ; GFX9-NEXT: ds_read_u16_d16 v1, v0
72 ; GFX9-NEXT: s_waitcnt
73 ; GFX9-NEXT: v_mov_b32_e32 v0, v1
74 ; GFX9-NEXT: s_setpc_b64
77 define <2 x half> @load_local_lo_v2f16_fpimm(half addrspace(3)* %in) #0 {
79 %load = load half, half addrspace(3)* %in
80 %build = insertelement <2 x half> <half 0.0, half 2.0>, half %load, i32 0
; Test: the i32 argument %reg is bitcast to <2 x half>, then element 0 is
; replaced by a half loaded from addrspace(3). The gfx900 checks expect the
; d16 load to write directly into v1 and that register to be stored.
84 ; GCN-LABEL: {{^}}load_local_lo_v2f16_reghi_vreg:
86 ; GFX9-NEXT: ds_read_u16_d16 v1, v0
87 ; GFX9-NEXT: s_waitcnt
88 ; GFX9-NEXT: global_store_dword v[0:1], v1, off{{$}}
89 ; GFX9-NEXT: s_waitcnt
90 ; GFX9-NEXT: s_setpc_b64
93 define void @load_local_lo_v2f16_reghi_vreg(half addrspace(3)* %in, i32 %reg) #0 {
95 %reg.bc = bitcast i32 %reg to <2 x half>
96 %load = load half, half addrspace(3)* %in
97 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
98 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Test: the half argument %reg is inserted at index 1 and the addrspace(3)
; load at index 0. The gfx900 checks here expect a plain ds_read_u16 followed
; by v_and / v_lshl_or packing rather than a d16 load.
102 ; GCN-LABEL: {{^}}load_local_lo_v2f16_reglo_vreg:
104 ; GFX9: ds_read_u16 v
105 ; GFX9: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
106 ; GFX9: v_lshl_or_b32 v{{[0-9]+}}, v{{[0-9]+}}, 16, v{{[0-9]+}}
107 ; GFX9: global_store_dword
110 define void @load_local_lo_v2f16_reglo_vreg(half addrspace(3)* %in, half %reg) #0 {
112 %load = load half, half addrspace(3)* %in
113 %build0 = insertelement <2 x half> undef, half %reg, i32 1
114 %build1 = insertelement <2 x half> %build0, half %load, i32 0
115 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Test: i32 %reg is bitcast to <2 x i16>; element 0 is replaced by an i8
; loaded from addrspace(3) and zero-extended to i16. The gfx900 checks expect
; ds_read_u8_d16 writing into v1.
119 ; GCN-LABEL: {{^}}load_local_lo_v2i16_reghi_vreg_zexti8:
121 ; GFX9-NEXT: ds_read_u8_d16 v1, v0
122 ; GFX9-NEXT: s_waitcnt
123 ; GFX9-NEXT: global_store_dword v[0:1], v1, off{{$}}
124 ; GFX9-NEXT: s_waitcnt
125 ; GFX9-NEXT: s_setpc_b64
128 define void @load_local_lo_v2i16_reghi_vreg_zexti8(i8 addrspace(3)* %in, i32 %reg) #0 {
130 %reg.bc = bitcast i32 %reg to <2 x i16>
131 %load = load i8, i8 addrspace(3)* %in
132 %ext = zext i8 %load to i16
133 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
134 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: i16 %reg is inserted at index 1, and a zero-extended i8 loaded from
; addrspace(3) is inserted at index 0. Only the store and the function tail
; are checked in the lines visible here.
138 ; GCN-LABEL: {{^}}load_local_lo_v2i16_reglo_vreg_zexti8:
141 ; GFX9: global_store_dword
142 ; GFX9-NEXT: s_waitcnt
143 ; GFX9-NEXT: s_setpc_b64
146 define void @load_local_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(3)* %in, i16 %reg) #0 {
148 %load = load i8, i8 addrspace(3)* %in
149 %ext = zext i8 %load to i16
150 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
151 %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 0
152 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: i32 %reg is bitcast to <2 x i16>; element 0 is replaced by an i8
; loaded from addrspace(3) and sign-extended to i16. The gfx900 checks expect
; ds_read_i8_d16 writing into v1.
156 ; GCN-LABEL: {{^}}load_local_lo_v2i16_reghi_vreg_sexti8:
158 ; GFX9-NEXT: ds_read_i8_d16 v1, v0
159 ; GFX9-NEXT: s_waitcnt
160 ; GFX9-NEXT: global_store_dword v[0:1], v1, off{{$}}
161 ; GFX9-NEXT: s_waitcnt
162 ; GFX9-NEXT: s_setpc_b64
165 define void @load_local_lo_v2i16_reghi_vreg_sexti8(i8 addrspace(3)* %in, i32 %reg) #0 {
167 %reg.bc = bitcast i32 %reg to <2 x i16>
168 %load = load i8, i8 addrspace(3)* %in
169 %ext = sext i8 %load to i16
170 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
171 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: i16 %reg is inserted at index 1, and a sign-extended i8 loaded from
; addrspace(3) is inserted at index 0. The visible gfx900 checks expect the
; two halves to be combined with v_and / v_lshl_or.
175 ; GCN-LABEL: {{^}}load_local_lo_v2i16_reglo_vreg_sexti8:
178 ; GFX9: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
179 ; GFX9: v_lshl_or_b32 v{{[0-9]+}}, v{{[0-9]+}}, 16, v{{[0-9]+}}
182 define void @load_local_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(3)* %in, i16 %reg) #0 {
184 %load = load i8, i8 addrspace(3)* %in
185 %ext = sext i8 %load to i16
186 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
187 %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 0
188 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: i16 load from addrspace(1) at element index -2047 (byte offset -4094),
; inserted into element 0 of %reg bitcast to <2 x i16>. The gfx900 checks
; expect the offset to be folded into global_load_short_d16 as offset:-4094.
192 ; GCN-LABEL: {{^}}load_global_lo_v2i16_reglo_vreg:
194 ; GFX9-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
195 ; GFX9-NEXT: s_waitcnt
196 ; GFX9-NEXT: global_store_dword
197 ; GFX9-NEXT: s_waitcnt
198 ; GFX9-NEXT: s_setpc_b64
199 define void @load_global_lo_v2i16_reglo_vreg(i16 addrspace(1)* %in, i32 %reg) #0 {
201 %reg.bc = bitcast i32 %reg to <2 x i16>
202 %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 -2047
203 %load = load i16, i16 addrspace(1)* %gep
204 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
205 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: half load from addrspace(1) at element index -2047 (byte offset
; -4094), inserted into element 0 of %reg bitcast to <2 x half>. The gfx900
; checks expect global_load_short_d16 with offset:-4094.
209 ; GCN-LABEL: {{^}}load_global_lo_v2f16_reglo_vreg:
211 ; GFX9-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
212 ; GFX9-NEXT: s_waitcnt
213 ; GFX9-NEXT: global_store_dword
214 ; GFX9-NEXT: s_waitcnt
215 ; GFX9-NEXT: s_setpc_b64
216 define void @load_global_lo_v2f16_reglo_vreg(half addrspace(1)* %in, i32 %reg) #0 {
218 %reg.bc = bitcast i32 %reg to <2 x half>
219 %gep = getelementptr inbounds half, half addrspace(1)* %in, i64 -2047
220 %load = load half, half addrspace(1)* %gep
221 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
222 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Test: i8 load from addrspace(1) at byte offset -4095, zero-extended to i16
; and inserted into element 0 of %reg bitcast to <2 x i16>. The gfx900 checks
; expect global_load_ubyte_d16 with offset:-4095.
226 ; GCN-LABEL: {{^}}load_global_lo_v2i16_reglo_vreg_zexti8:
228 ; GFX9-NEXT: global_load_ubyte_d16 v2, v[0:1], off offset:-4095
229 ; GFX9-NEXT: s_waitcnt
230 ; GFX9-NEXT: global_store_dword
231 ; GFX9-NEXT: s_waitcnt
232 ; GFX9-NEXT: s_setpc_b64
233 define void @load_global_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(1)* %in, i32 %reg) #0 {
235 %reg.bc = bitcast i32 %reg to <2 x i16>
236 %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095
237 %load = load i8, i8 addrspace(1)* %gep
238 %ext = zext i8 %load to i16
239 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
240 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: i8 load from addrspace(1) at byte offset -4095, sign-extended to i16
; and inserted into element 0 of %reg bitcast to <2 x i16>. The gfx900 checks
; expect global_load_sbyte_d16 with offset:-4095.
244 ; GCN-LABEL: {{^}}load_global_lo_v2i16_reglo_vreg_sexti8:
246 ; GFX9-NEXT: global_load_sbyte_d16 v2, v[0:1], off offset:-4095
247 ; GFX9-NEXT: s_waitcnt
248 ; GFX9-NEXT: global_store_dword
249 ; GFX9-NEXT: s_waitcnt
250 ; GFX9-NEXT: s_setpc_b64
251 define void @load_global_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(1)* %in, i32 %reg) #0 {
253 %reg.bc = bitcast i32 %reg to <2 x i16>
254 %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095
255 %load = load i8, i8 addrspace(1)* %gep
256 %ext = sext i8 %load to i16
257 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
258 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: i16 load through an addrspace(4) pointer, inserted into element 0 of
; %reg bitcast to <2 x i16>. The gfx900 checks expect flat_load_short_d16;
; the fiji checks expect a plain flat_load_ushort.
262 ; GCN-LABEL: {{^}}load_flat_lo_v2i16_reghi_vreg:
264 ; GFX9-NEXT: flat_load_short_d16 v2, v[0:1]
265 ; GFX9-NEXT: s_waitcnt
266 ; GFX9-NEXT: global_store_dword v[0:1], v2
267 ; GFX9-NEXT: s_waitcnt
268 ; GFX9-NEXT: s_setpc_b64
270 ; VI: flat_load_ushort v{{[0-9]+}}
272 define void @load_flat_lo_v2i16_reghi_vreg(i16 addrspace(4)* %in, i32 %reg) #0 {
274 %reg.bc = bitcast i32 %reg to <2 x i16>
275 %load = load i16, i16 addrspace(4)* %in
276 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
277 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: half load through an addrspace(4) pointer, inserted into element 0 of
; %reg bitcast to <2 x half>. The gfx900 checks expect flat_load_short_d16;
; the fiji checks expect a plain flat_load_ushort.
281 ; GCN-LABEL: {{^}}load_flat_lo_v2f16_reghi_vreg:
283 ; GFX9-NEXT: flat_load_short_d16 v2, v[0:1]
284 ; GFX9-NEXT: s_waitcnt
285 ; GFX9-NEXT: global_store_dword v[0:1], v2
286 ; GFX9-NEXT: s_waitcnt
287 ; GFX9-NEXT: s_setpc_b64
289 ; VI: flat_load_ushort v{{[0-9]+}}
291 define void @load_flat_lo_v2f16_reghi_vreg(half addrspace(4)* %in, i32 %reg) #0 {
293 %reg.bc = bitcast i32 %reg to <2 x half>
294 %load = load half, half addrspace(4)* %in
295 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
296 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Test: i8 load through an addrspace(4) pointer, zero-extended to i16 and
; inserted into element 0 of %reg bitcast to <2 x i16>. The gfx900 checks
; expect flat_load_ubyte_d16; the fiji checks expect flat_load_ubyte.
300 ; GCN-LABEL: {{^}}load_flat_lo_v2i16_reglo_vreg_zexti8:
302 ; GFX9-NEXT: flat_load_ubyte_d16 v2, v[0:1]
303 ; GFX9-NEXT: s_waitcnt
304 ; GFX9-NEXT: global_store_dword v[0:1], v2
305 ; GFX9-NEXT: s_waitcnt
306 ; GFX9-NEXT: s_setpc_b64
308 ; VI: flat_load_ubyte v{{[0-9]+}}
310 define void @load_flat_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(4)* %in, i32 %reg) #0 {
312 %reg.bc = bitcast i32 %reg to <2 x i16>
313 %load = load i8, i8 addrspace(4)* %in
314 %ext = zext i8 %load to i16
315 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
316 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: i8 load through an addrspace(4) pointer, sign-extended to i16 and
; inserted into element 0 of %reg bitcast to <2 x i16>. The gfx900 checks
; expect flat_load_sbyte_d16; the fiji checks expect flat_load_sbyte followed
; by an SDWA v_or that merges the halves.
320 ; GCN-LABEL: {{^}}load_flat_lo_v2i16_reglo_vreg_sexti8:
322 ; GFX9-NEXT: flat_load_sbyte_d16 v2, v[0:1]
323 ; GFX9-NEXT: s_waitcnt
324 ; GFX9-NEXT: global_store_dword v[0:1], v2
325 ; GFX9-NEXT: s_waitcnt
326 ; GFX9-NEXT: s_setpc_b64
328 ; VI: flat_load_sbyte v{{[0-9]+}}
329 ; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
331 define void @load_flat_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(4)* %in, i32 %reg) #0 {
333 %reg.bc = bitcast i32 %reg to <2 x i16>
334 %load = load i8, i8 addrspace(4)* %in
335 %ext = sext i8 %load to i16
336 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
337 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: i16 load from a byval pointer argument at element index 2045,
; inserted into element 0 of %reg bitcast to <2 x i16>. The gfx900 checks
; expect buffer_load_short_d16 with immediate offset:4094; the fiji checks
; expect buffer_load_ushort with the same offset.
341 ; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg:
343 ; GFX9: buffer_load_short_d16 v0, off, s[0:3], s5 offset:4094{{$}}
344 ; GFX9-NEXT: s_waitcnt
345 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
346 ; GFX9-NEXT: s_waitcnt
347 ; GFX9-NEXT: s_setpc_b64
349 ; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
350 define void @load_private_lo_v2i16_reglo_vreg(i16* byval %in, i32 %reg) #0 {
352 %reg.bc = bitcast i32 %reg to <2 x i16>
353 %gep = getelementptr inbounds i16, i16* %in, i64 2045
354 %load = load i16, i16* %gep
355 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
356 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: i16 load from a byval pointer argument at element index 2045; the i16
; argument %reg is inserted at index 1 and the load at index 0. The gfx900
; checks here expect a plain buffer_load_ushort plus v_lshl_or packing rather
; than a d16 load.
360 ; GCN-LABEL: {{^}}load_private_lo_v2i16_reghi_vreg:
362 ; GFX9: buffer_load_ushort v1, off, s[0:3], s5 offset:4094{{$}}
363 ; GFX9-NEXT: s_waitcnt
365 ; GFX9: v_lshl_or_b32
367 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
368 ; GFX9-NEXT: s_waitcnt
369 ; GFX9-NEXT: s_setpc_b64
371 ; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
372 define void @load_private_lo_v2i16_reghi_vreg(i16* byval %in, i16 %reg) #0 {
374 %gep = getelementptr inbounds i16, i16* %in, i64 2045
375 %load = load i16, i16* %gep
376 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
377 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
378 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: half load from a byval pointer argument at element index 2045,
; inserted into element 0 of %reg bitcast to <2 x half>. The gfx900 checks
; expect buffer_load_short_d16 with offset:4094; the fiji checks expect
; buffer_load_ushort.
382 ; GCN-LABEL: {{^}}load_private_lo_v2f16_reglo_vreg:
384 ; GFX9: buffer_load_short_d16 v0, off, s[0:3], s5 offset:4094{{$}}
385 ; GFX9-NEXT: s_waitcnt
386 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
387 ; GFX9-NEXT: s_waitcnt
388 ; GFX9-NEXT: s_setpc_b64
390 ; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
391 define void @load_private_lo_v2f16_reglo_vreg(half* byval %in, i32 %reg) #0 {
393 %reg.bc = bitcast i32 %reg to <2 x half>
394 %gep = getelementptr inbounds half, half* %in, i64 2045
395 %load = load half, half* %gep
396 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
397 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Test: volatile i16 load from the constant address 4094 (inttoptr); the %in
; argument is unused. The result replaces element 0 of %reg bitcast to
; <2 x i16>. The checks expect the constant address to appear as the
; immediate offset:4094 of the buffer load.
401 ; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_nooff:
403 ; GFX9-NEXT: buffer_load_short_d16 v1, off, s[0:3], s4 offset:4094{{$}}
404 ; GFX9-NEXT: s_waitcnt
405 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
406 ; GFX9-NEXT: s_waitcnt
407 ; GFX9-NEXT: s_setpc_b64
409 ; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
410 define void @load_private_lo_v2i16_reglo_vreg_nooff(i16* %in, i32 %reg) #0 {
412 %reg.bc = bitcast i32 %reg to <2 x i16>
413 %load = load volatile i16, i16* inttoptr (i32 4094 to i16*)
414 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
415 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: volatile i16 load from the constant address 4094 (inttoptr), inserted
; into element 0 of %reg bitcast to <2 x i16>.
; NOTE(review): the IR body and expected output are identical to
; @load_private_lo_v2i16_reglo_vreg_nooff. Confirm whether a distinct
; "reghi" variant was intended.
419 ; GCN-LABEL: {{^}}load_private_lo_v2i16_reghi_vreg_nooff:
421 ; GFX9-NEXT: buffer_load_short_d16 v1, off, s[0:3], s4 offset:4094{{$}}
422 ; GFX9-NEXT: s_waitcnt
423 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
424 ; GFX9-NEXT: s_waitcnt
425 ; GFX9-NEXT: s_setpc_b64
427 ; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
428 define void @load_private_lo_v2i16_reghi_vreg_nooff(i16* %in, i32 %reg) #0 {
430 %reg.bc = bitcast i32 %reg to <2 x i16>
431 %load = load volatile i16, i16* inttoptr (i32 4094 to i16*)
432 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
433 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: volatile half load from the constant address 4094 (inttoptr); the %in
; argument is unused. The result replaces element 0 of %reg bitcast to
; <2 x half>. The gfx900 checks expect buffer_load_short_d16 with
; offset:4094.
437 ; GCN-LABEL: {{^}}load_private_lo_v2f16_reglo_vreg_nooff:
439 ; GFX9-NEXT: buffer_load_short_d16 v1, off, s[0:3], s4 offset:4094{{$}}
440 ; GFX9-NEXT: s_waitcnt
441 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
442 ; GFX9-NEXT: s_waitcnt
443 ; GFX9-NEXT: s_setpc_b64
445 ; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
446 define void @load_private_lo_v2f16_reglo_vreg_nooff(half* %in, i32 %reg) #0 {
448 %reg.bc = bitcast i32 %reg to <2 x half>
449 %load = load volatile half, half* inttoptr (i32 4094 to half*)
450 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
451 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Test: i8 load from a byval pointer argument at byte index 4091,
; zero-extended to i16 and inserted into element 0 of %reg bitcast to
; <2 x i16>. The gfx900 checks expect buffer_load_ubyte_d16 with offset:4095.
455 ; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_zexti8:
457 ; GFX9: buffer_load_ubyte_d16 v0, off, s[0:3], s5 offset:4095{{$}}
458 ; GFX9-NEXT: s_waitcnt
459 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
460 ; GFX9-NEXT: s_waitcnt
461 ; GFX9-NEXT: s_setpc_b64
463 ; VI: buffer_load_ubyte v{{[0-9]+}}, off, s[0:3], s5 offset:4095{{$}}
464 define void @load_private_lo_v2i16_reglo_vreg_zexti8(i8* byval %in, i32 %reg) #0 {
466 %reg.bc = bitcast i32 %reg to <2 x i16>
467 %gep = getelementptr inbounds i8, i8* %in, i64 4091
468 %load = load i8, i8* %gep
469 %ext = zext i8 %load to i16
470 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
471 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: i8 load from a byval pointer argument at byte index 4091,
; sign-extended to i16 and inserted into element 0 of %reg bitcast to
; <2 x i16>. The gfx900 checks expect buffer_load_sbyte_d16 with offset:4095.
475 ; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_sexti8:
477 ; GFX9: buffer_load_sbyte_d16 v0, off, s[0:3], s5 offset:4095{{$}}
478 ; GFX9-NEXT: s_waitcnt
479 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
480 ; GFX9-NEXT: s_waitcnt
481 ; GFX9-NEXT: s_setpc_b64
483 ; VI: buffer_load_sbyte v{{[0-9]+}}, off, s[0:3], s5 offset:4095{{$}}
484 define void @load_private_lo_v2i16_reglo_vreg_sexti8(i8* byval %in, i32 %reg) #0 {
486 %reg.bc = bitcast i32 %reg to <2 x i16>
487 %gep = getelementptr inbounds i8, i8* %in, i64 4091
488 %load = load i8, i8* %gep
489 %ext = sext i8 %load to i16
490 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
491 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: volatile i8 load from the constant address 4094 (inttoptr),
; zero-extended to i16 and inserted into element 0 of %reg bitcast to
; <2 x i16>; %in is unused. The gfx900 checks expect buffer_load_ubyte_d16
; with offset:4094.
495 ; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_nooff_zexti8:
497 ; GFX9-NEXT: buffer_load_ubyte_d16 v1, off, s[0:3], s4 offset:4094{{$}}
498 ; GFX9-NEXT: s_waitcnt
499 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
500 ; GFX9-NEXT: s_waitcnt
501 ; GFX9-NEXT: s_setpc_b64
503 ; VI: buffer_load_ubyte v0, off, s[0:3], s4 offset:4094{{$}}
504 define void @load_private_lo_v2i16_reglo_vreg_nooff_zexti8(i8* %in, i32 %reg) #0 {
506 %reg.bc = bitcast i32 %reg to <2 x i16>
507 %load = load volatile i8, i8* inttoptr (i32 4094 to i8*)
508 %ext = zext i8 %load to i16
509 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
510 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: volatile i8 load from the constant address 4094 (inttoptr),
; sign-extended to i16 and inserted into element 0 of %reg bitcast to
; <2 x i16>; %in is unused. The gfx900 checks expect buffer_load_sbyte_d16
; with offset:4094.
514 ; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_nooff_sexti8:
516 ; GFX9-NEXT: buffer_load_sbyte_d16 v1, off, s[0:3], s4 offset:4094{{$}}
517 ; GFX9-NEXT: s_waitcnt
518 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
519 ; GFX9-NEXT: s_waitcnt
520 ; GFX9-NEXT: s_setpc_b64
522 ; VI: buffer_load_sbyte v0, off, s[0:3], s4 offset:4094{{$}}
523 define void @load_private_lo_v2i16_reglo_vreg_nooff_sexti8(i8* %in, i32 %reg) #0 {
525 %reg.bc = bitcast i32 %reg to <2 x i16>
526 %load = load volatile i8, i8* inttoptr (i32 4094 to i8*)
527 %ext = sext i8 %load to i16
528 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
529 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: volatile i8 load from the constant address 4094 (inttoptr),
; zero-extended to i16, bitcast to half, and inserted into element 0 of %reg
; bitcast to <2 x half>; %in is unused. The gfx900 checks expect
; buffer_load_ubyte_d16 with offset:4094.
533 ; GCN-LABEL: {{^}}load_private_lo_v2f16_reglo_vreg_nooff_zexti8:
535 ; GFX9-NEXT: buffer_load_ubyte_d16 v1, off, s[0:3], s4 offset:4094{{$}}
536 ; GFX9-NEXT: s_waitcnt
537 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
538 ; GFX9-NEXT: s_waitcnt
539 ; GFX9-NEXT: s_setpc_b64
541 ; VI: buffer_load_ubyte v0, off, s[0:3], s4 offset:4094{{$}}
542 define void @load_private_lo_v2f16_reglo_vreg_nooff_zexti8(i8* %in, i32 %reg) #0 {
544 %reg.bc = bitcast i32 %reg to <2 x half>
545 %load = load volatile i8, i8* inttoptr (i32 4094 to i8*)
546 %ext = zext i8 %load to i16
547 %bc.ext = bitcast i16 %ext to half
548 %build1 = insertelement <2 x half> %reg.bc, half %bc.ext, i32 0
549 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Test: i16 load from addrspace(2) at element index -2047 (byte offset
; -4094), inserted into element 0 of %reg bitcast to <2 x i16>. The gfx900
; checks expect global_load_short_d16 with offset:-4094; the fiji checks
; expect flat_load_ushort.
553 ; GCN-LABEL: {{^}}load_constant_lo_v2i16_reglo_vreg:
555 ; GFX9-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
556 ; GFX9-NEXT: s_waitcnt
557 ; GFX9-NEXT: global_store_dword
558 ; GFX9-NEXT: s_waitcnt
559 ; GFX9-NEXT: s_setpc_b64
561 ; VI: flat_load_ushort
562 define void @load_constant_lo_v2i16_reglo_vreg(i16 addrspace(2)* %in, i32 %reg) #0 {
564 %reg.bc = bitcast i32 %reg to <2 x i16>
565 %gep = getelementptr inbounds i16, i16 addrspace(2)* %in, i64 -2047
566 %load = load i16, i16 addrspace(2)* %gep
567 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
568 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: half load from addrspace(2) at element index -2047 (byte offset
; -4094), inserted into element 0 of %reg bitcast to <2 x half>. The gfx900
; checks expect global_load_short_d16 with offset:-4094; the fiji checks
; expect flat_load_ushort.
; The label below is anchored to the start of the line and ends with the
; colon, like every other label in this file, so it matches only the
; function's own label line in the output.
572 ; GCN-LABEL: {{^}}load_constant_lo_v2f16_reglo_vreg:
574 ; GFX9-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
575 ; GFX9-NEXT: s_waitcnt
576 ; GFX9-NEXT: global_store_dword
577 ; GFX9-NEXT: s_waitcnt
578 ; GFX9-NEXT: s_setpc_b64
580 ; VI: flat_load_ushort
581 define void @load_constant_lo_v2f16_reglo_vreg(half addrspace(2)* %in, i32 %reg) #0 {
583 %reg.bc = bitcast i32 %reg to <2 x half>
584 %gep = getelementptr inbounds half, half addrspace(2)* %in, i64 -2047
585 %load = load half, half addrspace(2)* %gep
586 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
587 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Test: two allocas, a 10 x i32 object (written with a volatile store) and a
; 4096 x i16 object. A volatile i16 load from element 2025 of the second
; object replaces element 0 of %reg bitcast to <2 x i16>. The gfx900 checks
; expect the stack address to fold into buffer_load_short_d16 as offset:4094.
591 ; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_to_offset:
592 ; GFX9: buffer_store_dword
593 ; GFX9-NEXT: buffer_load_short_d16 v0, off, s[0:3], s5 offset:4094
595 ; VI: buffer_load_ushort v
596 define void @load_private_lo_v2i16_reglo_vreg_to_offset(i32 %reg) #0 {
598 %obj0 = alloca [10 x i32], align 4
599 %obj1 = alloca [4096 x i16], align 2
600 %reg.bc = bitcast i32 %reg to <2 x i16>
601 %bc = bitcast [10 x i32]* %obj0 to i32*
602 store volatile i32 123, i32* %bc
603 %gep = getelementptr inbounds [4096 x i16], [4096 x i16]* %obj1, i32 0, i32 2025
604 %load = load volatile i16, i16* %gep
605 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
606 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: two allocas, a 10 x i32 object (written with a volatile store) and a
; 4096 x i8 object. A volatile i8 load from element 4051 of the second object
; is sign-extended and replaces element 0 of %reg bitcast to <2 x i16>. The
; gfx900 checks expect buffer_load_sbyte_d16 with offset:4095.
610 ; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_sexti8_to_offset:
611 ; GFX9: buffer_store_dword
612 ; GFX9-NEXT: buffer_load_sbyte_d16 v0, off, s[0:3], s5 offset:4095
614 ; VI: buffer_load_sbyte v
615 define void @load_private_lo_v2i16_reglo_vreg_sexti8_to_offset(i32 %reg) #0 {
617 %obj0 = alloca [10 x i32], align 4
618 %obj1 = alloca [4096 x i8], align 2
619 %reg.bc = bitcast i32 %reg to <2 x i16>
620 %bc = bitcast [10 x i32]* %obj0 to i32*
621 store volatile i32 123, i32* %bc
622 %gep = getelementptr inbounds [4096 x i8], [4096 x i8]* %obj1, i32 0, i32 4051
623 %load = load volatile i8, i8* %gep
624 %load.ext = sext i8 %load to i16
625 %build1 = insertelement <2 x i16> %reg.bc, i16 %load.ext, i32 0
626 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: two allocas, a 10 x i32 object (written with a volatile store) and a
; 4096 x i8 object. A volatile i8 load from element 4051 of the second object
; is zero-extended and replaces element 0 of %reg bitcast to <2 x i16>. The
; gfx900 checks expect buffer_load_ubyte_d16 with offset:4095.
630 ; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_zexti8_to_offset:
631 ; GFX9: buffer_store_dword
632 ; GFX9-NEXT: buffer_load_ubyte_d16 v0, off, s[0:3], s5 offset:4095
634 ; VI: buffer_load_ubyte v
635 define void @load_private_lo_v2i16_reglo_vreg_zexti8_to_offset(i32 %reg) #0 {
637 %obj0 = alloca [10 x i32], align 4
638 %obj1 = alloca [4096 x i8], align 2
639 %reg.bc = bitcast i32 %reg to <2 x i16>
640 %bc = bitcast [10 x i32]* %obj0 to i32*
641 store volatile i32 123, i32* %bc
642 %gep = getelementptr inbounds [4096 x i8], [4096 x i8]* %obj1, i32 0, i32 4051
643 %load = load volatile i8, i8* %gep
644 %load.ext = zext i8 %load to i16
645 %build1 = insertelement <2 x i16> %reg.bc, i16 %load.ext, i32 0
646 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Attribute group #0, referenced by the function definitions above: each test
; function is marked nounwind.
650 attributes #0 = { nounwind }