1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
2 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
4 ; GCN-LABEL: {{^}}load_local_hi_v2i16_undeflo:
6 ; GFX9-NEXT: ds_read_u16_d16_hi v0, v0
8 ; GFX9-NEXT: s_setpc_b64
; Loads one i16 through an addrspace(3) pointer and inserts it as element 1
; (the high half) of a <2 x i16>; element 0 stays undef.
11 define <2 x i16> @load_local_hi_v2i16_undeflo(i16 addrspace(3)* %in) #0 {
13 %load = load i16, i16 addrspace(3)* %in
14 %build = insertelement <2 x i16> undef, i16 %load, i32 1
18 ; GCN-LABEL: {{^}}load_local_hi_v2i16_reglo:
20 ; GFX9-NEXT: ds_read_u16_d16_hi v1, v0
21 ; GFX9-NEXT: s_waitcnt
22 ; GFX9-NEXT: v_mov_b32_e32 v0, v1
23 ; GFX9-NEXT: s_setpc_b64
; Builds a <2 x i16> whose element 0 is the incoming %reg argument and whose
; element 1 is an i16 loaded through an addrspace(3) pointer.
26 define <2 x i16> @load_local_hi_v2i16_reglo(i16 addrspace(3)* %in, i16 %reg) #0 {
28 %load = load i16, i16 addrspace(3)* %in
; Low half comes from the register argument.
29 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
; High half comes from the loaded value.
30 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 1
34 ; Show that we get reasonable regalloc without physreg constraints.
35 ; GCN-LABEL: {{^}}load_local_hi_v2i16_reglo_vreg:
37 ; GFX9-NEXT: ds_read_u16_d16_hi v1, v0
38 ; GFX9-NEXT: s_waitcnt
39 ; GFX9-NEXT: global_store_dword v[0:1], v1, off{{$}}
40 ; GFX9-NEXT: s_waitcnt
41 ; GFX9-NEXT: s_setpc_b64
; Same vector build as the reglo case (element 0 = %reg, element 1 = i16 loaded
; through addrspace(3)), but the function returns void and the vector is
; stored instead of returned.
44 define void @load_local_hi_v2i16_reglo_vreg(i16 addrspace(3)* %in, i16 %reg) #0 {
46 %load = load i16, i16 addrspace(3)* %in
47 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
48 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 1
; Store target is an undef addrspace(1) pointer; presumably it exists only to
; give the vector a use without tying it to the return register.
49 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
53 ; GCN-LABEL: {{^}}load_local_hi_v2i16_zerolo:
55 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
56 ; GFX9-NEXT: ds_read_u16_d16_hi v1, v0
57 ; GFX9-NEXT: s_waitcnt
58 ; GFX9-NEXT: v_mov_b32_e32 v0, v1
59 ; GFX9-NEXT: s_setpc_b64
; Loads one i16 through an addrspace(3) pointer and inserts it as element 1 of
; a zero-initialized <2 x i16>, so element 0 is the constant 0.
62 define <2 x i16> @load_local_hi_v2i16_zerolo(i16 addrspace(3)* %in) #0 {
64 %load = load i16, i16 addrspace(3)* %in
65 %build = insertelement <2 x i16> zeroinitializer, i16 %load, i32 1
69 ; FIXME: Remove m0 initialization
70 ; GCN-LABEL: {{^}}load_local_hi_v2i16_zerolo_shift:
72 ; GFX9-NEXT: s_mov_b32 m0, -1
73 ; GFX9-NEXT: ds_read_u16 v0, v0
74 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
75 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
76 ; GFX9-NEXT: s_setpc_b64
79 ; VI: v_lshlrev_b32_e32 v0, 16, v0
; Scalar i32 form of the zero-low case: the loaded i16 is zero-extended to i32
; and shifted left by 16, leaving zeros in the low 16 bits.
80 define i32 @load_local_hi_v2i16_zerolo_shift(i16 addrspace(3)* %in) #0 {
82 %load = load i16, i16 addrspace(3)* %in
83 %zext = zext i16 %load to i32
84 %shift = shl i32 %zext, 16
88 ; GCN-LABEL: {{^}}load_local_hi_v2f16_reglo_vreg:
90 ; GFX9-NEXT: ds_read_u16_d16_hi v1, v0
91 ; GFX9-NEXT: s_waitcnt
92 ; GFX9-NEXT: global_store_dword v[0:1], v1, off{{$}}
93 ; GFX9-NEXT: s_waitcnt
94 ; GFX9-NEXT: s_setpc_b64
; Half-precision variant: element 0 of a <2 x half> is the %reg argument and
; element 1 is a half loaded through an addrspace(3) pointer; the vector is
; then stored.
97 define void @load_local_hi_v2f16_reglo_vreg(half addrspace(3)* %in, half %reg) #0 {
99 %load = load half, half addrspace(3)* %in
100 %build0 = insertelement <2 x half> undef, half %reg, i32 0
101 %build1 = insertelement <2 x half> %build0, half %load, i32 1
; Store to an undef addrspace(1) pointer; presumably just to use the vector.
102 store <2 x half> %build1, <2 x half> addrspace(1)* undef
106 ; GCN-LABEL: {{^}}load_local_hi_v2i16_reglo_vreg_zexti8:
108 ; GFX9-NEXT: ds_read_u8_d16_hi v1, v0
109 ; GFX9-NEXT: s_waitcnt
110 ; GFX9-NEXT: global_store_dword v[0:1], v1, off{{$}}
111 ; GFX9-NEXT: s_waitcnt
112 ; GFX9-NEXT: s_setpc_b64
; Loads an i8 through an addrspace(3) pointer, zero-extends it to i16, and
; places it in element 1 of a <2 x i16> whose element 0 is %reg.
115 define void @load_local_hi_v2i16_reglo_vreg_zexti8(i8 addrspace(3)* %in, i16 %reg) #0 {
117 %load = load i8, i8 addrspace(3)* %in
; Unsigned widening of the byte.
118 %ext = zext i8 %load to i16
119 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
120 %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1
; Store to an undef addrspace(1) pointer; presumably just to use the vector.
121 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
125 ; GCN-LABEL: {{^}}load_local_hi_v2i16_reglo_vreg_sexti8:
127 ; GFX9-NEXT: ds_read_i8_d16_hi v1, v0
128 ; GFX9-NEXT: s_waitcnt
129 ; GFX9-NEXT: global_store_dword v[0:1], v1, off{{$}}
130 ; GFX9-NEXT: s_waitcnt
131 ; GFX9-NEXT: s_setpc_b64
; Loads an i8 through an addrspace(3) pointer, sign-extends it to i16, and
; places it in element 1 of a <2 x i16> whose element 0 is %reg.
134 define void @load_local_hi_v2i16_reglo_vreg_sexti8(i8 addrspace(3)* %in, i16 %reg) #0 {
136 %load = load i8, i8 addrspace(3)* %in
; Signed widening of the byte.
137 %ext = sext i8 %load to i16
138 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
139 %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1
; Store to an undef addrspace(1) pointer; presumably just to use the vector.
140 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
144 ; GCN-LABEL: {{^}}load_global_hi_v2i16_reglo_vreg:
146 ; GFX9-NEXT: global_load_short_d16_hi v2, v[0:1], off offset:-4094
147 ; GFX9-NEXT: s_waitcnt
148 ; GFX9-NEXT: global_store_dword
149 ; GFX9-NEXT: s_waitcnt
150 ; GFX9-NEXT: s_setpc_b64
; Loads an i16 through an addrspace(1) pointer at a negative constant offset
; and places it in element 1 of a <2 x i16> whose element 0 is %reg.
151 define void @load_global_hi_v2i16_reglo_vreg(i16 addrspace(1)* %in, i16 %reg) #0 {
; -2047 i16 elements is -4094 bytes, the immediate offset the gfx900 checks
; for this function expect on the load.
153 %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 -2047
154 %load = load i16, i16 addrspace(1)* %gep
155 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
156 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 1
; Store to an undef addrspace(1) pointer; presumably just to use the vector.
157 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
161 ; GCN-LABEL: {{^}}load_global_hi_v2f16_reglo_vreg:
163 ; GFX9-NEXT: global_load_short_d16_hi v2, v[0:1], off offset:-4094
164 ; GFX9-NEXT: s_waitcnt
165 ; GFX9-NEXT: global_store_dword
166 ; GFX9-NEXT: s_waitcnt
167 ; GFX9-NEXT: s_setpc_b64
; Half-precision variant of the addrspace(1) case: element 0 of a <2 x half>
; is %reg, element 1 is a half loaded at a negative constant offset.
168 define void @load_global_hi_v2f16_reglo_vreg(half addrspace(1)* %in, half %reg) #0 {
; -2047 half elements is -4094 bytes, matching the offset in the gfx900 checks.
170 %gep = getelementptr inbounds half, half addrspace(1)* %in, i64 -2047
171 %load = load half, half addrspace(1)* %gep
172 %build0 = insertelement <2 x half> undef, half %reg, i32 0
173 %build1 = insertelement <2 x half> %build0, half %load, i32 1
; Store to an undef addrspace(1) pointer; presumably just to use the vector.
174 store <2 x half> %build1, <2 x half> addrspace(1)* undef
178 ; GCN-LABEL: {{^}}load_global_hi_v2i16_reglo_vreg_zexti8:
180 ; GFX9-NEXT: global_load_ubyte_d16_hi v2, v[0:1], off offset:-4095
181 ; GFX9-NEXT: s_waitcnt
182 ; GFX9-NEXT: global_store_dword
183 ; GFX9-NEXT: s_waitcnt
184 ; GFX9-NEXT: s_setpc_b64
; Loads an i8 through an addrspace(1) pointer at byte offset -4095,
; zero-extends it to i16, and places it in element 1 of a <2 x i16> whose
; element 0 is %reg.
185 define void @load_global_hi_v2i16_reglo_vreg_zexti8(i8 addrspace(1)* %in, i16 %reg) #0 {
; i8 elements, so the index is the byte offset (-4095) seen in the checks.
187 %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095
188 %load = load i8, i8 addrspace(1)* %gep
189 %ext = zext i8 %load to i16
190 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
191 %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1
; Store to an undef addrspace(1) pointer; presumably just to use the vector.
192 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
196 ; GCN-LABEL: {{^}}load_global_hi_v2i16_reglo_vreg_sexti8:
198 ; GFX9-NEXT: global_load_sbyte_d16_hi v2, v[0:1], off offset:-4095
199 ; GFX9-NEXT: s_waitcnt
200 ; GFX9-NEXT: global_store_dword
201 ; GFX9-NEXT: s_waitcnt
202 ; GFX9-NEXT: s_setpc_b64
; Loads an i8 through an addrspace(1) pointer at byte offset -4095,
; sign-extends it to i16, and places it in element 1 of a <2 x i16> whose
; element 0 is %reg.
203 define void @load_global_hi_v2i16_reglo_vreg_sexti8(i8 addrspace(1)* %in, i16 %reg) #0 {
; i8 elements, so the index is the byte offset (-4095) seen in the checks.
205 %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095
206 %load = load i8, i8 addrspace(1)* %gep
207 %ext = sext i8 %load to i16
208 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
209 %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1
; Store to an undef addrspace(1) pointer; presumably just to use the vector.
210 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
214 ; GCN-LABEL: {{^}}load_flat_hi_v2i16_reglo_vreg:
216 ; GFX9-NEXT: flat_load_short_d16_hi v2, v[0:1]
217 ; GFX9-NEXT: s_waitcnt
218 ; GFX9-NEXT: global_store_dword v[0:1], v2
219 ; GFX9-NEXT: s_waitcnt
220 ; GFX9-NEXT: s_setpc_b64
222 ; VI: flat_load_ushort v{{[0-9]+}}
223 ; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
; Loads an i16 through an addrspace(4) pointer (the "flat" case per the test
; name) and places it in element 1 of a <2 x i16> whose element 0 is %reg.
225 define void @load_flat_hi_v2i16_reglo_vreg(i16 addrspace(4)* %in, i16 %reg) #0 {
227 %load = load i16, i16 addrspace(4)* %in
228 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
229 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 1
; Store to an undef addrspace(1) pointer; presumably just to use the vector.
230 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
234 ; GCN-LABEL: {{^}}load_flat_hi_v2f16_reglo_vreg:
236 ; GFX9-NEXT: flat_load_short_d16_hi v2, v[0:1]
237 ; GFX9-NEXT: s_waitcnt
238 ; GFX9-NEXT: global_store_dword v[0:1], v2
239 ; GFX9-NEXT: s_waitcnt
240 ; GFX9-NEXT: s_setpc_b64
242 ; VI: flat_load_ushort v{{[0-9]+}}
243 ; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
; Half-precision variant of the addrspace(4) case: element 0 of a <2 x half>
; is %reg, element 1 is the loaded half.
245 define void @load_flat_hi_v2f16_reglo_vreg(half addrspace(4)* %in, half %reg) #0 {
247 %load = load half, half addrspace(4)* %in
248 %build0 = insertelement <2 x half> undef, half %reg, i32 0
249 %build1 = insertelement <2 x half> %build0, half %load, i32 1
; Store to an undef addrspace(1) pointer; presumably just to use the vector.
250 store <2 x half> %build1, <2 x half> addrspace(1)* undef
254 ; GCN-LABEL: {{^}}load_flat_hi_v2i16_reglo_vreg_zexti8:
256 ; GFX9-NEXT: flat_load_ubyte_d16_hi v2, v[0:1]
257 ; GFX9-NEXT: s_waitcnt
258 ; GFX9-NEXT: global_store_dword v[0:1], v2
259 ; GFX9-NEXT: s_waitcnt
260 ; GFX9-NEXT: s_setpc_b64
262 ; VI: flat_load_ubyte v{{[0-9]+}}
263 ; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
; Loads an i8 through an addrspace(4) pointer, zero-extends it to i16, and
; places it in element 1 of a <2 x i16> whose element 0 is %reg.
265 define void @load_flat_hi_v2i16_reglo_vreg_zexti8(i8 addrspace(4)* %in, i16 %reg) #0 {
267 %load = load i8, i8 addrspace(4)* %in
268 %ext = zext i8 %load to i16
269 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
270 %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1
; Store to an undef addrspace(1) pointer; presumably just to use the vector.
271 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
275 ; GCN-LABEL: {{^}}load_flat_hi_v2i16_reglo_vreg_sexti8:
277 ; GFX9-NEXT: flat_load_sbyte_d16_hi v2, v[0:1]
278 ; GFX9-NEXT: s_waitcnt
279 ; GFX9-NEXT: global_store_dword v[0:1], v2
280 ; GFX9-NEXT: s_waitcnt
281 ; GFX9-NEXT: s_setpc_b64
283 ; VI: flat_load_sbyte v{{[0-9]+}}
284 ; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
; Loads an i8 through an addrspace(4) pointer, sign-extends it to i16, and
; places it in element 1 of a <2 x i16> whose element 0 is %reg.
286 define void @load_flat_hi_v2i16_reglo_vreg_sexti8(i8 addrspace(4)* %in, i16 %reg) #0 {
288 %load = load i8, i8 addrspace(4)* %in
289 %ext = sext i8 %load to i16
290 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
291 %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1
; Store to an undef addrspace(1) pointer; presumably just to use the vector.
292 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
296 ; GCN-LABEL: {{^}}load_private_hi_v2i16_reglo_vreg:
298 ; GFX9-NEXT: buffer_load_short_d16_hi v1, v0, s[0:3], s4 offen offset:4094{{$}}
299 ; GFX9-NEXT: s_waitcnt
300 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
301 ; GFX9-NEXT: s_waitcnt
302 ; GFX9-NEXT: s_setpc_b64
304 ; VI: buffer_load_ushort v{{[0-9]+}}, v0, s[0:3], s4 offen offset:4094{{$}}
; Loads an i16 through a default-address-space pointer (the "private" case per
; the test name) at a positive constant offset and places it in element 1 of a
; <2 x i16> whose element 0 is %reg.
305 define void @load_private_hi_v2i16_reglo_vreg(i16* %in, i16 %reg) #0 {
; 2047 i16 elements is 4094 bytes, the immediate offset the checks expect.
307 %gep = getelementptr inbounds i16, i16* %in, i64 2047
308 %load = load i16, i16* %gep
309 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
310 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 1
; Store to an undef addrspace(1) pointer; presumably just to use the vector.
311 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
315 ; GCN-LABEL: {{^}}load_private_hi_v2f16_reglo_vreg:
317 ; GFX9-NEXT: buffer_load_short_d16_hi v1, v0, s[0:3], s4 offen offset:4094{{$}}
318 ; GFX9-NEXT: s_waitcnt
319 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
320 ; GFX9-NEXT: s_waitcnt
321 ; GFX9-NEXT: s_setpc_b64
323 ; VI: buffer_load_ushort v{{[0-9]+}}, v0, s[0:3], s4 offen offset:4094{{$}}
; Half-precision variant of the default-address-space case: element 0 of a
; <2 x half> is %reg, element 1 is a half loaded at a positive constant offset.
324 define void @load_private_hi_v2f16_reglo_vreg(half* %in, half %reg) #0 {
; 2047 half elements is 4094 bytes, the immediate offset the checks expect.
326 %gep = getelementptr inbounds half, half* %in, i64 2047
327 %load = load half, half* %gep
328 %build0 = insertelement <2 x half> undef, half %reg, i32 0
329 %build1 = insertelement <2 x half> %build0, half %load, i32 1
; Store to an undef addrspace(1) pointer; presumably just to use the vector.
330 store <2 x half> %build1, <2 x half> addrspace(1)* undef
334 ; GCN-LABEL: {{^}}load_private_hi_v2i16_reglo_vreg_nooff:
336 ; GFX9-NEXT: buffer_load_short_d16_hi v1, off, s[0:3], s4 offset:4094{{$}}
337 ; GFX9-NEXT: s_waitcnt
338 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
339 ; GFX9-NEXT: s_waitcnt
340 ; GFX9-NEXT: s_setpc_b64
342 ; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
; Loads an i16 from the constant address 4094 in the default address space and
; places it in element 1 of a <2 x i16> whose element 0 is %reg. The %in
; argument is not referenced by the visible body.
343 define void @load_private_hi_v2i16_reglo_vreg_nooff(i16* %in, i16 %reg) #0 {
; Volatile load through an inttoptr constant; the checks expect the address to
; be encoded purely as an immediate offset ("off ... offset:4094").
345 %load = load volatile i16, i16* inttoptr (i32 4094 to i16*)
346 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
347 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 1
; Store to an undef addrspace(1) pointer; presumably just to use the vector.
348 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
352 ; GCN-LABEL: {{^}}load_private_hi_v2f16_reglo_vreg_nooff:
354 ; GFX9-NEXT: buffer_load_short_d16_hi v1, off, s[0:3], s4 offset:4094{{$}}
355 ; GFX9-NEXT: s_waitcnt
356 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
357 ; GFX9-NEXT: s_waitcnt
358 ; GFX9-NEXT: s_setpc_b64
360 ; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
; Half-precision variant of the constant-address case: loads a half from
; address 4094 in the default address space into element 1 of a <2 x half>
; whose element 0 is %reg. The %in argument is not referenced by the visible
; body.
361 define void @load_private_hi_v2f16_reglo_vreg_nooff(half* %in, half %reg) #0 {
; Volatile load through an inttoptr constant; the checks expect the address to
; be encoded purely as an immediate offset ("off ... offset:4094").
363 %load = load volatile half, half* inttoptr (i32 4094 to half*)
364 %build0 = insertelement <2 x half> undef, half %reg, i32 0
365 %build1 = insertelement <2 x half> %build0, half %load, i32 1
; Store to an undef addrspace(1) pointer; presumably just to use the vector.
366 store <2 x half> %build1, <2 x half> addrspace(1)* undef
370 ; GCN-LABEL: {{^}}load_private_hi_v2i16_reglo_vreg_zexti8:
372 ; GFX9-NEXT: buffer_load_ubyte_d16_hi v1, v0, s[0:3], s4 offen offset:2047{{$}}
373 ; GFX9-NEXT: s_waitcnt
374 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
375 ; GFX9-NEXT: s_waitcnt
376 ; GFX9-NEXT: s_setpc_b64
378 ; VI: buffer_load_ubyte v{{[0-9]+}}, v0, s[0:3], s4 offen offset:2047{{$}}
; Loads an i8 through a default-address-space pointer at byte offset 2047,
; zero-extends it to i16, and places it in element 1 of a <2 x i16> whose
; element 0 is %reg.
379 define void @load_private_hi_v2i16_reglo_vreg_zexti8(i8* %in, i16 %reg) #0 {
; i8 elements, so the index is the byte offset (2047) seen in the checks.
381 %gep = getelementptr inbounds i8, i8* %in, i64 2047
382 %load = load i8, i8* %gep
383 %ext = zext i8 %load to i16
384 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
385 %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1
; Store to an undef addrspace(1) pointer; presumably just to use the vector.
386 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
390 ; GCN-LABEL: {{^}}load_private_hi_v2i16_reglo_vreg_sexti8:
392 ; GFX9-NEXT: buffer_load_sbyte_d16_hi v1, v0, s[0:3], s4 offen offset:2047{{$}}
393 ; GFX9-NEXT: s_waitcnt
394 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
395 ; GFX9-NEXT: s_waitcnt
396 ; GFX9-NEXT: s_setpc_b64
398 ; VI: buffer_load_sbyte v{{[0-9]+}}, v0, s[0:3], s4 offen offset:2047{{$}}
; Loads an i8 through a default-address-space pointer at byte offset 2047,
; sign-extends it to i16, and places it in element 1 of a <2 x i16> whose
; element 0 is %reg.
399 define void @load_private_hi_v2i16_reglo_vreg_sexti8(i8* %in, i16 %reg) #0 {
; i8 elements, so the index is the byte offset (2047) seen in the checks.
401 %gep = getelementptr inbounds i8, i8* %in, i64 2047
402 %load = load i8, i8* %gep
403 %ext = sext i8 %load to i16
404 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
405 %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1
; Store to an undef addrspace(1) pointer; presumably just to use the vector.
406 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
410 ; GCN-LABEL: {{^}}load_private_hi_v2i16_reglo_vreg_nooff_zexti8:
412 ; GFX9-NEXT: buffer_load_ubyte_d16_hi v1, off, s[0:3], s4 offset:4094{{$}}
413 ; GFX9-NEXT: s_waitcnt
414 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
415 ; GFX9-NEXT: s_waitcnt
416 ; GFX9-NEXT: s_setpc_b64
418 ; VI: buffer_load_ubyte v0, off, s[0:3], s4 offset:4094{{$}}
; Loads an i8 from the constant address 4094 in the default address space,
; zero-extends it to i16, and places it in element 1 of a <2 x i16> whose
; element 0 is %reg. The %in argument is not referenced by the visible body.
419 define void @load_private_hi_v2i16_reglo_vreg_nooff_zexti8(i8* %in, i16 %reg) #0 {
; Volatile load through an inttoptr constant address.
421 %load = load volatile i8, i8* inttoptr (i32 4094 to i8*)
422 %ext = zext i8 %load to i16
423 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
424 %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1
; Store to an undef addrspace(1) pointer; presumably just to use the vector.
425 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
429 ; GCN-LABEL: {{^}}load_private_hi_v2i16_reglo_vreg_nooff_sexti8:
431 ; GFX9-NEXT: buffer_load_sbyte_d16_hi v1, off, s[0:3], s4 offset:4094{{$}}
432 ; GFX9-NEXT: s_waitcnt
433 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
434 ; GFX9-NEXT: s_waitcnt
435 ; GFX9-NEXT: s_setpc_b64
437 ; VI: buffer_load_sbyte v0, off, s[0:3], s4 offset:4094{{$}}
; Loads an i8 from the constant address 4094 in the default address space,
; sign-extends it to i16, and places it in element 1 of a <2 x i16> whose
; element 0 is %reg. The %in argument is not referenced by the visible body.
438 define void @load_private_hi_v2i16_reglo_vreg_nooff_sexti8(i8* %in, i16 %reg) #0 {
; Volatile load through an inttoptr constant address.
440 %load = load volatile i8, i8* inttoptr (i32 4094 to i8*)
441 %ext = sext i8 %load to i16
442 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
443 %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1
; Store to an undef addrspace(1) pointer; presumably just to use the vector.
444 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
448 ; GCN-LABEL: {{^}}load_private_hi_v2f16_reglo_vreg_nooff_zexti8:
450 ; GFX9-NEXT: buffer_load_ubyte_d16_hi v1, off, s[0:3], s4 offset:4094{{$}}
451 ; GFX9-NEXT: s_waitcnt
452 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
453 ; GFX9-NEXT: s_waitcnt
454 ; GFX9-NEXT: s_setpc_b64
456 ; VI: buffer_load_ubyte v0, off, s[0:3], s4 offset:4094{{$}}
; Loads an i8 from the constant address 4094 in the default address space,
; zero-extends it to i16, reinterprets those 16 bits as a half, and places the
; result in element 1 of a <2 x half> whose element 0 is %reg. The %in
; argument is not referenced by the visible body.
457 define void @load_private_hi_v2f16_reglo_vreg_nooff_zexti8(i8* %in, half %reg) #0 {
; Volatile load through an inttoptr constant address.
459 %load = load volatile i8, i8* inttoptr (i32 4094 to i8*)
460 %ext = zext i8 %load to i16
; Bit-pattern reinterpretation only; no numeric conversion is performed.
461 %bc.ext = bitcast i16 %ext to half
462 %build0 = insertelement <2 x half> undef, half %reg, i32 0
463 %build1 = insertelement <2 x half> %build0, half %bc.ext, i32 1
; Store to an undef addrspace(1) pointer; presumably just to use the vector.
464 store <2 x half> %build1, <2 x half> addrspace(1)* undef
468 ; GCN-LABEL: {{^}}load_constant_hi_v2i16_reglo_vreg:
470 ; GFX9-NEXT: global_load_short_d16_hi v2, v[0:1], off offset:-4094
471 ; GFX9-NEXT: s_waitcnt
472 ; GFX9-NEXT: global_store_dword
473 ; GFX9-NEXT: s_waitcnt
474 ; GFX9-NEXT: s_setpc_b64
476 ; VI: flat_load_ushort
; Loads an i16 through an addrspace(2) pointer (the "constant" case per the
; test name) at a negative constant offset and places it in element 1 of a
; <2 x i16> whose element 0 is %reg.
477 define void @load_constant_hi_v2i16_reglo_vreg(i16 addrspace(2)* %in, i16 %reg) #0 {
; -2047 i16 elements is -4094 bytes, matching the offset in the gfx900 checks.
479 %gep = getelementptr inbounds i16, i16 addrspace(2)* %in, i64 -2047
480 %load = load i16, i16 addrspace(2)* %gep
481 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
482 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 1
; Store to an undef addrspace(1) pointer; presumably just to use the vector.
483 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
487 ; GCN-LABEL: {{^}}load_constant_hi_v2f16_reglo_vreg:
489 ; GFX9-NEXT: global_load_short_d16_hi v2, v[0:1], off offset:-4094
490 ; GFX9-NEXT: s_waitcnt
491 ; GFX9-NEXT: global_store_dword
492 ; GFX9-NEXT: s_waitcnt
493 ; GFX9-NEXT: s_setpc_b64
495 ; VI: flat_load_ushort
; Half-precision variant of the addrspace(2) case: element 0 of a <2 x half>
; is %reg, element 1 is a half loaded at a negative constant offset.
496 define void @load_constant_hi_v2f16_reglo_vreg(half addrspace(2)* %in, half %reg) #0 {
; -2047 half elements is -4094 bytes, matching the offset in the gfx900 checks.
498 %gep = getelementptr inbounds half, half addrspace(2)* %in, i64 -2047
499 %load = load half, half addrspace(2)* %gep
500 %build0 = insertelement <2 x half> undef, half %reg, i32 0
501 %build1 = insertelement <2 x half> %build0, half %load, i32 1
; Store to an undef addrspace(1) pointer; presumably just to use the vector.
502 store <2 x half> %build1, <2 x half> addrspace(1)* undef
506 attributes #0 = { nounwind }