1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900 %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx906 -amdgpu-sroa=0 -mattr=-promote-alloca,+sram-ecc -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX906,NO-D16-HI %s
4 ; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX803,NO-D16-HI %s
; Inserts an i16 loaded from local memory (addrspace 3) into element 0 of an
; undef <2 x i16> and returns the vector. The gfx900 checks expect the d16
; form of the load (ds_read_u16_d16); the gfx906 (+sram-ecc) and fiji checks
; expect a plain ds_read_u16, with fiji additionally initializing m0 first.
6 define <2 x i16> @load_local_lo_v2i16_undeflo(i16 addrspace(3)* %in) #0 {
7 ; GFX900-LABEL: load_local_lo_v2i16_undeflo:
8 ; GFX900: ; %bb.0: ; %entry
9 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10 ; GFX900-NEXT: ds_read_u16_d16 v0, v0
11 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
12 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14 ; GFX906-LABEL: load_local_lo_v2i16_undeflo:
15 ; GFX906: ; %bb.0: ; %entry
16 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17 ; GFX906-NEXT: ds_read_u16 v0, v0
18 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
19 ; GFX906-NEXT: s_setpc_b64 s[30:31]
21 ; GFX803-LABEL: load_local_lo_v2i16_undeflo:
22 ; GFX803: ; %bb.0: ; %entry
23 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
24 ; GFX803-NEXT: s_mov_b32 m0, -1
25 ; GFX803-NEXT: ds_read_u16 v0, v0
26 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
27 ; GFX803-NEXT: s_setpc_b64 s[30:31]
29 %load = load i16, i16 addrspace(3)* %in
30 %build = insertelement <2 x i16> undef, i16 %load, i32 0
; Builds a <2 x i16> with the i16 argument %reg in element 1 and an i16 loaded
; from local memory in element 0. No d16 load is expected on any target here:
; all three check a plain ds_read_u16 followed by explicit packing
; (v_and + v_lshl_or on gfx900/gfx906, v_lshlrev + v_or on fiji).
34 define <2 x i16> @load_local_lo_v2i16_reglo(i16 addrspace(3)* %in, i16 %reg) #0 {
35 ; GFX900-LABEL: load_local_lo_v2i16_reglo:
36 ; GFX900: ; %bb.0: ; %entry
37 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38 ; GFX900-NEXT: ds_read_u16 v0, v0
39 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
40 ; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
41 ; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
42 ; GFX900-NEXT: s_setpc_b64 s[30:31]
44 ; GFX906-LABEL: load_local_lo_v2i16_reglo:
45 ; GFX906: ; %bb.0: ; %entry
46 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
47 ; GFX906-NEXT: ds_read_u16 v0, v0
48 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
49 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
50 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
51 ; GFX906-NEXT: s_setpc_b64 s[30:31]
53 ; GFX803-LABEL: load_local_lo_v2i16_reglo:
54 ; GFX803: ; %bb.0: ; %entry
55 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
56 ; GFX803-NEXT: s_mov_b32 m0, -1
57 ; GFX803-NEXT: ds_read_u16 v0, v0
58 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
59 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
60 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
61 ; GFX803-NEXT: s_setpc_b64 s[30:31]
63 %load = load i16, i16 addrspace(3)* %in
64 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
65 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
69 ; Show that we get reasonable regalloc without physreg constraints.
; Same vector construction as the returning variant (i16 %reg in element 1,
; loaded i16 in element 0), but the result is stored to an undef global
; pointer instead of being returned. All three targets check a plain
; ds_read_u16 plus packing; the store is global_store_dword on gfx900/gfx906
; and flat_store_dword on fiji.
70 define void @load_local_lo_v2i16_reglo_vreg(i16 addrspace(3)* %in, i16 %reg) #0 {
71 ; GFX900-LABEL: load_local_lo_v2i16_reglo_vreg:
72 ; GFX900: ; %bb.0: ; %entry
73 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
74 ; GFX900-NEXT: ds_read_u16 v0, v0
75 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
76 ; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
77 ; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
78 ; GFX900-NEXT: global_store_dword v[0:1], v0, off
79 ; GFX900-NEXT: s_waitcnt vmcnt(0)
80 ; GFX900-NEXT: s_setpc_b64 s[30:31]
82 ; GFX906-LABEL: load_local_lo_v2i16_reglo_vreg:
83 ; GFX906: ; %bb.0: ; %entry
84 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85 ; GFX906-NEXT: ds_read_u16 v0, v0
86 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
87 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
88 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
89 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
90 ; GFX906-NEXT: s_waitcnt vmcnt(0)
91 ; GFX906-NEXT: s_setpc_b64 s[30:31]
93 ; GFX803-LABEL: load_local_lo_v2i16_reglo_vreg:
94 ; GFX803: ; %bb.0: ; %entry
95 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
96 ; GFX803-NEXT: s_mov_b32 m0, -1
97 ; GFX803-NEXT: ds_read_u16 v0, v0
98 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
99 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
100 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
101 ; GFX803-NEXT: flat_store_dword v[0:1], v0
102 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
103 ; GFX803-NEXT: s_setpc_b64 s[30:31]
105 %load = load i16, i16 addrspace(3)* %in
106 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
107 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
108 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Inserts an i16 loaded from local memory into element 0 of a zeroinitializer
; <2 x i16>. The gfx900 checks expect v1 to be zeroed, the d16 load to write
; into v1, and the result copied to v0. The gfx906 checks expect a plain
; ds_read_u16 followed by a 0xffff mask; the fiji checks expect only the
; plain ds_read_u16.
112 define <2 x i16> @load_local_lo_v2i16_zerolo(i16 addrspace(3)* %in) #0 {
113 ; GFX900-LABEL: load_local_lo_v2i16_zerolo:
114 ; GFX900: ; %bb.0: ; %entry
115 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
116 ; GFX900-NEXT: v_mov_b32_e32 v1, 0
117 ; GFX900-NEXT: ds_read_u16_d16 v1, v0
118 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
119 ; GFX900-NEXT: v_mov_b32_e32 v0, v1
120 ; GFX900-NEXT: s_setpc_b64 s[30:31]
122 ; GFX906-LABEL: load_local_lo_v2i16_zerolo:
123 ; GFX906: ; %bb.0: ; %entry
124 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
125 ; GFX906-NEXT: ds_read_u16 v0, v0
126 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
127 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
128 ; GFX906-NEXT: s_setpc_b64 s[30:31]
130 ; GFX803-LABEL: load_local_lo_v2i16_zerolo:
131 ; GFX803: ; %bb.0: ; %entry
132 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
133 ; GFX803-NEXT: s_mov_b32 m0, -1
134 ; GFX803-NEXT: ds_read_u16 v0, v0
135 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
136 ; GFX803-NEXT: s_setpc_b64 s[30:31]
138 %load = load i16, i16 addrspace(3)* %in
139 %build = insertelement <2 x i16> zeroinitializer, i16 %load, i32 0
; Inserts a half loaded from local memory into element 0 of the constant
; vector <half 0.0, half 2.0>, so the high half of the result is the
; constant 2.0. The "2.0" operand in the gfx900 and fiji checks is a 32-bit
; value: float 2.0 is 0x40000000, whose upper 16 bits (0x4000) are half 2.0.
; The gfx906 checks materialize 0x4000 explicitly and shift it left by 16.
143 define <2 x half> @load_local_lo_v2f16_fpimm(half addrspace(3)* %in) #0 {
144 ; GFX900-LABEL: load_local_lo_v2f16_fpimm:
145 ; GFX900: ; %bb.0: ; %entry
146 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
147 ; GFX900-NEXT: v_mov_b32_e32 v1, 2.0
148 ; GFX900-NEXT: ds_read_u16_d16 v1, v0
149 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
150 ; GFX900-NEXT: v_mov_b32_e32 v0, v1
151 ; GFX900-NEXT: s_setpc_b64 s[30:31]
153 ; GFX906-LABEL: load_local_lo_v2f16_fpimm:
154 ; GFX906: ; %bb.0: ; %entry
155 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156 ; GFX906-NEXT: ds_read_u16 v0, v0
157 ; GFX906-NEXT: s_movk_i32 s4, 0x4000
158 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
159 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
160 ; GFX906-NEXT: v_lshl_or_b32 v0, s4, 16, v0
161 ; GFX906-NEXT: s_setpc_b64 s[30:31]
163 ; GFX803-LABEL: load_local_lo_v2f16_fpimm:
164 ; GFX803: ; %bb.0: ; %entry
165 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
166 ; GFX803-NEXT: s_mov_b32 m0, -1
167 ; GFX803-NEXT: ds_read_u16 v0, v0
168 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
169 ; GFX803-NEXT: v_or_b32_e32 v0, 2.0, v0
170 ; GFX803-NEXT: s_setpc_b64 s[30:31]
172 %load = load half, half addrspace(3)* %in
173 %build = insertelement <2 x half> <half 0.0, half 2.0>, half %load, i32 0
174 ret <2 x half> %build
; Reinterprets the i32 argument %reg as <2 x half>, replaces element 0 with a
; half loaded from local memory (element 1 of %reg is kept), and stores the
; result to an undef global pointer. The gfx900 checks expect the d16 load to
; write straight into v1 and v1 to be stored with no separate packing
; (v1 presumably being the register that carries %reg). The gfx906 and fiji
; checks expect a plain ds_read_u16 plus explicit mask/shift/or packing.
177 define void @load_local_lo_v2f16_reghi_vreg(half addrspace(3)* %in, i32 %reg) #0 {
178 ; GFX900-LABEL: load_local_lo_v2f16_reghi_vreg:
179 ; GFX900: ; %bb.0: ; %entry
180 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
181 ; GFX900-NEXT: ds_read_u16_d16 v1, v0
182 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
183 ; GFX900-NEXT: global_store_dword v[0:1], v1, off
184 ; GFX900-NEXT: s_waitcnt vmcnt(0)
185 ; GFX900-NEXT: s_setpc_b64 s[30:31]
187 ; GFX906-LABEL: load_local_lo_v2f16_reghi_vreg:
188 ; GFX906: ; %bb.0: ; %entry
189 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
190 ; GFX906-NEXT: ds_read_u16 v0, v0
191 ; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v1
192 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
193 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
194 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
195 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
196 ; GFX906-NEXT: s_waitcnt vmcnt(0)
197 ; GFX906-NEXT: s_setpc_b64 s[30:31]
199 ; GFX803-LABEL: load_local_lo_v2f16_reghi_vreg:
200 ; GFX803: ; %bb.0: ; %entry
201 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
202 ; GFX803-NEXT: s_mov_b32 m0, -1
203 ; GFX803-NEXT: ds_read_u16 v0, v0
204 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
205 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
206 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
207 ; GFX803-NEXT: flat_store_dword v[0:1], v0
208 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
209 ; GFX803-NEXT: s_setpc_b64 s[30:31]
211 %reg.bc = bitcast i32 %reg to <2 x half>
212 %load = load half, half addrspace(3)* %in
213 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
214 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Builds a <2 x half> with the half argument %reg in element 1 and a half
; loaded from local memory in element 0, then stores it to an undef global
; pointer. No d16 load is expected on any target: all three check a plain
; ds_read_u16 followed by explicit packing.
218 define void @load_local_lo_v2f16_reglo_vreg(half addrspace(3)* %in, half %reg) #0 {
219 ; GFX900-LABEL: load_local_lo_v2f16_reglo_vreg:
220 ; GFX900: ; %bb.0: ; %entry
221 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
222 ; GFX900-NEXT: ds_read_u16 v0, v0
223 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
224 ; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
225 ; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
226 ; GFX900-NEXT: global_store_dword v[0:1], v0, off
227 ; GFX900-NEXT: s_waitcnt vmcnt(0)
228 ; GFX900-NEXT: s_setpc_b64 s[30:31]
230 ; GFX906-LABEL: load_local_lo_v2f16_reglo_vreg:
231 ; GFX906: ; %bb.0: ; %entry
232 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
233 ; GFX906-NEXT: ds_read_u16 v0, v0
234 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
235 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
236 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
237 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
238 ; GFX906-NEXT: s_waitcnt vmcnt(0)
239 ; GFX906-NEXT: s_setpc_b64 s[30:31]
241 ; GFX803-LABEL: load_local_lo_v2f16_reglo_vreg:
242 ; GFX803: ; %bb.0: ; %entry
243 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
244 ; GFX803-NEXT: s_mov_b32 m0, -1
245 ; GFX803-NEXT: ds_read_u16 v0, v0
246 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
247 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
248 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
249 ; GFX803-NEXT: flat_store_dword v[0:1], v0
250 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
251 ; GFX803-NEXT: s_setpc_b64 s[30:31]
253 %load = load half, half addrspace(3)* %in
254 %build0 = insertelement <2 x half> undef, half %reg, i32 1
255 %build1 = insertelement <2 x half> %build0, half %load, i32 0
256 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Loads an i8 from local memory, zero-extends it to i16, and writes it into
; element 0 of %reg reinterpreted as <2 x i16> (element 1 of %reg is kept);
; the result is stored to an undef global pointer. The gfx900 checks expect
; ds_read_u8_d16 with no extra packing. The gfx906 checks expect ds_read_u8
; merged via v_bfi_b32 with a 0xffff mask; the fiji checks expect ds_read_u8
; merged via v_perm_b32 with selector 0x5040c00.
260 define void @load_local_lo_v2i16_reghi_vreg_zexti8(i8 addrspace(3)* %in, i32 %reg) #0 {
261 ; GFX900-LABEL: load_local_lo_v2i16_reghi_vreg_zexti8:
262 ; GFX900: ; %bb.0: ; %entry
263 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
264 ; GFX900-NEXT: ds_read_u8_d16 v1, v0
265 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
266 ; GFX900-NEXT: global_store_dword v[0:1], v1, off
267 ; GFX900-NEXT: s_waitcnt vmcnt(0)
268 ; GFX900-NEXT: s_setpc_b64 s[30:31]
270 ; GFX906-LABEL: load_local_lo_v2i16_reghi_vreg_zexti8:
271 ; GFX906: ; %bb.0: ; %entry
272 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
273 ; GFX906-NEXT: ds_read_u8 v0, v0
274 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
275 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
276 ; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1
277 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
278 ; GFX906-NEXT: s_waitcnt vmcnt(0)
279 ; GFX906-NEXT: s_setpc_b64 s[30:31]
281 ; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_zexti8:
282 ; GFX803: ; %bb.0: ; %entry
283 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
284 ; GFX803-NEXT: s_mov_b32 m0, -1
285 ; GFX803-NEXT: ds_read_u8 v0, v0
286 ; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v1
287 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
288 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
289 ; GFX803-NEXT: v_perm_b32 v0, v1, v0, s4
290 ; GFX803-NEXT: flat_store_dword v[0:1], v0
291 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
292 ; GFX803-NEXT: s_setpc_b64 s[30:31]
294 %reg.bc = bitcast i32 %reg to <2 x i16>
295 %load = load i8, i8 addrspace(3)* %in
296 %ext = zext i8 %load to i16
297 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
298 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Loads an i8 from local memory, zero-extends it to i16, and places it in
; element 0 of a <2 x i16> whose element 1 is the i16 argument %reg; the
; result is stored to an undef global pointer. No d16 load is expected on any
; target: all three check a plain ds_read_u8 followed by explicit packing.
302 define void @load_local_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(3)* %in, i16 %reg) #0 {
303 ; GFX900-LABEL: load_local_lo_v2i16_reglo_vreg_zexti8:
304 ; GFX900: ; %bb.0: ; %entry
305 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
306 ; GFX900-NEXT: ds_read_u8 v0, v0
307 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
308 ; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
309 ; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
310 ; GFX900-NEXT: global_store_dword v[0:1], v0, off
311 ; GFX900-NEXT: s_waitcnt vmcnt(0)
312 ; GFX900-NEXT: s_setpc_b64 s[30:31]
314 ; GFX906-LABEL: load_local_lo_v2i16_reglo_vreg_zexti8:
315 ; GFX906: ; %bb.0: ; %entry
316 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
317 ; GFX906-NEXT: ds_read_u8 v0, v0
318 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
319 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
320 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
321 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
322 ; GFX906-NEXT: s_waitcnt vmcnt(0)
323 ; GFX906-NEXT: s_setpc_b64 s[30:31]
325 ; GFX803-LABEL: load_local_lo_v2i16_reglo_vreg_zexti8:
326 ; GFX803: ; %bb.0: ; %entry
327 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
328 ; GFX803-NEXT: s_mov_b32 m0, -1
329 ; GFX803-NEXT: ds_read_u8 v0, v0
330 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
331 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
332 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
333 ; GFX803-NEXT: flat_store_dword v[0:1], v0
334 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
335 ; GFX803-NEXT: s_setpc_b64 s[30:31]
337 %load = load i8, i8 addrspace(3)* %in
338 %ext = zext i8 %load to i16
339 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
340 %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 0
341 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Sign-extending counterpart of the reghi zexti8 test: an i8 loaded from
; local memory is sign-extended to i16 and written into element 0 of %reg
; reinterpreted as <2 x i16> (element 1 of %reg is kept), then stored to an
; undef global pointer. The gfx900 checks expect ds_read_i8_d16 with no extra
; packing. The gfx906 checks expect ds_read_i8 merged via v_bfi_b32; the fiji
; checks expect ds_read_i8 merged via an SDWA v_or that selects WORD_0 of the
; loaded value.
345 define void @load_local_lo_v2i16_reghi_vreg_sexti8(i8 addrspace(3)* %in, i32 %reg) #0 {
346 ; GFX900-LABEL: load_local_lo_v2i16_reghi_vreg_sexti8:
347 ; GFX900: ; %bb.0: ; %entry
348 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
349 ; GFX900-NEXT: ds_read_i8_d16 v1, v0
350 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
351 ; GFX900-NEXT: global_store_dword v[0:1], v1, off
352 ; GFX900-NEXT: s_waitcnt vmcnt(0)
353 ; GFX900-NEXT: s_setpc_b64 s[30:31]
355 ; GFX906-LABEL: load_local_lo_v2i16_reghi_vreg_sexti8:
356 ; GFX906: ; %bb.0: ; %entry
357 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
358 ; GFX906-NEXT: ds_read_i8 v0, v0
359 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
360 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
361 ; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1
362 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
363 ; GFX906-NEXT: s_waitcnt vmcnt(0)
364 ; GFX906-NEXT: s_setpc_b64 s[30:31]
366 ; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_sexti8:
367 ; GFX803: ; %bb.0: ; %entry
368 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
369 ; GFX803-NEXT: s_mov_b32 m0, -1
370 ; GFX803-NEXT: ds_read_i8 v0, v0
371 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
372 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
373 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
374 ; GFX803-NEXT: flat_store_dword v[0:1], v0
375 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
376 ; GFX803-NEXT: s_setpc_b64 s[30:31]
378 %reg.bc = bitcast i32 %reg to <2 x i16>
379 %load = load i8, i8 addrspace(3)* %in
380 %ext = sext i8 %load to i16
381 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
382 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Loads an i8 from local memory, sign-extends it to i16, and places it in
; element 0 of a <2 x i16> whose element 1 is the i16 argument %reg; the
; result is stored to an undef global pointer. No d16 load is expected on any
; target: all three check a plain ds_read_i8 followed by explicit packing
; (an SDWA v_or selecting WORD_0 on fiji).
386 define void @load_local_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(3)* %in, i16 %reg) #0 {
387 ; GFX900-LABEL: load_local_lo_v2i16_reglo_vreg_sexti8:
388 ; GFX900: ; %bb.0: ; %entry
389 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
390 ; GFX900-NEXT: ds_read_i8 v0, v0
391 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
392 ; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
393 ; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
394 ; GFX900-NEXT: global_store_dword v[0:1], v0, off
395 ; GFX900-NEXT: s_waitcnt vmcnt(0)
396 ; GFX900-NEXT: s_setpc_b64 s[30:31]
398 ; GFX906-LABEL: load_local_lo_v2i16_reglo_vreg_sexti8:
399 ; GFX906: ; %bb.0: ; %entry
400 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
401 ; GFX906-NEXT: ds_read_i8 v0, v0
402 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
403 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
404 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
405 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
406 ; GFX906-NEXT: s_waitcnt vmcnt(0)
407 ; GFX906-NEXT: s_setpc_b64 s[30:31]
409 ; GFX803-LABEL: load_local_lo_v2i16_reglo_vreg_sexti8:
410 ; GFX803: ; %bb.0: ; %entry
411 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
412 ; GFX803-NEXT: s_mov_b32 m0, -1
413 ; GFX803-NEXT: ds_read_i8 v0, v0
414 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
415 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
416 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
417 ; GFX803-NEXT: flat_store_dword v[0:1], v0
418 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
419 ; GFX803-NEXT: s_setpc_b64 s[30:31]
421 %load = load i8, i8 addrspace(3)* %in
422 %ext = sext i8 %load to i16
423 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
424 %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 0
425 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Loads an i8 from local memory, zero-extends it to i16, bitcasts that to
; half, and places it in element 0 of a <2 x half> whose element 1 is the
; half argument %reg; the result is stored to an undef global pointer. No d16
; load is expected on any target: all three check a plain ds_read_u8 followed
; by explicit packing.
429 define void @load_local_lo_v2f16_reglo_vreg_zexti8(i8 addrspace(3)* %in, half %reg) #0 {
430 ; GFX900-LABEL: load_local_lo_v2f16_reglo_vreg_zexti8:
431 ; GFX900: ; %bb.0: ; %entry
432 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
433 ; GFX900-NEXT: ds_read_u8 v0, v0
434 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
435 ; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
436 ; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
437 ; GFX900-NEXT: global_store_dword v[0:1], v0, off
438 ; GFX900-NEXT: s_waitcnt vmcnt(0)
439 ; GFX900-NEXT: s_setpc_b64 s[30:31]
441 ; GFX906-LABEL: load_local_lo_v2f16_reglo_vreg_zexti8:
442 ; GFX906: ; %bb.0: ; %entry
443 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
444 ; GFX906-NEXT: ds_read_u8 v0, v0
445 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
446 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
447 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
448 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
449 ; GFX906-NEXT: s_waitcnt vmcnt(0)
450 ; GFX906-NEXT: s_setpc_b64 s[30:31]
452 ; GFX803-LABEL: load_local_lo_v2f16_reglo_vreg_zexti8:
453 ; GFX803: ; %bb.0: ; %entry
454 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
455 ; GFX803-NEXT: s_mov_b32 m0, -1
456 ; GFX803-NEXT: ds_read_u8 v0, v0
457 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
458 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
459 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
460 ; GFX803-NEXT: flat_store_dword v[0:1], v0
461 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
462 ; GFX803-NEXT: s_setpc_b64 s[30:31]
464 %load = load i8, i8 addrspace(3)* %in
465 %ext = zext i8 %load to i16
466 %bitcast = bitcast i16 %ext to half
467 %build0 = insertelement <2 x half> undef, half %reg, i32 1
468 %build1 = insertelement <2 x half> %build0, half %bitcast, i32 0
469 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Loads an i8 from local memory, sign-extends it to i16, bitcasts that to
; half, and places it in element 0 of a <2 x half> whose element 1 is the
; half argument %reg; the result is stored to an undef global pointer. No d16
; load is expected on any target: all three check a plain ds_read_i8 followed
; by explicit packing (an SDWA v_or selecting WORD_0 on fiji).
473 define void @load_local_lo_v2f16_reglo_vreg_sexti8(i8 addrspace(3)* %in, half %reg) #0 {
474 ; GFX900-LABEL: load_local_lo_v2f16_reglo_vreg_sexti8:
475 ; GFX900: ; %bb.0: ; %entry
476 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
477 ; GFX900-NEXT: ds_read_i8 v0, v0
478 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
479 ; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
480 ; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
481 ; GFX900-NEXT: global_store_dword v[0:1], v0, off
482 ; GFX900-NEXT: s_waitcnt vmcnt(0)
483 ; GFX900-NEXT: s_setpc_b64 s[30:31]
485 ; GFX906-LABEL: load_local_lo_v2f16_reglo_vreg_sexti8:
486 ; GFX906: ; %bb.0: ; %entry
487 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
488 ; GFX906-NEXT: ds_read_i8 v0, v0
489 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
490 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
491 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
492 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
493 ; GFX906-NEXT: s_waitcnt vmcnt(0)
494 ; GFX906-NEXT: s_setpc_b64 s[30:31]
496 ; GFX803-LABEL: load_local_lo_v2f16_reglo_vreg_sexti8:
497 ; GFX803: ; %bb.0: ; %entry
498 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
499 ; GFX803-NEXT: s_mov_b32 m0, -1
500 ; GFX803-NEXT: ds_read_i8 v0, v0
501 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
502 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
503 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
504 ; GFX803-NEXT: flat_store_dword v[0:1], v0
505 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
506 ; GFX803-NEXT: s_setpc_b64 s[30:31]
508 %load = load i8, i8 addrspace(3)* %in
509 %ext = sext i8 %load to i16
510 %bitcast = bitcast i16 %ext to half
511 %build0 = insertelement <2 x half> undef, half %reg, i32 1
512 %build1 = insertelement <2 x half> %build0, half %bitcast, i32 0
513 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; The loaded i16 has two uses: it is stored on its own to the null local
; pointer and also inserted into element 0 of %reg, which is then stored to
; an undef global pointer. With the scalar load value needed separately, no
; target is expected to use the d16 load: all three check a plain
; ds_read_u16, a ds_write_b16 of the loaded value, and a separate merge.
; NOTE(review): %elt1 is computed below but never used in this function; it
; looks like a leftover shared with the multi_use_hi variant.
517 define void @load_local_lo_v2i16_reghi_vreg_multi_use_lo(i16 addrspace(3)* %in, <2 x i16> %reg) #0 {
518 ; GFX900-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lo:
519 ; GFX900: ; %bb.0: ; %entry
520 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
521 ; GFX900-NEXT: ds_read_u16 v0, v0
522 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
523 ; GFX900-NEXT: v_mov_b32_e32 v2, 0xffff
524 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
525 ; GFX900-NEXT: ds_write_b16 v3, v0
526 ; GFX900-NEXT: v_bfi_b32 v0, v2, v0, v1
527 ; GFX900-NEXT: global_store_dword v[0:1], v0, off
528 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
529 ; GFX900-NEXT: s_setpc_b64 s[30:31]
531 ; GFX906-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lo:
532 ; GFX906: ; %bb.0: ; %entry
533 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
534 ; GFX906-NEXT: ds_read_u16 v0, v0
535 ; GFX906-NEXT: v_mov_b32_e32 v3, 0
536 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
537 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
538 ; GFX906-NEXT: ds_write_b16 v3, v0
539 ; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1
540 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
541 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
542 ; GFX906-NEXT: s_setpc_b64 s[30:31]
544 ; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lo:
545 ; GFX803: ; %bb.0: ; %entry
546 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
547 ; GFX803-NEXT: s_mov_b32 m0, -1
548 ; GFX803-NEXT: v_mov_b32_e32 v2, 0
549 ; GFX803-NEXT: ds_read_u16 v0, v0
550 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
551 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
552 ; GFX803-NEXT: ds_write_b16 v2, v0
553 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
554 ; GFX803-NEXT: flat_store_dword v[0:1], v0
555 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
556 ; GFX803-NEXT: s_setpc_b64 s[30:31]
558 %load = load i16, i16 addrspace(3)* %in
559 %elt1 = extractelement <2 x i16> %reg, i32 1
560 store i16 %load, i16 addrspace(3)* null
561 %build1 = insertelement <2 x i16> %reg, i16 %load, i32 0
562 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; The high element of %reg has two uses: it is extracted and stored to the
; null local pointer, and it is also kept as element 1 of the vector whose
; element 0 is replaced by the loaded i16. The loaded value itself has a
; single use, and the gfx900 checks still expect the d16 load (after the high
; half has been shifted out into v2 for the ds_write). The gfx906 and fiji
; checks expect a plain ds_read_u16 and an explicit merge.
566 define void @load_local_lo_v2i16_reghi_vreg_multi_use_hi(i16 addrspace(3)* %in, <2 x i16> %reg) #0 {
567 ; GFX900-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_hi:
568 ; GFX900: ; %bb.0: ; %entry
569 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
570 ; GFX900-NEXT: v_lshrrev_b32_e32 v2, 16, v1
571 ; GFX900-NEXT: ds_read_u16_d16 v1, v0
572 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
573 ; GFX900-NEXT: ds_write_b16 v0, v2
574 ; GFX900-NEXT: s_waitcnt lgkmcnt(1)
575 ; GFX900-NEXT: global_store_dword v[0:1], v1, off
576 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
577 ; GFX900-NEXT: s_setpc_b64 s[30:31]
579 ; GFX906-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_hi:
580 ; GFX906: ; %bb.0: ; %entry
581 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
582 ; GFX906-NEXT: ds_read_u16 v0, v0
583 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
584 ; GFX906-NEXT: v_lshrrev_b32_e32 v3, 16, v1
585 ; GFX906-NEXT: v_mov_b32_e32 v4, 0
586 ; GFX906-NEXT: ds_write_b16 v4, v3
587 ; GFX906-NEXT: s_waitcnt lgkmcnt(1)
588 ; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1
589 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
590 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
591 ; GFX906-NEXT: s_setpc_b64 s[30:31]
593 ; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_hi:
594 ; GFX803: ; %bb.0: ; %entry
595 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
596 ; GFX803-NEXT: s_mov_b32 m0, -1
597 ; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v1
598 ; GFX803-NEXT: v_mov_b32_e32 v3, 0
599 ; GFX803-NEXT: ds_read_u16 v0, v0
600 ; GFX803-NEXT: v_lshlrev_b32_e32 v2, 16, v1
601 ; GFX803-NEXT: ds_write_b16 v3, v1
602 ; GFX803-NEXT: s_waitcnt lgkmcnt(1)
603 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v2
604 ; GFX803-NEXT: flat_store_dword v[0:1], v0
605 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
606 ; GFX803-NEXT: s_setpc_b64 s[30:31]
608 %load = load i16, i16 addrspace(3)* %in
609 %elt1 = extractelement <2 x i16> %reg, i32 1
610 store i16 %elt1, i16 addrspace(3)* null
611 %build1 = insertelement <2 x i16> %reg, i16 %load, i32 0
612 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Both the loaded i16 and the high element of %reg have extra uses: the load
; is stored to %out0, the extracted high element to %out1 (all pointer
; arguments are noalias), and the load is also inserted into element 0 of
; %reg before the vector is stored to an undef global pointer. No target is
; expected to use the d16 load: all three check a plain ds_read_u16, two
; ds_write_b16 instructions, and a separate merge.
616 define void @load_local_lo_v2i16_reghi_vreg_multi_use_lohi(i16 addrspace(3)* noalias %in, <2 x i16> %reg, i16 addrspace(3)* noalias %out0, i16 addrspace(3)* noalias %out1) #0 {
617 ; GFX900-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lohi:
618 ; GFX900: ; %bb.0: ; %entry
619 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
620 ; GFX900-NEXT: ds_read_u16 v0, v0
621 ; GFX900-NEXT: v_lshrrev_b32_e32 v5, 16, v1
622 ; GFX900-NEXT: v_mov_b32_e32 v4, 0xffff
623 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
624 ; GFX900-NEXT: ds_write_b16 v2, v0
625 ; GFX900-NEXT: ds_write_b16 v3, v5
626 ; GFX900-NEXT: v_bfi_b32 v0, v4, v0, v1
627 ; GFX900-NEXT: global_store_dword v[0:1], v0, off
628 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
629 ; GFX900-NEXT: s_setpc_b64 s[30:31]
631 ; GFX906-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lohi:
632 ; GFX906: ; %bb.0: ; %entry
633 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
634 ; GFX906-NEXT: ds_read_u16 v0, v0
635 ; GFX906-NEXT: v_lshrrev_b32_e32 v5, 16, v1
636 ; GFX906-NEXT: v_mov_b32_e32 v4, 0xffff
637 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
638 ; GFX906-NEXT: ds_write_b16 v2, v0
639 ; GFX906-NEXT: ds_write_b16 v3, v5
640 ; GFX906-NEXT: v_bfi_b32 v0, v4, v0, v1
641 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
642 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
643 ; GFX906-NEXT: s_setpc_b64 s[30:31]
645 ; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lohi:
646 ; GFX803: ; %bb.0: ; %entry
647 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
648 ; GFX803-NEXT: s_mov_b32 m0, -1
649 ; GFX803-NEXT: ds_read_u16 v0, v0
650 ; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v1
651 ; GFX803-NEXT: v_lshlrev_b32_e32 v4, 16, v1
652 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
653 ; GFX803-NEXT: ds_write_b16 v2, v0
654 ; GFX803-NEXT: ds_write_b16 v3, v1
655 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
656 ; GFX803-NEXT: flat_store_dword v[0:1], v0
657 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
658 ; GFX803-NEXT: s_setpc_b64 s[30:31]
660 %load = load i16, i16 addrspace(3)* %in
661 %elt1 = extractelement <2 x i16> %reg, i32 1
662 store i16 %load, i16 addrspace(3)* %out0
663 store i16 %elt1, i16 addrspace(3)* %out1
664 %build1 = insertelement <2 x i16> %reg, i16 %load, i32 0
665 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Global-memory (addrspace 1) variant: loads an i16 from %in offset by -2047
; elements (-4094 bytes) and writes it into element 0 of %reg reinterpreted
; as <2 x i16>; the result is stored to an undef global pointer. The gfx900
; checks expect global_load_short_d16 with the offset folded into the
; instruction. The gfx906 checks expect global_load_ushort with the same
; folded offset plus a v_bfi_b32 merge. The fiji checks expect the address to
; be computed with a 64-bit add (0xfffff002 with carry) before a
; flat_load_ushort.
; NOTE(review): the name says "reglo", but the IR keeps the high half of %reg
; like the local "reghi" tests - confirm the intended naming.
669 define void @load_global_lo_v2i16_reglo_vreg(i16 addrspace(1)* %in, i32 %reg) #0 {
670 ; GFX900-LABEL: load_global_lo_v2i16_reglo_vreg:
671 ; GFX900: ; %bb.0: ; %entry
672 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
673 ; GFX900-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
674 ; GFX900-NEXT: s_waitcnt vmcnt(0)
675 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
676 ; GFX900-NEXT: s_waitcnt vmcnt(0)
677 ; GFX900-NEXT: s_setpc_b64 s[30:31]
679 ; GFX906-LABEL: load_global_lo_v2i16_reglo_vreg:
680 ; GFX906: ; %bb.0: ; %entry
681 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
682 ; GFX906-NEXT: global_load_ushort v0, v[0:1], off offset:-4094
683 ; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff
684 ; GFX906-NEXT: s_waitcnt vmcnt(0)
685 ; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2
686 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
687 ; GFX906-NEXT: s_waitcnt vmcnt(0)
688 ; GFX906-NEXT: s_setpc_b64 s[30:31]
690 ; GFX803-LABEL: load_global_lo_v2i16_reglo_vreg:
691 ; GFX803: ; %bb.0: ; %entry
692 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
693 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff002, v0
694 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
695 ; GFX803-NEXT: flat_load_ushort v0, v[0:1]
696 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
697 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
698 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
699 ; GFX803-NEXT: flat_store_dword v[0:1], v0
700 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
701 ; GFX803-NEXT: s_setpc_b64 s[30:31]
703 %reg.bc = bitcast i32 %reg to <2 x i16>
704 %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 -2047
705 %load = load i16, i16 addrspace(1)* %gep
706 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
707 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Half-typed global-memory variant: loads a half from %in offset by -2047
; elements (-4094 bytes) and writes it into element 0 of %reg reinterpreted
; as <2 x half>; the result is stored to an undef global pointer. The gfx900
; checks expect global_load_short_d16 with the folded offset and no packing.
; The gfx906 checks expect global_load_ushort plus shift/mask/v_lshl_or
; packing. The fiji checks expect an explicit 64-bit address add before a
; flat_load_ushort, then a mask-and-or merge.
; NOTE(review): the name says "reglo", but the IR keeps the high half of %reg
; - confirm the intended naming.
711 define void @load_global_lo_v2f16_reglo_vreg(half addrspace(1)* %in, i32 %reg) #0 {
712 ; GFX900-LABEL: load_global_lo_v2f16_reglo_vreg:
713 ; GFX900: ; %bb.0: ; %entry
714 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
715 ; GFX900-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
716 ; GFX900-NEXT: s_waitcnt vmcnt(0)
717 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
718 ; GFX900-NEXT: s_waitcnt vmcnt(0)
719 ; GFX900-NEXT: s_setpc_b64 s[30:31]
721 ; GFX906-LABEL: load_global_lo_v2f16_reglo_vreg:
722 ; GFX906: ; %bb.0: ; %entry
723 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
724 ; GFX906-NEXT: global_load_ushort v0, v[0:1], off offset:-4094
725 ; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2
726 ; GFX906-NEXT: s_waitcnt vmcnt(0)
727 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
728 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
729 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
730 ; GFX906-NEXT: s_waitcnt vmcnt(0)
731 ; GFX906-NEXT: s_setpc_b64 s[30:31]
733 ; GFX803-LABEL: load_global_lo_v2f16_reglo_vreg:
734 ; GFX803: ; %bb.0: ; %entry
735 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
736 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff002, v0
737 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
738 ; GFX803-NEXT: flat_load_ushort v0, v[0:1]
739 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
740 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
741 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
742 ; GFX803-NEXT: flat_store_dword v[0:1], v0
743 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
744 ; GFX803-NEXT: s_setpc_b64 s[30:31]
746 %reg.bc = bitcast i32 %reg to <2 x half>
747 %gep = getelementptr inbounds half, half addrspace(1)* %in, i64 -2047
748 %load = load half, half addrspace(1)* %gep
749 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
750 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Global-memory zero-extending byte variant: loads an i8 from %in offset by
; -4095 bytes, zero-extends it to i16, and writes it into element 0 of %reg
; reinterpreted as <2 x i16>; the result is stored to an undef global
; pointer. The gfx900 checks expect global_load_ubyte_d16 with the offset
; folded in. The gfx906 checks expect global_load_ubyte plus a v_bfi_b32
; merge. The fiji checks expect an explicit 64-bit address add (0xfffff001
; with carry), flat_load_ubyte, and a v_perm_b32 merge.
; NOTE(review): the name says "reglo", but the IR keeps the high half of %reg
; - confirm the intended naming.
754 define void @load_global_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(1)* %in, i32 %reg) #0 {
755 ; GFX900-LABEL: load_global_lo_v2i16_reglo_vreg_zexti8:
756 ; GFX900: ; %bb.0: ; %entry
757 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
758 ; GFX900-NEXT: global_load_ubyte_d16 v2, v[0:1], off offset:-4095
759 ; GFX900-NEXT: s_waitcnt vmcnt(0)
760 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
761 ; GFX900-NEXT: s_waitcnt vmcnt(0)
762 ; GFX900-NEXT: s_setpc_b64 s[30:31]
764 ; GFX906-LABEL: load_global_lo_v2i16_reglo_vreg_zexti8:
765 ; GFX906: ; %bb.0: ; %entry
766 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
767 ; GFX906-NEXT: global_load_ubyte v0, v[0:1], off offset:-4095
768 ; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff
769 ; GFX906-NEXT: s_waitcnt vmcnt(0)
770 ; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2
771 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
772 ; GFX906-NEXT: s_waitcnt vmcnt(0)
773 ; GFX906-NEXT: s_setpc_b64 s[30:31]
775 ; GFX803-LABEL: load_global_lo_v2i16_reglo_vreg_zexti8:
776 ; GFX803: ; %bb.0: ; %entry
777 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
778 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0
779 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
780 ; GFX803-NEXT: flat_load_ubyte v0, v[0:1]
781 ; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v2
782 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
783 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
784 ; GFX803-NEXT: v_perm_b32 v0, v1, v0, s4
785 ; GFX803-NEXT: flat_store_dword v[0:1], v0
786 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
787 ; GFX803-NEXT: s_setpc_b64 s[30:31]
789 %reg.bc = bitcast i32 %reg to <2 x i16>
790 %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095
791 %load = load i8, i8 addrspace(1)* %gep
792 %ext = zext i8 %load to i16
793 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
794 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Sign-extending variant of the global i8 low-half insert: the byte at offset
; -4095 is sext'ed to i16 and written to element 0 of the <2 x i16> bitcast of
; %reg. The gfx900 checks expect global_load_sbyte_d16. The gfx906 checks expect
; global_load_sbyte plus v_bfi_b32; the gfx803 checks expect flat_load_sbyte
; with the high half masked by v_and_b32 and merged by an SDWA v_or_b32.
798 define void @load_global_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(1)* %in, i32 %reg) #0 {
799 ; GFX900-LABEL: load_global_lo_v2i16_reglo_vreg_sexti8:
800 ; GFX900: ; %bb.0: ; %entry
801 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
802 ; GFX900-NEXT: global_load_sbyte_d16 v2, v[0:1], off offset:-4095
803 ; GFX900-NEXT: s_waitcnt vmcnt(0)
804 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
805 ; GFX900-NEXT: s_waitcnt vmcnt(0)
806 ; GFX900-NEXT: s_setpc_b64 s[30:31]
808 ; GFX906-LABEL: load_global_lo_v2i16_reglo_vreg_sexti8:
809 ; GFX906: ; %bb.0: ; %entry
810 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
811 ; GFX906-NEXT: global_load_sbyte v0, v[0:1], off offset:-4095
812 ; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff
813 ; GFX906-NEXT: s_waitcnt vmcnt(0)
814 ; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2
815 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
816 ; GFX906-NEXT: s_waitcnt vmcnt(0)
817 ; GFX906-NEXT: s_setpc_b64 s[30:31]
819 ; GFX803-LABEL: load_global_lo_v2i16_reglo_vreg_sexti8:
820 ; GFX803: ; %bb.0: ; %entry
821 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
822 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0
823 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
824 ; GFX803-NEXT: flat_load_sbyte v0, v[0:1]
825 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
826 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
827 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
828 ; GFX803-NEXT: flat_store_dword v[0:1], v0
829 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
830 ; GFX803-NEXT: s_setpc_b64 s[30:31]
832 %reg.bc = bitcast i32 %reg to <2 x i16>
833 %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095
834 %load = load i8, i8 addrspace(1)* %gep
835 %ext = sext i8 %load to i16
836 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
837 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; <2 x half> flavour of the global zext-i8 low-half insert: the loaded byte is
; zero-extended to i16, bitcast to half, and inserted into element 0 of the
; <2 x half> bitcast of %reg. The gfx900 checks still expect a single
; global_load_ubyte_d16. The gfx906 checks expect a shift/and/v_lshl_or_b32
; sequence (rather than v_bfi_b32), and the gfx803 checks expect v_perm_b32.
841 define void @load_global_lo_v2f16_reglo_vreg_zexti8(i8 addrspace(1)* %in, i32 %reg) #0 {
842 ; GFX900-LABEL: load_global_lo_v2f16_reglo_vreg_zexti8:
843 ; GFX900: ; %bb.0: ; %entry
844 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
845 ; GFX900-NEXT: global_load_ubyte_d16 v2, v[0:1], off offset:-4095
846 ; GFX900-NEXT: s_waitcnt vmcnt(0)
847 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
848 ; GFX900-NEXT: s_waitcnt vmcnt(0)
849 ; GFX900-NEXT: s_setpc_b64 s[30:31]
851 ; GFX906-LABEL: load_global_lo_v2f16_reglo_vreg_zexti8:
852 ; GFX906: ; %bb.0: ; %entry
853 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
854 ; GFX906-NEXT: global_load_ubyte v0, v[0:1], off offset:-4095
855 ; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2
856 ; GFX906-NEXT: s_waitcnt vmcnt(0)
857 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
858 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
859 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
860 ; GFX906-NEXT: s_waitcnt vmcnt(0)
861 ; GFX906-NEXT: s_setpc_b64 s[30:31]
863 ; GFX803-LABEL: load_global_lo_v2f16_reglo_vreg_zexti8:
864 ; GFX803: ; %bb.0: ; %entry
865 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
866 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0
867 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
868 ; GFX803-NEXT: flat_load_ubyte v0, v[0:1]
869 ; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v2
870 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
871 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
872 ; GFX803-NEXT: v_perm_b32 v0, v1, v0, s4
873 ; GFX803-NEXT: flat_store_dword v[0:1], v0
874 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
875 ; GFX803-NEXT: s_setpc_b64 s[30:31]
877 %reg.bc = bitcast i32 %reg to <2 x half>
878 %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095
879 %load = load i8, i8 addrspace(1)* %gep
880 %ext = zext i8 %load to i16
881 %bitcast = bitcast i16 %ext to half
882 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0
883 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; <2 x half> flavour of the global sext-i8 low-half insert: the loaded byte is
; sign-extended to i16, bitcast to half, and inserted into element 0 of the
; <2 x half> bitcast of %reg. The gfx900 checks expect global_load_sbyte_d16.
; The gfx906 checks expect a shift/and/v_lshl_or_b32 merge, and the gfx803
; checks expect v_and_b32 plus an SDWA v_or_b32.
887 define void @load_global_lo_v2f16_reglo_vreg_sexti8(i8 addrspace(1)* %in, i32 %reg) #0 {
888 ; GFX900-LABEL: load_global_lo_v2f16_reglo_vreg_sexti8:
889 ; GFX900: ; %bb.0: ; %entry
890 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
891 ; GFX900-NEXT: global_load_sbyte_d16 v2, v[0:1], off offset:-4095
892 ; GFX900-NEXT: s_waitcnt vmcnt(0)
893 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
894 ; GFX900-NEXT: s_waitcnt vmcnt(0)
895 ; GFX900-NEXT: s_setpc_b64 s[30:31]
897 ; GFX906-LABEL: load_global_lo_v2f16_reglo_vreg_sexti8:
898 ; GFX906: ; %bb.0: ; %entry
899 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
900 ; GFX906-NEXT: global_load_sbyte v0, v[0:1], off offset:-4095
901 ; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2
902 ; GFX906-NEXT: s_waitcnt vmcnt(0)
903 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
904 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
905 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
906 ; GFX906-NEXT: s_waitcnt vmcnt(0)
907 ; GFX906-NEXT: s_setpc_b64 s[30:31]
909 ; GFX803-LABEL: load_global_lo_v2f16_reglo_vreg_sexti8:
910 ; GFX803: ; %bb.0: ; %entry
911 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
912 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0
913 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
914 ; GFX803-NEXT: flat_load_sbyte v0, v[0:1]
915 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
916 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
917 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
918 ; GFX803-NEXT: flat_store_dword v[0:1], v0
919 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
920 ; GFX803-NEXT: s_setpc_b64 s[30:31]
922 %reg.bc = bitcast i32 %reg to <2 x half>
923 %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095
924 %load = load i8, i8 addrspace(1)* %gep
925 %ext = sext i8 %load to i16
926 %bitcast = bitcast i16 %ext to half
927 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0
928 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Flat (generic address space) i16 load with no offset, inserted into element 0
; of the <2 x i16> bitcast of %reg. The gfx900 checks expect flat_load_short_d16.
; The gfx906 checks expect flat_load_ushort plus v_bfi_b32, and the gfx803
; checks expect flat_load_ushort plus v_and_b32/v_or_b32.
; NOTE(review): the name says "reghi", but %reg is an i32 bitcast to a vector,
; the same shape the "reglo" tests in this file use - confirm the naming.
932 define void @load_flat_lo_v2i16_reghi_vreg(i16* %in, i32 %reg) #0 {
933 ; GFX900-LABEL: load_flat_lo_v2i16_reghi_vreg:
934 ; GFX900: ; %bb.0: ; %entry
935 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
936 ; GFX900-NEXT: flat_load_short_d16 v2, v[0:1]
937 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
938 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
939 ; GFX900-NEXT: s_waitcnt vmcnt(0)
940 ; GFX900-NEXT: s_setpc_b64 s[30:31]
942 ; GFX906-LABEL: load_flat_lo_v2i16_reghi_vreg:
943 ; GFX906: ; %bb.0: ; %entry
944 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
945 ; GFX906-NEXT: flat_load_ushort v0, v[0:1]
946 ; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff
947 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
948 ; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2
949 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
950 ; GFX906-NEXT: s_waitcnt vmcnt(0)
951 ; GFX906-NEXT: s_setpc_b64 s[30:31]
953 ; GFX803-LABEL: load_flat_lo_v2i16_reghi_vreg:
954 ; GFX803: ; %bb.0: ; %entry
955 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
956 ; GFX803-NEXT: flat_load_ushort v0, v[0:1]
957 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
958 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
959 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
960 ; GFX803-NEXT: flat_store_dword v[0:1], v0
961 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
962 ; GFX803-NEXT: s_setpc_b64 s[30:31]
964 %reg.bc = bitcast i32 %reg to <2 x i16>
965 %load = load i16, i16* %in
966 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
967 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Flat (generic address space) half load with no offset, inserted into element
; 0 of the <2 x half> bitcast of %reg. The gfx900 checks expect
; flat_load_short_d16. The gfx906 checks expect flat_load_ushort plus a
; shift/and/v_lshl_or_b32 merge, and the gfx803 checks expect flat_load_ushort
; plus v_and_b32/v_or_b32 (see the FIXME among the gfx803 checks).
; NOTE(review): the name says "reghi", but %reg is an i32 bitcast to a vector,
; the same shape the "reglo" tests in this file use - confirm the naming.
971 define void @load_flat_lo_v2f16_reghi_vreg(half* %in, i32 %reg) #0 {
972 ; GFX900-LABEL: load_flat_lo_v2f16_reghi_vreg:
973 ; GFX900: ; %bb.0: ; %entry
974 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
975 ; GFX900-NEXT: flat_load_short_d16 v2, v[0:1]
976 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
977 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
978 ; GFX900-NEXT: s_waitcnt vmcnt(0)
979 ; GFX900-NEXT: s_setpc_b64 s[30:31]
981 ; GFX906-LABEL: load_flat_lo_v2f16_reghi_vreg:
982 ; GFX906: ; %bb.0: ; %entry
983 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
984 ; GFX906-NEXT: flat_load_ushort v0, v[0:1]
985 ; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2
986 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
987 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
988 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
989 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
990 ; GFX906-NEXT: s_waitcnt vmcnt(0)
991 ; GFX906-NEXT: s_setpc_b64 s[30:31]
993 ; GFX803-LABEL: load_flat_lo_v2f16_reghi_vreg:
994 ; GFX803: ; %bb.0: ; %entry
995 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
996 ; GFX803-NEXT: flat_load_ushort v0, v[0:1]
997 ; FIXME: and should be removable
998 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
999 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1000 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
1001 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1002 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1003 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1005 %reg.bc = bitcast i32 %reg to <2 x half>
1006 %load = load half, half* %in
1007 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
1008 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Flat (generic address space) i8 load, zero-extended to i16 and inserted into
; element 0 of the <2 x i16> bitcast of %reg. The gfx900 checks expect
; flat_load_ubyte_d16. The gfx906 checks expect flat_load_ubyte plus v_bfi_b32,
; and the gfx803 checks expect flat_load_ubyte plus v_perm_b32.
1012 define void @load_flat_lo_v2i16_reglo_vreg_zexti8(i8* %in, i32 %reg) #0 {
1013 ; GFX900-LABEL: load_flat_lo_v2i16_reglo_vreg_zexti8:
1014 ; GFX900: ; %bb.0: ; %entry
1015 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1016 ; GFX900-NEXT: flat_load_ubyte_d16 v2, v[0:1]
1017 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1018 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
1019 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1020 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1022 ; GFX906-LABEL: load_flat_lo_v2i16_reglo_vreg_zexti8:
1023 ; GFX906: ; %bb.0: ; %entry
1024 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1025 ; GFX906-NEXT: flat_load_ubyte v0, v[0:1]
1026 ; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff
1027 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1028 ; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2
1029 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1030 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1031 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1033 ; GFX803-LABEL: load_flat_lo_v2i16_reglo_vreg_zexti8:
1034 ; GFX803: ; %bb.0: ; %entry
1035 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1036 ; GFX803-NEXT: flat_load_ubyte v0, v[0:1]
1037 ; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v2
1038 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
1039 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1040 ; GFX803-NEXT: v_perm_b32 v0, v1, v0, s4
1041 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1042 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1043 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1045 %reg.bc = bitcast i32 %reg to <2 x i16>
1046 %load = load i8, i8* %in
1047 %ext = zext i8 %load to i16
1048 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
1049 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Flat (generic address space) i8 load, sign-extended to i16 and inserted into
; element 0 of the <2 x i16> bitcast of %reg. The gfx900 checks expect
; flat_load_sbyte_d16. The gfx906 checks expect flat_load_sbyte plus v_bfi_b32,
; and the gfx803 checks expect flat_load_sbyte, v_and_b32 and an SDWA v_or_b32.
1053 define void @load_flat_lo_v2i16_reglo_vreg_sexti8(i8* %in, i32 %reg) #0 {
1054 ; GFX900-LABEL: load_flat_lo_v2i16_reglo_vreg_sexti8:
1055 ; GFX900: ; %bb.0: ; %entry
1056 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1057 ; GFX900-NEXT: flat_load_sbyte_d16 v2, v[0:1]
1058 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1059 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
1060 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1061 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1063 ; GFX906-LABEL: load_flat_lo_v2i16_reglo_vreg_sexti8:
1064 ; GFX906: ; %bb.0: ; %entry
1065 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1066 ; GFX906-NEXT: flat_load_sbyte v0, v[0:1]
1067 ; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff
1068 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1069 ; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2
1070 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1071 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1072 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1074 ; GFX803-LABEL: load_flat_lo_v2i16_reglo_vreg_sexti8:
1075 ; GFX803: ; %bb.0: ; %entry
1076 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1077 ; GFX803-NEXT: flat_load_sbyte v0, v[0:1]
1078 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
1079 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1080 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1081 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1082 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1083 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1085 %reg.bc = bitcast i32 %reg to <2 x i16>
1086 %load = load i8, i8* %in
1087 %ext = sext i8 %load to i16
1088 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
1089 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; <2 x half> flavour of the flat zext-i8 low-half insert: the loaded byte is
; zero-extended to i16, bitcast to half, and inserted into element 0 of the
; <2 x half> bitcast of %reg. The gfx900 checks expect flat_load_ubyte_d16.
; The gfx906 checks expect a shift/and/v_lshl_or_b32 merge, and the gfx803
; checks expect v_perm_b32.
1093 define void @load_flat_lo_v2f16_reglo_vreg_zexti8(i8* %in, i32 %reg) #0 {
1094 ; GFX900-LABEL: load_flat_lo_v2f16_reglo_vreg_zexti8:
1095 ; GFX900: ; %bb.0: ; %entry
1096 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1097 ; GFX900-NEXT: flat_load_ubyte_d16 v2, v[0:1]
1098 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1099 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
1100 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1101 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1103 ; GFX906-LABEL: load_flat_lo_v2f16_reglo_vreg_zexti8:
1104 ; GFX906: ; %bb.0: ; %entry
1105 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1106 ; GFX906-NEXT: flat_load_ubyte v0, v[0:1]
1107 ; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2
1108 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1109 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
1110 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
1111 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1112 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1113 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1115 ; GFX803-LABEL: load_flat_lo_v2f16_reglo_vreg_zexti8:
1116 ; GFX803: ; %bb.0: ; %entry
1117 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1118 ; GFX803-NEXT: flat_load_ubyte v0, v[0:1]
1119 ; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v2
1120 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
1121 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1122 ; GFX803-NEXT: v_perm_b32 v0, v1, v0, s4
1123 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1124 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1125 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1127 %reg.bc = bitcast i32 %reg to <2 x half>
1128 %load = load i8, i8* %in
1129 %ext = zext i8 %load to i16
1130 %bitcast = bitcast i16 %ext to half
1131 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0
1132 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; <2 x half> flavour of the flat sext-i8 low-half insert: the loaded byte is
; sign-extended to i16, bitcast to half, and inserted into element 0 of the
; <2 x half> bitcast of %reg. The gfx900 checks expect flat_load_sbyte_d16.
; The gfx906 checks expect a shift/and/v_lshl_or_b32 merge, and the gfx803
; checks expect v_and_b32 plus an SDWA v_or_b32.
1136 define void @load_flat_lo_v2f16_reglo_vreg_sexti8(i8* %in, i32 %reg) #0 {
1137 ; GFX900-LABEL: load_flat_lo_v2f16_reglo_vreg_sexti8:
1138 ; GFX900: ; %bb.0: ; %entry
1139 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1140 ; GFX900-NEXT: flat_load_sbyte_d16 v2, v[0:1]
1141 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1142 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
1143 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1144 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1146 ; GFX906-LABEL: load_flat_lo_v2f16_reglo_vreg_sexti8:
1147 ; GFX906: ; %bb.0: ; %entry
1148 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1149 ; GFX906-NEXT: flat_load_sbyte v0, v[0:1]
1150 ; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2
1151 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1152 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
1153 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
1154 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1155 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1156 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1158 ; GFX803-LABEL: load_flat_lo_v2f16_reglo_vreg_sexti8:
1159 ; GFX803: ; %bb.0: ; %entry
1160 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1161 ; GFX803-NEXT: flat_load_sbyte v0, v[0:1]
1162 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
1163 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1164 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1165 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1166 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1167 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1169 %reg.bc = bitcast i32 %reg to <2 x half>
1170 %load = load i8, i8* %in
1171 %ext = sext i8 %load to i16
1172 %bitcast = bitcast i16 %ext to half
1173 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0
1174 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Private (addrspace 5) i16 load through a byval pointer at element index 2047,
; inserted into element 0 of the <2 x i16> bitcast of %reg. All three check
; sets expect the access folded into a buffer load relative to s32 with
; offset:4094. The gfx900 checks expect buffer_load_short_d16; the gfx906 and
; gfx803 checks expect buffer_load_ushort followed by a separate merge.
1178 define void @load_private_lo_v2i16_reglo_vreg(i16 addrspace(5)* byval %in, i32 %reg) #0 {
1179 ; GFX900-LABEL: load_private_lo_v2i16_reglo_vreg:
1180 ; GFX900: ; %bb.0: ; %entry
1181 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1182 ; GFX900-NEXT: buffer_load_short_d16 v0, off, s[0:3], s32 offset:4094
1183 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1184 ; GFX900-NEXT: global_store_dword v[0:1], v0, off
1185 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1186 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1188 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg:
1189 ; GFX906: ; %bb.0: ; %entry
1190 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1191 ; GFX906-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1192 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
1193 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1194 ; GFX906-NEXT: v_bfi_b32 v0, v2, v1, v0
1195 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1196 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1197 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1199 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg:
1200 ; GFX803: ; %bb.0: ; %entry
1201 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1202 ; GFX803-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1203 ; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
1204 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1205 ; GFX803-NEXT: v_or_b32_e32 v0, v1, v0
1206 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1207 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1208 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1210 %reg.bc = bitcast i32 %reg to <2 x i16>
1211 %gep = getelementptr inbounds i16, i16 addrspace(5)* %in, i64 2047
1212 %load = load i16, i16 addrspace(5)* %gep
1213 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
1214 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Private (addrspace 5) i16 load through a byval pointer at element index 2047,
; where the high half comes from a scalar: the i16 %reg is inserted into element
; 1 of an undef <2 x i16>, then the loaded value into element 0. Unlike the
; "reglo" variant, none of the three check sets expects a d16 load here: all
; use buffer_load_ushort plus a shift/or style merge.
1218 define void @load_private_lo_v2i16_reghi_vreg(i16 addrspace(5)* byval %in, i16 %reg) #0 {
1219 ; GFX900-LABEL: load_private_lo_v2i16_reghi_vreg:
1220 ; GFX900: ; %bb.0: ; %entry
1221 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1222 ; GFX900-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1223 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1224 ; GFX900-NEXT: v_and_b32_e32 v1, 0xffff, v1
1225 ; GFX900-NEXT: v_lshl_or_b32 v0, v0, 16, v1
1226 ; GFX900-NEXT: global_store_dword v[0:1], v0, off
1227 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1228 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1230 ; GFX906-LABEL: load_private_lo_v2i16_reghi_vreg:
1231 ; GFX906: ; %bb.0: ; %entry
1232 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1233 ; GFX906-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1234 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1235 ; GFX906-NEXT: v_and_b32_e32 v1, 0xffff, v1
1236 ; GFX906-NEXT: v_lshl_or_b32 v0, v0, 16, v1
1237 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1238 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1239 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1241 ; GFX803-LABEL: load_private_lo_v2i16_reghi_vreg:
1242 ; GFX803: ; %bb.0: ; %entry
1243 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1244 ; GFX803-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1245 ; GFX803-NEXT: v_lshlrev_b32_e32 v0, 16, v0
1246 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1247 ; GFX803-NEXT: v_or_b32_e32 v0, v1, v0
1248 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1249 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1250 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1252 %gep = getelementptr inbounds i16, i16 addrspace(5)* %in, i64 2047
1253 %load = load i16, i16 addrspace(5)* %gep
1254 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
1255 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
1256 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Private (addrspace 5) half load through a byval pointer at element index
; 2047, inserted into element 0 of the <2 x half> bitcast of %reg. The gfx900
; checks expect buffer_load_short_d16 at offset:4094. The gfx906 checks expect
; buffer_load_ushort plus a shift/and/v_lshl_or_b32 merge, and the gfx803
; checks expect buffer_load_ushort plus v_and_b32/v_or_b32.
1260 define void @load_private_lo_v2f16_reglo_vreg(half addrspace(5)* byval %in, i32 %reg) #0 {
1261 ; GFX900-LABEL: load_private_lo_v2f16_reglo_vreg:
1262 ; GFX900: ; %bb.0: ; %entry
1263 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1264 ; GFX900-NEXT: buffer_load_short_d16 v0, off, s[0:3], s32 offset:4094
1265 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1266 ; GFX900-NEXT: global_store_dword v[0:1], v0, off
1267 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1268 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1270 ; GFX906-LABEL: load_private_lo_v2f16_reglo_vreg:
1271 ; GFX906: ; %bb.0: ; %entry
1272 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1273 ; GFX906-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1274 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1275 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1276 ; GFX906-NEXT: v_and_b32_e32 v1, 0xffff, v1
1277 ; GFX906-NEXT: v_lshl_or_b32 v0, v0, 16, v1
1278 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1279 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1280 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1282 ; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg:
1283 ; GFX803: ; %bb.0: ; %entry
1284 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1285 ; GFX803-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1286 ; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
1287 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1288 ; GFX803-NEXT: v_or_b32_e32 v0, v1, v0
1289 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1290 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1291 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1293 %reg.bc = bitcast i32 %reg to <2 x half>
1294 %gep = getelementptr inbounds half, half addrspace(5)* %in, i64 2047
1295 %load = load half, half addrspace(5)* %gep
1296 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
1297 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Private (addrspace 5) volatile i16 load from the constant address 4094
; (inttoptr), inserted into element 0 of the <2 x i16> bitcast of %reg. The %in
; argument is not used by the visible body. All check sets expect a buffer load
; using s33 with offset:4094. The gfx900 checks expect buffer_load_short_d16;
; the gfx906 and gfx803 checks expect buffer_load_ushort plus a separate merge.
1301 define void @load_private_lo_v2i16_reglo_vreg_nooff(i16 addrspace(5)* %in, i32 %reg) #0 {
1302 ; GFX900-LABEL: load_private_lo_v2i16_reglo_vreg_nooff:
1303 ; GFX900: ; %bb.0: ; %entry
1304 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1305 ; GFX900-NEXT: buffer_load_short_d16 v1, off, s[0:3], s33 offset:4094
1306 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1307 ; GFX900-NEXT: global_store_dword v[0:1], v1, off
1308 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1309 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1311 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_nooff:
1312 ; GFX906: ; %bb.0: ; %entry
1313 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1314 ; GFX906-NEXT: buffer_load_ushort v0, off, s[0:3], s33 offset:4094
1315 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
1316 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1317 ; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1
1318 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1319 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1320 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1322 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_nooff:
1323 ; GFX803: ; %bb.0: ; %entry
1324 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1325 ; GFX803-NEXT: buffer_load_ushort v0, off, s[0:3], s33 offset:4094
1326 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
1327 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1328 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
1329 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1330 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1331 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1333 %reg.bc = bitcast i32 %reg to <2 x i16>
1334 %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 4094 to i16 addrspace(5)*)
1335 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
1336 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Private (addrspace 5) volatile i16 load from the constant address 4094
; (inttoptr), inserted into element 0 of the <2 x i16> bitcast of %reg. The
; gfx900 checks expect buffer_load_short_d16; the gfx906 and gfx803 checks
; expect buffer_load_ushort plus a separate merge.
; NOTE(review): the visible signature, IR and expected output are identical to
; load_private_lo_v2i16_reglo_vreg_nooff, so despite the "reghi" name this
; looks like a duplicate rather than a distinct high-half-from-scalar case -
; confirm whether that is intended.
1340 define void @load_private_lo_v2i16_reghi_vreg_nooff(i16 addrspace(5)* %in, i32 %reg) #0 {
1341 ; GFX900-LABEL: load_private_lo_v2i16_reghi_vreg_nooff:
1342 ; GFX900: ; %bb.0: ; %entry
1343 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1344 ; GFX900-NEXT: buffer_load_short_d16 v1, off, s[0:3], s33 offset:4094
1345 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1346 ; GFX900-NEXT: global_store_dword v[0:1], v1, off
1347 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1348 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1350 ; GFX906-LABEL: load_private_lo_v2i16_reghi_vreg_nooff:
1351 ; GFX906: ; %bb.0: ; %entry
1352 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1353 ; GFX906-NEXT: buffer_load_ushort v0, off, s[0:3], s33 offset:4094
1354 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
1355 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1356 ; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1
1357 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1358 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1359 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1361 ; GFX803-LABEL: load_private_lo_v2i16_reghi_vreg_nooff:
1362 ; GFX803: ; %bb.0: ; %entry
1363 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1364 ; GFX803-NEXT: buffer_load_ushort v0, off, s[0:3], s33 offset:4094
1365 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
1366 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1367 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
1368 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1369 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1370 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1372 %reg.bc = bitcast i32 %reg to <2 x i16>
1373 %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 4094 to i16 addrspace(5)*)
1374 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
1375 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Private (addrspace 5) volatile half load from the constant address 4094
; (inttoptr), inserted into element 0 of the <2 x half> bitcast of %reg. The
; %in argument is not used by the visible body. The gfx900 checks expect
; buffer_load_short_d16 using s33 with offset:4094. The gfx906 checks expect
; buffer_load_ushort plus a shift/and/v_lshl_or_b32 merge, and the gfx803
; checks expect buffer_load_ushort plus v_and_b32/v_or_b32.
1379 define void @load_private_lo_v2f16_reglo_vreg_nooff(half addrspace(5)* %in, i32 %reg) #0 {
1380 ; GFX900-LABEL: load_private_lo_v2f16_reglo_vreg_nooff:
1381 ; GFX900: ; %bb.0: ; %entry
1382 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1383 ; GFX900-NEXT: buffer_load_short_d16 v1, off, s[0:3], s33 offset:4094
1384 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1385 ; GFX900-NEXT: global_store_dword v[0:1], v1, off
1386 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1387 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1389 ; GFX906-LABEL: load_private_lo_v2f16_reglo_vreg_nooff:
1390 ; GFX906: ; %bb.0: ; %entry
1391 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1392 ; GFX906-NEXT: buffer_load_ushort v0, off, s[0:3], s33 offset:4094
1393 ; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1394 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1395 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
1396 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
1397 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1398 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1399 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1401 ; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg_nooff:
1402 ; GFX803: ; %bb.0: ; %entry
1403 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1404 ; GFX803-NEXT: buffer_load_ushort v0, off, s[0:3], s33 offset:4094
1405 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
1406 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1407 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
1408 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1409 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1410 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1412 %reg.bc = bitcast i32 %reg to <2 x half>
1413 %load = load volatile half, half addrspace(5)* inttoptr (i32 4094 to half addrspace(5)*)
1414 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
1415 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Private (addrspace 5) i8 load through a byval pointer at byte offset 4095,
; zero-extended to i16 and inserted into element 0 of the <2 x i16> bitcast of
; %reg. All check sets expect a buffer load relative to s32 with offset:4095.
; The gfx900 checks expect buffer_load_ubyte_d16. The gfx906 checks expect
; buffer_load_ubyte plus v_bfi_b32, and the gfx803 checks expect v_perm_b32.
1419 define void @load_private_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(5)* byval %in, i32 %reg) #0 {
1420 ; GFX900-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8:
1421 ; GFX900: ; %bb.0: ; %entry
1422 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1423 ; GFX900-NEXT: buffer_load_ubyte_d16 v0, off, s[0:3], s32 offset:4095
1424 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1425 ; GFX900-NEXT: global_store_dword v[0:1], v0, off
1426 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1427 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1429 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8:
1430 ; GFX906: ; %bb.0: ; %entry
1431 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1432 ; GFX906-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095
1433 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
1434 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1435 ; GFX906-NEXT: v_bfi_b32 v0, v2, v1, v0
1436 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1437 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1438 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1440 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8:
1441 ; GFX803: ; %bb.0: ; %entry
1442 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1443 ; GFX803-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095
1444 ; GFX803-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1445 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
1446 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1447 ; GFX803-NEXT: v_perm_b32 v0, v0, v1, s4
1448 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1449 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1450 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1452 %reg.bc = bitcast i32 %reg to <2 x i16>
1453 %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 4095
1454 %load = load i8, i8 addrspace(5)* %gep
1455 %ext = zext i8 %load to i16
1456 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
1457 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Private (addrspace 5) i8 load through a byval pointer at byte offset 4095,
; sign-extended to i16 and inserted into element 0 of the <2 x i16> bitcast of
; %reg. The gfx900 checks expect buffer_load_sbyte_d16 at offset:4095. The
; gfx906 checks expect buffer_load_sbyte plus v_bfi_b32, and the gfx803 checks
; expect buffer_load_sbyte, v_and_b32 and an SDWA v_or_b32.
1461 define void @load_private_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(5)* byval %in, i32 %reg) #0 {
1462 ; GFX900-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8:
1463 ; GFX900: ; %bb.0: ; %entry
1464 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1465 ; GFX900-NEXT: buffer_load_sbyte_d16 v0, off, s[0:3], s32 offset:4095
1466 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1467 ; GFX900-NEXT: global_store_dword v[0:1], v0, off
1468 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1469 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1471 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8:
1472 ; GFX906: ; %bb.0: ; %entry
1473 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1474 ; GFX906-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095
1475 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
1476 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1477 ; GFX906-NEXT: v_bfi_b32 v0, v2, v1, v0
1478 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1479 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1480 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1482 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8:
1483 ; GFX803: ; %bb.0: ; %entry
1484 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1485 ; GFX803-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095
1486 ; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
1487 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1488 ; GFX803-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1489 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1490 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1491 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1493 %reg.bc = bitcast i32 %reg to <2 x i16>
1494 %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 4095
1495 %load = load i8, i8 addrspace(5)* %gep
1496 %ext = sext i8 %load to i16
1497 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
1498 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Private (addrspace 5) volatile i8 load from the constant address 4094
; (inttoptr), zero-extended to i16 and inserted into element 0 of the
; <2 x i16> bitcast of %reg. The %in argument is not used by the visible body.
; The gfx900 checks expect buffer_load_ubyte_d16 using s33 with offset:4094.
; The gfx906 checks expect buffer_load_ubyte plus v_bfi_b32, and the gfx803
; checks expect buffer_load_ubyte plus v_perm_b32.
1502 define void @load_private_lo_v2i16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, i32 %reg) #0 {
1503 ; GFX900-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_zexti8:
1504 ; GFX900: ; %bb.0: ; %entry
1505 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1506 ; GFX900-NEXT: buffer_load_ubyte_d16 v1, off, s[0:3], s33 offset:4094
1507 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1508 ; GFX900-NEXT: global_store_dword v[0:1], v1, off
1509 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1510 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1512 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_zexti8:
1513 ; GFX906: ; %bb.0: ; %entry
1514 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1515 ; GFX906-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:4094
1516 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
1517 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1518 ; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1
1519 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1520 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1521 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1523 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_zexti8:
1524 ; GFX803: ; %bb.0: ; %entry
1525 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1526 ; GFX803-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:4094
1527 ; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1528 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
1529 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1530 ; GFX803-NEXT: v_perm_b32 v0, v1, v0, s4
1531 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1532 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1533 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1535 %reg.bc = bitcast i32 %reg to <2 x i16>
1536 %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
1537 %ext = zext i8 %load to i16
1538 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
1539 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Low-half insert of a sign-extended private i8 load from a constant address.
; The IR performs a volatile i8 load from the addrspace(5) address 4094
; (built with inttoptr), sign-extends it to i16, writes it into element 0 of
; %reg reinterpreted as <2 x i16>, and stores the vector through an undef
; addrspace(1) pointer. The %in parameter is not referenced by the visible body.
; Expected code per target, as captured by the assertions below:
;  - gfx900 checks: a single buffer_load_sbyte_d16 into v1 at s33 offset:4094.
;  - gfx906 checks: plain buffer_load_sbyte merged via v_bfi_b32 with 0xffff.
;  - fiji (gfx803) checks: plain buffer_load_sbyte, a 0xffff0000 mask of the
;    register operand, and an SDWA v_or_b32 selecting WORD_0 of the load.
1543 define void @load_private_lo_v2i16_reglo_vreg_nooff_sexti8(i8 addrspace(5)* %in, i32 %reg) #0 {
1544 ; GFX900-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_sexti8:
1545 ; GFX900: ; %bb.0: ; %entry
1546 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1547 ; GFX900-NEXT: buffer_load_sbyte_d16 v1, off, s[0:3], s33 offset:4094
1548 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1549 ; GFX900-NEXT: global_store_dword v[0:1], v1, off
1550 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1551 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1553 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_sexti8:
1554 ; GFX906: ; %bb.0: ; %entry
1555 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1556 ; GFX906-NEXT: buffer_load_sbyte v0, off, s[0:3], s33 offset:4094
1557 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
1558 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1559 ; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1
1560 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1561 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1562 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1564 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_sexti8:
1565 ; GFX803: ; %bb.0: ; %entry
1566 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1567 ; GFX803-NEXT: buffer_load_sbyte v0, off, s[0:3], s33 offset:4094
1568 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
1569 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1570 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1571 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1572 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1573 ; GFX803-NEXT: s_setpc_b64 s[30:31]
; IR under test: sext(volatile i8 load at private address 4094) replaces element 0 of %reg.
1575 %reg.bc = bitcast i32 %reg to <2 x i16>
1576 %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
1577 %ext = sext i8 %load to i16
1578 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
1579 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; <2 x half> variant of the zero-extended private i8 low-half insert.
; The IR performs a volatile i8 load from the addrspace(5) address 4094
; (built with inttoptr), zero-extends it to i16, bitcasts that to half, writes
; it into element 0 of %reg reinterpreted as <2 x half>, and stores the vector
; through an undef addrspace(1) pointer. The %in parameter is not referenced
; by the visible body.
; Expected code per target, as captured by the assertions below:
;  - gfx900 checks: a single buffer_load_ubyte_d16 into v1 at s33 offset:4094.
;  - gfx906 checks: plain buffer_load_ubyte, a 16-bit right shift of the
;    register operand, a 0xffff mask of the load, and v_lshl_or_b32.
;  - fiji (gfx803) checks: plain buffer_load_ubyte, a 16-bit right shift of the
;    register operand, and v_perm_b32 with selector 0x5040c00.
1583 define void @load_private_lo_v2f16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, i32 %reg) #0 {
1584 ; GFX900-LABEL: load_private_lo_v2f16_reglo_vreg_nooff_zexti8:
1585 ; GFX900: ; %bb.0: ; %entry
1586 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1587 ; GFX900-NEXT: buffer_load_ubyte_d16 v1, off, s[0:3], s33 offset:4094
1588 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1589 ; GFX900-NEXT: global_store_dword v[0:1], v1, off
1590 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1591 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1593 ; GFX906-LABEL: load_private_lo_v2f16_reglo_vreg_nooff_zexti8:
1594 ; GFX906: ; %bb.0: ; %entry
1595 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1596 ; GFX906-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:4094
1597 ; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1598 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1599 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
1600 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
1601 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1602 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1603 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1605 ; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg_nooff_zexti8:
1606 ; GFX803: ; %bb.0: ; %entry
1607 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1608 ; GFX803-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:4094
1609 ; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1610 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
1611 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1612 ; GFX803-NEXT: v_perm_b32 v0, v1, v0, s4
1613 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1614 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1615 ; GFX803-NEXT: s_setpc_b64 s[30:31]
; IR under test: zext(volatile i8 load at private address 4094), bitcast to half, replaces element 0 of %reg.
1617 %reg.bc = bitcast i32 %reg to <2 x half>
1618 %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
1619 %ext = zext i8 %load to i16
1620 %bc.ext = bitcast i16 %ext to half
1621 %build1 = insertelement <2 x half> %reg.bc, half %bc.ext, i32 0
1622 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Low-half insert of an i16 loaded from the constant address space.
; The IR loads the i16 at element index -2047 from the addrspace(4) pointer
; %in, writes it into element 0 of %reg reinterpreted as <2 x i16>, and stores
; the vector through an undef addrspace(1) pointer.
; Expected code per target, as captured by the assertions below:
;  - gfx900 checks: a single global_load_short_d16 into v2 with offset:-4094.
;  - gfx906 checks: plain global_load_ushort with offset:-4094 merged via
;    v_bfi_b32 with a 0xffff mask.
;  - fiji (gfx803) checks: the address is adjusted with a v_add_u32 /
;    v_addc_u32 pair (0xfffff002 low word, -1 high word), then flat_load_ushort,
;    a 0xffff0000 mask of the register operand, and a plain v_or_b32.
1626 define void @load_constant_lo_v2i16_reglo_vreg(i16 addrspace(4)* %in, i32 %reg) #0 {
1627 ; GFX900-LABEL: load_constant_lo_v2i16_reglo_vreg:
1628 ; GFX900: ; %bb.0: ; %entry
1629 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1630 ; GFX900-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
1631 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1632 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
1633 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1634 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1636 ; GFX906-LABEL: load_constant_lo_v2i16_reglo_vreg:
1637 ; GFX906: ; %bb.0: ; %entry
1638 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1639 ; GFX906-NEXT: global_load_ushort v0, v[0:1], off offset:-4094
1640 ; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff
1641 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1642 ; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2
1643 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1644 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1645 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1647 ; GFX803-LABEL: load_constant_lo_v2i16_reglo_vreg:
1648 ; GFX803: ; %bb.0: ; %entry
1649 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1650 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff002, v0
1651 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
1652 ; GFX803-NEXT: flat_load_ushort v0, v[0:1]
1653 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
1654 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1655 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
1656 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1657 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1658 ; GFX803-NEXT: s_setpc_b64 s[30:31]
; IR under test: the i16 at %in[-2047] replaces element 0 of %reg.
1660 %reg.bc = bitcast i32 %reg to <2 x i16>
1661 %gep = getelementptr inbounds i16, i16 addrspace(4)* %in, i64 -2047
1662 %load = load i16, i16 addrspace(4)* %gep
1663 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
1664 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Low-half insert of a half loaded from the constant address space.
; The IR loads the half at element index -2047 from the addrspace(4) pointer
; %in, writes it into element 0 of %reg reinterpreted as <2 x half>, and stores
; the vector through an undef addrspace(1) pointer.
; Expected code per target, as captured by the assertions below:
;  - gfx900 checks: a single global_load_short_d16 into v2 with offset:-4094.
;  - gfx906 checks: plain global_load_ushort with offset:-4094, a 16-bit right
;    shift of the register operand, a 0xffff mask of the load, and v_lshl_or_b32.
;  - fiji (gfx803) checks: the address is adjusted with a v_add_u32 /
;    v_addc_u32 pair (0xfffff002 low word, -1 high word), then flat_load_ushort,
;    a 0xffff0000 mask of the register operand, and a plain v_or_b32.
1668 define void @load_constant_lo_v2f16_reglo_vreg(half addrspace(4)* %in, i32 %reg) #0 {
1669 ; GFX900-LABEL: load_constant_lo_v2f16_reglo_vreg:
1670 ; GFX900: ; %bb.0: ; %entry
1671 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1672 ; GFX900-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
1673 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1674 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
1675 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1676 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1678 ; GFX906-LABEL: load_constant_lo_v2f16_reglo_vreg:
1679 ; GFX906: ; %bb.0: ; %entry
1680 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1681 ; GFX906-NEXT: global_load_ushort v0, v[0:1], off offset:-4094
1682 ; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2
1683 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1684 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
1685 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
1686 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1687 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1688 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1690 ; GFX803-LABEL: load_constant_lo_v2f16_reglo_vreg:
1691 ; GFX803: ; %bb.0: ; %entry
1692 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1693 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff002, v0
1694 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
1695 ; GFX803-NEXT: flat_load_ushort v0, v[0:1]
1696 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
1697 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1698 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
1699 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1700 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1701 ; GFX803-NEXT: s_setpc_b64 s[30:31]
; IR under test: the half at %in[-2047] replaces element 0 of %reg.
1703 %reg.bc = bitcast i32 %reg to <2 x half>
1704 %gep = getelementptr inbounds half, half addrspace(4)* %in, i64 -2047
1705 %load = load half, half addrspace(4)* %gep
1706 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
1707 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Low-half insert of a zero-extended i8 loaded from the constant address space,
; <2 x half> flavour.
; The IR loads the i8 at byte offset -4095 from the addrspace(4) pointer %in,
; zero-extends it to i16, bitcasts that to half, writes it into element 0 of
; %reg reinterpreted as <2 x half>, and stores the vector through an undef
; addrspace(1) pointer.
; Expected code per target, as captured by the assertions below:
;  - gfx900 checks: a single global_load_ubyte_d16 into v2 with offset:-4095.
;  - gfx906 checks: plain global_load_ubyte with offset:-4095, a 16-bit right
;    shift of the register operand, a 0xffff mask of the load, and v_lshl_or_b32.
;  - fiji (gfx803) checks: the address is adjusted with a v_add_u32 /
;    v_addc_u32 pair (0xfffff001 low word, -1 high word), then flat_load_ubyte,
;    a 16-bit right shift of the register operand, and v_perm_b32 with
;    selector 0x5040c00.
1711 define void @load_constant_lo_v2f16_reglo_vreg_zexti8(i8 addrspace(4)* %in, i32 %reg) #0 {
1712 ; GFX900-LABEL: load_constant_lo_v2f16_reglo_vreg_zexti8:
1713 ; GFX900: ; %bb.0: ; %entry
1714 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1715 ; GFX900-NEXT: global_load_ubyte_d16 v2, v[0:1], off offset:-4095
1716 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1717 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
1718 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1719 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1721 ; GFX906-LABEL: load_constant_lo_v2f16_reglo_vreg_zexti8:
1722 ; GFX906: ; %bb.0: ; %entry
1723 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1724 ; GFX906-NEXT: global_load_ubyte v0, v[0:1], off offset:-4095
1725 ; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2
1726 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1727 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
1728 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
1729 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1730 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1731 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1733 ; GFX803-LABEL: load_constant_lo_v2f16_reglo_vreg_zexti8:
1734 ; GFX803: ; %bb.0: ; %entry
1735 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1736 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0
1737 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
1738 ; GFX803-NEXT: flat_load_ubyte v0, v[0:1]
1739 ; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v2
1740 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
1741 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1742 ; GFX803-NEXT: v_perm_b32 v0, v1, v0, s4
1743 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1744 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1745 ; GFX803-NEXT: s_setpc_b64 s[30:31]
; IR under test: zext(i8 at %in - 4095), bitcast to half, replaces element 0 of %reg.
1747 %reg.bc = bitcast i32 %reg to <2 x half>
1748 %gep = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 -4095
1749 %load = load i8, i8 addrspace(4)* %gep
1750 %ext = zext i8 %load to i16
1751 %bitcast = bitcast i16 %ext to half
1752 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0
1753 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Low-half insert of a sign-extended i8 loaded from the constant address space,
; <2 x half> flavour.
; The IR loads the i8 at byte offset -4095 from the addrspace(4) pointer %in,
; sign-extends it to i16, bitcasts that to half, writes it into element 0 of
; %reg reinterpreted as <2 x half>, and stores the vector through an undef
; addrspace(1) pointer.
; Expected code per target, as captured by the assertions below:
;  - gfx900 checks: a single global_load_sbyte_d16 into v2 with offset:-4095.
;  - gfx906 checks: plain global_load_sbyte with offset:-4095, a 16-bit right
;    shift of the register operand, a 0xffff mask of the load, and v_lshl_or_b32.
;  - fiji (gfx803) checks: the address is adjusted with a v_add_u32 /
;    v_addc_u32 pair (0xfffff001 low word, -1 high word), then flat_load_sbyte,
;    a 0xffff0000 mask of the register operand, and an SDWA v_or_b32 selecting
;    WORD_0 of the load.
1757 define void @load_constant_lo_v2f16_reglo_vreg_sexti8(i8 addrspace(4)* %in, i32 %reg) #0 {
1758 ; GFX900-LABEL: load_constant_lo_v2f16_reglo_vreg_sexti8:
1759 ; GFX900: ; %bb.0: ; %entry
1760 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1761 ; GFX900-NEXT: global_load_sbyte_d16 v2, v[0:1], off offset:-4095
1762 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1763 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
1764 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1765 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1767 ; GFX906-LABEL: load_constant_lo_v2f16_reglo_vreg_sexti8:
1768 ; GFX906: ; %bb.0: ; %entry
1769 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1770 ; GFX906-NEXT: global_load_sbyte v0, v[0:1], off offset:-4095
1771 ; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2
1772 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1773 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
1774 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
1775 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1776 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1777 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1779 ; GFX803-LABEL: load_constant_lo_v2f16_reglo_vreg_sexti8:
1780 ; GFX803: ; %bb.0: ; %entry
1781 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1782 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0
1783 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
1784 ; GFX803-NEXT: flat_load_sbyte v0, v[0:1]
1785 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
1786 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1787 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1788 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1789 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1790 ; GFX803-NEXT: s_setpc_b64 s[30:31]
; IR under test: sext(i8 at %in - 4095), bitcast to half, replaces element 0 of %reg.
1792 %reg.bc = bitcast i32 %reg to <2 x half>
1793 %gep = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 -4095
1794 %load = load i8, i8 addrspace(4)* %gep
1795 %ext = sext i8 %load to i16
1796 %bitcast = bitcast i16 %ext to half
1797 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0
1798 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Low-half insert of an i16 loaded from a stack object, checking that the
; access is emitted with an immediate offset.
; The IR allocates two addrspace(5) objects: %obj0 ([10 x i32], receives a
; volatile store of 123) and %obj1 ([4096 x i16]). It performs a volatile i16
; load from element 2027 of %obj1, writes it into element 0 of %reg
; reinterpreted as <2 x i16>, and stores the vector through an undef
; addrspace(1) pointer.
; Expected code per target, as captured by the assertions below: every target
; stores 0x7b at s32 with no offset and loads at s32 offset:4094 (presumably
; 40 bytes of %obj0 plus 2 * 2027 -- frame layout inferred, confirm).
;  - gfx900 checks: a single buffer_load_short_d16 into v0.
;  - gfx906 checks: plain buffer_load_ushort merged via v_bfi_b32 with 0xffff.
;  - fiji (gfx803) checks: plain buffer_load_ushort, a 0xffff0000 mask of the
;    register operand, and a plain v_or_b32.
1802 define void @load_private_lo_v2i16_reglo_vreg_to_offset(i32 %reg) #0 {
1803 ; GFX900-LABEL: load_private_lo_v2i16_reglo_vreg_to_offset:
1804 ; GFX900: ; %bb.0: ; %entry
1805 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1806 ; GFX900-NEXT: v_mov_b32_e32 v1, 0x7b
1807 ; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s32
1808 ; GFX900-NEXT: buffer_load_short_d16 v0, off, s[0:3], s32 offset:4094
1809 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1810 ; GFX900-NEXT: global_store_dword v[0:1], v0, off
1811 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1812 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1814 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_to_offset:
1815 ; GFX906: ; %bb.0: ; %entry
1816 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1817 ; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b
1818 ; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32
1819 ; GFX906-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1820 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
1821 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1822 ; GFX906-NEXT: v_bfi_b32 v0, v2, v1, v0
1823 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1824 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1825 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1827 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_to_offset:
1828 ; GFX803: ; %bb.0: ; %entry
1829 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1830 ; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b
1831 ; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32
1832 ; GFX803-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1833 ; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
1834 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1835 ; GFX803-NEXT: v_or_b32_e32 v0, v1, v0
1836 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1837 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1838 ; GFX803-NEXT: s_setpc_b64 s[30:31]
; IR under test: %obj0 is only written (volatile store of 123); the value inserted comes from %obj1[2027].
1840 %obj0 = alloca [10 x i32], align 4, addrspace(5)
1841 %obj1 = alloca [4096 x i16], align 2, addrspace(5)
1842 %reg.bc = bitcast i32 %reg to <2 x i16>
1843 %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
1844 store volatile i32 123, i32 addrspace(5)* %bc
1845 %gep = getelementptr inbounds [4096 x i16], [4096 x i16] addrspace(5)* %obj1, i32 0, i32 2027
1846 %load = load volatile i16, i16 addrspace(5)* %gep
1847 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
1848 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Low-half insert of a sign-extended i8 loaded from a stack object, checking
; that the access is emitted with an immediate offset.
; The IR allocates two addrspace(5) objects: %obj0 ([10 x i32], receives a
; volatile store of 123) and %obj1 ([4096 x i8]). It performs a volatile i8
; load from element 4055 of %obj1, sign-extends it to i16, writes it into
; element 0 of %reg reinterpreted as <2 x i16>, and stores the vector through
; an undef addrspace(1) pointer.
; Expected code per target, as captured by the assertions below: every target
; stores 0x7b at s32 with no offset and loads at s32 offset:4095 (presumably
; 40 bytes of %obj0 plus 4055 -- frame layout inferred, confirm).
;  - gfx900 checks: a single buffer_load_sbyte_d16 into v0.
;  - gfx906 checks: plain buffer_load_sbyte merged via v_bfi_b32 with 0xffff.
;  - fiji (gfx803) checks: plain buffer_load_sbyte, a 0xffff0000 mask of the
;    register operand, and an SDWA v_or_b32 selecting WORD_0 of the load.
1852 define void @load_private_lo_v2i16_reglo_vreg_sexti8_to_offset(i32 %reg) #0 {
1853 ; GFX900-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8_to_offset:
1854 ; GFX900: ; %bb.0: ; %entry
1855 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1856 ; GFX900-NEXT: v_mov_b32_e32 v1, 0x7b
1857 ; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s32
1858 ; GFX900-NEXT: buffer_load_sbyte_d16 v0, off, s[0:3], s32 offset:4095
1859 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1860 ; GFX900-NEXT: global_store_dword v[0:1], v0, off
1861 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1862 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1864 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8_to_offset:
1865 ; GFX906: ; %bb.0: ; %entry
1866 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1867 ; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b
1868 ; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32
1869 ; GFX906-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095
1870 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
1871 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1872 ; GFX906-NEXT: v_bfi_b32 v0, v2, v1, v0
1873 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1874 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1875 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1877 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8_to_offset:
1878 ; GFX803: ; %bb.0: ; %entry
1879 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1880 ; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b
1881 ; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32
1882 ; GFX803-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095
1883 ; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
1884 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1885 ; GFX803-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1886 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1887 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1888 ; GFX803-NEXT: s_setpc_b64 s[30:31]
; IR under test: %obj0 is only written (volatile store of 123); the value inserted is sext(%obj1[4055]).
1890 %obj0 = alloca [10 x i32], align 4, addrspace(5)
1891 %obj1 = alloca [4096 x i8], align 2, addrspace(5)
1892 %reg.bc = bitcast i32 %reg to <2 x i16>
1893 %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
1894 store volatile i32 123, i32 addrspace(5)* %bc
1895 %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4055
1896 %load = load volatile i8, i8 addrspace(5)* %gep
1897 %load.ext = sext i8 %load to i16
1898 %build1 = insertelement <2 x i16> %reg.bc, i16 %load.ext, i32 0
1899 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Low-half insert of a zero-extended i8 loaded from a stack object, checking
; that the access is emitted with an immediate offset.
; The IR allocates two addrspace(5) objects: %obj0 ([10 x i32], receives a
; volatile store of 123) and %obj1 ([4096 x i8]). It performs a volatile i8
; load from element 4055 of %obj1, zero-extends it to i16, writes it into
; element 0 of %reg reinterpreted as <2 x i16>, and stores the vector through
; an undef addrspace(1) pointer.
; Expected code per target, as captured by the assertions below: every target
; stores 0x7b at s32 with no offset and loads at s32 offset:4095 (presumably
; 40 bytes of %obj0 plus 4055 -- frame layout inferred, confirm).
;  - gfx900 checks: a single buffer_load_ubyte_d16 into v0.
;  - gfx906 checks: plain buffer_load_ubyte merged via v_bfi_b32 with 0xffff.
;  - fiji (gfx803) checks: plain buffer_load_ubyte, a 16-bit right shift of the
;    register operand, and v_perm_b32 with selector 0x5040c00.
1903 define void @load_private_lo_v2i16_reglo_vreg_zexti8_to_offset(i32 %reg) #0 {
1904 ; GFX900-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8_to_offset:
1905 ; GFX900: ; %bb.0: ; %entry
1906 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1907 ; GFX900-NEXT: v_mov_b32_e32 v1, 0x7b
1908 ; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s32
1909 ; GFX900-NEXT: buffer_load_ubyte_d16 v0, off, s[0:3], s32 offset:4095
1910 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1911 ; GFX900-NEXT: global_store_dword v[0:1], v0, off
1912 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1913 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1915 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8_to_offset:
1916 ; GFX906: ; %bb.0: ; %entry
1917 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1918 ; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b
1919 ; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32
1920 ; GFX906-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095
1921 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
1922 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1923 ; GFX906-NEXT: v_bfi_b32 v0, v2, v1, v0
1924 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1925 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1926 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1928 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8_to_offset:
1929 ; GFX803: ; %bb.0: ; %entry
1930 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1931 ; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b
1932 ; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32
1933 ; GFX803-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095
1934 ; GFX803-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1935 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
1936 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1937 ; GFX803-NEXT: v_perm_b32 v0, v0, v1, s4
1938 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1939 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1940 ; GFX803-NEXT: s_setpc_b64 s[30:31]
; IR under test: %obj0 is only written (volatile store of 123); the value inserted is zext(%obj1[4055]).
1942 %obj0 = alloca [10 x i32], align 4, addrspace(5)
1943 %obj1 = alloca [4096 x i8], align 2, addrspace(5)
1944 %reg.bc = bitcast i32 %reg to <2 x i16>
1945 %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
1946 store volatile i32 123, i32 addrspace(5)* %bc
1947 %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4055
1948 %load = load volatile i8, i8 addrspace(5)* %gep
1949 %load.ext = zext i8 %load to i16
1950 %build1 = insertelement <2 x i16> %reg.bc, i16 %load.ext, i32 0
1951 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; <2 x half> variant: low-half insert of a sign-extended i8 loaded from a stack
; object, checking that the access is emitted with an immediate offset.
; The IR allocates two addrspace(5) objects: %obj0 ([10 x i32], receives a
; volatile store of 123) and %obj1 ([4096 x i8]). It performs a volatile i8
; load from element 4055 of %obj1, sign-extends it to i16, bitcasts that to
; half, writes it into element 0 of %reg reinterpreted as <2 x half>, and
; stores the vector through an undef addrspace(1) pointer.
; Expected code per target, as captured by the assertions below: every target
; stores 0x7b at s32 with no offset and loads at s32 offset:4095 (presumably
; 40 bytes of %obj0 plus 4055 -- frame layout inferred, confirm).
;  - gfx900 checks: a single buffer_load_sbyte_d16 into v0.
;  - gfx906 checks: plain buffer_load_sbyte, a 16-bit right shift of the
;    register operand, a 0xffff mask of the load, and v_lshl_or_b32.
;  - fiji (gfx803) checks: plain buffer_load_sbyte, a 0xffff0000 mask of the
;    register operand, and an SDWA v_or_b32 selecting WORD_0 of the load.
1955 define void @load_private_lo_v2f16_reglo_vreg_sexti8_to_offset(i32 %reg) #0 {
1956 ; GFX900-LABEL: load_private_lo_v2f16_reglo_vreg_sexti8_to_offset:
1957 ; GFX900: ; %bb.0: ; %entry
1958 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1959 ; GFX900-NEXT: v_mov_b32_e32 v1, 0x7b
1960 ; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s32
1961 ; GFX900-NEXT: buffer_load_sbyte_d16 v0, off, s[0:3], s32 offset:4095
1962 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1963 ; GFX900-NEXT: global_store_dword v[0:1], v0, off
1964 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1965 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1967 ; GFX906-LABEL: load_private_lo_v2f16_reglo_vreg_sexti8_to_offset:
1968 ; GFX906: ; %bb.0: ; %entry
1969 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1970 ; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b
1971 ; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32
1972 ; GFX906-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095
1973 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1974 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1975 ; GFX906-NEXT: v_and_b32_e32 v1, 0xffff, v1
1976 ; GFX906-NEXT: v_lshl_or_b32 v0, v0, 16, v1
1977 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1978 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1979 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1981 ; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg_sexti8_to_offset:
1982 ; GFX803: ; %bb.0: ; %entry
1983 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1984 ; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b
1985 ; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32
1986 ; GFX803-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095
1987 ; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
1988 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1989 ; GFX803-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1990 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1991 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1992 ; GFX803-NEXT: s_setpc_b64 s[30:31]
; IR under test: %obj0 is only written (volatile store of 123); the value inserted is sext(%obj1[4055]) bitcast to half.
1994 %obj0 = alloca [10 x i32], align 4, addrspace(5)
1995 %obj1 = alloca [4096 x i8], align 2, addrspace(5)
1996 %reg.bc = bitcast i32 %reg to <2 x half>
1997 %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
1998 store volatile i32 123, i32 addrspace(5)* %bc
1999 %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4055
2000 %load = load volatile i8, i8 addrspace(5)* %gep
2001 %load.ext = sext i8 %load to i16
2002 %bitcast = bitcast i16 %load.ext to half
2003 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0
2004 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; <2 x half> variant: low-half insert of a zero-extended i8 loaded from a stack
; object, checking that the access is emitted with an immediate offset.
; The IR allocates two addrspace(5) objects: %obj0 ([10 x i32], receives a
; volatile store of 123) and %obj1 ([4096 x i8]). It performs a volatile i8
; load from element 4055 of %obj1, zero-extends it to i16, bitcasts that to
; half, writes it into element 0 of %reg reinterpreted as <2 x half>, and
; stores the vector through an undef addrspace(1) pointer.
; Expected code per target, as captured by the assertions below: every target
; stores 0x7b at s32 with no offset and loads at s32 offset:4095 (presumably
; 40 bytes of %obj0 plus 4055 -- frame layout inferred, confirm).
;  - gfx900 checks: a single buffer_load_ubyte_d16 into v0.
;  - gfx906 checks: plain buffer_load_ubyte, a 16-bit right shift of the
;    register operand, a 0xffff mask of the load, and v_lshl_or_b32.
;  - fiji (gfx803) checks: plain buffer_load_ubyte, a 16-bit right shift of the
;    register operand, and v_perm_b32 with selector 0x5040c00.
2008 define void @load_private_lo_v2f16_reglo_vreg_zexti8_to_offset(i32 %reg) #0 {
2009 ; GFX900-LABEL: load_private_lo_v2f16_reglo_vreg_zexti8_to_offset:
2010 ; GFX900: ; %bb.0: ; %entry
2011 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2012 ; GFX900-NEXT: v_mov_b32_e32 v1, 0x7b
2013 ; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s32
2014 ; GFX900-NEXT: buffer_load_ubyte_d16 v0, off, s[0:3], s32 offset:4095
2015 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2016 ; GFX900-NEXT: global_store_dword v[0:1], v0, off
2017 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2018 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2020 ; GFX906-LABEL: load_private_lo_v2f16_reglo_vreg_zexti8_to_offset:
2021 ; GFX906: ; %bb.0: ; %entry
2022 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2023 ; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b
2024 ; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32
2025 ; GFX906-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095
2026 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2027 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2028 ; GFX906-NEXT: v_and_b32_e32 v1, 0xffff, v1
2029 ; GFX906-NEXT: v_lshl_or_b32 v0, v0, 16, v1
2030 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
2031 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2032 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2034 ; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg_zexti8_to_offset:
2035 ; GFX803: ; %bb.0: ; %entry
2036 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2037 ; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b
2038 ; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32
2039 ; GFX803-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095
2040 ; GFX803-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2041 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
2042 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2043 ; GFX803-NEXT: v_perm_b32 v0, v0, v1, s4
2044 ; GFX803-NEXT: flat_store_dword v[0:1], v0
2045 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2046 ; GFX803-NEXT: s_setpc_b64 s[30:31]
; IR under test: %obj0 is only written (volatile store of 123); the value inserted is zext(%obj1[4055]) bitcast to half.
2048 %obj0 = alloca [10 x i32], align 4, addrspace(5)
2049 %obj1 = alloca [4096 x i8], align 2, addrspace(5)
2050 %reg.bc = bitcast i32 %reg to <2 x half>
2051 %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
2052 store volatile i32 123, i32 addrspace(5)* %bc
2053 %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4055
2054 %load = load volatile i8, i8 addrspace(5)* %gep
2055 %load.ext = zext i8 %load to i16
2056 %bitcast = bitcast i16 %load.ext to half
2057 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0
2058 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Attribute group #0, referenced by the function definitions in this file: nounwind only.
2062 attributes #0 = { nounwind }