1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,GFX900-MUBUF %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-promote-alloca,+sram-ecc -verify-machineinstrs < %s | FileCheck --check-prefix=GFX906 %s
4 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck --check-prefix=GFX803 %s
5 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs --mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GFX900,GFX900-FLATSCR %s
; Insert an i16 loaded from addrspace(3) into element 0 of an undef <2 x i16>;
; element 1 is never written. The checks expect ds_read_u16_d16 on gfx900 and a
; plain ds_read_u16 on gfx906 (run with +sram-ecc) and on fiji.
7 define <2 x i16> @load_local_lo_v2i16_undeflo(ptr addrspace(3) %in) #0 {
8 ; GFX900-LABEL: load_local_lo_v2i16_undeflo:
9 ; GFX900: ; %bb.0: ; %entry
10 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; GFX900-NEXT: ds_read_u16_d16 v0, v0
12 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
13 ; GFX900-NEXT: s_setpc_b64 s[30:31]
15 ; GFX906-LABEL: load_local_lo_v2i16_undeflo:
16 ; GFX906: ; %bb.0: ; %entry
17 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18 ; GFX906-NEXT: ds_read_u16 v0, v0
19 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
20 ; GFX906-NEXT: s_setpc_b64 s[30:31]
22 ; GFX803-LABEL: load_local_lo_v2i16_undeflo:
23 ; GFX803: ; %bb.0: ; %entry
24 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25 ; GFX803-NEXT: s_mov_b32 m0, -1
26 ; GFX803-NEXT: ds_read_u16 v0, v0
27 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
28 ; GFX803-NEXT: s_setpc_b64 s[30:31]
30 %load = load i16, ptr addrspace(3) %in
31 %build = insertelement <2 x i16> undef, i16 %load, i32 0
; Build a <2 x i16> whose element 1 is the i16 argument %reg and whose element 0
; is an i16 loaded from addrspace(3). No run line is expected to use a d16 load
; here - the gfx900 and gfx906 checks combine the halves with v_perm_b32, the
; fiji checks with a shift and an or. The two gfx900 runs differ only in the
; scalar register that holds the permute mask (s4 versus s0).
35 define <2 x i16> @load_local_lo_v2i16_reglo(ptr addrspace(3) %in, i16 %reg) #0 {
36 ; GFX900-MUBUF-LABEL: load_local_lo_v2i16_reglo:
37 ; GFX900-MUBUF: ; %bb.0: ; %entry
38 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39 ; GFX900-MUBUF-NEXT: ds_read_u16 v0, v0
40 ; GFX900-MUBUF-NEXT: s_mov_b32 s4, 0x5040100
41 ; GFX900-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
42 ; GFX900-MUBUF-NEXT: v_perm_b32 v0, v1, v0, s4
43 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
45 ; GFX906-LABEL: load_local_lo_v2i16_reglo:
46 ; GFX906: ; %bb.0: ; %entry
47 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48 ; GFX906-NEXT: ds_read_u16 v0, v0
49 ; GFX906-NEXT: s_mov_b32 s4, 0x5040100
50 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
51 ; GFX906-NEXT: v_perm_b32 v0, v1, v0, s4
52 ; GFX906-NEXT: s_setpc_b64 s[30:31]
54 ; GFX803-LABEL: load_local_lo_v2i16_reglo:
55 ; GFX803: ; %bb.0: ; %entry
56 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
57 ; GFX803-NEXT: s_mov_b32 m0, -1
58 ; GFX803-NEXT: ds_read_u16 v0, v0
59 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
60 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
61 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
62 ; GFX803-NEXT: s_setpc_b64 s[30:31]
64 ; GFX900-FLATSCR-LABEL: load_local_lo_v2i16_reglo:
65 ; GFX900-FLATSCR: ; %bb.0: ; %entry
66 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
67 ; GFX900-FLATSCR-NEXT: ds_read_u16 v0, v0
68 ; GFX900-FLATSCR-NEXT: s_mov_b32 s0, 0x5040100
69 ; GFX900-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
70 ; GFX900-FLATSCR-NEXT: v_perm_b32 v0, v1, v0, s0
71 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
73 %load = load i16, ptr addrspace(3) %in
74 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
75 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
79 ; Show that we get reasonable regalloc without physreg constraints.
; Same vector build as load_local_lo_v2i16_reglo (element 1 from %reg, element 0
; from the addrspace(3) load), but the result is stored to an undef addrspace(1)
; pointer instead of being returned, so it is not tied to a return register.
; The checks add a global_store_dword (flat_store_dword on fiji) after the merge.
80 define void @load_local_lo_v2i16_reglo_vreg(ptr addrspace(3) %in, i16 %reg) #0 {
81 ; GFX900-MUBUF-LABEL: load_local_lo_v2i16_reglo_vreg:
82 ; GFX900-MUBUF: ; %bb.0: ; %entry
83 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
84 ; GFX900-MUBUF-NEXT: ds_read_u16 v0, v0
85 ; GFX900-MUBUF-NEXT: s_mov_b32 s4, 0x5040100
86 ; GFX900-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
87 ; GFX900-MUBUF-NEXT: v_perm_b32 v0, v1, v0, s4
88 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
89 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
90 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
92 ; GFX906-LABEL: load_local_lo_v2i16_reglo_vreg:
93 ; GFX906: ; %bb.0: ; %entry
94 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
95 ; GFX906-NEXT: ds_read_u16 v0, v0
96 ; GFX906-NEXT: s_mov_b32 s4, 0x5040100
97 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
98 ; GFX906-NEXT: v_perm_b32 v0, v1, v0, s4
99 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
100 ; GFX906-NEXT: s_waitcnt vmcnt(0)
101 ; GFX906-NEXT: s_setpc_b64 s[30:31]
103 ; GFX803-LABEL: load_local_lo_v2i16_reglo_vreg:
104 ; GFX803: ; %bb.0: ; %entry
105 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
106 ; GFX803-NEXT: s_mov_b32 m0, -1
107 ; GFX803-NEXT: ds_read_u16 v0, v0
108 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
109 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
110 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
111 ; GFX803-NEXT: flat_store_dword v[0:1], v0
112 ; GFX803-NEXT: s_waitcnt vmcnt(0)
113 ; GFX803-NEXT: s_setpc_b64 s[30:31]
115 ; GFX900-FLATSCR-LABEL: load_local_lo_v2i16_reglo_vreg:
116 ; GFX900-FLATSCR: ; %bb.0: ; %entry
117 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
118 ; GFX900-FLATSCR-NEXT: ds_read_u16 v0, v0
119 ; GFX900-FLATSCR-NEXT: s_mov_b32 s0, 0x5040100
120 ; GFX900-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
121 ; GFX900-FLATSCR-NEXT: v_perm_b32 v0, v1, v0, s0
122 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
123 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
124 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
126 %load = load i16, ptr addrspace(3) %in
127 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
128 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
129 store <2 x i16> %build1, ptr addrspace(1) undef
; Insert an i16 loaded from addrspace(3) into element 0 of a zeroinitializer
; <2 x i16>, so element 1 must read as zero. The gfx900 checks zero v1 with
; v_mov_b32, load into it with ds_read_u16_d16 and copy the result to v0. The
; gfx906 checks use ds_read_u16 followed by a v_and with 0xffff, and the fiji
; checks use ds_read_u16 alone.
133 define <2 x i16> @load_local_lo_v2i16_zerolo(ptr addrspace(3) %in) #0 {
134 ; GFX900-LABEL: load_local_lo_v2i16_zerolo:
135 ; GFX900: ; %bb.0: ; %entry
136 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
137 ; GFX900-NEXT: v_mov_b32_e32 v1, 0
138 ; GFX900-NEXT: ds_read_u16_d16 v1, v0
139 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
140 ; GFX900-NEXT: v_mov_b32_e32 v0, v1
141 ; GFX900-NEXT: s_setpc_b64 s[30:31]
143 ; GFX906-LABEL: load_local_lo_v2i16_zerolo:
144 ; GFX906: ; %bb.0: ; %entry
145 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
146 ; GFX906-NEXT: ds_read_u16 v0, v0
147 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
148 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
149 ; GFX906-NEXT: s_setpc_b64 s[30:31]
151 ; GFX803-LABEL: load_local_lo_v2i16_zerolo:
152 ; GFX803: ; %bb.0: ; %entry
153 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
154 ; GFX803-NEXT: s_mov_b32 m0, -1
155 ; GFX803-NEXT: ds_read_u16 v0, v0
156 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
157 ; GFX803-NEXT: s_setpc_b64 s[30:31]
159 %load = load i16, ptr addrspace(3) %in
160 %build = insertelement <2 x i16> zeroinitializer, i16 %load, i32 0
; Insert a half loaded from addrspace(3) into element 0 of the constant vector
; <half 0.0, half 2.0>, so element 1 stays 2.0 in the returned value. The gfx900
; checks materialize the constant in v1 and load into it with ds_read_u16_d16.
; The gfx906 checks load with ds_read_u16 and merge with 0x4000 through
; v_perm_b32. The fiji checks or the loaded value with an inline 2.0 operand.
164 define <2 x half> @load_local_lo_v2f16_fpimm(ptr addrspace(3) %in) #0 {
165 ; GFX900-LABEL: load_local_lo_v2f16_fpimm:
166 ; GFX900: ; %bb.0: ; %entry
167 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
168 ; GFX900-NEXT: v_mov_b32_e32 v1, 2.0
169 ; GFX900-NEXT: ds_read_u16_d16 v1, v0
170 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
171 ; GFX900-NEXT: v_mov_b32_e32 v0, v1
172 ; GFX900-NEXT: s_setpc_b64 s[30:31]
174 ; GFX906-LABEL: load_local_lo_v2f16_fpimm:
175 ; GFX906: ; %bb.0: ; %entry
176 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
177 ; GFX906-NEXT: ds_read_u16 v0, v0
178 ; GFX906-NEXT: s_movk_i32 s4, 0x4000
179 ; GFX906-NEXT: v_mov_b32_e32 v1, 0x5040100
180 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
181 ; GFX906-NEXT: v_perm_b32 v0, s4, v0, v1
182 ; GFX906-NEXT: s_setpc_b64 s[30:31]
184 ; GFX803-LABEL: load_local_lo_v2f16_fpimm:
185 ; GFX803: ; %bb.0: ; %entry
186 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
187 ; GFX803-NEXT: s_mov_b32 m0, -1
188 ; GFX803-NEXT: ds_read_u16 v0, v0
189 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
190 ; GFX803-NEXT: v_or_b32_e32 v0, 2.0, v0
191 ; GFX803-NEXT: s_setpc_b64 s[30:31]
193 %load = load half, ptr addrspace(3) %in
194 %build = insertelement <2 x half> <half 0.0, half 2.0>, half %load, i32 0
195 ret <2 x half> %build
; The i32 argument %reg is bitcast to <2 x half> and its element 0 is replaced
; by a half loaded from addrspace(3); element 1 of %reg is kept. The result is
; stored to an undef addrspace(1) pointer. The gfx900 checks expect a single
; ds_read_u16_d16 into v1 with no separate merge instruction. The gfx906 checks
; merge with v_bfi_b32 and a 0xffff mask, the fiji checks with v_perm_b32.
198 define void @load_local_lo_v2f16_reghi_vreg(ptr addrspace(3) %in, i32 %reg) #0 {
199 ; GFX900-LABEL: load_local_lo_v2f16_reghi_vreg:
200 ; GFX900: ; %bb.0: ; %entry
201 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
202 ; GFX900-NEXT: ds_read_u16_d16 v1, v0
203 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
204 ; GFX900-NEXT: global_store_dword v[0:1], v1, off
205 ; GFX900-NEXT: s_waitcnt vmcnt(0)
206 ; GFX900-NEXT: s_setpc_b64 s[30:31]
208 ; GFX906-LABEL: load_local_lo_v2f16_reghi_vreg:
209 ; GFX906: ; %bb.0: ; %entry
210 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
211 ; GFX906-NEXT: ds_read_u16 v0, v0
212 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
213 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
214 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v1
215 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
216 ; GFX906-NEXT: s_waitcnt vmcnt(0)
217 ; GFX906-NEXT: s_setpc_b64 s[30:31]
219 ; GFX803-LABEL: load_local_lo_v2f16_reghi_vreg:
220 ; GFX803: ; %bb.0: ; %entry
221 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
222 ; GFX803-NEXT: s_mov_b32 m0, -1
223 ; GFX803-NEXT: ds_read_u16 v0, v0
224 ; GFX803-NEXT: s_mov_b32 s4, 0x3020504
225 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
226 ; GFX803-NEXT: v_perm_b32 v0, v0, v1, s4
227 ; GFX803-NEXT: flat_store_dword v[0:1], v0
228 ; GFX803-NEXT: s_waitcnt vmcnt(0)
229 ; GFX803-NEXT: s_setpc_b64 s[30:31]
231 %reg.bc = bitcast i32 %reg to <2 x half>
232 %load = load half, ptr addrspace(3) %in
233 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
234 store <2 x half> %build1, ptr addrspace(1) undef
; Build a <2 x half> whose element 1 is the half argument %reg and whose
; element 0 is a half loaded from addrspace(3), then store it to an undef
; addrspace(1) pointer. Every run line is expected to use a plain ds_read_u16 -
; gfx900 and gfx906 merge with v_perm_b32, fiji with a shift and an or.
238 define void @load_local_lo_v2f16_reglo_vreg(ptr addrspace(3) %in, half %reg) #0 {
239 ; GFX900-MUBUF-LABEL: load_local_lo_v2f16_reglo_vreg:
240 ; GFX900-MUBUF: ; %bb.0: ; %entry
241 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
242 ; GFX900-MUBUF-NEXT: ds_read_u16 v0, v0
243 ; GFX900-MUBUF-NEXT: s_mov_b32 s4, 0x5040100
244 ; GFX900-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
245 ; GFX900-MUBUF-NEXT: v_perm_b32 v0, v1, v0, s4
246 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
247 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
248 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
250 ; GFX906-LABEL: load_local_lo_v2f16_reglo_vreg:
251 ; GFX906: ; %bb.0: ; %entry
252 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
253 ; GFX906-NEXT: ds_read_u16 v0, v0
254 ; GFX906-NEXT: s_mov_b32 s4, 0x5040100
255 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
256 ; GFX906-NEXT: v_perm_b32 v0, v1, v0, s4
257 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
258 ; GFX906-NEXT: s_waitcnt vmcnt(0)
259 ; GFX906-NEXT: s_setpc_b64 s[30:31]
261 ; GFX803-LABEL: load_local_lo_v2f16_reglo_vreg:
262 ; GFX803: ; %bb.0: ; %entry
263 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
264 ; GFX803-NEXT: s_mov_b32 m0, -1
265 ; GFX803-NEXT: ds_read_u16 v0, v0
266 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
267 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
268 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
269 ; GFX803-NEXT: flat_store_dword v[0:1], v0
270 ; GFX803-NEXT: s_waitcnt vmcnt(0)
271 ; GFX803-NEXT: s_setpc_b64 s[30:31]
273 ; GFX900-FLATSCR-LABEL: load_local_lo_v2f16_reglo_vreg:
274 ; GFX900-FLATSCR: ; %bb.0: ; %entry
275 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
276 ; GFX900-FLATSCR-NEXT: ds_read_u16 v0, v0
277 ; GFX900-FLATSCR-NEXT: s_mov_b32 s0, 0x5040100
278 ; GFX900-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
279 ; GFX900-FLATSCR-NEXT: v_perm_b32 v0, v1, v0, s0
280 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
281 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
282 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
284 %load = load half, ptr addrspace(3) %in
285 %build0 = insertelement <2 x half> undef, half %reg, i32 1
286 %build1 = insertelement <2 x half> %build0, half %load, i32 0
287 store <2 x half> %build1, ptr addrspace(1) undef
; An i8 is loaded from addrspace(3), zero-extended to i16 and inserted into
; element 0 of %reg (an i32 bitcast to <2 x i16>); element 1 of %reg is kept and
; the result is stored to an undef addrspace(1) pointer. The gfx900 checks
; expect ds_read_u8_d16 directly into v1. The gfx906 checks use ds_read_u8 plus
; v_bfi_b32, and the fiji checks mask %reg with 0xffff0000 and or in the load.
291 define void @load_local_lo_v2i16_reghi_vreg_zexti8(ptr addrspace(3) %in, i32 %reg) #0 {
292 ; GFX900-LABEL: load_local_lo_v2i16_reghi_vreg_zexti8:
293 ; GFX900: ; %bb.0: ; %entry
294 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
295 ; GFX900-NEXT: ds_read_u8_d16 v1, v0
296 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
297 ; GFX900-NEXT: global_store_dword v[0:1], v1, off
298 ; GFX900-NEXT: s_waitcnt vmcnt(0)
299 ; GFX900-NEXT: s_setpc_b64 s[30:31]
301 ; GFX906-LABEL: load_local_lo_v2i16_reghi_vreg_zexti8:
302 ; GFX906: ; %bb.0: ; %entry
303 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
304 ; GFX906-NEXT: ds_read_u8 v0, v0
305 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
306 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
307 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v1
308 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
309 ; GFX906-NEXT: s_waitcnt vmcnt(0)
310 ; GFX906-NEXT: s_setpc_b64 s[30:31]
312 ; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_zexti8:
313 ; GFX803: ; %bb.0: ; %entry
314 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
315 ; GFX803-NEXT: s_mov_b32 m0, -1
316 ; GFX803-NEXT: ds_read_u8 v0, v0
317 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
318 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
319 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
320 ; GFX803-NEXT: flat_store_dword v[0:1], v0
321 ; GFX803-NEXT: s_waitcnt vmcnt(0)
322 ; GFX803-NEXT: s_setpc_b64 s[30:31]
324 %reg.bc = bitcast i32 %reg to <2 x i16>
325 %load = load i8, ptr addrspace(3) %in
326 %ext = zext i8 %load to i16
327 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
328 store <2 x i16> %build1, ptr addrspace(1) undef
; Element 1 of the result is the i16 argument %reg; element 0 is an i8 loaded
; from addrspace(3) and zero-extended to i16. The vector is stored to an undef
; addrspace(1) pointer. Every run line is expected to use a plain ds_read_u8 -
; gfx900 and gfx906 merge with v_perm_b32, fiji with a shift and an or.
332 define void @load_local_lo_v2i16_reglo_vreg_zexti8(ptr addrspace(3) %in, i16 %reg) #0 {
333 ; GFX900-MUBUF-LABEL: load_local_lo_v2i16_reglo_vreg_zexti8:
334 ; GFX900-MUBUF: ; %bb.0: ; %entry
335 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
336 ; GFX900-MUBUF-NEXT: ds_read_u8 v0, v0
337 ; GFX900-MUBUF-NEXT: s_mov_b32 s4, 0x5040100
338 ; GFX900-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
339 ; GFX900-MUBUF-NEXT: v_perm_b32 v0, v1, v0, s4
340 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
341 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
342 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
344 ; GFX906-LABEL: load_local_lo_v2i16_reglo_vreg_zexti8:
345 ; GFX906: ; %bb.0: ; %entry
346 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
347 ; GFX906-NEXT: ds_read_u8 v0, v0
348 ; GFX906-NEXT: s_mov_b32 s4, 0x5040100
349 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
350 ; GFX906-NEXT: v_perm_b32 v0, v1, v0, s4
351 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
352 ; GFX906-NEXT: s_waitcnt vmcnt(0)
353 ; GFX906-NEXT: s_setpc_b64 s[30:31]
355 ; GFX803-LABEL: load_local_lo_v2i16_reglo_vreg_zexti8:
356 ; GFX803: ; %bb.0: ; %entry
357 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
358 ; GFX803-NEXT: s_mov_b32 m0, -1
359 ; GFX803-NEXT: ds_read_u8 v0, v0
360 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
361 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
362 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
363 ; GFX803-NEXT: flat_store_dword v[0:1], v0
364 ; GFX803-NEXT: s_waitcnt vmcnt(0)
365 ; GFX803-NEXT: s_setpc_b64 s[30:31]
367 ; GFX900-FLATSCR-LABEL: load_local_lo_v2i16_reglo_vreg_zexti8:
368 ; GFX900-FLATSCR: ; %bb.0: ; %entry
369 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
370 ; GFX900-FLATSCR-NEXT: ds_read_u8 v0, v0
371 ; GFX900-FLATSCR-NEXT: s_mov_b32 s0, 0x5040100
372 ; GFX900-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
373 ; GFX900-FLATSCR-NEXT: v_perm_b32 v0, v1, v0, s0
374 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
375 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
376 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
378 %load = load i8, ptr addrspace(3) %in
379 %ext = zext i8 %load to i16
380 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
381 %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 0
382 store <2 x i16> %build1, ptr addrspace(1) undef
; An i8 is loaded from addrspace(3), sign-extended to i16 and inserted into
; element 0 of %reg (an i32 bitcast to <2 x i16>); element 1 of %reg is kept and
; the result is stored to an undef addrspace(1) pointer. The gfx900 checks
; expect ds_read_i8_d16 directly into v1. The gfx906 checks use ds_read_i8 plus
; v_bfi_b32. The fiji checks mask %reg with 0xffff0000 and merge with an SDWA or
; that selects WORD_0 of the loaded value.
386 define void @load_local_lo_v2i16_reghi_vreg_sexti8(ptr addrspace(3) %in, i32 %reg) #0 {
387 ; GFX900-LABEL: load_local_lo_v2i16_reghi_vreg_sexti8:
388 ; GFX900: ; %bb.0: ; %entry
389 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
390 ; GFX900-NEXT: ds_read_i8_d16 v1, v0
391 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
392 ; GFX900-NEXT: global_store_dword v[0:1], v1, off
393 ; GFX900-NEXT: s_waitcnt vmcnt(0)
394 ; GFX900-NEXT: s_setpc_b64 s[30:31]
396 ; GFX906-LABEL: load_local_lo_v2i16_reghi_vreg_sexti8:
397 ; GFX906: ; %bb.0: ; %entry
398 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
399 ; GFX906-NEXT: ds_read_i8 v0, v0
400 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
401 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
402 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v1
403 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
404 ; GFX906-NEXT: s_waitcnt vmcnt(0)
405 ; GFX906-NEXT: s_setpc_b64 s[30:31]
407 ; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_sexti8:
408 ; GFX803: ; %bb.0: ; %entry
409 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
410 ; GFX803-NEXT: s_mov_b32 m0, -1
411 ; GFX803-NEXT: ds_read_i8 v0, v0
412 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
413 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
414 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
415 ; GFX803-NEXT: flat_store_dword v[0:1], v0
416 ; GFX803-NEXT: s_waitcnt vmcnt(0)
417 ; GFX803-NEXT: s_setpc_b64 s[30:31]
419 %reg.bc = bitcast i32 %reg to <2 x i16>
420 %load = load i8, ptr addrspace(3) %in
421 %ext = sext i8 %load to i16
422 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
423 store <2 x i16> %build1, ptr addrspace(1) undef
; Element 1 of the result is the i16 argument %reg; element 0 is an i8 loaded
; from addrspace(3) and sign-extended to i16. The vector is stored to an undef
; addrspace(1) pointer. Every run line is expected to use a plain ds_read_i8 -
; gfx900 and gfx906 merge with v_perm_b32, fiji shifts %reg left by 16 and
; merges with an SDWA or that selects WORD_0 of the loaded value.
427 define void @load_local_lo_v2i16_reglo_vreg_sexti8(ptr addrspace(3) %in, i16 %reg) #0 {
428 ; GFX900-MUBUF-LABEL: load_local_lo_v2i16_reglo_vreg_sexti8:
429 ; GFX900-MUBUF: ; %bb.0: ; %entry
430 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
431 ; GFX900-MUBUF-NEXT: ds_read_i8 v0, v0
432 ; GFX900-MUBUF-NEXT: s_mov_b32 s4, 0x5040100
433 ; GFX900-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
434 ; GFX900-MUBUF-NEXT: v_perm_b32 v0, v1, v0, s4
435 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
436 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
437 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
439 ; GFX906-LABEL: load_local_lo_v2i16_reglo_vreg_sexti8:
440 ; GFX906: ; %bb.0: ; %entry
441 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
442 ; GFX906-NEXT: ds_read_i8 v0, v0
443 ; GFX906-NEXT: s_mov_b32 s4, 0x5040100
444 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
445 ; GFX906-NEXT: v_perm_b32 v0, v1, v0, s4
446 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
447 ; GFX906-NEXT: s_waitcnt vmcnt(0)
448 ; GFX906-NEXT: s_setpc_b64 s[30:31]
450 ; GFX803-LABEL: load_local_lo_v2i16_reglo_vreg_sexti8:
451 ; GFX803: ; %bb.0: ; %entry
452 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
453 ; GFX803-NEXT: s_mov_b32 m0, -1
454 ; GFX803-NEXT: ds_read_i8 v0, v0
455 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
456 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
457 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
458 ; GFX803-NEXT: flat_store_dword v[0:1], v0
459 ; GFX803-NEXT: s_waitcnt vmcnt(0)
460 ; GFX803-NEXT: s_setpc_b64 s[30:31]
462 ; GFX900-FLATSCR-LABEL: load_local_lo_v2i16_reglo_vreg_sexti8:
463 ; GFX900-FLATSCR: ; %bb.0: ; %entry
464 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
465 ; GFX900-FLATSCR-NEXT: ds_read_i8 v0, v0
466 ; GFX900-FLATSCR-NEXT: s_mov_b32 s0, 0x5040100
467 ; GFX900-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
468 ; GFX900-FLATSCR-NEXT: v_perm_b32 v0, v1, v0, s0
469 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
470 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
471 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
473 %load = load i8, ptr addrspace(3) %in
474 %ext = sext i8 %load to i16
475 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
476 %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 0
477 store <2 x i16> %build1, ptr addrspace(1) undef
; Half-typed variant of the zero-extended i8 case. The i8 loaded from
; addrspace(3) is zero-extended to i16, bitcast to half and inserted into
; element 0, while the half argument %reg fills element 1. The vector is stored
; to an undef addrspace(1) pointer. Every run line is expected to use a plain
; ds_read_u8 - gfx900 and gfx906 merge with v_perm_b32, fiji with shift and or.
481 define void @load_local_lo_v2f16_reglo_vreg_zexti8(ptr addrspace(3) %in, half %reg) #0 {
482 ; GFX900-MUBUF-LABEL: load_local_lo_v2f16_reglo_vreg_zexti8:
483 ; GFX900-MUBUF: ; %bb.0: ; %entry
484 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
485 ; GFX900-MUBUF-NEXT: ds_read_u8 v0, v0
486 ; GFX900-MUBUF-NEXT: s_mov_b32 s4, 0x5040100
487 ; GFX900-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
488 ; GFX900-MUBUF-NEXT: v_perm_b32 v0, v1, v0, s4
489 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
490 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
491 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
493 ; GFX906-LABEL: load_local_lo_v2f16_reglo_vreg_zexti8:
494 ; GFX906: ; %bb.0: ; %entry
495 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
496 ; GFX906-NEXT: ds_read_u8 v0, v0
497 ; GFX906-NEXT: s_mov_b32 s4, 0x5040100
498 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
499 ; GFX906-NEXT: v_perm_b32 v0, v1, v0, s4
500 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
501 ; GFX906-NEXT: s_waitcnt vmcnt(0)
502 ; GFX906-NEXT: s_setpc_b64 s[30:31]
504 ; GFX803-LABEL: load_local_lo_v2f16_reglo_vreg_zexti8:
505 ; GFX803: ; %bb.0: ; %entry
506 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
507 ; GFX803-NEXT: s_mov_b32 m0, -1
508 ; GFX803-NEXT: ds_read_u8 v0, v0
509 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
510 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
511 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
512 ; GFX803-NEXT: flat_store_dword v[0:1], v0
513 ; GFX803-NEXT: s_waitcnt vmcnt(0)
514 ; GFX803-NEXT: s_setpc_b64 s[30:31]
516 ; GFX900-FLATSCR-LABEL: load_local_lo_v2f16_reglo_vreg_zexti8:
517 ; GFX900-FLATSCR: ; %bb.0: ; %entry
518 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
519 ; GFX900-FLATSCR-NEXT: ds_read_u8 v0, v0
520 ; GFX900-FLATSCR-NEXT: s_mov_b32 s0, 0x5040100
521 ; GFX900-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
522 ; GFX900-FLATSCR-NEXT: v_perm_b32 v0, v1, v0, s0
523 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
524 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
525 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
527 %load = load i8, ptr addrspace(3) %in
528 %ext = zext i8 %load to i16
529 %bitcast = bitcast i16 %ext to half
530 %build0 = insertelement <2 x half> undef, half %reg, i32 1
531 %build1 = insertelement <2 x half> %build0, half %bitcast, i32 0
532 store <2 x half> %build1, ptr addrspace(1) undef
; Half-typed variant of the sign-extended i8 case. The i8 loaded from
; addrspace(3) is sign-extended to i16, bitcast to half and inserted into
; element 0, while the half argument %reg fills element 1. The vector is stored
; to an undef addrspace(1) pointer. Every run line is expected to use a plain
; ds_read_i8 - gfx900 and gfx906 merge with v_perm_b32, fiji shifts %reg left by
; 16 and merges with an SDWA or that selects WORD_0 of the loaded value.
536 define void @load_local_lo_v2f16_reglo_vreg_sexti8(ptr addrspace(3) %in, half %reg) #0 {
537 ; GFX900-MUBUF-LABEL: load_local_lo_v2f16_reglo_vreg_sexti8:
538 ; GFX900-MUBUF: ; %bb.0: ; %entry
539 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
540 ; GFX900-MUBUF-NEXT: ds_read_i8 v0, v0
541 ; GFX900-MUBUF-NEXT: s_mov_b32 s4, 0x5040100
542 ; GFX900-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
543 ; GFX900-MUBUF-NEXT: v_perm_b32 v0, v1, v0, s4
544 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
545 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
546 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
548 ; GFX906-LABEL: load_local_lo_v2f16_reglo_vreg_sexti8:
549 ; GFX906: ; %bb.0: ; %entry
550 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
551 ; GFX906-NEXT: ds_read_i8 v0, v0
552 ; GFX906-NEXT: s_mov_b32 s4, 0x5040100
553 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
554 ; GFX906-NEXT: v_perm_b32 v0, v1, v0, s4
555 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
556 ; GFX906-NEXT: s_waitcnt vmcnt(0)
557 ; GFX906-NEXT: s_setpc_b64 s[30:31]
559 ; GFX803-LABEL: load_local_lo_v2f16_reglo_vreg_sexti8:
560 ; GFX803: ; %bb.0: ; %entry
561 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
562 ; GFX803-NEXT: s_mov_b32 m0, -1
563 ; GFX803-NEXT: ds_read_i8 v0, v0
564 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
565 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
566 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
567 ; GFX803-NEXT: flat_store_dword v[0:1], v0
568 ; GFX803-NEXT: s_waitcnt vmcnt(0)
569 ; GFX803-NEXT: s_setpc_b64 s[30:31]
571 ; GFX900-FLATSCR-LABEL: load_local_lo_v2f16_reglo_vreg_sexti8:
572 ; GFX900-FLATSCR: ; %bb.0: ; %entry
573 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
574 ; GFX900-FLATSCR-NEXT: ds_read_i8 v0, v0
575 ; GFX900-FLATSCR-NEXT: s_mov_b32 s0, 0x5040100
576 ; GFX900-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
577 ; GFX900-FLATSCR-NEXT: v_perm_b32 v0, v1, v0, s0
578 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
579 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
580 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
582 %load = load i8, ptr addrspace(3) %in
583 %ext = sext i8 %load to i16
584 %bitcast = bitcast i16 %ext to half
585 %build0 = insertelement <2 x half> undef, half %reg, i32 1
586 %build1 = insertelement <2 x half> %build0, half %bitcast, i32 0
587 store <2 x half> %build1, ptr addrspace(1) undef
; The loaded i16 has two users. It is stored on its own to the null addrspace(3)
; pointer and it is inserted into element 0 of %reg, which is then stored to an
; undef addrspace(1) pointer. No run line is expected to use a d16 load here -
; all of them read with ds_read_u16, write the scalar back with ds_write_b16 and
; merge with v_bfi_b32 (gfx900, gfx906) or v_perm_b32 (fiji).
; %elt1 is extracted but has no user among the instructions shown in this
; function.
591 define void @load_local_lo_v2i16_reghi_vreg_multi_use_lo(ptr addrspace(3) %in, <2 x i16> %reg) #0 {
592 ; GFX900-MUBUF-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lo:
593 ; GFX900-MUBUF: ; %bb.0: ; %entry
594 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
595 ; GFX900-MUBUF-NEXT: ds_read_u16 v0, v0
596 ; GFX900-MUBUF-NEXT: v_mov_b32_e32 v2, 0
597 ; GFX900-MUBUF-NEXT: s_mov_b32 s4, 0xffff
598 ; GFX900-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
599 ; GFX900-MUBUF-NEXT: ds_write_b16 v2, v0
600 ; GFX900-MUBUF-NEXT: v_bfi_b32 v0, s4, v0, v1
601 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
602 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
603 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
605 ; GFX906-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lo:
606 ; GFX906: ; %bb.0: ; %entry
607 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
608 ; GFX906-NEXT: ds_read_u16 v0, v0
609 ; GFX906-NEXT: v_mov_b32_e32 v2, 0
610 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
611 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
612 ; GFX906-NEXT: ds_write_b16 v2, v0
613 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v1
614 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
615 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
616 ; GFX906-NEXT: s_setpc_b64 s[30:31]
618 ; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lo:
619 ; GFX803: ; %bb.0: ; %entry
620 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
621 ; GFX803-NEXT: s_mov_b32 m0, -1
622 ; GFX803-NEXT: ds_read_u16 v0, v0
623 ; GFX803-NEXT: v_mov_b32_e32 v2, 0
624 ; GFX803-NEXT: s_mov_b32 s4, 0x3020504
625 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
626 ; GFX803-NEXT: ds_write_b16 v2, v0
627 ; GFX803-NEXT: v_perm_b32 v0, v0, v1, s4
628 ; GFX803-NEXT: flat_store_dword v[0:1], v0
629 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
630 ; GFX803-NEXT: s_setpc_b64 s[30:31]
632 ; GFX900-FLATSCR-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lo:
633 ; GFX900-FLATSCR: ; %bb.0: ; %entry
634 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
635 ; GFX900-FLATSCR-NEXT: ds_read_u16 v0, v0
636 ; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v2, 0
637 ; GFX900-FLATSCR-NEXT: s_mov_b32 s0, 0xffff
638 ; GFX900-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
639 ; GFX900-FLATSCR-NEXT: ds_write_b16 v2, v0
640 ; GFX900-FLATSCR-NEXT: v_bfi_b32 v0, s0, v0, v1
641 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
642 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
643 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
645 %load = load i16, ptr addrspace(3) %in
646 %elt1 = extractelement <2 x i16> %reg, i32 1
647 store i16 %load, ptr addrspace(3) null
648 %build1 = insertelement <2 x i16> %reg, i16 %load, i32 0
649 store <2 x i16> %build1, ptr addrspace(1) undef
; Here the high element of %reg has the extra user. Element 1 is extracted and
; stored to the null addrspace(3) pointer, while the loaded i16 is only inserted
; into element 0 of %reg before the vector is stored to an undef addrspace(1)
; pointer. The gfx900 checks still expect ds_read_u16_d16 into v1, after the
; high half has been copied out with a shift. The gfx906 checks merge with
; v_bfi_b32 and the fiji checks with v_perm_b32.
653 define void @load_local_lo_v2i16_reghi_vreg_multi_use_hi(ptr addrspace(3) %in, <2 x i16> %reg) #0 {
654 ; GFX900-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_hi:
655 ; GFX900: ; %bb.0: ; %entry
656 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
657 ; GFX900-NEXT: v_lshrrev_b32_e32 v2, 16, v1
658 ; GFX900-NEXT: ds_read_u16_d16 v1, v0
659 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
660 ; GFX900-NEXT: ds_write_b16 v0, v2
661 ; GFX900-NEXT: s_waitcnt lgkmcnt(1)
662 ; GFX900-NEXT: global_store_dword v[0:1], v1, off
663 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
664 ; GFX900-NEXT: s_setpc_b64 s[30:31]
666 ; GFX906-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_hi:
667 ; GFX906: ; %bb.0: ; %entry
668 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
669 ; GFX906-NEXT: ds_read_u16 v0, v0
670 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
671 ; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v1
672 ; GFX906-NEXT: v_mov_b32_e32 v3, 0
673 ; GFX906-NEXT: ds_write_b16 v3, v2
674 ; GFX906-NEXT: s_waitcnt lgkmcnt(1)
675 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v1
676 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
677 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
678 ; GFX906-NEXT: s_setpc_b64 s[30:31]
680 ; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_hi:
681 ; GFX803: ; %bb.0: ; %entry
682 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
683 ; GFX803-NEXT: s_mov_b32 m0, -1
684 ; GFX803-NEXT: ds_read_u16 v0, v0
685 ; GFX803-NEXT: s_mov_b32 s4, 0x3020504
686 ; GFX803-NEXT: v_lshrrev_b32_e32 v2, 16, v1
687 ; GFX803-NEXT: v_mov_b32_e32 v3, 0
688 ; GFX803-NEXT: ds_write_b16 v3, v2
689 ; GFX803-NEXT: s_waitcnt lgkmcnt(1)
690 ; GFX803-NEXT: v_perm_b32 v0, v0, v1, s4
691 ; GFX803-NEXT: flat_store_dword v[0:1], v0
692 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
693 ; GFX803-NEXT: s_setpc_b64 s[30:31]
695 %load = load i16, ptr addrspace(3) %in
696 %elt1 = extractelement <2 x i16> %reg, i32 1
697 store i16 %elt1, ptr addrspace(3) null
698 %build1 = insertelement <2 x i16> %reg, i16 %load, i32 0
699 store <2 x i16> %build1, ptr addrspace(1) undef
; Both halves have an extra user. The loaded i16 is stored to %out0 and the
; extracted high element of %reg is stored to %out1 (all three addrspace(3)
; pointers are noalias), and the loaded value is also inserted into element 0 of
; %reg before the vector is stored to an undef addrspace(1) pointer. No run line
; is expected to use a d16 load - all read with ds_read_u16, issue two
; ds_write_b16 and merge with v_bfi_b32 (gfx900, gfx906) or v_perm_b32 (fiji).
703 define void @load_local_lo_v2i16_reghi_vreg_multi_use_lohi(ptr addrspace(3) noalias %in, <2 x i16> %reg, ptr addrspace(3) noalias %out0, ptr addrspace(3) noalias %out1) #0 {
704 ; GFX900-MUBUF-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lohi:
705 ; GFX900-MUBUF: ; %bb.0: ; %entry
706 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
707 ; GFX900-MUBUF-NEXT: ds_read_u16 v0, v0
708 ; GFX900-MUBUF-NEXT: s_mov_b32 s4, 0xffff
709 ; GFX900-MUBUF-NEXT: v_lshrrev_b32_e32 v4, 16, v1
710 ; GFX900-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
711 ; GFX900-MUBUF-NEXT: ds_write_b16 v2, v0
712 ; GFX900-MUBUF-NEXT: ds_write_b16 v3, v4
713 ; GFX900-MUBUF-NEXT: v_bfi_b32 v0, s4, v0, v1
714 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
715 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
716 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
718 ; GFX906-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lohi:
719 ; GFX906: ; %bb.0: ; %entry
720 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
721 ; GFX906-NEXT: ds_read_u16 v0, v0
722 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
723 ; GFX906-NEXT: v_lshrrev_b32_e32 v4, 16, v1
724 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
725 ; GFX906-NEXT: ds_write_b16 v2, v0
726 ; GFX906-NEXT: ds_write_b16 v3, v4
727 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v1
728 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
729 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
730 ; GFX906-NEXT: s_setpc_b64 s[30:31]
732 ; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lohi:
733 ; GFX803: ; %bb.0: ; %entry
734 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
735 ; GFX803-NEXT: s_mov_b32 m0, -1
736 ; GFX803-NEXT: ds_read_u16 v0, v0
737 ; GFX803-NEXT: s_mov_b32 s4, 0x3020504
738 ; GFX803-NEXT: v_lshrrev_b32_e32 v4, 16, v1
739 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
740 ; GFX803-NEXT: ds_write_b16 v2, v0
741 ; GFX803-NEXT: ds_write_b16 v3, v4
742 ; GFX803-NEXT: v_perm_b32 v0, v0, v1, s4
743 ; GFX803-NEXT: flat_store_dword v[0:1], v0
744 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
745 ; GFX803-NEXT: s_setpc_b64 s[30:31]
747 ; GFX900-FLATSCR-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lohi:
748 ; GFX900-FLATSCR: ; %bb.0: ; %entry
749 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
750 ; GFX900-FLATSCR-NEXT: ds_read_u16 v0, v0
751 ; GFX900-FLATSCR-NEXT: s_mov_b32 s0, 0xffff
752 ; GFX900-FLATSCR-NEXT: v_lshrrev_b32_e32 v4, 16, v1
753 ; GFX900-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
754 ; GFX900-FLATSCR-NEXT: ds_write_b16 v2, v0
755 ; GFX900-FLATSCR-NEXT: ds_write_b16 v3, v4
756 ; GFX900-FLATSCR-NEXT: v_bfi_b32 v0, s0, v0, v1
757 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
758 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
759 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
761 %load = load i16, ptr addrspace(3) %in
762 %elt1 = extractelement <2 x i16> %reg, i32 1
763 store i16 %load, ptr addrspace(3) %out0
764 store i16 %elt1, ptr addrspace(3) %out1
765 %build1 = insertelement <2 x i16> %reg, i16 %load, i32 0
766 store <2 x i16> %build1, ptr addrspace(1) undef
; Test: load an i16 from global memory (addrspace 1) at element index -2047 of
; %in (byte offset -4094) and insert it into element 0 of %reg viewed as
; <2 x i16>; the resulting vector is stored to an undef global pointer.
; What the autogenerated checks below expect:
;  - gfx900 run lines: a single global_load_short_d16 into v2 (presumably the
;    register carrying %reg), which is then stored without a separate merge.
;  - gfx906 run line (+sram-ecc): global_load_ushort, then v_bfi_b32 with the
;    0xffff mask to merge the loaded half into v2.
;  - fiji run line: the offset is added to the 64-bit address with
;    v_add_u32/v_addc_u32 (0xfffff002 == -4094), then flat_load_ushort and a
;    v_perm_b32 with selector 0x3020504.
770 define void @load_global_lo_v2i16_reglo_vreg(ptr addrspace(1) %in, i32 %reg) #0 {
771 ; GFX900-LABEL: load_global_lo_v2i16_reglo_vreg:
772 ; GFX900: ; %bb.0: ; %entry
773 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
774 ; GFX900-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
775 ; GFX900-NEXT: s_waitcnt vmcnt(0)
776 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
777 ; GFX900-NEXT: s_waitcnt vmcnt(0)
778 ; GFX900-NEXT: s_setpc_b64 s[30:31]
780 ; GFX906-LABEL: load_global_lo_v2i16_reglo_vreg:
781 ; GFX906: ; %bb.0: ; %entry
782 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
783 ; GFX906-NEXT: global_load_ushort v0, v[0:1], off offset:-4094
784 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
785 ; GFX906-NEXT: s_waitcnt vmcnt(0)
786 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v2
787 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
788 ; GFX906-NEXT: s_waitcnt vmcnt(0)
789 ; GFX906-NEXT: s_setpc_b64 s[30:31]
791 ; GFX803-LABEL: load_global_lo_v2i16_reglo_vreg:
792 ; GFX803: ; %bb.0: ; %entry
793 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
794 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff002, v0
795 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
796 ; GFX803-NEXT: flat_load_ushort v0, v[0:1]
797 ; GFX803-NEXT: s_mov_b32 s4, 0x3020504
798 ; GFX803-NEXT: s_waitcnt vmcnt(0)
799 ; GFX803-NEXT: v_perm_b32 v0, v0, v2, s4
800 ; GFX803-NEXT: flat_store_dword v[0:1], v0
801 ; GFX803-NEXT: s_waitcnt vmcnt(0)
802 ; GFX803-NEXT: s_setpc_b64 s[30:31]
804 %reg.bc = bitcast i32 %reg to <2 x i16>
805 %gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 -2047
806 %load = load i16, ptr addrspace(1) %gep
807 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
808 store <2 x i16> %build1, ptr addrspace(1) undef
; Test: half-typed variant of the global low-half load. A half is loaded from
; global memory (addrspace 1) at element index -2047 of %in (byte offset
; -4094) and inserted into element 0 of %reg viewed as <2 x half>; the result
; is stored to an undef global pointer.
; What the autogenerated checks below expect:
;  - gfx900 run lines: a single global_load_short_d16 into v2, stored as is.
;  - gfx906 run line (+sram-ecc): global_load_ushort plus v_bfi_b32 with the
;    0xffff mask.
;  - fiji run line: explicit 64-bit address add (0xfffff002 == -4094),
;    flat_load_ushort, then v_perm_b32 with selector 0x3020504.
812 define void @load_global_lo_v2f16_reglo_vreg(ptr addrspace(1) %in, i32 %reg) #0 {
813 ; GFX900-LABEL: load_global_lo_v2f16_reglo_vreg:
814 ; GFX900: ; %bb.0: ; %entry
815 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
816 ; GFX900-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
817 ; GFX900-NEXT: s_waitcnt vmcnt(0)
818 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
819 ; GFX900-NEXT: s_waitcnt vmcnt(0)
820 ; GFX900-NEXT: s_setpc_b64 s[30:31]
822 ; GFX906-LABEL: load_global_lo_v2f16_reglo_vreg:
823 ; GFX906: ; %bb.0: ; %entry
824 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
825 ; GFX906-NEXT: global_load_ushort v0, v[0:1], off offset:-4094
826 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
827 ; GFX906-NEXT: s_waitcnt vmcnt(0)
828 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v2
829 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
830 ; GFX906-NEXT: s_waitcnt vmcnt(0)
831 ; GFX906-NEXT: s_setpc_b64 s[30:31]
833 ; GFX803-LABEL: load_global_lo_v2f16_reglo_vreg:
834 ; GFX803: ; %bb.0: ; %entry
835 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
836 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff002, v0
837 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
838 ; GFX803-NEXT: flat_load_ushort v0, v[0:1]
839 ; GFX803-NEXT: s_mov_b32 s4, 0x3020504
840 ; GFX803-NEXT: s_waitcnt vmcnt(0)
841 ; GFX803-NEXT: v_perm_b32 v0, v0, v2, s4
842 ; GFX803-NEXT: flat_store_dword v[0:1], v0
843 ; GFX803-NEXT: s_waitcnt vmcnt(0)
844 ; GFX803-NEXT: s_setpc_b64 s[30:31]
846 %reg.bc = bitcast i32 %reg to <2 x half>
847 %gep = getelementptr inbounds half, ptr addrspace(1) %in, i64 -2047
848 %load = load half, ptr addrspace(1) %gep
849 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
850 store <2 x half> %build1, ptr addrspace(1) undef
; Test: load an i8 from global memory (addrspace 1) at byte offset -4095 from
; %in, zero-extend it to i16 and insert it into element 0 of %reg viewed as
; <2 x i16>; the result is stored to an undef global pointer.
; What the autogenerated checks below expect:
;  - gfx900 run lines: a single global_load_ubyte_d16 into v2, stored as is.
;  - gfx906 run line (+sram-ecc): global_load_ubyte plus v_bfi_b32 with the
;    0xffff mask.
;  - fiji run line: explicit 64-bit address add (0xfffff001 == -4095),
;    flat_load_ubyte, then the high half of v2 is kept with
;    v_and_b32 0xffff0000 and combined with the loaded byte by v_or_b32.
854 define void @load_global_lo_v2i16_reglo_vreg_zexti8(ptr addrspace(1) %in, i32 %reg) #0 {
855 ; GFX900-LABEL: load_global_lo_v2i16_reglo_vreg_zexti8:
856 ; GFX900: ; %bb.0: ; %entry
857 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
858 ; GFX900-NEXT: global_load_ubyte_d16 v2, v[0:1], off offset:-4095
859 ; GFX900-NEXT: s_waitcnt vmcnt(0)
860 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
861 ; GFX900-NEXT: s_waitcnt vmcnt(0)
862 ; GFX900-NEXT: s_setpc_b64 s[30:31]
864 ; GFX906-LABEL: load_global_lo_v2i16_reglo_vreg_zexti8:
865 ; GFX906: ; %bb.0: ; %entry
866 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
867 ; GFX906-NEXT: global_load_ubyte v0, v[0:1], off offset:-4095
868 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
869 ; GFX906-NEXT: s_waitcnt vmcnt(0)
870 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v2
871 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
872 ; GFX906-NEXT: s_waitcnt vmcnt(0)
873 ; GFX906-NEXT: s_setpc_b64 s[30:31]
875 ; GFX803-LABEL: load_global_lo_v2i16_reglo_vreg_zexti8:
876 ; GFX803: ; %bb.0: ; %entry
877 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
878 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0
879 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
880 ; GFX803-NEXT: flat_load_ubyte v0, v[0:1]
881 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
882 ; GFX803-NEXT: s_waitcnt vmcnt(0)
883 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
884 ; GFX803-NEXT: flat_store_dword v[0:1], v0
885 ; GFX803-NEXT: s_waitcnt vmcnt(0)
886 ; GFX803-NEXT: s_setpc_b64 s[30:31]
888 %reg.bc = bitcast i32 %reg to <2 x i16>
889 %gep = getelementptr inbounds i8, ptr addrspace(1) %in, i64 -4095
890 %load = load i8, ptr addrspace(1) %gep
891 %ext = zext i8 %load to i16
892 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
893 store <2 x i16> %build1, ptr addrspace(1) undef
; Test: load an i8 from global memory (addrspace 1) at byte offset -4095 from
; %in, sign-extend it to i16 and insert it into element 0 of %reg viewed as
; <2 x i16>; the result is stored to an undef global pointer.
; What the autogenerated checks below expect:
;  - gfx900 run lines: a single global_load_sbyte_d16 into v2, stored as is.
;  - gfx906 run line (+sram-ecc): global_load_sbyte plus v_bfi_b32 with the
;    0xffff mask.
;  - fiji run line: explicit 64-bit address add (0xfffff001 == -4095),
;    flat_load_sbyte, v_and_b32 0xffff0000 on v2, and a v_or_b32_sdwa that
;    selects only WORD_0 of the sign-extended load result.
897 define void @load_global_lo_v2i16_reglo_vreg_sexti8(ptr addrspace(1) %in, i32 %reg) #0 {
898 ; GFX900-LABEL: load_global_lo_v2i16_reglo_vreg_sexti8:
899 ; GFX900: ; %bb.0: ; %entry
900 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
901 ; GFX900-NEXT: global_load_sbyte_d16 v2, v[0:1], off offset:-4095
902 ; GFX900-NEXT: s_waitcnt vmcnt(0)
903 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
904 ; GFX900-NEXT: s_waitcnt vmcnt(0)
905 ; GFX900-NEXT: s_setpc_b64 s[30:31]
907 ; GFX906-LABEL: load_global_lo_v2i16_reglo_vreg_sexti8:
908 ; GFX906: ; %bb.0: ; %entry
909 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
910 ; GFX906-NEXT: global_load_sbyte v0, v[0:1], off offset:-4095
911 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
912 ; GFX906-NEXT: s_waitcnt vmcnt(0)
913 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v2
914 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
915 ; GFX906-NEXT: s_waitcnt vmcnt(0)
916 ; GFX906-NEXT: s_setpc_b64 s[30:31]
918 ; GFX803-LABEL: load_global_lo_v2i16_reglo_vreg_sexti8:
919 ; GFX803: ; %bb.0: ; %entry
920 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
921 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0
922 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
923 ; GFX803-NEXT: flat_load_sbyte v0, v[0:1]
924 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
925 ; GFX803-NEXT: s_waitcnt vmcnt(0)
926 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
927 ; GFX803-NEXT: flat_store_dword v[0:1], v0
928 ; GFX803-NEXT: s_waitcnt vmcnt(0)
929 ; GFX803-NEXT: s_setpc_b64 s[30:31]
931 %reg.bc = bitcast i32 %reg to <2 x i16>
932 %gep = getelementptr inbounds i8, ptr addrspace(1) %in, i64 -4095
933 %load = load i8, ptr addrspace(1) %gep
934 %ext = sext i8 %load to i16
935 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
936 store <2 x i16> %build1, ptr addrspace(1) undef
; Test: load an i8 from global memory (addrspace 1) at byte offset -4095 from
; %in, zero-extend it to i16, bitcast that to half and insert it into element
; 0 of %reg viewed as <2 x half>; the result is stored to an undef global
; pointer.
; What the autogenerated checks below expect:
;  - gfx900 run lines: a single global_load_ubyte_d16 into v2, stored as is.
;  - gfx906 run line (+sram-ecc): global_load_ubyte plus v_bfi_b32 with the
;    0xffff mask.
;  - fiji run line: explicit 64-bit address add (0xfffff001 == -4095),
;    flat_load_ubyte, v_and_b32 0xffff0000 on v2, then v_or_b32.
940 define void @load_global_lo_v2f16_reglo_vreg_zexti8(ptr addrspace(1) %in, i32 %reg) #0 {
941 ; GFX900-LABEL: load_global_lo_v2f16_reglo_vreg_zexti8:
942 ; GFX900: ; %bb.0: ; %entry
943 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
944 ; GFX900-NEXT: global_load_ubyte_d16 v2, v[0:1], off offset:-4095
945 ; GFX900-NEXT: s_waitcnt vmcnt(0)
946 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
947 ; GFX900-NEXT: s_waitcnt vmcnt(0)
948 ; GFX900-NEXT: s_setpc_b64 s[30:31]
950 ; GFX906-LABEL: load_global_lo_v2f16_reglo_vreg_zexti8:
951 ; GFX906: ; %bb.0: ; %entry
952 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
953 ; GFX906-NEXT: global_load_ubyte v0, v[0:1], off offset:-4095
954 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
955 ; GFX906-NEXT: s_waitcnt vmcnt(0)
956 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v2
957 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
958 ; GFX906-NEXT: s_waitcnt vmcnt(0)
959 ; GFX906-NEXT: s_setpc_b64 s[30:31]
961 ; GFX803-LABEL: load_global_lo_v2f16_reglo_vreg_zexti8:
962 ; GFX803: ; %bb.0: ; %entry
963 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
964 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0
965 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
966 ; GFX803-NEXT: flat_load_ubyte v0, v[0:1]
967 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
968 ; GFX803-NEXT: s_waitcnt vmcnt(0)
969 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
970 ; GFX803-NEXT: flat_store_dword v[0:1], v0
971 ; GFX803-NEXT: s_waitcnt vmcnt(0)
972 ; GFX803-NEXT: s_setpc_b64 s[30:31]
974 %reg.bc = bitcast i32 %reg to <2 x half>
975 %gep = getelementptr inbounds i8, ptr addrspace(1) %in, i64 -4095
976 %load = load i8, ptr addrspace(1) %gep
977 %ext = zext i8 %load to i16
978 %bitcast = bitcast i16 %ext to half
979 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0
980 store <2 x half> %build1, ptr addrspace(1) undef
; Test: load an i8 from global memory (addrspace 1) at byte offset -4095 from
; %in, sign-extend it to i16, bitcast that to half and insert it into element
; 0 of %reg viewed as <2 x half>; the result is stored to an undef global
; pointer.
; What the autogenerated checks below expect:
;  - gfx900 run lines: a single global_load_sbyte_d16 into v2, stored as is.
;  - gfx906 run line (+sram-ecc): global_load_sbyte plus v_bfi_b32 with the
;    0xffff mask.
;  - fiji run line: explicit 64-bit address add (0xfffff001 == -4095),
;    flat_load_sbyte, v_and_b32 0xffff0000 on v2, and a v_or_b32_sdwa that
;    selects only WORD_0 of the sign-extended load result.
984 define void @load_global_lo_v2f16_reglo_vreg_sexti8(ptr addrspace(1) %in, i32 %reg) #0 {
985 ; GFX900-LABEL: load_global_lo_v2f16_reglo_vreg_sexti8:
986 ; GFX900: ; %bb.0: ; %entry
987 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
988 ; GFX900-NEXT: global_load_sbyte_d16 v2, v[0:1], off offset:-4095
989 ; GFX900-NEXT: s_waitcnt vmcnt(0)
990 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
991 ; GFX900-NEXT: s_waitcnt vmcnt(0)
992 ; GFX900-NEXT: s_setpc_b64 s[30:31]
994 ; GFX906-LABEL: load_global_lo_v2f16_reglo_vreg_sexti8:
995 ; GFX906: ; %bb.0: ; %entry
996 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
997 ; GFX906-NEXT: global_load_sbyte v0, v[0:1], off offset:-4095
998 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
999 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1000 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v2
1001 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1002 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1003 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1005 ; GFX803-LABEL: load_global_lo_v2f16_reglo_vreg_sexti8:
1006 ; GFX803: ; %bb.0: ; %entry
1007 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1008 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0
1009 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
1010 ; GFX803-NEXT: flat_load_sbyte v0, v[0:1]
1011 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
1012 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1013 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1014 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1015 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1016 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1018 %reg.bc = bitcast i32 %reg to <2 x half>
1019 %gep = getelementptr inbounds i8, ptr addrspace(1) %in, i64 -4095
1020 %load = load i8, ptr addrspace(1) %gep
1021 %ext = sext i8 %load to i16
1022 %bitcast = bitcast i16 %ext to half
1023 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0
1024 store <2 x half> %build1, ptr addrspace(1) undef
; Test: load an i16 through a flat (generic address space) pointer with no
; offset and insert it into element 0 of %reg viewed as <2 x i16>; the result
; is stored to an undef global pointer.
; Although the name says "reghi", the IR below has the same shape as the
; "reglo" tests: %reg is a full i32 whose element 1 is kept.
; What the autogenerated checks below expect:
;  - gfx900 run lines: a single flat_load_short_d16 into v2, stored as is.
;  - gfx906 run line (+sram-ecc): flat_load_ushort plus v_bfi_b32 with the
;    0xffff mask.
;  - fiji run line: flat_load_ushort plus v_perm_b32 with selector 0x3020504.
; In all three, the wait after the flat load covers both vmcnt and lgkmcnt.
1028 define void @load_flat_lo_v2i16_reghi_vreg(ptr %in, i32 %reg) #0 {
1029 ; GFX900-LABEL: load_flat_lo_v2i16_reghi_vreg:
1030 ; GFX900: ; %bb.0: ; %entry
1031 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1032 ; GFX900-NEXT: flat_load_short_d16 v2, v[0:1]
1033 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1034 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
1035 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1036 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1038 ; GFX906-LABEL: load_flat_lo_v2i16_reghi_vreg:
1039 ; GFX906: ; %bb.0: ; %entry
1040 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1041 ; GFX906-NEXT: flat_load_ushort v0, v[0:1]
1042 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
1043 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1044 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v2
1045 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1046 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1047 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1049 ; GFX803-LABEL: load_flat_lo_v2i16_reghi_vreg:
1050 ; GFX803: ; %bb.0: ; %entry
1051 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1052 ; GFX803-NEXT: flat_load_ushort v0, v[0:1]
1053 ; GFX803-NEXT: s_mov_b32 s4, 0x3020504
1054 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1055 ; GFX803-NEXT: v_perm_b32 v0, v0, v2, s4
1056 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1057 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1058 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1060 %reg.bc = bitcast i32 %reg to <2 x i16>
1061 %load = load i16, ptr %in
1062 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
1063 store <2 x i16> %build1, ptr addrspace(1) undef
; Test: half-typed variant of the flat low-half load. A half is loaded through
; a flat (generic address space) pointer with no offset and inserted into
; element 0 of %reg viewed as <2 x half>; the result is stored to an undef
; global pointer.
; Although the name says "reghi", %reg is a full i32 whose element 1 is kept,
; exactly as in the "reglo" tests.
; What the autogenerated checks below expect:
;  - gfx900 run lines: a single flat_load_short_d16 into v2, stored as is.
;  - gfx906 run line (+sram-ecc): flat_load_ushort plus v_bfi_b32 with the
;    0xffff mask.
;  - fiji run line: flat_load_ushort plus v_perm_b32 with selector 0x3020504.
1067 define void @load_flat_lo_v2f16_reghi_vreg(ptr %in, i32 %reg) #0 {
1068 ; GFX900-LABEL: load_flat_lo_v2f16_reghi_vreg:
1069 ; GFX900: ; %bb.0: ; %entry
1070 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1071 ; GFX900-NEXT: flat_load_short_d16 v2, v[0:1]
1072 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1073 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
1074 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1075 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1077 ; GFX906-LABEL: load_flat_lo_v2f16_reghi_vreg:
1078 ; GFX906: ; %bb.0: ; %entry
1079 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1080 ; GFX906-NEXT: flat_load_ushort v0, v[0:1]
1081 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
1082 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1083 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v2
1084 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1085 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1086 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1088 ; GFX803-LABEL: load_flat_lo_v2f16_reghi_vreg:
1089 ; GFX803: ; %bb.0: ; %entry
1090 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1091 ; GFX803-NEXT: flat_load_ushort v0, v[0:1]
1092 ; GFX803-NEXT: s_mov_b32 s4, 0x3020504
1093 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1094 ; GFX803-NEXT: v_perm_b32 v0, v0, v2, s4
1095 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1096 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1097 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1099 ; FIXME: the and above should be removable (possibly stale: none of the check lines above contain an `and` instruction any more -- TODO confirm and drop this note)
1101 %reg.bc = bitcast i32 %reg to <2 x half>
1102 %load = load half, ptr %in
1103 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
1104 store <2 x half> %build1, ptr addrspace(1) undef
; Test: load an i8 through a flat (generic address space) pointer, zero-extend
; it to i16 and insert it into element 0 of %reg viewed as <2 x i16>; the
; result is stored to an undef global pointer.
; What the autogenerated checks below expect:
;  - gfx900 run lines: a single flat_load_ubyte_d16 into v2, stored as is.
;  - gfx906 run line (+sram-ecc): flat_load_ubyte plus v_bfi_b32 with the
;    0xffff mask.
;  - fiji run line: flat_load_ubyte, v_and_b32 0xffff0000 on v2, then
;    v_or_b32.
1108 define void @load_flat_lo_v2i16_reglo_vreg_zexti8(ptr %in, i32 %reg) #0 {
1109 ; GFX900-LABEL: load_flat_lo_v2i16_reglo_vreg_zexti8:
1110 ; GFX900: ; %bb.0: ; %entry
1111 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1112 ; GFX900-NEXT: flat_load_ubyte_d16 v2, v[0:1]
1113 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1114 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
1115 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1116 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1118 ; GFX906-LABEL: load_flat_lo_v2i16_reglo_vreg_zexti8:
1119 ; GFX906: ; %bb.0: ; %entry
1120 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1121 ; GFX906-NEXT: flat_load_ubyte v0, v[0:1]
1122 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
1123 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1124 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v2
1125 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1126 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1127 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1129 ; GFX803-LABEL: load_flat_lo_v2i16_reglo_vreg_zexti8:
1130 ; GFX803: ; %bb.0: ; %entry
1131 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1132 ; GFX803-NEXT: flat_load_ubyte v0, v[0:1]
1133 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
1134 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1135 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
1136 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1137 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1138 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1140 %reg.bc = bitcast i32 %reg to <2 x i16>
1141 %load = load i8, ptr %in
1142 %ext = zext i8 %load to i16
1143 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
1144 store <2 x i16> %build1, ptr addrspace(1) undef
; Test: load an i8 through a flat (generic address space) pointer, sign-extend
; it to i16 and insert it into element 0 of %reg viewed as <2 x i16>; the
; result is stored to an undef global pointer.
; What the autogenerated checks below expect:
;  - gfx900 run lines: a single flat_load_sbyte_d16 into v2, stored as is.
;  - gfx906 run line (+sram-ecc): flat_load_sbyte plus v_bfi_b32 with the
;    0xffff mask.
;  - fiji run line: flat_load_sbyte, v_and_b32 0xffff0000 on v2, and a
;    v_or_b32_sdwa that selects only WORD_0 of the sign-extended load result.
1148 define void @load_flat_lo_v2i16_reglo_vreg_sexti8(ptr %in, i32 %reg) #0 {
1149 ; GFX900-LABEL: load_flat_lo_v2i16_reglo_vreg_sexti8:
1150 ; GFX900: ; %bb.0: ; %entry
1151 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1152 ; GFX900-NEXT: flat_load_sbyte_d16 v2, v[0:1]
1153 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1154 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
1155 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1156 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1158 ; GFX906-LABEL: load_flat_lo_v2i16_reglo_vreg_sexti8:
1159 ; GFX906: ; %bb.0: ; %entry
1160 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1161 ; GFX906-NEXT: flat_load_sbyte v0, v[0:1]
1162 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
1163 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1164 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v2
1165 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1166 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1167 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1169 ; GFX803-LABEL: load_flat_lo_v2i16_reglo_vreg_sexti8:
1170 ; GFX803: ; %bb.0: ; %entry
1171 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1172 ; GFX803-NEXT: flat_load_sbyte v0, v[0:1]
1173 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
1174 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1175 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1176 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1177 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1178 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1180 %reg.bc = bitcast i32 %reg to <2 x i16>
1181 %load = load i8, ptr %in
1182 %ext = sext i8 %load to i16
1183 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
1184 store <2 x i16> %build1, ptr addrspace(1) undef
; Test: load an i8 through a flat (generic address space) pointer, zero-extend
; it to i16, bitcast that to half and insert it into element 0 of %reg viewed
; as <2 x half>; the result is stored to an undef global pointer.
; What the autogenerated checks below expect:
;  - gfx900 run lines: a single flat_load_ubyte_d16 into v2, stored as is.
;  - gfx906 run line (+sram-ecc): flat_load_ubyte plus v_bfi_b32 with the
;    0xffff mask.
;  - fiji run line: flat_load_ubyte, v_and_b32 0xffff0000 on v2, then
;    v_or_b32.
1188 define void @load_flat_lo_v2f16_reglo_vreg_zexti8(ptr %in, i32 %reg) #0 {
1189 ; GFX900-LABEL: load_flat_lo_v2f16_reglo_vreg_zexti8:
1190 ; GFX900: ; %bb.0: ; %entry
1191 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1192 ; GFX900-NEXT: flat_load_ubyte_d16 v2, v[0:1]
1193 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1194 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
1195 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1196 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1198 ; GFX906-LABEL: load_flat_lo_v2f16_reglo_vreg_zexti8:
1199 ; GFX906: ; %bb.0: ; %entry
1200 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1201 ; GFX906-NEXT: flat_load_ubyte v0, v[0:1]
1202 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
1203 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1204 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v2
1205 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1206 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1207 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1209 ; GFX803-LABEL: load_flat_lo_v2f16_reglo_vreg_zexti8:
1210 ; GFX803: ; %bb.0: ; %entry
1211 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1212 ; GFX803-NEXT: flat_load_ubyte v0, v[0:1]
1213 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
1214 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1215 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
1216 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1217 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1218 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1220 %reg.bc = bitcast i32 %reg to <2 x half>
1221 %load = load i8, ptr %in
1222 %ext = zext i8 %load to i16
1223 %bitcast = bitcast i16 %ext to half
1224 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0
1225 store <2 x half> %build1, ptr addrspace(1) undef
; Test: load an i8 through a flat (generic address space) pointer, sign-extend
; it to i16, bitcast that to half and insert it into element 0 of %reg viewed
; as <2 x half>; the result is stored to an undef global pointer.
; What the autogenerated checks below expect:
;  - gfx900 run lines: a single flat_load_sbyte_d16 into v2, stored as is.
;  - gfx906 run line (+sram-ecc): flat_load_sbyte plus v_bfi_b32 with the
;    0xffff mask.
;  - fiji run line: flat_load_sbyte, v_and_b32 0xffff0000 on v2, and a
;    v_or_b32_sdwa that selects only WORD_0 of the sign-extended load result.
1229 define void @load_flat_lo_v2f16_reglo_vreg_sexti8(ptr %in, i32 %reg) #0 {
1230 ; GFX900-LABEL: load_flat_lo_v2f16_reglo_vreg_sexti8:
1231 ; GFX900: ; %bb.0: ; %entry
1232 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1233 ; GFX900-NEXT: flat_load_sbyte_d16 v2, v[0:1]
1234 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1235 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
1236 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1237 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1239 ; GFX906-LABEL: load_flat_lo_v2f16_reglo_vreg_sexti8:
1240 ; GFX906: ; %bb.0: ; %entry
1241 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1242 ; GFX906-NEXT: flat_load_sbyte v0, v[0:1]
1243 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
1244 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1245 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v2
1246 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1247 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1248 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1250 ; GFX803-LABEL: load_flat_lo_v2f16_reglo_vreg_sexti8:
1251 ; GFX803: ; %bb.0: ; %entry
1252 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1253 ; GFX803-NEXT: flat_load_sbyte v0, v[0:1]
1254 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
1255 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1256 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1257 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1258 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1259 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1261 %reg.bc = bitcast i32 %reg to <2 x half>
1262 %load = load i8, ptr %in
1263 %ext = sext i8 %load to i16
1264 %bitcast = bitcast i16 %ext to half
1265 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0
1266 store <2 x half> %build1, ptr addrspace(1) undef
; Test: load an i16 from private memory (addrspace 5) at element index 2047 of
; the byval(i16) argument %in (byte offset 4094) and insert it into element 0
; of %reg viewed as <2 x i16>; the result is stored to an undef global
; pointer.
; What the autogenerated checks below expect:
;  - gfx900 MUBUF run line: buffer_load_short_d16 into v0, addressed off s32
;    with offset 4094, stored as is.
;  - gfx900 flat-scratch run line: scratch_load_short_d16 into v0, addressed
;    off s32 with offset 4094, stored as is.
;  - gfx906 run line (+sram-ecc): buffer_load_ushort plus v_bfi_b32 with the
;    0xffff mask.
;  - fiji run line: buffer_load_ushort plus v_perm_b32 with selector
;    0x3020504.
1270 define void @load_private_lo_v2i16_reglo_vreg(ptr addrspace(5) byval(i16) %in, i32 %reg) #0 {
1271 ; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg:
1272 ; GFX900-MUBUF: ; %bb.0: ; %entry
1273 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1274 ; GFX900-MUBUF-NEXT: buffer_load_short_d16 v0, off, s[0:3], s32 offset:4094
1275 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1276 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
1277 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1278 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
1280 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg:
1281 ; GFX906: ; %bb.0: ; %entry
1282 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1283 ; GFX906-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1284 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
1285 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1286 ; GFX906-NEXT: v_bfi_b32 v0, s4, v1, v0
1287 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1288 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1289 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1291 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg:
1292 ; GFX803: ; %bb.0: ; %entry
1293 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1294 ; GFX803-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1295 ; GFX803-NEXT: s_mov_b32 s4, 0x3020504
1296 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1297 ; GFX803-NEXT: v_perm_b32 v0, v1, v0, s4
1298 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1299 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1300 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1302 ; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg:
1303 ; GFX900-FLATSCR: ; %bb.0: ; %entry
1304 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1305 ; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v0, off, s32 offset:4094
1306 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1307 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
1308 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1309 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
1311 %reg.bc = bitcast i32 %reg to <2 x i16>
1312 %gep = getelementptr inbounds i16, ptr addrspace(5) %in, i64 2047
1313 %load = load i16, ptr addrspace(5) %gep
1314 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
1315 store <2 x i16> %build1, ptr addrspace(1) undef
; Test: build a <2 x i16> whose element 1 is the scalar i16 argument %reg and
; whose element 0 is an i16 loaded from private memory (addrspace 5) at
; element index 2047 of the byval(i16) argument %in (byte offset 4094); the
; result is stored to an undef global pointer.
; Unlike the tests where %reg is an i32 that already holds both halves, none
; of the run lines is expected to use a d16 load here:
;  - gfx900 MUBUF and gfx906 run lines: buffer_load_ushort plus v_perm_b32
;    with selector 0x5040100.
;  - gfx900 flat-scratch run line: scratch_load_ushort plus v_perm_b32 with
;    selector 0x5040100.
;  - fiji run line: buffer_load_ushort, %reg shifted left by 16 with
;    v_lshlrev_b32, then v_or_b32.
1319 define void @load_private_lo_v2i16_reghi_vreg(ptr addrspace(5) byval(i16) %in, i16 %reg) #0 {
1320 ; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reghi_vreg:
1321 ; GFX900-MUBUF: ; %bb.0: ; %entry
1322 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1323 ; GFX900-MUBUF-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1324 ; GFX900-MUBUF-NEXT: s_mov_b32 s4, 0x5040100
1325 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1326 ; GFX900-MUBUF-NEXT: v_perm_b32 v0, v0, v1, s4
1327 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
1328 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1329 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
1331 ; GFX906-LABEL: load_private_lo_v2i16_reghi_vreg:
1332 ; GFX906: ; %bb.0: ; %entry
1333 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1334 ; GFX906-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1335 ; GFX906-NEXT: s_mov_b32 s4, 0x5040100
1336 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1337 ; GFX906-NEXT: v_perm_b32 v0, v0, v1, s4
1338 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1339 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1340 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1342 ; GFX803-LABEL: load_private_lo_v2i16_reghi_vreg:
1343 ; GFX803: ; %bb.0: ; %entry
1344 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1345 ; GFX803-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1346 ; GFX803-NEXT: v_lshlrev_b32_e32 v0, 16, v0
1347 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1348 ; GFX803-NEXT: v_or_b32_e32 v0, v1, v0
1349 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1350 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1351 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1353 ; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reghi_vreg:
1354 ; GFX900-FLATSCR: ; %bb.0: ; %entry
1355 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1356 ; GFX900-FLATSCR-NEXT: scratch_load_ushort v1, off, s32 offset:4094
1357 ; GFX900-FLATSCR-NEXT: s_mov_b32 s0, 0x5040100
1358 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1359 ; GFX900-FLATSCR-NEXT: v_perm_b32 v0, v0, v1, s0
1360 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
1361 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1362 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
1364 %gep = getelementptr inbounds i16, ptr addrspace(5) %in, i64 2047
1365 %load = load i16, ptr addrspace(5) %gep
1366 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
1367 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
1368 store <2 x i16> %build1, ptr addrspace(1) undef
; Test: half-typed variant of the private low-half load. A half is loaded from
; private memory (addrspace 5) at element index 2047 of the byval(half)
; argument %in (byte offset 4094) and inserted into element 0 of %reg viewed
; as <2 x half>; the result is stored to an undef global pointer.
; What the autogenerated checks below expect:
;  - gfx900 MUBUF run line: buffer_load_short_d16 into v0, addressed off s32
;    with offset 4094, stored as is.
;  - gfx900 flat-scratch run line: scratch_load_short_d16 into v0, addressed
;    off s32 with offset 4094, stored as is.
;  - gfx906 run line (+sram-ecc): buffer_load_ushort plus v_bfi_b32 with the
;    0xffff mask.
;  - fiji run line: buffer_load_ushort plus v_perm_b32 with selector
;    0x3020504.
1372 define void @load_private_lo_v2f16_reglo_vreg(ptr addrspace(5) byval(half) %in, i32 %reg) #0 {
1373 ; GFX900-MUBUF-LABEL: load_private_lo_v2f16_reglo_vreg:
1374 ; GFX900-MUBUF: ; %bb.0: ; %entry
1375 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1376 ; GFX900-MUBUF-NEXT: buffer_load_short_d16 v0, off, s[0:3], s32 offset:4094
1377 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1378 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
1379 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1380 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
1382 ; GFX906-LABEL: load_private_lo_v2f16_reglo_vreg:
1383 ; GFX906: ; %bb.0: ; %entry
1384 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1385 ; GFX906-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1386 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
1387 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1388 ; GFX906-NEXT: v_bfi_b32 v0, s4, v1, v0
1389 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1390 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1391 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1393 ; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg:
1394 ; GFX803: ; %bb.0: ; %entry
1395 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1396 ; GFX803-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1397 ; GFX803-NEXT: s_mov_b32 s4, 0x3020504
1398 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1399 ; GFX803-NEXT: v_perm_b32 v0, v1, v0, s4
1400 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1401 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1402 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1404 ; GFX900-FLATSCR-LABEL: load_private_lo_v2f16_reglo_vreg:
1405 ; GFX900-FLATSCR: ; %bb.0: ; %entry
1406 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1407 ; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v0, off, s32 offset:4094
1408 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1409 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
1410 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1411 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
1413 %reg.bc = bitcast i32 %reg to <2 x half>
1414 %gep = getelementptr inbounds half, ptr addrspace(5) %in, i64 2047
1415 %load = load half, ptr addrspace(5) %gep
1416 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
1417 store <2 x half> %build1, ptr addrspace(1) undef
; Test: volatile i16 load from the constant private (addrspace 5) address 4094
; (built with inttoptr; the %in argument is not used by the IR below),
; inserted into element 0 of %reg viewed as <2 x i16>; the result is stored to
; an undef global pointer.
; What the autogenerated checks below expect:
;  - gfx900 MUBUF run line: buffer_load_short_d16 into v1 with a zero soffset,
;    offset 4094 and the glc bit, stored as is.
;  - gfx900 flat-scratch run line: the address is materialised in s0
;    (0xffe == 4094) and used by scratch_load_short_d16 with glc.
;  - gfx906 run line (+sram-ecc): buffer_load_ushort with glc, then v_bfi_b32
;    with the 0xffff mask.
;  - fiji run line: buffer_load_ushort with glc, then v_perm_b32 with selector
;    0x3020504.
1421 define void @load_private_lo_v2i16_reglo_vreg_nooff(ptr addrspace(5) %in, i32 %reg) #0 {
1422 ; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_nooff:
1423 ; GFX900-MUBUF: ; %bb.0: ; %entry
1424 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1425 ; GFX900-MUBUF-NEXT: buffer_load_short_d16 v1, off, s[0:3], 0 offset:4094 glc
1426 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1427 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v1, off
1428 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1429 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
1431 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_nooff:
1432 ; GFX906: ; %bb.0: ; %entry
1433 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1434 ; GFX906-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094 glc
1435 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1436 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
1437 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v1
1438 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1439 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1440 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1442 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_nooff:
1443 ; GFX803: ; %bb.0: ; %entry
1444 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1445 ; GFX803-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094 glc
1446 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1447 ; GFX803-NEXT: s_mov_b32 s4, 0x3020504
1448 ; GFX803-NEXT: v_perm_b32 v0, v0, v1, s4
1449 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1450 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1451 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1453 ; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_nooff:
1454 ; GFX900-FLATSCR: ; %bb.0: ; %entry
1455 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1456 ; GFX900-FLATSCR-NEXT: s_movk_i32 s0, 0xffe
1457 ; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v1, off, s0 glc
1458 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1459 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v1, off
1460 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1461 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
1463 %reg.bc = bitcast i32 %reg to <2 x i16>
1464 %load = load volatile i16, ptr addrspace(5) inttoptr (i32 4094 to ptr addrspace(5))
1465 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
1466 store <2 x i16> %build1, ptr addrspace(1) undef
; Test: volatile i16 load from the constant private (addrspace 5) address 4094
; (built with inttoptr; the %in argument is not used by the IR below),
; inserted into element 0 of %reg viewed as <2 x i16>; the result is stored to
; an undef global pointer.
; Although the name says "reghi", the visible IR is line-for-line the same as
; in load_private_lo_v2i16_reglo_vreg_nooff -- TODO confirm whether a distinct
; "reghi" pattern was intended here.
; What the autogenerated checks below expect:
;  - gfx900 MUBUF run line: buffer_load_short_d16 into v1 with a zero soffset,
;    offset 4094 and the glc bit, stored as is.
;  - gfx900 flat-scratch run line: the address is materialised in s0
;    (0xffe == 4094) and used by scratch_load_short_d16 with glc.
;  - gfx906 run line (+sram-ecc): buffer_load_ushort with glc, then v_bfi_b32
;    with the 0xffff mask.
;  - fiji run line: buffer_load_ushort with glc, then v_perm_b32 with selector
;    0x3020504.
1470 define void @load_private_lo_v2i16_reghi_vreg_nooff(ptr addrspace(5) %in, i32 %reg) #0 {
1471 ; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reghi_vreg_nooff:
1472 ; GFX900-MUBUF: ; %bb.0: ; %entry
1473 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1474 ; GFX900-MUBUF-NEXT: buffer_load_short_d16 v1, off, s[0:3], 0 offset:4094 glc
1475 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1476 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v1, off
1477 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1478 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
1480 ; GFX906-LABEL: load_private_lo_v2i16_reghi_vreg_nooff:
1481 ; GFX906: ; %bb.0: ; %entry
1482 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1483 ; GFX906-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094 glc
1484 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1485 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
1486 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v1
1487 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1488 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1489 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1491 ; GFX803-LABEL: load_private_lo_v2i16_reghi_vreg_nooff:
1492 ; GFX803: ; %bb.0: ; %entry
1493 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1494 ; GFX803-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094 glc
1495 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1496 ; GFX803-NEXT: s_mov_b32 s4, 0x3020504
1497 ; GFX803-NEXT: v_perm_b32 v0, v0, v1, s4
1498 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1499 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1500 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1502 ; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reghi_vreg_nooff:
1503 ; GFX900-FLATSCR: ; %bb.0: ; %entry
1504 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1505 ; GFX900-FLATSCR-NEXT: s_movk_i32 s0, 0xffe
1506 ; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v1, off, s0 glc
1507 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1508 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v1, off
1509 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1510 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
1512 %reg.bc = bitcast i32 %reg to <2 x i16>
1513 %load = load volatile i16, ptr addrspace(5) inttoptr (i32 4094 to ptr addrspace(5))
1514 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
1515 store <2 x i16> %build1, ptr addrspace(1) undef
; Half-precision variant - inserts a volatile half load from the constant
; private (addrspace 5) address 4094 into element 0 of %reg reinterpreted as
; <2 x half>, then stores the vector. %in is never referenced by the body.
; Expected output, per the checks below - the gfx900 runs use one
; *_load_short_d16 into v1; the gfx906 run merges a plain ushort load with
; v_bfi_b32, and the gfx803 run merges it with v_perm_b32.
1519 define void @load_private_lo_v2f16_reglo_vreg_nooff(ptr addrspace(5) %in, i32 %reg) #0 {
1520 ; GFX900-MUBUF-LABEL: load_private_lo_v2f16_reglo_vreg_nooff:
1521 ; GFX900-MUBUF: ; %bb.0: ; %entry
1522 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1523 ; GFX900-MUBUF-NEXT: buffer_load_short_d16 v1, off, s[0:3], 0 offset:4094 glc
1524 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1525 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v1, off
1526 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1527 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
1529 ; GFX906-LABEL: load_private_lo_v2f16_reglo_vreg_nooff:
1530 ; GFX906: ; %bb.0: ; %entry
1531 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1532 ; GFX906-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094 glc
1533 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1534 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
1535 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v1
1536 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1537 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1538 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1540 ; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg_nooff:
1541 ; GFX803: ; %bb.0: ; %entry
1542 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1543 ; GFX803-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094 glc
1544 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1545 ; GFX803-NEXT: s_mov_b32 s4, 0x3020504
1546 ; GFX803-NEXT: v_perm_b32 v0, v0, v1, s4
1547 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1548 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1549 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1551 ; GFX900-FLATSCR-LABEL: load_private_lo_v2f16_reglo_vreg_nooff:
1552 ; GFX900-FLATSCR: ; %bb.0: ; %entry
1553 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1554 ; GFX900-FLATSCR-NEXT: s_movk_i32 s0, 0xffe
1555 ; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v1, off, s0 glc
1556 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1557 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v1, off
1558 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1559 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
1561 %reg.bc = bitcast i32 %reg to <2 x half>
; The address is the literal 4094 (0xffe), not a pointer derived from %in.
1562 %load = load volatile half, ptr addrspace(5) inttoptr (i32 4094 to ptr addrspace(5))
; Replace only element 0; element 1 keeps the bits that came from %reg.
1563 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
; The store destination is an undef global (addrspace 1) pointer.
1564 store <2 x half> %build1, ptr addrspace(1) undef
; Loads an i8 from byte offset 4095 of the byval private (addrspace 5)
; argument %in, zero-extends it to i16, and inserts it into element 0 of %reg
; reinterpreted as <2 x i16> before storing the vector.
; Expected output, per the checks below - the gfx900 runs use one
; *_load_ubyte_d16 addressed off s32 with offset 4095; the gfx906 run merges a
; plain ubyte load with v_bfi_b32; the gfx803 run masks %reg with 0xffff0000
; and ORs in the loaded byte.
1568 define void @load_private_lo_v2i16_reglo_vreg_zexti8(ptr addrspace(5) byval(i8) %in, i32 %reg) #0 {
1569 ; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8:
1570 ; GFX900-MUBUF: ; %bb.0: ; %entry
1571 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1572 ; GFX900-MUBUF-NEXT: buffer_load_ubyte_d16 v0, off, s[0:3], s32 offset:4095
1573 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1574 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
1575 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1576 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
1578 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8:
1579 ; GFX906: ; %bb.0: ; %entry
1580 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1581 ; GFX906-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095
1582 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
1583 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1584 ; GFX906-NEXT: v_bfi_b32 v0, s4, v1, v0
1585 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1586 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1587 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1589 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8:
1590 ; GFX803: ; %bb.0: ; %entry
1591 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1592 ; GFX803-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095
1593 ; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
1594 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1595 ; GFX803-NEXT: v_or_b32_e32 v0, v1, v0
1596 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1597 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1598 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1600 ; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8:
1601 ; GFX900-FLATSCR: ; %bb.0: ; %entry
1602 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1603 ; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v0, off, s32 offset:4095
1604 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1605 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
1606 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1607 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
1609 %reg.bc = bitcast i32 %reg to <2 x i16>
; Byte offset 4095 shows up as the immediate offset in every expected load.
1610 %gep = getelementptr inbounds i8, ptr addrspace(5) %in, i64 4095
1611 %load = load i8, ptr addrspace(5) %gep
1612 %ext = zext i8 %load to i16
; Replace only element 0; element 1 keeps the bits that came from %reg.
1613 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
; The store destination is an undef global (addrspace 1) pointer.
1614 store <2 x i16> %build1, ptr addrspace(1) undef
; Loads an i8 from byte offset 4095 of the byval private (addrspace 5)
; argument %in, sign-extends it to i16, and inserts it into element 0 of %reg
; reinterpreted as <2 x i16> before storing the vector.
; Expected output, per the checks below - the gfx900 runs use one
; *_load_sbyte_d16 addressed off s32 with offset 4095; the gfx906 run merges a
; plain sbyte load with v_bfi_b32; the gfx803 run masks %reg with 0xffff0000
; and ORs in the loaded value through an SDWA WORD_0 source select.
1618 define void @load_private_lo_v2i16_reglo_vreg_sexti8(ptr addrspace(5) byval(i8) %in, i32 %reg) #0 {
1619 ; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8:
1620 ; GFX900-MUBUF: ; %bb.0: ; %entry
1621 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1622 ; GFX900-MUBUF-NEXT: buffer_load_sbyte_d16 v0, off, s[0:3], s32 offset:4095
1623 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1624 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
1625 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1626 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
1628 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8:
1629 ; GFX906: ; %bb.0: ; %entry
1630 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1631 ; GFX906-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095
1632 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
1633 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1634 ; GFX906-NEXT: v_bfi_b32 v0, s4, v1, v0
1635 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1636 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1637 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1639 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8:
1640 ; GFX803: ; %bb.0: ; %entry
1641 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1642 ; GFX803-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095
1643 ; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
1644 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1645 ; GFX803-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1646 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1647 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1648 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1650 ; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8:
1651 ; GFX900-FLATSCR: ; %bb.0: ; %entry
1652 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1653 ; GFX900-FLATSCR-NEXT: scratch_load_sbyte_d16 v0, off, s32 offset:4095
1654 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1655 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
1656 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1657 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
1659 %reg.bc = bitcast i32 %reg to <2 x i16>
; Byte offset 4095 shows up as the immediate offset in every expected load.
1660 %gep = getelementptr inbounds i8, ptr addrspace(5) %in, i64 4095
1661 %load = load i8, ptr addrspace(5) %gep
1662 %ext = sext i8 %load to i16
; Replace only element 0; element 1 keeps the bits that came from %reg.
1663 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
; The store destination is an undef global (addrspace 1) pointer.
1664 store <2 x i16> %build1, ptr addrspace(1) undef
; Volatile i8 load from the constant private (addrspace 5) address 4094,
; zero-extended to i16 and inserted into element 0 of %reg reinterpreted as
; <2 x i16>; the vector is then stored. %in is never referenced by the body.
; Expected output, per the checks below - the gfx900 runs use one
; *_load_ubyte_d16 into v1; the gfx906 run merges a plain ubyte load with
; v_bfi_b32; the gfx803 run masks %reg with 0xffff0000 and ORs in the byte.
1668 define void @load_private_lo_v2i16_reglo_vreg_nooff_zexti8(ptr addrspace(5) %in, i32 %reg) #0 {
1669 ; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_zexti8:
1670 ; GFX900-MUBUF: ; %bb.0: ; %entry
1671 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1672 ; GFX900-MUBUF-NEXT: buffer_load_ubyte_d16 v1, off, s[0:3], 0 offset:4094 glc
1673 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1674 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v1, off
1675 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1676 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
1678 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_zexti8:
1679 ; GFX906: ; %bb.0: ; %entry
1680 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1681 ; GFX906-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:4094 glc
1682 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1683 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
1684 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v1
1685 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1686 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1687 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1689 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_zexti8:
1690 ; GFX803: ; %bb.0: ; %entry
1691 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1692 ; GFX803-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:4094 glc
1693 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1694 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
1695 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
1696 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1697 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1698 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1700 ; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_zexti8:
1701 ; GFX900-FLATSCR: ; %bb.0: ; %entry
1702 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1703 ; GFX900-FLATSCR-NEXT: s_movk_i32 s0, 0xffe
1704 ; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v1, off, s0 glc
1705 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1706 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v1, off
1707 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1708 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
1710 %reg.bc = bitcast i32 %reg to <2 x i16>
; The address is the literal 4094 (0xffe), not a pointer derived from %in.
1711 %load = load volatile i8, ptr addrspace(5) inttoptr (i32 4094 to ptr addrspace(5))
1712 %ext = zext i8 %load to i16
; Replace only element 0; element 1 keeps the bits that came from %reg.
1713 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
; The store destination is an undef global (addrspace 1) pointer.
1714 store <2 x i16> %build1, ptr addrspace(1) undef
; Volatile i8 load from the constant private (addrspace 5) address 4094,
; sign-extended to i16 and inserted into element 0 of %reg reinterpreted as
; <2 x i16>; the vector is then stored. %in is never referenced by the body.
; Expected output, per the checks below - the gfx900 runs use one
; *_load_sbyte_d16 into v1; the gfx906 run merges a plain sbyte load with
; v_bfi_b32; the gfx803 run masks %reg with 0xffff0000 and ORs in the loaded
; value through an SDWA WORD_0 source select.
1718 define void @load_private_lo_v2i16_reglo_vreg_nooff_sexti8(ptr addrspace(5) %in, i32 %reg) #0 {
1719 ; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_sexti8:
1720 ; GFX900-MUBUF: ; %bb.0: ; %entry
1721 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1722 ; GFX900-MUBUF-NEXT: buffer_load_sbyte_d16 v1, off, s[0:3], 0 offset:4094 glc
1723 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1724 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v1, off
1725 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1726 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
1728 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_sexti8:
1729 ; GFX906: ; %bb.0: ; %entry
1730 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1731 ; GFX906-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 offset:4094 glc
1732 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1733 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
1734 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v1
1735 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1736 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1737 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1739 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_sexti8:
1740 ; GFX803: ; %bb.0: ; %entry
1741 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1742 ; GFX803-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 offset:4094 glc
1743 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1744 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
1745 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1746 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1747 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1748 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1750 ; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_sexti8:
1751 ; GFX900-FLATSCR: ; %bb.0: ; %entry
1752 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1753 ; GFX900-FLATSCR-NEXT: s_movk_i32 s0, 0xffe
1754 ; GFX900-FLATSCR-NEXT: scratch_load_sbyte_d16 v1, off, s0 glc
1755 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1756 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v1, off
1757 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1758 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
1760 %reg.bc = bitcast i32 %reg to <2 x i16>
; The address is the literal 4094 (0xffe), not a pointer derived from %in.
1761 %load = load volatile i8, ptr addrspace(5) inttoptr (i32 4094 to ptr addrspace(5))
1762 %ext = sext i8 %load to i16
; Replace only element 0; element 1 keeps the bits that came from %reg.
1763 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
; The store destination is an undef global (addrspace 1) pointer.
1764 store <2 x i16> %build1, ptr addrspace(1) undef
; Volatile i8 load from the constant private (addrspace 5) address 4094,
; zero-extended to i16, bitcast to half, and inserted into element 0 of %reg
; reinterpreted as <2 x half>; the vector is then stored. %in is never
; referenced by the body.
; Expected output, per the checks below - the gfx900 runs use one
; *_load_ubyte_d16 into v1; the gfx906 run merges a plain ubyte load with
; v_bfi_b32; the gfx803 run masks %reg with 0xffff0000 and ORs in the byte.
1768 define void @load_private_lo_v2f16_reglo_vreg_nooff_zexti8(ptr addrspace(5) %in, i32 %reg) #0 {
1769 ; GFX900-MUBUF-LABEL: load_private_lo_v2f16_reglo_vreg_nooff_zexti8:
1770 ; GFX900-MUBUF: ; %bb.0: ; %entry
1771 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1772 ; GFX900-MUBUF-NEXT: buffer_load_ubyte_d16 v1, off, s[0:3], 0 offset:4094 glc
1773 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1774 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v1, off
1775 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1776 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
1778 ; GFX906-LABEL: load_private_lo_v2f16_reglo_vreg_nooff_zexti8:
1779 ; GFX906: ; %bb.0: ; %entry
1780 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1781 ; GFX906-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:4094 glc
1782 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1783 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
1784 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v1
1785 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1786 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1787 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1789 ; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg_nooff_zexti8:
1790 ; GFX803: ; %bb.0: ; %entry
1791 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1792 ; GFX803-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:4094 glc
1793 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1794 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
1795 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
1796 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1797 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1798 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1800 ; GFX900-FLATSCR-LABEL: load_private_lo_v2f16_reglo_vreg_nooff_zexti8:
1801 ; GFX900-FLATSCR: ; %bb.0: ; %entry
1802 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1803 ; GFX900-FLATSCR-NEXT: s_movk_i32 s0, 0xffe
1804 ; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v1, off, s0 glc
1805 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1806 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v1, off
1807 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1808 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
1810 %reg.bc = bitcast i32 %reg to <2 x half>
; The address is the literal 4094 (0xffe), not a pointer derived from %in.
1811 %load = load volatile i8, ptr addrspace(5) inttoptr (i32 4094 to ptr addrspace(5))
1812 %ext = zext i8 %load to i16
; Reinterpret the extended integer bits as half; no numeric conversion.
1813 %bc.ext = bitcast i16 %ext to half
; Replace only element 0; element 1 keeps the bits that came from %reg.
1814 %build1 = insertelement <2 x half> %reg.bc, half %bc.ext, i32 0
; The store destination is an undef global (addrspace 1) pointer.
1815 store <2 x half> %build1, ptr addrspace(1) undef
; Loads an i16 from the constant address space (addrspace 4) at element index
; -2047 relative to %in and inserts it into element 0 of %reg reinterpreted
; as <2 x i16>, then stores the vector.
; Expected output, per the checks below - gfx900 uses global_load_short_d16
; with immediate offset -4094; the gfx906 run uses global_load_ushort with the
; same offset plus v_bfi_b32; the gfx803 run adds 0xfffff002 to the 64-bit
; address, uses flat_load_ushort, and merges with v_perm_b32.
1819 define void @load_constant_lo_v2i16_reglo_vreg(ptr addrspace(4) %in, i32 %reg) #0 {
1820 ; GFX900-LABEL: load_constant_lo_v2i16_reglo_vreg:
1821 ; GFX900: ; %bb.0: ; %entry
1822 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1823 ; GFX900-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
1824 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1825 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
1826 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1827 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1829 ; GFX906-LABEL: load_constant_lo_v2i16_reglo_vreg:
1830 ; GFX906: ; %bb.0: ; %entry
1831 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1832 ; GFX906-NEXT: global_load_ushort v0, v[0:1], off offset:-4094
1833 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
1834 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1835 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v2
1836 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1837 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1838 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1840 ; GFX803-LABEL: load_constant_lo_v2i16_reglo_vreg:
1841 ; GFX803: ; %bb.0: ; %entry
1842 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1843 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff002, v0
1844 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
1845 ; GFX803-NEXT: flat_load_ushort v0, v[0:1]
1846 ; GFX803-NEXT: s_mov_b32 s4, 0x3020504
1847 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1848 ; GFX803-NEXT: v_perm_b32 v0, v0, v2, s4
1849 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1850 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1851 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1853 %reg.bc = bitcast i32 %reg to <2 x i16>
; -2047 i16 elements is -4094 bytes, the offset seen in the expected loads.
1854 %gep = getelementptr inbounds i16, ptr addrspace(4) %in, i64 -2047
1855 %load = load i16, ptr addrspace(4) %gep
; Replace only element 0; element 1 keeps the bits that came from %reg.
1856 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
; The store destination is an undef global (addrspace 1) pointer.
1857 store <2 x i16> %build1, ptr addrspace(1) undef
; Half-precision variant - loads a half from the constant address space
; (addrspace 4) at element index -2047 relative to %in and inserts it into
; element 0 of %reg reinterpreted as <2 x half>, then stores the vector.
; Expected output, per the checks below - gfx900 uses global_load_short_d16
; with immediate offset -4094; the gfx906 run uses global_load_ushort with the
; same offset plus v_bfi_b32; the gfx803 run adds 0xfffff002 to the 64-bit
; address, uses flat_load_ushort, and merges with v_perm_b32.
1861 define void @load_constant_lo_v2f16_reglo_vreg(ptr addrspace(4) %in, i32 %reg) #0 {
1862 ; GFX900-LABEL: load_constant_lo_v2f16_reglo_vreg:
1863 ; GFX900: ; %bb.0: ; %entry
1864 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1865 ; GFX900-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
1866 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1867 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
1868 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1869 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1871 ; GFX906-LABEL: load_constant_lo_v2f16_reglo_vreg:
1872 ; GFX906: ; %bb.0: ; %entry
1873 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1874 ; GFX906-NEXT: global_load_ushort v0, v[0:1], off offset:-4094
1875 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
1876 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1877 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v2
1878 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1879 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1880 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1882 ; GFX803-LABEL: load_constant_lo_v2f16_reglo_vreg:
1883 ; GFX803: ; %bb.0: ; %entry
1884 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1885 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff002, v0
1886 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
1887 ; GFX803-NEXT: flat_load_ushort v0, v[0:1]
1888 ; GFX803-NEXT: s_mov_b32 s4, 0x3020504
1889 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1890 ; GFX803-NEXT: v_perm_b32 v0, v0, v2, s4
1891 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1892 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1893 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1895 %reg.bc = bitcast i32 %reg to <2 x half>
; -2047 half elements is -4094 bytes, the offset seen in the expected loads.
1896 %gep = getelementptr inbounds half, ptr addrspace(4) %in, i64 -2047
1897 %load = load half, ptr addrspace(4) %gep
; Replace only element 0; element 1 keeps the bits that came from %reg.
1898 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
; The store destination is an undef global (addrspace 1) pointer.
1899 store <2 x half> %build1, ptr addrspace(1) undef
; Loads an i8 from the constant address space (addrspace 4) at byte offset
; -4095 relative to %in, zero-extends it to i16, bitcasts to half, and inserts
; it into element 0 of %reg reinterpreted as <2 x half>; the vector is then
; stored.
; Expected output, per the checks below - gfx900 uses global_load_ubyte_d16
; with immediate offset -4095; the gfx906 run uses global_load_ubyte plus
; v_bfi_b32; the gfx803 run adds 0xfffff001 to the 64-bit address, uses
; flat_load_ubyte, masks %reg with 0xffff0000 and ORs in the byte.
1903 define void @load_constant_lo_v2f16_reglo_vreg_zexti8(ptr addrspace(4) %in, i32 %reg) #0 {
1904 ; GFX900-LABEL: load_constant_lo_v2f16_reglo_vreg_zexti8:
1905 ; GFX900: ; %bb.0: ; %entry
1906 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1907 ; GFX900-NEXT: global_load_ubyte_d16 v2, v[0:1], off offset:-4095
1908 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1909 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
1910 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1911 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1913 ; GFX906-LABEL: load_constant_lo_v2f16_reglo_vreg_zexti8:
1914 ; GFX906: ; %bb.0: ; %entry
1915 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1916 ; GFX906-NEXT: global_load_ubyte v0, v[0:1], off offset:-4095
1917 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
1918 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1919 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v2
1920 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1921 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1922 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1924 ; GFX803-LABEL: load_constant_lo_v2f16_reglo_vreg_zexti8:
1925 ; GFX803: ; %bb.0: ; %entry
1926 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1927 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0
1928 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
1929 ; GFX803-NEXT: flat_load_ubyte v0, v[0:1]
1930 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
1931 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1932 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
1933 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1934 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1935 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1937 %reg.bc = bitcast i32 %reg to <2 x half>
; Byte offset -4095 is the immediate offset seen in the gfx900/gfx906 loads.
1938 %gep = getelementptr inbounds i8, ptr addrspace(4) %in, i64 -4095
1939 %load = load i8, ptr addrspace(4) %gep
1940 %ext = zext i8 %load to i16
; Reinterpret the extended integer bits as half; no numeric conversion.
1941 %bitcast = bitcast i16 %ext to half
; Replace only element 0; element 1 keeps the bits that came from %reg.
1942 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0
; The store destination is an undef global (addrspace 1) pointer.
1943 store <2 x half> %build1, ptr addrspace(1) undef
; Loads an i8 from the constant address space (addrspace 4) at byte offset
; -4095 relative to %in, sign-extends it to i16, bitcasts to half, and inserts
; it into element 0 of %reg reinterpreted as <2 x half>; the vector is then
; stored.
; Expected output, per the checks below - gfx900 uses global_load_sbyte_d16
; with immediate offset -4095; the gfx906 run uses global_load_sbyte plus
; v_bfi_b32; the gfx803 run adds 0xfffff001 to the 64-bit address, uses
; flat_load_sbyte, masks %reg with 0xffff0000 and ORs in the loaded value
; through an SDWA WORD_0 source select.
1947 define void @load_constant_lo_v2f16_reglo_vreg_sexti8(ptr addrspace(4) %in, i32 %reg) #0 {
1948 ; GFX900-LABEL: load_constant_lo_v2f16_reglo_vreg_sexti8:
1949 ; GFX900: ; %bb.0: ; %entry
1950 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1951 ; GFX900-NEXT: global_load_sbyte_d16 v2, v[0:1], off offset:-4095
1952 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1953 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
1954 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1955 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1957 ; GFX906-LABEL: load_constant_lo_v2f16_reglo_vreg_sexti8:
1958 ; GFX906: ; %bb.0: ; %entry
1959 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1960 ; GFX906-NEXT: global_load_sbyte v0, v[0:1], off offset:-4095
1961 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
1962 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1963 ; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v2
1964 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1965 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1966 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1968 ; GFX803-LABEL: load_constant_lo_v2f16_reglo_vreg_sexti8:
1969 ; GFX803: ; %bb.0: ; %entry
1970 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1971 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0
1972 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
1973 ; GFX803-NEXT: flat_load_sbyte v0, v[0:1]
1974 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
1975 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1976 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1977 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1978 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1979 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1981 %reg.bc = bitcast i32 %reg to <2 x half>
; Byte offset -4095 is the immediate offset seen in the gfx900/gfx906 loads.
1982 %gep = getelementptr inbounds i8, ptr addrspace(4) %in, i64 -4095
1983 %load = load i8, ptr addrspace(4) %gep
1984 %ext = sext i8 %load to i16
; Reinterpret the extended integer bits as half; no numeric conversion.
1985 %bitcast = bitcast i16 %ext to half
; Replace only element 0; element 1 keeps the bits that came from %reg.
1986 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0
; The store destination is an undef global (addrspace 1) pointer.
1987 store <2 x half> %build1, ptr addrspace(1) undef
; Allocates two private (addrspace 5) stack objects, does a volatile store of
; 123 to the first, then does a volatile i16 load from element 2027 of the
; second and inserts it into element 0 of %reg reinterpreted as <2 x i16>
; before storing the vector.
; Expected output, per the checks below - the load address is formed as a
; base of 44 plus immediate offset 4054 (a VGPR holding 44 with offen in the
; MUBUF runs, s32 + 44 in the flat-scratch run). The gfx900 runs use a
; *_load_short_d16; the gfx906 run merges a ushort load with v_bfi_b32 and the
; gfx803 run merges it with v_perm_b32.
1991 define void @load_private_lo_v2i16_reglo_vreg_to_offset(i32 %reg) #0 {
1992 ; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_to_offset:
1993 ; GFX900-MUBUF: ; %bb.0: ; %entry
1994 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1995 ; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 0x7b
1996 ; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
1997 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1998 ; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 44
1999 ; GFX900-MUBUF-NEXT: buffer_load_short_d16 v0, v1, s[0:3], s32 offen offset:4054 glc
2000 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
2001 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
2002 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
2003 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
2005 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_to_offset:
2006 ; GFX906: ; %bb.0: ; %entry
2007 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2008 ; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b
2009 ; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
2010 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2011 ; GFX906-NEXT: v_mov_b32_e32 v2, 44
2012 ; GFX906-NEXT: buffer_load_ushort v1, v2, s[0:3], s32 offen offset:4054 glc
2013 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2014 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
2015 ; GFX906-NEXT: v_bfi_b32 v0, s4, v1, v0
2016 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
2017 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2018 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2020 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_to_offset:
2021 ; GFX803: ; %bb.0: ; %entry
2022 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2023 ; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b
2024 ; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
2025 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2026 ; GFX803-NEXT: v_mov_b32_e32 v2, 44
2027 ; GFX803-NEXT: buffer_load_ushort v1, v2, s[0:3], s32 offen offset:4054 glc
2028 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2029 ; GFX803-NEXT: s_mov_b32 s4, 0x3020504
2030 ; GFX803-NEXT: v_perm_b32 v0, v1, v0, s4
2031 ; GFX803-NEXT: flat_store_dword v[0:1], v0
2032 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2033 ; GFX803-NEXT: s_setpc_b64 s[30:31]
2035 ; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_to_offset:
2036 ; GFX900-FLATSCR: ; %bb.0: ; %entry
2037 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2038 ; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b
2039 ; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32 offset:4
2040 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2041 ; GFX900-FLATSCR-NEXT: s_add_i32 s0, s32, 44
2042 ; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v0, off, s0 offset:4054 glc
2043 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2044 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
2045 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2046 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
; %obj0 is 10 x i32 = 40 bytes; the checks place it at s32 + 4, and the base
; of 44 used for the %obj1 access equals 4 + 40.
2048 %obj0 = alloca [10 x i32], align 4, addrspace(5)
2049 %obj1 = alloca [4096 x i16], align 2, addrspace(5)
2050 %reg.bc = bitcast i32 %reg to <2 x i16>
; Volatile store of 123 (0x7b) to %obj0.
2051 store volatile i32 123, ptr addrspace(5) %obj0
; Element 2027 of i16 is byte offset 4054 within %obj1.
2052 %gep = getelementptr inbounds [4096 x i16], ptr addrspace(5) %obj1, i32 0, i32 2027
2053 %load = load volatile i16, ptr addrspace(5) %gep
; Replace only element 0; element 1 keeps the bits that came from %reg.
2054 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
; The store destination is an undef global (addrspace 1) pointer.
2055 store <2 x i16> %build1, ptr addrspace(1) undef
; Allocates two private (addrspace 5) stack objects, does a volatile store of
; 123 to the first, then does a volatile i8 load from byte 4055 of the second,
; sign-extends it to i16, and inserts it into element 0 of %reg reinterpreted
; as <2 x i16> before storing the vector.
; Expected output, per the checks below - the load address is formed as a
; base of 44 plus immediate offset 4055 (a VGPR holding 44 with offen in the
; MUBUF runs, s32 + 44 in the flat-scratch run). The gfx900 runs use a
; *_load_sbyte_d16; the gfx906 run merges an sbyte load with v_bfi_b32; the
; gfx803 run masks %reg with 0xffff0000 and ORs in the loaded value through an
; SDWA WORD_0 source select.
2059 define void @load_private_lo_v2i16_reglo_vreg_sexti8_to_offset(i32 %reg) #0 {
2060 ; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8_to_offset:
2061 ; GFX900-MUBUF: ; %bb.0: ; %entry
2062 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2063 ; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 0x7b
2064 ; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
2065 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
2066 ; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 44
2067 ; GFX900-MUBUF-NEXT: buffer_load_sbyte_d16 v0, v1, s[0:3], s32 offen offset:4055 glc
2068 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
2069 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
2070 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
2071 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
2073 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8_to_offset:
2074 ; GFX906: ; %bb.0: ; %entry
2075 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2076 ; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b
2077 ; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
2078 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2079 ; GFX906-NEXT: v_mov_b32_e32 v2, 44
2080 ; GFX906-NEXT: buffer_load_sbyte v1, v2, s[0:3], s32 offen offset:4055 glc
2081 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2082 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
2083 ; GFX906-NEXT: v_bfi_b32 v0, s4, v1, v0
2084 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
2085 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2086 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2088 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8_to_offset:
2089 ; GFX803: ; %bb.0: ; %entry
2090 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2091 ; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b
2092 ; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
2093 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2094 ; GFX803-NEXT: v_mov_b32_e32 v2, 44
2095 ; GFX803-NEXT: buffer_load_sbyte v1, v2, s[0:3], s32 offen offset:4055 glc
2096 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2097 ; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
2098 ; GFX803-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2099 ; GFX803-NEXT: flat_store_dword v[0:1], v0
2100 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2101 ; GFX803-NEXT: s_setpc_b64 s[30:31]
2103 ; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8_to_offset:
2104 ; GFX900-FLATSCR: ; %bb.0: ; %entry
2105 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2106 ; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b
2107 ; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32 offset:4
2108 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2109 ; GFX900-FLATSCR-NEXT: s_add_i32 s0, s32, 44
2110 ; GFX900-FLATSCR-NEXT: scratch_load_sbyte_d16 v0, off, s0 offset:4055 glc
2111 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2112 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
2113 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2114 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
; %obj0 is 10 x i32 = 40 bytes; the checks place it at s32 + 4, and the base
; of 44 used for the %obj1 access equals 4 + 40.
2116 %obj0 = alloca [10 x i32], align 4, addrspace(5)
2117 %obj1 = alloca [4096 x i8], align 2, addrspace(5)
2118 %reg.bc = bitcast i32 %reg to <2 x i16>
; Volatile store of 123 (0x7b) to %obj0.
2119 store volatile i32 123, ptr addrspace(5) %obj0
; Byte 4055 of %obj1 is the immediate offset seen in the expected loads.
2120 %gep = getelementptr inbounds [4096 x i8], ptr addrspace(5) %obj1, i32 0, i32 4055
2121 %load = load volatile i8, ptr addrspace(5) %gep
2122 %load.ext = sext i8 %load to i16
; Replace only element 0; element 1 keeps the bits that came from %reg.
2123 %build1 = insertelement <2 x i16> %reg.bc, i16 %load.ext, i32 0
; The store destination is an undef global (addrspace 1) pointer.
2124 store <2 x i16> %build1, ptr addrspace(1) undef
; Allocates two private (addrspace 5) stack objects, does a volatile store of
; 123 to the first, then does a volatile i8 load from byte 4055 of the second,
; zero-extends it to i16, and inserts it into element 0 of %reg reinterpreted
; as <2 x i16> before storing the vector.
; Expected output, per the checks below - the load address is formed as a
; base of 44 plus immediate offset 4055 (a VGPR holding 44 with offen in the
; MUBUF runs, s32 + 44 in the flat-scratch run). The gfx900 runs use a
; *_load_ubyte_d16; the gfx906 run merges a ubyte load with v_bfi_b32; the
; gfx803 run masks %reg with 0xffff0000 and ORs in the loaded byte.
2128 define void @load_private_lo_v2i16_reglo_vreg_zexti8_to_offset(i32 %reg) #0 {
2129 ; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8_to_offset:
2130 ; GFX900-MUBUF: ; %bb.0: ; %entry
2131 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2132 ; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 0x7b
2133 ; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
2134 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
2135 ; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 44
2136 ; GFX900-MUBUF-NEXT: buffer_load_ubyte_d16 v0, v1, s[0:3], s32 offen offset:4055 glc
2137 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
2138 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
2139 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
2140 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
2142 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8_to_offset:
2143 ; GFX906: ; %bb.0: ; %entry
2144 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2145 ; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b
2146 ; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
2147 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2148 ; GFX906-NEXT: v_mov_b32_e32 v2, 44
2149 ; GFX906-NEXT: buffer_load_ubyte v1, v2, s[0:3], s32 offen offset:4055 glc
2150 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2151 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
2152 ; GFX906-NEXT: v_bfi_b32 v0, s4, v1, v0
2153 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
2154 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2155 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2157 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8_to_offset:
2158 ; GFX803: ; %bb.0: ; %entry
2159 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2160 ; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b
2161 ; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
2162 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2163 ; GFX803-NEXT: v_mov_b32_e32 v2, 44
2164 ; GFX803-NEXT: buffer_load_ubyte v1, v2, s[0:3], s32 offen offset:4055 glc
2165 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2166 ; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
2167 ; GFX803-NEXT: v_or_b32_e32 v0, v1, v0
2168 ; GFX803-NEXT: flat_store_dword v[0:1], v0
2169 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2170 ; GFX803-NEXT: s_setpc_b64 s[30:31]
2172 ; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8_to_offset:
2173 ; GFX900-FLATSCR: ; %bb.0: ; %entry
2174 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2175 ; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b
2176 ; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32 offset:4
2177 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2178 ; GFX900-FLATSCR-NEXT: s_add_i32 s0, s32, 44
2179 ; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v0, off, s0 offset:4055 glc
2180 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2181 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
2182 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2183 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
; %obj0 is 10 x i32 = 40 bytes; the checks place it at s32 + 4, and the base
; of 44 used for the %obj1 access equals 4 + 40.
2185 %obj0 = alloca [10 x i32], align 4, addrspace(5)
2186 %obj1 = alloca [4096 x i8], align 2, addrspace(5)
2187 %reg.bc = bitcast i32 %reg to <2 x i16>
; Volatile store of 123 (0x7b) to %obj0.
2188 store volatile i32 123, ptr addrspace(5) %obj0
; Byte 4055 of %obj1 is the immediate offset seen in the expected loads.
2189 %gep = getelementptr inbounds [4096 x i8], ptr addrspace(5) %obj1, i32 0, i32 4055
2190 %load = load volatile i8, ptr addrspace(5) %gep
2191 %load.ext = zext i8 %load to i16
; Replace only element 0; element 1 keeps the bits that came from %reg.
2192 %build1 = insertelement <2 x i16> %reg.bc, i16 %load.ext, i32 0
; The store destination is an undef global (addrspace 1) pointer.
2193 store <2 x i16> %build1, ptr addrspace(1) undef
; Sign-extending i8 load into the low half of a <2 x half>.
;
; The IR loads a volatile i8 from byte 4055 of a 4096-byte addrspace(5)
; alloca, sign-extends it to i16, reinterprets those bits as half, and inserts
; the result into element 0 of the <2 x half> obtained by bitcasting %reg.
; The resulting vector is stored to an undef addrspace(1) pointer so that the
; value stays live.
;
; What the autogenerated assertions below expect, per run line:
;  * both gfx900 runs: a single *_load_sbyte_d16 whose destination is v0 (the
;    register holding %reg), with no separate merge instruction afterwards;
;  * the gfx906 run (which adds +sram-ecc): a plain buffer_load_sbyte into v1,
;    merged into v0 with v_bfi_b32 and a 0xffff mask;
;  * the fiji run: a plain buffer_load_sbyte into v1, merged with v_and_b32
;    (mask 0xffff0000) followed by an SDWA v_or_b32 that selects WORD_0 of the
;    loaded value.
; In every run the load is addressed as a base of 44 (VGPR with offen, or
; s32 + 44 for flat scratch) plus an immediate offset of 4055.
2197 define void @load_private_lo_v2f16_reglo_vreg_sexti8_to_offset(i32 %reg) #0 {
2198 ; GFX900-MUBUF-LABEL: load_private_lo_v2f16_reglo_vreg_sexti8_to_offset:
2199 ; GFX900-MUBUF: ; %bb.0: ; %entry
2200 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2201 ; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 0x7b
2202 ; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
2203 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
2204 ; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 44
2205 ; GFX900-MUBUF-NEXT: buffer_load_sbyte_d16 v0, v1, s[0:3], s32 offen offset:4055 glc
2206 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
2207 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
2208 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
2209 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
2211 ; GFX906-LABEL: load_private_lo_v2f16_reglo_vreg_sexti8_to_offset:
2212 ; GFX906: ; %bb.0: ; %entry
2213 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2214 ; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b
2215 ; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
2216 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2217 ; GFX906-NEXT: v_mov_b32_e32 v2, 44
2218 ; GFX906-NEXT: buffer_load_sbyte v1, v2, s[0:3], s32 offen offset:4055 glc
2219 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2220 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
2221 ; GFX906-NEXT: v_bfi_b32 v0, s4, v1, v0
2222 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
2223 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2224 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2226 ; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg_sexti8_to_offset:
2227 ; GFX803: ; %bb.0: ; %entry
2228 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2229 ; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b
2230 ; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
2231 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2232 ; GFX803-NEXT: v_mov_b32_e32 v2, 44
2233 ; GFX803-NEXT: buffer_load_sbyte v1, v2, s[0:3], s32 offen offset:4055 glc
2234 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2235 ; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
2236 ; GFX803-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2237 ; GFX803-NEXT: flat_store_dword v[0:1], v0
2238 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2239 ; GFX803-NEXT: s_setpc_b64 s[30:31]
2241 ; GFX900-FLATSCR-LABEL: load_private_lo_v2f16_reglo_vreg_sexti8_to_offset:
2242 ; GFX900-FLATSCR: ; %bb.0: ; %entry
2243 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2244 ; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b
2245 ; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32 offset:4
2246 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2247 ; GFX900-FLATSCR-NEXT: s_add_i32 s0, s32, 44
2248 ; GFX900-FLATSCR-NEXT: scratch_load_sbyte_d16 v0, off, s0 offset:4055 glc
2249 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2250 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
2251 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2252 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
; Two addrspace(5) stack objects: a 40-byte i32 array and the 4096-byte
; array that is actually loaded from.
2254 %obj0 = alloca [10 x i32], align 4, addrspace(5)
2255 %obj1 = alloca [4096 x i8], align 2, addrspace(5)
; Reinterpret the 32-bit argument as the vector whose high half is kept.
2256 %reg.bc = bitcast i32 %reg to <2 x half>
; Volatile store to %obj0; it appears in every expected sequence as the
; dword store of 0x7b at s32 offset:4.
2257 store volatile i32 123, ptr addrspace(5) %obj0
; Address byte 4055 of %obj1; 4055 is the immediate offset seen on the
; expected load instructions.
2258 %gep = getelementptr inbounds [4096 x i8], ptr addrspace(5) %obj1, i32 0, i32 4055
2259 %load = load volatile i8, ptr addrspace(5) %gep
; Sign-extend to 16 bits, then treat those bits as a half value.
2260 %load.ext = sext i8 %load to i16
2261 %bitcast = bitcast i16 %load.ext to half
; Replace element 0 only; element 1 still comes from %reg.
2262 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0
2263 store <2 x half> %build1, ptr addrspace(1) undef
; Zero-extending i8 load into the low half of a <2 x half>.
;
; The IR loads a volatile i8 from byte 4055 of a 4096-byte addrspace(5)
; alloca, zero-extends it to i16, reinterprets those bits as half, and inserts
; the result into element 0 of the <2 x half> obtained by bitcasting %reg.
; The resulting vector is stored to an undef addrspace(1) pointer so that the
; value stays live.
;
; What the autogenerated assertions below expect, per run line:
;  * both gfx900 runs: a single *_load_ubyte_d16 whose destination is v0 (the
;    register holding %reg), with no separate merge instruction afterwards;
;  * the gfx906 run (which adds +sram-ecc): a plain buffer_load_ubyte into v1,
;    merged into v0 with v_bfi_b32 and a 0xffff mask;
;  * the fiji run: a plain buffer_load_ubyte into v1, merged with v_and_b32
;    (mask 0xffff0000) followed by an ordinary v_or_b32.
; In every run the load is addressed as a base of 44 (VGPR with offen, or
; s32 + 44 for flat scratch) plus an immediate offset of 4055.
2267 define void @load_private_lo_v2f16_reglo_vreg_zexti8_to_offset(i32 %reg) #0 {
2268 ; GFX900-MUBUF-LABEL: load_private_lo_v2f16_reglo_vreg_zexti8_to_offset:
2269 ; GFX900-MUBUF: ; %bb.0: ; %entry
2270 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2271 ; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 0x7b
2272 ; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
2273 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
2274 ; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 44
2275 ; GFX900-MUBUF-NEXT: buffer_load_ubyte_d16 v0, v1, s[0:3], s32 offen offset:4055 glc
2276 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
2277 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
2278 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
2279 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
2281 ; GFX906-LABEL: load_private_lo_v2f16_reglo_vreg_zexti8_to_offset:
2282 ; GFX906: ; %bb.0: ; %entry
2283 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2284 ; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b
2285 ; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
2286 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2287 ; GFX906-NEXT: v_mov_b32_e32 v2, 44
2288 ; GFX906-NEXT: buffer_load_ubyte v1, v2, s[0:3], s32 offen offset:4055 glc
2289 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2290 ; GFX906-NEXT: s_mov_b32 s4, 0xffff
2291 ; GFX906-NEXT: v_bfi_b32 v0, s4, v1, v0
2292 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
2293 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2294 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2296 ; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg_zexti8_to_offset:
2297 ; GFX803: ; %bb.0: ; %entry
2298 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2299 ; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b
2300 ; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
2301 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2302 ; GFX803-NEXT: v_mov_b32_e32 v2, 44
2303 ; GFX803-NEXT: buffer_load_ubyte v1, v2, s[0:3], s32 offen offset:4055 glc
2304 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2305 ; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
2306 ; GFX803-NEXT: v_or_b32_e32 v0, v1, v0
2307 ; GFX803-NEXT: flat_store_dword v[0:1], v0
2308 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2309 ; GFX803-NEXT: s_setpc_b64 s[30:31]
2311 ; GFX900-FLATSCR-LABEL: load_private_lo_v2f16_reglo_vreg_zexti8_to_offset:
2312 ; GFX900-FLATSCR: ; %bb.0: ; %entry
2313 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2314 ; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b
2315 ; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32 offset:4
2316 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2317 ; GFX900-FLATSCR-NEXT: s_add_i32 s0, s32, 44
2318 ; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v0, off, s0 offset:4055 glc
2319 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2320 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
2321 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2322 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
; Two addrspace(5) stack objects: a 40-byte i32 array and the 4096-byte
; array that is actually loaded from.
2324 %obj0 = alloca [10 x i32], align 4, addrspace(5)
2325 %obj1 = alloca [4096 x i8], align 2, addrspace(5)
; Reinterpret the 32-bit argument as the vector whose high half is kept.
2326 %reg.bc = bitcast i32 %reg to <2 x half>
; Volatile store to %obj0; it appears in every expected sequence as the
; dword store of 0x7b at s32 offset:4.
2327 store volatile i32 123, ptr addrspace(5) %obj0
; Address byte 4055 of %obj1; 4055 is the immediate offset seen on the
; expected load instructions.
2328 %gep = getelementptr inbounds [4096 x i8], ptr addrspace(5) %obj1, i32 0, i32 4055
2329 %load = load volatile i8, ptr addrspace(5) %gep
; Zero-extend to 16 bits, then treat those bits as a half value.
2330 %load.ext = zext i8 %load to i16
2331 %bitcast = bitcast i16 %load.ext to half
; Replace element 0 only; element 1 still comes from %reg.
2332 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0
2333 store <2 x half> %build1, ptr addrspace(1) undef
; Attribute group referenced as `#0` on the test function definitions in this
; file; it marks them nounwind.
2337 attributes #0 = { nounwind }