1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,GFX900-MUBUF %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx906 -amdgpu-sroa=0 -mattr=-promote-alloca,+sram-ecc -verify-machineinstrs < %s | FileCheck --check-prefix=GFX906 %s
4 ; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck --check-prefix=GFX803 %s
5 ; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs --amdgpu-enable-flat-scratch < %s | FileCheck -check-prefixes=GFX900,GFX900-FLATSCR %s
; Loads an i16 from addrspace(3) and inserts it into element 0 of an undef
; <2 x i16>. The GFX900 checks expect the d16 form of the DS load
; (ds_read_u16_d16); the GFX906 and GFX803 checks expect a plain ds_read_u16.
7 define <2 x i16> @load_local_lo_v2i16_undeflo(i16 addrspace(3)* %in) #0 {
8 ; GFX900-LABEL: load_local_lo_v2i16_undeflo:
9 ; GFX900: ; %bb.0: ; %entry
10 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; GFX900-NEXT: ds_read_u16_d16 v0, v0
12 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
13 ; GFX900-NEXT: s_setpc_b64 s[30:31]
15 ; GFX906-LABEL: load_local_lo_v2i16_undeflo:
16 ; GFX906: ; %bb.0: ; %entry
17 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18 ; GFX906-NEXT: ds_read_u16 v0, v0
19 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
20 ; GFX906-NEXT: s_setpc_b64 s[30:31]
22 ; GFX803-LABEL: load_local_lo_v2i16_undeflo:
23 ; GFX803: ; %bb.0: ; %entry
24 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25 ; GFX803-NEXT: s_mov_b32 m0, -1
26 ; GFX803-NEXT: ds_read_u16 v0, v0
27 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
28 ; GFX803-NEXT: s_setpc_b64 s[30:31]
30 %load = load i16, i16 addrspace(3)* %in
31 %build = insertelement <2 x i16> undef, i16 %load, i32 0
; Builds a <2 x i16> with the i16 argument %reg at element 1 and an i16 loaded
; from addrspace(3) at element 0. None of the three check sets expects a d16
; load here: each expects a plain ds_read_u16 followed by explicit packing
; (v_and + v_lshl_or on GFX900 and GFX906, v_lshlrev + v_or on GFX803).
35 define <2 x i16> @load_local_lo_v2i16_reglo(i16 addrspace(3)* %in, i16 %reg) #0 {
36 ; GFX900-LABEL: load_local_lo_v2i16_reglo:
37 ; GFX900: ; %bb.0: ; %entry
38 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39 ; GFX900-NEXT: ds_read_u16 v0, v0
40 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
41 ; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
42 ; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
43 ; GFX900-NEXT: s_setpc_b64 s[30:31]
45 ; GFX906-LABEL: load_local_lo_v2i16_reglo:
46 ; GFX906: ; %bb.0: ; %entry
47 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48 ; GFX906-NEXT: ds_read_u16 v0, v0
49 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
50 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
51 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
52 ; GFX906-NEXT: s_setpc_b64 s[30:31]
54 ; GFX803-LABEL: load_local_lo_v2i16_reglo:
55 ; GFX803: ; %bb.0: ; %entry
56 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
57 ; GFX803-NEXT: s_mov_b32 m0, -1
58 ; GFX803-NEXT: ds_read_u16 v0, v0
59 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
60 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
61 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
62 ; GFX803-NEXT: s_setpc_b64 s[30:31]
64 %load = load i16, i16 addrspace(3)* %in
65 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
66 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
70 ; Show that we get reasonable regalloc without physreg constraints.
; Builds a <2 x i16> with %reg at element 1 and an i16 loaded from
; addrspace(3) at element 0, then stores the vector through an undef
; addrspace(1) pointer (the function returns void). All three check sets
; expect a plain ds_read_u16 plus explicit packing before the dword store.
71 define void @load_local_lo_v2i16_reglo_vreg(i16 addrspace(3)* %in, i16 %reg) #0 {
72 ; GFX900-LABEL: load_local_lo_v2i16_reglo_vreg:
73 ; GFX900: ; %bb.0: ; %entry
74 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75 ; GFX900-NEXT: ds_read_u16 v0, v0
76 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
77 ; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
78 ; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
79 ; GFX900-NEXT: global_store_dword v[0:1], v0, off
80 ; GFX900-NEXT: s_waitcnt vmcnt(0)
81 ; GFX900-NEXT: s_setpc_b64 s[30:31]
83 ; GFX906-LABEL: load_local_lo_v2i16_reglo_vreg:
84 ; GFX906: ; %bb.0: ; %entry
85 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86 ; GFX906-NEXT: ds_read_u16 v0, v0
87 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
88 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
89 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
90 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
91 ; GFX906-NEXT: s_waitcnt vmcnt(0)
92 ; GFX906-NEXT: s_setpc_b64 s[30:31]
94 ; GFX803-LABEL: load_local_lo_v2i16_reglo_vreg:
95 ; GFX803: ; %bb.0: ; %entry
96 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
97 ; GFX803-NEXT: s_mov_b32 m0, -1
98 ; GFX803-NEXT: ds_read_u16 v0, v0
99 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
100 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
101 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
102 ; GFX803-NEXT: flat_store_dword v[0:1], v0
103 ; GFX803-NEXT: s_waitcnt vmcnt(0)
104 ; GFX803-NEXT: s_setpc_b64 s[30:31]
106 %load = load i16, i16 addrspace(3)* %in
107 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
108 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
109 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Inserts an i16 loaded from addrspace(3) into element 0 of a zeroinitializer
; <2 x i16>. The GFX900 checks expect v1 to be zeroed, then filled by
; ds_read_u16_d16, then copied to v0. The GFX906 checks expect ds_read_u16
; followed by a 0xffff mask; the GFX803 checks expect only ds_read_u16.
113 define <2 x i16> @load_local_lo_v2i16_zerolo(i16 addrspace(3)* %in) #0 {
114 ; GFX900-LABEL: load_local_lo_v2i16_zerolo:
115 ; GFX900: ; %bb.0: ; %entry
116 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
117 ; GFX900-NEXT: v_mov_b32_e32 v1, 0
118 ; GFX900-NEXT: ds_read_u16_d16 v1, v0
119 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
120 ; GFX900-NEXT: v_mov_b32_e32 v0, v1
121 ; GFX900-NEXT: s_setpc_b64 s[30:31]
123 ; GFX906-LABEL: load_local_lo_v2i16_zerolo:
124 ; GFX906: ; %bb.0: ; %entry
125 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126 ; GFX906-NEXT: ds_read_u16 v0, v0
127 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
128 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
129 ; GFX906-NEXT: s_setpc_b64 s[30:31]
131 ; GFX803-LABEL: load_local_lo_v2i16_zerolo:
132 ; GFX803: ; %bb.0: ; %entry
133 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
134 ; GFX803-NEXT: s_mov_b32 m0, -1
135 ; GFX803-NEXT: ds_read_u16 v0, v0
136 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
137 ; GFX803-NEXT: s_setpc_b64 s[30:31]
139 %load = load i16, i16 addrspace(3)* %in
140 %build = insertelement <2 x i16> zeroinitializer, i16 %load, i32 0
; Inserts a half loaded from addrspace(3) into element 0 of the constant
; vector <half 0.0, half 2.0> and returns the result. The GFX900 checks
; expect the constant to be materialized in v1 and the low half then
; overwritten by ds_read_u16_d16. The GFX906 and GFX803 checks expect a plain
; ds_read_u16 combined with the constant by explicit ALU instructions.
; NOTE(review): "v_mov_b32 v1, 2.0" presumably works because the f32 encoding
; of 2.0 (0x40000000) has half 2.0 (0x4000, cf. the s_movk_i32 in the GFX906
; checks) in its upper 16 bits and zero in the lower 16 - confirm.
144 define <2 x half> @load_local_lo_v2f16_fpimm(half addrspace(3)* %in) #0 {
145 ; GFX900-LABEL: load_local_lo_v2f16_fpimm:
146 ; GFX900: ; %bb.0: ; %entry
147 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
148 ; GFX900-NEXT: v_mov_b32_e32 v1, 2.0
149 ; GFX900-NEXT: ds_read_u16_d16 v1, v0
150 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
151 ; GFX900-NEXT: v_mov_b32_e32 v0, v1
152 ; GFX900-NEXT: s_setpc_b64 s[30:31]
154 ; GFX906-LABEL: load_local_lo_v2f16_fpimm:
155 ; GFX906: ; %bb.0: ; %entry
156 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157 ; GFX906-NEXT: ds_read_u16 v0, v0
158 ; GFX906-NEXT: s_movk_i32 s4, 0x4000
159 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
160 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
161 ; GFX906-NEXT: v_lshl_or_b32 v0, s4, 16, v0
162 ; GFX906-NEXT: s_setpc_b64 s[30:31]
164 ; GFX803-LABEL: load_local_lo_v2f16_fpimm:
165 ; GFX803: ; %bb.0: ; %entry
166 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
167 ; GFX803-NEXT: s_mov_b32 m0, -1
168 ; GFX803-NEXT: ds_read_u16 v0, v0
169 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
170 ; GFX803-NEXT: v_or_b32_e32 v0, 2.0, v0
171 ; GFX803-NEXT: s_setpc_b64 s[30:31]
173 %load = load half, half addrspace(3)* %in
174 %build = insertelement <2 x half> <half 0.0, half 2.0>, half %load, i32 0
175 ret <2 x half> %build
; Bitcasts the i32 argument %reg to <2 x half>, replaces element 0 with a half
; loaded from addrspace(3), and stores the vector through an undef
; addrspace(1) pointer. The GFX900 checks expect ds_read_u16_d16 to write
; straight into v1, which is then stored with no separate packing
; instructions. The GFX906 and GFX803 checks expect a plain ds_read_u16 plus
; explicit shift/mask/or packing.
178 define void @load_local_lo_v2f16_reghi_vreg(half addrspace(3)* %in, i32 %reg) #0 {
179 ; GFX900-LABEL: load_local_lo_v2f16_reghi_vreg:
180 ; GFX900: ; %bb.0: ; %entry
181 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
182 ; GFX900-NEXT: ds_read_u16_d16 v1, v0
183 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
184 ; GFX900-NEXT: global_store_dword v[0:1], v1, off
185 ; GFX900-NEXT: s_waitcnt vmcnt(0)
186 ; GFX900-NEXT: s_setpc_b64 s[30:31]
188 ; GFX906-LABEL: load_local_lo_v2f16_reghi_vreg:
189 ; GFX906: ; %bb.0: ; %entry
190 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
191 ; GFX906-NEXT: ds_read_u16 v0, v0
192 ; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v1
193 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
194 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
195 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
196 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
197 ; GFX906-NEXT: s_waitcnt vmcnt(0)
198 ; GFX906-NEXT: s_setpc_b64 s[30:31]
200 ; GFX803-LABEL: load_local_lo_v2f16_reghi_vreg:
201 ; GFX803: ; %bb.0: ; %entry
202 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
203 ; GFX803-NEXT: s_mov_b32 m0, -1
204 ; GFX803-NEXT: ds_read_u16 v0, v0
205 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
206 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
207 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
208 ; GFX803-NEXT: flat_store_dword v[0:1], v0
209 ; GFX803-NEXT: s_waitcnt vmcnt(0)
210 ; GFX803-NEXT: s_setpc_b64 s[30:31]
212 %reg.bc = bitcast i32 %reg to <2 x half>
213 %load = load half, half addrspace(3)* %in
214 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
215 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Builds a <2 x half> with the half argument %reg at element 1 and a half
; loaded from addrspace(3) at element 0, then stores it through an undef
; addrspace(1) pointer. All three check sets expect a plain ds_read_u16 plus
; explicit packing; no d16 load is expected.
219 define void @load_local_lo_v2f16_reglo_vreg(half addrspace(3)* %in, half %reg) #0 {
220 ; GFX900-LABEL: load_local_lo_v2f16_reglo_vreg:
221 ; GFX900: ; %bb.0: ; %entry
222 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
223 ; GFX900-NEXT: ds_read_u16 v0, v0
224 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
225 ; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
226 ; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
227 ; GFX900-NEXT: global_store_dword v[0:1], v0, off
228 ; GFX900-NEXT: s_waitcnt vmcnt(0)
229 ; GFX900-NEXT: s_setpc_b64 s[30:31]
231 ; GFX906-LABEL: load_local_lo_v2f16_reglo_vreg:
232 ; GFX906: ; %bb.0: ; %entry
233 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
234 ; GFX906-NEXT: ds_read_u16 v0, v0
235 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
236 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
237 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
238 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
239 ; GFX906-NEXT: s_waitcnt vmcnt(0)
240 ; GFX906-NEXT: s_setpc_b64 s[30:31]
242 ; GFX803-LABEL: load_local_lo_v2f16_reglo_vreg:
243 ; GFX803: ; %bb.0: ; %entry
244 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
245 ; GFX803-NEXT: s_mov_b32 m0, -1
246 ; GFX803-NEXT: ds_read_u16 v0, v0
247 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
248 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
249 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
250 ; GFX803-NEXT: flat_store_dword v[0:1], v0
251 ; GFX803-NEXT: s_waitcnt vmcnt(0)
252 ; GFX803-NEXT: s_setpc_b64 s[30:31]
254 %load = load half, half addrspace(3)* %in
255 %build0 = insertelement <2 x half> undef, half %reg, i32 1
256 %build1 = insertelement <2 x half> %build0, half %load, i32 0
257 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Bitcasts the i32 argument %reg to <2 x i16>, replaces element 0 with an i8
; loaded from addrspace(3) and zero-extended to i16, and stores the vector
; through an undef addrspace(1) pointer. The GFX900 checks expect a single
; ds_read_u8_d16 into v1. The GFX906 checks expect ds_read_u8 + v_bfi_b32;
; the GFX803 checks expect ds_read_u8 + v_perm_b32.
261 define void @load_local_lo_v2i16_reghi_vreg_zexti8(i8 addrspace(3)* %in, i32 %reg) #0 {
262 ; GFX900-LABEL: load_local_lo_v2i16_reghi_vreg_zexti8:
263 ; GFX900: ; %bb.0: ; %entry
264 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
265 ; GFX900-NEXT: ds_read_u8_d16 v1, v0
266 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
267 ; GFX900-NEXT: global_store_dword v[0:1], v1, off
268 ; GFX900-NEXT: s_waitcnt vmcnt(0)
269 ; GFX900-NEXT: s_setpc_b64 s[30:31]
271 ; GFX906-LABEL: load_local_lo_v2i16_reghi_vreg_zexti8:
272 ; GFX906: ; %bb.0: ; %entry
273 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
274 ; GFX906-NEXT: ds_read_u8 v0, v0
275 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
276 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
277 ; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1
278 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
279 ; GFX906-NEXT: s_waitcnt vmcnt(0)
280 ; GFX906-NEXT: s_setpc_b64 s[30:31]
282 ; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_zexti8:
283 ; GFX803: ; %bb.0: ; %entry
284 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
285 ; GFX803-NEXT: s_mov_b32 m0, -1
286 ; GFX803-NEXT: ds_read_u8 v0, v0
287 ; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v1
288 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
289 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
290 ; GFX803-NEXT: v_perm_b32 v0, v1, v0, s4
291 ; GFX803-NEXT: flat_store_dword v[0:1], v0
292 ; GFX803-NEXT: s_waitcnt vmcnt(0)
293 ; GFX803-NEXT: s_setpc_b64 s[30:31]
295 %reg.bc = bitcast i32 %reg to <2 x i16>
296 %load = load i8, i8 addrspace(3)* %in
297 %ext = zext i8 %load to i16
298 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
299 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Builds a <2 x i16> with the i16 argument %reg at element 1 and a
; zero-extended i8 loaded from addrspace(3) at element 0, then stores it
; through an undef addrspace(1) pointer. All three check sets expect a plain
; ds_read_u8 plus explicit packing; no d16 load is expected.
303 define void @load_local_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(3)* %in, i16 %reg) #0 {
304 ; GFX900-LABEL: load_local_lo_v2i16_reglo_vreg_zexti8:
305 ; GFX900: ; %bb.0: ; %entry
306 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
307 ; GFX900-NEXT: ds_read_u8 v0, v0
308 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
309 ; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
310 ; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
311 ; GFX900-NEXT: global_store_dword v[0:1], v0, off
312 ; GFX900-NEXT: s_waitcnt vmcnt(0)
313 ; GFX900-NEXT: s_setpc_b64 s[30:31]
315 ; GFX906-LABEL: load_local_lo_v2i16_reglo_vreg_zexti8:
316 ; GFX906: ; %bb.0: ; %entry
317 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
318 ; GFX906-NEXT: ds_read_u8 v0, v0
319 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
320 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
321 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
322 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
323 ; GFX906-NEXT: s_waitcnt vmcnt(0)
324 ; GFX906-NEXT: s_setpc_b64 s[30:31]
326 ; GFX803-LABEL: load_local_lo_v2i16_reglo_vreg_zexti8:
327 ; GFX803: ; %bb.0: ; %entry
328 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
329 ; GFX803-NEXT: s_mov_b32 m0, -1
330 ; GFX803-NEXT: ds_read_u8 v0, v0
331 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
332 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
333 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
334 ; GFX803-NEXT: flat_store_dword v[0:1], v0
335 ; GFX803-NEXT: s_waitcnt vmcnt(0)
336 ; GFX803-NEXT: s_setpc_b64 s[30:31]
338 %load = load i8, i8 addrspace(3)* %in
339 %ext = zext i8 %load to i16
340 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
341 %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 0
342 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Bitcasts the i32 argument %reg to <2 x i16>, replaces element 0 with an i8
; loaded from addrspace(3) and sign-extended to i16, and stores the vector
; through an undef addrspace(1) pointer. The GFX900 checks expect a single
; ds_read_i8_d16 into v1. The GFX906 checks expect ds_read_i8 + v_bfi_b32;
; the GFX803 checks expect ds_read_i8, a 0xffff0000 mask of %reg, and an SDWA
; v_or that selects WORD_0 of the loaded value.
346 define void @load_local_lo_v2i16_reghi_vreg_sexti8(i8 addrspace(3)* %in, i32 %reg) #0 {
347 ; GFX900-LABEL: load_local_lo_v2i16_reghi_vreg_sexti8:
348 ; GFX900: ; %bb.0: ; %entry
349 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
350 ; GFX900-NEXT: ds_read_i8_d16 v1, v0
351 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
352 ; GFX900-NEXT: global_store_dword v[0:1], v1, off
353 ; GFX900-NEXT: s_waitcnt vmcnt(0)
354 ; GFX900-NEXT: s_setpc_b64 s[30:31]
356 ; GFX906-LABEL: load_local_lo_v2i16_reghi_vreg_sexti8:
357 ; GFX906: ; %bb.0: ; %entry
358 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
359 ; GFX906-NEXT: ds_read_i8 v0, v0
360 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
361 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
362 ; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1
363 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
364 ; GFX906-NEXT: s_waitcnt vmcnt(0)
365 ; GFX906-NEXT: s_setpc_b64 s[30:31]
367 ; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_sexti8:
368 ; GFX803: ; %bb.0: ; %entry
369 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
370 ; GFX803-NEXT: s_mov_b32 m0, -1
371 ; GFX803-NEXT: ds_read_i8 v0, v0
372 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
373 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
374 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
375 ; GFX803-NEXT: flat_store_dword v[0:1], v0
376 ; GFX803-NEXT: s_waitcnt vmcnt(0)
377 ; GFX803-NEXT: s_setpc_b64 s[30:31]
379 %reg.bc = bitcast i32 %reg to <2 x i16>
380 %load = load i8, i8 addrspace(3)* %in
381 %ext = sext i8 %load to i16
382 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
383 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Builds a <2 x i16> with the i16 argument %reg at element 1 and a
; sign-extended i8 loaded from addrspace(3) at element 0, then stores it
; through an undef addrspace(1) pointer. All three check sets expect a plain
; ds_read_i8 plus explicit packing; no d16 load is expected.
387 define void @load_local_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(3)* %in, i16 %reg) #0 {
388 ; GFX900-LABEL: load_local_lo_v2i16_reglo_vreg_sexti8:
389 ; GFX900: ; %bb.0: ; %entry
390 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
391 ; GFX900-NEXT: ds_read_i8 v0, v0
392 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
393 ; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
394 ; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
395 ; GFX900-NEXT: global_store_dword v[0:1], v0, off
396 ; GFX900-NEXT: s_waitcnt vmcnt(0)
397 ; GFX900-NEXT: s_setpc_b64 s[30:31]
399 ; GFX906-LABEL: load_local_lo_v2i16_reglo_vreg_sexti8:
400 ; GFX906: ; %bb.0: ; %entry
401 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
402 ; GFX906-NEXT: ds_read_i8 v0, v0
403 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
404 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
405 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
406 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
407 ; GFX906-NEXT: s_waitcnt vmcnt(0)
408 ; GFX906-NEXT: s_setpc_b64 s[30:31]
410 ; GFX803-LABEL: load_local_lo_v2i16_reglo_vreg_sexti8:
411 ; GFX803: ; %bb.0: ; %entry
412 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
413 ; GFX803-NEXT: s_mov_b32 m0, -1
414 ; GFX803-NEXT: ds_read_i8 v0, v0
415 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
416 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
417 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
418 ; GFX803-NEXT: flat_store_dword v[0:1], v0
419 ; GFX803-NEXT: s_waitcnt vmcnt(0)
420 ; GFX803-NEXT: s_setpc_b64 s[30:31]
422 %load = load i8, i8 addrspace(3)* %in
423 %ext = sext i8 %load to i16
424 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
425 %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 0
426 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Loads an i8 from addrspace(3), zero-extends it to i16 and bitcasts that to
; half, then builds a <2 x half> with the half argument %reg at element 1 and
; the bitcast value at element 0, and stores it through an undef addrspace(1)
; pointer. All three check sets expect a plain ds_read_u8 plus explicit
; packing; no d16 load is expected.
430 define void @load_local_lo_v2f16_reglo_vreg_zexti8(i8 addrspace(3)* %in, half %reg) #0 {
431 ; GFX900-LABEL: load_local_lo_v2f16_reglo_vreg_zexti8:
432 ; GFX900: ; %bb.0: ; %entry
433 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
434 ; GFX900-NEXT: ds_read_u8 v0, v0
435 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
436 ; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
437 ; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
438 ; GFX900-NEXT: global_store_dword v[0:1], v0, off
439 ; GFX900-NEXT: s_waitcnt vmcnt(0)
440 ; GFX900-NEXT: s_setpc_b64 s[30:31]
442 ; GFX906-LABEL: load_local_lo_v2f16_reglo_vreg_zexti8:
443 ; GFX906: ; %bb.0: ; %entry
444 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
445 ; GFX906-NEXT: ds_read_u8 v0, v0
446 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
447 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
448 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
449 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
450 ; GFX906-NEXT: s_waitcnt vmcnt(0)
451 ; GFX906-NEXT: s_setpc_b64 s[30:31]
453 ; GFX803-LABEL: load_local_lo_v2f16_reglo_vreg_zexti8:
454 ; GFX803: ; %bb.0: ; %entry
455 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
456 ; GFX803-NEXT: s_mov_b32 m0, -1
457 ; GFX803-NEXT: ds_read_u8 v0, v0
458 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
459 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
460 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
461 ; GFX803-NEXT: flat_store_dword v[0:1], v0
462 ; GFX803-NEXT: s_waitcnt vmcnt(0)
463 ; GFX803-NEXT: s_setpc_b64 s[30:31]
465 %load = load i8, i8 addrspace(3)* %in
466 %ext = zext i8 %load to i16
467 %bitcast = bitcast i16 %ext to half
468 %build0 = insertelement <2 x half> undef, half %reg, i32 1
469 %build1 = insertelement <2 x half> %build0, half %bitcast, i32 0
470 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Loads an i8 from addrspace(3), sign-extends it to i16 and bitcasts that to
; half, then builds a <2 x half> with the half argument %reg at element 1 and
; the bitcast value at element 0, and stores it through an undef addrspace(1)
; pointer. All three check sets expect a plain ds_read_i8 plus explicit
; packing; no d16 load is expected.
474 define void @load_local_lo_v2f16_reglo_vreg_sexti8(i8 addrspace(3)* %in, half %reg) #0 {
475 ; GFX900-LABEL: load_local_lo_v2f16_reglo_vreg_sexti8:
476 ; GFX900: ; %bb.0: ; %entry
477 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
478 ; GFX900-NEXT: ds_read_i8 v0, v0
479 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
480 ; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
481 ; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
482 ; GFX900-NEXT: global_store_dword v[0:1], v0, off
483 ; GFX900-NEXT: s_waitcnt vmcnt(0)
484 ; GFX900-NEXT: s_setpc_b64 s[30:31]
486 ; GFX906-LABEL: load_local_lo_v2f16_reglo_vreg_sexti8:
487 ; GFX906: ; %bb.0: ; %entry
488 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
489 ; GFX906-NEXT: ds_read_i8 v0, v0
490 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
491 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
492 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
493 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
494 ; GFX906-NEXT: s_waitcnt vmcnt(0)
495 ; GFX906-NEXT: s_setpc_b64 s[30:31]
497 ; GFX803-LABEL: load_local_lo_v2f16_reglo_vreg_sexti8:
498 ; GFX803: ; %bb.0: ; %entry
499 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
500 ; GFX803-NEXT: s_mov_b32 m0, -1
501 ; GFX803-NEXT: ds_read_i8 v0, v0
502 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
503 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
504 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
505 ; GFX803-NEXT: flat_store_dword v[0:1], v0
506 ; GFX803-NEXT: s_waitcnt vmcnt(0)
507 ; GFX803-NEXT: s_setpc_b64 s[30:31]
509 %load = load i8, i8 addrspace(3)* %in
510 %ext = sext i8 %load to i16
511 %bitcast = bitcast i16 %ext to half
512 %build0 = insertelement <2 x half> undef, half %reg, i32 1
513 %build1 = insertelement <2 x half> %build0, half %bitcast, i32 0
514 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; The i16 loaded from addrspace(3) has two uses: it is stored to the null
; addrspace(3) pointer and inserted at element 0 of the <2 x i16> argument
; %reg, which is then stored through an undef addrspace(1) pointer. No check
; set expects a d16 load here: each expects ds_read_u16, a ds_write_b16 of
; the loaded value, and explicit packing (v_bfi_b32 on GFX900 and GFX906, an
; SDWA v_or on GFX803).
518 define void @load_local_lo_v2i16_reghi_vreg_multi_use_lo(i16 addrspace(3)* %in, <2 x i16> %reg) #0 {
519 ; GFX900-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lo:
520 ; GFX900: ; %bb.0: ; %entry
521 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
522 ; GFX900-NEXT: ds_read_u16 v0, v0
523 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
524 ; GFX900-NEXT: v_mov_b32_e32 v3, 0xffff
525 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
526 ; GFX900-NEXT: ds_write_b16 v2, v0
527 ; GFX900-NEXT: v_bfi_b32 v0, v3, v0, v1
528 ; GFX900-NEXT: global_store_dword v[0:1], v0, off
529 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
530 ; GFX900-NEXT: s_setpc_b64 s[30:31]
532 ; GFX906-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lo:
533 ; GFX906: ; %bb.0: ; %entry
534 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
535 ; GFX906-NEXT: ds_read_u16 v0, v0
536 ; GFX906-NEXT: v_mov_b32_e32 v2, 0
537 ; GFX906-NEXT: v_mov_b32_e32 v3, 0xffff
538 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
539 ; GFX906-NEXT: ds_write_b16 v2, v0
540 ; GFX906-NEXT: v_bfi_b32 v0, v3, v0, v1
541 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
542 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
543 ; GFX906-NEXT: s_setpc_b64 s[30:31]
545 ; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lo:
546 ; GFX803: ; %bb.0: ; %entry
547 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
548 ; GFX803-NEXT: s_mov_b32 m0, -1
549 ; GFX803-NEXT: ds_read_u16 v0, v0
550 ; GFX803-NEXT: v_mov_b32_e32 v2, 0
551 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
552 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
553 ; GFX803-NEXT: ds_write_b16 v2, v0
554 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
555 ; GFX803-NEXT: flat_store_dword v[0:1], v0
556 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
557 ; GFX803-NEXT: s_setpc_b64 s[30:31]
559 %load = load i16, i16 addrspace(3)* %in
; NOTE(review): %elt1 is not used by any instruction visible in this function.
560 %elt1 = extractelement <2 x i16> %reg, i32 1
561 store i16 %load, i16 addrspace(3)* null
562 %build1 = insertelement <2 x i16> %reg, i16 %load, i32 0
563 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Element 1 of the <2 x i16> argument %reg has two uses: it is extracted and
; stored to the null addrspace(3) pointer, and it remains the high element of
; the vector whose element 0 is replaced by an i16 loaded from addrspace(3).
; The GFX900 checks still expect a d16 load: the high half is first copied
; out of v1 with v_lshrrev_b32, then ds_read_u16_d16 writes into v1. The
; GFX906 and GFX803 checks expect a plain ds_read_u16 plus explicit packing.
567 define void @load_local_lo_v2i16_reghi_vreg_multi_use_hi(i16 addrspace(3)* %in, <2 x i16> %reg) #0 {
568 ; GFX900-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_hi:
569 ; GFX900: ; %bb.0: ; %entry
570 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
571 ; GFX900-NEXT: v_lshrrev_b32_e32 v2, 16, v1
572 ; GFX900-NEXT: ds_read_u16_d16 v1, v0
573 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
574 ; GFX900-NEXT: ds_write_b16 v0, v2
575 ; GFX900-NEXT: s_waitcnt lgkmcnt(1)
576 ; GFX900-NEXT: global_store_dword v[0:1], v1, off
577 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
578 ; GFX900-NEXT: s_setpc_b64 s[30:31]
580 ; GFX906-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_hi:
581 ; GFX906: ; %bb.0: ; %entry
582 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
583 ; GFX906-NEXT: ds_read_u16 v0, v0
584 ; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v1
585 ; GFX906-NEXT: v_mov_b32_e32 v3, 0
586 ; GFX906-NEXT: ds_write_b16 v3, v2
587 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
588 ; GFX906-NEXT: s_waitcnt lgkmcnt(1)
589 ; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1
590 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
591 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
592 ; GFX906-NEXT: s_setpc_b64 s[30:31]
594 ; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_hi:
595 ; GFX803: ; %bb.0: ; %entry
596 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
597 ; GFX803-NEXT: s_mov_b32 m0, -1
598 ; GFX803-NEXT: ds_read_u16 v0, v0
599 ; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v1
600 ; GFX803-NEXT: v_mov_b32_e32 v2, 0
601 ; GFX803-NEXT: ds_write_b16 v2, v1
602 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
603 ; GFX803-NEXT: s_waitcnt lgkmcnt(1)
604 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
605 ; GFX803-NEXT: flat_store_dword v[0:1], v0
606 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
607 ; GFX803-NEXT: s_setpc_b64 s[30:31]
609 %load = load i16, i16 addrspace(3)* %in
610 %elt1 = extractelement <2 x i16> %reg, i32 1
611 store i16 %elt1, i16 addrspace(3)* null
612 %build1 = insertelement <2 x i16> %reg, i16 %load, i32 0
613 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Both halves have extra uses: the i16 loaded from addrspace(3) is stored to
; %out0, element 1 of %reg is stored to %out1, and the loaded value is also
; inserted at element 0 of %reg before the vector is stored through an undef
; addrspace(1) pointer. All three addrspace(3) pointer arguments are marked
; noalias. No check set expects a d16 load: each expects ds_read_u16, two
; ds_write_b16, and explicit packing.
617 define void @load_local_lo_v2i16_reghi_vreg_multi_use_lohi(i16 addrspace(3)* noalias %in, <2 x i16> %reg, i16 addrspace(3)* noalias %out0, i16 addrspace(3)* noalias %out1) #0 {
618 ; GFX900-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lohi:
619 ; GFX900: ; %bb.0: ; %entry
620 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
621 ; GFX900-NEXT: ds_read_u16 v0, v0
622 ; GFX900-NEXT: v_lshrrev_b32_e32 v4, 16, v1
623 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
624 ; GFX900-NEXT: ds_write_b16 v2, v0
625 ; GFX900-NEXT: ds_write_b16 v3, v4
626 ; GFX900-NEXT: v_mov_b32_e32 v2, 0xffff
627 ; GFX900-NEXT: v_bfi_b32 v0, v2, v0, v1
628 ; GFX900-NEXT: global_store_dword v[0:1], v0, off
629 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
630 ; GFX900-NEXT: s_setpc_b64 s[30:31]
632 ; GFX906-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lohi:
633 ; GFX906: ; %bb.0: ; %entry
634 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
635 ; GFX906-NEXT: ds_read_u16 v0, v0
636 ; GFX906-NEXT: v_lshrrev_b32_e32 v4, 16, v1
637 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
638 ; GFX906-NEXT: ds_write_b16 v2, v0
639 ; GFX906-NEXT: ds_write_b16 v3, v4
640 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
641 ; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1
642 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
643 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
644 ; GFX906-NEXT: s_setpc_b64 s[30:31]
646 ; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lohi:
647 ; GFX803: ; %bb.0: ; %entry
648 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
649 ; GFX803-NEXT: s_mov_b32 m0, -1
650 ; GFX803-NEXT: ds_read_u16 v0, v0
651 ; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v1
652 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
653 ; GFX803-NEXT: ds_write_b16 v2, v0
654 ; GFX803-NEXT: ds_write_b16 v3, v1
655 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
656 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
657 ; GFX803-NEXT: flat_store_dword v[0:1], v0
658 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
659 ; GFX803-NEXT: s_setpc_b64 s[30:31]
661 %load = load i16, i16 addrspace(3)* %in
662 %elt1 = extractelement <2 x i16> %reg, i32 1
663 store i16 %load, i16 addrspace(3)* %out0
664 store i16 %elt1, i16 addrspace(3)* %out1
665 %build1 = insertelement <2 x i16> %reg, i16 %load, i32 0
666 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Bitcasts the i32 argument %reg to <2 x i16>, replaces element 0 with an i16
; loaded from addrspace(1) at %in - 2047 elements, and stores the vector
; through an undef addrspace(1) pointer. The GFX900 checks expect
; global_load_short_d16 with the offset folded into the instruction. The
; GFX906 checks expect global_load_ushort with the same offset plus
; v_bfi_b32. The GFX803 checks expect the address to be computed with a
; 64-bit add (0xfffff002 / -1) ahead of flat_load_ushort.
670 define void @load_global_lo_v2i16_reglo_vreg(i16 addrspace(1)* %in, i32 %reg) #0 {
671 ; GFX900-LABEL: load_global_lo_v2i16_reglo_vreg:
672 ; GFX900: ; %bb.0: ; %entry
673 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
674 ; GFX900-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
675 ; GFX900-NEXT: s_waitcnt vmcnt(0)
676 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
677 ; GFX900-NEXT: s_waitcnt vmcnt(0)
678 ; GFX900-NEXT: s_setpc_b64 s[30:31]
680 ; GFX906-LABEL: load_global_lo_v2i16_reglo_vreg:
681 ; GFX906: ; %bb.0: ; %entry
682 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
683 ; GFX906-NEXT: global_load_ushort v0, v[0:1], off offset:-4094
684 ; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff
685 ; GFX906-NEXT: s_waitcnt vmcnt(0)
686 ; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2
687 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
688 ; GFX906-NEXT: s_waitcnt vmcnt(0)
689 ; GFX906-NEXT: s_setpc_b64 s[30:31]
691 ; GFX803-LABEL: load_global_lo_v2i16_reglo_vreg:
692 ; GFX803: ; %bb.0: ; %entry
693 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
694 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff002, v0
695 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
696 ; GFX803-NEXT: flat_load_ushort v0, v[0:1]
697 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
698 ; GFX803-NEXT: s_waitcnt vmcnt(0)
699 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
700 ; GFX803-NEXT: flat_store_dword v[0:1], v0
701 ; GFX803-NEXT: s_waitcnt vmcnt(0)
702 ; GFX803-NEXT: s_setpc_b64 s[30:31]
704 %reg.bc = bitcast i32 %reg to <2 x i16>
; -2047 i16 elements is a byte offset of -4094, which is the immediate offset
; that appears in the GFX900 and GFX906 checks (0xfffff002 in the GFX803 add).
705 %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 -2047
706 %load = load i16, i16 addrspace(1)* %gep
707 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
708 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Bitcasts the i32 argument %reg to <2 x half>, replaces element 0 with a half
; loaded from addrspace(1) at %in - 2047 elements, and stores the vector
; through an undef addrspace(1) pointer. The GFX900 checks expect
; global_load_short_d16 with the offset folded into the instruction. The
; GFX906 checks expect global_load_ushort plus shift/mask/v_lshl_or packing.
; The GFX803 checks expect a 64-bit address add ahead of flat_load_ushort.
712 define void @load_global_lo_v2f16_reglo_vreg(half addrspace(1)* %in, i32 %reg) #0 {
713 ; GFX900-LABEL: load_global_lo_v2f16_reglo_vreg:
714 ; GFX900: ; %bb.0: ; %entry
715 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
716 ; GFX900-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
717 ; GFX900-NEXT: s_waitcnt vmcnt(0)
718 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
719 ; GFX900-NEXT: s_waitcnt vmcnt(0)
720 ; GFX900-NEXT: s_setpc_b64 s[30:31]
722 ; GFX906-LABEL: load_global_lo_v2f16_reglo_vreg:
723 ; GFX906: ; %bb.0: ; %entry
724 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
725 ; GFX906-NEXT: global_load_ushort v0, v[0:1], off offset:-4094
726 ; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2
727 ; GFX906-NEXT: s_waitcnt vmcnt(0)
728 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
729 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
730 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
731 ; GFX906-NEXT: s_waitcnt vmcnt(0)
732 ; GFX906-NEXT: s_setpc_b64 s[30:31]
734 ; GFX803-LABEL: load_global_lo_v2f16_reglo_vreg:
735 ; GFX803: ; %bb.0: ; %entry
736 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
737 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff002, v0
738 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
739 ; GFX803-NEXT: flat_load_ushort v0, v[0:1]
740 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
741 ; GFX803-NEXT: s_waitcnt vmcnt(0)
742 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
743 ; GFX803-NEXT: flat_store_dword v[0:1], v0
744 ; GFX803-NEXT: s_waitcnt vmcnt(0)
745 ; GFX803-NEXT: s_setpc_b64 s[30:31]
747 %reg.bc = bitcast i32 %reg to <2 x half>
; -2047 half elements is a byte offset of -4094, which is the immediate offset
; that appears in the GFX900 and GFX906 checks (0xfffff002 in the GFX803 add).
748 %gep = getelementptr inbounds half, half addrspace(1)* %in, i64 -2047
749 %load = load half, half addrspace(1)* %gep
750 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
751 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Bitcasts the i32 argument %reg to <2 x i16>, replaces element 0 with an i8
; loaded from addrspace(1) at %in - 4095 bytes and zero-extended to i16, and
; stores the vector through an undef addrspace(1) pointer. The GFX900 checks
; expect global_load_ubyte_d16 with offset:-4095 folded in. The GFX906 checks
; expect global_load_ubyte with the same offset plus v_bfi_b32. The GFX803
; checks expect a 64-bit address add (0xfffff001 / -1), flat_load_ubyte, and
; v_perm_b32.
755 define void @load_global_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(1)* %in, i32 %reg) #0 {
756 ; GFX900-LABEL: load_global_lo_v2i16_reglo_vreg_zexti8:
757 ; GFX900: ; %bb.0: ; %entry
758 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
759 ; GFX900-NEXT: global_load_ubyte_d16 v2, v[0:1], off offset:-4095
760 ; GFX900-NEXT: s_waitcnt vmcnt(0)
761 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
762 ; GFX900-NEXT: s_waitcnt vmcnt(0)
763 ; GFX900-NEXT: s_setpc_b64 s[30:31]
765 ; GFX906-LABEL: load_global_lo_v2i16_reglo_vreg_zexti8:
766 ; GFX906: ; %bb.0: ; %entry
767 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
768 ; GFX906-NEXT: global_load_ubyte v0, v[0:1], off offset:-4095
769 ; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff
770 ; GFX906-NEXT: s_waitcnt vmcnt(0)
771 ; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2
772 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
773 ; GFX906-NEXT: s_waitcnt vmcnt(0)
774 ; GFX906-NEXT: s_setpc_b64 s[30:31]
776 ; GFX803-LABEL: load_global_lo_v2i16_reglo_vreg_zexti8:
777 ; GFX803: ; %bb.0: ; %entry
778 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
779 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0
780 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
781 ; GFX803-NEXT: flat_load_ubyte v0, v[0:1]
782 ; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v2
783 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
784 ; GFX803-NEXT: s_waitcnt vmcnt(0)
785 ; GFX803-NEXT: v_perm_b32 v0, v1, v0, s4
786 ; GFX803-NEXT: flat_store_dword v[0:1], v0
787 ; GFX803-NEXT: s_waitcnt vmcnt(0)
788 ; GFX803-NEXT: s_setpc_b64 s[30:31]
790 %reg.bc = bitcast i32 %reg to <2 x i16>
; -4095 i8 elements is a byte offset of -4095, which is the immediate offset
; that appears in the GFX900 and GFX906 checks (0xfffff001 in the GFX803 add).
791 %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095
792 %load = load i8, i8 addrspace(1)* %gep
793 %ext = zext i8 %load to i16
794 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
795 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: load an i8 from the addrspace(1) pointer %in at byte offset -4095,
; sign-extend it to i16, and insert it as element 0 of %reg reinterpreted as
; <2 x i16>. The merged vector is stored to an undef addrspace(1) pointer.
; Expected code per run line:
;   gfx900 - one global_load_sbyte_d16 written straight into v2 (holds %reg).
;   gfx906 - global_load_sbyte, then v_bfi_b32 with a 0xffff mask.
;   fiji   - 64-bit address add, flat_load_sbyte, then v_and_b32 on the high
;            half of %reg and an SDWA v_or_b32 taking WORD_0 of the load.
799 define void @load_global_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(1)* %in, i32 %reg) #0 {
800 ; GFX900-LABEL: load_global_lo_v2i16_reglo_vreg_sexti8:
801 ; GFX900: ; %bb.0: ; %entry
802 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
803 ; GFX900-NEXT: global_load_sbyte_d16 v2, v[0:1], off offset:-4095
804 ; GFX900-NEXT: s_waitcnt vmcnt(0)
805 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
806 ; GFX900-NEXT: s_waitcnt vmcnt(0)
807 ; GFX900-NEXT: s_setpc_b64 s[30:31]
809 ; GFX906-LABEL: load_global_lo_v2i16_reglo_vreg_sexti8:
810 ; GFX906: ; %bb.0: ; %entry
811 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
812 ; GFX906-NEXT: global_load_sbyte v0, v[0:1], off offset:-4095
813 ; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff
814 ; GFX906-NEXT: s_waitcnt vmcnt(0)
815 ; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2
816 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
817 ; GFX906-NEXT: s_waitcnt vmcnt(0)
818 ; GFX906-NEXT: s_setpc_b64 s[30:31]
820 ; GFX803-LABEL: load_global_lo_v2i16_reglo_vreg_sexti8:
821 ; GFX803: ; %bb.0: ; %entry
822 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
823 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0
824 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
825 ; GFX803-NEXT: flat_load_sbyte v0, v[0:1]
826 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
827 ; GFX803-NEXT: s_waitcnt vmcnt(0)
828 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
829 ; GFX803-NEXT: flat_store_dword v[0:1], v0
830 ; GFX803-NEXT: s_waitcnt vmcnt(0)
831 ; GFX803-NEXT: s_setpc_b64 s[30:31]
833 %reg.bc = bitcast i32 %reg to <2 x i16>
834 %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095
835 %load = load i8, i8 addrspace(1)* %gep
836 %ext = sext i8 %load to i16
837 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
838 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: <2 x half> variant of the zero-extended byte load. An i8 is loaded
; from the addrspace(1) pointer %in at byte offset -4095, zero-extended to
; i16, bitcast to half, and inserted as element 0 of %reg reinterpreted as
; <2 x half>. The result is stored to an undef addrspace(1) pointer.
; Expected code per run line:
;   gfx900 - one global_load_ubyte_d16 written straight into v2 (holds %reg).
;   gfx906 - global_load_ubyte, then v_lshrrev_b32 / v_and_b32 / v_lshl_or_b32.
;   fiji   - 64-bit address add, flat_load_ubyte, then v_perm_b32.
842 define void @load_global_lo_v2f16_reglo_vreg_zexti8(i8 addrspace(1)* %in, i32 %reg) #0 {
843 ; GFX900-LABEL: load_global_lo_v2f16_reglo_vreg_zexti8:
844 ; GFX900: ; %bb.0: ; %entry
845 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
846 ; GFX900-NEXT: global_load_ubyte_d16 v2, v[0:1], off offset:-4095
847 ; GFX900-NEXT: s_waitcnt vmcnt(0)
848 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
849 ; GFX900-NEXT: s_waitcnt vmcnt(0)
850 ; GFX900-NEXT: s_setpc_b64 s[30:31]
852 ; GFX906-LABEL: load_global_lo_v2f16_reglo_vreg_zexti8:
853 ; GFX906: ; %bb.0: ; %entry
854 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
855 ; GFX906-NEXT: global_load_ubyte v0, v[0:1], off offset:-4095
856 ; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2
857 ; GFX906-NEXT: s_waitcnt vmcnt(0)
858 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
859 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
860 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
861 ; GFX906-NEXT: s_waitcnt vmcnt(0)
862 ; GFX906-NEXT: s_setpc_b64 s[30:31]
864 ; GFX803-LABEL: load_global_lo_v2f16_reglo_vreg_zexti8:
865 ; GFX803: ; %bb.0: ; %entry
866 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
867 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0
868 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
869 ; GFX803-NEXT: flat_load_ubyte v0, v[0:1]
870 ; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v2
871 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
872 ; GFX803-NEXT: s_waitcnt vmcnt(0)
873 ; GFX803-NEXT: v_perm_b32 v0, v1, v0, s4
874 ; GFX803-NEXT: flat_store_dword v[0:1], v0
875 ; GFX803-NEXT: s_waitcnt vmcnt(0)
876 ; GFX803-NEXT: s_setpc_b64 s[30:31]
878 %reg.bc = bitcast i32 %reg to <2 x half>
879 %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095
880 %load = load i8, i8 addrspace(1)* %gep
881 %ext = zext i8 %load to i16
882 %bitcast = bitcast i16 %ext to half
883 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0
884 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Test: <2 x half> variant of the sign-extended byte load. An i8 is loaded
; from the addrspace(1) pointer %in at byte offset -4095, sign-extended to
; i16, bitcast to half, and inserted as element 0 of %reg reinterpreted as
; <2 x half>. The result is stored to an undef addrspace(1) pointer.
; Expected code per run line:
;   gfx900 - one global_load_sbyte_d16 written straight into v2 (holds %reg).
;   gfx906 - global_load_sbyte, then v_lshrrev_b32 / v_and_b32 / v_lshl_or_b32.
;   fiji   - 64-bit address add, flat_load_sbyte, then v_and_b32 and an SDWA
;            v_or_b32 taking WORD_0 of the loaded value.
888 define void @load_global_lo_v2f16_reglo_vreg_sexti8(i8 addrspace(1)* %in, i32 %reg) #0 {
889 ; GFX900-LABEL: load_global_lo_v2f16_reglo_vreg_sexti8:
890 ; GFX900: ; %bb.0: ; %entry
891 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
892 ; GFX900-NEXT: global_load_sbyte_d16 v2, v[0:1], off offset:-4095
893 ; GFX900-NEXT: s_waitcnt vmcnt(0)
894 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
895 ; GFX900-NEXT: s_waitcnt vmcnt(0)
896 ; GFX900-NEXT: s_setpc_b64 s[30:31]
898 ; GFX906-LABEL: load_global_lo_v2f16_reglo_vreg_sexti8:
899 ; GFX906: ; %bb.0: ; %entry
900 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
901 ; GFX906-NEXT: global_load_sbyte v0, v[0:1], off offset:-4095
902 ; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2
903 ; GFX906-NEXT: s_waitcnt vmcnt(0)
904 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
905 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
906 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
907 ; GFX906-NEXT: s_waitcnt vmcnt(0)
908 ; GFX906-NEXT: s_setpc_b64 s[30:31]
910 ; GFX803-LABEL: load_global_lo_v2f16_reglo_vreg_sexti8:
911 ; GFX803: ; %bb.0: ; %entry
912 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
913 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0
914 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
915 ; GFX803-NEXT: flat_load_sbyte v0, v[0:1]
916 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
917 ; GFX803-NEXT: s_waitcnt vmcnt(0)
918 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
919 ; GFX803-NEXT: flat_store_dword v[0:1], v0
920 ; GFX803-NEXT: s_waitcnt vmcnt(0)
921 ; GFX803-NEXT: s_setpc_b64 s[30:31]
923 %reg.bc = bitcast i32 %reg to <2 x half>
924 %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095
925 %load = load i8, i8 addrspace(1)* %gep
926 %ext = sext i8 %load to i16
927 %bitcast = bitcast i16 %ext to half
928 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0
929 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Test: load an i16 through the address-space-less (flat) pointer %in and
; insert it as element 0 of %reg reinterpreted as <2 x i16>. The result is
; stored to an undef addrspace(1) pointer.
; Expected code per run line:
;   gfx900 - one flat_load_short_d16 written straight into v2 (holds %reg).
;   gfx906 - flat_load_ushort, then v_bfi_b32 with a 0xffff mask.
;   fiji   - flat_load_ushort, then v_and_b32 0xffff0000 and v_or_b32.
; Every run waits on both vmcnt and lgkmcnt after the flat load.
; NOTE(review): the name says "reghi", but %reg is a full i32 whose high half
; is kept; presumably the name refers to that preserved half - confirm.
933 define void @load_flat_lo_v2i16_reghi_vreg(i16* %in, i32 %reg) #0 {
934 ; GFX900-LABEL: load_flat_lo_v2i16_reghi_vreg:
935 ; GFX900: ; %bb.0: ; %entry
936 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
937 ; GFX900-NEXT: flat_load_short_d16 v2, v[0:1]
938 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
939 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
940 ; GFX900-NEXT: s_waitcnt vmcnt(0)
941 ; GFX900-NEXT: s_setpc_b64 s[30:31]
943 ; GFX906-LABEL: load_flat_lo_v2i16_reghi_vreg:
944 ; GFX906: ; %bb.0: ; %entry
945 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
946 ; GFX906-NEXT: flat_load_ushort v0, v[0:1]
947 ; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff
948 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
949 ; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2
950 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
951 ; GFX906-NEXT: s_waitcnt vmcnt(0)
952 ; GFX906-NEXT: s_setpc_b64 s[30:31]
954 ; GFX803-LABEL: load_flat_lo_v2i16_reghi_vreg:
955 ; GFX803: ; %bb.0: ; %entry
956 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
957 ; GFX803-NEXT: flat_load_ushort v0, v[0:1]
958 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
959 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
960 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
961 ; GFX803-NEXT: flat_store_dword v[0:1], v0
962 ; GFX803-NEXT: s_waitcnt vmcnt(0)
963 ; GFX803-NEXT: s_setpc_b64 s[30:31]
965 %reg.bc = bitcast i32 %reg to <2 x i16>
966 %load = load i16, i16* %in
967 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
968 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: load a half through the address-space-less (flat) pointer %in and
; insert it as element 0 of %reg reinterpreted as <2 x half>. The result is
; stored to an undef addrspace(1) pointer.
; Expected code per run line:
;   gfx900 - one flat_load_short_d16 written straight into v2 (holds %reg).
;   gfx906 - flat_load_ushort, then v_lshrrev_b32 / v_and_b32 / v_lshl_or_b32.
;   fiji   - flat_load_ushort, then v_and_b32 0xffff0000 and v_or_b32.
; Every run waits on both vmcnt and lgkmcnt after the flat load.
972 define void @load_flat_lo_v2f16_reghi_vreg(half* %in, i32 %reg) #0 {
973 ; GFX900-LABEL: load_flat_lo_v2f16_reghi_vreg:
974 ; GFX900: ; %bb.0: ; %entry
975 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
976 ; GFX900-NEXT: flat_load_short_d16 v2, v[0:1]
977 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
978 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
979 ; GFX900-NEXT: s_waitcnt vmcnt(0)
980 ; GFX900-NEXT: s_setpc_b64 s[30:31]
982 ; GFX906-LABEL: load_flat_lo_v2f16_reghi_vreg:
983 ; GFX906: ; %bb.0: ; %entry
984 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
985 ; GFX906-NEXT: flat_load_ushort v0, v[0:1]
986 ; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2
987 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
988 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
989 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
990 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
991 ; GFX906-NEXT: s_waitcnt vmcnt(0)
992 ; GFX906-NEXT: s_setpc_b64 s[30:31]
994 ; GFX803-LABEL: load_flat_lo_v2f16_reghi_vreg:
995 ; GFX803: ; %bb.0: ; %entry
996 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
997 ; GFX803-NEXT: flat_load_ushort v0, v[0:1]
998 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
999 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1000 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
1001 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1002 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1003 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1005 ; FIXME: the and above should be removable
1007 %reg.bc = bitcast i32 %reg to <2 x half>
1008 %load = load half, half* %in
1009 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
1010 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Test: load an i8 through the address-space-less (flat) pointer %in,
; zero-extend it to i16, and insert it as element 0 of %reg reinterpreted as
; <2 x i16>. The result is stored to an undef addrspace(1) pointer.
; Expected code per run line:
;   gfx900 - one flat_load_ubyte_d16 written straight into v2 (holds %reg).
;   gfx906 - flat_load_ubyte, then v_bfi_b32 with a 0xffff mask.
;   fiji   - flat_load_ubyte, then v_lshrrev_b32 and v_perm_b32.
1014 define void @load_flat_lo_v2i16_reglo_vreg_zexti8(i8* %in, i32 %reg) #0 {
1015 ; GFX900-LABEL: load_flat_lo_v2i16_reglo_vreg_zexti8:
1016 ; GFX900: ; %bb.0: ; %entry
1017 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1018 ; GFX900-NEXT: flat_load_ubyte_d16 v2, v[0:1]
1019 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1020 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
1021 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1022 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1024 ; GFX906-LABEL: load_flat_lo_v2i16_reglo_vreg_zexti8:
1025 ; GFX906: ; %bb.0: ; %entry
1026 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1027 ; GFX906-NEXT: flat_load_ubyte v0, v[0:1]
1028 ; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff
1029 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1030 ; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2
1031 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1032 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1033 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1035 ; GFX803-LABEL: load_flat_lo_v2i16_reglo_vreg_zexti8:
1036 ; GFX803: ; %bb.0: ; %entry
1037 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1038 ; GFX803-NEXT: flat_load_ubyte v0, v[0:1]
1039 ; GFX803-NEXT: v_lshrrev_b32_e32 v2, 16, v2
1040 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
1041 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1042 ; GFX803-NEXT: v_perm_b32 v0, v2, v0, s4
1043 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1044 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1045 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1047 %reg.bc = bitcast i32 %reg to <2 x i16>
1048 %load = load i8, i8* %in
1049 %ext = zext i8 %load to i16
1050 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
1051 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: load an i8 through the address-space-less (flat) pointer %in,
; sign-extend it to i16, and insert it as element 0 of %reg reinterpreted as
; <2 x i16>. The result is stored to an undef addrspace(1) pointer.
; Expected code per run line:
;   gfx900 - one flat_load_sbyte_d16 written straight into v2 (holds %reg).
;   gfx906 - flat_load_sbyte, then v_bfi_b32 with a 0xffff mask.
;   fiji   - flat_load_sbyte, then v_and_b32 0xffff0000 and an SDWA v_or_b32
;            taking WORD_0 of the loaded value.
1055 define void @load_flat_lo_v2i16_reglo_vreg_sexti8(i8* %in, i32 %reg) #0 {
1056 ; GFX900-LABEL: load_flat_lo_v2i16_reglo_vreg_sexti8:
1057 ; GFX900: ; %bb.0: ; %entry
1058 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1059 ; GFX900-NEXT: flat_load_sbyte_d16 v2, v[0:1]
1060 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1061 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
1062 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1063 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1065 ; GFX906-LABEL: load_flat_lo_v2i16_reglo_vreg_sexti8:
1066 ; GFX906: ; %bb.0: ; %entry
1067 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1068 ; GFX906-NEXT: flat_load_sbyte v0, v[0:1]
1069 ; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff
1070 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1071 ; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2
1072 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1073 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1074 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1076 ; GFX803-LABEL: load_flat_lo_v2i16_reglo_vreg_sexti8:
1077 ; GFX803: ; %bb.0: ; %entry
1078 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1079 ; GFX803-NEXT: flat_load_sbyte v0, v[0:1]
1080 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
1081 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1082 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1083 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1084 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1085 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1087 %reg.bc = bitcast i32 %reg to <2 x i16>
1088 %load = load i8, i8* %in
1089 %ext = sext i8 %load to i16
1090 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
1091 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: <2 x half> variant of the flat zero-extended byte load. An i8 is
; loaded through the address-space-less pointer %in, zero-extended to i16,
; bitcast to half, and inserted as element 0 of %reg reinterpreted as
; <2 x half>. The result is stored to an undef addrspace(1) pointer.
; Expected code per run line:
;   gfx900 - one flat_load_ubyte_d16 written straight into v2 (holds %reg).
;   gfx906 - flat_load_ubyte, then v_lshrrev_b32 / v_and_b32 / v_lshl_or_b32.
;   fiji   - flat_load_ubyte, then v_lshrrev_b32 and v_perm_b32.
1095 define void @load_flat_lo_v2f16_reglo_vreg_zexti8(i8* %in, i32 %reg) #0 {
1096 ; GFX900-LABEL: load_flat_lo_v2f16_reglo_vreg_zexti8:
1097 ; GFX900: ; %bb.0: ; %entry
1098 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1099 ; GFX900-NEXT: flat_load_ubyte_d16 v2, v[0:1]
1100 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1101 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
1102 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1103 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1105 ; GFX906-LABEL: load_flat_lo_v2f16_reglo_vreg_zexti8:
1106 ; GFX906: ; %bb.0: ; %entry
1107 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1108 ; GFX906-NEXT: flat_load_ubyte v0, v[0:1]
1109 ; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2
1110 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1111 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
1112 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
1113 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1114 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1115 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1117 ; GFX803-LABEL: load_flat_lo_v2f16_reglo_vreg_zexti8:
1118 ; GFX803: ; %bb.0: ; %entry
1119 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1120 ; GFX803-NEXT: flat_load_ubyte v0, v[0:1]
1121 ; GFX803-NEXT: v_lshrrev_b32_e32 v2, 16, v2
1122 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
1123 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1124 ; GFX803-NEXT: v_perm_b32 v0, v2, v0, s4
1125 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1126 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1127 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1129 %reg.bc = bitcast i32 %reg to <2 x half>
1130 %load = load i8, i8* %in
1131 %ext = zext i8 %load to i16
1132 %bitcast = bitcast i16 %ext to half
1133 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0
1134 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Test: <2 x half> variant of the flat sign-extended byte load. An i8 is
; loaded through the address-space-less pointer %in, sign-extended to i16,
; bitcast to half, and inserted as element 0 of %reg reinterpreted as
; <2 x half>. The result is stored to an undef addrspace(1) pointer.
; Expected code per run line:
;   gfx900 - one flat_load_sbyte_d16 written straight into v2 (holds %reg).
;   gfx906 - flat_load_sbyte, then v_lshrrev_b32 / v_and_b32 / v_lshl_or_b32.
;   fiji   - flat_load_sbyte, then v_and_b32 0xffff0000 and an SDWA v_or_b32
;            taking WORD_0 of the loaded value.
1138 define void @load_flat_lo_v2f16_reglo_vreg_sexti8(i8* %in, i32 %reg) #0 {
1139 ; GFX900-LABEL: load_flat_lo_v2f16_reglo_vreg_sexti8:
1140 ; GFX900: ; %bb.0: ; %entry
1141 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1142 ; GFX900-NEXT: flat_load_sbyte_d16 v2, v[0:1]
1143 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1144 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
1145 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1146 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1148 ; GFX906-LABEL: load_flat_lo_v2f16_reglo_vreg_sexti8:
1149 ; GFX906: ; %bb.0: ; %entry
1150 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1151 ; GFX906-NEXT: flat_load_sbyte v0, v[0:1]
1152 ; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2
1153 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1154 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
1155 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
1156 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1157 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1158 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1160 ; GFX803-LABEL: load_flat_lo_v2f16_reglo_vreg_sexti8:
1161 ; GFX803: ; %bb.0: ; %entry
1162 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1163 ; GFX803-NEXT: flat_load_sbyte v0, v[0:1]
1164 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
1165 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1166 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1167 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1168 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1169 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1171 %reg.bc = bitcast i32 %reg to <2 x half>
1172 %load = load i8, i8* %in
1173 %ext = sext i8 %load to i16
1174 %bitcast = bitcast i16 %ext to half
1175 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0
1176 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Test: load an i16 from the byval addrspace(5) argument %in at element index
; 2047 (the checks show byte offset 4094 from s32) and insert it as element 0
; of %reg reinterpreted as <2 x i16>. The result is stored to an undef
; addrspace(1) pointer.
; Expected code per run line:
;   gfx900, MUBUF run        - one buffer_load_short_d16 into v0 (holds %reg).
;   gfx900, flat-scratch run - one scratch_load_short_d16 into v0.
;   gfx906 - buffer_load_ushort, then v_bfi_b32 with a 0xffff mask.
;   fiji   - buffer_load_ushort, then v_and_b32 0xffff0000 and v_or_b32.
1180 define void @load_private_lo_v2i16_reglo_vreg(i16 addrspace(5)* byval(i16) %in, i32 %reg) #0 {
1181 ; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg:
1182 ; GFX900-MUBUF: ; %bb.0: ; %entry
1183 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1184 ; GFX900-MUBUF-NEXT: buffer_load_short_d16 v0, off, s[0:3], s32 offset:4094
1185 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1186 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
1187 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1188 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
1190 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg:
1191 ; GFX906: ; %bb.0: ; %entry
1192 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1193 ; GFX906-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1194 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
1195 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1196 ; GFX906-NEXT: v_bfi_b32 v0, v2, v1, v0
1197 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1198 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1199 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1201 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg:
1202 ; GFX803: ; %bb.0: ; %entry
1203 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1204 ; GFX803-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1205 ; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
1206 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1207 ; GFX803-NEXT: v_or_b32_e32 v0, v1, v0
1208 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1209 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1210 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1212 ; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg:
1213 ; GFX900-FLATSCR: ; %bb.0: ; %entry
1214 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1215 ; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v0, off, s32 offset:4094
1216 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1217 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
1218 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1219 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
1221 %reg.bc = bitcast i32 %reg to <2 x i16>
1222 %gep = getelementptr inbounds i16, i16 addrspace(5)* %in, i64 2047
1223 %load = load i16, i16 addrspace(5)* %gep
1224 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
1225 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: the high element comes from a scalar i16 argument rather than an
; existing packed register. %reg is inserted as element 1 of an undef
; <2 x i16>, then an i16 loaded from the byval addrspace(5) argument %in at
; element index 2047 (byte offset 4094 in the checks) is inserted as element
; 0. The result is stored to an undef addrspace(1) pointer.
; Expected code: no run uses a d16 load here. Every run does a plain ushort
; load and then combines:
;   gfx900 (both runs) and gfx906 - v_and_b32 0xffff, then v_lshl_or_b32.
;   fiji                          - v_lshlrev_b32 by 16, then v_or_b32.
1229 define void @load_private_lo_v2i16_reghi_vreg(i16 addrspace(5)* byval(i16) %in, i16 %reg) #0 {
1230 ; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reghi_vreg:
1231 ; GFX900-MUBUF: ; %bb.0: ; %entry
1232 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1233 ; GFX900-MUBUF-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1234 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1235 ; GFX900-MUBUF-NEXT: v_and_b32_e32 v1, 0xffff, v1
1236 ; GFX900-MUBUF-NEXT: v_lshl_or_b32 v0, v0, 16, v1
1237 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
1238 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1239 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
1241 ; GFX906-LABEL: load_private_lo_v2i16_reghi_vreg:
1242 ; GFX906: ; %bb.0: ; %entry
1243 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1244 ; GFX906-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1245 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1246 ; GFX906-NEXT: v_and_b32_e32 v1, 0xffff, v1
1247 ; GFX906-NEXT: v_lshl_or_b32 v0, v0, 16, v1
1248 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1249 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1250 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1252 ; GFX803-LABEL: load_private_lo_v2i16_reghi_vreg:
1253 ; GFX803: ; %bb.0: ; %entry
1254 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1255 ; GFX803-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1256 ; GFX803-NEXT: v_lshlrev_b32_e32 v0, 16, v0
1257 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1258 ; GFX803-NEXT: v_or_b32_e32 v0, v1, v0
1259 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1260 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1261 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1263 ; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reghi_vreg:
1264 ; GFX900-FLATSCR: ; %bb.0: ; %entry
1265 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1266 ; GFX900-FLATSCR-NEXT: scratch_load_ushort v1, off, s32 offset:4094
1267 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1268 ; GFX900-FLATSCR-NEXT: v_and_b32_e32 v1, 0xffff, v1
1269 ; GFX900-FLATSCR-NEXT: v_lshl_or_b32 v0, v0, 16, v1
1270 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
1271 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1272 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
1274 %gep = getelementptr inbounds i16, i16 addrspace(5)* %in, i64 2047
1275 %load = load i16, i16 addrspace(5)* %gep
1276 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
1277 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
1278 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: load a half from the byval addrspace(5) argument %in at element index
; 2047 (the checks show byte offset 4094 from s32) and insert it as element 0
; of %reg reinterpreted as <2 x half>. The result is stored to an undef
; addrspace(1) pointer.
; Expected code per run line:
;   gfx900, MUBUF run        - one buffer_load_short_d16 into v0 (holds %reg).
;   gfx900, flat-scratch run - one scratch_load_short_d16 into v0.
;   gfx906 - buffer_load_ushort, then v_lshrrev_b32 / v_and_b32 / v_lshl_or_b32.
;   fiji   - buffer_load_ushort, then v_and_b32 0xffff0000 and v_or_b32.
1282 define void @load_private_lo_v2f16_reglo_vreg(half addrspace(5)* byval(half) %in, i32 %reg) #0 {
1283 ; GFX900-MUBUF-LABEL: load_private_lo_v2f16_reglo_vreg:
1284 ; GFX900-MUBUF: ; %bb.0: ; %entry
1285 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1286 ; GFX900-MUBUF-NEXT: buffer_load_short_d16 v0, off, s[0:3], s32 offset:4094
1287 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1288 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
1289 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1290 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
1292 ; GFX906-LABEL: load_private_lo_v2f16_reglo_vreg:
1293 ; GFX906: ; %bb.0: ; %entry
1294 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1295 ; GFX906-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1296 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1297 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1298 ; GFX906-NEXT: v_and_b32_e32 v1, 0xffff, v1
1299 ; GFX906-NEXT: v_lshl_or_b32 v0, v0, 16, v1
1300 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1301 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1302 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1304 ; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg:
1305 ; GFX803: ; %bb.0: ; %entry
1306 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1307 ; GFX803-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1308 ; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
1309 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1310 ; GFX803-NEXT: v_or_b32_e32 v0, v1, v0
1311 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1312 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1313 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1315 ; GFX900-FLATSCR-LABEL: load_private_lo_v2f16_reglo_vreg:
1316 ; GFX900-FLATSCR: ; %bb.0: ; %entry
1317 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1318 ; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v0, off, s32 offset:4094
1319 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1320 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
1321 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1322 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
1324 %reg.bc = bitcast i32 %reg to <2 x half>
1325 %gep = getelementptr inbounds half, half addrspace(5)* %in, i64 2047
1326 %load = load half, half addrspace(5)* %gep
1327 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
1328 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Test: volatile i16 load from the constant addrspace(5) address 4094 (built
; with inttoptr; the %in argument is not used by the body), inserted as
; element 0 of %reg reinterpreted as <2 x i16>. The result is stored to an
; undef addrspace(1) pointer.
; Expected code per run line (every load carries glc):
;   gfx900, MUBUF run        - buffer_load_short_d16 into v1 (holds %reg) with
;                              soffset 0 and offset:4094.
;   gfx900, flat-scratch run - s_movk_i32 s0, 0xffe, then
;                              scratch_load_short_d16 addressed through s0.
;   gfx906 - buffer_load_ushort, then v_bfi_b32 with a 0xffff mask.
;   fiji   - buffer_load_ushort, then v_and_b32 0xffff0000 and v_or_b32.
1332 define void @load_private_lo_v2i16_reglo_vreg_nooff(i16 addrspace(5)* %in, i32 %reg) #0 {
1333 ; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_nooff:
1334 ; GFX900-MUBUF: ; %bb.0: ; %entry
1335 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1336 ; GFX900-MUBUF-NEXT: buffer_load_short_d16 v1, off, s[0:3], 0 offset:4094 glc
1337 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1338 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v1, off
1339 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1340 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
1342 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_nooff:
1343 ; GFX906: ; %bb.0: ; %entry
1344 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1345 ; GFX906-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094 glc
1346 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1347 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
1348 ; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1
1349 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1350 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1351 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1353 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_nooff:
1354 ; GFX803: ; %bb.0: ; %entry
1355 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1356 ; GFX803-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094 glc
1357 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1358 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
1359 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
1360 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1361 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1362 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1364 ; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_nooff:
1365 ; GFX900-FLATSCR: ; %bb.0: ; %entry
1366 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1367 ; GFX900-FLATSCR-NEXT: s_movk_i32 s0, 0xffe
1368 ; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v1, off, s0 glc
1369 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1370 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v1, off
1371 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1372 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
1374 %reg.bc = bitcast i32 %reg to <2 x i16>
1375 %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 4094 to i16 addrspace(5)*)
1376 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
1377 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: volatile i16 load from the constant addrspace(5) address 4094 (built
; with inttoptr; the %in argument is not used by the body), inserted as
; element 0 of %reg reinterpreted as <2 x i16>. The result is stored to an
; undef addrspace(1) pointer.
; NOTE(review): apart from the function name, this IR body and its expected
; output match load_private_lo_v2i16_reglo_vreg_nooff; %reg is an i32 here,
; not a separate high-half i16 as the "reghi" name might suggest. Confirm
; whether a distinct high-half input was intended.
; Expected code per run line (every load carries glc):
;   gfx900, MUBUF run        - buffer_load_short_d16 into v1 with soffset 0
;                              and offset:4094.
;   gfx900, flat-scratch run - s_movk_i32 s0, 0xffe, then
;                              scratch_load_short_d16 addressed through s0.
;   gfx906 - buffer_load_ushort, then v_bfi_b32 with a 0xffff mask.
;   fiji   - buffer_load_ushort, then v_and_b32 0xffff0000 and v_or_b32.
1381 define void @load_private_lo_v2i16_reghi_vreg_nooff(i16 addrspace(5)* %in, i32 %reg) #0 {
1382 ; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reghi_vreg_nooff:
1383 ; GFX900-MUBUF: ; %bb.0: ; %entry
1384 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1385 ; GFX900-MUBUF-NEXT: buffer_load_short_d16 v1, off, s[0:3], 0 offset:4094 glc
1386 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1387 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v1, off
1388 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1389 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
1391 ; GFX906-LABEL: load_private_lo_v2i16_reghi_vreg_nooff:
1392 ; GFX906: ; %bb.0: ; %entry
1393 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1394 ; GFX906-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094 glc
1395 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1396 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
1397 ; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1
1398 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1399 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1400 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1402 ; GFX803-LABEL: load_private_lo_v2i16_reghi_vreg_nooff:
1403 ; GFX803: ; %bb.0: ; %entry
1404 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1405 ; GFX803-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094 glc
1406 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1407 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
1408 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
1409 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1410 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1411 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1413 ; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reghi_vreg_nooff:
1414 ; GFX900-FLATSCR: ; %bb.0: ; %entry
1415 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1416 ; GFX900-FLATSCR-NEXT: s_movk_i32 s0, 0xffe
1417 ; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v1, off, s0 glc
1418 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1419 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v1, off
1420 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1421 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
1423 %reg.bc = bitcast i32 %reg to <2 x i16>
1424 %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 4094 to i16 addrspace(5)*)
1425 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
1426 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Test: volatile half load from the constant addrspace(5) address 4094 (built
; with inttoptr; the %in argument is not used by the body), inserted as
; element 0 of %reg reinterpreted as <2 x half>. The result is stored to an
; undef addrspace(1) pointer.
; Expected code per run line (every load carries glc):
;   gfx900, MUBUF run        - buffer_load_short_d16 into v1 (holds %reg) with
;                              soffset 0 and offset:4094.
;   gfx900, flat-scratch run - s_movk_i32 s0, 0xffe, then
;                              scratch_load_short_d16 addressed through s0.
;   gfx906 - buffer_load_ushort, then v_lshrrev_b32 / v_and_b32 / v_lshl_or_b32.
;   fiji   - buffer_load_ushort, then v_and_b32 0xffff0000 and v_or_b32.
1430 define void @load_private_lo_v2f16_reglo_vreg_nooff(half addrspace(5)* %in, i32 %reg) #0 {
1431 ; GFX900-MUBUF-LABEL: load_private_lo_v2f16_reglo_vreg_nooff:
1432 ; GFX900-MUBUF: ; %bb.0: ; %entry
1433 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1434 ; GFX900-MUBUF-NEXT: buffer_load_short_d16 v1, off, s[0:3], 0 offset:4094 glc
1435 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1436 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v1, off
1437 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1438 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
1440 ; GFX906-LABEL: load_private_lo_v2f16_reglo_vreg_nooff:
1441 ; GFX906: ; %bb.0: ; %entry
1442 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1443 ; GFX906-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094 glc
1444 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1445 ; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1446 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
1447 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
1448 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1449 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1450 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1452 ; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg_nooff:
1453 ; GFX803: ; %bb.0: ; %entry
1454 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1455 ; GFX803-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094 glc
1456 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1457 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
1458 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
1459 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1460 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1461 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1463 ; GFX900-FLATSCR-LABEL: load_private_lo_v2f16_reglo_vreg_nooff:
1464 ; GFX900-FLATSCR: ; %bb.0: ; %entry
1465 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1466 ; GFX900-FLATSCR-NEXT: s_movk_i32 s0, 0xffe
1467 ; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v1, off, s0 glc
1468 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1469 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v1, off
1470 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1471 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
1473 %reg.bc = bitcast i32 %reg to <2 x half>
1474 %load = load volatile half, half addrspace(5)* inttoptr (i32 4094 to half addrspace(5)*)
1475 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
1476 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Test: load an i8 from the byval addrspace(5) argument %in at byte offset
; 4095, zero-extend it to i16, and insert it as element 0 of %reg
; reinterpreted as <2 x i16>. The result is stored to an undef addrspace(1)
; pointer.
; Expected code per run line:
;   gfx900, MUBUF run        - one buffer_load_ubyte_d16 into v0 (holds %reg).
;   gfx900, flat-scratch run - one scratch_load_ubyte_d16 into v0.
;   gfx906 - buffer_load_ubyte, then v_bfi_b32 with a 0xffff mask.
;   fiji   - buffer_load_ubyte, then v_lshrrev_b32 and v_perm_b32.
1480 define void @load_private_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(5)* byval(i8) %in, i32 %reg) #0 {
1481 ; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8:
1482 ; GFX900-MUBUF: ; %bb.0: ; %entry
1483 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1484 ; GFX900-MUBUF-NEXT: buffer_load_ubyte_d16 v0, off, s[0:3], s32 offset:4095
1485 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1486 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
1487 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1488 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
1490 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8:
1491 ; GFX906: ; %bb.0: ; %entry
1492 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1493 ; GFX906-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095
1494 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
1495 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1496 ; GFX906-NEXT: v_bfi_b32 v0, v2, v1, v0
1497 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1498 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1499 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1501 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8:
1502 ; GFX803: ; %bb.0: ; %entry
1503 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1504 ; GFX803-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095
1505 ; GFX803-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1506 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
1507 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1508 ; GFX803-NEXT: v_perm_b32 v0, v0, v1, s4
1509 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1510 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1511 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1513 ; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8:
1514 ; GFX900-FLATSCR: ; %bb.0: ; %entry
1515 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1516 ; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v0, off, s32 offset:4095
1517 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1518 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
1519 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1520 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
1522 %reg.bc = bitcast i32 %reg to <2 x i16>
1523 %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 4095
1524 %load = load i8, i8 addrspace(5)* %gep
1525 %ext = zext i8 %load to i16
1526 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
1527 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Lo-half insert of a sign-extended byte loaded from addrspace(5) memory.
; Same shape as the zero-extending variant, but the i8 at byte offset 4095 from
; the byval pointer %in is sign-extended to i16 before being inserted as
; element 0 of %reg viewed as <2 x i16>.
; What the assertions below expect:
;  - both gfx900 runs: one sbyte_d16 load (buffer_ or scratch_) at s32 with
;    offset 4095, whose destination register is stored directly.
;  - gfx906 run: a plain sbyte load merged with v_bfi_b32 under a 0xffff mask.
;  - fiji run: a plain sbyte load; %reg is masked with 0xffff0000 and combined
;    with WORD_0 of the loaded value by v_or_b32_sdwa.
1531 define void @load_private_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(5)* byval(i8) %in, i32 %reg) #0 {
1532 ; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8:
1533 ; GFX900-MUBUF: ; %bb.0: ; %entry
1534 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1535 ; GFX900-MUBUF-NEXT: buffer_load_sbyte_d16 v0, off, s[0:3], s32 offset:4095
1536 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1537 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
1538 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1539 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
1541 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8:
1542 ; GFX906: ; %bb.0: ; %entry
1543 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1544 ; GFX906-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095
1545 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
1546 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1547 ; GFX906-NEXT: v_bfi_b32 v0, v2, v1, v0
1548 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1549 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1550 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1552 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8:
1553 ; GFX803: ; %bb.0: ; %entry
1554 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1555 ; GFX803-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095
1556 ; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
1557 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1558 ; GFX803-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1559 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1560 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1561 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1563 ; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8:
1564 ; GFX900-FLATSCR: ; %bb.0: ; %entry
1565 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1566 ; GFX900-FLATSCR-NEXT: scratch_load_sbyte_d16 v0, off, s32 offset:4095
1567 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1568 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
1569 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1570 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
1572 %reg.bc = bitcast i32 %reg to <2 x i16>
; Index 4095 on an i8 pointer is byte offset 4095, the same immediate the
; checked load instructions carry.
1573 %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 4095
1574 %load = load i8, i8 addrspace(5)* %gep
; sext (not zext) is the only IR difference from the zexti8 sibling test.
1575 %ext = sext i8 %load to i16
1576 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
1577 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Lo-half insert of a zero-extended byte loaded from a constant addrspace(5)
; address. The volatile i8 load uses the literal address 4094 (inttoptr), so the
; %in argument is not referenced by the body. The loaded byte is zero-extended
; to i16 and inserted as element 0 of %reg viewed as <2 x i16>.
; What the assertions below expect:
;  - gfx900 MUBUF run: buffer_load_ubyte_d16 with soffset 0, offset 4094, glc.
;  - gfx900 flat-scratch run: the address is first materialised in s0 as 0xffe
;    (4094) and then used by scratch_load_ubyte_d16 with glc.
;  - gfx906 run: a plain ubyte load merged with v_bfi_b32 under a 0xffff mask.
;  - fiji run: a plain ubyte load merged with v_lshrrev_b32 + v_perm_b32.
1581 define void @load_private_lo_v2i16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, i32 %reg) #0 {
1582 ; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_zexti8:
1583 ; GFX900-MUBUF: ; %bb.0: ; %entry
1584 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1585 ; GFX900-MUBUF-NEXT: buffer_load_ubyte_d16 v1, off, s[0:3], 0 offset:4094 glc
1586 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1587 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v1, off
1588 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1589 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
1591 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_zexti8:
1592 ; GFX906: ; %bb.0: ; %entry
1593 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1594 ; GFX906-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:4094 glc
1595 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1596 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
1597 ; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1
1598 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1599 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1600 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1602 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_zexti8:
1603 ; GFX803: ; %bb.0: ; %entry
1604 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1605 ; GFX803-NEXT: v_lshrrev_b32_e32 v0, 16, v1
1606 ; GFX803-NEXT: buffer_load_ubyte v1, off, s[0:3], 0 offset:4094 glc
1607 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1608 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
1609 ; GFX803-NEXT: v_perm_b32 v0, v0, v1, s4
1610 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1611 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1612 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1614 ; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_zexti8:
1615 ; GFX900-FLATSCR: ; %bb.0: ; %entry
1616 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1617 ; GFX900-FLATSCR-NEXT: s_movk_i32 s0, 0xffe
1618 ; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v1, off, s0 glc
1619 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1620 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v1, off
1621 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1622 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
1624 %reg.bc = bitcast i32 %reg to <2 x i16>
; Absolute address 4094 (0xffe); every checked load of this volatile access
; carries the glc bit.
1625 %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
1626 %ext = zext i8 %load to i16
1627 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
1628 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Lo-half insert of a sign-extended byte loaded from a constant addrspace(5)
; address. The volatile i8 load uses the literal address 4094 (inttoptr), so the
; %in argument is not referenced by the body. The loaded byte is sign-extended
; to i16 and inserted as element 0 of %reg viewed as <2 x i16>.
; What the assertions below expect:
;  - gfx900 MUBUF run: buffer_load_sbyte_d16 with soffset 0, offset 4094, glc.
;  - gfx900 flat-scratch run: the address is first materialised in s0 as 0xffe
;    (4094) and then used by scratch_load_sbyte_d16 with glc.
;  - gfx906 run: a plain sbyte load merged with v_bfi_b32 under a 0xffff mask.
;  - fiji run: a plain sbyte load; %reg is masked with 0xffff0000 and combined
;    with WORD_0 of the loaded value by v_or_b32_sdwa.
1632 define void @load_private_lo_v2i16_reglo_vreg_nooff_sexti8(i8 addrspace(5)* %in, i32 %reg) #0 {
1633 ; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_sexti8:
1634 ; GFX900-MUBUF: ; %bb.0: ; %entry
1635 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1636 ; GFX900-MUBUF-NEXT: buffer_load_sbyte_d16 v1, off, s[0:3], 0 offset:4094 glc
1637 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1638 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v1, off
1639 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1640 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
1642 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_sexti8:
1643 ; GFX906: ; %bb.0: ; %entry
1644 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1645 ; GFX906-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 offset:4094 glc
1646 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1647 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
1648 ; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1
1649 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1650 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1651 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1653 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_sexti8:
1654 ; GFX803: ; %bb.0: ; %entry
1655 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1656 ; GFX803-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 offset:4094 glc
1657 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1658 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
1659 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1660 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1661 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1662 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1664 ; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_sexti8:
1665 ; GFX900-FLATSCR: ; %bb.0: ; %entry
1666 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1667 ; GFX900-FLATSCR-NEXT: s_movk_i32 s0, 0xffe
1668 ; GFX900-FLATSCR-NEXT: scratch_load_sbyte_d16 v1, off, s0 glc
1669 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1670 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v1, off
1671 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1672 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
1674 %reg.bc = bitcast i32 %reg to <2 x i16>
; Absolute address 4094 (0xffe); every checked load of this volatile access
; carries the glc bit.
1675 %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
1676 %ext = sext i8 %load to i16
1677 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
1678 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; <2 x half> flavour of the constant-address zero-extending byte test.
; A volatile i8 is loaded from the literal addrspace(5) address 4094 (the %in
; argument is not referenced), zero-extended to i16, bitcast to half, and
; inserted as element 0 of %reg viewed as <2 x half>.
; What the assertions below expect:
;  - gfx900 MUBUF run: buffer_load_ubyte_d16 with soffset 0, offset 4094, glc.
;  - gfx900 flat-scratch run: s0 is set to 0xffe (4094) and then used by
;    scratch_load_ubyte_d16 with glc.
;  - gfx906 run: a plain ubyte load, then v_lshrrev_b32 / v_and_b32 /
;    v_lshl_or_b32 to rebuild the dword (not the v_bfi_b32 form checked for the
;    <2 x i16> sibling).
;  - fiji run: a plain ubyte load merged with v_lshrrev_b32 + v_perm_b32.
1682 define void @load_private_lo_v2f16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, i32 %reg) #0 {
1683 ; GFX900-MUBUF-LABEL: load_private_lo_v2f16_reglo_vreg_nooff_zexti8:
1684 ; GFX900-MUBUF: ; %bb.0: ; %entry
1685 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1686 ; GFX900-MUBUF-NEXT: buffer_load_ubyte_d16 v1, off, s[0:3], 0 offset:4094 glc
1687 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1688 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v1, off
1689 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1690 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
1692 ; GFX906-LABEL: load_private_lo_v2f16_reglo_vreg_nooff_zexti8:
1693 ; GFX906: ; %bb.0: ; %entry
1694 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1695 ; GFX906-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:4094 glc
1696 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1697 ; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1698 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
1699 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
1700 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1701 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1702 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1704 ; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg_nooff_zexti8:
1705 ; GFX803: ; %bb.0: ; %entry
1706 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1707 ; GFX803-NEXT: v_lshrrev_b32_e32 v0, 16, v1
1708 ; GFX803-NEXT: buffer_load_ubyte v1, off, s[0:3], 0 offset:4094 glc
1709 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1710 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
1711 ; GFX803-NEXT: v_perm_b32 v0, v0, v1, s4
1712 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1713 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1714 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1716 ; GFX900-FLATSCR-LABEL: load_private_lo_v2f16_reglo_vreg_nooff_zexti8:
1717 ; GFX900-FLATSCR: ; %bb.0: ; %entry
1718 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1719 ; GFX900-FLATSCR-NEXT: s_movk_i32 s0, 0xffe
1720 ; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v1, off, s0 glc
1721 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1722 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v1, off
1723 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1724 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
1726 %reg.bc = bitcast i32 %reg to <2 x half>
1727 %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
1728 %ext = zext i8 %load to i16
; Reinterpret the extended integer bits as half; no numeric conversion occurs.
1729 %bc.ext = bitcast i16 %ext to half
1730 %build1 = insertelement <2 x half> %reg.bc, half %bc.ext, i32 0
1731 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Lo-half insert of an i16 loaded from addrspace(4) memory at a negative offset.
; The IR loads the i16 at element index -2047 from %in (byte offset -4094) and
; inserts it as element 0 of %reg viewed as <2 x i16>.
; What the assertions below expect:
;  - both gfx900 runs (shared prefix): global_load_short_d16 into v2 with
;    offset -4094, and v2 is stored directly.
;  - gfx906 run: global_load_ushort with offset -4094, merged by v_bfi_b32
;    under a 0xffff mask.
;  - fiji run: the offset is applied with a 64-bit add (0xfffff002 low, -1 high
;    with carry) before flat_load_ushort; the merge is v_and_b32 with
;    0xffff0000 followed by v_or_b32.
1735 define void @load_constant_lo_v2i16_reglo_vreg(i16 addrspace(4)* %in, i32 %reg) #0 {
1736 ; GFX900-LABEL: load_constant_lo_v2i16_reglo_vreg:
1737 ; GFX900: ; %bb.0: ; %entry
1738 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1739 ; GFX900-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
1740 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1741 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
1742 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1743 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1745 ; GFX906-LABEL: load_constant_lo_v2i16_reglo_vreg:
1746 ; GFX906: ; %bb.0: ; %entry
1747 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1748 ; GFX906-NEXT: global_load_ushort v0, v[0:1], off offset:-4094
1749 ; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff
1750 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1751 ; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2
1752 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1753 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1754 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1756 ; GFX803-LABEL: load_constant_lo_v2i16_reglo_vreg:
1757 ; GFX803: ; %bb.0: ; %entry
1758 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1759 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff002, v0
1760 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
1761 ; GFX803-NEXT: flat_load_ushort v0, v[0:1]
1762 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
1763 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1764 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
1765 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1766 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1767 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1769 %reg.bc = bitcast i32 %reg to <2 x i16>
; -2047 i16 elements == -4094 bytes (0xfffff002 as a 32-bit two's-complement
; value), which is the displacement the checks above look for.
1770 %gep = getelementptr inbounds i16, i16 addrspace(4)* %in, i64 -2047
1771 %load = load i16, i16 addrspace(4)* %gep
1772 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
1773 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; <2 x half> flavour of the addrspace(4) negative-offset lo-half test.
; The IR loads the half at element index -2047 from %in (byte offset -4094) and
; inserts it as element 0 of %reg viewed as <2 x half>.
; What the assertions below expect:
;  - both gfx900 runs (shared prefix): global_load_short_d16 into v2 with
;    offset -4094, and v2 is stored directly.
;  - gfx906 run: global_load_ushort with offset -4094, then v_lshrrev_b32 /
;    v_and_b32 / v_lshl_or_b32 to rebuild the dword.
;  - fiji run: a 64-bit add of 0xfffff002 / -1 before flat_load_ushort, then
;    v_and_b32 with 0xffff0000 followed by v_or_b32.
1777 define void @load_constant_lo_v2f16_reglo_vreg(half addrspace(4)* %in, i32 %reg) #0 {
1778 ; GFX900-LABEL: load_constant_lo_v2f16_reglo_vreg:
1779 ; GFX900: ; %bb.0: ; %entry
1780 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1781 ; GFX900-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
1782 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1783 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
1784 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1785 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1787 ; GFX906-LABEL: load_constant_lo_v2f16_reglo_vreg:
1788 ; GFX906: ; %bb.0: ; %entry
1789 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1790 ; GFX906-NEXT: global_load_ushort v0, v[0:1], off offset:-4094
1791 ; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2
1792 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1793 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
1794 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
1795 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1796 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1797 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1799 ; GFX803-LABEL: load_constant_lo_v2f16_reglo_vreg:
1800 ; GFX803: ; %bb.0: ; %entry
1801 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1802 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff002, v0
1803 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
1804 ; GFX803-NEXT: flat_load_ushort v0, v[0:1]
1805 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
1806 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1807 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
1808 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1809 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1810 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1812 %reg.bc = bitcast i32 %reg to <2 x half>
; -2047 half elements == -4094 bytes, the displacement the checks look for.
1813 %gep = getelementptr inbounds half, half addrspace(4)* %in, i64 -2047
1814 %load = load half, half addrspace(4)* %gep
1815 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
1816 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Lo-half insert of a zero-extended byte loaded from addrspace(4) memory.
; The IR loads the i8 at byte offset -4095 from %in, zero-extends it to i16,
; bitcasts that to half, and inserts it as element 0 of %reg viewed as
; <2 x half>.
; What the assertions below expect:
;  - both gfx900 runs (shared prefix): global_load_ubyte_d16 into v2 with
;    offset -4095, and v2 is stored directly.
;  - gfx906 run: global_load_ubyte with offset -4095, then v_lshrrev_b32 /
;    v_and_b32 / v_lshl_or_b32 to rebuild the dword.
;  - fiji run: a 64-bit add of 0xfffff001 / -1 before flat_load_ubyte, then
;    v_lshrrev_b32 + v_perm_b32 for the merge.
1820 define void @load_constant_lo_v2f16_reglo_vreg_zexti8(i8 addrspace(4)* %in, i32 %reg) #0 {
1821 ; GFX900-LABEL: load_constant_lo_v2f16_reglo_vreg_zexti8:
1822 ; GFX900: ; %bb.0: ; %entry
1823 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1824 ; GFX900-NEXT: global_load_ubyte_d16 v2, v[0:1], off offset:-4095
1825 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1826 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
1827 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1828 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1830 ; GFX906-LABEL: load_constant_lo_v2f16_reglo_vreg_zexti8:
1831 ; GFX906: ; %bb.0: ; %entry
1832 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1833 ; GFX906-NEXT: global_load_ubyte v0, v[0:1], off offset:-4095
1834 ; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2
1835 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1836 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
1837 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
1838 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1839 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1840 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1842 ; GFX803-LABEL: load_constant_lo_v2f16_reglo_vreg_zexti8:
1843 ; GFX803: ; %bb.0: ; %entry
1844 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1845 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0
1846 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
1847 ; GFX803-NEXT: flat_load_ubyte v0, v[0:1]
1848 ; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v2
1849 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
1850 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1851 ; GFX803-NEXT: v_perm_b32 v0, v1, v0, s4
1852 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1853 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1854 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1856 %reg.bc = bitcast i32 %reg to <2 x half>
; Byte offset -4095 (0xfffff001 as a 32-bit two's-complement value).
1857 %gep = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 -4095
1858 %load = load i8, i8 addrspace(4)* %gep
1859 %ext = zext i8 %load to i16
; Reinterpret the extended integer bits as half; no numeric conversion occurs.
1860 %bitcast = bitcast i16 %ext to half
1861 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0
1862 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Lo-half insert of a sign-extended byte loaded from addrspace(4) memory.
; The IR loads the i8 at byte offset -4095 from %in, sign-extends it to i16,
; bitcasts that to half, and inserts it as element 0 of %reg viewed as
; <2 x half>.
; What the assertions below expect:
;  - both gfx900 runs (shared prefix): global_load_sbyte_d16 into v2 with
;    offset -4095, and v2 is stored directly.
;  - gfx906 run: global_load_sbyte with offset -4095, then v_lshrrev_b32 /
;    v_and_b32 / v_lshl_or_b32 to rebuild the dword.
;  - fiji run: a 64-bit add of 0xfffff001 / -1 before flat_load_sbyte; %reg is
;    masked with 0xffff0000 and combined with WORD_0 of the loaded value by
;    v_or_b32_sdwa.
1866 define void @load_constant_lo_v2f16_reglo_vreg_sexti8(i8 addrspace(4)* %in, i32 %reg) #0 {
1867 ; GFX900-LABEL: load_constant_lo_v2f16_reglo_vreg_sexti8:
1868 ; GFX900: ; %bb.0: ; %entry
1869 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1870 ; GFX900-NEXT: global_load_sbyte_d16 v2, v[0:1], off offset:-4095
1871 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1872 ; GFX900-NEXT: global_store_dword v[0:1], v2, off
1873 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1874 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1876 ; GFX906-LABEL: load_constant_lo_v2f16_reglo_vreg_sexti8:
1877 ; GFX906: ; %bb.0: ; %entry
1878 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1879 ; GFX906-NEXT: global_load_sbyte v0, v[0:1], off offset:-4095
1880 ; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2
1881 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1882 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
1883 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
1884 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1885 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1886 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1888 ; GFX803-LABEL: load_constant_lo_v2f16_reglo_vreg_sexti8:
1889 ; GFX803: ; %bb.0: ; %entry
1890 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1891 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0
1892 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
1893 ; GFX803-NEXT: flat_load_sbyte v0, v[0:1]
1894 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
1895 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1896 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1897 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1898 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1899 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1901 %reg.bc = bitcast i32 %reg to <2 x half>
; Byte offset -4095 (0xfffff001 as a 32-bit two's-complement value).
1902 %gep = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 -4095
1903 %load = load i8, i8 addrspace(4)* %gep
1904 %ext = sext i8 %load to i16
; Reinterpret the extended integer bits as half; no numeric conversion occurs.
1905 %bitcast = bitcast i16 %ext to half
1906 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0
1907 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Lo-half insert of an i16 loaded from a stack object, with the frame offset
; folded into the load's immediate.
; The body allocates two addrspace(5) objects: %obj0 ([10 x i32], 40 bytes) and
; %obj1 ([4096 x i16]). It does a volatile store of 123 to %obj0, then a
; volatile i16 load from element 2027 of %obj1 (byte 4054 within %obj1), which
; is inserted as element 0 of %reg viewed as <2 x i16>.
; What the assertions below expect:
;  - every run stores 0x7b at s32 with no immediate offset and loads at s32 with
;    offset 4094 and glc. 4094 == 40 + 4054, consistent with %obj1 being laid
;    out directly after %obj0.
;  - both gfx900 runs use a short_d16 load (buffer_ or scratch_) whose
;    destination register is stored directly.
;  - gfx906 run: a plain ushort load merged with v_bfi_b32 under a 0xffff mask.
;  - fiji run: a plain ushort load merged with v_and_b32 0xffff0000 + v_or_b32.
1911 define void @load_private_lo_v2i16_reglo_vreg_to_offset(i32 %reg) #0 {
1912 ; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_to_offset:
1913 ; GFX900-MUBUF: ; %bb.0: ; %entry
1914 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1915 ; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 0x7b
1916 ; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32
1917 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1918 ; GFX900-MUBUF-NEXT: buffer_load_short_d16 v0, off, s[0:3], s32 offset:4094 glc
1919 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1920 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
1921 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1922 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
1924 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_to_offset:
1925 ; GFX906: ; %bb.0: ; %entry
1926 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1927 ; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b
1928 ; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32
1929 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1930 ; GFX906-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094 glc
1931 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1932 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
1933 ; GFX906-NEXT: v_bfi_b32 v0, v2, v1, v0
1934 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
1935 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1936 ; GFX906-NEXT: s_setpc_b64 s[30:31]
1938 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_to_offset:
1939 ; GFX803: ; %bb.0: ; %entry
1940 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1941 ; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b
1942 ; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32
1943 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1944 ; GFX803-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094 glc
1945 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1946 ; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
1947 ; GFX803-NEXT: v_or_b32_e32 v0, v1, v0
1948 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1949 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1950 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1952 ; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_to_offset:
1953 ; GFX900-FLATSCR: ; %bb.0: ; %entry
1954 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1955 ; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b
1956 ; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32
1957 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1958 ; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v0, off, s32 offset:4094 glc
1959 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1960 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
1961 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
1962 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
; %obj0 occupies 10 * 4 = 40 bytes; the volatile store below keeps it in use.
1964 %obj0 = alloca [10 x i32], align 4, addrspace(5)
1965 %obj1 = alloca [4096 x i16], align 2, addrspace(5)
1966 %reg.bc = bitcast i32 %reg to <2 x i16>
1967 %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
1968 store volatile i32 123, i32 addrspace(5)* %bc
; Element 2027 of an i16 array is byte 4054 inside %obj1.
1969 %gep = getelementptr inbounds [4096 x i16], [4096 x i16] addrspace(5)* %obj1, i32 0, i32 2027
1970 %load = load volatile i16, i16 addrspace(5)* %gep
1971 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
1972 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Lo-half insert of a sign-extended byte loaded from a stack object, with the
; frame offset folded into the load's immediate.
; The body allocates %obj0 ([10 x i32], 40 bytes) and %obj1 ([4096 x i8]) in
; addrspace(5), does a volatile store of 123 to %obj0, then a volatile i8 load
; from element 4055 of %obj1. The byte is sign-extended to i16 and inserted as
; element 0 of %reg viewed as <2 x i16>.
; What the assertions below expect:
;  - every run stores 0x7b at s32 with no immediate offset and loads at s32 with
;    offset 4095 and glc. 4095 == 40 + 4055, consistent with %obj1 being laid
;    out directly after %obj0.
;  - both gfx900 runs use an sbyte_d16 load (buffer_ or scratch_) whose
;    destination register is stored directly.
;  - gfx906 run: a plain sbyte load merged with v_bfi_b32 under a 0xffff mask.
;  - fiji run: a plain sbyte load merged with v_and_b32 0xffff0000 +
;    v_or_b32_sdwa taking WORD_0 of the loaded value.
1976 define void @load_private_lo_v2i16_reglo_vreg_sexti8_to_offset(i32 %reg) #0 {
1977 ; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8_to_offset:
1978 ; GFX900-MUBUF: ; %bb.0: ; %entry
1979 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1980 ; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 0x7b
1981 ; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32
1982 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1983 ; GFX900-MUBUF-NEXT: buffer_load_sbyte_d16 v0, off, s[0:3], s32 offset:4095 glc
1984 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1985 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
1986 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
1987 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
1989 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8_to_offset:
1990 ; GFX906: ; %bb.0: ; %entry
1991 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1992 ; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b
1993 ; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32
1994 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1995 ; GFX906-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095 glc
1996 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1997 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
1998 ; GFX906-NEXT: v_bfi_b32 v0, v2, v1, v0
1999 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
2000 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2001 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2003 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8_to_offset:
2004 ; GFX803: ; %bb.0: ; %entry
2005 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2006 ; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b
2007 ; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32
2008 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2009 ; GFX803-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095 glc
2010 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2011 ; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
2012 ; GFX803-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2013 ; GFX803-NEXT: flat_store_dword v[0:1], v0
2014 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2015 ; GFX803-NEXT: s_setpc_b64 s[30:31]
2017 ; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8_to_offset:
2018 ; GFX900-FLATSCR: ; %bb.0: ; %entry
2019 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2020 ; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b
2021 ; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32
2022 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2023 ; GFX900-FLATSCR-NEXT: scratch_load_sbyte_d16 v0, off, s32 offset:4095 glc
2024 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2025 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
2026 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2027 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
; %obj0 occupies 10 * 4 = 40 bytes; the volatile store below keeps it in use.
2029 %obj0 = alloca [10 x i32], align 4, addrspace(5)
2030 %obj1 = alloca [4096 x i8], align 2, addrspace(5)
2031 %reg.bc = bitcast i32 %reg to <2 x i16>
2032 %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
2033 store volatile i32 123, i32 addrspace(5)* %bc
; Element 4055 of an i8 array is byte 4055 inside %obj1.
2034 %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4055
2035 %load = load volatile i8, i8 addrspace(5)* %gep
2036 %load.ext = sext i8 %load to i16
2037 %build1 = insertelement <2 x i16> %reg.bc, i16 %load.ext, i32 0
2038 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; Lo-half insert of a zero-extended byte loaded from a stack object, with the
; frame offset folded into the load's immediate.
; The body allocates %obj0 ([10 x i32], 40 bytes) and %obj1 ([4096 x i8]) in
; addrspace(5), does a volatile store of 123 to %obj0, then a volatile i8 load
; from element 4055 of %obj1. The byte is zero-extended to i16 and inserted as
; element 0 of %reg viewed as <2 x i16>.
; What the assertions below expect:
;  - every run stores 0x7b at s32 with no immediate offset and loads at s32 with
;    offset 4095 and glc. 4095 == 40 + 4055, consistent with %obj1 being laid
;    out directly after %obj0.
;  - both gfx900 runs use a ubyte_d16 load (buffer_ or scratch_) whose
;    destination register is stored directly.
;  - gfx906 run: a plain ubyte load merged with v_bfi_b32 under a 0xffff mask.
;  - fiji run: a plain ubyte load merged with v_lshrrev_b32 + v_perm_b32.
2042 define void @load_private_lo_v2i16_reglo_vreg_zexti8_to_offset(i32 %reg) #0 {
2043 ; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8_to_offset:
2044 ; GFX900-MUBUF: ; %bb.0: ; %entry
2045 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2046 ; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 0x7b
2047 ; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32
2048 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
2049 ; GFX900-MUBUF-NEXT: buffer_load_ubyte_d16 v0, off, s[0:3], s32 offset:4095 glc
2050 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
2051 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
2052 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
2053 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
2055 ; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8_to_offset:
2056 ; GFX906: ; %bb.0: ; %entry
2057 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2058 ; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b
2059 ; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32
2060 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2061 ; GFX906-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095 glc
2062 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2063 ; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
2064 ; GFX906-NEXT: v_bfi_b32 v0, v2, v1, v0
2065 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
2066 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2067 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2069 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8_to_offset:
2070 ; GFX803: ; %bb.0: ; %entry
2071 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2072 ; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b
2073 ; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32
2074 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2075 ; GFX803-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095 glc
2076 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2077 ; GFX803-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2078 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
2079 ; GFX803-NEXT: v_perm_b32 v0, v0, v1, s4
2080 ; GFX803-NEXT: flat_store_dword v[0:1], v0
2081 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2082 ; GFX803-NEXT: s_setpc_b64 s[30:31]
2084 ; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8_to_offset:
2085 ; GFX900-FLATSCR: ; %bb.0: ; %entry
2086 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2087 ; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b
2088 ; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32
2089 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2090 ; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v0, off, s32 offset:4095 glc
2091 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2092 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
2093 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2094 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
; %obj0 occupies 10 * 4 = 40 bytes; the volatile store below keeps it in use.
2096 %obj0 = alloca [10 x i32], align 4, addrspace(5)
2097 %obj1 = alloca [4096 x i8], align 2, addrspace(5)
2098 %reg.bc = bitcast i32 %reg to <2 x i16>
2099 %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
2100 store volatile i32 123, i32 addrspace(5)* %bc
; Element 4055 of an i8 array is byte 4055 inside %obj1.
2101 %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4055
2102 %load = load volatile i8, i8 addrspace(5)* %gep
2103 %load.ext = zext i8 %load to i16
2104 %build1 = insertelement <2 x i16> %reg.bc, i16 %load.ext, i32 0
2105 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
; <2 x half> flavour of the stack-object sign-extending byte test.
; The body allocates %obj0 ([10 x i32], 40 bytes) and %obj1 ([4096 x i8]) in
; addrspace(5), does a volatile store of 123 to %obj0, then a volatile i8 load
; from element 4055 of %obj1. The byte is sign-extended to i16, bitcast to
; half, and inserted as element 0 of %reg viewed as <2 x half>.
; What the assertions below expect:
;  - every run stores 0x7b at s32 with no immediate offset and loads at s32 with
;    offset 4095 and glc. 4095 == 40 + 4055, consistent with %obj1 being laid
;    out directly after %obj0.
;  - both gfx900 runs use an sbyte_d16 load (buffer_ or scratch_) whose
;    destination register is stored directly.
;  - gfx906 run: a plain sbyte load, then v_lshrrev_b32 / v_and_b32 /
;    v_lshl_or_b32 to rebuild the dword.
;  - fiji run: a plain sbyte load merged with v_and_b32 0xffff0000 +
;    v_or_b32_sdwa taking WORD_0 of the loaded value.
2109 define void @load_private_lo_v2f16_reglo_vreg_sexti8_to_offset(i32 %reg) #0 {
2110 ; GFX900-MUBUF-LABEL: load_private_lo_v2f16_reglo_vreg_sexti8_to_offset:
2111 ; GFX900-MUBUF: ; %bb.0: ; %entry
2112 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2113 ; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 0x7b
2114 ; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32
2115 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
2116 ; GFX900-MUBUF-NEXT: buffer_load_sbyte_d16 v0, off, s[0:3], s32 offset:4095 glc
2117 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
2118 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
2119 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
2120 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
2122 ; GFX906-LABEL: load_private_lo_v2f16_reglo_vreg_sexti8_to_offset:
2123 ; GFX906: ; %bb.0: ; %entry
2124 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2125 ; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b
2126 ; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32
2127 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2128 ; GFX906-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095 glc
2129 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2130 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2131 ; GFX906-NEXT: v_and_b32_e32 v1, 0xffff, v1
2132 ; GFX906-NEXT: v_lshl_or_b32 v0, v0, 16, v1
2133 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
2134 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2135 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2137 ; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg_sexti8_to_offset:
2138 ; GFX803: ; %bb.0: ; %entry
2139 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2140 ; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b
2141 ; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32
2142 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2143 ; GFX803-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095 glc
2144 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2145 ; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
2146 ; GFX803-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2147 ; GFX803-NEXT: flat_store_dword v[0:1], v0
2148 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2149 ; GFX803-NEXT: s_setpc_b64 s[30:31]
2151 ; GFX900-FLATSCR-LABEL: load_private_lo_v2f16_reglo_vreg_sexti8_to_offset:
2152 ; GFX900-FLATSCR: ; %bb.0: ; %entry
2153 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2154 ; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b
2155 ; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32
2156 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2157 ; GFX900-FLATSCR-NEXT: scratch_load_sbyte_d16 v0, off, s32 offset:4095 glc
2158 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2159 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
2160 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2161 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
; %obj0 occupies 10 * 4 = 40 bytes; the volatile store below keeps it in use.
2163 %obj0 = alloca [10 x i32], align 4, addrspace(5)
2164 %obj1 = alloca [4096 x i8], align 2, addrspace(5)
2165 %reg.bc = bitcast i32 %reg to <2 x half>
2166 %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
2167 store volatile i32 123, i32 addrspace(5)* %bc
; Element 4055 of an i8 array is byte 4055 inside %obj1.
2168 %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4055
2169 %load = load volatile i8, i8 addrspace(5)* %gep
2170 %load.ext = sext i8 %load to i16
; Reinterpret the extended integer bits as half; no numeric conversion occurs.
2171 %bitcast = bitcast i16 %load.ext to half
2172 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0
2173 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Test: a volatile i8 load from an addrspace(5) alloca is zero-extended to
; i16, bitcast to half, and inserted into element 0 of the <2 x half> value
; obtained by bitcasting %reg; the resulting vector is stored through an
; undef addrspace(1) pointer.
;
; What the autogenerated checks below pin down, per run line:
;  - both gfx900 runs (buffer and flat-scratch addressing) load with a
;    *_load_ubyte_d16 instruction directly into v0 and store v0 with no
;    additional packing instructions;
;  - the gfx906 and fiji runs load with a plain buffer_load_ubyte into v1 and
;    then rebuild the packed dword (v_lshrrev + v_and + v_lshl_or on gfx906,
;    v_lshrrev + v_perm_b32 on fiji).
; In every run the byte load is expected at offset:4095 relative to s32.
;
; NOTE(review): the check lines are generated by update_llc_test_checks.py
; (see the note at the top of the file); regenerate them instead of editing
; them by hand.
2177 define void @load_private_lo_v2f16_reglo_vreg_zexti8_to_offset(i32 %reg) #0 {
2178 ; GFX900-MUBUF-LABEL: load_private_lo_v2f16_reglo_vreg_zexti8_to_offset:
2179 ; GFX900-MUBUF: ; %bb.0: ; %entry
2180 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2181 ; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 0x7b
2182 ; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32
2183 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
2184 ; GFX900-MUBUF-NEXT: buffer_load_ubyte_d16 v0, off, s[0:3], s32 offset:4095 glc
2185 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
2186 ; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
2187 ; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
2188 ; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31]
2190 ; GFX906-LABEL: load_private_lo_v2f16_reglo_vreg_zexti8_to_offset:
2191 ; GFX906: ; %bb.0: ; %entry
2192 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2193 ; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b
2194 ; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32
2195 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2196 ; GFX906-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095 glc
2197 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2198 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2199 ; GFX906-NEXT: v_and_b32_e32 v1, 0xffff, v1
2200 ; GFX906-NEXT: v_lshl_or_b32 v0, v0, 16, v1
2201 ; GFX906-NEXT: global_store_dword v[0:1], v0, off
2202 ; GFX906-NEXT: s_waitcnt vmcnt(0)
2203 ; GFX906-NEXT: s_setpc_b64 s[30:31]
2205 ; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg_zexti8_to_offset:
2206 ; GFX803: ; %bb.0: ; %entry
2207 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2208 ; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b
2209 ; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32
2210 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2211 ; GFX803-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095 glc
2212 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2213 ; GFX803-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2214 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
2215 ; GFX803-NEXT: v_perm_b32 v0, v0, v1, s4
2216 ; GFX803-NEXT: flat_store_dword v[0:1], v0
2217 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2218 ; GFX803-NEXT: s_setpc_b64 s[30:31]
2220 ; GFX900-FLATSCR-LABEL: load_private_lo_v2f16_reglo_vreg_zexti8_to_offset:
2221 ; GFX900-FLATSCR: ; %bb.0: ; %entry
2222 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2223 ; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b
2224 ; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32
2225 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2226 ; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v0, off, s32 offset:4095 glc
2227 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2228 ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
2229 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
2230 ; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31]
; Two stack objects: %obj0 is 10 x i32 = 40 bytes, %obj1 is 4096 bytes.
; 40 + 4055 (the GEP index used below) = 4095, the offset expected by the
; checks above. NOTE(review): this is consistent with %obj1 being laid out
; directly after %obj0, but the layout itself is inferred from the expected
; output - confirm.
2232 %obj0 = alloca [10 x i32], align 4, addrspace(5)
2233 %obj1 = alloca [4096 x i8], align 2, addrspace(5)
2234 %reg.bc = bitcast i32 %reg to <2 x half>
2235 %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
; Volatile store of 123 (0x7b) through %obj0; it appears in the expected
; output as the v_mov of 0x7b followed by the dword store at s32.
2236 store volatile i32 123, i32 addrspace(5)* %bc
2237 %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4055
; The load is volatile; the expected load instructions carry the glc bit.
2238 %load = load volatile i8, i8 addrspace(5)* %gep
; Zero-extension to 16 bits is what this test exercises (the expected loads
; are the unsigned-byte forms).
2239 %load.ext = zext i8 %load to i16
2240 %bitcast = bitcast i16 %load.ext to half
; Insert into element 0 only; element 1 of %reg.bc is carried through.
2241 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0
2242 store <2 x half> %build1, <2 x half> addrspace(1)* undef
; Attribute group #0, referenced as `#0` on the test functions' define lines:
; marks those functions nounwind.
2246 attributes #0 = { nounwind }