1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
6 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
; Dynamic-index extract from a <4 x i8> loaded through an inreg addrspace(4)
; pointer, with the i32 index also passed inreg. Every check block expects the
; index (read from s4) to be masked with 3, shifted left by 3, and then used as
; the scalar right-shift amount applied to the loaded dword.
8 define amdgpu_ps i8 @extractelement_sgpr_v4i8_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) {
9 ; GCN-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
11 ; GCN-NEXT: s_load_dword s0, s[2:3], 0x0
12 ; GCN-NEXT: s_and_b32 s1, s4, 3
13 ; GCN-NEXT: s_lshl_b32 s1, s1, 3
14 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
15 ; GCN-NEXT: s_lshr_b32 s0, s0, s1
16 ; GCN-NEXT: ; return to shader part epilog
18 ; GFX10-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
20 ; GFX10-NEXT: s_load_dword s0, s[2:3], 0x0
21 ; GFX10-NEXT: s_and_b32 s1, s4, 3
22 ; GFX10-NEXT: s_lshl_b32 s1, s1, 3
23 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
24 ; GFX10-NEXT: s_lshr_b32 s0, s0, s1
25 ; GFX10-NEXT: ; return to shader part epilog
27 ; GFX11-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
29 ; GFX11-NEXT: s_load_b32 s0, s[2:3], 0x0
30 ; GFX11-NEXT: s_and_b32 s1, s4, 3
31 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
32 ; GFX11-NEXT: s_lshl_b32 s1, s1, 3
33 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
34 ; GFX11-NEXT: s_lshr_b32 s0, s0, s1
35 ; GFX11-NEXT: ; return to shader part epilog
36 %vector = load <4 x i8>, ptr addrspace(4) %ptr
37 %element = extractelement <4 x i8> %vector, i32 %idx
; Dynamic-index extract from a <4 x i8> loaded through a non-inreg
; addrspace(1) pointer, with the i32 index passed inreg. The checks compute the
; shift amount from s2 with scalar and/shift-left instructions, shift the
; loaded dword with v_lshrrev_b32, and copy the result into s0 with
; v_readfirstlane_b32. The load instruction differs per target (global, flat,
; or addr64 buffer load).
41 define amdgpu_ps i8 @extractelement_vgpr_v4i8_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) {
42 ; GFX9-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
44 ; GFX9-NEXT: global_load_dword v0, v[0:1], off
45 ; GFX9-NEXT: s_and_b32 s0, s2, 3
46 ; GFX9-NEXT: s_lshl_b32 s0, s0, 3
47 ; GFX9-NEXT: s_waitcnt vmcnt(0)
48 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, s0, v0
49 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
50 ; GFX9-NEXT: ; return to shader part epilog
52 ; GFX8-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
54 ; GFX8-NEXT: flat_load_dword v0, v[0:1]
55 ; GFX8-NEXT: s_and_b32 s0, s2, 3
56 ; GFX8-NEXT: s_lshl_b32 s0, s0, 3
57 ; GFX8-NEXT: s_waitcnt vmcnt(0)
58 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, s0, v0
59 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
60 ; GFX8-NEXT: ; return to shader part epilog
62 ; GFX7-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
64 ; GFX7-NEXT: s_mov_b32 s6, 0
65 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
66 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
67 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
68 ; GFX7-NEXT: s_and_b32 s0, s2, 3
69 ; GFX7-NEXT: s_lshl_b32 s0, s0, 3
70 ; GFX7-NEXT: s_waitcnt vmcnt(0)
71 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, s0, v0
72 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
73 ; GFX7-NEXT: ; return to shader part epilog
75 ; GFX10-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
77 ; GFX10-NEXT: global_load_dword v0, v[0:1], off
78 ; GFX10-NEXT: s_and_b32 s0, s2, 3
79 ; GFX10-NEXT: s_lshl_b32 s0, s0, 3
80 ; GFX10-NEXT: s_waitcnt vmcnt(0)
81 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, s0, v0
82 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
83 ; GFX10-NEXT: ; return to shader part epilog
85 ; GFX11-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
87 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off
88 ; GFX11-NEXT: s_and_b32 s0, s2, 3
89 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
90 ; GFX11-NEXT: s_lshl_b32 s0, s0, 3
91 ; GFX11-NEXT: s_waitcnt vmcnt(0)
92 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, s0, v0
93 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
94 ; GFX11-NEXT: ; return to shader part epilog
95 %vector = load <4 x i8>, ptr addrspace(1) %ptr
96 %element = extractelement <4 x i8> %vector, i32 %idx
; Dynamic-index extract from a <4 x i8> loaded through an addrspace(1)
; pointer, in a function without the amdgpu_ps calling convention and with no
; inreg arguments. The checks expect the index in v2 to be masked with 3 and
; shifted left by 3 using vector instructions, then used as the v_lshrrev_b32
; shift amount on the loaded dword, followed by a return through
; s_setpc_b64 s[30:31].
100 define i8 @extractelement_vgpr_v4i8_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) {
101 ; GFX9-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
103 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104 ; GFX9-NEXT: global_load_dword v0, v[0:1], off
105 ; GFX9-NEXT: v_and_b32_e32 v1, 3, v2
106 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v1
107 ; GFX9-NEXT: s_waitcnt vmcnt(0)
108 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, v1, v0
109 ; GFX9-NEXT: s_setpc_b64 s[30:31]
111 ; GFX8-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
113 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114 ; GFX8-NEXT: flat_load_dword v0, v[0:1]
115 ; GFX8-NEXT: v_and_b32_e32 v1, 3, v2
116 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 3, v1
117 ; GFX8-NEXT: s_waitcnt vmcnt(0)
118 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, v1, v0
119 ; GFX8-NEXT: s_setpc_b64 s[30:31]
121 ; GFX7-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
123 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
124 ; GFX7-NEXT: s_mov_b32 s6, 0
125 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
126 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
127 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
128 ; GFX7-NEXT: v_and_b32_e32 v1, 3, v2
129 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 3, v1
130 ; GFX7-NEXT: s_waitcnt vmcnt(0)
131 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v1, v0
132 ; GFX7-NEXT: s_setpc_b64 s[30:31]
134 ; GFX10-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
136 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
137 ; GFX10-NEXT: global_load_dword v0, v[0:1], off
138 ; GFX10-NEXT: v_and_b32_e32 v1, 3, v2
139 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 3, v1
140 ; GFX10-NEXT: s_waitcnt vmcnt(0)
141 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, v1, v0
142 ; GFX10-NEXT: s_setpc_b64 s[30:31]
144 ; GFX11-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
146 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
147 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off
148 ; GFX11-NEXT: v_and_b32_e32 v1, 3, v2
149 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
150 ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 3, v1
151 ; GFX11-NEXT: s_waitcnt vmcnt(0)
152 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, v1, v0
153 ; GFX11-NEXT: s_setpc_b64 s[30:31]
154 %vector = load <4 x i8>, ptr addrspace(1) %ptr
155 %element = extractelement <4 x i8> %vector, i32 %idx
; Dynamic-index extract from a <4 x i8> loaded through an inreg addrspace(4)
; pointer, with a non-inreg i32 index. The checks expect a scalar load into s0,
; a vector mask-with-3 and shift-left-by-3 on the index in v0, a vector right
; shift of s0 by that amount, and v_readfirstlane_b32 to place the result in
; s0. The GFX7 checks use v_lshr_b32_e32 with s0 as the first source, while the
; other targets use v_lshrrev_b32_e64.
159 define amdgpu_ps i8 @extractelement_sgpr_v4i8_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) {
160 ; GFX9-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
162 ; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0
163 ; GFX9-NEXT: v_and_b32_e32 v0, 3, v0
164 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0
165 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
166 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s0
167 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
168 ; GFX9-NEXT: ; return to shader part epilog
170 ; GFX8-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
172 ; GFX8-NEXT: s_load_dword s0, s[2:3], 0x0
173 ; GFX8-NEXT: v_and_b32_e32 v0, 3, v0
174 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 3, v0
175 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
176 ; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s0
177 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
178 ; GFX8-NEXT: ; return to shader part epilog
180 ; GFX7-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
182 ; GFX7-NEXT: s_load_dword s0, s[2:3], 0x0
183 ; GFX7-NEXT: v_and_b32_e32 v0, 3, v0
184 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 3, v0
185 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
186 ; GFX7-NEXT: v_lshr_b32_e32 v0, s0, v0
187 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
188 ; GFX7-NEXT: ; return to shader part epilog
190 ; GFX10-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
192 ; GFX10-NEXT: s_load_dword s0, s[2:3], 0x0
193 ; GFX10-NEXT: v_and_b32_e32 v0, 3, v0
194 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0
195 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
196 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, s0
197 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
198 ; GFX10-NEXT: ; return to shader part epilog
200 ; GFX11-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
202 ; GFX11-NEXT: s_load_b32 s0, s[2:3], 0x0
203 ; GFX11-NEXT: v_and_b32_e32 v0, 3, v0
204 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
205 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0
206 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
207 ; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, s0
208 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
209 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
210 ; GFX11-NEXT: ; return to shader part epilog
211 %vector = load <4 x i8>, ptr addrspace(4) %ptr
212 %element = extractelement <4 x i8> %vector, i32 %idx
; Constant index 0 on a <4 x i8> loaded through an inreg addrspace(4) pointer.
; The checks expect only the scalar dword load into s0 and the wait for it,
; with no shift instruction.
216 define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx0(ptr addrspace(4) inreg %ptr) {
217 ; GCN-LABEL: extractelement_sgpr_v4i8_idx0:
219 ; GCN-NEXT: s_load_dword s0, s[2:3], 0x0
220 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
221 ; GCN-NEXT: ; return to shader part epilog
223 ; GFX10-LABEL: extractelement_sgpr_v4i8_idx0:
225 ; GFX10-NEXT: s_load_dword s0, s[2:3], 0x0
226 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
227 ; GFX10-NEXT: ; return to shader part epilog
229 ; GFX11-LABEL: extractelement_sgpr_v4i8_idx0:
231 ; GFX11-NEXT: s_load_b32 s0, s[2:3], 0x0
232 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
233 ; GFX11-NEXT: ; return to shader part epilog
234 %vector = load <4 x i8>, ptr addrspace(4) %ptr
235 %element = extractelement <4 x i8> %vector, i32 0
; Constant index 1 on a <4 x i8> loaded through an inreg addrspace(4) pointer.
; The checks expect the loaded dword in s0 to be shifted right by 8 with
; s_lshr_b32.
239 define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx1(ptr addrspace(4) inreg %ptr) {
240 ; GCN-LABEL: extractelement_sgpr_v4i8_idx1:
242 ; GCN-NEXT: s_load_dword s0, s[2:3], 0x0
243 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
244 ; GCN-NEXT: s_lshr_b32 s0, s0, 8
245 ; GCN-NEXT: ; return to shader part epilog
247 ; GFX10-LABEL: extractelement_sgpr_v4i8_idx1:
249 ; GFX10-NEXT: s_load_dword s0, s[2:3], 0x0
250 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
251 ; GFX10-NEXT: s_lshr_b32 s0, s0, 8
252 ; GFX10-NEXT: ; return to shader part epilog
254 ; GFX11-LABEL: extractelement_sgpr_v4i8_idx1:
256 ; GFX11-NEXT: s_load_b32 s0, s[2:3], 0x0
257 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
258 ; GFX11-NEXT: s_lshr_b32 s0, s0, 8
259 ; GFX11-NEXT: ; return to shader part epilog
260 %vector = load <4 x i8>, ptr addrspace(4) %ptr
261 %element = extractelement <4 x i8> %vector, i32 1
; Constant index 2 on a <4 x i8> loaded through an inreg addrspace(4) pointer.
; The checks expect the loaded dword in s0 to be shifted right by 16 with
; s_lshr_b32.
265 define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx2(ptr addrspace(4) inreg %ptr) {
266 ; GCN-LABEL: extractelement_sgpr_v4i8_idx2:
268 ; GCN-NEXT: s_load_dword s0, s[2:3], 0x0
269 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
270 ; GCN-NEXT: s_lshr_b32 s0, s0, 16
271 ; GCN-NEXT: ; return to shader part epilog
273 ; GFX10-LABEL: extractelement_sgpr_v4i8_idx2:
275 ; GFX10-NEXT: s_load_dword s0, s[2:3], 0x0
276 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
277 ; GFX10-NEXT: s_lshr_b32 s0, s0, 16
278 ; GFX10-NEXT: ; return to shader part epilog
280 ; GFX11-LABEL: extractelement_sgpr_v4i8_idx2:
282 ; GFX11-NEXT: s_load_b32 s0, s[2:3], 0x0
283 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
284 ; GFX11-NEXT: s_lshr_b32 s0, s0, 16
285 ; GFX11-NEXT: ; return to shader part epilog
286 %vector = load <4 x i8>, ptr addrspace(4) %ptr
287 %element = extractelement <4 x i8> %vector, i32 2
; Constant index 3 on a <4 x i8> loaded through an inreg addrspace(4) pointer.
; The checks expect the loaded dword in s0 to be shifted right by 24 with
; s_lshr_b32.
291 define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx3(ptr addrspace(4) inreg %ptr) {
292 ; GCN-LABEL: extractelement_sgpr_v4i8_idx3:
294 ; GCN-NEXT: s_load_dword s0, s[2:3], 0x0
295 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
296 ; GCN-NEXT: s_lshr_b32 s0, s0, 24
297 ; GCN-NEXT: ; return to shader part epilog
299 ; GFX10-LABEL: extractelement_sgpr_v4i8_idx3:
301 ; GFX10-NEXT: s_load_dword s0, s[2:3], 0x0
302 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
303 ; GFX10-NEXT: s_lshr_b32 s0, s0, 24
304 ; GFX10-NEXT: ; return to shader part epilog
306 ; GFX11-LABEL: extractelement_sgpr_v4i8_idx3:
308 ; GFX11-NEXT: s_load_b32 s0, s[2:3], 0x0
309 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
310 ; GFX11-NEXT: s_lshr_b32 s0, s0, 24
311 ; GFX11-NEXT: ; return to shader part epilog
312 %vector = load <4 x i8>, ptr addrspace(4) %ptr
313 %element = extractelement <4 x i8> %vector, i32 3
; Constant index 0 on a <4 x i8> loaded through an addrspace(1) pointer, in a
; function without the amdgpu_ps calling convention. The checks expect only
; the dword load into v0 and the wait for it before the s_setpc_b64 return,
; with no shift instruction.
317 define i8 @extractelement_vgpr_v4i8_idx0(ptr addrspace(1) %ptr) {
318 ; GFX9-LABEL: extractelement_vgpr_v4i8_idx0:
320 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
321 ; GFX9-NEXT: global_load_dword v0, v[0:1], off
322 ; GFX9-NEXT: s_waitcnt vmcnt(0)
323 ; GFX9-NEXT: s_setpc_b64 s[30:31]
325 ; GFX8-LABEL: extractelement_vgpr_v4i8_idx0:
327 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
328 ; GFX8-NEXT: flat_load_dword v0, v[0:1]
329 ; GFX8-NEXT: s_waitcnt vmcnt(0)
330 ; GFX8-NEXT: s_setpc_b64 s[30:31]
332 ; GFX7-LABEL: extractelement_vgpr_v4i8_idx0:
334 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
335 ; GFX7-NEXT: s_mov_b32 s6, 0
336 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
337 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
338 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
339 ; GFX7-NEXT: s_waitcnt vmcnt(0)
340 ; GFX7-NEXT: s_setpc_b64 s[30:31]
342 ; GFX10-LABEL: extractelement_vgpr_v4i8_idx0:
344 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
345 ; GFX10-NEXT: global_load_dword v0, v[0:1], off
346 ; GFX10-NEXT: s_waitcnt vmcnt(0)
347 ; GFX10-NEXT: s_setpc_b64 s[30:31]
349 ; GFX11-LABEL: extractelement_vgpr_v4i8_idx0:
351 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
352 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off
353 ; GFX11-NEXT: s_waitcnt vmcnt(0)
354 ; GFX11-NEXT: s_setpc_b64 s[30:31]
355 %vector = load <4 x i8>, ptr addrspace(1) %ptr
356 %element = extractelement <4 x i8> %vector, i32 0
; Constant index 1 on a <4 x i8> loaded through an addrspace(1) pointer, in a
; function without the amdgpu_ps calling convention. The checks expect the
; loaded dword in v0 to be shifted right by 8 with v_lshrrev_b32 before the
; s_setpc_b64 return.
360 define i8 @extractelement_vgpr_v4i8_idx1(ptr addrspace(1) %ptr) {
361 ; GFX9-LABEL: extractelement_vgpr_v4i8_idx1:
363 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
364 ; GFX9-NEXT: global_load_dword v0, v[0:1], off
365 ; GFX9-NEXT: s_waitcnt vmcnt(0)
366 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0
367 ; GFX9-NEXT: s_setpc_b64 s[30:31]
369 ; GFX8-LABEL: extractelement_vgpr_v4i8_idx1:
371 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
372 ; GFX8-NEXT: flat_load_dword v0, v[0:1]
373 ; GFX8-NEXT: s_waitcnt vmcnt(0)
374 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0
375 ; GFX8-NEXT: s_setpc_b64 s[30:31]
377 ; GFX7-LABEL: extractelement_vgpr_v4i8_idx1:
379 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
380 ; GFX7-NEXT: s_mov_b32 s6, 0
381 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
382 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
383 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
384 ; GFX7-NEXT: s_waitcnt vmcnt(0)
385 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0
386 ; GFX7-NEXT: s_setpc_b64 s[30:31]
388 ; GFX10-LABEL: extractelement_vgpr_v4i8_idx1:
390 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
391 ; GFX10-NEXT: global_load_dword v0, v[0:1], off
392 ; GFX10-NEXT: s_waitcnt vmcnt(0)
393 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0
394 ; GFX10-NEXT: s_setpc_b64 s[30:31]
396 ; GFX11-LABEL: extractelement_vgpr_v4i8_idx1:
398 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
399 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off
400 ; GFX11-NEXT: s_waitcnt vmcnt(0)
401 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v0
402 ; GFX11-NEXT: s_setpc_b64 s[30:31]
403 %vector = load <4 x i8>, ptr addrspace(1) %ptr
404 %element = extractelement <4 x i8> %vector, i32 1
; Constant index 2 on a <4 x i8> loaded through an addrspace(1) pointer, in a
; function without the amdgpu_ps calling convention. The checks expect the
; loaded dword in v0 to be shifted right by 16 with v_lshrrev_b32 before the
; s_setpc_b64 return.
408 define i8 @extractelement_vgpr_v4i8_idx2(ptr addrspace(1) %ptr) {
409 ; GFX9-LABEL: extractelement_vgpr_v4i8_idx2:
411 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
412 ; GFX9-NEXT: global_load_dword v0, v[0:1], off
413 ; GFX9-NEXT: s_waitcnt vmcnt(0)
414 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
415 ; GFX9-NEXT: s_setpc_b64 s[30:31]
417 ; GFX8-LABEL: extractelement_vgpr_v4i8_idx2:
419 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
420 ; GFX8-NEXT: flat_load_dword v0, v[0:1]
421 ; GFX8-NEXT: s_waitcnt vmcnt(0)
422 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
423 ; GFX8-NEXT: s_setpc_b64 s[30:31]
425 ; GFX7-LABEL: extractelement_vgpr_v4i8_idx2:
427 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
428 ; GFX7-NEXT: s_mov_b32 s6, 0
429 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
430 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
431 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
432 ; GFX7-NEXT: s_waitcnt vmcnt(0)
433 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
434 ; GFX7-NEXT: s_setpc_b64 s[30:31]
436 ; GFX10-LABEL: extractelement_vgpr_v4i8_idx2:
438 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
439 ; GFX10-NEXT: global_load_dword v0, v[0:1], off
440 ; GFX10-NEXT: s_waitcnt vmcnt(0)
441 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0
442 ; GFX10-NEXT: s_setpc_b64 s[30:31]
444 ; GFX11-LABEL: extractelement_vgpr_v4i8_idx2:
446 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
447 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off
448 ; GFX11-NEXT: s_waitcnt vmcnt(0)
449 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0
450 ; GFX11-NEXT: s_setpc_b64 s[30:31]
451 %vector = load <4 x i8>, ptr addrspace(1) %ptr
452 %element = extractelement <4 x i8> %vector, i32 2
; Constant index 3 on a <4 x i8> loaded through an addrspace(1) pointer, in a
; function without the amdgpu_ps calling convention. The checks expect the
; loaded dword in v0 to be shifted right by 24 with v_lshrrev_b32 before the
; s_setpc_b64 return.
456 define i8 @extractelement_vgpr_v4i8_idx3(ptr addrspace(1) %ptr) {
457 ; GFX9-LABEL: extractelement_vgpr_v4i8_idx3:
459 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
460 ; GFX9-NEXT: global_load_dword v0, v[0:1], off
461 ; GFX9-NEXT: s_waitcnt vmcnt(0)
462 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0
463 ; GFX9-NEXT: s_setpc_b64 s[30:31]
465 ; GFX8-LABEL: extractelement_vgpr_v4i8_idx3:
467 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
468 ; GFX8-NEXT: flat_load_dword v0, v[0:1]
469 ; GFX8-NEXT: s_waitcnt vmcnt(0)
470 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0
471 ; GFX8-NEXT: s_setpc_b64 s[30:31]
473 ; GFX7-LABEL: extractelement_vgpr_v4i8_idx3:
475 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
476 ; GFX7-NEXT: s_mov_b32 s6, 0
477 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
478 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
479 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
480 ; GFX7-NEXT: s_waitcnt vmcnt(0)
481 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0
482 ; GFX7-NEXT: s_setpc_b64 s[30:31]
484 ; GFX10-LABEL: extractelement_vgpr_v4i8_idx3:
486 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
487 ; GFX10-NEXT: global_load_dword v0, v[0:1], off
488 ; GFX10-NEXT: s_waitcnt vmcnt(0)
489 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0
490 ; GFX10-NEXT: s_setpc_b64 s[30:31]
492 ; GFX11-LABEL: extractelement_vgpr_v4i8_idx3:
494 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
495 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off
496 ; GFX11-NEXT: s_waitcnt vmcnt(0)
497 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v0
498 ; GFX11-NEXT: s_setpc_b64 s[30:31]
499 %vector = load <4 x i8>, ptr addrspace(1) %ptr
500 %element = extractelement <4 x i8> %vector, i32 3
; Dynamic-index extract from an <8 x i8> loaded through an inreg addrspace(4)
; pointer, with the i32 index also passed inreg. The checks expect a two-dword
; scalar load into s[0:1], a compare of (index shifted right by 2) against 1
; feeding s_cselect_b32 to pick s1 or s0, and then the index masked with 3 and
; shifted left by 3 to form the s_lshr_b32 shift amount.
504 define amdgpu_ps i8 @extractelement_sgpr_v8i8_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) {
505 ; GCN-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
507 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
508 ; GCN-NEXT: s_lshr_b32 s2, s4, 2
509 ; GCN-NEXT: s_cmp_eq_u32 s2, 1
510 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
511 ; GCN-NEXT: s_cselect_b32 s0, s1, s0
512 ; GCN-NEXT: s_and_b32 s1, s4, 3
513 ; GCN-NEXT: s_lshl_b32 s1, s1, 3
514 ; GCN-NEXT: s_lshr_b32 s0, s0, s1
515 ; GCN-NEXT: ; return to shader part epilog
517 ; GFX10-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
519 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
520 ; GFX10-NEXT: s_lshr_b32 s2, s4, 2
521 ; GFX10-NEXT: s_cmp_eq_u32 s2, 1
522 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
523 ; GFX10-NEXT: s_cselect_b32 s0, s1, s0
524 ; GFX10-NEXT: s_and_b32 s1, s4, 3
525 ; GFX10-NEXT: s_lshl_b32 s1, s1, 3
526 ; GFX10-NEXT: s_lshr_b32 s0, s0, s1
527 ; GFX10-NEXT: ; return to shader part epilog
529 ; GFX11-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
531 ; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
532 ; GFX11-NEXT: s_lshr_b32 s2, s4, 2
533 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
534 ; GFX11-NEXT: s_cmp_eq_u32 s2, 1
535 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
536 ; GFX11-NEXT: s_cselect_b32 s0, s1, s0
537 ; GFX11-NEXT: s_and_b32 s1, s4, 3
538 ; GFX11-NEXT: s_lshl_b32 s1, s1, 3
539 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
540 ; GFX11-NEXT: s_lshr_b32 s0, s0, s1
541 ; GFX11-NEXT: ; return to shader part epilog
542 %vector = load <8 x i8>, ptr addrspace(4) %ptr
543 %element = extractelement <8 x i8> %vector, i32 %idx
; Dynamic-index extract from an <8 x i8> loaded through a non-inreg
; addrspace(1) pointer, with the i32 index passed inreg. The checks expect a
; two-dword load into v[0:1], a v_cmp_eq_u32 of (s2 shifted right by 2) against
; 1 driving v_cndmask_b32 to pick v1 or v0, a scalar mask-with-3 and
; shift-left-by-3 of the index for the v_lshrrev_b32 shift amount, and
; v_readfirstlane_b32 to place the result in s0.
547 define amdgpu_ps i8 @extractelement_vgpr_v8i8_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) {
548 ; GFX9-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
550 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
551 ; GFX9-NEXT: s_lshr_b32 s0, s2, 2
552 ; GFX9-NEXT: s_and_b32 s1, s2, 3
553 ; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1
554 ; GFX9-NEXT: s_lshl_b32 s0, s1, 3
555 ; GFX9-NEXT: s_waitcnt vmcnt(0)
556 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
557 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, s0, v0
558 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
559 ; GFX9-NEXT: ; return to shader part epilog
561 ; GFX8-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
563 ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
564 ; GFX8-NEXT: s_lshr_b32 s0, s2, 2
565 ; GFX8-NEXT: s_and_b32 s1, s2, 3
566 ; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1
567 ; GFX8-NEXT: s_lshl_b32 s0, s1, 3
568 ; GFX8-NEXT: s_waitcnt vmcnt(0)
569 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
570 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, s0, v0
571 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
572 ; GFX8-NEXT: ; return to shader part epilog
574 ; GFX7-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
576 ; GFX7-NEXT: s_mov_b32 s6, 0
577 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
578 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
579 ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
580 ; GFX7-NEXT: s_lshr_b32 s0, s2, 2
581 ; GFX7-NEXT: s_and_b32 s1, s2, 3
582 ; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1
583 ; GFX7-NEXT: s_lshl_b32 s0, s1, 3
584 ; GFX7-NEXT: s_waitcnt vmcnt(0)
585 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
586 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, s0, v0
587 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
588 ; GFX7-NEXT: ; return to shader part epilog
590 ; GFX10-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
592 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
593 ; GFX10-NEXT: s_lshr_b32 s0, s2, 2
594 ; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 1
595 ; GFX10-NEXT: s_and_b32 s0, s2, 3
596 ; GFX10-NEXT: s_lshl_b32 s0, s0, 3
597 ; GFX10-NEXT: s_waitcnt vmcnt(0)
598 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
599 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, s0, v0
600 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
601 ; GFX10-NEXT: ; return to shader part epilog
603 ; GFX11-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
605 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
606 ; GFX11-NEXT: s_lshr_b32 s0, s2, 2
607 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
608 ; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 1
609 ; GFX11-NEXT: s_and_b32 s0, s2, 3
610 ; GFX11-NEXT: s_lshl_b32 s0, s0, 3
611 ; GFX11-NEXT: s_waitcnt vmcnt(0)
612 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
613 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
614 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, s0, v0
615 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
616 ; GFX11-NEXT: ; return to shader part epilog
617 %vector = load <8 x i8>, ptr addrspace(1) %ptr
618 %element = extractelement <8 x i8> %vector, i32 %idx
; Dynamic-index extract from an <8 x i8> loaded through an addrspace(1)
; pointer, in a function without the amdgpu_ps calling convention and with no
; inreg arguments. The checks expect (v2 shifted right by 2) to be compared
; against 1 to drive a cndmask between v0 and v1, and v2 masked with 3 and
; shifted left by 3 to form the v_lshrrev_b32 shift amount. The GFX11 checks
; expect the cndmask and the shift-left to be paired in one v_dual instruction.
622 define i8 @extractelement_vgpr_v8i8_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) {
623 ; GFX9-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
625 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
626 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
627 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 2, v2
628 ; GFX9-NEXT: v_and_b32_e32 v2, 3, v2
629 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3
630 ; GFX9-NEXT: s_waitcnt vmcnt(0)
631 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
632 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v2
633 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, v1, v0
634 ; GFX9-NEXT: s_setpc_b64 s[30:31]
636 ; GFX8-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
638 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
639 ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
640 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 2, v2
641 ; GFX8-NEXT: v_and_b32_e32 v2, 3, v2
642 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3
643 ; GFX8-NEXT: s_waitcnt vmcnt(0)
644 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
645 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 3, v2
646 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, v1, v0
647 ; GFX8-NEXT: s_setpc_b64 s[30:31]
649 ; GFX7-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
651 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
652 ; GFX7-NEXT: s_mov_b32 s6, 0
653 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
654 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
655 ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
656 ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 2, v2
657 ; GFX7-NEXT: v_and_b32_e32 v2, 3, v2
658 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3
659 ; GFX7-NEXT: s_waitcnt vmcnt(0)
660 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
661 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 3, v2
662 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v1, v0
663 ; GFX7-NEXT: s_setpc_b64 s[30:31]
665 ; GFX10-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
667 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
668 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
669 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 2, v2
670 ; GFX10-NEXT: v_and_b32_e32 v2, 3, v2
671 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
672 ; GFX10-NEXT: s_waitcnt vmcnt(0)
673 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
674 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 3, v2
675 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, v1, v0
676 ; GFX10-NEXT: s_setpc_b64 s[30:31]
678 ; GFX11-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
680 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
681 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
682 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 2, v2
683 ; GFX11-NEXT: v_and_b32_e32 v2, 3, v2
684 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
685 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
686 ; GFX11-NEXT: s_waitcnt vmcnt(0)
687 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v1 :: v_dual_lshlrev_b32 v1, 3, v2
688 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
689 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, v1, v0
690 ; GFX11-NEXT: s_setpc_b64 s[30:31]
691 %vector = load <8 x i8>, ptr addrspace(1) %ptr
692 %element = extractelement <8 x i8> %vector, i32 %idx
; Dynamic-index extract from an <8 x i8> loaded through an inreg addrspace(4)
; pointer, with a non-inreg i32 index. The checks expect a two-dword scalar
; load into s[0:1], (v0 shifted right by 2) compared against 1, the loaded
; scalar values moved into vector registers (both of them under the GCN prefix,
; only s1 under the GFX10 and GFX11 prefixes) for the cndmask, a
; v_lshrrev_b32 by the masked and scaled index, and v_readfirstlane_b32 to
; place the result in s0.
696 define amdgpu_ps i8 @extractelement_sgpr_v8i8_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) {
697 ; GCN-LABEL: extractelement_sgpr_v8i8_vgpr_idx:
699 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
700 ; GCN-NEXT: v_lshrrev_b32_e32 v1, 2, v0
701 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
702 ; GCN-NEXT: v_and_b32_e32 v0, 3, v0
703 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0
704 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
705 ; GCN-NEXT: v_mov_b32_e32 v2, s0
706 ; GCN-NEXT: v_mov_b32_e32 v3, s1
707 ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
708 ; GCN-NEXT: v_lshrrev_b32_e32 v0, v0, v1
709 ; GCN-NEXT: v_readfirstlane_b32 s0, v0
710 ; GCN-NEXT: ; return to shader part epilog
712 ; GFX10-LABEL: extractelement_sgpr_v8i8_vgpr_idx:
714 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
715 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 2, v0
716 ; GFX10-NEXT: v_and_b32_e32 v0, 3, v0
717 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
718 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0
719 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
720 ; GFX10-NEXT: v_mov_b32_e32 v2, s1
721 ; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v2, vcc_lo
722 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, v0, v1
723 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
724 ; GFX10-NEXT: ; return to shader part epilog
726 ; GFX11-LABEL: extractelement_sgpr_v8i8_vgpr_idx:
728 ; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
729 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 2, v0
730 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
731 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
732 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
733 ; GFX11-NEXT: v_mov_b32_e32 v2, s1
734 ; GFX11-NEXT: v_dual_cndmask_b32 v1, s0, v2 :: v_dual_and_b32 v0, 3, v0
735 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
736 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0
737 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, v0, v1
738 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
739 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
740 ; GFX11-NEXT: ; return to shader part epilog
741 %vector = load <8 x i8>, ptr addrspace(4) %ptr
742 %element = extractelement <8 x i8> %vector, i32 %idx
; Constant index 0 on an <8 x i8> loaded through an inreg addrspace(4)
; pointer. The checks expect only the two-dword scalar load into s[0:1] and
; the wait for it, with no shift or move afterwards.
746 define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx0(ptr addrspace(4) inreg %ptr) {
747 ; GCN-LABEL: extractelement_sgpr_v8i8_idx0:
749 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
750 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
751 ; GCN-NEXT: ; return to shader part epilog
753 ; GFX10-LABEL: extractelement_sgpr_v8i8_idx0:
755 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
756 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
757 ; GFX10-NEXT: ; return to shader part epilog
759 ; GFX11-LABEL: extractelement_sgpr_v8i8_idx0:
761 ; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
762 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
763 ; GFX11-NEXT: ; return to shader part epilog
764 %vector = load <8 x i8>, ptr addrspace(4) %ptr
765 %element = extractelement <8 x i8> %vector, i32 0
; Constant index 1 on an <8 x i8> loaded through an inreg addrspace(4)
; pointer. The checks expect the first loaded dword (s0) to be shifted right
; by 8 with s_lshr_b32.
769 define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx1(ptr addrspace(4) inreg %ptr) {
770 ; GCN-LABEL: extractelement_sgpr_v8i8_idx1:
772 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
773 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
774 ; GCN-NEXT: s_lshr_b32 s0, s0, 8
775 ; GCN-NEXT: ; return to shader part epilog
777 ; GFX10-LABEL: extractelement_sgpr_v8i8_idx1:
779 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
780 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
781 ; GFX10-NEXT: s_lshr_b32 s0, s0, 8
782 ; GFX10-NEXT: ; return to shader part epilog
784 ; GFX11-LABEL: extractelement_sgpr_v8i8_idx1:
786 ; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
787 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
788 ; GFX11-NEXT: s_lshr_b32 s0, s0, 8
789 ; GFX11-NEXT: ; return to shader part epilog
790 %vector = load <8 x i8>, ptr addrspace(4) %ptr
791 %element = extractelement <8 x i8> %vector, i32 1
; Constant index 2 on an <8 x i8> loaded through an inreg addrspace(4)
; pointer. The checks expect the first loaded dword (s0) to be shifted right
; by 16 with s_lshr_b32.
795 define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx2(ptr addrspace(4) inreg %ptr) {
796 ; GCN-LABEL: extractelement_sgpr_v8i8_idx2:
798 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
799 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
800 ; GCN-NEXT: s_lshr_b32 s0, s0, 16
801 ; GCN-NEXT: ; return to shader part epilog
803 ; GFX10-LABEL: extractelement_sgpr_v8i8_idx2:
805 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
806 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
807 ; GFX10-NEXT: s_lshr_b32 s0, s0, 16
808 ; GFX10-NEXT: ; return to shader part epilog
810 ; GFX11-LABEL: extractelement_sgpr_v8i8_idx2:
812 ; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
813 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
814 ; GFX11-NEXT: s_lshr_b32 s0, s0, 16
815 ; GFX11-NEXT: ; return to shader part epilog
816 %vector = load <8 x i8>, ptr addrspace(4) %ptr
817 %element = extractelement <8 x i8> %vector, i32 2
; Constant index 3 on an <8 x i8> loaded through an inreg addrspace(4)
; pointer. The checks expect the first loaded dword (s0) to be shifted right
; by 24 with s_lshr_b32.
821 define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx3(ptr addrspace(4) inreg %ptr) {
822 ; GCN-LABEL: extractelement_sgpr_v8i8_idx3:
824 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
825 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
826 ; GCN-NEXT: s_lshr_b32 s0, s0, 24
827 ; GCN-NEXT: ; return to shader part epilog
829 ; GFX10-LABEL: extractelement_sgpr_v8i8_idx3:
831 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
832 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
833 ; GFX10-NEXT: s_lshr_b32 s0, s0, 24
834 ; GFX10-NEXT: ; return to shader part epilog
836 ; GFX11-LABEL: extractelement_sgpr_v8i8_idx3:
838 ; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
839 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
840 ; GFX11-NEXT: s_lshr_b32 s0, s0, 24
841 ; GFX11-NEXT: ; return to shader part epilog
842 %vector = load <8 x i8>, ptr addrspace(4) %ptr
843 %element = extractelement <8 x i8> %vector, i32 3
; Constant index 4 on an <8 x i8> loaded through an inreg addrspace(4)
; pointer. The checks expect the second loaded dword (s1) to be copied into s0
; with s_mov_b32 and no shift.
847 define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx4(ptr addrspace(4) inreg %ptr) {
848 ; GCN-LABEL: extractelement_sgpr_v8i8_idx4:
850 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
851 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
852 ; GCN-NEXT: s_mov_b32 s0, s1
853 ; GCN-NEXT: ; return to shader part epilog
855 ; GFX10-LABEL: extractelement_sgpr_v8i8_idx4:
857 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
858 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
859 ; GFX10-NEXT: s_mov_b32 s0, s1
860 ; GFX10-NEXT: ; return to shader part epilog
862 ; GFX11-LABEL: extractelement_sgpr_v8i8_idx4:
864 ; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
865 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
866 ; GFX11-NEXT: s_mov_b32 s0, s1
867 ; GFX11-NEXT: ; return to shader part epilog
868 %vector = load <8 x i8>, ptr addrspace(4) %ptr
869 %element = extractelement <8 x i8> %vector, i32 4
; Constant-index extract of element 5 from an <8 x i8> loaded through an
; inreg addrspace(4) pointer in an amdgpu_ps function.
; The checks expect a 64-bit scalar load into s[0:1] followed by a right
; shift of the second dword (s1) by 8 bits, with the result written to s0.
873 define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx5(ptr addrspace(4) inreg %ptr) {
874 ; GCN-LABEL: extractelement_sgpr_v8i8_idx5:
876 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
877 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
878 ; GCN-NEXT: s_lshr_b32 s0, s1, 8
879 ; GCN-NEXT: ; return to shader part epilog
881 ; GFX10-LABEL: extractelement_sgpr_v8i8_idx5:
883 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
884 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
885 ; GFX10-NEXT: s_lshr_b32 s0, s1, 8
886 ; GFX10-NEXT: ; return to shader part epilog
888 ; GFX11-LABEL: extractelement_sgpr_v8i8_idx5:
890 ; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
891 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
892 ; GFX11-NEXT: s_lshr_b32 s0, s1, 8
893 ; GFX11-NEXT: ; return to shader part epilog
894 %vector = load <8 x i8>, ptr addrspace(4) %ptr
895 %element = extractelement <8 x i8> %vector, i32 5
; Constant-index extract of element 6 from an <8 x i8> loaded through an
; inreg addrspace(4) pointer in an amdgpu_ps function.
; The checks expect a 64-bit scalar load into s[0:1] followed by a right
; shift of the second dword (s1) by 16 bits, with the result written to s0.
899 define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx6(ptr addrspace(4) inreg %ptr) {
900 ; GCN-LABEL: extractelement_sgpr_v8i8_idx6:
902 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
903 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
904 ; GCN-NEXT: s_lshr_b32 s0, s1, 16
905 ; GCN-NEXT: ; return to shader part epilog
907 ; GFX10-LABEL: extractelement_sgpr_v8i8_idx6:
909 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
910 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
911 ; GFX10-NEXT: s_lshr_b32 s0, s1, 16
912 ; GFX10-NEXT: ; return to shader part epilog
914 ; GFX11-LABEL: extractelement_sgpr_v8i8_idx6:
916 ; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
917 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
918 ; GFX11-NEXT: s_lshr_b32 s0, s1, 16
919 ; GFX11-NEXT: ; return to shader part epilog
920 %vector = load <8 x i8>, ptr addrspace(4) %ptr
921 %element = extractelement <8 x i8> %vector, i32 6
; Constant-index extract of element 7 (the last one) from an <8 x i8>
; loaded through an inreg addrspace(4) pointer in an amdgpu_ps function.
; The checks expect a 64-bit scalar load into s[0:1] followed by a right
; shift of the second dword (s1) by 24 bits, with the result written to s0.
925 define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx7(ptr addrspace(4) inreg %ptr) {
926 ; GCN-LABEL: extractelement_sgpr_v8i8_idx7:
928 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
929 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
930 ; GCN-NEXT: s_lshr_b32 s0, s1, 24
931 ; GCN-NEXT: ; return to shader part epilog
933 ; GFX10-LABEL: extractelement_sgpr_v8i8_idx7:
935 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
936 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
937 ; GFX10-NEXT: s_lshr_b32 s0, s1, 24
938 ; GFX10-NEXT: ; return to shader part epilog
940 ; GFX11-LABEL: extractelement_sgpr_v8i8_idx7:
942 ; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
943 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
944 ; GFX11-NEXT: s_lshr_b32 s0, s1, 24
945 ; GFX11-NEXT: ; return to shader part epilog
946 %vector = load <8 x i8>, ptr addrspace(4) %ptr
947 %element = extractelement <8 x i8> %vector, i32 7
; Constant-index extract of element 0 from an <8 x i8> loaded through an
; addrspace(1) pointer in a function with the default calling convention.
; The checks expect only the 64-bit load into v[0:1], the wait on it and the
; return; no shift or move is expected for index 0.
; The load form differs per target: global_load on GFX9/GFX10/GFX11,
; flat_load on GFX8, and an addr64 buffer_load with an s[4:7] descriptor
; built from constants on GFX7.
951 define i8 @extractelement_vgpr_v8i8_idx0(ptr addrspace(1) %ptr) {
952 ; GFX9-LABEL: extractelement_vgpr_v8i8_idx0:
954 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
955 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
956 ; GFX9-NEXT: s_waitcnt vmcnt(0)
957 ; GFX9-NEXT: s_setpc_b64 s[30:31]
959 ; GFX8-LABEL: extractelement_vgpr_v8i8_idx0:
961 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
962 ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
963 ; GFX8-NEXT: s_waitcnt vmcnt(0)
964 ; GFX8-NEXT: s_setpc_b64 s[30:31]
966 ; GFX7-LABEL: extractelement_vgpr_v8i8_idx0:
968 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
969 ; GFX7-NEXT: s_mov_b32 s6, 0
970 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
971 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
972 ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
973 ; GFX7-NEXT: s_waitcnt vmcnt(0)
974 ; GFX7-NEXT: s_setpc_b64 s[30:31]
976 ; GFX10-LABEL: extractelement_vgpr_v8i8_idx0:
978 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
979 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
980 ; GFX10-NEXT: s_waitcnt vmcnt(0)
981 ; GFX10-NEXT: s_setpc_b64 s[30:31]
983 ; GFX11-LABEL: extractelement_vgpr_v8i8_idx0:
985 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
986 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
987 ; GFX11-NEXT: s_waitcnt vmcnt(0)
988 ; GFX11-NEXT: s_setpc_b64 s[30:31]
989 %vector = load <8 x i8>, ptr addrspace(1) %ptr
990 %element = extractelement <8 x i8> %vector, i32 0
; Constant-index extract of element 1 from an <8 x i8> loaded through an
; addrspace(1) pointer in a function with the default calling convention.
; After the 64-bit load into v[0:1], every target is expected to shift the
; first dword (v0) right by 8 bits in place.
994 define i8 @extractelement_vgpr_v8i8_idx1(ptr addrspace(1) %ptr) {
995 ; GFX9-LABEL: extractelement_vgpr_v8i8_idx1:
997 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
998 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
999 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1000 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0
1001 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1003 ; GFX8-LABEL: extractelement_vgpr_v8i8_idx1:
1005 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1006 ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
1007 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1008 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0
1009 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1011 ; GFX7-LABEL: extractelement_vgpr_v8i8_idx1:
1013 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1014 ; GFX7-NEXT: s_mov_b32 s6, 0
1015 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1016 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1017 ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
1018 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1019 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0
1020 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1022 ; GFX10-LABEL: extractelement_vgpr_v8i8_idx1:
1024 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1025 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
1026 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1027 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0
1028 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1030 ; GFX11-LABEL: extractelement_vgpr_v8i8_idx1:
1032 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1033 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
1034 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1035 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v0
1036 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1037 %vector = load <8 x i8>, ptr addrspace(1) %ptr
1038 %element = extractelement <8 x i8> %vector, i32 1
; Constant-index extract of element 2 from an <8 x i8> loaded through an
; addrspace(1) pointer in a function with the default calling convention.
; After the 64-bit load into v[0:1], every target is expected to shift the
; first dword (v0) right by 16 bits in place.
1042 define i8 @extractelement_vgpr_v8i8_idx2(ptr addrspace(1) %ptr) {
1043 ; GFX9-LABEL: extractelement_vgpr_v8i8_idx2:
1045 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1046 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
1047 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1048 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1049 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1051 ; GFX8-LABEL: extractelement_vgpr_v8i8_idx2:
1053 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1054 ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
1055 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1056 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1057 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1059 ; GFX7-LABEL: extractelement_vgpr_v8i8_idx2:
1061 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1062 ; GFX7-NEXT: s_mov_b32 s6, 0
1063 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1064 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1065 ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
1066 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1067 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1068 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1070 ; GFX10-LABEL: extractelement_vgpr_v8i8_idx2:
1072 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1073 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
1074 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1075 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1076 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1078 ; GFX11-LABEL: extractelement_vgpr_v8i8_idx2:
1080 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1081 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
1082 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1083 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1084 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1085 %vector = load <8 x i8>, ptr addrspace(1) %ptr
1086 %element = extractelement <8 x i8> %vector, i32 2
; Constant-index extract of element 3 from an <8 x i8> loaded through an
; addrspace(1) pointer in a function with the default calling convention.
; After the 64-bit load into v[0:1], every target is expected to shift the
; first dword (v0) right by 24 bits in place.
1090 define i8 @extractelement_vgpr_v8i8_idx3(ptr addrspace(1) %ptr) {
1091 ; GFX9-LABEL: extractelement_vgpr_v8i8_idx3:
1093 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1094 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
1095 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1096 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0
1097 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1099 ; GFX8-LABEL: extractelement_vgpr_v8i8_idx3:
1101 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1102 ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
1103 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1104 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0
1105 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1107 ; GFX7-LABEL: extractelement_vgpr_v8i8_idx3:
1109 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1110 ; GFX7-NEXT: s_mov_b32 s6, 0
1111 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1112 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1113 ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
1114 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1115 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0
1116 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1118 ; GFX10-LABEL: extractelement_vgpr_v8i8_idx3:
1120 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1121 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
1122 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1123 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0
1124 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1126 ; GFX11-LABEL: extractelement_vgpr_v8i8_idx3:
1128 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1129 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
1130 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1131 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v0
1132 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1133 %vector = load <8 x i8>, ptr addrspace(1) %ptr
1134 %element = extractelement <8 x i8> %vector, i32 3
; Constant-index extract of element 4 from an <8 x i8> loaded through an
; addrspace(1) pointer in a function with the default calling convention.
; Element 4 is the lowest byte of the second loaded dword, so after the
; 64-bit load the checks expect a plain copy of v1 into v0 with no shift.
1138 define i8 @extractelement_vgpr_v8i8_idx4(ptr addrspace(1) %ptr) {
1139 ; GFX9-LABEL: extractelement_vgpr_v8i8_idx4:
1141 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1142 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
1143 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1144 ; GFX9-NEXT: v_mov_b32_e32 v0, v1
1145 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1147 ; GFX8-LABEL: extractelement_vgpr_v8i8_idx4:
1149 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1150 ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
1151 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1152 ; GFX8-NEXT: v_mov_b32_e32 v0, v1
1153 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1155 ; GFX7-LABEL: extractelement_vgpr_v8i8_idx4:
1157 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1158 ; GFX7-NEXT: s_mov_b32 s6, 0
1159 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1160 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1161 ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
1162 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1163 ; GFX7-NEXT: v_mov_b32_e32 v0, v1
1164 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1166 ; GFX10-LABEL: extractelement_vgpr_v8i8_idx4:
1168 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1169 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
1170 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1171 ; GFX10-NEXT: v_mov_b32_e32 v0, v1
1172 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1174 ; GFX11-LABEL: extractelement_vgpr_v8i8_idx4:
1176 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1177 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
1178 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1179 ; GFX11-NEXT: v_mov_b32_e32 v0, v1
1180 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1181 %vector = load <8 x i8>, ptr addrspace(1) %ptr
1182 %element = extractelement <8 x i8> %vector, i32 4
; Constant-index extract of element 5 from an <8 x i8> loaded through an
; addrspace(1) pointer in a function with the default calling convention.
; After the 64-bit load into v[0:1], every target is expected to shift the
; second dword (v1) right by 8 bits and write the result to v0.
1186 define i8 @extractelement_vgpr_v8i8_idx5(ptr addrspace(1) %ptr) {
1187 ; GFX9-LABEL: extractelement_vgpr_v8i8_idx5:
1189 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1190 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
1191 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1192 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v1
1193 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1195 ; GFX8-LABEL: extractelement_vgpr_v8i8_idx5:
1197 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1198 ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
1199 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1200 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v1
1201 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1203 ; GFX7-LABEL: extractelement_vgpr_v8i8_idx5:
1205 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1206 ; GFX7-NEXT: s_mov_b32 s6, 0
1207 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1208 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1209 ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
1210 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1211 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v1
1212 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1214 ; GFX10-LABEL: extractelement_vgpr_v8i8_idx5:
1216 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1217 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
1218 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1219 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v1
1220 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1222 ; GFX11-LABEL: extractelement_vgpr_v8i8_idx5:
1224 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1225 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
1226 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1227 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v1
1228 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1229 %vector = load <8 x i8>, ptr addrspace(1) %ptr
1230 %element = extractelement <8 x i8> %vector, i32 5
; Constant-index extract of element 6 from an <8 x i8> loaded through an
; addrspace(1) pointer in a function with the default calling convention.
; After the 64-bit load into v[0:1], every target is expected to shift the
; second dword (v1) right by 16 bits and write the result to v0.
1234 define i8 @extractelement_vgpr_v8i8_idx6(ptr addrspace(1) %ptr) {
1235 ; GFX9-LABEL: extractelement_vgpr_v8i8_idx6:
1237 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1238 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
1239 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1240 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v1
1241 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1243 ; GFX8-LABEL: extractelement_vgpr_v8i8_idx6:
1245 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1246 ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
1247 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1248 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v1
1249 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1251 ; GFX7-LABEL: extractelement_vgpr_v8i8_idx6:
1253 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1254 ; GFX7-NEXT: s_mov_b32 s6, 0
1255 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1256 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1257 ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
1258 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1259 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v1
1260 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1262 ; GFX10-LABEL: extractelement_vgpr_v8i8_idx6:
1264 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1265 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
1266 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1267 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v1
1268 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1270 ; GFX11-LABEL: extractelement_vgpr_v8i8_idx6:
1272 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1273 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
1274 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1275 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v1
1276 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1277 %vector = load <8 x i8>, ptr addrspace(1) %ptr
1278 %element = extractelement <8 x i8> %vector, i32 6
; Constant-index extract of element 7 (the last one) from an <8 x i8> loaded
; through an addrspace(1) pointer in a function with the default calling
; convention.
; After the 64-bit load into v[0:1], every target is expected to shift the
; second dword (v1) right by 24 bits and write the result to v0.
1282 define i8 @extractelement_vgpr_v8i8_idx7(ptr addrspace(1) %ptr) {
1283 ; GFX9-LABEL: extractelement_vgpr_v8i8_idx7:
1285 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1286 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
1287 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1288 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v1
1289 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1291 ; GFX8-LABEL: extractelement_vgpr_v8i8_idx7:
1293 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1294 ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
1295 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1296 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v1
1297 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1299 ; GFX7-LABEL: extractelement_vgpr_v8i8_idx7:
1301 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1302 ; GFX7-NEXT: s_mov_b32 s6, 0
1303 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1304 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1305 ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
1306 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1307 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1
1308 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1310 ; GFX10-LABEL: extractelement_vgpr_v8i8_idx7:
1312 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1313 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
1314 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1315 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v1
1316 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1318 ; GFX11-LABEL: extractelement_vgpr_v8i8_idx7:
1320 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1321 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
1322 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1323 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v1
1324 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1325 %vector = load <8 x i8>, ptr addrspace(1) %ptr
1326 %element = extractelement <8 x i8> %vector, i32 7
; Dynamic-index extract from a <16 x i8> where both the addrspace(4) pointer
; and the index are inreg arguments of an amdgpu_ps function.
; The checked sequence is entirely scalar: s4 (presumably %idx) shifted right
; by 2 is compared against 1, 2 and 3 to pick one of the four loaded dwords
; s0..s3 via s_cselect, then (s4 & 3) << 3 is used as the bit shift amount
; for the final s_lshr. GFX11 additionally expects s_delay_alu hints.
1330 define amdgpu_ps i8 @extractelement_sgpr_v16i8_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) {
1331 ; GCN-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
1333 ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
1334 ; GCN-NEXT: s_lshr_b32 s5, s4, 2
1335 ; GCN-NEXT: s_cmp_eq_u32 s5, 1
1336 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
1337 ; GCN-NEXT: s_cselect_b32 s0, s1, s0
1338 ; GCN-NEXT: s_cmp_eq_u32 s5, 2
1339 ; GCN-NEXT: s_cselect_b32 s0, s2, s0
1340 ; GCN-NEXT: s_cmp_eq_u32 s5, 3
1341 ; GCN-NEXT: s_cselect_b32 s0, s3, s0
1342 ; GCN-NEXT: s_and_b32 s1, s4, 3
1343 ; GCN-NEXT: s_lshl_b32 s1, s1, 3
1344 ; GCN-NEXT: s_lshr_b32 s0, s0, s1
1345 ; GCN-NEXT: ; return to shader part epilog
1347 ; GFX10-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
1349 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
1350 ; GFX10-NEXT: s_lshr_b32 s5, s4, 2
1351 ; GFX10-NEXT: s_cmp_eq_u32 s5, 1
1352 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1353 ; GFX10-NEXT: s_cselect_b32 s0, s1, s0
1354 ; GFX10-NEXT: s_cmp_eq_u32 s5, 2
1355 ; GFX10-NEXT: s_cselect_b32 s0, s2, s0
1356 ; GFX10-NEXT: s_cmp_eq_u32 s5, 3
1357 ; GFX10-NEXT: s_cselect_b32 s0, s3, s0
1358 ; GFX10-NEXT: s_and_b32 s1, s4, 3
1359 ; GFX10-NEXT: s_lshl_b32 s1, s1, 3
1360 ; GFX10-NEXT: s_lshr_b32 s0, s0, s1
1361 ; GFX10-NEXT: ; return to shader part epilog
1363 ; GFX11-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
1365 ; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x0
1366 ; GFX11-NEXT: s_lshr_b32 s5, s4, 2
1367 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1368 ; GFX11-NEXT: s_cmp_eq_u32 s5, 1
1369 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1370 ; GFX11-NEXT: s_cselect_b32 s0, s1, s0
1371 ; GFX11-NEXT: s_cmp_eq_u32 s5, 2
1372 ; GFX11-NEXT: s_cselect_b32 s0, s2, s0
1373 ; GFX11-NEXT: s_cmp_eq_u32 s5, 3
1374 ; GFX11-NEXT: s_cselect_b32 s0, s3, s0
1375 ; GFX11-NEXT: s_and_b32 s1, s4, 3
1376 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1377 ; GFX11-NEXT: s_lshl_b32 s1, s1, 3
1378 ; GFX11-NEXT: s_lshr_b32 s0, s0, s1
1379 ; GFX11-NEXT: ; return to shader part epilog
1380 %vector = load <16 x i8>, ptr addrspace(4) %ptr
1381 %element = extractelement <16 x i8> %vector, i32 %idx
; Dynamic-index extract from a <16 x i8> loaded through a non-inreg
; addrspace(1) pointer, with an inreg index, in an amdgpu_ps function.
; The vector is loaded into v[0:3]; the index arithmetic on s2 (presumably
; %idx) stays scalar (shift right by 2, and-with-3, shift left by 3), while
; the dword selection uses v_cmp_eq/v_cndmask and the final shift is a
; v_lshrrev. The result is moved to s0 with v_readfirstlane.
; GFX10 and GFX11 use vcc_lo where the older targets use vcc.
1385 define amdgpu_ps i8 @extractelement_vgpr_v16i8_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) {
1386 ; GFX9-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
1388 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1389 ; GFX9-NEXT: s_lshr_b32 s0, s2, 2
1390 ; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1
1391 ; GFX9-NEXT: s_and_b32 s1, s2, 3
1392 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1393 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1394 ; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s0, 2
1395 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1396 ; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s0, 3
1397 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
1398 ; GFX9-NEXT: s_lshl_b32 s0, s1, 3
1399 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, s0, v0
1400 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
1401 ; GFX9-NEXT: ; return to shader part epilog
1403 ; GFX8-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
1405 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
1406 ; GFX8-NEXT: s_lshr_b32 s0, s2, 2
1407 ; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1
1408 ; GFX8-NEXT: s_and_b32 s1, s2, 3
1409 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1410 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1411 ; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 2
1412 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1413 ; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 3
1414 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
1415 ; GFX8-NEXT: s_lshl_b32 s0, s1, 3
1416 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, s0, v0
1417 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
1418 ; GFX8-NEXT: ; return to shader part epilog
1420 ; GFX7-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
1422 ; GFX7-NEXT: s_mov_b32 s6, 0
1423 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1424 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1425 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
1426 ; GFX7-NEXT: s_lshr_b32 s0, s2, 2
1427 ; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1
1428 ; GFX7-NEXT: s_and_b32 s1, s2, 3
1429 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1430 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1431 ; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s0, 2
1432 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1433 ; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s0, 3
1434 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
1435 ; GFX7-NEXT: s_lshl_b32 s0, s1, 3
1436 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, s0, v0
1437 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
1438 ; GFX7-NEXT: ; return to shader part epilog
1440 ; GFX10-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
1442 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1443 ; GFX10-NEXT: s_lshr_b32 s0, s2, 2
1444 ; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 1
1445 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1446 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1447 ; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 2
1448 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
1449 ; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 3
1450 ; GFX10-NEXT: s_and_b32 s0, s2, 3
1451 ; GFX10-NEXT: s_lshl_b32 s0, s0, 3
1452 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
1453 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, s0, v0
1454 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
1455 ; GFX10-NEXT: ; return to shader part epilog
1457 ; GFX11-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
1459 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
1460 ; GFX11-NEXT: s_lshr_b32 s0, s2, 2
1461 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
1462 ; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 1
1463 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1464 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1465 ; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 2
1466 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
1467 ; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 3
1468 ; GFX11-NEXT: s_and_b32 s0, s2, 3
1469 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
1470 ; GFX11-NEXT: s_lshl_b32 s0, s0, 3
1471 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
1472 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1473 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, s0, v0
1474 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
1475 ; GFX11-NEXT: ; return to shader part epilog
1476 %vector = load <16 x i8>, ptr addrspace(1) %ptr
1477 %element = extractelement <16 x i8> %vector, i32 %idx
; Dynamic-index extract from a <16 x i8> where neither the addrspace(1)
; pointer nor the index is inreg, in a function with the default calling
; convention.
; The checked sequence is entirely vector ALU: the vector is loaded into
; v[3:6], v2 (presumably %idx) shifted right by 2 drives a v_cmp_eq /
; v_cndmask chain that selects one dword, and (v2 & 3) << 3 is the shift
; amount for the final v_lshrrev into v0.
1481 define i8 @extractelement_vgpr_v16i8_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) {
1482 ; GFX9-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
1484 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1485 ; GFX9-NEXT: global_load_dwordx4 v[3:6], v[0:1], off
1486 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 2, v2
1487 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
1488 ; GFX9-NEXT: v_and_b32_e32 v1, 3, v2
1489 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v1
1490 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1491 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc
1492 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
1493 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
1494 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
1495 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc
1496 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, v1, v0
1497 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1499 ; GFX8-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
1501 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1502 ; GFX8-NEXT: flat_load_dwordx4 v[3:6], v[0:1]
1503 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 2, v2
1504 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
1505 ; GFX8-NEXT: v_and_b32_e32 v1, 3, v2
1506 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 3, v1
1507 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1508 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc
1509 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
1510 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
1511 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
1512 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc
1513 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, v1, v0
1514 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1516 ; GFX7-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
1518 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1519 ; GFX7-NEXT: s_mov_b32 s6, 0
1520 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1521 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1522 ; GFX7-NEXT: buffer_load_dwordx4 v[3:6], v[0:1], s[4:7], 0 addr64
1523 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 2, v2
1524 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
1525 ; GFX7-NEXT: v_and_b32_e32 v1, 3, v2
1526 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 3, v1
1527 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1528 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc
1529 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
1530 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
1531 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
1532 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc
1533 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v1, v0
1534 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1536 ; GFX10-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
1538 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1539 ; GFX10-NEXT: global_load_dwordx4 v[3:6], v[0:1], off
1540 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 2, v2
1541 ; GFX10-NEXT: v_and_b32_e32 v2, 3, v2
1542 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
1543 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1544 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc_lo
1545 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
1546 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
1547 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
1548 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v6, vcc_lo
1549 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 3, v2
1550 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, v1, v0
1551 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1553 ; GFX11-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
1555 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1556 ; GFX11-NEXT: global_load_b128 v[3:6], v[0:1], off
1557 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 2, v2
1558 ; GFX11-NEXT: v_and_b32_e32 v2, 3, v2
1559 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_2)
1560 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
1561 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1562 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc_lo
1563 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
1564 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
1565 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
1566 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1567 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v6, vcc_lo
1568 ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 3, v2
1569 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, v1, v0
1570 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1571 %vector = load <16 x i8>, ptr addrspace(1) %ptr
1572 %element = extractelement <16 x i8> %vector, i32 %idx
; Dynamic-index extract from a <16 x i8> loaded through an inreg addrspace(4)
; pointer, with a non-inreg index, in an amdgpu_ps function.
; The vector is scalar-loaded into s[0:3] while the index arithmetic on v0
; (presumably %idx) is done with vector ALU instructions.
; Under the shared GCN prefix all four dwords are first copied to v2..v5
; before the v_cndmask chain; GFX10 and GFX11 copy only s1 and use s0, s2 and
; s3 directly as v_cndmask operands.
; The selected and shifted value is moved to s0 with v_readfirstlane.
1576 define amdgpu_ps i8 @extractelement_sgpr_v16i8_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) {
1577 ; GCN-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
1579 ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
1580 ; GCN-NEXT: v_lshrrev_b32_e32 v1, 2, v0
1581 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
1582 ; GCN-NEXT: v_and_b32_e32 v0, 3, v0
1583 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0
1584 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
1585 ; GCN-NEXT: v_mov_b32_e32 v2, s0
1586 ; GCN-NEXT: v_mov_b32_e32 v3, s1
1587 ; GCN-NEXT: v_mov_b32_e32 v4, s2
1588 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1589 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1
1590 ; GCN-NEXT: v_mov_b32_e32 v5, s3
1591 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
1592 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1
1593 ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v5, vcc
1594 ; GCN-NEXT: v_lshrrev_b32_e32 v0, v0, v1
1595 ; GCN-NEXT: v_readfirstlane_b32 s0, v0
1596 ; GCN-NEXT: ; return to shader part epilog
1598 ; GFX10-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
1600 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
1601 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 2, v0
1602 ; GFX10-NEXT: v_and_b32_e32 v0, 3, v0
1603 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
1604 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0
1605 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1606 ; GFX10-NEXT: v_mov_b32_e32 v2, s1
1607 ; GFX10-NEXT: v_cndmask_b32_e32 v2, s0, v2, vcc_lo
1608 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
1609 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s2, vcc_lo
1610 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
1611 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo
1612 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, v0, v1
1613 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
1614 ; GFX10-NEXT: ; return to shader part epilog
1616 ; GFX11-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
1618 ; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x0
1619 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 2, v0
1620 ; GFX11-NEXT: v_and_b32_e32 v0, 3, v0
1621 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1622 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
1623 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0
1624 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1625 ; GFX11-NEXT: v_mov_b32_e32 v2, s1
1626 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1627 ; GFX11-NEXT: v_cndmask_b32_e32 v2, s0, v2, vcc_lo
1628 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
1629 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s2, vcc_lo
1630 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
1631 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1632 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo
1633 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, v0, v1
1634 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1635 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
1636 ; GFX11-NEXT: ; return to shader part epilog
1637 %vector = load <16 x i8>, ptr addrspace(4) %ptr
1638 %element = extractelement <16 x i8> %vector, i32 %idx
; Constant-index extract of element 0 from a <16 x i8> loaded through an
; addrspace(1) pointer in a function with the default calling convention.
; The checks expect only the 128-bit load into v[0:3], the wait on it and
; the return; no shift or move is expected for index 0.
1642 define i8 @extractelement_vgpr_v16i8_idx0(ptr addrspace(1) %ptr) {
1643 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx0:
1645 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1646 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1647 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1648 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1650 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx0:
1652 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1653 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
1654 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1655 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1657 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx0:
1659 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1660 ; GFX7-NEXT: s_mov_b32 s6, 0
1661 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1662 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1663 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
1664 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1665 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1667 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx0:
1669 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1670 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1671 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1672 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1674 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx0:
1676 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1677 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
1678 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1679 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1680 %vector = load <16 x i8>, ptr addrspace(1) %ptr
1681 %element = extractelement <16 x i8> %vector, i32 0
; Constant-index extract of element 1 from a <16 x i8> loaded through an
; addrspace(1) pointer in a function with the default calling convention.
; After the 128-bit load into v[0:3], every target is expected to shift the
; first dword (v0) right by 8 bits in place.
1685 define i8 @extractelement_vgpr_v16i8_idx1(ptr addrspace(1) %ptr) {
1686 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx1:
1688 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1689 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1690 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1691 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0
1692 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1694 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx1:
1696 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1697 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
1698 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1699 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0
1700 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1702 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx1:
1704 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1705 ; GFX7-NEXT: s_mov_b32 s6, 0
1706 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1707 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1708 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
1709 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1710 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0
1711 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1713 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx1:
1715 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1716 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1717 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1718 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0
1719 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1721 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx1:
1723 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1724 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
1725 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1726 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v0
1727 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1728 %vector = load <16 x i8>, ptr addrspace(1) %ptr
1729 %element = extractelement <16 x i8> %vector, i32 1
; Constant-index extract of element 2 from a <16 x i8> loaded through a global
; (addrspace 1) pointer. The checks for every target expect one 128-bit load
; into v[0:3], then v_lshrrev_b32 shifting v0 right by 16 bits into v0,
; immediately followed by s_setpc_b64.
1733 define i8 @extractelement_vgpr_v16i8_idx2(ptr addrspace(1) %ptr) {
1734 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx2:
1736 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1737 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1738 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1739 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1740 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1742 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx2:
1744 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1745 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
1746 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1747 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1748 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1750 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx2:
1752 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1753 ; GFX7-NEXT: s_mov_b32 s6, 0
1754 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1755 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1756 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
1757 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1758 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1759 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1761 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx2:
1763 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1764 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1765 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1766 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1767 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1769 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx2:
1771 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1772 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
1773 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1774 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1775 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1776 %vector = load <16 x i8>, ptr addrspace(1) %ptr
1777 %element = extractelement <16 x i8> %vector, i32 2
; Constant-index extract of element 3 from a <16 x i8> loaded through a global
; (addrspace 1) pointer. The checks for every target expect one 128-bit load
; into v[0:3], then v_lshrrev_b32 shifting v0 right by 24 bits into v0,
; immediately followed by s_setpc_b64.
1781 define i8 @extractelement_vgpr_v16i8_idx3(ptr addrspace(1) %ptr) {
1782 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx3:
1784 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1785 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1786 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1787 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0
1788 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1790 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx3:
1792 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1793 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
1794 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1795 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0
1796 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1798 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx3:
1800 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1801 ; GFX7-NEXT: s_mov_b32 s6, 0
1802 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1803 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1804 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
1805 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1806 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0
1807 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1809 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx3:
1811 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1812 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1813 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1814 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0
1815 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1817 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx3:
1819 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1820 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
1821 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1822 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v0
1823 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1824 %vector = load <16 x i8>, ptr addrspace(1) %ptr
1825 %element = extractelement <16 x i8> %vector, i32 3
; Constant-index extract of element 4 from a <16 x i8> loaded through a global
; (addrspace 1) pointer. The checks for every target expect one 128-bit load
; into v[0:3], then a plain v_mov_b32 of v1 into v0 (no shift is expected for
; this index), immediately followed by s_setpc_b64.
1829 define i8 @extractelement_vgpr_v16i8_idx4(ptr addrspace(1) %ptr) {
1830 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx4:
1832 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1833 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1834 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1835 ; GFX9-NEXT: v_mov_b32_e32 v0, v1
1836 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1838 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx4:
1840 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1841 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
1842 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1843 ; GFX8-NEXT: v_mov_b32_e32 v0, v1
1844 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1846 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx4:
1848 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1849 ; GFX7-NEXT: s_mov_b32 s6, 0
1850 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1851 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1852 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
1853 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1854 ; GFX7-NEXT: v_mov_b32_e32 v0, v1
1855 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1857 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx4:
1859 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1860 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1861 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1862 ; GFX10-NEXT: v_mov_b32_e32 v0, v1
1863 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1865 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx4:
1867 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1868 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
1869 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1870 ; GFX11-NEXT: v_mov_b32_e32 v0, v1
1871 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1872 %vector = load <16 x i8>, ptr addrspace(1) %ptr
1873 %element = extractelement <16 x i8> %vector, i32 4
; Constant-index extract of element 5 from a <16 x i8> loaded through a global
; (addrspace 1) pointer. The checks for every target expect one 128-bit load
; into v[0:3], then v_lshrrev_b32 shifting v1 right by 8 bits into v0,
; immediately followed by s_setpc_b64.
1877 define i8 @extractelement_vgpr_v16i8_idx5(ptr addrspace(1) %ptr) {
1878 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx5:
1880 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1881 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1882 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1883 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v1
1884 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1886 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx5:
1888 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1889 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
1890 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1891 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v1
1892 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1894 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx5:
1896 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1897 ; GFX7-NEXT: s_mov_b32 s6, 0
1898 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1899 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1900 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
1901 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1902 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v1
1903 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1905 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx5:
1907 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1908 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1909 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1910 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v1
1911 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1913 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx5:
1915 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1916 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
1917 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1918 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v1
1919 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1920 %vector = load <16 x i8>, ptr addrspace(1) %ptr
1921 %element = extractelement <16 x i8> %vector, i32 5
; Constant-index extract of element 6 from a <16 x i8> loaded through a global
; (addrspace 1) pointer. The checks for every target expect one 128-bit load
; into v[0:3], then v_lshrrev_b32 shifting v1 right by 16 bits into v0,
; immediately followed by s_setpc_b64.
1925 define i8 @extractelement_vgpr_v16i8_idx6(ptr addrspace(1) %ptr) {
1926 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx6:
1928 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1929 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1930 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1931 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v1
1932 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1934 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx6:
1936 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1937 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
1938 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1939 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v1
1940 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1942 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx6:
1944 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1945 ; GFX7-NEXT: s_mov_b32 s6, 0
1946 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1947 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1948 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
1949 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1950 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v1
1951 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1953 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx6:
1955 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1956 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1957 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1958 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v1
1959 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1961 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx6:
1963 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1964 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
1965 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1966 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v1
1967 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1968 %vector = load <16 x i8>, ptr addrspace(1) %ptr
1969 %element = extractelement <16 x i8> %vector, i32 6
; Constant-index extract of element 7 from a <16 x i8> loaded through a global
; (addrspace 1) pointer. The checks for every target expect one 128-bit load
; into v[0:3], then v_lshrrev_b32 shifting v1 right by 24 bits into v0,
; immediately followed by s_setpc_b64.
1973 define i8 @extractelement_vgpr_v16i8_idx7(ptr addrspace(1) %ptr) {
1974 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx7:
1976 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1977 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1978 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1979 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v1
1980 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1982 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx7:
1984 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1985 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
1986 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1987 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v1
1988 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1990 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx7:
1992 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1993 ; GFX7-NEXT: s_mov_b32 s6, 0
1994 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1995 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1996 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
1997 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1998 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1
1999 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2001 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx7:
2003 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2004 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
2005 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2006 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v1
2007 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2009 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx7:
2011 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2012 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
2013 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2014 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v1
2015 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2016 %vector = load <16 x i8>, ptr addrspace(1) %ptr
2017 %element = extractelement <16 x i8> %vector, i32 7
; Constant-index extract of element 8 from a <16 x i8> loaded through a global
; (addrspace 1) pointer. The checks for every target expect one 128-bit load
; into v[0:3], then a plain v_mov_b32 of v2 into v0 (no shift is expected for
; this index), immediately followed by s_setpc_b64.
2021 define i8 @extractelement_vgpr_v16i8_idx8(ptr addrspace(1) %ptr) {
2022 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx8:
2024 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2025 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
2026 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2027 ; GFX9-NEXT: v_mov_b32_e32 v0, v2
2028 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2030 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx8:
2032 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2033 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
2034 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2035 ; GFX8-NEXT: v_mov_b32_e32 v0, v2
2036 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2038 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx8:
2040 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2041 ; GFX7-NEXT: s_mov_b32 s6, 0
2042 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
2043 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
2044 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
2045 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2046 ; GFX7-NEXT: v_mov_b32_e32 v0, v2
2047 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2049 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx8:
2051 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2052 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
2053 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2054 ; GFX10-NEXT: v_mov_b32_e32 v0, v2
2055 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2057 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx8:
2059 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2060 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
2061 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2062 ; GFX11-NEXT: v_mov_b32_e32 v0, v2
2063 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2064 %vector = load <16 x i8>, ptr addrspace(1) %ptr
2065 %element = extractelement <16 x i8> %vector, i32 8
; Constant-index extract of element 9 from a <16 x i8> loaded through a global
; (addrspace 1) pointer. The checks for every target expect one 128-bit load
; into v[0:3], then v_lshrrev_b32 shifting v2 right by 8 bits into v0,
; immediately followed by s_setpc_b64.
2069 define i8 @extractelement_vgpr_v16i8_idx9(ptr addrspace(1) %ptr) {
2070 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx9:
2072 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2073 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
2074 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2075 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v2
2076 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2078 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx9:
2080 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2081 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
2082 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2083 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v2
2084 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2086 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx9:
2088 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2089 ; GFX7-NEXT: s_mov_b32 s6, 0
2090 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
2091 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
2092 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
2093 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2094 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v2
2095 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2097 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx9:
2099 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2100 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
2101 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2102 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v2
2103 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2105 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx9:
2107 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2108 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
2109 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2110 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v2
2111 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2112 %vector = load <16 x i8>, ptr addrspace(1) %ptr
2113 %element = extractelement <16 x i8> %vector, i32 9
; Constant-index extract of element 10 from a <16 x i8> loaded through a global
; (addrspace 1) pointer. The checks for every target expect one 128-bit load
; into v[0:3], then v_lshrrev_b32 shifting v2 right by 16 bits into v0,
; immediately followed by s_setpc_b64.
2117 define i8 @extractelement_vgpr_v16i8_idx10(ptr addrspace(1) %ptr) {
2118 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx10:
2120 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2121 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
2122 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2123 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v2
2124 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2126 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx10:
2128 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2129 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
2130 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2131 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v2
2132 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2134 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx10:
2136 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2137 ; GFX7-NEXT: s_mov_b32 s6, 0
2138 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
2139 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
2140 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
2141 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2142 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v2
2143 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2145 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx10:
2147 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2148 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
2149 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2150 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v2
2151 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2153 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx10:
2155 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2156 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
2157 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2158 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v2
2159 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2160 %vector = load <16 x i8>, ptr addrspace(1) %ptr
2161 %element = extractelement <16 x i8> %vector, i32 10
; Constant-index extract of element 11 from a <16 x i8> loaded through a global
; (addrspace 1) pointer. The checks for every target expect one 128-bit load
; into v[0:3], then v_lshrrev_b32 shifting v2 right by 24 bits into v0,
; immediately followed by s_setpc_b64.
2165 define i8 @extractelement_vgpr_v16i8_idx11(ptr addrspace(1) %ptr) {
2166 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx11:
2168 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2169 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
2170 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2171 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v2
2172 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2174 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx11:
2176 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2177 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
2178 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2179 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v2
2180 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2182 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx11:
2184 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2185 ; GFX7-NEXT: s_mov_b32 s6, 0
2186 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
2187 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
2188 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
2189 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2190 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v2
2191 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2193 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx11:
2195 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2196 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
2197 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2198 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v2
2199 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2201 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx11:
2203 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2204 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
2205 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2206 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v2
2207 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2208 %vector = load <16 x i8>, ptr addrspace(1) %ptr
2209 %element = extractelement <16 x i8> %vector, i32 11
; Constant-index extract of element 12 from a <16 x i8> loaded through a global
; (addrspace 1) pointer. The checks for every target expect one 128-bit load
; into v[0:3], then a plain v_mov_b32 of v3 into v0 (no shift is expected for
; this index), immediately followed by s_setpc_b64.
2213 define i8 @extractelement_vgpr_v16i8_idx12(ptr addrspace(1) %ptr) {
2214 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx12:
2216 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2217 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
2218 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2219 ; GFX9-NEXT: v_mov_b32_e32 v0, v3
2220 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2222 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx12:
2224 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2225 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
2226 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2227 ; GFX8-NEXT: v_mov_b32_e32 v0, v3
2228 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2230 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx12:
2232 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2233 ; GFX7-NEXT: s_mov_b32 s6, 0
2234 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
2235 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
2236 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
2237 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2238 ; GFX7-NEXT: v_mov_b32_e32 v0, v3
2239 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2241 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx12:
2243 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2244 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
2245 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2246 ; GFX10-NEXT: v_mov_b32_e32 v0, v3
2247 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2249 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx12:
2251 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2252 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
2253 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2254 ; GFX11-NEXT: v_mov_b32_e32 v0, v3
2255 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2256 %vector = load <16 x i8>, ptr addrspace(1) %ptr
2257 %element = extractelement <16 x i8> %vector, i32 12
; Constant-index extract of element 13 from a <16 x i8> loaded through a global
; (addrspace 1) pointer. The checks for every target expect one 128-bit load
; into v[0:3], then v_lshrrev_b32 shifting v3 right by 8 bits into v0,
; immediately followed by s_setpc_b64.
2261 define i8 @extractelement_vgpr_v16i8_idx13(ptr addrspace(1) %ptr) {
2262 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx13:
2264 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2265 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
2266 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2267 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v3
2268 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2270 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx13:
2272 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2273 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
2274 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2275 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v3
2276 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2278 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx13:
2280 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2281 ; GFX7-NEXT: s_mov_b32 s6, 0
2282 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
2283 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
2284 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
2285 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2286 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v3
2287 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2289 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx13:
2291 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2292 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
2293 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2294 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v3
2295 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2297 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx13:
2299 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2300 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
2301 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2302 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v3
2303 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2304 %vector = load <16 x i8>, ptr addrspace(1) %ptr
2305 %element = extractelement <16 x i8> %vector, i32 13
; Constant-index extract of element 14 from a <16 x i8> loaded through a global
; (addrspace 1) pointer. The checks for every target expect one 128-bit load
; into v[0:3], then v_lshrrev_b32 shifting v3 right by 16 bits into v0,
; immediately followed by s_setpc_b64.
2309 define i8 @extractelement_vgpr_v16i8_idx14(ptr addrspace(1) %ptr) {
2310 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx14:
2312 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2313 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
2314 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2315 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v3
2316 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2318 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx14:
2320 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2321 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
2322 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2323 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v3
2324 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2326 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx14:
2328 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2329 ; GFX7-NEXT: s_mov_b32 s6, 0
2330 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
2331 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
2332 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
2333 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2334 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v3
2335 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2337 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx14:
2339 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2340 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
2341 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2342 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v3
2343 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2345 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx14:
2347 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2348 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
2349 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2350 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v3
2351 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2352 %vector = load <16 x i8>, ptr addrspace(1) %ptr
2353 %element = extractelement <16 x i8> %vector, i32 14
; Constant-index extract of element 15 from a <16 x i8> loaded through a global
; (addrspace 1) pointer. The checks for every target expect one 128-bit load
; into v[0:3], then v_lshrrev_b32 shifting v3 right by 24 bits into v0,
; immediately followed by s_setpc_b64.
2357 define i8 @extractelement_vgpr_v16i8_idx15(ptr addrspace(1) %ptr) {
2358 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx15:
2360 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2361 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
2362 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2363 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v3
2364 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2366 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx15:
2368 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2369 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
2370 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2371 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v3
2372 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2374 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx15:
2376 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2377 ; GFX7-NEXT: s_mov_b32 s6, 0
2378 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
2379 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
2380 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
2381 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2382 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v3
2383 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2385 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx15:
2387 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2388 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
2389 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2390 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v3
2391 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2393 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx15:
2395 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2396 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
2397 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2398 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v3
2399 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2400 %vector = load <16 x i8>, ptr addrspace(1) %ptr
2401 %element = extractelement <16 x i8> %vector, i32 15