1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
6 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
; Dynamic-index extract from a <4 x i8> loaded through an inreg addrspace(4)
; pointer, with the i32 index also passed inreg.
; In every checked configuration the index is masked with 3 and a single
; unsigned-byte load is issued instead of loading the whole vector; the loaded
; byte is then copied to s0 with v_readfirstlane_b32.
; NOTE(review): the return statement is not among the lines visible here;
; presumably %element is what gets returned - confirm.
8 define amdgpu_ps i8 @extractelement_sgpr_v4i8_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) {
9 ; GFX9-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
11 ; GFX9-NEXT: s_and_b32 s0, s4, 3
12 ; GFX9-NEXT: s_ashr_i32 s1, s0, 31
13 ; GFX9-NEXT: s_add_u32 s0, s2, s0
14 ; GFX9-NEXT: s_addc_u32 s1, s3, s1
15 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
16 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1]
17 ; GFX9-NEXT: s_waitcnt vmcnt(0)
18 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
19 ; GFX9-NEXT: ; return to shader part epilog
21 ; GFX8-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
23 ; GFX8-NEXT: s_and_b32 s0, s4, 3
24 ; GFX8-NEXT: s_ashr_i32 s1, s0, 31
25 ; GFX8-NEXT: s_add_u32 s0, s2, s0
26 ; GFX8-NEXT: s_addc_u32 s1, s3, s1
27 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
28 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
29 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
30 ; GFX8-NEXT: s_waitcnt vmcnt(0)
31 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
32 ; GFX8-NEXT: ; return to shader part epilog
34 ; GFX7-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
36 ; GFX7-NEXT: s_and_b32 s4, s4, 3
37 ; GFX7-NEXT: s_ashr_i32 s5, s4, 31
38 ; GFX7-NEXT: v_mov_b32_e32 v0, s4
39 ; GFX7-NEXT: s_mov_b32 s0, s2
40 ; GFX7-NEXT: s_mov_b32 s1, s3
41 ; GFX7-NEXT: s_mov_b32 s2, 0
42 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
43 ; GFX7-NEXT: v_mov_b32_e32 v1, s5
44 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
45 ; GFX7-NEXT: s_waitcnt vmcnt(0)
46 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
47 ; GFX7-NEXT: ; return to shader part epilog
49 ; GFX10-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
51 ; GFX10-NEXT: s_and_b32 s0, s4, 3
52 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
53 ; GFX10-NEXT: s_ashr_i32 s1, s0, 31
54 ; GFX10-NEXT: s_add_u32 s0, s2, s0
55 ; GFX10-NEXT: s_addc_u32 s1, s3, s1
56 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1]
57 ; GFX10-NEXT: s_waitcnt vmcnt(0)
58 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
59 ; GFX10-NEXT: ; return to shader part epilog
61 ; GFX11-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
63 ; GFX11-NEXT: s_and_b32 s0, s4, 3
64 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
65 ; GFX11-NEXT: s_ashr_i32 s1, s0, 31
66 ; GFX11-NEXT: s_add_u32 s0, s2, s0
67 ; GFX11-NEXT: s_addc_u32 s1, s3, s1
68 ; GFX11-NEXT: global_load_u8 v0, v0, s[0:1]
69 ; GFX11-NEXT: s_waitcnt vmcnt(0)
70 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
71 ; GFX11-NEXT: ; return to shader part epilog
; IR under test - load the full vector, then extract the element at %idx.
72 %vector = load <4 x i8>, ptr addrspace(4) %ptr
73 %element = extractelement <4 x i8> %vector, i32 %idx
; Dynamic-index extract from a <4 x i8> loaded through an addrspace(1) pointer
; that is not marked inreg, while the i32 index is passed inreg.
; The checks mask the index with 3 in scalar registers and expect one
; unsigned-byte load followed by v_readfirstlane_b32 into s0. In the hawaii
; run the masked index is placed in s[0:1] of the buffer resource and the
; pointer registers v[0:1] are used as the addr64 address operand.
77 define amdgpu_ps i8 @extractelement_vgpr_v4i8_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) {
78 ; GFX9-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
80 ; GFX9-NEXT: s_and_b32 s0, s2, 3
81 ; GFX9-NEXT: s_ashr_i32 s1, s0, 31
82 ; GFX9-NEXT: v_mov_b32_e32 v3, s1
83 ; GFX9-NEXT: v_mov_b32_e32 v2, s0
84 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
85 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
86 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
87 ; GFX9-NEXT: s_waitcnt vmcnt(0)
88 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
89 ; GFX9-NEXT: ; return to shader part epilog
91 ; GFX8-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
93 ; GFX8-NEXT: s_and_b32 s0, s2, 3
94 ; GFX8-NEXT: s_ashr_i32 s1, s0, 31
95 ; GFX8-NEXT: v_mov_b32_e32 v3, s1
96 ; GFX8-NEXT: v_mov_b32_e32 v2, s0
97 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
98 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
99 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
100 ; GFX8-NEXT: s_waitcnt vmcnt(0)
101 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
102 ; GFX8-NEXT: ; return to shader part epilog
104 ; GFX7-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
106 ; GFX7-NEXT: s_and_b32 s0, s2, 3
107 ; GFX7-NEXT: s_ashr_i32 s1, s0, 31
108 ; GFX7-NEXT: s_mov_b32 s2, 0
109 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
110 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
111 ; GFX7-NEXT: s_waitcnt vmcnt(0)
112 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
113 ; GFX7-NEXT: ; return to shader part epilog
115 ; GFX10-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
117 ; GFX10-NEXT: s_and_b32 s0, s2, 3
118 ; GFX10-NEXT: s_ashr_i32 s1, s0, 31
119 ; GFX10-NEXT: v_mov_b32_e32 v3, s1
120 ; GFX10-NEXT: v_mov_b32_e32 v2, s0
121 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
122 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
123 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
124 ; GFX10-NEXT: s_waitcnt vmcnt(0)
125 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
126 ; GFX10-NEXT: ; return to shader part epilog
128 ; GFX11-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
130 ; GFX11-NEXT: s_and_b32 s0, s2, 3
131 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
132 ; GFX11-NEXT: s_ashr_i32 s1, s0, 31
133 ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
134 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
135 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
136 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
137 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off
138 ; GFX11-NEXT: s_waitcnt vmcnt(0)
139 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
140 ; GFX11-NEXT: ; return to shader part epilog
; IR under test - load the full vector, then extract the element at %idx.
141 %vector = load <4 x i8>, ptr addrspace(1) %ptr
142 %element = extractelement <4 x i8> %vector, i32 %idx
; Dynamic-index extract from a <4 x i8> loaded through an addrspace(1) pointer,
; with neither the pointer nor the i32 index marked inreg. Unlike the amdgpu_ps
; tests in this file, this function has no calling-convention keyword, and its
; checks end in s_setpc_b64 s[30:31] with the loaded byte left in v0.
; The index is masked with 3 using vector ALU instructions, added to the
; pointer, and a single unsigned-byte load is expected.
146 define i8 @extractelement_vgpr_v4i8_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) {
147 ; GFX9-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
149 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
150 ; GFX9-NEXT: v_and_b32_e32 v2, 3, v2
151 ; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
152 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
153 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
154 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
155 ; GFX9-NEXT: s_waitcnt vmcnt(0)
156 ; GFX9-NEXT: s_setpc_b64 s[30:31]
158 ; GFX8-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
160 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
161 ; GFX8-NEXT: v_and_b32_e32 v2, 3, v2
162 ; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
163 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
164 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
165 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
166 ; GFX8-NEXT: s_waitcnt vmcnt(0)
167 ; GFX8-NEXT: s_setpc_b64 s[30:31]
169 ; GFX7-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
171 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172 ; GFX7-NEXT: v_and_b32_e32 v2, 3, v2
173 ; GFX7-NEXT: v_ashrrev_i32_e32 v3, 31, v2
174 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
175 ; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
176 ; GFX7-NEXT: s_mov_b32 s6, 0
177 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
178 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
179 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
180 ; GFX7-NEXT: s_waitcnt vmcnt(0)
181 ; GFX7-NEXT: s_setpc_b64 s[30:31]
183 ; GFX10-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
185 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
186 ; GFX10-NEXT: v_and_b32_e32 v2, 3, v2
187 ; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
188 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
189 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
190 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
191 ; GFX10-NEXT: s_waitcnt vmcnt(0)
192 ; GFX10-NEXT: s_setpc_b64 s[30:31]
194 ; GFX11-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
196 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
197 ; GFX11-NEXT: v_and_b32_e32 v2, 3, v2
198 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
199 ; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
200 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
201 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
202 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off
203 ; GFX11-NEXT: s_waitcnt vmcnt(0)
204 ; GFX11-NEXT: s_setpc_b64 s[30:31]
; IR under test - load the full vector, then extract the element at %idx.
205 %vector = load <4 x i8>, ptr addrspace(1) %ptr
206 %element = extractelement <4 x i8> %vector, i32 %idx
; Dynamic-index extract from a <4 x i8> loaded through an inreg addrspace(4)
; pointer, with an i32 index that is not marked inreg.
; The checks mask the index in v0 with 3, combine it with the pointer held in
; s[2:3], issue a single unsigned-byte load, and copy the result to s0 with
; v_readfirstlane_b32. In the hawaii run no explicit add is expected - the
; pointer is moved into s[0:1] of the buffer resource and the masked index is
; the addr64 address operand.
210 define amdgpu_ps i8 @extractelement_sgpr_v4i8_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) {
211 ; GFX9-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
213 ; GFX9-NEXT: v_and_b32_e32 v2, 3, v0
214 ; GFX9-NEXT: v_mov_b32_e32 v0, s2
215 ; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
216 ; GFX9-NEXT: v_mov_b32_e32 v1, s3
217 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
218 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
219 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
220 ; GFX9-NEXT: s_waitcnt vmcnt(0)
221 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
222 ; GFX9-NEXT: ; return to shader part epilog
224 ; GFX8-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
226 ; GFX8-NEXT: v_and_b32_e32 v2, 3, v0
227 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
228 ; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
229 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
230 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
231 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
232 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
233 ; GFX8-NEXT: s_waitcnt vmcnt(0)
234 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
235 ; GFX8-NEXT: ; return to shader part epilog
237 ; GFX7-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
239 ; GFX7-NEXT: v_and_b32_e32 v0, 3, v0
240 ; GFX7-NEXT: s_mov_b32 s0, s2
241 ; GFX7-NEXT: s_mov_b32 s1, s3
242 ; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
243 ; GFX7-NEXT: s_mov_b32 s2, 0
244 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
245 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
246 ; GFX7-NEXT: s_waitcnt vmcnt(0)
247 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
248 ; GFX7-NEXT: ; return to shader part epilog
250 ; GFX10-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
252 ; GFX10-NEXT: v_and_b32_e32 v2, 3, v0
253 ; GFX10-NEXT: v_mov_b32_e32 v0, s2
254 ; GFX10-NEXT: v_mov_b32_e32 v1, s3
255 ; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
256 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
257 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
258 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
259 ; GFX10-NEXT: s_waitcnt vmcnt(0)
260 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
261 ; GFX10-NEXT: ; return to shader part epilog
263 ; GFX11-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
265 ; GFX11-NEXT: v_and_b32_e32 v2, 3, v0
266 ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
267 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
268 ; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
269 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
270 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
271 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
272 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off
273 ; GFX11-NEXT: s_waitcnt vmcnt(0)
274 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
275 ; GFX11-NEXT: ; return to shader part epilog
; IR under test - load the full vector, then extract the element at %idx.
276 %vector = load <4 x i8>, ptr addrspace(4) %ptr
277 %element = extractelement <4 x i8> %vector, i32 %idx
; Constant-index extract of element 0 from a <4 x i8> loaded through an inreg
; addrspace(4) pointer.
; Every checked configuration expects a single unsigned-byte load directly at
; the pointer (no offset and no address arithmetic), followed by
; v_readfirstlane_b32 into s0.
281 define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx0(ptr addrspace(4) inreg %ptr) {
282 ; GFX9-LABEL: extractelement_sgpr_v4i8_idx0:
284 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
285 ; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3]
286 ; GFX9-NEXT: s_waitcnt vmcnt(0)
287 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
288 ; GFX9-NEXT: ; return to shader part epilog
290 ; GFX8-LABEL: extractelement_sgpr_v4i8_idx0:
292 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
293 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
294 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
295 ; GFX8-NEXT: s_waitcnt vmcnt(0)
296 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
297 ; GFX8-NEXT: ; return to shader part epilog
299 ; GFX7-LABEL: extractelement_sgpr_v4i8_idx0:
301 ; GFX7-NEXT: s_mov_b32 s0, s2
302 ; GFX7-NEXT: s_mov_b32 s1, s3
303 ; GFX7-NEXT: s_mov_b32 s2, -1
304 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
305 ; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
306 ; GFX7-NEXT: s_waitcnt vmcnt(0)
307 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
308 ; GFX7-NEXT: ; return to shader part epilog
310 ; GFX10-LABEL: extractelement_sgpr_v4i8_idx0:
312 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
313 ; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3]
314 ; GFX10-NEXT: s_waitcnt vmcnt(0)
315 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
316 ; GFX10-NEXT: ; return to shader part epilog
318 ; GFX11-LABEL: extractelement_sgpr_v4i8_idx0:
320 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
321 ; GFX11-NEXT: global_load_u8 v0, v0, s[2:3]
322 ; GFX11-NEXT: s_waitcnt vmcnt(0)
323 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
324 ; GFX11-NEXT: ; return to shader part epilog
; IR under test - load the full vector, then extract element 0.
325 %vector = load <4 x i8>, ptr addrspace(4) %ptr
326 %element = extractelement <4 x i8> %vector, i32 0
; Constant-index extract of element 1 from a <4 x i8> loaded through an inreg
; addrspace(4) pointer.
; The gfx900, hawaii, gfx1010 and gfx1100 checks expect the constant to appear
; as an immediate offset of 1 on a single unsigned-byte load. The fiji checks
; instead add 1 to the pointer with s_add_u32/s_addc_u32 before a
; flat_load_ubyte that carries no offset.
330 define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx1(ptr addrspace(4) inreg %ptr) {
331 ; GFX9-LABEL: extractelement_sgpr_v4i8_idx1:
333 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
334 ; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:1
335 ; GFX9-NEXT: s_waitcnt vmcnt(0)
336 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
337 ; GFX9-NEXT: ; return to shader part epilog
339 ; GFX8-LABEL: extractelement_sgpr_v4i8_idx1:
341 ; GFX8-NEXT: s_add_u32 s0, s2, 1
342 ; GFX8-NEXT: s_addc_u32 s1, s3, 0
343 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
344 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
345 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
346 ; GFX8-NEXT: s_waitcnt vmcnt(0)
347 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
348 ; GFX8-NEXT: ; return to shader part epilog
350 ; GFX7-LABEL: extractelement_sgpr_v4i8_idx1:
352 ; GFX7-NEXT: s_mov_b32 s0, s2
353 ; GFX7-NEXT: s_mov_b32 s1, s3
354 ; GFX7-NEXT: s_mov_b32 s2, -1
355 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
356 ; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:1
357 ; GFX7-NEXT: s_waitcnt vmcnt(0)
358 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
359 ; GFX7-NEXT: ; return to shader part epilog
361 ; GFX10-LABEL: extractelement_sgpr_v4i8_idx1:
363 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
364 ; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:1
365 ; GFX10-NEXT: s_waitcnt vmcnt(0)
366 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
367 ; GFX10-NEXT: ; return to shader part epilog
369 ; GFX11-LABEL: extractelement_sgpr_v4i8_idx1:
371 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
372 ; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] offset:1
373 ; GFX11-NEXT: s_waitcnt vmcnt(0)
374 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
375 ; GFX11-NEXT: ; return to shader part epilog
; IR under test - load the full vector, then extract element 1.
376 %vector = load <4 x i8>, ptr addrspace(4) %ptr
377 %element = extractelement <4 x i8> %vector, i32 1
; Constant-index extract of element 2 from a <4 x i8> loaded through an inreg
; addrspace(4) pointer.
; The gfx900, hawaii, gfx1010 and gfx1100 checks expect the constant to appear
; as an immediate offset of 2 on a single unsigned-byte load. The fiji checks
; instead add 2 to the pointer with s_add_u32/s_addc_u32 before a
; flat_load_ubyte that carries no offset.
381 define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx2(ptr addrspace(4) inreg %ptr) {
382 ; GFX9-LABEL: extractelement_sgpr_v4i8_idx2:
384 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
385 ; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:2
386 ; GFX9-NEXT: s_waitcnt vmcnt(0)
387 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
388 ; GFX9-NEXT: ; return to shader part epilog
390 ; GFX8-LABEL: extractelement_sgpr_v4i8_idx2:
392 ; GFX8-NEXT: s_add_u32 s0, s2, 2
393 ; GFX8-NEXT: s_addc_u32 s1, s3, 0
394 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
395 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
396 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
397 ; GFX8-NEXT: s_waitcnt vmcnt(0)
398 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
399 ; GFX8-NEXT: ; return to shader part epilog
401 ; GFX7-LABEL: extractelement_sgpr_v4i8_idx2:
403 ; GFX7-NEXT: s_mov_b32 s0, s2
404 ; GFX7-NEXT: s_mov_b32 s1, s3
405 ; GFX7-NEXT: s_mov_b32 s2, -1
406 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
407 ; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:2
408 ; GFX7-NEXT: s_waitcnt vmcnt(0)
409 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
410 ; GFX7-NEXT: ; return to shader part epilog
412 ; GFX10-LABEL: extractelement_sgpr_v4i8_idx2:
414 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
415 ; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:2
416 ; GFX10-NEXT: s_waitcnt vmcnt(0)
417 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
418 ; GFX10-NEXT: ; return to shader part epilog
420 ; GFX11-LABEL: extractelement_sgpr_v4i8_idx2:
422 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
423 ; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] offset:2
424 ; GFX11-NEXT: s_waitcnt vmcnt(0)
425 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
426 ; GFX11-NEXT: ; return to shader part epilog
; IR under test - load the full vector, then extract element 2.
427 %vector = load <4 x i8>, ptr addrspace(4) %ptr
428 %element = extractelement <4 x i8> %vector, i32 2
; Constant-index extract of element 3 (the last element) from a <4 x i8> loaded
; through an inreg addrspace(4) pointer.
; The gfx900, hawaii, gfx1010 and gfx1100 checks expect the constant to appear
; as an immediate offset of 3 on a single unsigned-byte load. The fiji checks
; instead add 3 to the pointer with s_add_u32/s_addc_u32 before a
; flat_load_ubyte that carries no offset.
432 define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx3(ptr addrspace(4) inreg %ptr) {
433 ; GFX9-LABEL: extractelement_sgpr_v4i8_idx3:
435 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
436 ; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:3
437 ; GFX9-NEXT: s_waitcnt vmcnt(0)
438 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
439 ; GFX9-NEXT: ; return to shader part epilog
441 ; GFX8-LABEL: extractelement_sgpr_v4i8_idx3:
443 ; GFX8-NEXT: s_add_u32 s0, s2, 3
444 ; GFX8-NEXT: s_addc_u32 s1, s3, 0
445 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
446 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
447 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
448 ; GFX8-NEXT: s_waitcnt vmcnt(0)
449 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
450 ; GFX8-NEXT: ; return to shader part epilog
452 ; GFX7-LABEL: extractelement_sgpr_v4i8_idx3:
454 ; GFX7-NEXT: s_mov_b32 s0, s2
455 ; GFX7-NEXT: s_mov_b32 s1, s3
456 ; GFX7-NEXT: s_mov_b32 s2, -1
457 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
458 ; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:3
459 ; GFX7-NEXT: s_waitcnt vmcnt(0)
460 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
461 ; GFX7-NEXT: ; return to shader part epilog
463 ; GFX10-LABEL: extractelement_sgpr_v4i8_idx3:
465 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
466 ; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:3
467 ; GFX10-NEXT: s_waitcnt vmcnt(0)
468 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
469 ; GFX10-NEXT: ; return to shader part epilog
471 ; GFX11-LABEL: extractelement_sgpr_v4i8_idx3:
473 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
474 ; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] offset:3
475 ; GFX11-NEXT: s_waitcnt vmcnt(0)
476 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
477 ; GFX11-NEXT: ; return to shader part epilog
; IR under test - load the full vector, then extract element 3.
478 %vector = load <4 x i8>, ptr addrspace(4) %ptr
479 %element = extractelement <4 x i8> %vector, i32 3
; Constant-index extract of element 0 from a <4 x i8> loaded through an
; addrspace(1) pointer, in a function with no calling-convention keyword.
; Every checked configuration expects a single unsigned-byte load at the
; unmodified pointer in v[0:1], with the result left in v0 before
; s_setpc_b64 s[30:31].
483 define i8 @extractelement_vgpr_v4i8_idx0(ptr addrspace(1) %ptr) {
484 ; GFX9-LABEL: extractelement_vgpr_v4i8_idx0:
486 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
487 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
488 ; GFX9-NEXT: s_waitcnt vmcnt(0)
489 ; GFX9-NEXT: s_setpc_b64 s[30:31]
491 ; GFX8-LABEL: extractelement_vgpr_v4i8_idx0:
493 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
494 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
495 ; GFX8-NEXT: s_waitcnt vmcnt(0)
496 ; GFX8-NEXT: s_setpc_b64 s[30:31]
498 ; GFX7-LABEL: extractelement_vgpr_v4i8_idx0:
500 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
501 ; GFX7-NEXT: s_mov_b32 s6, 0
502 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
503 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
504 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
505 ; GFX7-NEXT: s_waitcnt vmcnt(0)
506 ; GFX7-NEXT: s_setpc_b64 s[30:31]
508 ; GFX10-LABEL: extractelement_vgpr_v4i8_idx0:
510 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
511 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
512 ; GFX10-NEXT: s_waitcnt vmcnt(0)
513 ; GFX10-NEXT: s_setpc_b64 s[30:31]
515 ; GFX11-LABEL: extractelement_vgpr_v4i8_idx0:
517 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
518 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off
519 ; GFX11-NEXT: s_waitcnt vmcnt(0)
520 ; GFX11-NEXT: s_setpc_b64 s[30:31]
; IR under test - load the full vector, then extract element 0.
521 %vector = load <4 x i8>, ptr addrspace(1) %ptr
522 %element = extractelement <4 x i8> %vector, i32 0
; Constant-index extract of element 1 from a <4 x i8> loaded through an
; addrspace(1) pointer, in a function with no calling-convention keyword.
; The gfx900, hawaii, gfx1010 and gfx1100 checks expect an immediate offset of
; 1 on a single unsigned-byte load. The fiji checks instead add 1 to the
; pointer with v_add_u32/v_addc_u32 before a flat_load_ubyte without an offset.
526 define i8 @extractelement_vgpr_v4i8_idx1(ptr addrspace(1) %ptr) {
527 ; GFX9-LABEL: extractelement_vgpr_v4i8_idx1:
529 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
530 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:1
531 ; GFX9-NEXT: s_waitcnt vmcnt(0)
532 ; GFX9-NEXT: s_setpc_b64 s[30:31]
534 ; GFX8-LABEL: extractelement_vgpr_v4i8_idx1:
536 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
537 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 1, v0
538 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
539 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
540 ; GFX8-NEXT: s_waitcnt vmcnt(0)
541 ; GFX8-NEXT: s_setpc_b64 s[30:31]
543 ; GFX7-LABEL: extractelement_vgpr_v4i8_idx1:
545 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
546 ; GFX7-NEXT: s_mov_b32 s6, 0
547 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
548 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
549 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:1
550 ; GFX7-NEXT: s_waitcnt vmcnt(0)
551 ; GFX7-NEXT: s_setpc_b64 s[30:31]
553 ; GFX10-LABEL: extractelement_vgpr_v4i8_idx1:
555 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
556 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:1
557 ; GFX10-NEXT: s_waitcnt vmcnt(0)
558 ; GFX10-NEXT: s_setpc_b64 s[30:31]
560 ; GFX11-LABEL: extractelement_vgpr_v4i8_idx1:
562 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
563 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:1
564 ; GFX11-NEXT: s_waitcnt vmcnt(0)
565 ; GFX11-NEXT: s_setpc_b64 s[30:31]
; IR under test - load the full vector, then extract element 1.
566 %vector = load <4 x i8>, ptr addrspace(1) %ptr
567 %element = extractelement <4 x i8> %vector, i32 1
; Constant-index extract of element 2 from a <4 x i8> loaded through an
; addrspace(1) pointer, in a function with no calling-convention keyword.
; The gfx900, hawaii, gfx1010 and gfx1100 checks expect an immediate offset of
; 2 on a single unsigned-byte load. The fiji checks instead add 2 to the
; pointer with v_add_u32/v_addc_u32 before a flat_load_ubyte without an offset.
571 define i8 @extractelement_vgpr_v4i8_idx2(ptr addrspace(1) %ptr) {
572 ; GFX9-LABEL: extractelement_vgpr_v4i8_idx2:
574 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
575 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2
576 ; GFX9-NEXT: s_waitcnt vmcnt(0)
577 ; GFX9-NEXT: s_setpc_b64 s[30:31]
579 ; GFX8-LABEL: extractelement_vgpr_v4i8_idx2:
581 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
582 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 2, v0
583 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
584 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
585 ; GFX8-NEXT: s_waitcnt vmcnt(0)
586 ; GFX8-NEXT: s_setpc_b64 s[30:31]
588 ; GFX7-LABEL: extractelement_vgpr_v4i8_idx2:
590 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
591 ; GFX7-NEXT: s_mov_b32 s6, 0
592 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
593 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
594 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:2
595 ; GFX7-NEXT: s_waitcnt vmcnt(0)
596 ; GFX7-NEXT: s_setpc_b64 s[30:31]
598 ; GFX10-LABEL: extractelement_vgpr_v4i8_idx2:
600 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
601 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2
602 ; GFX10-NEXT: s_waitcnt vmcnt(0)
603 ; GFX10-NEXT: s_setpc_b64 s[30:31]
605 ; GFX11-LABEL: extractelement_vgpr_v4i8_idx2:
607 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
608 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:2
609 ; GFX11-NEXT: s_waitcnt vmcnt(0)
610 ; GFX11-NEXT: s_setpc_b64 s[30:31]
; IR under test - load the full vector, then extract element 2.
611 %vector = load <4 x i8>, ptr addrspace(1) %ptr
612 %element = extractelement <4 x i8> %vector, i32 2
; Constant-index extract of element 3 (the last element) from a <4 x i8> loaded
; through an addrspace(1) pointer, in a function with no calling-convention
; keyword.
; The gfx900, hawaii, gfx1010 and gfx1100 checks expect an immediate offset of
; 3 on a single unsigned-byte load. The fiji checks instead add 3 to the
; pointer with v_add_u32/v_addc_u32 before a flat_load_ubyte without an offset.
616 define i8 @extractelement_vgpr_v4i8_idx3(ptr addrspace(1) %ptr) {
617 ; GFX9-LABEL: extractelement_vgpr_v4i8_idx3:
619 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
620 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:3
621 ; GFX9-NEXT: s_waitcnt vmcnt(0)
622 ; GFX9-NEXT: s_setpc_b64 s[30:31]
624 ; GFX8-LABEL: extractelement_vgpr_v4i8_idx3:
626 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
627 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 3, v0
628 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
629 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
630 ; GFX8-NEXT: s_waitcnt vmcnt(0)
631 ; GFX8-NEXT: s_setpc_b64 s[30:31]
633 ; GFX7-LABEL: extractelement_vgpr_v4i8_idx3:
635 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
636 ; GFX7-NEXT: s_mov_b32 s6, 0
637 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
638 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
639 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:3
640 ; GFX7-NEXT: s_waitcnt vmcnt(0)
641 ; GFX7-NEXT: s_setpc_b64 s[30:31]
643 ; GFX10-LABEL: extractelement_vgpr_v4i8_idx3:
645 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
646 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:3
647 ; GFX10-NEXT: s_waitcnt vmcnt(0)
648 ; GFX10-NEXT: s_setpc_b64 s[30:31]
650 ; GFX11-LABEL: extractelement_vgpr_v4i8_idx3:
652 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
653 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:3
654 ; GFX11-NEXT: s_waitcnt vmcnt(0)
655 ; GFX11-NEXT: s_setpc_b64 s[30:31]
; IR under test - load the full vector, then extract element 3.
656 %vector = load <4 x i8>, ptr addrspace(1) %ptr
657 %element = extractelement <4 x i8> %vector, i32 3
; Dynamic-index extract from an <8 x i8> loaded through an inreg addrspace(4)
; pointer, with the i32 index also passed inreg.
; The expected code has the same shape as the <4 x i8> sgpr/sgpr test in this
; file except that the index is masked with 7 rather than 3 - a single
; unsigned-byte load is issued and the result is copied to s0 with
; v_readfirstlane_b32.
661 define amdgpu_ps i8 @extractelement_sgpr_v8i8_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) {
662 ; GFX9-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
664 ; GFX9-NEXT: s_and_b32 s0, s4, 7
665 ; GFX9-NEXT: s_ashr_i32 s1, s0, 31
666 ; GFX9-NEXT: s_add_u32 s0, s2, s0
667 ; GFX9-NEXT: s_addc_u32 s1, s3, s1
668 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
669 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1]
670 ; GFX9-NEXT: s_waitcnt vmcnt(0)
671 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
672 ; GFX9-NEXT: ; return to shader part epilog
674 ; GFX8-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
676 ; GFX8-NEXT: s_and_b32 s0, s4, 7
677 ; GFX8-NEXT: s_ashr_i32 s1, s0, 31
678 ; GFX8-NEXT: s_add_u32 s0, s2, s0
679 ; GFX8-NEXT: s_addc_u32 s1, s3, s1
680 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
681 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
682 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
683 ; GFX8-NEXT: s_waitcnt vmcnt(0)
684 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
685 ; GFX8-NEXT: ; return to shader part epilog
687 ; GFX7-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
689 ; GFX7-NEXT: s_and_b32 s4, s4, 7
690 ; GFX7-NEXT: s_ashr_i32 s5, s4, 31
691 ; GFX7-NEXT: v_mov_b32_e32 v0, s4
692 ; GFX7-NEXT: s_mov_b32 s0, s2
693 ; GFX7-NEXT: s_mov_b32 s1, s3
694 ; GFX7-NEXT: s_mov_b32 s2, 0
695 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
696 ; GFX7-NEXT: v_mov_b32_e32 v1, s5
697 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
698 ; GFX7-NEXT: s_waitcnt vmcnt(0)
699 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
700 ; GFX7-NEXT: ; return to shader part epilog
702 ; GFX10-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
704 ; GFX10-NEXT: s_and_b32 s0, s4, 7
705 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
706 ; GFX10-NEXT: s_ashr_i32 s1, s0, 31
707 ; GFX10-NEXT: s_add_u32 s0, s2, s0
708 ; GFX10-NEXT: s_addc_u32 s1, s3, s1
709 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1]
710 ; GFX10-NEXT: s_waitcnt vmcnt(0)
711 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
712 ; GFX10-NEXT: ; return to shader part epilog
714 ; GFX11-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
716 ; GFX11-NEXT: s_and_b32 s0, s4, 7
717 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
718 ; GFX11-NEXT: s_ashr_i32 s1, s0, 31
719 ; GFX11-NEXT: s_add_u32 s0, s2, s0
720 ; GFX11-NEXT: s_addc_u32 s1, s3, s1
721 ; GFX11-NEXT: global_load_u8 v0, v0, s[0:1]
722 ; GFX11-NEXT: s_waitcnt vmcnt(0)
723 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
724 ; GFX11-NEXT: ; return to shader part epilog
; IR under test - load the full vector, then extract the element at %idx.
725 %vector = load <8 x i8>, ptr addrspace(4) %ptr
726 %element = extractelement <8 x i8> %vector, i32 %idx
; Dynamic-index extract from an <8 x i8> loaded through an addrspace(1) pointer
; that is not marked inreg, while the i32 index is passed inreg.
; The checks mask the index with 7 in scalar registers and expect one
; unsigned-byte load followed by v_readfirstlane_b32 into s0. In the hawaii
; run the masked index is placed in s[0:1] of the buffer resource and the
; pointer registers v[0:1] are used as the addr64 address operand.
730 define amdgpu_ps i8 @extractelement_vgpr_v8i8_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) {
731 ; GFX9-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
733 ; GFX9-NEXT: s_and_b32 s0, s2, 7
734 ; GFX9-NEXT: s_ashr_i32 s1, s0, 31
735 ; GFX9-NEXT: v_mov_b32_e32 v3, s1
736 ; GFX9-NEXT: v_mov_b32_e32 v2, s0
737 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
738 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
739 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
740 ; GFX9-NEXT: s_waitcnt vmcnt(0)
741 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
742 ; GFX9-NEXT: ; return to shader part epilog
744 ; GFX8-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
746 ; GFX8-NEXT: s_and_b32 s0, s2, 7
747 ; GFX8-NEXT: s_ashr_i32 s1, s0, 31
748 ; GFX8-NEXT: v_mov_b32_e32 v3, s1
749 ; GFX8-NEXT: v_mov_b32_e32 v2, s0
750 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
751 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
752 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
753 ; GFX8-NEXT: s_waitcnt vmcnt(0)
754 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
755 ; GFX8-NEXT: ; return to shader part epilog
757 ; GFX7-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
759 ; GFX7-NEXT: s_and_b32 s0, s2, 7
760 ; GFX7-NEXT: s_ashr_i32 s1, s0, 31
761 ; GFX7-NEXT: s_mov_b32 s2, 0
762 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
763 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
764 ; GFX7-NEXT: s_waitcnt vmcnt(0)
765 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
766 ; GFX7-NEXT: ; return to shader part epilog
768 ; GFX10-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
770 ; GFX10-NEXT: s_and_b32 s0, s2, 7
771 ; GFX10-NEXT: s_ashr_i32 s1, s0, 31
772 ; GFX10-NEXT: v_mov_b32_e32 v3, s1
773 ; GFX10-NEXT: v_mov_b32_e32 v2, s0
774 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
775 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
776 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
777 ; GFX10-NEXT: s_waitcnt vmcnt(0)
778 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
779 ; GFX10-NEXT: ; return to shader part epilog
781 ; GFX11-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
783 ; GFX11-NEXT: s_and_b32 s0, s2, 7
784 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
785 ; GFX11-NEXT: s_ashr_i32 s1, s0, 31
786 ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
787 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
788 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
789 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
790 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off
791 ; GFX11-NEXT: s_waitcnt vmcnt(0)
792 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
793 ; GFX11-NEXT: ; return to shader part epilog
; IR under test - load the full vector, then extract the element at %idx.
794 %vector = load <8 x i8>, ptr addrspace(1) %ptr
795 %element = extractelement <8 x i8> %vector, i32 %idx
; Dynamic-index extract from an <8 x i8> loaded through an addrspace(1)
; pointer, with neither the pointer nor the i32 index marked inreg, in a
; function with no calling-convention keyword.
; The index is masked with 7 using vector ALU instructions and added to the
; pointer; a single unsigned-byte load is expected, with the result left in v0
; before s_setpc_b64 s[30:31].
799 define i8 @extractelement_vgpr_v8i8_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) {
800 ; GFX9-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
802 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
803 ; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
804 ; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
805 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
806 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
807 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
808 ; GFX9-NEXT: s_waitcnt vmcnt(0)
809 ; GFX9-NEXT: s_setpc_b64 s[30:31]
811 ; GFX8-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
813 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
814 ; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
815 ; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
816 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
817 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
818 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
819 ; GFX8-NEXT: s_waitcnt vmcnt(0)
820 ; GFX8-NEXT: s_setpc_b64 s[30:31]
822 ; GFX7-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
824 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
825 ; GFX7-NEXT: v_and_b32_e32 v2, 7, v2
826 ; GFX7-NEXT: v_ashrrev_i32_e32 v3, 31, v2
827 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
828 ; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
829 ; GFX7-NEXT: s_mov_b32 s6, 0
830 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
831 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
832 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
833 ; GFX7-NEXT: s_waitcnt vmcnt(0)
834 ; GFX7-NEXT: s_setpc_b64 s[30:31]
836 ; GFX10-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
838 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
839 ; GFX10-NEXT: v_and_b32_e32 v2, 7, v2
840 ; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
841 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
842 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
843 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
844 ; GFX10-NEXT: s_waitcnt vmcnt(0)
845 ; GFX10-NEXT: s_setpc_b64 s[30:31]
847 ; GFX11-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
849 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
850 ; GFX11-NEXT: v_and_b32_e32 v2, 7, v2
851 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
852 ; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
853 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
854 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
855 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off
856 ; GFX11-NEXT: s_waitcnt vmcnt(0)
857 ; GFX11-NEXT: s_setpc_b64 s[30:31]
; IR under test - load the full vector, then extract the element at %idx.
858 %vector = load <8 x i8>, ptr addrspace(1) %ptr
859 %element = extractelement <8 x i8> %vector, i32 %idx
; Dynamic-index extract from an <8 x i8> loaded through an inreg addrspace(4)
; pointer; the index is a plain (non-inreg) i32 argument.
; The checks expect the index (v0) to be masked with 7 and combined with the
; pointer in s[2:3], a single unsigned byte to be loaded, and the result to be
; moved to s0 with v_readfirstlane_b32. GFX9/GFX8/GFX10/GFX11 do the 64-bit
; add in VALU registers; GFX7 instead passes the masked index in v[0:1] to an
; addr64 buffer load whose descriptor s[0:3] starts with the pointer.
; The check lines below are autogenerated; regenerate them instead of editing.
863 define amdgpu_ps i8 @extractelement_sgpr_v8i8_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) {
864 ; GFX9-LABEL: extractelement_sgpr_v8i8_vgpr_idx:
866 ; GFX9-NEXT: v_and_b32_e32 v2, 7, v0
867 ; GFX9-NEXT: v_mov_b32_e32 v0, s2
868 ; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
869 ; GFX9-NEXT: v_mov_b32_e32 v1, s3
870 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
871 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
872 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
873 ; GFX9-NEXT: s_waitcnt vmcnt(0)
874 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
875 ; GFX9-NEXT: ; return to shader part epilog
877 ; GFX8-LABEL: extractelement_sgpr_v8i8_vgpr_idx:
879 ; GFX8-NEXT: v_and_b32_e32 v2, 7, v0
880 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
881 ; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
882 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
883 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
884 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
885 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
886 ; GFX8-NEXT: s_waitcnt vmcnt(0)
887 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
888 ; GFX8-NEXT: ; return to shader part epilog
890 ; GFX7-LABEL: extractelement_sgpr_v8i8_vgpr_idx:
892 ; GFX7-NEXT: v_and_b32_e32 v0, 7, v0
893 ; GFX7-NEXT: s_mov_b32 s0, s2
894 ; GFX7-NEXT: s_mov_b32 s1, s3
895 ; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
896 ; GFX7-NEXT: s_mov_b32 s2, 0
897 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
898 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
899 ; GFX7-NEXT: s_waitcnt vmcnt(0)
900 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
901 ; GFX7-NEXT: ; return to shader part epilog
903 ; GFX10-LABEL: extractelement_sgpr_v8i8_vgpr_idx:
905 ; GFX10-NEXT: v_and_b32_e32 v2, 7, v0
906 ; GFX10-NEXT: v_mov_b32_e32 v0, s2
907 ; GFX10-NEXT: v_mov_b32_e32 v1, s3
908 ; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
909 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
910 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
911 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
912 ; GFX10-NEXT: s_waitcnt vmcnt(0)
913 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
914 ; GFX10-NEXT: ; return to shader part epilog
916 ; GFX11-LABEL: extractelement_sgpr_v8i8_vgpr_idx:
918 ; GFX11-NEXT: v_and_b32_e32 v2, 7, v0
919 ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
920 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
921 ; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
922 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
923 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
924 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
925 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off
926 ; GFX11-NEXT: s_waitcnt vmcnt(0)
927 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
928 ; GFX11-NEXT: ; return to shader part epilog
929 %vector = load <8 x i8>, ptr addrspace(4) %ptr
930 %element = extractelement <8 x i8> %vector, i32 %idx
; Constant index 0 on an <8 x i8> loaded through an inreg addrspace(4) pointer.
; The checks expect one unsigned byte load taken directly from the pointer in
; s[2:3], with no offset and no add on any target, and the result moved to s0
; with v_readfirstlane_b32.
; The check lines below are autogenerated; regenerate them instead of editing.
934 define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx0(ptr addrspace(4) inreg %ptr) {
935 ; GFX9-LABEL: extractelement_sgpr_v8i8_idx0:
937 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
938 ; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3]
939 ; GFX9-NEXT: s_waitcnt vmcnt(0)
940 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
941 ; GFX9-NEXT: ; return to shader part epilog
943 ; GFX8-LABEL: extractelement_sgpr_v8i8_idx0:
945 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
946 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
947 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
948 ; GFX8-NEXT: s_waitcnt vmcnt(0)
949 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
950 ; GFX8-NEXT: ; return to shader part epilog
952 ; GFX7-LABEL: extractelement_sgpr_v8i8_idx0:
954 ; GFX7-NEXT: s_mov_b32 s0, s2
955 ; GFX7-NEXT: s_mov_b32 s1, s3
956 ; GFX7-NEXT: s_mov_b32 s2, -1
957 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
958 ; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
959 ; GFX7-NEXT: s_waitcnt vmcnt(0)
960 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
961 ; GFX7-NEXT: ; return to shader part epilog
963 ; GFX10-LABEL: extractelement_sgpr_v8i8_idx0:
965 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
966 ; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3]
967 ; GFX10-NEXT: s_waitcnt vmcnt(0)
968 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
969 ; GFX10-NEXT: ; return to shader part epilog
971 ; GFX11-LABEL: extractelement_sgpr_v8i8_idx0:
973 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
974 ; GFX11-NEXT: global_load_u8 v0, v0, s[2:3]
975 ; GFX11-NEXT: s_waitcnt vmcnt(0)
976 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
977 ; GFX11-NEXT: ; return to shader part epilog
978 %vector = load <8 x i8>, ptr addrspace(4) %ptr
979 %element = extractelement <8 x i8> %vector, i32 0
; Constant index 1 on an <8 x i8> loaded through an inreg addrspace(4) pointer.
; The checks expect one unsigned byte load: GFX9, GFX10, GFX11 and GFX7 encode
; the index as offset:1 on the load, while GFX8 adds 1 to s[2:3] with
; s_add_u32/s_addc_u32 before a flat_load_ubyte. The result is moved to s0.
; The check lines below are autogenerated; regenerate them instead of editing.
983 define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx1(ptr addrspace(4) inreg %ptr) {
984 ; GFX9-LABEL: extractelement_sgpr_v8i8_idx1:
986 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
987 ; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:1
988 ; GFX9-NEXT: s_waitcnt vmcnt(0)
989 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
990 ; GFX9-NEXT: ; return to shader part epilog
992 ; GFX8-LABEL: extractelement_sgpr_v8i8_idx1:
994 ; GFX8-NEXT: s_add_u32 s0, s2, 1
995 ; GFX8-NEXT: s_addc_u32 s1, s3, 0
996 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
997 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
998 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
999 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1000 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
1001 ; GFX8-NEXT: ; return to shader part epilog
1003 ; GFX7-LABEL: extractelement_sgpr_v8i8_idx1:
1005 ; GFX7-NEXT: s_mov_b32 s0, s2
1006 ; GFX7-NEXT: s_mov_b32 s1, s3
1007 ; GFX7-NEXT: s_mov_b32 s2, -1
1008 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
1009 ; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:1
1010 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1011 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
1012 ; GFX7-NEXT: ; return to shader part epilog
1014 ; GFX10-LABEL: extractelement_sgpr_v8i8_idx1:
1016 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1017 ; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:1
1018 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1019 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
1020 ; GFX10-NEXT: ; return to shader part epilog
1022 ; GFX11-LABEL: extractelement_sgpr_v8i8_idx1:
1024 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1025 ; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] offset:1
1026 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1027 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
1028 ; GFX11-NEXT: ; return to shader part epilog
1029 %vector = load <8 x i8>, ptr addrspace(4) %ptr
1030 %element = extractelement <8 x i8> %vector, i32 1
; Constant index 2 on an <8 x i8> loaded through an inreg addrspace(4) pointer.
; The checks expect one unsigned byte load: GFX9, GFX10, GFX11 and GFX7 encode
; the index as offset:2 on the load, while GFX8 adds 2 to s[2:3] with
; s_add_u32/s_addc_u32 before a flat_load_ubyte. The result is moved to s0.
; The check lines below are autogenerated; regenerate them instead of editing.
1034 define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx2(ptr addrspace(4) inreg %ptr) {
1035 ; GFX9-LABEL: extractelement_sgpr_v8i8_idx2:
1037 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1038 ; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:2
1039 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1040 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
1041 ; GFX9-NEXT: ; return to shader part epilog
1043 ; GFX8-LABEL: extractelement_sgpr_v8i8_idx2:
1045 ; GFX8-NEXT: s_add_u32 s0, s2, 2
1046 ; GFX8-NEXT: s_addc_u32 s1, s3, 0
1047 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
1048 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
1049 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
1050 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1051 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
1052 ; GFX8-NEXT: ; return to shader part epilog
1054 ; GFX7-LABEL: extractelement_sgpr_v8i8_idx2:
1056 ; GFX7-NEXT: s_mov_b32 s0, s2
1057 ; GFX7-NEXT: s_mov_b32 s1, s3
1058 ; GFX7-NEXT: s_mov_b32 s2, -1
1059 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
1060 ; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:2
1061 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1062 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
1063 ; GFX7-NEXT: ; return to shader part epilog
1065 ; GFX10-LABEL: extractelement_sgpr_v8i8_idx2:
1067 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1068 ; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:2
1069 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1070 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
1071 ; GFX10-NEXT: ; return to shader part epilog
1073 ; GFX11-LABEL: extractelement_sgpr_v8i8_idx2:
1075 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1076 ; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] offset:2
1077 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1078 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
1079 ; GFX11-NEXT: ; return to shader part epilog
1080 %vector = load <8 x i8>, ptr addrspace(4) %ptr
1081 %element = extractelement <8 x i8> %vector, i32 2
; Constant index 3 on an <8 x i8> loaded through an inreg addrspace(4) pointer.
; The checks expect one unsigned byte load: GFX9, GFX10, GFX11 and GFX7 encode
; the index as offset:3 on the load, while GFX8 adds 3 to s[2:3] with
; s_add_u32/s_addc_u32 before a flat_load_ubyte. The result is moved to s0.
; The check lines below are autogenerated; regenerate them instead of editing.
1085 define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx3(ptr addrspace(4) inreg %ptr) {
1086 ; GFX9-LABEL: extractelement_sgpr_v8i8_idx3:
1088 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1089 ; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:3
1090 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1091 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
1092 ; GFX9-NEXT: ; return to shader part epilog
1094 ; GFX8-LABEL: extractelement_sgpr_v8i8_idx3:
1096 ; GFX8-NEXT: s_add_u32 s0, s2, 3
1097 ; GFX8-NEXT: s_addc_u32 s1, s3, 0
1098 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
1099 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
1100 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
1101 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1102 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
1103 ; GFX8-NEXT: ; return to shader part epilog
1105 ; GFX7-LABEL: extractelement_sgpr_v8i8_idx3:
1107 ; GFX7-NEXT: s_mov_b32 s0, s2
1108 ; GFX7-NEXT: s_mov_b32 s1, s3
1109 ; GFX7-NEXT: s_mov_b32 s2, -1
1110 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
1111 ; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:3
1112 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1113 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
1114 ; GFX7-NEXT: ; return to shader part epilog
1116 ; GFX10-LABEL: extractelement_sgpr_v8i8_idx3:
1118 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1119 ; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:3
1120 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1121 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
1122 ; GFX10-NEXT: ; return to shader part epilog
1124 ; GFX11-LABEL: extractelement_sgpr_v8i8_idx3:
1126 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1127 ; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] offset:3
1128 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1129 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
1130 ; GFX11-NEXT: ; return to shader part epilog
1131 %vector = load <8 x i8>, ptr addrspace(4) %ptr
1132 %element = extractelement <8 x i8> %vector, i32 3
; Constant index 4 on an <8 x i8> loaded through an inreg addrspace(4) pointer.
; The checks expect one unsigned byte load: GFX9, GFX10, GFX11 and GFX7 encode
; the index as offset:4 on the load, while GFX8 adds 4 to s[2:3] with
; s_add_u32/s_addc_u32 before a flat_load_ubyte. The result is moved to s0.
; The check lines below are autogenerated; regenerate them instead of editing.
1136 define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx4(ptr addrspace(4) inreg %ptr) {
1137 ; GFX9-LABEL: extractelement_sgpr_v8i8_idx4:
1139 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1140 ; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:4
1141 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1142 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
1143 ; GFX9-NEXT: ; return to shader part epilog
1145 ; GFX8-LABEL: extractelement_sgpr_v8i8_idx4:
1147 ; GFX8-NEXT: s_add_u32 s0, s2, 4
1148 ; GFX8-NEXT: s_addc_u32 s1, s3, 0
1149 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
1150 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
1151 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
1152 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1153 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
1154 ; GFX8-NEXT: ; return to shader part epilog
1156 ; GFX7-LABEL: extractelement_sgpr_v8i8_idx4:
1158 ; GFX7-NEXT: s_mov_b32 s0, s2
1159 ; GFX7-NEXT: s_mov_b32 s1, s3
1160 ; GFX7-NEXT: s_mov_b32 s2, -1
1161 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
1162 ; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:4
1163 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1164 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
1165 ; GFX7-NEXT: ; return to shader part epilog
1167 ; GFX10-LABEL: extractelement_sgpr_v8i8_idx4:
1169 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1170 ; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:4
1171 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1172 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
1173 ; GFX10-NEXT: ; return to shader part epilog
1175 ; GFX11-LABEL: extractelement_sgpr_v8i8_idx4:
1177 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1178 ; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] offset:4
1179 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1180 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
1181 ; GFX11-NEXT: ; return to shader part epilog
1182 %vector = load <8 x i8>, ptr addrspace(4) %ptr
1183 %element = extractelement <8 x i8> %vector, i32 4
; Constant index 5 on an <8 x i8> loaded through an inreg addrspace(4) pointer.
; The checks expect one unsigned byte load: GFX9, GFX10, GFX11 and GFX7 encode
; the index as offset:5 on the load, while GFX8 adds 5 to s[2:3] with
; s_add_u32/s_addc_u32 before a flat_load_ubyte. The result is moved to s0.
; The check lines below are autogenerated; regenerate them instead of editing.
1187 define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx5(ptr addrspace(4) inreg %ptr) {
1188 ; GFX9-LABEL: extractelement_sgpr_v8i8_idx5:
1190 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1191 ; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:5
1192 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1193 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
1194 ; GFX9-NEXT: ; return to shader part epilog
1196 ; GFX8-LABEL: extractelement_sgpr_v8i8_idx5:
1198 ; GFX8-NEXT: s_add_u32 s0, s2, 5
1199 ; GFX8-NEXT: s_addc_u32 s1, s3, 0
1200 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
1201 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
1202 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
1203 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1204 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
1205 ; GFX8-NEXT: ; return to shader part epilog
1207 ; GFX7-LABEL: extractelement_sgpr_v8i8_idx5:
1209 ; GFX7-NEXT: s_mov_b32 s0, s2
1210 ; GFX7-NEXT: s_mov_b32 s1, s3
1211 ; GFX7-NEXT: s_mov_b32 s2, -1
1212 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
1213 ; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:5
1214 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1215 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
1216 ; GFX7-NEXT: ; return to shader part epilog
1218 ; GFX10-LABEL: extractelement_sgpr_v8i8_idx5:
1220 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1221 ; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:5
1222 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1223 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
1224 ; GFX10-NEXT: ; return to shader part epilog
1226 ; GFX11-LABEL: extractelement_sgpr_v8i8_idx5:
1228 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1229 ; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] offset:5
1230 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1231 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
1232 ; GFX11-NEXT: ; return to shader part epilog
1233 %vector = load <8 x i8>, ptr addrspace(4) %ptr
1234 %element = extractelement <8 x i8> %vector, i32 5
; Constant index 6 on an <8 x i8> loaded through an inreg addrspace(4) pointer.
; The checks expect one unsigned byte load: GFX9, GFX10, GFX11 and GFX7 encode
; the index as offset:6 on the load, while GFX8 adds 6 to s[2:3] with
; s_add_u32/s_addc_u32 before a flat_load_ubyte. The result is moved to s0.
; The check lines below are autogenerated; regenerate them instead of editing.
1238 define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx6(ptr addrspace(4) inreg %ptr) {
1239 ; GFX9-LABEL: extractelement_sgpr_v8i8_idx6:
1241 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1242 ; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:6
1243 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1244 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
1245 ; GFX9-NEXT: ; return to shader part epilog
1247 ; GFX8-LABEL: extractelement_sgpr_v8i8_idx6:
1249 ; GFX8-NEXT: s_add_u32 s0, s2, 6
1250 ; GFX8-NEXT: s_addc_u32 s1, s3, 0
1251 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
1252 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
1253 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
1254 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1255 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
1256 ; GFX8-NEXT: ; return to shader part epilog
1258 ; GFX7-LABEL: extractelement_sgpr_v8i8_idx6:
1260 ; GFX7-NEXT: s_mov_b32 s0, s2
1261 ; GFX7-NEXT: s_mov_b32 s1, s3
1262 ; GFX7-NEXT: s_mov_b32 s2, -1
1263 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
1264 ; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:6
1265 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1266 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
1267 ; GFX7-NEXT: ; return to shader part epilog
1269 ; GFX10-LABEL: extractelement_sgpr_v8i8_idx6:
1271 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1272 ; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:6
1273 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1274 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
1275 ; GFX10-NEXT: ; return to shader part epilog
1277 ; GFX11-LABEL: extractelement_sgpr_v8i8_idx6:
1279 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1280 ; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] offset:6
1281 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1282 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
1283 ; GFX11-NEXT: ; return to shader part epilog
1284 %vector = load <8 x i8>, ptr addrspace(4) %ptr
1285 %element = extractelement <8 x i8> %vector, i32 6
; Constant index 7 on an <8 x i8> loaded through an inreg addrspace(4) pointer.
; The checks expect one unsigned byte load: GFX9, GFX10, GFX11 and GFX7 encode
; the index as offset:7 on the load, while GFX8 adds 7 to s[2:3] with
; s_add_u32/s_addc_u32 before a flat_load_ubyte. The result is moved to s0.
; The check lines below are autogenerated; regenerate them instead of editing.
1289 define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx7(ptr addrspace(4) inreg %ptr) {
1290 ; GFX9-LABEL: extractelement_sgpr_v8i8_idx7:
1292 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1293 ; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:7
1294 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1295 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
1296 ; GFX9-NEXT: ; return to shader part epilog
1298 ; GFX8-LABEL: extractelement_sgpr_v8i8_idx7:
1300 ; GFX8-NEXT: s_add_u32 s0, s2, 7
1301 ; GFX8-NEXT: s_addc_u32 s1, s3, 0
1302 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
1303 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
1304 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
1305 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1306 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
1307 ; GFX8-NEXT: ; return to shader part epilog
1309 ; GFX7-LABEL: extractelement_sgpr_v8i8_idx7:
1311 ; GFX7-NEXT: s_mov_b32 s0, s2
1312 ; GFX7-NEXT: s_mov_b32 s1, s3
1313 ; GFX7-NEXT: s_mov_b32 s2, -1
1314 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
1315 ; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:7
1316 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1317 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
1318 ; GFX7-NEXT: ; return to shader part epilog
1320 ; GFX10-LABEL: extractelement_sgpr_v8i8_idx7:
1322 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1323 ; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:7
1324 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1325 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
1326 ; GFX10-NEXT: ; return to shader part epilog
1328 ; GFX11-LABEL: extractelement_sgpr_v8i8_idx7:
1330 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1331 ; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] offset:7
1332 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1333 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
1334 ; GFX11-NEXT: ; return to shader part epilog
1335 %vector = load <8 x i8>, ptr addrspace(4) %ptr
1336 %element = extractelement <8 x i8> %vector, i32 7
; Constant index 0 on an <8 x i8> loaded through an addrspace(1) pointer, in a
; function without the amdgpu_ps calling convention.
; The checks expect a single unsigned byte load addressed by v[0:1] with no
; offset and no add on any target, followed by s_setpc_b64 s[30:31].
; The check lines below are autogenerated; regenerate them instead of editing.
1340 define i8 @extractelement_vgpr_v8i8_idx0(ptr addrspace(1) %ptr) {
1341 ; GFX9-LABEL: extractelement_vgpr_v8i8_idx0:
1343 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1344 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
1345 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1346 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1348 ; GFX8-LABEL: extractelement_vgpr_v8i8_idx0:
1350 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1351 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
1352 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1353 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1355 ; GFX7-LABEL: extractelement_vgpr_v8i8_idx0:
1357 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1358 ; GFX7-NEXT: s_mov_b32 s6, 0
1359 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1360 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1361 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
1362 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1363 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1365 ; GFX10-LABEL: extractelement_vgpr_v8i8_idx0:
1367 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1368 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
1369 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1370 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1372 ; GFX11-LABEL: extractelement_vgpr_v8i8_idx0:
1374 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1375 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off
1376 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1377 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1378 %vector = load <8 x i8>, ptr addrspace(1) %ptr
1379 %element = extractelement <8 x i8> %vector, i32 0
; Constant index 1 on an <8 x i8> loaded through an addrspace(1) pointer, in a
; function without the amdgpu_ps calling convention.
; The checks expect a single unsigned byte load addressed by v[0:1]: GFX9,
; GFX10, GFX11 and GFX7 encode the index as offset:1 on the load, while GFX8
; adds 1 to v[0:1] with v_add_u32/v_addc_u32 before a flat_load_ubyte.
; The check lines below are autogenerated; regenerate them instead of editing.
1383 define i8 @extractelement_vgpr_v8i8_idx1(ptr addrspace(1) %ptr) {
1384 ; GFX9-LABEL: extractelement_vgpr_v8i8_idx1:
1386 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1387 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:1
1388 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1389 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1391 ; GFX8-LABEL: extractelement_vgpr_v8i8_idx1:
1393 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1394 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 1, v0
1395 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1396 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
1397 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1398 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1400 ; GFX7-LABEL: extractelement_vgpr_v8i8_idx1:
1402 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1403 ; GFX7-NEXT: s_mov_b32 s6, 0
1404 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1405 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1406 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:1
1407 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1408 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1410 ; GFX10-LABEL: extractelement_vgpr_v8i8_idx1:
1412 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1413 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:1
1414 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1415 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1417 ; GFX11-LABEL: extractelement_vgpr_v8i8_idx1:
1419 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1420 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:1
1421 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1422 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1423 %vector = load <8 x i8>, ptr addrspace(1) %ptr
1424 %element = extractelement <8 x i8> %vector, i32 1
; Constant index 2 on an <8 x i8> loaded through an addrspace(1) pointer, in a
; function without the amdgpu_ps calling convention.
; The checks expect a single unsigned byte load addressed by v[0:1]: GFX9,
; GFX10, GFX11 and GFX7 encode the index as offset:2 on the load, while GFX8
; adds 2 to v[0:1] with v_add_u32/v_addc_u32 before a flat_load_ubyte.
; The check lines below are autogenerated; regenerate them instead of editing.
1428 define i8 @extractelement_vgpr_v8i8_idx2(ptr addrspace(1) %ptr) {
1429 ; GFX9-LABEL: extractelement_vgpr_v8i8_idx2:
1431 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1432 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2
1433 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1434 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1436 ; GFX8-LABEL: extractelement_vgpr_v8i8_idx2:
1438 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1439 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 2, v0
1440 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1441 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
1442 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1443 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1445 ; GFX7-LABEL: extractelement_vgpr_v8i8_idx2:
1447 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1448 ; GFX7-NEXT: s_mov_b32 s6, 0
1449 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1450 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1451 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:2
1452 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1453 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1455 ; GFX10-LABEL: extractelement_vgpr_v8i8_idx2:
1457 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1458 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2
1459 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1460 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1462 ; GFX11-LABEL: extractelement_vgpr_v8i8_idx2:
1464 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1465 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:2
1466 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1467 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1468 %vector = load <8 x i8>, ptr addrspace(1) %ptr
1469 %element = extractelement <8 x i8> %vector, i32 2
; Constant index 3 on an <8 x i8> loaded through an addrspace(1) pointer, in a
; function without the amdgpu_ps calling convention.
; The checks expect a single unsigned byte load addressed by v[0:1]: GFX9,
; GFX10, GFX11 and GFX7 encode the index as offset:3 on the load, while GFX8
; adds 3 to v[0:1] with v_add_u32/v_addc_u32 before a flat_load_ubyte.
; The check lines below are autogenerated; regenerate them instead of editing.
1473 define i8 @extractelement_vgpr_v8i8_idx3(ptr addrspace(1) %ptr) {
1474 ; GFX9-LABEL: extractelement_vgpr_v8i8_idx3:
1476 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1477 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:3
1478 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1479 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1481 ; GFX8-LABEL: extractelement_vgpr_v8i8_idx3:
1483 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1484 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 3, v0
1485 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1486 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
1487 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1488 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1490 ; GFX7-LABEL: extractelement_vgpr_v8i8_idx3:
1492 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1493 ; GFX7-NEXT: s_mov_b32 s6, 0
1494 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1495 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1496 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:3
1497 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1498 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1500 ; GFX10-LABEL: extractelement_vgpr_v8i8_idx3:
1502 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1503 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:3
1504 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1505 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1507 ; GFX11-LABEL: extractelement_vgpr_v8i8_idx3:
1509 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1510 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:3
1511 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1512 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1513 %vector = load <8 x i8>, ptr addrspace(1) %ptr
1514 %element = extractelement <8 x i8> %vector, i32 3
; Constant index 4 on an <8 x i8> loaded through an addrspace(1) pointer, in a
; function without the amdgpu_ps calling convention.
; The checks expect a single unsigned byte load addressed by v[0:1]: GFX9,
; GFX10, GFX11 and GFX7 encode the index as offset:4 on the load, while GFX8
; adds 4 to v[0:1] with v_add_u32/v_addc_u32 before a flat_load_ubyte.
; The check lines below are autogenerated; regenerate them instead of editing.
1518 define i8 @extractelement_vgpr_v8i8_idx4(ptr addrspace(1) %ptr) {
1519 ; GFX9-LABEL: extractelement_vgpr_v8i8_idx4:
1521 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1522 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4
1523 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1524 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1526 ; GFX8-LABEL: extractelement_vgpr_v8i8_idx4:
1528 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1529 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 4, v0
1530 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1531 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
1532 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1533 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1535 ; GFX7-LABEL: extractelement_vgpr_v8i8_idx4:
1537 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1538 ; GFX7-NEXT: s_mov_b32 s6, 0
1539 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1540 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1541 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:4
1542 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1543 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1545 ; GFX10-LABEL: extractelement_vgpr_v8i8_idx4:
1547 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1548 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:4
1549 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1550 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1552 ; GFX11-LABEL: extractelement_vgpr_v8i8_idx4:
1554 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1555 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:4
1556 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1557 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1558 %vector = load <8 x i8>, ptr addrspace(1) %ptr
1559 %element = extractelement <8 x i8> %vector, i32 4
; Constant index 5 on an <8 x i8> loaded through an addrspace(1) pointer, in a
; function without the amdgpu_ps calling convention.
; The checks expect a single unsigned byte load addressed by v[0:1]: GFX9,
; GFX10, GFX11 and GFX7 encode the index as offset:5 on the load, while GFX8
; adds 5 to v[0:1] with v_add_u32/v_addc_u32 before a flat_load_ubyte.
; The check lines below are autogenerated; regenerate them instead of editing.
1563 define i8 @extractelement_vgpr_v8i8_idx5(ptr addrspace(1) %ptr) {
1564 ; GFX9-LABEL: extractelement_vgpr_v8i8_idx5:
1566 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1567 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:5
1568 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1569 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1571 ; GFX8-LABEL: extractelement_vgpr_v8i8_idx5:
1573 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1574 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 5, v0
1575 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1576 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
1577 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1578 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1580 ; GFX7-LABEL: extractelement_vgpr_v8i8_idx5:
1582 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1583 ; GFX7-NEXT: s_mov_b32 s6, 0
1584 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1585 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1586 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:5
1587 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1588 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1590 ; GFX10-LABEL: extractelement_vgpr_v8i8_idx5:
1592 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1593 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:5
1594 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1595 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1597 ; GFX11-LABEL: extractelement_vgpr_v8i8_idx5:
1599 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1600 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:5
1601 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1602 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1603 %vector = load <8 x i8>, ptr addrspace(1) %ptr
1604 %element = extractelement <8 x i8> %vector, i32 5
; Constant index 6 on an <8 x i8> loaded through an addrspace(1) pointer, in a
; function without the amdgpu_ps calling convention.
; The checks expect a single unsigned byte load addressed by v[0:1]: GFX9,
; GFX10, GFX11 and GFX7 encode the index as offset:6 on the load, while GFX8
; adds 6 to v[0:1] with v_add_u32/v_addc_u32 before a flat_load_ubyte.
; The check lines below are autogenerated; regenerate them instead of editing.
1608 define i8 @extractelement_vgpr_v8i8_idx6(ptr addrspace(1) %ptr) {
1609 ; GFX9-LABEL: extractelement_vgpr_v8i8_idx6:
1611 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1612 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:6
1613 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1614 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1616 ; GFX8-LABEL: extractelement_vgpr_v8i8_idx6:
1618 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1619 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 6, v0
1620 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1621 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
1622 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1623 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1625 ; GFX7-LABEL: extractelement_vgpr_v8i8_idx6:
1627 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1628 ; GFX7-NEXT: s_mov_b32 s6, 0
1629 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1630 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1631 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:6
1632 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1633 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1635 ; GFX10-LABEL: extractelement_vgpr_v8i8_idx6:
1637 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1638 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:6
1639 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1640 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1642 ; GFX11-LABEL: extractelement_vgpr_v8i8_idx6:
1644 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1645 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:6
1646 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1647 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1648 %vector = load <8 x i8>, ptr addrspace(1) %ptr
1649 %element = extractelement <8 x i8> %vector, i32 6
; Constant index 7 on an <8 x i8> loaded through an addrspace(1) pointer, in a
; function without the amdgpu_ps calling convention.
; The checks expect a single unsigned byte load addressed by v[0:1]: GFX9,
; GFX10, GFX11 and GFX7 encode the index as offset:7 on the load, while GFX8
; adds 7 to v[0:1] with v_add_u32/v_addc_u32 before a flat_load_ubyte.
; The check lines below are autogenerated; regenerate them instead of editing.
1653 define i8 @extractelement_vgpr_v8i8_idx7(ptr addrspace(1) %ptr) {
1654 ; GFX9-LABEL: extractelement_vgpr_v8i8_idx7:
1656 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1657 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:7
1658 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1659 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1661 ; GFX8-LABEL: extractelement_vgpr_v8i8_idx7:
1663 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1664 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 7, v0
1665 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1666 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
1667 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1668 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1670 ; GFX7-LABEL: extractelement_vgpr_v8i8_idx7:
1672 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1673 ; GFX7-NEXT: s_mov_b32 s6, 0
1674 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1675 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1676 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:7
1677 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1678 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1680 ; GFX10-LABEL: extractelement_vgpr_v8i8_idx7:
1682 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1683 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:7
1684 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1685 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1687 ; GFX11-LABEL: extractelement_vgpr_v8i8_idx7:
1689 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1690 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:7
1691 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1692 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1693 %vector = load <8 x i8>, ptr addrspace(1) %ptr
1694 %element = extractelement <8 x i8> %vector, i32 7
; Dynamic-index extract from a <16 x i8> loaded through an inreg addrspace(4)
; pointer, with the index also passed inreg.
; The checks expect the index (s4) to be masked with 15 and extended to 64
; bits with s_ashr_i32 ..., 31. GFX9/GFX8/GFX10/GFX11 then add it to the
; pointer in s[2:3] with s_add_u32/s_addc_u32; GFX7 instead copies it to
; v[0:1] for an addr64 buffer load whose descriptor s[0:3] starts with the
; pointer. One unsigned byte is loaded and moved to s0 with
; v_readfirstlane_b32.
; The check lines below are autogenerated; regenerate them instead of editing.
1698 define amdgpu_ps i8 @extractelement_sgpr_v16i8_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) {
1699 ; GFX9-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
1701 ; GFX9-NEXT: s_and_b32 s0, s4, 15
1702 ; GFX9-NEXT: s_ashr_i32 s1, s0, 31
1703 ; GFX9-NEXT: s_add_u32 s0, s2, s0
1704 ; GFX9-NEXT: s_addc_u32 s1, s3, s1
1705 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1706 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1]
1707 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1708 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
1709 ; GFX9-NEXT: ; return to shader part epilog
1711 ; GFX8-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
1713 ; GFX8-NEXT: s_and_b32 s0, s4, 15
1714 ; GFX8-NEXT: s_ashr_i32 s1, s0, 31
1715 ; GFX8-NEXT: s_add_u32 s0, s2, s0
1716 ; GFX8-NEXT: s_addc_u32 s1, s3, s1
1717 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
1718 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
1719 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
1720 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1721 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
1722 ; GFX8-NEXT: ; return to shader part epilog
1724 ; GFX7-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
1726 ; GFX7-NEXT: s_and_b32 s4, s4, 15
1727 ; GFX7-NEXT: s_ashr_i32 s5, s4, 31
1728 ; GFX7-NEXT: v_mov_b32_e32 v0, s4
1729 ; GFX7-NEXT: s_mov_b32 s0, s2
1730 ; GFX7-NEXT: s_mov_b32 s1, s3
1731 ; GFX7-NEXT: s_mov_b32 s2, 0
1732 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
1733 ; GFX7-NEXT: v_mov_b32_e32 v1, s5
1734 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
1735 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1736 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
1737 ; GFX7-NEXT: ; return to shader part epilog
1739 ; GFX10-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
1741 ; GFX10-NEXT: s_and_b32 s0, s4, 15
1742 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1743 ; GFX10-NEXT: s_ashr_i32 s1, s0, 31
1744 ; GFX10-NEXT: s_add_u32 s0, s2, s0
1745 ; GFX10-NEXT: s_addc_u32 s1, s3, s1
1746 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1]
1747 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1748 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
1749 ; GFX10-NEXT: ; return to shader part epilog
1751 ; GFX11-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
1753 ; GFX11-NEXT: s_and_b32 s0, s4, 15
1754 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1755 ; GFX11-NEXT: s_ashr_i32 s1, s0, 31
1756 ; GFX11-NEXT: s_add_u32 s0, s2, s0
1757 ; GFX11-NEXT: s_addc_u32 s1, s3, s1
1758 ; GFX11-NEXT: global_load_u8 v0, v0, s[0:1]
1759 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1760 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
1761 ; GFX11-NEXT: ; return to shader part epilog
1762 %vector = load <16 x i8>, ptr addrspace(4) %ptr
1763 %element = extractelement <16 x i8> %vector, i32 %idx
; Dynamic-index extract from a <16 x i8> loaded via a (non-inreg) ptr
; addrspace(1), with an inreg i32 index, in an amdgpu_ps function. The
; expectations below mask the index with 15 and produce its high half with
; s_ashr_i32 by 31, then issue one unsigned-byte load; the result is copied to s0
; with v_readfirstlane_b32. On the gfx9, gfx8 (fiji), gfx10 and gfx11 runs the
; index is moved into v2/v3 and added to the pointer in v[0:1] with a vector
; add/add-with-carry pair. On the gfx7 (hawaii) run the masked index stays in
; s[0:1] of the s[0:3] operand of an addr64 buffer load and the pointer is
; passed in v[0:1].
1767 define amdgpu_ps i8 @extractelement_vgpr_v16i8_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) {
1768 ; GFX9-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
1770 ; GFX9-NEXT: s_and_b32 s0, s2, 15
1771 ; GFX9-NEXT: s_ashr_i32 s1, s0, 31
1772 ; GFX9-NEXT: v_mov_b32_e32 v3, s1
1773 ; GFX9-NEXT: v_mov_b32_e32 v2, s0
1774 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
1775 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
1776 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
1777 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1778 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
1779 ; GFX9-NEXT: ; return to shader part epilog
1781 ; GFX8-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
1783 ; GFX8-NEXT: s_and_b32 s0, s2, 15
1784 ; GFX8-NEXT: s_ashr_i32 s1, s0, 31
1785 ; GFX8-NEXT: v_mov_b32_e32 v3, s1
1786 ; GFX8-NEXT: v_mov_b32_e32 v2, s0
1787 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
1788 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
1789 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
1790 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1791 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
1792 ; GFX8-NEXT: ; return to shader part epilog
1794 ; GFX7-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
1796 ; GFX7-NEXT: s_and_b32 s0, s2, 15
1797 ; GFX7-NEXT: s_ashr_i32 s1, s0, 31
1798 ; GFX7-NEXT: s_mov_b32 s2, 0
1799 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
1800 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
1801 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1802 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
1803 ; GFX7-NEXT: ; return to shader part epilog
1805 ; GFX10-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
1807 ; GFX10-NEXT: s_and_b32 s0, s2, 15
1808 ; GFX10-NEXT: s_ashr_i32 s1, s0, 31
1809 ; GFX10-NEXT: v_mov_b32_e32 v3, s1
1810 ; GFX10-NEXT: v_mov_b32_e32 v2, s0
1811 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
1812 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1813 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
1814 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1815 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
1816 ; GFX10-NEXT: ; return to shader part epilog
1818 ; GFX11-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
1820 ; GFX11-NEXT: s_and_b32 s0, s2, 15
1821 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1822 ; GFX11-NEXT: s_ashr_i32 s1, s0, 31
1823 ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
1824 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
1825 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
1826 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1827 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off
1828 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1829 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
1830 ; GFX11-NEXT: ; return to shader part epilog
1831 %vector = load <16 x i8>, ptr addrspace(1) %ptr
1832 %element = extractelement <16 x i8> %vector, i32 %idx
; Dynamic-index extract from a <16 x i8> loaded via ptr addrspace(1), with a
; (non-inreg) i32 index, in a function with no explicit calling convention
; (the expectations end in s_setpc_b64 s[30:31]). Every run masks the index in
; v2 with 15, produces its high half in v3 with v_ashrrev_i32 by 31, adds the
; pair to the pointer in v[0:1] with a vector add/add-with-carry, and issues one
; unsigned-byte load whose result is left in v0. The gfx7 (hawaii) run uses an
; addr64 buffer load with s[4:5] set to 0.
1836 define i8 @extractelement_vgpr_v16i8_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) {
1837 ; GFX9-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
1839 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1840 ; GFX9-NEXT: v_and_b32_e32 v2, 15, v2
1841 ; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
1842 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
1843 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
1844 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
1845 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1846 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1848 ; GFX8-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
1850 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1851 ; GFX8-NEXT: v_and_b32_e32 v2, 15, v2
1852 ; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
1853 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
1854 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
1855 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
1856 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1857 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1859 ; GFX7-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
1861 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1862 ; GFX7-NEXT: v_and_b32_e32 v2, 15, v2
1863 ; GFX7-NEXT: v_ashrrev_i32_e32 v3, 31, v2
1864 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
1865 ; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
1866 ; GFX7-NEXT: s_mov_b32 s6, 0
1867 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1868 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1869 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
1870 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1871 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1873 ; GFX10-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
1875 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1876 ; GFX10-NEXT: v_and_b32_e32 v2, 15, v2
1877 ; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
1878 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
1879 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1880 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
1881 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1882 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1884 ; GFX11-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
1886 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1887 ; GFX11-NEXT: v_and_b32_e32 v2, 15, v2
1888 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1889 ; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
1890 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
1891 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1892 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off
1893 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1894 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1895 %vector = load <16 x i8>, ptr addrspace(1) %ptr
1896 %element = extractelement <16 x i8> %vector, i32 %idx
; Dynamic-index extract from a <16 x i8> loaded via an inreg ptr addrspace(4),
; with a (non-inreg) i32 index, in an amdgpu_ps function. Every run masks the
; index in v0 with 15 and produces its high half with v_ashrrev_i32 by 31, then
; issues one unsigned-byte load and copies the result to s0 with
; v_readfirstlane_b32. On the gfx9, gfx8 (fiji), gfx10 and gfx11 runs the pointer
; in s2/s3 is copied to v0/v1 and the index is added with a vector
; add/add-with-carry pair. On the gfx7 (hawaii) run the pointer is moved into
; s[0:1] of the s[0:3] operand of an addr64 buffer load and the masked index is
; passed in v[0:1].
1900 define amdgpu_ps i8 @extractelement_sgpr_v16i8_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) {
1901 ; GFX9-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
1903 ; GFX9-NEXT: v_and_b32_e32 v2, 15, v0
1904 ; GFX9-NEXT: v_mov_b32_e32 v0, s2
1905 ; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
1906 ; GFX9-NEXT: v_mov_b32_e32 v1, s3
1907 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
1908 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
1909 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
1910 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1911 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
1912 ; GFX9-NEXT: ; return to shader part epilog
1914 ; GFX8-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
1916 ; GFX8-NEXT: v_and_b32_e32 v2, 15, v0
1917 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
1918 ; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
1919 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
1920 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
1921 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
1922 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
1923 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1924 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
1925 ; GFX8-NEXT: ; return to shader part epilog
1927 ; GFX7-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
1929 ; GFX7-NEXT: v_and_b32_e32 v0, 15, v0
1930 ; GFX7-NEXT: s_mov_b32 s0, s2
1931 ; GFX7-NEXT: s_mov_b32 s1, s3
1932 ; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1933 ; GFX7-NEXT: s_mov_b32 s2, 0
1934 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
1935 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
1936 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1937 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
1938 ; GFX7-NEXT: ; return to shader part epilog
1940 ; GFX10-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
1942 ; GFX10-NEXT: v_and_b32_e32 v2, 15, v0
1943 ; GFX10-NEXT: v_mov_b32_e32 v0, s2
1944 ; GFX10-NEXT: v_mov_b32_e32 v1, s3
1945 ; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
1946 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
1947 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1948 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
1949 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1950 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
1951 ; GFX10-NEXT: ; return to shader part epilog
1953 ; GFX11-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
1955 ; GFX11-NEXT: v_and_b32_e32 v2, 15, v0
1956 ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
1957 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1958 ; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
1959 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
1960 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
1961 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1962 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off
1963 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1964 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
1965 ; GFX11-NEXT: ; return to shader part epilog
1966 %vector = load <16 x i8>, ptr addrspace(4) %ptr
1967 %element = extractelement <16 x i8> %vector, i32 %idx
; Constant-index extract of element 0 from a <16 x i8> loaded via ptr
; addrspace(1). The expectations below show a single unsigned-byte load straight
; from the pointer in v[0:1] on every run, with no offset and no address
; arithmetic; the gfx7 (hawaii) run uses an addr64 buffer load with s[4:5] set
; to 0.
1971 define i8 @extractelement_vgpr_v16i8_idx0(ptr addrspace(1) %ptr) {
1972 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx0:
1974 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1975 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
1976 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1977 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1979 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx0:
1981 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1982 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
1983 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1984 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1986 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx0:
1988 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1989 ; GFX7-NEXT: s_mov_b32 s6, 0
1990 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1991 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1992 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
1993 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1994 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1996 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx0:
1998 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1999 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
2000 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2001 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2003 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx0:
2005 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2006 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off
2007 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2008 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2009 %vector = load <16 x i8>, ptr addrspace(1) %ptr
2010 %element = extractelement <16 x i8> %vector, i32 0
; Constant-index extract of element 1 from a <16 x i8> loaded via ptr
; addrspace(1). The expectations below show a single unsigned-byte load at byte
; offset 1: an immediate offset:1 on the gfx9/gfx10/gfx11 global loads and on the
; gfx7 (hawaii) addr64 buffer load, and an explicit 64-bit add of 1 to the
; address in v[0:1] before the gfx8 (fiji) flat load.
2014 define i8 @extractelement_vgpr_v16i8_idx1(ptr addrspace(1) %ptr) {
2015 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx1:
2017 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2018 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:1
2019 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2020 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2022 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx1:
2024 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2025 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 1, v0
2026 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2027 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
2028 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2029 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2031 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx1:
2033 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2034 ; GFX7-NEXT: s_mov_b32 s6, 0
2035 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
2036 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
2037 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:1
2038 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2039 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2041 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx1:
2043 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2044 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:1
2045 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2046 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2048 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx1:
2050 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2051 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:1
2052 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2053 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2054 %vector = load <16 x i8>, ptr addrspace(1) %ptr
2055 %element = extractelement <16 x i8> %vector, i32 1
; Constant-index extract of element 2 from a <16 x i8> loaded via ptr
; addrspace(1). The expectations below show a single unsigned-byte load at byte
; offset 2: an immediate offset:2 on the gfx9/gfx10/gfx11 global loads and on the
; gfx7 (hawaii) addr64 buffer load, and an explicit 64-bit add of 2 to the
; address in v[0:1] before the gfx8 (fiji) flat load.
2059 define i8 @extractelement_vgpr_v16i8_idx2(ptr addrspace(1) %ptr) {
2060 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx2:
2062 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2063 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2
2064 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2065 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2067 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx2:
2069 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2070 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 2, v0
2071 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2072 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
2073 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2074 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2076 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx2:
2078 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2079 ; GFX7-NEXT: s_mov_b32 s6, 0
2080 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
2081 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
2082 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:2
2083 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2084 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2086 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx2:
2088 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2089 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2
2090 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2091 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2093 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx2:
2095 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2096 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:2
2097 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2098 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2099 %vector = load <16 x i8>, ptr addrspace(1) %ptr
2100 %element = extractelement <16 x i8> %vector, i32 2
; Constant-index extract of element 3 from a <16 x i8> loaded via ptr
; addrspace(1). The expectations below show a single unsigned-byte load at byte
; offset 3: an immediate offset:3 on the gfx9/gfx10/gfx11 global loads and on the
; gfx7 (hawaii) addr64 buffer load, and an explicit 64-bit add of 3 to the
; address in v[0:1] before the gfx8 (fiji) flat load.
2104 define i8 @extractelement_vgpr_v16i8_idx3(ptr addrspace(1) %ptr) {
2105 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx3:
2107 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2108 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:3
2109 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2110 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2112 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx3:
2114 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2115 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 3, v0
2116 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2117 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
2118 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2119 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2121 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx3:
2123 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2124 ; GFX7-NEXT: s_mov_b32 s6, 0
2125 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
2126 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
2127 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:3
2128 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2129 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2131 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx3:
2133 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2134 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:3
2135 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2136 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2138 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx3:
2140 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2141 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:3
2142 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2143 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2144 %vector = load <16 x i8>, ptr addrspace(1) %ptr
2145 %element = extractelement <16 x i8> %vector, i32 3
; Constant-index extract of element 4 from a <16 x i8> loaded via ptr
; addrspace(1). The expectations below show a single unsigned-byte load at byte
; offset 4: an immediate offset:4 on the gfx9/gfx10/gfx11 global loads and on the
; gfx7 (hawaii) addr64 buffer load, and an explicit 64-bit add of 4 to the
; address in v[0:1] before the gfx8 (fiji) flat load.
2149 define i8 @extractelement_vgpr_v16i8_idx4(ptr addrspace(1) %ptr) {
2150 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx4:
2152 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2153 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4
2154 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2155 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2157 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx4:
2159 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2160 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 4, v0
2161 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2162 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
2163 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2164 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2166 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx4:
2168 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2169 ; GFX7-NEXT: s_mov_b32 s6, 0
2170 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
2171 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
2172 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:4
2173 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2174 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2176 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx4:
2178 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2179 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:4
2180 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2181 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2183 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx4:
2185 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2186 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:4
2187 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2188 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2189 %vector = load <16 x i8>, ptr addrspace(1) %ptr
2190 %element = extractelement <16 x i8> %vector, i32 4
; Constant-index extract of element 5 from a <16 x i8> loaded via ptr
; addrspace(1). The expectations below show a single unsigned-byte load at byte
; offset 5: an immediate offset:5 on the gfx9/gfx10/gfx11 global loads and on the
; gfx7 (hawaii) addr64 buffer load, and an explicit 64-bit add of 5 to the
; address in v[0:1] before the gfx8 (fiji) flat load.
2194 define i8 @extractelement_vgpr_v16i8_idx5(ptr addrspace(1) %ptr) {
2195 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx5:
2197 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2198 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:5
2199 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2200 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2202 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx5:
2204 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2205 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 5, v0
2206 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2207 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
2208 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2209 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2211 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx5:
2213 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2214 ; GFX7-NEXT: s_mov_b32 s6, 0
2215 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
2216 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
2217 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:5
2218 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2219 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2221 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx5:
2223 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2224 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:5
2225 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2226 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2228 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx5:
2230 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2231 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:5
2232 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2233 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2234 %vector = load <16 x i8>, ptr addrspace(1) %ptr
2235 %element = extractelement <16 x i8> %vector, i32 5
; Constant-index extract of element 6 from a <16 x i8> loaded via ptr
; addrspace(1). The expectations below show a single unsigned-byte load at byte
; offset 6: an immediate offset:6 on the gfx9/gfx10/gfx11 global loads and on the
; gfx7 (hawaii) addr64 buffer load, and an explicit 64-bit add of 6 to the
; address in v[0:1] before the gfx8 (fiji) flat load.
2239 define i8 @extractelement_vgpr_v16i8_idx6(ptr addrspace(1) %ptr) {
2240 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx6:
2242 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2243 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:6
2244 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2245 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2247 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx6:
2249 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2250 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 6, v0
2251 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2252 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
2253 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2254 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2256 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx6:
2258 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2259 ; GFX7-NEXT: s_mov_b32 s6, 0
2260 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
2261 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
2262 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:6
2263 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2264 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2266 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx6:
2268 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2269 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:6
2270 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2271 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2273 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx6:
2275 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2276 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:6
2277 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2278 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2279 %vector = load <16 x i8>, ptr addrspace(1) %ptr
2280 %element = extractelement <16 x i8> %vector, i32 6
; Constant-index extract of element 7 from a <16 x i8> loaded via ptr
; addrspace(1). The expectations below show a single unsigned-byte load at byte
; offset 7: an immediate offset:7 on the gfx9/gfx10/gfx11 global loads and on the
; gfx7 (hawaii) addr64 buffer load, and an explicit 64-bit add of 7 to the
; address in v[0:1] before the gfx8 (fiji) flat load.
2284 define i8 @extractelement_vgpr_v16i8_idx7(ptr addrspace(1) %ptr) {
2285 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx7:
2287 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2288 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:7
2289 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2290 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2292 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx7:
2294 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2295 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 7, v0
2296 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2297 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
2298 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2299 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2301 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx7:
2303 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2304 ; GFX7-NEXT: s_mov_b32 s6, 0
2305 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
2306 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
2307 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:7
2308 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2309 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2311 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx7:
2313 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2314 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:7
2315 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2316 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2318 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx7:
2320 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2321 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:7
2322 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2323 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2324 %vector = load <16 x i8>, ptr addrspace(1) %ptr
2325 %element = extractelement <16 x i8> %vector, i32 7
; Constant-index extract of element 8 from a <16 x i8> loaded via ptr
; addrspace(1). The expectations below show a single unsigned-byte load at byte
; offset 8: an immediate offset:8 on the gfx9/gfx10/gfx11 global loads and on the
; gfx7 (hawaii) addr64 buffer load, and an explicit 64-bit add of 8 to the
; address in v[0:1] before the gfx8 (fiji) flat load.
2329 define i8 @extractelement_vgpr_v16i8_idx8(ptr addrspace(1) %ptr) {
2330 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx8:
2332 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2333 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:8
2334 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2335 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2337 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx8:
2339 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2340 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 8, v0
2341 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2342 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
2343 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2344 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2346 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx8:
2348 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2349 ; GFX7-NEXT: s_mov_b32 s6, 0
2350 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
2351 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
2352 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:8
2353 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2354 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2356 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx8:
2358 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2359 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:8
2360 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2361 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2363 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx8:
2365 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2366 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:8
2367 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2368 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2369 %vector = load <16 x i8>, ptr addrspace(1) %ptr
2370 %element = extractelement <16 x i8> %vector, i32 8
; Constant-index extract of element 9 from a <16 x i8> loaded via ptr
; addrspace(1). The expectations below show a single unsigned-byte load at byte
; offset 9: an immediate offset:9 on the gfx9/gfx10/gfx11 global loads and on the
; gfx7 (hawaii) addr64 buffer load, and an explicit 64-bit add of 9 to the
; address in v[0:1] before the gfx8 (fiji) flat load.
2374 define i8 @extractelement_vgpr_v16i8_idx9(ptr addrspace(1) %ptr) {
2375 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx9:
2377 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2378 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:9
2379 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2380 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2382 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx9:
2384 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2385 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 9, v0
2386 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2387 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
2388 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2389 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2391 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx9:
2393 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2394 ; GFX7-NEXT: s_mov_b32 s6, 0
2395 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
2396 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
2397 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:9
2398 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2399 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2401 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx9:
2403 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2404 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:9
2405 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2406 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2408 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx9:
2410 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2411 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:9
2412 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2413 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2414 %vector = load <16 x i8>, ptr addrspace(1) %ptr
2415 %element = extractelement <16 x i8> %vector, i32 9
; Constant-index extract of element 10 from a <16 x i8> loaded via ptr
; addrspace(1). The expectations below show a single unsigned-byte load at byte
; offset 10: an immediate offset:10 on the gfx9/gfx10/gfx11 global loads and on
; the gfx7 (hawaii) addr64 buffer load, and an explicit 64-bit add of 10 to the
; address in v[0:1] before the gfx8 (fiji) flat load.
2419 define i8 @extractelement_vgpr_v16i8_idx10(ptr addrspace(1) %ptr) {
2420 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx10:
2422 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2423 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:10
2424 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2425 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2427 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx10:
2429 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2430 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 10, v0
2431 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2432 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
2433 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2434 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2436 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx10:
2438 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2439 ; GFX7-NEXT: s_mov_b32 s6, 0
2440 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
2441 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
2442 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:10
2443 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2444 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2446 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx10:
2448 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2449 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:10
2450 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2451 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2453 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx10:
2455 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2456 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:10
2457 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2458 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2459 %vector = load <16 x i8>, ptr addrspace(1) %ptr
2460 %element = extractelement <16 x i8> %vector, i32 10
; Constant-index extract of element 11 from a <16 x i8> loaded via ptr
; addrspace(1). The expectations below show a single unsigned-byte load at byte
; offset 11: an immediate offset:11 on the gfx9/gfx10/gfx11 global loads and on
; the gfx7 (hawaii) addr64 buffer load, and an explicit 64-bit add of 11 to the
; address in v[0:1] before the gfx8 (fiji) flat load.
2464 define i8 @extractelement_vgpr_v16i8_idx11(ptr addrspace(1) %ptr) {
2465 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx11:
2467 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2468 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:11
2469 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2470 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2472 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx11:
2474 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2475 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 11, v0
2476 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2477 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
2478 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2479 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2481 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx11:
2483 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2484 ; GFX7-NEXT: s_mov_b32 s6, 0
2485 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
2486 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
2487 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:11
2488 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2489 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2491 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx11:
2493 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2494 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:11
2495 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2496 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2498 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx11:
2500 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2501 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:11
2502 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2503 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2504 %vector = load <16 x i8>, ptr addrspace(1) %ptr
2505 %element = extractelement <16 x i8> %vector, i32 11
; Constant-index extract of element 12 from a <16 x i8> loaded via ptr
; addrspace(1). The expectations below show a single unsigned-byte load at byte
; offset 12: an immediate offset:12 on the gfx9/gfx10/gfx11 global loads and on
; the gfx7 (hawaii) addr64 buffer load, and an explicit 64-bit add of 12 to the
; address in v[0:1] before the gfx8 (fiji) flat load.
2509 define i8 @extractelement_vgpr_v16i8_idx12(ptr addrspace(1) %ptr) {
2510 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx12:
2512 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2513 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:12
2514 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2515 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2517 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx12:
2519 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2520 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 12, v0
2521 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2522 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
2523 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2524 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2526 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx12:
2528 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2529 ; GFX7-NEXT: s_mov_b32 s6, 0
2530 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
2531 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
2532 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:12
2533 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2534 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2536 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx12:
2538 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2539 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:12
2540 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2541 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2543 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx12:
2545 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2546 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:12
2547 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2548 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2549 %vector = load <16 x i8>, ptr addrspace(1) %ptr
2550 %element = extractelement <16 x i8> %vector, i32 12
; Constant-index extract of element 13 from a <16 x i8> loaded via ptr
; addrspace(1). The expectations below show a single unsigned-byte load at byte
; offset 13: an immediate offset:13 on the gfx9/gfx10/gfx11 global loads and on
; the gfx7 (hawaii) addr64 buffer load, and an explicit 64-bit add of 13 to the
; address in v[0:1] before the gfx8 (fiji) flat load.
2554 define i8 @extractelement_vgpr_v16i8_idx13(ptr addrspace(1) %ptr) {
2555 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx13:
2557 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2558 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:13
2559 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2560 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2562 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx13:
2564 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2565 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 13, v0
2566 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2567 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
2568 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2569 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2571 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx13:
2573 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2574 ; GFX7-NEXT: s_mov_b32 s6, 0
2575 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
2576 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
2577 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:13
2578 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2579 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2581 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx13:
2583 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2584 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:13
2585 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2586 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2588 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx13:
2590 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2591 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:13
2592 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2593 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2594 %vector = load <16 x i8>, ptr addrspace(1) %ptr
2595 %element = extractelement <16 x i8> %vector, i32 13
; Constant-index extract of element 14 from a <16 x i8> loaded via ptr
; addrspace(1). The expectations below show a single unsigned-byte load at byte
; offset 14: an immediate offset:14 on the gfx9/gfx10/gfx11 global loads and on
; the gfx7 (hawaii) addr64 buffer load, and an explicit 64-bit add of 14 to the
; address in v[0:1] before the gfx8 (fiji) flat load.
2599 define i8 @extractelement_vgpr_v16i8_idx14(ptr addrspace(1) %ptr) {
2600 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx14:
2602 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2603 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:14
2604 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2605 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2607 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx14:
2609 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2610 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 14, v0
2611 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2612 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
2613 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2614 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2616 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx14:
2618 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2619 ; GFX7-NEXT: s_mov_b32 s6, 0
2620 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
2621 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
2622 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:14
2623 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2624 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2626 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx14:
2628 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2629 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:14
2630 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2631 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2633 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx14:
2635 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2636 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:14
2637 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2638 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2639 %vector = load <16 x i8>, ptr addrspace(1) %ptr
2640 %element = extractelement <16 x i8> %vector, i32 14
; Constant-index case, element 15 (the last lane) of a <16 x i8> vector that is
; loaded through an addrspace(1) pointer argument. The autogenerated checks
; below expect the vector load plus extract to become one unsigned byte load at
; byte offset 15. In the checks for gfx900, hawaii, gfx1010 and gfx1100 the 15
; appears as the immediate offset on the load. In the fiji checks the 15 is
; first added to the v[0,1] address pair (add followed by add-with-carry) and
; the flat load has no offset field.
2644 define i8 @extractelement_vgpr_v16i8_idx15(ptr addrspace(1) %ptr) {
2645 ; GFX9-LABEL: extractelement_vgpr_v16i8_idx15:
2647 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2648 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:15
2649 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2650 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2652 ; GFX8-LABEL: extractelement_vgpr_v16i8_idx15:
2654 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2655 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 15, v0
2656 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2657 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
2658 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2659 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2661 ; GFX7-LABEL: extractelement_vgpr_v16i8_idx15:
2663 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2664 ; GFX7-NEXT: s_mov_b32 s6, 0
2665 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
2666 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
2667 ; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:15
2668 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2669 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2671 ; GFX10-LABEL: extractelement_vgpr_v16i8_idx15:
2673 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2674 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:15
2675 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2676 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2678 ; GFX11-LABEL: extractelement_vgpr_v16i8_idx15:
2680 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2681 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:15
2682 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2683 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2684 %vector = load <16 x i8>, ptr addrspace(1) %ptr
2685 %element = extractelement <16 x i8> %vector, i32 15
2688 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: