1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
6 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
; Dynamic-index extract from a <4 x i16> that is loaded through an `inreg`
; addrspace(4) pointer; the index argument is `inreg` as well.
; The checks expect a purely scalar sequence on every target:
;   * idx >> 1 is compared against 1 and s_cselect_b32 picks the loaded dword,
;   * (idx & 1) << 4 becomes the amount by which that dword is shifted right.
; The gfx900, fiji and hawaii runs share the GCN prefix here; the gfx1100
; run additionally expects s_delay_alu hints and the s_load_b64 mnemonic.
8 define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) {
9 ; GCN-LABEL: extractelement_sgpr_v4i16_sgpr_idx:
11 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
12 ; GCN-NEXT: s_lshr_b32 s2, s4, 1
13 ; GCN-NEXT: s_cmp_eq_u32 s2, 1
14 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
15 ; GCN-NEXT: s_cselect_b32 s0, s1, s0
16 ; GCN-NEXT: s_and_b32 s1, s4, 1
17 ; GCN-NEXT: s_lshl_b32 s1, s1, 4
18 ; GCN-NEXT: s_lshr_b32 s0, s0, s1
19 ; GCN-NEXT: ; return to shader part epilog
21 ; GFX10-LABEL: extractelement_sgpr_v4i16_sgpr_idx:
23 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
24 ; GFX10-NEXT: s_lshr_b32 s2, s4, 1
25 ; GFX10-NEXT: s_cmp_eq_u32 s2, 1
26 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
27 ; GFX10-NEXT: s_cselect_b32 s0, s1, s0
28 ; GFX10-NEXT: s_and_b32 s1, s4, 1
29 ; GFX10-NEXT: s_lshl_b32 s1, s1, 4
30 ; GFX10-NEXT: s_lshr_b32 s0, s0, s1
31 ; GFX10-NEXT: ; return to shader part epilog
33 ; GFX11-LABEL: extractelement_sgpr_v4i16_sgpr_idx:
35 ; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
36 ; GFX11-NEXT: s_lshr_b32 s2, s4, 1
37 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
38 ; GFX11-NEXT: s_cmp_eq_u32 s2, 1
39 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
40 ; GFX11-NEXT: s_cselect_b32 s0, s1, s0
41 ; GFX11-NEXT: s_and_b32 s1, s4, 1
42 ; GFX11-NEXT: s_lshl_b32 s1, s1, 4
43 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
44 ; GFX11-NEXT: s_lshr_b32 s0, s0, s1
45 ; GFX11-NEXT: ; return to shader part epilog
46 %vector = load <4 x i16>, ptr addrspace(4) %ptr
47 %element = extractelement <4 x i16> %vector, i32 %idx
; Dynamic-index extract from a <4 x i16> loaded through an addrspace(1)
; pointer (not `inreg`), with an `inreg` index.
; The checks expect the vector to arrive in v[0:1] and the index in s2:
;   * the load is global_load on gfx900/gfx1010/gfx1100, flat_load on fiji
;     and an addr64 buffer_load (with s[4:7] set up first) on hawaii,
;   * idx >> 1 and (idx & 1) << 4 are computed with scalar instructions,
;   * v_cmp_eq_u32 + v_cndmask_b32 choose the dword, v_lshrrev_b32 shifts it,
;   * v_readfirstlane_b32 copies the result into s0 before the epilog.
51 define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) {
52 ; GFX9-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
54 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
55 ; GFX9-NEXT: s_lshr_b32 s0, s2, 1
56 ; GFX9-NEXT: s_and_b32 s1, s2, 1
57 ; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1
58 ; GFX9-NEXT: s_lshl_b32 s0, s1, 4
59 ; GFX9-NEXT: s_waitcnt vmcnt(0)
60 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
61 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, s0, v0
62 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
63 ; GFX9-NEXT: ; return to shader part epilog
65 ; GFX8-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
67 ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
68 ; GFX8-NEXT: s_lshr_b32 s0, s2, 1
69 ; GFX8-NEXT: s_and_b32 s1, s2, 1
70 ; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1
71 ; GFX8-NEXT: s_lshl_b32 s0, s1, 4
72 ; GFX8-NEXT: s_waitcnt vmcnt(0)
73 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
74 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, s0, v0
75 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
76 ; GFX8-NEXT: ; return to shader part epilog
78 ; GFX7-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
80 ; GFX7-NEXT: s_mov_b32 s6, 0
81 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
82 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
83 ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
84 ; GFX7-NEXT: s_lshr_b32 s0, s2, 1
85 ; GFX7-NEXT: s_and_b32 s1, s2, 1
86 ; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1
87 ; GFX7-NEXT: s_lshl_b32 s0, s1, 4
88 ; GFX7-NEXT: s_waitcnt vmcnt(0)
89 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
90 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, s0, v0
91 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
92 ; GFX7-NEXT: ; return to shader part epilog
94 ; GFX10-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
96 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
97 ; GFX10-NEXT: s_lshr_b32 s0, s2, 1
98 ; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 1
99 ; GFX10-NEXT: s_and_b32 s0, s2, 1
100 ; GFX10-NEXT: s_lshl_b32 s0, s0, 4
101 ; GFX10-NEXT: s_waitcnt vmcnt(0)
102 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
103 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, s0, v0
104 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
105 ; GFX10-NEXT: ; return to shader part epilog
107 ; GFX11-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
109 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
110 ; GFX11-NEXT: s_lshr_b32 s0, s2, 1
111 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
112 ; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 1
113 ; GFX11-NEXT: s_and_b32 s0, s2, 1
114 ; GFX11-NEXT: s_lshl_b32 s0, s0, 4
115 ; GFX11-NEXT: s_waitcnt vmcnt(0)
116 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
117 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
118 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, s0, v0
119 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
120 ; GFX11-NEXT: ; return to shader part epilog
121 %vector = load <4 x i16>, ptr addrspace(1) %ptr
122 %element = extractelement <4 x i16> %vector, i32 %idx
; Dynamic-index extract from a <4 x i16> loaded through an addrspace(1)
; pointer, in a function without the amdgpu_ps calling convention and with
; no `inreg` arguments.
; The checks expect the index in v2 and all index math in vector
; instructions: v_lshrrev_b32 (idx >> 1), v_and_b32 (idx & 1),
; v_cmp_eq_u32 + v_cndmask_b32 to choose the dword, v_lshlrev_b32 by 4 for
; the shift amount, then v_lshrrev_b32. Each variant opens with a full
; s_waitcnt and ends with s_setpc_b64 s[30:31].
; On gfx1100 the cndmask and the lshlrev are expected to be fused into a
; single v_dual_* instruction.
126 define i16 @extractelement_vgpr_v4i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) {
127 ; GFX9-LABEL: extractelement_vgpr_v4i16_vgpr_idx:
129 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
131 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 1, v2
132 ; GFX9-NEXT: v_and_b32_e32 v2, 1, v2
133 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3
134 ; GFX9-NEXT: s_waitcnt vmcnt(0)
135 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
136 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 4, v2
137 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, v1, v0
138 ; GFX9-NEXT: s_setpc_b64 s[30:31]
140 ; GFX8-LABEL: extractelement_vgpr_v4i16_vgpr_idx:
142 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
143 ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
144 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 1, v2
145 ; GFX8-NEXT: v_and_b32_e32 v2, 1, v2
146 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3
147 ; GFX8-NEXT: s_waitcnt vmcnt(0)
148 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
149 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 4, v2
150 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, v1, v0
151 ; GFX8-NEXT: s_setpc_b64 s[30:31]
153 ; GFX7-LABEL: extractelement_vgpr_v4i16_vgpr_idx:
155 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156 ; GFX7-NEXT: s_mov_b32 s6, 0
157 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
158 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
159 ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
160 ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 1, v2
161 ; GFX7-NEXT: v_and_b32_e32 v2, 1, v2
162 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3
163 ; GFX7-NEXT: s_waitcnt vmcnt(0)
164 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
165 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 4, v2
166 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v1, v0
167 ; GFX7-NEXT: s_setpc_b64 s[30:31]
169 ; GFX10-LABEL: extractelement_vgpr_v4i16_vgpr_idx:
171 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
173 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 1, v2
174 ; GFX10-NEXT: v_and_b32_e32 v2, 1, v2
175 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
176 ; GFX10-NEXT: s_waitcnt vmcnt(0)
177 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
178 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 4, v2
179 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, v1, v0
180 ; GFX10-NEXT: s_setpc_b64 s[30:31]
182 ; GFX11-LABEL: extractelement_vgpr_v4i16_vgpr_idx:
184 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
185 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
186 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 1, v2
187 ; GFX11-NEXT: v_and_b32_e32 v2, 1, v2
188 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
189 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
190 ; GFX11-NEXT: s_waitcnt vmcnt(0)
191 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v1 :: v_dual_lshlrev_b32 v1, 4, v2
192 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
193 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, v1, v0
194 ; GFX11-NEXT: s_setpc_b64 s[30:31]
195 %vector = load <4 x i16>, ptr addrspace(1) %ptr
196 %element = extractelement <4 x i16> %vector, i32 %idx
; Dynamic-index extract from a <4 x i16> loaded through an `inreg`
; addrspace(4) pointer, with an index that is not `inreg`.
; The checks expect a scalar load into s[0:1] while the index (v0) is
; processed with vector instructions. Because the dword select is a
; v_cndmask_b32, the loaded scalars have to reach it as operands:
;   * under the GCN prefix both s0 and s1 are first copied with v_mov_b32,
;   * on gfx1010/gfx1100 only s1 is copied and s0 is used directly.
; The result is moved to s0 with v_readfirstlane_b32 before the epilog.
200 define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) {
201 ; GCN-LABEL: extractelement_sgpr_v4i16_vgpr_idx:
203 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
204 ; GCN-NEXT: v_lshrrev_b32_e32 v1, 1, v0
205 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
206 ; GCN-NEXT: v_and_b32_e32 v0, 1, v0
207 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 4, v0
208 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
209 ; GCN-NEXT: v_mov_b32_e32 v2, s0
210 ; GCN-NEXT: v_mov_b32_e32 v3, s1
211 ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
212 ; GCN-NEXT: v_lshrrev_b32_e32 v0, v0, v1
213 ; GCN-NEXT: v_readfirstlane_b32 s0, v0
214 ; GCN-NEXT: ; return to shader part epilog
216 ; GFX10-LABEL: extractelement_sgpr_v4i16_vgpr_idx:
218 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
219 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 1, v0
220 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
221 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
222 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v0
223 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
224 ; GFX10-NEXT: v_mov_b32_e32 v2, s1
225 ; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v2, vcc_lo
226 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, v0, v1
227 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
228 ; GFX10-NEXT: ; return to shader part epilog
230 ; GFX11-LABEL: extractelement_sgpr_v4i16_vgpr_idx:
232 ; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
233 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 1, v0
234 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
235 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
236 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
237 ; GFX11-NEXT: v_mov_b32_e32 v2, s1
238 ; GFX11-NEXT: v_dual_cndmask_b32 v1, s0, v2 :: v_dual_and_b32 v0, 1, v0
239 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
240 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0
241 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, v0, v1
242 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
243 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
244 ; GFX11-NEXT: ; return to shader part epilog
245 %vector = load <4 x i16>, ptr addrspace(4) %ptr
246 %element = extractelement <4 x i16> %vector, i32 %idx
; Constant index 0 on a <4 x i16> loaded through an `inreg` addrspace(4)
; pointer. The checks expect nothing beyond the scalar load and its
; s_waitcnt: no select, shift or move follows, so the value is left in s0
; exactly as loaded.
250 define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx0(ptr addrspace(4) inreg %ptr) {
251 ; GCN-LABEL: extractelement_sgpr_v4i16_idx0:
253 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
254 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
255 ; GCN-NEXT: ; return to shader part epilog
257 ; GFX10-LABEL: extractelement_sgpr_v4i16_idx0:
259 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
260 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
261 ; GFX10-NEXT: ; return to shader part epilog
263 ; GFX11-LABEL: extractelement_sgpr_v4i16_idx0:
265 ; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
266 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
267 ; GFX11-NEXT: ; return to shader part epilog
268 %vector = load <4 x i16>, ptr addrspace(4) %ptr
269 %element = extractelement <4 x i16> %vector, i32 0
; Constant index 1 on a <4 x i16> loaded through an `inreg` addrspace(4)
; pointer. The checks expect the constant index to fold into a single
; s_lshr_b32 of the first loaded dword (s0) by 16 on every target.
273 define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx1(ptr addrspace(4) inreg %ptr) {
274 ; GCN-LABEL: extractelement_sgpr_v4i16_idx1:
276 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
277 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
278 ; GCN-NEXT: s_lshr_b32 s0, s0, 16
279 ; GCN-NEXT: ; return to shader part epilog
281 ; GFX10-LABEL: extractelement_sgpr_v4i16_idx1:
283 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
284 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
285 ; GFX10-NEXT: s_lshr_b32 s0, s0, 16
286 ; GFX10-NEXT: ; return to shader part epilog
288 ; GFX11-LABEL: extractelement_sgpr_v4i16_idx1:
290 ; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
291 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
292 ; GFX11-NEXT: s_lshr_b32 s0, s0, 16
293 ; GFX11-NEXT: ; return to shader part epilog
294 %vector = load <4 x i16>, ptr addrspace(4) %ptr
295 %element = extractelement <4 x i16> %vector, i32 1
; Constant index 2 on a <4 x i16> loaded through an `inreg` addrspace(4)
; pointer. The checks expect a plain s_mov_b32 of the second loaded dword
; (s1) into s0, with no shift, on every target.
299 define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx2(ptr addrspace(4) inreg %ptr) {
300 ; GCN-LABEL: extractelement_sgpr_v4i16_idx2:
302 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
303 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
304 ; GCN-NEXT: s_mov_b32 s0, s1
305 ; GCN-NEXT: ; return to shader part epilog
307 ; GFX10-LABEL: extractelement_sgpr_v4i16_idx2:
309 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
310 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
311 ; GFX10-NEXT: s_mov_b32 s0, s1
312 ; GFX10-NEXT: ; return to shader part epilog
314 ; GFX11-LABEL: extractelement_sgpr_v4i16_idx2:
316 ; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
317 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
318 ; GFX11-NEXT: s_mov_b32 s0, s1
319 ; GFX11-NEXT: ; return to shader part epilog
320 %vector = load <4 x i16>, ptr addrspace(4) %ptr
321 %element = extractelement <4 x i16> %vector, i32 2
; Constant index 3 on a <4 x i16> loaded through an `inreg` addrspace(4)
; pointer. The checks expect a single s_lshr_b32 of the second loaded dword
; (s1) by 16, written to s0, on every target.
325 define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx3(ptr addrspace(4) inreg %ptr) {
326 ; GCN-LABEL: extractelement_sgpr_v4i16_idx3:
328 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
329 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
330 ; GCN-NEXT: s_lshr_b32 s0, s1, 16
331 ; GCN-NEXT: ; return to shader part epilog
333 ; GFX10-LABEL: extractelement_sgpr_v4i16_idx3:
335 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
336 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
337 ; GFX10-NEXT: s_lshr_b32 s0, s1, 16
338 ; GFX10-NEXT: ; return to shader part epilog
340 ; GFX11-LABEL: extractelement_sgpr_v4i16_idx3:
342 ; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
343 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
344 ; GFX11-NEXT: s_lshr_b32 s0, s1, 16
345 ; GFX11-NEXT: ; return to shader part epilog
346 %vector = load <4 x i16>, ptr addrspace(4) %ptr
347 %element = extractelement <4 x i16> %vector, i32 3
; Constant index 0 on a <4 x i16> loaded through an addrspace(1) pointer, in
; a function without the amdgpu_ps calling convention.
; The checks expect only the load (global/flat/addr64 buffer depending on
; the target), the s_waitcnt on it, and s_setpc_b64: no instruction touches
; v0 after the load.
351 define i16 @extractelement_vgpr_v4i16_idx0(ptr addrspace(1) %ptr) {
352 ; GFX9-LABEL: extractelement_vgpr_v4i16_idx0:
354 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
355 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
356 ; GFX9-NEXT: s_waitcnt vmcnt(0)
357 ; GFX9-NEXT: s_setpc_b64 s[30:31]
359 ; GFX8-LABEL: extractelement_vgpr_v4i16_idx0:
361 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
362 ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
363 ; GFX8-NEXT: s_waitcnt vmcnt(0)
364 ; GFX8-NEXT: s_setpc_b64 s[30:31]
366 ; GFX7-LABEL: extractelement_vgpr_v4i16_idx0:
368 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
369 ; GFX7-NEXT: s_mov_b32 s6, 0
370 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
371 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
372 ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
373 ; GFX7-NEXT: s_waitcnt vmcnt(0)
374 ; GFX7-NEXT: s_setpc_b64 s[30:31]
376 ; GFX10-LABEL: extractelement_vgpr_v4i16_idx0:
378 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
379 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
380 ; GFX10-NEXT: s_waitcnt vmcnt(0)
381 ; GFX10-NEXT: s_setpc_b64 s[30:31]
383 ; GFX11-LABEL: extractelement_vgpr_v4i16_idx0:
385 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
386 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
387 ; GFX11-NEXT: s_waitcnt vmcnt(0)
388 ; GFX11-NEXT: s_setpc_b64 s[30:31]
389 %vector = load <4 x i16>, ptr addrspace(1) %ptr
390 %element = extractelement <4 x i16> %vector, i32 0
; Constant index 1 on a <4 x i16> loaded through an addrspace(1) pointer, in
; a function without the amdgpu_ps calling convention.
; The checks expect the load followed by a single v_lshrrev_b32 of the
; first loaded dword (v0) by 16, then s_setpc_b64.
394 define i16 @extractelement_vgpr_v4i16_idx1(ptr addrspace(1) %ptr) {
395 ; GFX9-LABEL: extractelement_vgpr_v4i16_idx1:
397 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
398 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
399 ; GFX9-NEXT: s_waitcnt vmcnt(0)
400 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
401 ; GFX9-NEXT: s_setpc_b64 s[30:31]
403 ; GFX8-LABEL: extractelement_vgpr_v4i16_idx1:
405 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
406 ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
407 ; GFX8-NEXT: s_waitcnt vmcnt(0)
408 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
409 ; GFX8-NEXT: s_setpc_b64 s[30:31]
411 ; GFX7-LABEL: extractelement_vgpr_v4i16_idx1:
413 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
414 ; GFX7-NEXT: s_mov_b32 s6, 0
415 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
416 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
417 ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
418 ; GFX7-NEXT: s_waitcnt vmcnt(0)
419 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
420 ; GFX7-NEXT: s_setpc_b64 s[30:31]
422 ; GFX10-LABEL: extractelement_vgpr_v4i16_idx1:
424 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
425 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
426 ; GFX10-NEXT: s_waitcnt vmcnt(0)
427 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0
428 ; GFX10-NEXT: s_setpc_b64 s[30:31]
430 ; GFX11-LABEL: extractelement_vgpr_v4i16_idx1:
432 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
433 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
434 ; GFX11-NEXT: s_waitcnt vmcnt(0)
435 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0
436 ; GFX11-NEXT: s_setpc_b64 s[30:31]
437 %vector = load <4 x i16>, ptr addrspace(1) %ptr
438 %element = extractelement <4 x i16> %vector, i32 1
; Constant index 2 on a <4 x i16> loaded through an addrspace(1) pointer, in
; a function without the amdgpu_ps calling convention.
; The checks expect the load followed by a single v_mov_b32 that copies the
; second loaded dword (v1) into v0, then s_setpc_b64.
442 define i16 @extractelement_vgpr_v4i16_idx2(ptr addrspace(1) %ptr) {
443 ; GFX9-LABEL: extractelement_vgpr_v4i16_idx2:
445 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
446 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
447 ; GFX9-NEXT: s_waitcnt vmcnt(0)
448 ; GFX9-NEXT: v_mov_b32_e32 v0, v1
449 ; GFX9-NEXT: s_setpc_b64 s[30:31]
451 ; GFX8-LABEL: extractelement_vgpr_v4i16_idx2:
453 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
454 ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
455 ; GFX8-NEXT: s_waitcnt vmcnt(0)
456 ; GFX8-NEXT: v_mov_b32_e32 v0, v1
457 ; GFX8-NEXT: s_setpc_b64 s[30:31]
459 ; GFX7-LABEL: extractelement_vgpr_v4i16_idx2:
461 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
462 ; GFX7-NEXT: s_mov_b32 s6, 0
463 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
464 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
465 ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
466 ; GFX7-NEXT: s_waitcnt vmcnt(0)
467 ; GFX7-NEXT: v_mov_b32_e32 v0, v1
468 ; GFX7-NEXT: s_setpc_b64 s[30:31]
470 ; GFX10-LABEL: extractelement_vgpr_v4i16_idx2:
472 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
473 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
474 ; GFX10-NEXT: s_waitcnt vmcnt(0)
475 ; GFX10-NEXT: v_mov_b32_e32 v0, v1
476 ; GFX10-NEXT: s_setpc_b64 s[30:31]
478 ; GFX11-LABEL: extractelement_vgpr_v4i16_idx2:
480 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
481 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
482 ; GFX11-NEXT: s_waitcnt vmcnt(0)
483 ; GFX11-NEXT: v_mov_b32_e32 v0, v1
484 ; GFX11-NEXT: s_setpc_b64 s[30:31]
485 %vector = load <4 x i16>, ptr addrspace(1) %ptr
486 %element = extractelement <4 x i16> %vector, i32 2
; Constant index 3 on a <4 x i16> loaded through an addrspace(1) pointer, in
; a function without the amdgpu_ps calling convention.
; The checks expect the load followed by a single v_lshrrev_b32 that shifts
; the second loaded dword (v1) right by 16 into v0, then s_setpc_b64.
490 define i16 @extractelement_vgpr_v4i16_idx3(ptr addrspace(1) %ptr) {
491 ; GFX9-LABEL: extractelement_vgpr_v4i16_idx3:
493 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
494 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
495 ; GFX9-NEXT: s_waitcnt vmcnt(0)
496 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v1
497 ; GFX9-NEXT: s_setpc_b64 s[30:31]
499 ; GFX8-LABEL: extractelement_vgpr_v4i16_idx3:
501 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
502 ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
503 ; GFX8-NEXT: s_waitcnt vmcnt(0)
504 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v1
505 ; GFX8-NEXT: s_setpc_b64 s[30:31]
507 ; GFX7-LABEL: extractelement_vgpr_v4i16_idx3:
509 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
510 ; GFX7-NEXT: s_mov_b32 s6, 0
511 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
512 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
513 ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
514 ; GFX7-NEXT: s_waitcnt vmcnt(0)
515 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v1
516 ; GFX7-NEXT: s_setpc_b64 s[30:31]
518 ; GFX10-LABEL: extractelement_vgpr_v4i16_idx3:
520 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
521 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
522 ; GFX10-NEXT: s_waitcnt vmcnt(0)
523 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v1
524 ; GFX10-NEXT: s_setpc_b64 s[30:31]
526 ; GFX11-LABEL: extractelement_vgpr_v4i16_idx3:
528 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
529 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
530 ; GFX11-NEXT: s_waitcnt vmcnt(0)
531 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v1
532 ; GFX11-NEXT: s_setpc_b64 s[30:31]
533 %vector = load <4 x i16>, ptr addrspace(1) %ptr
534 %element = extractelement <4 x i16> %vector, i32 3
; Dynamic-index extract from an <8 x i16> loaded through an `inreg`
; addrspace(4) pointer; the index argument is `inreg` as well.
; The checks expect a four-dword scalar load into s[0:3] and a purely
; scalar sequence:
;   * s5 = idx >> 1 is compared against 1, 2 and 3 in turn, each compare
;     followed by an s_cselect_b32 that may replace s0 with s1, s2 or s3
;     (s0 itself is the fall-through value),
;   * (idx & 1) << 4 is then used as the right-shift amount on s0.
538 define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) {
539 ; GCN-LABEL: extractelement_sgpr_v8i16_sgpr_idx:
541 ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
542 ; GCN-NEXT: s_lshr_b32 s5, s4, 1
543 ; GCN-NEXT: s_cmp_eq_u32 s5, 1
544 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
545 ; GCN-NEXT: s_cselect_b32 s0, s1, s0
546 ; GCN-NEXT: s_cmp_eq_u32 s5, 2
547 ; GCN-NEXT: s_cselect_b32 s0, s2, s0
548 ; GCN-NEXT: s_cmp_eq_u32 s5, 3
549 ; GCN-NEXT: s_cselect_b32 s0, s3, s0
550 ; GCN-NEXT: s_and_b32 s1, s4, 1
551 ; GCN-NEXT: s_lshl_b32 s1, s1, 4
552 ; GCN-NEXT: s_lshr_b32 s0, s0, s1
553 ; GCN-NEXT: ; return to shader part epilog
555 ; GFX10-LABEL: extractelement_sgpr_v8i16_sgpr_idx:
557 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
558 ; GFX10-NEXT: s_lshr_b32 s5, s4, 1
559 ; GFX10-NEXT: s_cmp_eq_u32 s5, 1
560 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
561 ; GFX10-NEXT: s_cselect_b32 s0, s1, s0
562 ; GFX10-NEXT: s_cmp_eq_u32 s5, 2
563 ; GFX10-NEXT: s_cselect_b32 s0, s2, s0
564 ; GFX10-NEXT: s_cmp_eq_u32 s5, 3
565 ; GFX10-NEXT: s_cselect_b32 s0, s3, s0
566 ; GFX10-NEXT: s_and_b32 s1, s4, 1
567 ; GFX10-NEXT: s_lshl_b32 s1, s1, 4
568 ; GFX10-NEXT: s_lshr_b32 s0, s0, s1
569 ; GFX10-NEXT: ; return to shader part epilog
571 ; GFX11-LABEL: extractelement_sgpr_v8i16_sgpr_idx:
573 ; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x0
574 ; GFX11-NEXT: s_lshr_b32 s5, s4, 1
575 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
576 ; GFX11-NEXT: s_cmp_eq_u32 s5, 1
577 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
578 ; GFX11-NEXT: s_cselect_b32 s0, s1, s0
579 ; GFX11-NEXT: s_cmp_eq_u32 s5, 2
580 ; GFX11-NEXT: s_cselect_b32 s0, s2, s0
581 ; GFX11-NEXT: s_cmp_eq_u32 s5, 3
582 ; GFX11-NEXT: s_cselect_b32 s0, s3, s0
583 ; GFX11-NEXT: s_and_b32 s1, s4, 1
584 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
585 ; GFX11-NEXT: s_lshl_b32 s1, s1, 4
586 ; GFX11-NEXT: s_lshr_b32 s0, s0, s1
587 ; GFX11-NEXT: ; return to shader part epilog
588 %vector = load <8 x i16>, ptr addrspace(4) %ptr
589 %element = extractelement <8 x i16> %vector, i32 %idx
; Dynamic-index extract from an <8 x i16> loaded through an addrspace(1)
; pointer (not `inreg`), with an `inreg` index.
; The checks expect the four loaded dwords in v[0:3] and the index in s2:
;   * idx >> 1 is kept in s0 and compared against 1, 2 and 3 with
;     v_cmp_eq_u32_e64, each compare feeding a v_cndmask_b32 that may
;     replace v0 with v1, v2 or v3,
;   * (idx & 1) << 4 is computed with scalar instructions and used as the
;     v_lshrrev_b32 shift amount,
;   * v_readfirstlane_b32 copies the result into s0 before the epilog.
; The gfx1010/gfx1100 variants place the s_and_b32/s_lshl_b32 pair later in
; the sequence than the other targets do.
593 define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) {
594 ; GFX9-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
596 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
597 ; GFX9-NEXT: s_lshr_b32 s0, s2, 1
598 ; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1
599 ; GFX9-NEXT: s_and_b32 s1, s2, 1
600 ; GFX9-NEXT: s_waitcnt vmcnt(0)
601 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
602 ; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s0, 2
603 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
604 ; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s0, 3
605 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
606 ; GFX9-NEXT: s_lshl_b32 s0, s1, 4
607 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, s0, v0
608 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
609 ; GFX9-NEXT: ; return to shader part epilog
611 ; GFX8-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
613 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
614 ; GFX8-NEXT: s_lshr_b32 s0, s2, 1
615 ; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1
616 ; GFX8-NEXT: s_and_b32 s1, s2, 1
617 ; GFX8-NEXT: s_waitcnt vmcnt(0)
618 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
619 ; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 2
620 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
621 ; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 3
622 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
623 ; GFX8-NEXT: s_lshl_b32 s0, s1, 4
624 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, s0, v0
625 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
626 ; GFX8-NEXT: ; return to shader part epilog
628 ; GFX7-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
630 ; GFX7-NEXT: s_mov_b32 s6, 0
631 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
632 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
633 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
634 ; GFX7-NEXT: s_lshr_b32 s0, s2, 1
635 ; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1
636 ; GFX7-NEXT: s_and_b32 s1, s2, 1
637 ; GFX7-NEXT: s_waitcnt vmcnt(0)
638 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
639 ; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s0, 2
640 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
641 ; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s0, 3
642 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
643 ; GFX7-NEXT: s_lshl_b32 s0, s1, 4
644 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, s0, v0
645 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
646 ; GFX7-NEXT: ; return to shader part epilog
648 ; GFX10-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
650 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
651 ; GFX10-NEXT: s_lshr_b32 s0, s2, 1
652 ; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 1
653 ; GFX10-NEXT: s_waitcnt vmcnt(0)
654 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
655 ; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 2
656 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
657 ; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 3
658 ; GFX10-NEXT: s_and_b32 s0, s2, 1
659 ; GFX10-NEXT: s_lshl_b32 s0, s0, 4
660 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
661 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, s0, v0
662 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
663 ; GFX10-NEXT: ; return to shader part epilog
665 ; GFX11-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
667 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
668 ; GFX11-NEXT: s_lshr_b32 s0, s2, 1
669 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
670 ; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 1
671 ; GFX11-NEXT: s_waitcnt vmcnt(0)
672 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
673 ; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 2
674 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
675 ; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 3
676 ; GFX11-NEXT: s_and_b32 s0, s2, 1
677 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
678 ; GFX11-NEXT: s_lshl_b32 s0, s0, 4
679 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
680 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
681 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, s0, v0
682 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
683 ; GFX11-NEXT: ; return to shader part epilog
684 %vector = load <8 x i16>, ptr addrspace(1) %ptr
685 %element = extractelement <8 x i16> %vector, i32 %idx
; Dynamic-index extract from an <8 x i16> loaded through an addrspace(1)
; pointer, in a function without the amdgpu_ps calling convention and with
; no `inreg` arguments.
; The checks expect the load to land in v[3:6] (the index occupies v2) and
; everything after it to be vector instructions:
;   * v0 = idx >> 1 is compared against 1, 2 and 3, each compare feeding a
;     v_cndmask_b32 that steps through v3/v4, then v5, then v6,
;   * (idx & 1) << 4 is the v_lshrrev_b32 shift amount.
; gfx900/fiji/hawaii compute the shift amount before the select chain;
; gfx1010/gfx1100 emit the v_lshlrev_b32 after it.
689 define i16 @extractelement_vgpr_v8i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) {
690 ; GFX9-LABEL: extractelement_vgpr_v8i16_vgpr_idx:
692 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
693 ; GFX9-NEXT: global_load_dwordx4 v[3:6], v[0:1], off
694 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 1, v2
695 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
696 ; GFX9-NEXT: v_and_b32_e32 v1, 1, v2
697 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 4, v1
698 ; GFX9-NEXT: s_waitcnt vmcnt(0)
699 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc
700 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
701 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
702 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
703 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc
704 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, v1, v0
705 ; GFX9-NEXT: s_setpc_b64 s[30:31]
707 ; GFX8-LABEL: extractelement_vgpr_v8i16_vgpr_idx:
709 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
710 ; GFX8-NEXT: flat_load_dwordx4 v[3:6], v[0:1]
711 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 1, v2
712 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
713 ; GFX8-NEXT: v_and_b32_e32 v1, 1, v2
714 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 4, v1
715 ; GFX8-NEXT: s_waitcnt vmcnt(0)
716 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc
717 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
718 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
719 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
720 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc
721 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, v1, v0
722 ; GFX8-NEXT: s_setpc_b64 s[30:31]
724 ; GFX7-LABEL: extractelement_vgpr_v8i16_vgpr_idx:
726 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
727 ; GFX7-NEXT: s_mov_b32 s6, 0
728 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
729 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
730 ; GFX7-NEXT: buffer_load_dwordx4 v[3:6], v[0:1], s[4:7], 0 addr64
731 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 1, v2
732 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
733 ; GFX7-NEXT: v_and_b32_e32 v1, 1, v2
734 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 4, v1
735 ; GFX7-NEXT: s_waitcnt vmcnt(0)
736 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc
737 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
738 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
739 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
740 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc
741 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v1, v0
742 ; GFX7-NEXT: s_setpc_b64 s[30:31]
744 ; GFX10-LABEL: extractelement_vgpr_v8i16_vgpr_idx:
746 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
747 ; GFX10-NEXT: global_load_dwordx4 v[3:6], v[0:1], off
748 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v2
749 ; GFX10-NEXT: v_and_b32_e32 v2, 1, v2
750 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
751 ; GFX10-NEXT: s_waitcnt vmcnt(0)
752 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc_lo
753 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
754 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
755 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
756 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v6, vcc_lo
757 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 4, v2
758 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, v1, v0
759 ; GFX10-NEXT: s_setpc_b64 s[30:31]
761 ; GFX11-LABEL: extractelement_vgpr_v8i16_vgpr_idx:
763 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
764 ; GFX11-NEXT: global_load_b128 v[3:6], v[0:1], off
765 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v2
766 ; GFX11-NEXT: v_and_b32_e32 v2, 1, v2
767 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_2)
768 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
769 ; GFX11-NEXT: s_waitcnt vmcnt(0)
770 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc_lo
771 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
772 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
773 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
774 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
775 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v6, vcc_lo
776 ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 4, v2
777 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, v1, v0
778 ; GFX11-NEXT: s_setpc_b64 s[30:31]
779 %vector = load <8 x i16>, ptr addrspace(1) %ptr
780 %element = extractelement <8 x i16> %vector, i32 %idx
; Dynamic-index extract from an <8 x i16> loaded through an `inreg`
; addrspace(4) pointer, with an index that is not `inreg`.
; The checks expect a four-dword scalar load into s[0:3] while the index
; (v0) is processed with vector instructions; the dword is chosen by a
; chain of v_cmp_eq_u32 / v_cndmask_b32 against 1, 2 and 3:
;   * under the GCN prefix all four scalars are first copied with v_mov_b32,
;   * on gfx1010/gfx1100 only s1 is copied; s0 is used directly by the
;     first cndmask and s2/s3 by the _e64 forms of the later ones.
; The shifted result is moved to s0 with v_readfirstlane_b32 before the
; epilog.
784 define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) {
785 ; GCN-LABEL: extractelement_sgpr_v8i16_vgpr_idx:
787 ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
788 ; GCN-NEXT: v_lshrrev_b32_e32 v1, 1, v0
789 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
790 ; GCN-NEXT: v_and_b32_e32 v0, 1, v0
791 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 4, v0
792 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
793 ; GCN-NEXT: v_mov_b32_e32 v2, s0
794 ; GCN-NEXT: v_mov_b32_e32 v3, s1
795 ; GCN-NEXT: v_mov_b32_e32 v4, s2
796 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
797 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1
798 ; GCN-NEXT: v_mov_b32_e32 v5, s3
799 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
800 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1
801 ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v5, vcc
802 ; GCN-NEXT: v_lshrrev_b32_e32 v0, v0, v1
803 ; GCN-NEXT: v_readfirstlane_b32 s0, v0
804 ; GCN-NEXT: ; return to shader part epilog
806 ; GFX10-LABEL: extractelement_sgpr_v8i16_vgpr_idx:
808 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
809 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 1, v0
810 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
811 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
812 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v0
813 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
814 ; GFX10-NEXT: v_mov_b32_e32 v2, s1
815 ; GFX10-NEXT: v_cndmask_b32_e32 v2, s0, v2, vcc_lo
816 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
817 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s2, vcc_lo
818 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
819 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo
820 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, v0, v1
821 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
822 ; GFX10-NEXT: ; return to shader part epilog
824 ; GFX11-LABEL: extractelement_sgpr_v8i16_vgpr_idx:
826 ; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x0
827 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 1, v0
828 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
829 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
830 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
831 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0
832 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
833 ; GFX11-NEXT: v_mov_b32_e32 v2, s1
834 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
835 ; GFX11-NEXT: v_cndmask_b32_e32 v2, s0, v2, vcc_lo
836 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
837 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s2, vcc_lo
838 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
839 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
840 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo
841 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, v0, v1
842 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
843 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
844 ; GFX11-NEXT: ; return to shader part epilog
845 %vector = load <8 x i16>, ptr addrspace(4) %ptr
846 %element = extractelement <8 x i16> %vector, i32 %idx
; Constant index 0 on an <8 x i16> loaded through an inreg addrspace(4) pointer.
; The checks expect only the scalar 128-bit load into s[0:3] and the wait on it;
; no shift or move follows before the shader epilog.
850 define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx0(ptr addrspace(4) inreg %ptr) {
851 ; GCN-LABEL: extractelement_sgpr_v8i16_idx0:
853 ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
854 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
855 ; GCN-NEXT: ; return to shader part epilog
857 ; GFX10-LABEL: extractelement_sgpr_v8i16_idx0:
859 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
860 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
861 ; GFX10-NEXT: ; return to shader part epilog
863 ; GFX11-LABEL: extractelement_sgpr_v8i16_idx0:
865 ; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x0
866 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
867 ; GFX11-NEXT: ; return to shader part epilog
868 %vector = load <8 x i16>, ptr addrspace(4) %ptr
869 %element = extractelement <8 x i16> %vector, i32 0
; Constant index 1 on an <8 x i16> loaded through an inreg addrspace(4) pointer.
; The checks expect the scalar 128-bit load followed by a single
; s_lshr_b32 of s0 by 16, i.e. the upper half of the first loaded dword.
873 define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx1(ptr addrspace(4) inreg %ptr) {
874 ; GCN-LABEL: extractelement_sgpr_v8i16_idx1:
876 ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
877 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
878 ; GCN-NEXT: s_lshr_b32 s0, s0, 16
879 ; GCN-NEXT: ; return to shader part epilog
881 ; GFX10-LABEL: extractelement_sgpr_v8i16_idx1:
883 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
884 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
885 ; GFX10-NEXT: s_lshr_b32 s0, s0, 16
886 ; GFX10-NEXT: ; return to shader part epilog
888 ; GFX11-LABEL: extractelement_sgpr_v8i16_idx1:
890 ; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x0
891 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
892 ; GFX11-NEXT: s_lshr_b32 s0, s0, 16
893 ; GFX11-NEXT: ; return to shader part epilog
894 %vector = load <8 x i16>, ptr addrspace(4) %ptr
895 %element = extractelement <8 x i16> %vector, i32 1
; Constant index 2 on an <8 x i16> loaded through an inreg addrspace(4) pointer.
; The checks expect the scalar 128-bit load followed by a single
; s_mov_b32 copying s1 (the second loaded dword) into s0, with no shift.
899 define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx2(ptr addrspace(4) inreg %ptr) {
900 ; GCN-LABEL: extractelement_sgpr_v8i16_idx2:
902 ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
903 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
904 ; GCN-NEXT: s_mov_b32 s0, s1
905 ; GCN-NEXT: ; return to shader part epilog
907 ; GFX10-LABEL: extractelement_sgpr_v8i16_idx2:
909 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
910 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
911 ; GFX10-NEXT: s_mov_b32 s0, s1
912 ; GFX10-NEXT: ; return to shader part epilog
914 ; GFX11-LABEL: extractelement_sgpr_v8i16_idx2:
916 ; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x0
917 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
918 ; GFX11-NEXT: s_mov_b32 s0, s1
919 ; GFX11-NEXT: ; return to shader part epilog
920 %vector = load <8 x i16>, ptr addrspace(4) %ptr
921 %element = extractelement <8 x i16> %vector, i32 2
; Constant index 3 on an <8 x i16> loaded through an inreg addrspace(4) pointer.
; The checks expect the scalar 128-bit load followed by a single
; s_lshr_b32 of s1 by 16 into s0, i.e. the upper half of the second loaded dword.
925 define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx3(ptr addrspace(4) inreg %ptr) {
926 ; GCN-LABEL: extractelement_sgpr_v8i16_idx3:
928 ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
929 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
930 ; GCN-NEXT: s_lshr_b32 s0, s1, 16
931 ; GCN-NEXT: ; return to shader part epilog
933 ; GFX10-LABEL: extractelement_sgpr_v8i16_idx3:
935 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
936 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
937 ; GFX10-NEXT: s_lshr_b32 s0, s1, 16
938 ; GFX10-NEXT: ; return to shader part epilog
940 ; GFX11-LABEL: extractelement_sgpr_v8i16_idx3:
942 ; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x0
943 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
944 ; GFX11-NEXT: s_lshr_b32 s0, s1, 16
945 ; GFX11-NEXT: ; return to shader part epilog
946 %vector = load <8 x i16>, ptr addrspace(4) %ptr
947 %element = extractelement <8 x i16> %vector, i32 3
; Constant index 4 on an <8 x i16> loaded through an inreg addrspace(4) pointer.
; The checks expect the scalar 128-bit load followed by a single
; s_mov_b32 copying s2 (the third loaded dword) into s0, with no shift.
951 define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx4(ptr addrspace(4) inreg %ptr) {
952 ; GCN-LABEL: extractelement_sgpr_v8i16_idx4:
954 ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
955 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
956 ; GCN-NEXT: s_mov_b32 s0, s2
957 ; GCN-NEXT: ; return to shader part epilog
959 ; GFX10-LABEL: extractelement_sgpr_v8i16_idx4:
961 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
962 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
963 ; GFX10-NEXT: s_mov_b32 s0, s2
964 ; GFX10-NEXT: ; return to shader part epilog
966 ; GFX11-LABEL: extractelement_sgpr_v8i16_idx4:
968 ; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x0
969 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
970 ; GFX11-NEXT: s_mov_b32 s0, s2
971 ; GFX11-NEXT: ; return to shader part epilog
972 %vector = load <8 x i16>, ptr addrspace(4) %ptr
973 %element = extractelement <8 x i16> %vector, i32 4
; Constant index 5 on an <8 x i16> loaded through an inreg addrspace(4) pointer.
; The checks expect the scalar 128-bit load followed by a single
; s_lshr_b32 of s2 by 16 into s0, i.e. the upper half of the third loaded dword.
977 define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx5(ptr addrspace(4) inreg %ptr) {
978 ; GCN-LABEL: extractelement_sgpr_v8i16_idx5:
980 ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
981 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
982 ; GCN-NEXT: s_lshr_b32 s0, s2, 16
983 ; GCN-NEXT: ; return to shader part epilog
985 ; GFX10-LABEL: extractelement_sgpr_v8i16_idx5:
987 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
988 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
989 ; GFX10-NEXT: s_lshr_b32 s0, s2, 16
990 ; GFX10-NEXT: ; return to shader part epilog
992 ; GFX11-LABEL: extractelement_sgpr_v8i16_idx5:
994 ; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x0
995 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
996 ; GFX11-NEXT: s_lshr_b32 s0, s2, 16
997 ; GFX11-NEXT: ; return to shader part epilog
998 %vector = load <8 x i16>, ptr addrspace(4) %ptr
999 %element = extractelement <8 x i16> %vector, i32 5
; Constant index 6 on an <8 x i16> loaded through an inreg addrspace(4) pointer.
; The checks expect the scalar 128-bit load followed by a single
; s_mov_b32 copying s3 (the fourth loaded dword) into s0, with no shift.
1003 define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx6(ptr addrspace(4) inreg %ptr) {
1004 ; GCN-LABEL: extractelement_sgpr_v8i16_idx6:
1006 ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
1007 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
1008 ; GCN-NEXT: s_mov_b32 s0, s3
1009 ; GCN-NEXT: ; return to shader part epilog
1011 ; GFX10-LABEL: extractelement_sgpr_v8i16_idx6:
1013 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
1014 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1015 ; GFX10-NEXT: s_mov_b32 s0, s3
1016 ; GFX10-NEXT: ; return to shader part epilog
1018 ; GFX11-LABEL: extractelement_sgpr_v8i16_idx6:
1020 ; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x0
1021 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1022 ; GFX11-NEXT: s_mov_b32 s0, s3
1023 ; GFX11-NEXT: ; return to shader part epilog
1024 %vector = load <8 x i16>, ptr addrspace(4) %ptr
1025 %element = extractelement <8 x i16> %vector, i32 6
; Constant index 7 on an <8 x i16> loaded through an inreg addrspace(4) pointer.
; The checks expect the scalar 128-bit load followed by a single
; s_lshr_b32 of s3 by 16 into s0, i.e. the upper half of the fourth loaded dword.
1029 define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx7(ptr addrspace(4) inreg %ptr) {
1030 ; GCN-LABEL: extractelement_sgpr_v8i16_idx7:
1032 ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
1033 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
1034 ; GCN-NEXT: s_lshr_b32 s0, s3, 16
1035 ; GCN-NEXT: ; return to shader part epilog
1037 ; GFX10-LABEL: extractelement_sgpr_v8i16_idx7:
1039 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
1040 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1041 ; GFX10-NEXT: s_lshr_b32 s0, s3, 16
1042 ; GFX10-NEXT: ; return to shader part epilog
1044 ; GFX11-LABEL: extractelement_sgpr_v8i16_idx7:
1046 ; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x0
1047 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1048 ; GFX11-NEXT: s_lshr_b32 s0, s3, 16
1049 ; GFX11-NEXT: ; return to shader part epilog
1050 %vector = load <8 x i16>, ptr addrspace(4) %ptr
1051 %element = extractelement <8 x i16> %vector, i32 7
; Constant index 0 on an <8 x i16> loaded through an addrspace(1) pointer in a
; regular (non-shader) function. The checks expect one 128-bit load into v[0:3]
; and a wait on it, then the return with no extra ALU instruction. The load
; form differs per run line: global_load on GFX9, GFX10 and GFX11, flat_load on
; GFX8, and an addr64 buffer_load using a descriptor built in s[4:7] on GFX7.
1055 define i16 @extractelement_vgpr_v8i16_idx0(ptr addrspace(1) %ptr) {
1056 ; GFX9-LABEL: extractelement_vgpr_v8i16_idx0:
1058 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1059 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1060 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1061 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1063 ; GFX8-LABEL: extractelement_vgpr_v8i16_idx0:
1065 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1066 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
1067 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1068 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1070 ; GFX7-LABEL: extractelement_vgpr_v8i16_idx0:
1072 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1073 ; GFX7-NEXT: s_mov_b32 s6, 0
1074 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1075 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1076 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
1077 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1078 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1080 ; GFX10-LABEL: extractelement_vgpr_v8i16_idx0:
1082 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1083 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1084 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1085 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1087 ; GFX11-LABEL: extractelement_vgpr_v8i16_idx0:
1089 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1090 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
1091 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1092 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1093 %vector = load <8 x i16>, ptr addrspace(1) %ptr
1094 %element = extractelement <8 x i16> %vector, i32 0
; Constant index 1 on an <8 x i16> loaded through an addrspace(1) pointer in a
; regular (non-shader) function. After the per-target 128-bit load into v[0:3],
; every run line expects a single v_lshrrev_b32 of v0 by 16 into v0, i.e. the
; upper half of the first loaded dword.
1098 define i16 @extractelement_vgpr_v8i16_idx1(ptr addrspace(1) %ptr) {
1099 ; GFX9-LABEL: extractelement_vgpr_v8i16_idx1:
1101 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1102 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1103 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1104 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1105 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1107 ; GFX8-LABEL: extractelement_vgpr_v8i16_idx1:
1109 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1110 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
1111 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1112 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1113 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1115 ; GFX7-LABEL: extractelement_vgpr_v8i16_idx1:
1117 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1118 ; GFX7-NEXT: s_mov_b32 s6, 0
1119 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1120 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1121 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
1122 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1123 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1124 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1126 ; GFX10-LABEL: extractelement_vgpr_v8i16_idx1:
1128 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1129 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1130 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1131 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1132 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1134 ; GFX11-LABEL: extractelement_vgpr_v8i16_idx1:
1136 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1137 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
1138 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1139 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1140 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1141 %vector = load <8 x i16>, ptr addrspace(1) %ptr
1142 %element = extractelement <8 x i16> %vector, i32 1
; Constant index 2 on an <8 x i16> loaded through an addrspace(1) pointer in a
; regular (non-shader) function. After the per-target 128-bit load into v[0:3],
; every run line expects a single v_mov_b32 copying v1 (the second loaded
; dword) into v0, with no shift.
1146 define i16 @extractelement_vgpr_v8i16_idx2(ptr addrspace(1) %ptr) {
1147 ; GFX9-LABEL: extractelement_vgpr_v8i16_idx2:
1149 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1150 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1151 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1152 ; GFX9-NEXT: v_mov_b32_e32 v0, v1
1153 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1155 ; GFX8-LABEL: extractelement_vgpr_v8i16_idx2:
1157 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1158 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
1159 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1160 ; GFX8-NEXT: v_mov_b32_e32 v0, v1
1161 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1163 ; GFX7-LABEL: extractelement_vgpr_v8i16_idx2:
1165 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1166 ; GFX7-NEXT: s_mov_b32 s6, 0
1167 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1168 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1169 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
1170 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1171 ; GFX7-NEXT: v_mov_b32_e32 v0, v1
1172 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1174 ; GFX10-LABEL: extractelement_vgpr_v8i16_idx2:
1176 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1177 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1178 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1179 ; GFX10-NEXT: v_mov_b32_e32 v0, v1
1180 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1182 ; GFX11-LABEL: extractelement_vgpr_v8i16_idx2:
1184 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1185 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
1186 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1187 ; GFX11-NEXT: v_mov_b32_e32 v0, v1
1188 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1189 %vector = load <8 x i16>, ptr addrspace(1) %ptr
1190 %element = extractelement <8 x i16> %vector, i32 2
; Constant index 3 on an <8 x i16> loaded through an addrspace(1) pointer in a
; regular (non-shader) function. After the per-target 128-bit load into v[0:3],
; every run line expects a single v_lshrrev_b32 of v1 by 16 into v0, i.e. the
; upper half of the second loaded dword.
1194 define i16 @extractelement_vgpr_v8i16_idx3(ptr addrspace(1) %ptr) {
1195 ; GFX9-LABEL: extractelement_vgpr_v8i16_idx3:
1197 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1198 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1199 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1200 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v1
1201 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1203 ; GFX8-LABEL: extractelement_vgpr_v8i16_idx3:
1205 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1206 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
1207 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1208 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v1
1209 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1211 ; GFX7-LABEL: extractelement_vgpr_v8i16_idx3:
1213 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1214 ; GFX7-NEXT: s_mov_b32 s6, 0
1215 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1216 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1217 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
1218 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1219 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v1
1220 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1222 ; GFX10-LABEL: extractelement_vgpr_v8i16_idx3:
1224 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1225 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1226 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1227 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v1
1228 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1230 ; GFX11-LABEL: extractelement_vgpr_v8i16_idx3:
1232 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1233 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
1234 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1235 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v1
1236 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1237 %vector = load <8 x i16>, ptr addrspace(1) %ptr
1238 %element = extractelement <8 x i16> %vector, i32 3
; Constant index 4 on an <8 x i16> loaded through an addrspace(1) pointer in a
; regular (non-shader) function. After the per-target 128-bit load into v[0:3],
; every run line expects a single v_mov_b32 copying v2 (the third loaded
; dword) into v0, with no shift.
1242 define i16 @extractelement_vgpr_v8i16_idx4(ptr addrspace(1) %ptr) {
1243 ; GFX9-LABEL: extractelement_vgpr_v8i16_idx4:
1245 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1246 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1247 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1248 ; GFX9-NEXT: v_mov_b32_e32 v0, v2
1249 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1251 ; GFX8-LABEL: extractelement_vgpr_v8i16_idx4:
1253 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1254 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
1255 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1256 ; GFX8-NEXT: v_mov_b32_e32 v0, v2
1257 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1259 ; GFX7-LABEL: extractelement_vgpr_v8i16_idx4:
1261 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1262 ; GFX7-NEXT: s_mov_b32 s6, 0
1263 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1264 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1265 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
1266 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1267 ; GFX7-NEXT: v_mov_b32_e32 v0, v2
1268 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1270 ; GFX10-LABEL: extractelement_vgpr_v8i16_idx4:
1272 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1273 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1274 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1275 ; GFX10-NEXT: v_mov_b32_e32 v0, v2
1276 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1278 ; GFX11-LABEL: extractelement_vgpr_v8i16_idx4:
1280 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1281 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
1282 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1283 ; GFX11-NEXT: v_mov_b32_e32 v0, v2
1284 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1285 %vector = load <8 x i16>, ptr addrspace(1) %ptr
1286 %element = extractelement <8 x i16> %vector, i32 4
; Constant index 5 on an <8 x i16> loaded through an addrspace(1) pointer in a
; regular (non-shader) function. After the per-target 128-bit load into v[0:3],
; every run line expects a single v_lshrrev_b32 of v2 by 16 into v0, i.e. the
; upper half of the third loaded dword.
1290 define i16 @extractelement_vgpr_v8i16_idx5(ptr addrspace(1) %ptr) {
1291 ; GFX9-LABEL: extractelement_vgpr_v8i16_idx5:
1293 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1294 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1295 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1296 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v2
1297 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1299 ; GFX8-LABEL: extractelement_vgpr_v8i16_idx5:
1301 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1302 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
1303 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1304 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v2
1305 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1307 ; GFX7-LABEL: extractelement_vgpr_v8i16_idx5:
1309 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1310 ; GFX7-NEXT: s_mov_b32 s6, 0
1311 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1312 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1313 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
1314 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1315 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v2
1316 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1318 ; GFX10-LABEL: extractelement_vgpr_v8i16_idx5:
1320 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1321 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1322 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1323 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v2
1324 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1326 ; GFX11-LABEL: extractelement_vgpr_v8i16_idx5:
1328 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1329 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
1330 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1331 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v2
1332 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1333 %vector = load <8 x i16>, ptr addrspace(1) %ptr
1334 %element = extractelement <8 x i16> %vector, i32 5
; Constant index 6 on an <8 x i16> loaded through an addrspace(1) pointer in a
; regular (non-shader) function. After the per-target 128-bit load into v[0:3],
; every run line expects a single v_mov_b32 copying v3 (the fourth loaded
; dword) into v0, with no shift.
1338 define i16 @extractelement_vgpr_v8i16_idx6(ptr addrspace(1) %ptr) {
1339 ; GFX9-LABEL: extractelement_vgpr_v8i16_idx6:
1341 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1342 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1343 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1344 ; GFX9-NEXT: v_mov_b32_e32 v0, v3
1345 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1347 ; GFX8-LABEL: extractelement_vgpr_v8i16_idx6:
1349 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1350 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
1351 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1352 ; GFX8-NEXT: v_mov_b32_e32 v0, v3
1353 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1355 ; GFX7-LABEL: extractelement_vgpr_v8i16_idx6:
1357 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1358 ; GFX7-NEXT: s_mov_b32 s6, 0
1359 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1360 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1361 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
1362 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1363 ; GFX7-NEXT: v_mov_b32_e32 v0, v3
1364 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1366 ; GFX10-LABEL: extractelement_vgpr_v8i16_idx6:
1368 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1369 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1370 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1371 ; GFX10-NEXT: v_mov_b32_e32 v0, v3
1372 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1374 ; GFX11-LABEL: extractelement_vgpr_v8i16_idx6:
1376 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1377 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
1378 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1379 ; GFX11-NEXT: v_mov_b32_e32 v0, v3
1380 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1381 %vector = load <8 x i16>, ptr addrspace(1) %ptr
1382 %element = extractelement <8 x i16> %vector, i32 6
; Constant index 7 on an <8 x i16> loaded through an addrspace(1) pointer in a
; regular (non-shader) function. After the per-target 128-bit load into v[0:3],
; every run line expects a single v_lshrrev_b32 of v3 by 16 into v0, i.e. the
; upper half of the fourth loaded dword.
1386 define i16 @extractelement_vgpr_v8i16_idx7(ptr addrspace(1) %ptr) {
1387 ; GFX9-LABEL: extractelement_vgpr_v8i16_idx7:
1389 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1390 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1391 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1392 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v3
1393 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1395 ; GFX8-LABEL: extractelement_vgpr_v8i16_idx7:
1397 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1398 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
1399 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1400 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v3
1401 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1403 ; GFX7-LABEL: extractelement_vgpr_v8i16_idx7:
1405 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1406 ; GFX7-NEXT: s_mov_b32 s6, 0
1407 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1408 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
1409 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
1410 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1411 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v3
1412 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1414 ; GFX10-LABEL: extractelement_vgpr_v8i16_idx7:
1416 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1417 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1418 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1419 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v3
1420 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1422 ; GFX11-LABEL: extractelement_vgpr_v8i16_idx7:
1424 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1425 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
1426 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1427 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v3
1428 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1429 %vector = load <8 x i16>, ptr addrspace(1) %ptr
1430 %element = extractelement <8 x i16> %vector, i32 7