1 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s
; float4_extelt: extractelement with a runtime index (%sel) from the constant
; vector <0.0, 1.0, 2.0, 4.0>. The checks below expect the index SGPR to be
; compared against 1, 2 and 3, three v_cndmask_b32 selects chained so that each
; one consumes the previous result, and the final select to be stored as a dword.
3 ; GCN-LABEL: {{^}}float4_extelt:
5 ; GCN-DAG: v_cmp_eq_u32_e64 [[C1:[^,]+]], [[IDX:s[0-9]+]], 1
6 ; GCN-DAG: v_cmp_ne_u32_e64 [[C2:[^,]+]], [[IDX]], 2
7 ; GCN-DAG: v_cmp_ne_u32_e64 [[C3:[^,]+]], [[IDX]], 3
8 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1.0, [[C1]]
9 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], 2.0, [[V1]], [[C2]]
10 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], 4.0, [[V2]], [[C3]]
11 ; GCN: store_dword v[{{[0-9:]+}}], [[V3]]
12 define amdgpu_kernel void @float4_extelt(float addrspace(1)* %out, i32 %sel) {
14 %ext = extractelement <4 x float> <float 0.0, float 1.0, float 2.0, float 4.0>, i32 %sel
15 store float %ext, float addrspace(1)* %out
; int4_extelt: integer counterpart of the <4 x float> case. Extracts a
; runtime-indexed element from the constant vector <0, 1, 2, 4>; the expected
; code is the same three-compare / three-select chain, with integer immediates
; in the selects, ending in a dword store of the last select.
19 ; GCN-LABEL: {{^}}int4_extelt:
21 ; GCN-DAG: v_cmp_eq_u32_e64 [[C1:[^,]+]], [[IDX:s[0-9]+]], 1
22 ; GCN-DAG: v_cmp_ne_u32_e64 [[C2:[^,]+]], [[IDX]], 2
23 ; GCN-DAG: v_cmp_ne_u32_e64 [[C3:[^,]+]], [[IDX]], 3
24 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1, [[C1]]
25 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], 2, [[V1]], [[C2]]
26 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], 4, [[V2]], [[C3]]
27 ; GCN: store_dword v[{{[0-9:]+}}], [[V3]]
28 define amdgpu_kernel void @int4_extelt(i32 addrspace(1)* %out, i32 %sel) {
30 %ext = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 4>, i32 %sel
31 store i32 %ext, i32 addrspace(1)* %out
; double4_extelt: runtime-indexed extractelement from a constant <4 x double>.
; The checks expect equality compares of the index against 1, 2 and 3 and at
; least one v_cndmask_b32 conditioned on each compare result (the select data
; operands are deliberately left unconstrained), followed by a dwordx2 store.
35 ; GCN-LABEL: {{^}}double4_extelt:
37 ; GCN-DAG: v_cmp_eq_u32_e64 [[C1:[^,]+]], [[IDX:s[0-9]+]], 1
38 ; GCN-DAG: v_cmp_eq_u32_e64 [[C2:[^,]+]], [[IDX]], 2
39 ; GCN-DAG: v_cmp_eq_u32_e64 [[C3:[^,]+]], [[IDX]], 3
40 ; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]]
41 ; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C2]]
42 ; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C3]]
43 ; GCN: store_dwordx2 v[{{[0-9:]+}}]
44 define amdgpu_kernel void @double4_extelt(double addrspace(1)* %out, i32 %sel) {
46 %ext = extractelement <4 x double> <double 0.01, double 1.01, double 2.01, double 4.01>, i32 %sel
47 store double %ext, double addrspace(1)* %out
; half4_extelt: runtime-indexed extractelement from the constant vector
; <half 1.0, 2.0, 3.0, 4.0>. The four halves are expected to be packed into a
; 64-bit scalar pair (0x40003c00 = {2.0, 1.0}, 0x44004200 = {4.0, 3.0}); the
; index is scaled to a bit offset by a left shift of 4 (x16) and the pair is
; shifted right by that amount. The low dword of the shifted value is copied
; to a VGPR and stored as a short.
; The shift-amount capture matches an SGPR name with a decimal register number.
51 ; GCN-LABEL: {{^}}half4_extelt:
53 ; GCN-DAG: s_mov_b32 s[[SL:[0-9]+]], 0x40003c00
54 ; GCN-DAG: s_mov_b32 s[[SH:[0-9]+]], 0x44004200
55 ; GCN-DAG: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 4
56 ; GCN: s_lshr_b64 s{{\[}}[[RL:[0-9]+]]:{{[0-9]+}}], s{{\[}}[[SL]]:[[SH]]], [[SEL]]
57 ; GCN-DAG: v_mov_b32_e32 v[[VRL:[0-9]+]], s[[RL]]
58 ; GCN: store_short v[{{[0-9:]+}}], v[[VRL]]
59 define amdgpu_kernel void @half4_extelt(half addrspace(1)* %out, i32 %sel) {
61 %ext = extractelement <4 x half> <half 1.0, half 2.0, half 3.0, half 4.0>, i32 %sel
62 store half %ext, half addrspace(1)* %out
; float2_extelt: runtime-indexed extractelement from the constant vector
; <0.0, 1.0>. A single compare of the index against 1 and a single
; v_cndmask_b32 choosing between 0 and 1.0 are expected; the select result is
; stored as a dword.
66 ; GCN-LABEL: {{^}}float2_extelt:
68 ; GCN-DAG: v_cmp_eq_u32_e64 [[C1:[^,]+]], [[IDX:s[0-9]+]], 1
69 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1.0, [[C1]]
70 ; GCN: store_dword v[{{[0-9:]+}}], [[V1]]
71 define amdgpu_kernel void @float2_extelt(float addrspace(1)* %out, i32 %sel) {
73 %ext = extractelement <2 x float> <float 0.0, float 1.0>, i32 %sel
74 store float %ext, float addrspace(1)* %out
; double2_extelt: runtime-indexed extractelement from a constant <2 x double>.
; One compare of the index against 1 is expected, with two v_cndmask_b32
; selects both conditioned on that same result (presumably one per 32-bit half
; of the double; the checks leave the data operands unconstrained), followed by
; a dwordx2 store.
78 ; GCN-LABEL: {{^}}double2_extelt:
80 ; GCN-DAG: v_cmp_eq_u32_e64 [[C1:[^,]+]], [[IDX:s[0-9]+]], 1
81 ; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]]
82 ; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]]
83 ; GCN: store_dwordx2 v[{{[0-9:]+}}]
84 define amdgpu_kernel void @double2_extelt(double addrspace(1)* %out, i32 %sel) {
86 %ext = extractelement <2 x double> <double 0.01, double 1.01>, i32 %sel
87 store double %ext, double addrspace(1)* %out
; half8_extelt: runtime-indexed extractelement from a constant <8 x half>
; (1.0 .. 8.0). The checks expect the index to be compared against 1 through 7
; and seven v_cndmask_b32 selects linked into a chain (select N takes select
; N-1 as an input and compare N as its condition). The constant operand of
; each select is not constrained. The last select is stored as a short.
91 ; GCN-LABEL: {{^}}half8_extelt:
93 ; GCN-DAG: v_cmp_eq_u32_e64 [[C1:[^,]+]], [[IDX:s[0-9]+]], 1
94 ; GCN-DAG: v_cmp_ne_u32_e64 [[C2:[^,]+]], [[IDX]], 2
95 ; GCN-DAG: v_cmp_ne_u32_e64 [[C3:[^,]+]], [[IDX]], 3
96 ; GCN-DAG: v_cmp_ne_u32_e64 [[C4:[^,]+]], [[IDX]], 4
97 ; GCN-DAG: v_cmp_ne_u32_e64 [[C5:[^,]+]], [[IDX]], 5
98 ; GCN-DAG: v_cmp_ne_u32_e64 [[C6:[^,]+]], [[IDX]], 6
99 ; GCN-DAG: v_cmp_ne_u32_e64 [[C7:[^,]+]], [[IDX]], 7
100 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]]
101 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]]
102 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]]
103 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]]
104 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]]
105 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]]
106 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]]
107 ; GCN: store_short v[{{[0-9:]+}}], [[V7]]
108 define amdgpu_kernel void @half8_extelt(half addrspace(1)* %out, i32 %sel) {
110 %ext = extractelement <8 x half> <half 1.0, half 2.0, half 3.0, half 4.0, half 5.0, half 6.0, half 7.0, half 8.0>, i32 %sel
111 store half %ext, half addrspace(1)* %out
; short8_extelt: i16 counterpart of the <8 x half> case. Extracts a
; runtime-indexed element from the constant vector <1 .. 8>; the expected code
; is the same seven-compare / seven-select chain, with the last select stored
; as a short.
115 ; GCN-LABEL: {{^}}short8_extelt:
117 ; GCN-DAG: v_cmp_eq_u32_e64 [[C1:[^,]+]], [[IDX:s[0-9]+]], 1
118 ; GCN-DAG: v_cmp_ne_u32_e64 [[C2:[^,]+]], [[IDX]], 2
119 ; GCN-DAG: v_cmp_ne_u32_e64 [[C3:[^,]+]], [[IDX]], 3
120 ; GCN-DAG: v_cmp_ne_u32_e64 [[C4:[^,]+]], [[IDX]], 4
121 ; GCN-DAG: v_cmp_ne_u32_e64 [[C5:[^,]+]], [[IDX]], 5
122 ; GCN-DAG: v_cmp_ne_u32_e64 [[C6:[^,]+]], [[IDX]], 6
123 ; GCN-DAG: v_cmp_ne_u32_e64 [[C7:[^,]+]], [[IDX]], 7
124 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]]
125 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]]
126 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]]
127 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]]
128 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]]
129 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]]
130 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]]
131 ; GCN: store_short v[{{[0-9:]+}}], [[V7]]
132 define amdgpu_kernel void @short8_extelt(i16 addrspace(1)* %out, i32 %sel) {
134 %ext = extractelement <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i32 %sel
135 store i16 %ext, i16 addrspace(1)* %out
; float8_extelt: runtime-indexed extractelement from a constant <8 x float>
; (1.0 .. 8.0). As in the 8-element half/i16 cases, seven compares of the index
; (against 1 through 7) and a chain of seven v_cndmask_b32 selects are
; expected; here the last select is stored as a dword.
139 ; GCN-LABEL: {{^}}float8_extelt:
141 ; GCN-DAG: v_cmp_eq_u32_e64 [[C1:[^,]+]], [[IDX:s[0-9]+]], 1
142 ; GCN-DAG: v_cmp_ne_u32_e64 [[C2:[^,]+]], [[IDX]], 2
143 ; GCN-DAG: v_cmp_ne_u32_e64 [[C3:[^,]+]], [[IDX]], 3
144 ; GCN-DAG: v_cmp_ne_u32_e64 [[C4:[^,]+]], [[IDX]], 4
145 ; GCN-DAG: v_cmp_ne_u32_e64 [[C5:[^,]+]], [[IDX]], 5
146 ; GCN-DAG: v_cmp_ne_u32_e64 [[C6:[^,]+]], [[IDX]], 6
147 ; GCN-DAG: v_cmp_ne_u32_e64 [[C7:[^,]+]], [[IDX]], 7
148 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]]
149 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]]
150 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]]
151 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]]
152 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]]
153 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]]
154 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]]
155 ; GCN: store_dword v[{{[0-9:]+}}], [[V7]]
156 define amdgpu_kernel void @float8_extelt(float addrspace(1)* %out, i32 %sel) {
158 %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
159 store float %ext, float addrspace(1)* %out
; float16_extelt: runtime-indexed extractelement from a constant <16 x float>
; (1.0 .. 16.0). Unlike the smaller vectors, no compare/select chain is
; expected here: the checks look for a write to m0, sixteen v_mov_b32 that
; materialise the element values (1.0, 2.0 and 4.0 print as inline constants,
; the others as 32-bit hex literals, e.g. 0x40400000 = 3.0), and a
; v_movrels_b32 whose source operand is the register holding element 0 (1.0).
; The v_movrels result is stored as a dword.
163 ; GCN-LABEL: {{^}}float16_extelt:
165 ; GCN-DAG: s_mov_b32 m0,
166 ; GCN-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], 1.0
167 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
168 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
169 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
170 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40a00000
171 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40c00000
172 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40e00000
173 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41000000
174 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41100000
175 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41200000
176 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41300000
177 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41400000
178 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41500000
179 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41600000
180 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41700000
181 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41800000
182 ; GCN-DAG: v_movrels_b32_e32 [[RES:v[0-9]+]], [[VLO]]
183 ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]
184 define amdgpu_kernel void @float16_extelt(float addrspace(1)* %out, i32 %sel) {
186 %ext = extractelement <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, i32 %sel
187 store float %ext, float addrspace(1)* %out
; double16_extelt: runtime-indexed extractelement from a constant
; <16 x double> (1.0 .. 16.0). The checks only pin the memory traffic: 32
; buffer_store_dword (16 elements x 2 dwords) followed, in order, by two
; buffer_load_dword. Operands are not constrained.
; NOTE(review): presumably the vector is written out to memory and the selected
; 64-bit element is read back with the two loads; the checks do not verify the
; addresses involved.
191 ; GCN-LABEL: {{^}}double16_extelt:
192 ; GCN: buffer_store_dword
193 ; GCN: buffer_store_dword
194 ; GCN: buffer_store_dword
195 ; GCN: buffer_store_dword
196 ; GCN: buffer_store_dword
197 ; GCN: buffer_store_dword
198 ; GCN: buffer_store_dword
199 ; GCN: buffer_store_dword
200 ; GCN: buffer_store_dword
201 ; GCN: buffer_store_dword
202 ; GCN: buffer_store_dword
203 ; GCN: buffer_store_dword
204 ; GCN: buffer_store_dword
205 ; GCN: buffer_store_dword
206 ; GCN: buffer_store_dword
207 ; GCN: buffer_store_dword
208 ; GCN: buffer_store_dword
209 ; GCN: buffer_store_dword
210 ; GCN: buffer_store_dword
211 ; GCN: buffer_store_dword
212 ; GCN: buffer_store_dword
213 ; GCN: buffer_store_dword
214 ; GCN: buffer_store_dword
215 ; GCN: buffer_store_dword
216 ; GCN: buffer_store_dword
217 ; GCN: buffer_store_dword
218 ; GCN: buffer_store_dword
219 ; GCN: buffer_store_dword
220 ; GCN: buffer_store_dword
221 ; GCN: buffer_store_dword
222 ; GCN: buffer_store_dword
223 ; GCN: buffer_store_dword
224 ; GCN: buffer_load_dword
225 ; GCN: buffer_load_dword
227 define amdgpu_kernel void @double16_extelt(double addrspace(1)* %out, i32 %sel) {
229 %ext = extractelement <16 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0, double 16.0>, i32 %sel
230 store double %ext, double addrspace(1)* %out
; byte8_extelt: runtime-indexed extractelement from the constant vector
; <i8 1 .. 8>. The eight bytes are expected to be packed into a 64-bit scalar
; pair (0x4030201 holds elements 0-3, 0x8070605 holds elements 4-7); the index
; is scaled to a bit offset by a left shift of 3 (x8) and the pair is shifted
; right by that amount. The low dword of the shifted value is copied to a VGPR
; and stored as a byte.
; The shift-amount capture matches an SGPR name with a decimal register number.
234 ; GCN-LABEL: {{^}}byte8_extelt:
236 ; GCN-DAG: s_mov_b32 s[[SL:[0-9]+]], 0x4030201
237 ; GCN-DAG: s_mov_b32 s[[SH:[0-9]+]], 0x8070605
238 ; GCN-DAG: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 3
239 ; GCN: s_lshr_b64 s{{\[}}[[RL:[0-9]+]]:{{[0-9]+}}], s{{\[}}[[SL]]:[[SH]]], [[SEL]]
240 ; GCN-DAG: v_mov_b32_e32 v[[VRL:[0-9]+]], s[[RL]]
241 ; GCN: store_byte v[{{[0-9:]+}}], v[[VRL]]
242 define amdgpu_kernel void @byte8_extelt(i8 addrspace(1)* %out, i32 %sel) {
244 %ext = extractelement <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i32 %sel
245 store i8 %ext, i8 addrspace(1)* %out
; byte16_extelt: runtime-indexed extractelement from the constant vector
; <i8 1 .. 16>. The checks expect the index to be compared against 1 through
; 15 and fifteen v_cndmask_b32 selects linked into a chain: select N takes the
; result of select N-1 as an input and the result of compare N as its
; condition. The constant operand of each select is not constrained. The last
; select is stored as a byte.
249 ; GCN-LABEL: {{^}}byte16_extelt:
251 ; GCN-DAG: v_cmp_eq_u32_e64 [[C1:[^,]+]], [[IDX:s[0-9]+]], 1
252 ; GCN-DAG: v_cmp_ne_u32_e64 [[C2:[^,]+]], [[IDX]], 2
253 ; GCN-DAG: v_cmp_ne_u32_e64 [[C3:[^,]+]], [[IDX]], 3
254 ; GCN-DAG: v_cmp_ne_u32_e64 [[C4:[^,]+]], [[IDX]], 4
255 ; GCN-DAG: v_cmp_ne_u32_e64 [[C5:[^,]+]], [[IDX]], 5
256 ; GCN-DAG: v_cmp_ne_u32_e64 [[C6:[^,]+]], [[IDX]], 6
257 ; GCN-DAG: v_cmp_ne_u32_e64 [[C7:[^,]+]], [[IDX]], 7
258 ; GCN-DAG: v_cmp_ne_u32_e64 [[C8:[^,]+]], [[IDX]], 8
259 ; GCN-DAG: v_cmp_ne_u32_e64 [[C9:[^,]+]], [[IDX]], 9
260 ; GCN-DAG: v_cmp_ne_u32_e64 [[C10:[^,]+]], [[IDX]], 10
261 ; GCN-DAG: v_cmp_ne_u32_e64 [[C11:[^,]+]], [[IDX]], 11
262 ; GCN-DAG: v_cmp_ne_u32_e64 [[C12:[^,]+]], [[IDX]], 12
263 ; GCN-DAG: v_cmp_ne_u32_e64 [[C13:[^,]+]], [[IDX]], 13
264 ; GCN-DAG: v_cmp_ne_u32_e64 [[C14:[^,]+]], [[IDX]], 14
265 ; GCN-DAG: v_cmp_ne_u32_e64 [[C15:[^,]+]], [[IDX]], 15
266 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]]
267 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]]
268 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]]
269 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]]
270 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]]
271 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]]
272 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]]
273 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V8:v[0-9]+]], {{[^,]+}}, [[V7]], [[C8]]
274 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V9:v[0-9]+]], {{[^,]+}}, [[V8]], [[C9]]
275 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V10:v[0-9]+]], {{[^,]+}}, [[V9]], [[C10]]
276 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V11:v[0-9]+]], {{[^,]+}}, [[V10]], [[C11]]
277 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V12:v[0-9]+]], {{[^,]+}}, [[V11]], [[C12]]
278 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V13:v[0-9]+]], {{[^,]+}}, [[V12]], [[C13]]
279 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V14:v[0-9]+]], {{[^,]+}}, [[V13]], [[C14]]
280 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V15:v[0-9]+]], {{[^,]+}}, [[V14]], [[C15]]
281 ; GCN: store_byte v[{{[0-9:]+}}], [[V15]]
282 define amdgpu_kernel void @byte16_extelt(i8 addrspace(1)* %out, i32 %sel) {
284 %ext = extractelement <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, i32 %sel
285 store i8 %ext, i8 addrspace(1)* %out
; bit4_extelt: runtime-indexed extractelement from the constant vector
; <i1 0, 1, 0, 1>, zero-extended to i32. The checks expect registers holding 0
; and 1, four byte stores (two of each register, in any order), then a
; buffer_load_ubyte whose result is masked with 1 and stored as a dword via
; flat_store_dword.
; NOTE(review): presumably the four bits are placed in memory one byte each and
; the selected byte is read back; the checks do not verify the addresses.
289 ; GCN-LABEL: {{^}}bit4_extelt:
290 ; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
291 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
292 ; GCN-DAG: buffer_store_byte [[ZERO]],
293 ; GCN-DAG: buffer_store_byte [[ONE]],
294 ; GCN-DAG: buffer_store_byte [[ZERO]],
295 ; GCN-DAG: buffer_store_byte [[ONE]],
296 ; GCN: buffer_load_ubyte [[LOAD:v[0-9]+]],
297 ; GCN: v_and_b32_e32 [[RES:v[0-9]+]], 1, [[LOAD]]
298 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[RES]]
299 define amdgpu_kernel void @bit4_extelt(i32 addrspace(1)* %out, i32 %sel) {
301 %ext = extractelement <4 x i1> <i1 0, i1 1, i1 0, i1 1>, i32 %sel
302 %zext = zext i1 %ext to i32
303 store i32 %zext, i32 addrspace(1)* %out
307 ; GCN-LABEL: {{^}}bit128_extelt:
309 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 1, 0,
310 ; GCN-DAG: v_mov_b32_e32 [[LASTIDX:v[0-9]+]], 0x7f
311 ; GCN-DAG: v_cmp_ne_u32_e32 [[CL:[^,]+]], s{{[0-9]+}}, [[LASTIDX]]
312 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[VL:v[0-9]+]], 0, v{{[0-9]+}}, [[CL]]
313 ; GCN: v_and_b32_e32 [[RES:v[0-9]+]], 1, [[VL]]
314 ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]
315 define amdgpu_kernel void @bit128_extelt(i32 addrspace(1)* %out, i32 %sel) {
317 %ext = extractelement <128 x i1> <i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>, i32 %sel
318 %zext = zext i1 %ext to i32
319 store i32 %zext, i32 addrspace(1)* %out