1 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s
; Dynamic-index extract from a constant <4 x float>.
; The checks expect one scalar compare of the index per candidate lane, each
; producing a 64-bit condition, and a chain of v_cndmask selects (V1 -> V2 -> V3)
; whose final result is the value that gets stored.
3 ; GCN-LABEL: {{^}}float4_extelt:
5 ; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
6 ; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
7 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
8 ; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
9 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
10 ; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
11 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1.0, [[C1]]
12 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], 2.0, [[V1]], [[C2]]
13 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], 4.0, [[V2]], [[C3]]
14 ; GCN: store_dword v[{{[0-9:]+}}], [[V3]]
15 define amdgpu_kernel void @float4_extelt(ptr addrspace(1) %out, i32 %sel) {
17 %ext = extractelement <4 x float> <float 0.0, float 1.0, float 2.0, float 4.0>, i32 %sel
18 store float %ext, ptr addrspace(1) %out
; Dynamic-index extract from a constant <4 x i32>.
; Same compare/select shape as the float variant, except that only the first
; condition is captured in a named register; the later selects are expected to
; read their condition from vcc.
22 ; GCN-LABEL: {{^}}int4_extelt:
24 ; GCN-DAG: s_cmp_lg_u32 [[IDX:s[0-9]+]], 2
25 ; GCN-DAG: s_cmp_eq_u32 [[IDX]], 1
26 ; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
27 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
28 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1, [[C1]]
29 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], 2, [[V1]], vcc
30 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], 4, [[V2]], vcc
31 ; GCN: store_dword v[{{[0-9:]+}}], [[V3]]
32 define amdgpu_kernel void @int4_extelt(ptr addrspace(1) %out, i32 %sel) {
34 %ext = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 4>, i32 %sel
35 store i32 %ext, ptr addrspace(1) %out
; Dynamic-index extract from a constant <4 x double>.
; The checks expect the selection to be done with scalar compares followed by
; pairs of s_cselect_b32, one per 32-bit half of the double; the hex immediates
; are the high/low words of the vector's constants (e.g. 0x3f847ae1:0x47ae147b
; is 0.01).
39 ; GCN-LABEL: {{^}}double4_extelt:
41 ; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 1
42 ; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x3f847ae1
43 ; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x47ae147b
44 ; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 2
45 ; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0xe147ae14, s{{[0-9]+}}
46 ; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x4000147a, s{{[0-9]+}}
47 ; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 3
48 ; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x40100a3d, s{{[0-9]+}}
49 ; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x70a3d70a, s{{[0-9]+}}
50 ; GCN: store_dwordx2 v[{{[0-9:]+}}]
51 define amdgpu_kernel void @double4_extelt(ptr addrspace(1) %out, i32 %sel) {
53 %ext = extractelement <4 x double> <double 0.01, double 1.01, double 2.01, double 4.01>, i32 %sel
54 store double %ext, ptr addrspace(1) %out
; Dynamic-index extract from a constant <5 x double> (non-power-of-two length).
; As with the 4-element case, the checks expect scalar compares against each
; index followed by s_cselect_b32 on the 32-bit halves of the double constants,
; extended with one more compare (index 4) for the fifth element.
58 ; GCN-LABEL: {{^}}double5_extelt:
60 ; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 1
61 ; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x3f847ae1
62 ; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x47ae147b
63 ; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 2
64 ; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0xe147ae14, s{{[0-9]+}}
65 ; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x4000147a, s{{[0-9]+}}
66 ; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 3
67 ; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x40100a3d, s{{[0-9]+}}
68 ; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x70a3d70a, s{{[0-9]+}}
69 ; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 4
70 ; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x40140a3d, s{{[0-9]+}}
71 ; GCN: store_dwordx2 v[{{[0-9:]+}}]
72 define amdgpu_kernel void @double5_extelt(ptr addrspace(1) %out, i32 %sel) {
74 %ext = extractelement <5 x double> <double 0.01, double 1.01, double 2.01, double 4.01, double 5.01>, i32 %sel
75 store double %ext, ptr addrspace(1) %out
; Dynamic-index extract from a constant <4 x half>.
; The whole vector fits in 64 bits, so the checks expect it to be materialized
; as two 32-bit scalar constants (0x40003c00 = halves 1.0/2.0, 0x44004200 =
; halves 3.0/4.0), the index scaled by 16 bits (shift left by 4), and the
; element obtained with a single 64-bit right shift.
79 ; GCN-LABEL: {{^}}half4_extelt:
81 ; GCN-DAG: s_mov_b32 s[[SL:[0-9]+]], 0x40003c00
82 ; GCN-DAG: s_mov_b32 s[[SH:[0-9]+]], 0x44004200
83 ; GCN-DAG: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 4
84 ; GCN: s_lshr_b64 s[[[RL:[0-9]+]]:{{[0-9]+}}], s[[[SL]]:[[SH]]], [[SEL]]
85 ; GCN-DAG: v_mov_b32_e32 v[[VRL:[0-9]+]], s[[RL]]
86 ; GCN: store_short v[{{[0-9:]+}}], v[[VRL]]
87 define amdgpu_kernel void @half4_extelt(ptr addrspace(1) %out, i32 %sel) {
89 %ext = extractelement <4 x half> <half 1.0, half 2.0, half 3.0, half 4.0>, i32 %sel
90 store half %ext, ptr addrspace(1) %out
; Dynamic-index extract from a constant <2 x float>.
; With only two lanes the checks expect a single compare of the index against 1
; and one v_cndmask choosing between the two constants.
94 ; GCN-LABEL: {{^}}float2_extelt:
96 ; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
97 ; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
98 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1.0, [[C1]]
99 ; GCN: store_dword v[{{[0-9:]+}}], [[V1]]
100 define amdgpu_kernel void @float2_extelt(ptr addrspace(1) %out, i32 %sel) {
102 %ext = extractelement <2 x float> <float 0.0, float 1.0>, i32 %sel
103 store float %ext, ptr addrspace(1) %out
; Dynamic-index extract from a constant <2 x double>.
; The checks expect one scalar compare against index 1 and two s_cselect_b32,
; one per 32-bit half; 0x3f847ae1:0x47ae147b is the bit pattern of 0.01.
107 ; GCN-LABEL: {{^}}double2_extelt:
109 ; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 1
110 ; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x3f847ae1
111 ; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x47ae147b
112 ; GCN: store_dwordx2 v[{{[0-9:]+}}]
113 define amdgpu_kernel void @double2_extelt(ptr addrspace(1) %out, i32 %sel) {
115 %ext = extractelement <2 x double> <double 0.01, double 1.01>, i32 %sel
116 store double %ext, ptr addrspace(1) %out
; Dynamic-index extract from a constant <8 x half>.
; The checks expect seven index compares (lanes 1..7), each with its own
; condition register C1..C7, feeding a seven-deep v_cndmask chain V1..V7; the
; last select result is what the short store writes.
120 ; GCN-LABEL: {{^}}half8_extelt:
122 ; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
123 ; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
124 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
125 ; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
126 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
127 ; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
128 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4
129 ; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0
130 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 5
131 ; GCN-DAG: s_cselect_b64 [[C5:[^,]+]], -1, 0
132 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 6
133 ; GCN-DAG: s_cselect_b64 [[C6:[^,]+]], -1, 0
134 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 7
135 ; GCN-DAG: s_cselect_b64 [[C7:[^,]+]], -1, 0
136 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]]
137 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]]
138 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]]
139 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]]
140 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]]
141 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]]
142 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]]
143 ; GCN: store_short v[{{[0-9:]+}}], [[V7]]
144 define amdgpu_kernel void @half8_extelt(ptr addrspace(1) %out, i32 %sel) {
146 %ext = extractelement <8 x half> <half 1.0, half 2.0, half 3.0, half 4.0, half 5.0, half 6.0, half 7.0, half 8.0>, i32 %sel
147 store half %ext, ptr addrspace(1) %out
; Dynamic-index extract from a constant <8 x i16>.
; Integer counterpart of the 8 x half test: the checks expect the same seven
; compares (C1..C7) and the same v_cndmask chain (V1..V7) ending in a short
; store of the final select.
151 ; GCN-LABEL: {{^}}short8_extelt:
153 ; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
154 ; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
155 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
156 ; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
157 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
158 ; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
159 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4
160 ; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0
161 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 5
162 ; GCN-DAG: s_cselect_b64 [[C5:[^,]+]], -1, 0
163 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 6
164 ; GCN-DAG: s_cselect_b64 [[C6:[^,]+]], -1, 0
165 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 7
166 ; GCN-DAG: s_cselect_b64 [[C7:[^,]+]], -1, 0
167 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]]
168 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]]
169 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]]
170 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]]
171 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]]
172 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]]
173 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]]
174 ; GCN: store_short v[{{[0-9:]+}}], [[V7]]
175 define amdgpu_kernel void @short8_extelt(ptr addrspace(1) %out, i32 %sel) {
177 %ext = extractelement <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i32 %sel
178 store i16 %ext, ptr addrspace(1) %out
; Dynamic-index extract from a constant <8 x float>.
; Unlike the smaller vectors, the checks here expect relative addressing: the
; eight constants are moved into VGPRs, the loaded index is copied into m0, and
; a single v_movrels_b32 picks the element that is stored.
182 ; GCN-LABEL: {{^}}float8_extelt:
183 ; GCN-DAG: s_load_dwordx2 s[2:3], s[0:1], 0x24
184 ; GCN-DAG: s_load_dword [[S0:s[0-9]+]], s[0:1], 0x2c
185 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0
186 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
187 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
188 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
189 ; GCN-DAG: s_waitcnt lgkmcnt(0)
190 ; GCN-DAG: s_mov_b32 m0, [[S0]]
191 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40a00000
192 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40c00000
193 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40e00000
194 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41000000
195 ; GCN-DAG: v_movrels_b32_e32 [[RES:v[0-9]+]], v{{[0-9]+}}
196 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
197 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
198 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[RES]]
199 define amdgpu_kernel void @float8_extelt(ptr addrspace(1) %out, i32 %sel) {
201 %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
202 store float %ext, ptr addrspace(1) %out
; Dynamic-index extract from a constant <8 x double>.
; The checks expect relative addressing via m0 with two v_movrels_b32, one on
; the base VGPR (numeric variable BASE) and one on BASE+1, yielding the low and
; high dwords of the stored 64-bit result.
206 ; GCN-LABEL: {{^}}double8_extelt:
209 ; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
210 ; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]]
211 ; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
212 ; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
213 ; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
214 ; GCN: store_dwordx2 v[{{[0-9:]+}}], v[[[RES_LO]]:[[RES_HI]]]
215 define amdgpu_kernel void @double8_extelt(ptr addrspace(1) %out, i32 %sel) {
217 %ext = extractelement <8 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0>, i32 %sel
218 store double %ext, ptr addrspace(1) %out
; Dynamic-index extract from a constant <7 x double> (non-power-of-two length).
; The expected code has the same shape as the 8-element case: index in m0 and a
; pair of v_movrels_b32 on BASE and BASE+1 forming the stored 64-bit value.
222 ; GCN-LABEL: {{^}}double7_extelt:
225 ; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
226 ; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]]
227 ; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
228 ; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
229 ; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
230 ; GCN: store_dwordx2 v[{{[0-9:]+}}], v[[[RES_LO]]:[[RES_HI]]]
231 define amdgpu_kernel void @double7_extelt(ptr addrspace(1) %out, i32 %sel) {
233 %ext = extractelement <7 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0>, i32 %sel
234 store double %ext, ptr addrspace(1) %out
; Dynamic-index extract from a constant <16 x float>.
; The checks expect all sixteen constants (1.0 .. 16.0) to be materialized in
; VGPRs, m0 to be written, and one v_movrels_b32 based at the register holding
; 1.0 (VLO) to produce the stored value.
238 ; GCN-LABEL: {{^}}float16_extelt:
240 ; GCN-DAG: s_mov_b32 m0,
241 ; GCN-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], 1.0
242 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
243 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
244 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
245 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40a00000
246 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40c00000
247 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40e00000
248 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41000000
249 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41100000
250 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41200000
251 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41300000
252 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41400000
253 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41500000
254 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41600000
255 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41700000
256 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41800000
257 ; GCN-DAG: v_movrels_b32_e32 [[RES:v[0-9]+]], [[VLO]]
258 ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]
259 define amdgpu_kernel void @float16_extelt(ptr addrspace(1) %out, i32 %sel) {
261 %ext = extractelement <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, i32 %sel
262 store float %ext, ptr addrspace(1) %out
; Dynamic-index extract from a constant <15 x double> (non-power-of-two length).
; The checks expect the index in m0 and two v_movrels_b32 on BASE and BASE+1
; whose results form the low/high dwords of the 64-bit store.
266 ; GCN-LABEL: {{^}}double15_extelt:
269 ; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
270 ; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]]
271 ; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
272 ; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
273 ; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
274 ; GCN: store_dwordx2 v[{{[0-9:]+}}], v[[[RES_LO]]:[[RES_HI]]]
275 define amdgpu_kernel void @double15_extelt(ptr addrspace(1) %out, i32 %sel) {
277 %ext = extractelement <15 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0>, i32 %sel
278 store double %ext, ptr addrspace(1) %out
; Dynamic-index extract from a constant <16 x double>.
; Expected code matches the other wide double tests: m0 holds the index and a
; v_movrels_b32 pair on BASE / BASE+1 yields the two dwords that are stored.
282 ; GCN-LABEL: {{^}}double16_extelt:
285 ; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
286 ; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]]
287 ; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
288 ; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
289 ; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
290 ; GCN: store_dwordx2 v[{{[0-9:]+}}], v[[[RES_LO]]:[[RES_HI]]]
291 define amdgpu_kernel void @double16_extelt(ptr addrspace(1) %out, i32 %sel) {
293 %ext = extractelement <16 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0, double 16.0>, i32 %sel
294 store double %ext, ptr addrspace(1) %out
; Dynamic-index extract from a constant <32 x float> with a kernel-argument index.
; The checks expect all thirty-two constants (1.0 .. 32.0) in VGPRs, a write to
; m0, and a single v_movrels_b32 based at the register holding 1.0 (VLO).
298 ; GCN-LABEL: {{^}}float32_extelt:
300 ; GCN-DAG: s_mov_b32 m0,
301 ; GCN-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], 1.0
302 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
303 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
304 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
305 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40a00000
306 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40c00000
307 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40e00000
308 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41000000
309 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41100000
310 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41200000
311 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41300000
312 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41400000
313 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41500000
314 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41600000
315 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41700000
316 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41800000
317 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41880000
318 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000
319 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41980000
320 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a00000
321 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a80000
322 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b00000
323 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b80000
324 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41c00000
325 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41c80000
326 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41d00000
327 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41d80000
328 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41e00000
329 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41e80000
330 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41f00000
331 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41f80000
332 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x42000000
333 ; GCN-DAG: v_movrels_b32_e32 [[RES:v[0-9]+]], [[VLO]]
334 ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]
335 define amdgpu_kernel void @float32_extelt(ptr addrspace(1) %out, i32 %sel) {
337 %ext = extractelement <32 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0, float 17.0, float 18.0, float 19.0, float 20.0, float 21.0, float 22.0, float 23.0, float 24.0, float 25.0, float 26.0, float 27.0, float 28.0, float 29.0, float 30.0, float 31.0, float 32.0>, i32 %sel
338 store float %ext, ptr addrspace(1) %out
; Dynamic-index extract from a constant <8 x i8>.
; The whole vector fits in 64 bits, so the checks expect it to be materialized
; as two 32-bit scalar constants (0x4030201 and 0x8070605), the index scaled by
; 8 bits (shift left by 3), and the element obtained with one 64-bit right
; shift before the byte store.
342 ; GCN-LABEL: {{^}}byte8_extelt:
344 ; GCN-DAG: s_mov_b32 s[[SL:[0-9]+]], 0x4030201
345 ; GCN-DAG: s_mov_b32 s[[SH:[0-9]+]], 0x8070605
346 ; GCN-DAG: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 3
347 ; GCN: s_lshr_b64 s[[[RL:[0-9]+]]:{{[0-9]+}}], s[[[SL]]:[[SH]]], [[SEL]]
348 ; GCN-DAG: v_mov_b32_e32 v[[VRL:[0-9]+]], s[[RL]]
349 ; GCN: store_byte v[{{[0-9:]+}}], v[[VRL]]
350 define amdgpu_kernel void @byte8_extelt(ptr addrspace(1) %out, i32 %sel) {
352 %ext = extractelement <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i32 %sel
353 store i8 %ext, ptr addrspace(1) %out
; Dynamic-index extract from a constant <16 x i8>.
; The checks expect fifteen index compares (lanes 1..15), each with its own
; condition register C1..C15, feeding a fifteen-deep v_cndmask chain V1..V15
; in which select Vn consumes V(n-1) and condition Cn; the final select is the
; value written by the byte store.
357 ; GCN-LABEL: {{^}}byte16_extelt:
359 ; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
360 ; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
361 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
362 ; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
363 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
364 ; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
365 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4
366 ; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0
367 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 5
368 ; GCN-DAG: s_cselect_b64 [[C5:[^,]+]], -1, 0
369 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 6
370 ; GCN-DAG: s_cselect_b64 [[C6:[^,]+]], -1, 0
371 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 7
372 ; GCN-DAG: s_cselect_b64 [[C7:[^,]+]], -1, 0
373 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 8
374 ; GCN-DAG: s_cselect_b64 [[C8:[^,]+]], -1, 0
375 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 9
376 ; GCN-DAG: s_cselect_b64 [[C9:[^,]+]], -1, 0
377 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 10
378 ; GCN-DAG: s_cselect_b64 [[C10:[^,]+]], -1, 0
379 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 11
380 ; GCN-DAG: s_cselect_b64 [[C11:[^,]+]], -1, 0
381 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 12
382 ; GCN-DAG: s_cselect_b64 [[C12:[^,]+]], -1, 0
383 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 13
384 ; GCN-DAG: s_cselect_b64 [[C13:[^,]+]], -1, 0
385 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 14
386 ; GCN-DAG: s_cselect_b64 [[C14:[^,]+]], -1, 0
387 ; GCN-DAG: s_cmp_lg_u32 [[IDX]], 15
388 ; GCN-DAG: s_cselect_b64 [[C15:[^,]+]], -1, 0
389 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]]
390 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]]
391 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]]
392 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]]
393 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]]
394 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]]
395 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]]
396 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V8:v[0-9]+]], {{[^,]+}}, [[V7]], [[C8]]
397 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V9:v[0-9]+]], {{[^,]+}}, [[V8]], [[C9]]
398 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V10:v[0-9]+]], {{[^,]+}}, [[V9]], [[C10]]
399 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V11:v[0-9]+]], {{[^,]+}}, [[V10]], [[C11]]
400 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V12:v[0-9]+]], {{[^,]+}}, [[V11]], [[C12]]
401 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V13:v[0-9]+]], {{[^,]+}}, [[V12]], [[C13]]
402 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V14:v[0-9]+]], {{[^,]+}}, [[V13]], [[C14]]
403 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V15:v[0-9]+]], {{[^,]+}}, [[V14]], [[C15]]
404 ; GCN: store_byte v[{{[0-9:]+}}], [[V15]]
405 define amdgpu_kernel void @byte16_extelt(ptr addrspace(1) %out, i32 %sel) {
407 %ext = extractelement <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, i32 %sel
408 store i8 %ext, ptr addrspace(1) %out
; Dynamic-index extract from a constant <4 x i1>, zero-extended to i32.
; The checks expect the four bits (0, 1, 0, 1) to be written out with buffer
; byte stores, one byte to be loaded back, and the result masked with 1 before
; the dword store.
412 ; GCN-LABEL: {{^}}bit4_extelt:
413 ; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
414 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
415 ; GCN-DAG: buffer_store_byte [[ZERO]],
416 ; GCN-DAG: buffer_store_byte [[ONE]],
417 ; GCN-DAG: buffer_store_byte [[ZERO]],
418 ; GCN-DAG: buffer_store_byte [[ONE]],
419 ; GCN: buffer_load_ubyte [[LOAD:v[0-9]+]],
420 ; GCN: v_and_b32_e32 [[RES:v[0-9]+]], 1, [[LOAD]]
421 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[RES]]
422 define amdgpu_kernel void @bit4_extelt(ptr addrspace(1) %out, i32 %sel) {
424 %ext = extractelement <4 x i1> <i1 0, i1 1, i1 0, i1 1>, i32 %sel
425 %zext = zext i1 %ext to i32
426 store i32 %zext, ptr addrspace(1) %out
430 ; GCN-LABEL: {{^}}bit128_extelt:
432 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1
433 ; GCN: s_cmpk_lg_i32 {{s[0-9]+}}, 0x7f
434 ; GCN: s_cselect_b64 [[CL:[^,]+]], -1, 0
435 ; GCN: v_cndmask_b32_e{{32|64}} [[VL:v[0-9]+]], 0, [[V1]], [[CL]]
436 ; GCN: v_and_b32_e32 [[RES:v[0-9]+]], 1, [[VL]]
437 ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]
438 define amdgpu_kernel void @bit128_extelt(ptr addrspace(1) %out, i32 %sel) {
440 %ext = extractelement <128 x i1> <i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>, i32 %sel
441 %zext = zext i1 %ext to i32
442 store i32 %zext, ptr addrspace(1) %out
; Dynamic-index extract from a constant <32 x float> in a non-kernel function,
; where the checks reference the index as v0 (a vector register).
; The checks pin the first step (compare v0 with 1, select between 1.0 and 2.0)
; and the last step (compare v0 with 31, select involving 32.0 = 0x42000000,
; result written to v0) of the expected v_cmp / v_cndmask chain.
446 ; GCN-LABEL: {{^}}float32_extelt_vec:
448 ; GCN-DAG: v_cmp_eq_u32_e{{32|64}} [[CC1:[^,]+]], 1, v0
449 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 1.0, 2.0, [[CC1]]
450 ; GCN-DAG: v_mov_b32_e32 [[LASTVAL:v[0-9]+]], 0x42000000
451 ; GCN-DAG: v_cmp_ne_u32_e32 [[LASTCC:[^,]+]], 31, v0
452 ; GCN-DAG: v_cndmask_b32_e{{32|64}} v0, [[LASTVAL]], v{{[0-9]+}}, [[LASTCC]]
453 define float @float32_extelt_vec(i32 %sel) {
455 %ext = extractelement <32 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0, float 17.0, float 18.0, float 19.0, float 20.0, float 21.0, float 22.0, float 23.0, float 24.0, float 25.0, float 26.0, float 27.0, float 28.0, float 29.0, float 30.0, float 31.0, float 32.0>, i32 %sel
; Dynamic-index extract from a constant <16 x double> in a non-kernel function,
; where the checks reference the index as v0 (a vector register).
; Only the first step of the expected chain is checked: the high and low dwords
; of 1.1 (0x3ff19999:0x9999999a) and 2.1 (0x4000cccc:0xcccccccd) are moved into
; VGPRs and selected with two v_cndmask sharing one compare of v0 against 1.
459 ; GCN-LABEL: {{^}}double16_extelt_vec:
461 ; GCN-DAG: v_mov_b32_e32 [[V1HI:v[0-9]+]], 0x3ff19999
462 ; GCN-DAG: v_mov_b32_e32 [[V1LO:v[0-9]+]], 0x9999999a
463 ; GCN-DAG: v_mov_b32_e32 [[V2HI:v[0-9]+]], 0x4000cccc
464 ; GCN-DAG: v_mov_b32_e32 [[V2LO:v[0-9]+]], 0xcccccccd
465 ; GCN-DAG: v_cmp_eq_u32_e{{32|64}} [[CC1:[^,]+]], 1, v0
466 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[R1HI:v[0-9]+]], [[V1HI]], [[V2HI]], [[CC1]]
467 ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[R1LO:v[0-9]+]], [[V1LO]], [[V2LO]], [[CC1]]
468 define double @double16_extelt_vec(i32 %sel) {
470 %ext = extractelement <16 x double> <double 1.1, double 2.1, double 3.1, double 4.1, double 5.1, double 6.1, double 7.1, double 8.1, double 9.1, double 10.1, double 11.1, double 12.1, double 13.1, double 14.1, double 15.1, double 16.1>, i32 %sel