1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX2
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX2
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX512
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX512
; Bitcast the i2 argument to <2 x i1> and sign-extend to <2 x i64>: each
; input bit becomes an all-ones / all-zeros 64-bit lane. The prefixed check
; lines below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
15 define <2 x i64> @ext_i2_2i64(i2 %a0) {
16 ; SSE2-SSSE3-LABEL: ext_i2_2i64:
17 ; SSE2-SSSE3: # %bb.0:
18 ; SSE2-SSSE3-NEXT: movd %edi, %xmm0
19 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
20 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
21 ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
22 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
23 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
24 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
25 ; SSE2-SSSE3-NEXT: retq
27 ; AVX1-LABEL: ext_i2_2i64:
29 ; AVX1-NEXT: vmovd %edi, %xmm0
30 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
31 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [1,2]
32 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
33 ; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
36 ; AVX2-LABEL: ext_i2_2i64:
38 ; AVX2-NEXT: vmovd %edi, %xmm0
39 ; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
40 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [1,2]
41 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
42 ; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
45 ; AVX512-LABEL: ext_i2_2i64:
47 ; AVX512-NEXT: kmovd %edi, %k1
48 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
49 ; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
51 %1 = bitcast i2 %a0 to <2 x i1>
52 %2 = sext <2 x i1> %1 to <2 x i64>
; Bitcast i4 -> <4 x i1>, sign-extend to <4 x i32>. Pre-AVX512 targets
; broadcast the scalar, AND with the per-lane power-of-two masks [1,2,4,8],
; and compare-equal to produce the all-ones/all-zeros lanes.
56 define <4 x i32> @ext_i4_4i32(i4 %a0) {
57 ; SSE2-SSSE3-LABEL: ext_i4_4i32:
58 ; SSE2-SSSE3: # %bb.0:
59 ; SSE2-SSSE3-NEXT: movd %edi, %xmm0
60 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
61 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8]
62 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
63 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
64 ; SSE2-SSSE3-NEXT: retq
66 ; AVX1-LABEL: ext_i4_4i32:
68 ; AVX1-NEXT: vmovd %edi, %xmm0
69 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
70 ; AVX1-NEXT: vpmovsxbd {{.*#+}} xmm1 = [1,2,4,8]
71 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
72 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
75 ; AVX2-LABEL: ext_i4_4i32:
77 ; AVX2-NEXT: vmovd %edi, %xmm0
78 ; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
79 ; AVX2-NEXT: vpmovsxbd {{.*#+}} xmm1 = [1,2,4,8]
80 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
81 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
84 ; AVX512-LABEL: ext_i4_4i32:
86 ; AVX512-NEXT: kmovd %edi, %k1
87 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
88 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
90 %1 = bitcast i4 %a0 to <4 x i1>
91 %2 = sext <4 x i1> %1 to <4 x i32>
; Bitcast i8 -> <8 x i1>, sign-extend to <8 x i16>. AVX512 (with BW) maps
; this directly to a mask register via kmovd + vpmovm2w; older targets use
; the broadcast / AND-with-bit-masks / compare-equal pattern.
96 define <8 x i16> @ext_i8_8i16(i8 %a0) {
97 ; SSE2-SSSE3-LABEL: ext_i8_8i16:
98 ; SSE2-SSSE3: # %bb.0:
99 ; SSE2-SSSE3-NEXT: movd %edi, %xmm0
100 ; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
101 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
102 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
103 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
104 ; SSE2-SSSE3-NEXT: pcmpeqw %xmm1, %xmm0
105 ; SSE2-SSSE3-NEXT: retq
107 ; AVX1-LABEL: ext_i8_8i16:
109 ; AVX1-NEXT: vmovd %edi, %xmm0
110 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
111 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
112 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
113 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
114 ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
117 ; AVX2-LABEL: ext_i8_8i16:
119 ; AVX2-NEXT: vmovd %edi, %xmm0
120 ; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
121 ; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
122 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
123 ; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
126 ; AVX512-LABEL: ext_i8_8i16:
128 ; AVX512-NEXT: kmovd %edi, %k0
129 ; AVX512-NEXT: vpmovm2w %k0, %xmm0
131 %1 = bitcast i8 %a0 to <8 x i1>
132 %2 = sext <8 x i1> %1 to <8 x i16>
; Bitcast i16 -> <16 x i1>, sign-extend to <16 x i8>. This is the one case
; with distinct SSE2 vs SSSE3 checks: SSSE3 can splat each source byte with
; a single pshufb, while plain SSE2 needs punpcklbw + two shuffles.
136 define <16 x i8> @ext_i16_16i8(i16 %a0) {
137 ; SSE2-LABEL: ext_i16_16i8:
139 ; SSE2-NEXT: movd %edi, %xmm0
140 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
141 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
142 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
143 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
144 ; SSE2-NEXT: pand %xmm1, %xmm0
145 ; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
148 ; SSSE3-LABEL: ext_i16_16i8:
150 ; SSSE3-NEXT: movd %edi, %xmm0
151 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
152 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
153 ; SSSE3-NEXT: pand %xmm1, %xmm0
154 ; SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
157 ; AVX1-LABEL: ext_i16_16i8:
159 ; AVX1-NEXT: vmovd %edi, %xmm0
160 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
161 ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
162 ; AVX1-NEXT: # xmm1 = mem[0,0]
163 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
164 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
167 ; AVX2-LABEL: ext_i16_16i8:
169 ; AVX2-NEXT: vmovd %edi, %xmm0
170 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
171 ; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
172 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
173 ; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
176 ; AVX512-LABEL: ext_i16_16i8:
178 ; AVX512-NEXT: kmovd %edi, %k0
179 ; AVX512-NEXT: vpmovm2b %k0, %xmm0
181 %1 = bitcast i16 %a0 to <16 x i1>
182 %2 = sext <16 x i1> %1 to <16 x i8>
; First 256-bit-result case: bitcast i4 -> <4 x i1>, sign-extend to <4 x i64>.
; SSE targets split the result across two xmm registers (masks [1,2] and
; [4,8]); AVX1 splits the 256-bit compare into two 128-bit vpcmpeqq halves.
190 define <4 x i64> @ext_i4_4i64(i4 %a0) {
191 ; SSE2-SSSE3-LABEL: ext_i4_4i64:
192 ; SSE2-SSSE3: # %bb.0:
193 ; SSE2-SSSE3-NEXT: movd %edi, %xmm0
194 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
195 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
196 ; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm1
197 ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
198 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
199 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
200 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
201 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4,8]
202 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
203 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
204 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
205 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
206 ; SSE2-SSSE3-NEXT: retq
208 ; AVX1-LABEL: ext_i4_4i64:
210 ; AVX1-NEXT: vmovd %edi, %xmm0
211 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
212 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
213 ; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8]
214 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
215 ; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm1
216 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
217 ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
218 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
221 ; AVX2-LABEL: ext_i4_4i64:
223 ; AVX2-NEXT: vmovd %edi, %xmm0
224 ; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
225 ; AVX2-NEXT: vpmovsxbq {{.*#+}} ymm1 = [1,2,4,8]
226 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
227 ; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
230 ; AVX512-LABEL: ext_i4_4i64:
232 ; AVX512-NEXT: kmovd %edi, %k1
233 ; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
234 ; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
236 %1 = bitcast i4 %a0 to <4 x i1>
237 %2 = sext <4 x i1> %1 to <4 x i64>
; Bitcast i8 -> <8 x i1>, sign-extend to <8 x i32> (256-bit result). Note
; the AVX1 lowering avoids cross-lane integer compares by converting to
; float and using vcmpeqps instead of an integer pcmpeq.
241 define <8 x i32> @ext_i8_8i32(i8 %a0) {
242 ; SSE2-SSSE3-LABEL: ext_i8_8i32:
243 ; SSE2-SSSE3: # %bb.0:
244 ; SSE2-SSSE3-NEXT: movd %edi, %xmm0
245 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
246 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8]
247 ; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
248 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
249 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
250 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
251 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
252 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
253 ; SSE2-SSSE3-NEXT: retq
255 ; AVX1-LABEL: ext_i8_8i32:
257 ; AVX1-NEXT: vmovd %edi, %xmm0
258 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
259 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
260 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
261 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
262 ; AVX1-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
265 ; AVX2-LABEL: ext_i8_8i32:
267 ; AVX2-NEXT: vmovd %edi, %xmm0
268 ; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
269 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
270 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
271 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
274 ; AVX512-LABEL: ext_i8_8i32:
276 ; AVX512-NEXT: kmovd %edi, %k1
277 ; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
278 ; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
280 %1 = bitcast i8 %a0 to <8 x i1>
281 %2 = sext <8 x i1> %1 to <8 x i32>
; Bitcast i16 -> <16 x i1>, sign-extend to <16 x i16> (256-bit result).
; SSE splits across two xmm halves with bit masks [1..128] and [256..32768];
; AVX512+BW lowers to kmovd + vpmovm2w on a ymm register.
285 define <16 x i16> @ext_i16_16i16(i16 %a0) {
286 ; SSE2-SSSE3-LABEL: ext_i16_16i16:
287 ; SSE2-SSSE3: # %bb.0:
288 ; SSE2-SSSE3-NEXT: movd %edi, %xmm0
289 ; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
290 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
291 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
292 ; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
293 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
294 ; SSE2-SSSE3-NEXT: pcmpeqw %xmm2, %xmm0
295 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
296 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
297 ; SSE2-SSSE3-NEXT: pcmpeqw %xmm2, %xmm1
298 ; SSE2-SSSE3-NEXT: retq
300 ; AVX1-LABEL: ext_i16_16i16:
302 ; AVX1-NEXT: vmovd %edi, %xmm0
303 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
304 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
305 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
306 ; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
307 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
308 ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1
309 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
310 ; AVX1-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
311 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
314 ; AVX2-LABEL: ext_i16_16i16:
316 ; AVX2-NEXT: vmovd %edi, %xmm0
317 ; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
318 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
319 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
320 ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
323 ; AVX512-LABEL: ext_i16_16i16:
325 ; AVX512-NEXT: kmovd %edi, %k0
326 ; AVX512-NEXT: vpmovm2w %k0, %ymm0
328 %1 = bitcast i16 %a0 to <16 x i1>
329 %2 = sext <16 x i1> %1 to <16 x i16>
; Bitcast i32 -> <32 x i1>, sign-extend to <32 x i8> (256-bit result).
; Each source byte must be splatted across 8 output bytes before the
; AND / compare-equal with the repeated [1..128] byte-mask pattern.
333 define <32 x i8> @ext_i32_32i8(i32 %a0) {
334 ; SSE2-SSSE3-LABEL: ext_i32_32i8:
335 ; SSE2-SSSE3: # %bb.0:
336 ; SSE2-SSSE3-NEXT: movd %edi, %xmm1
337 ; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
338 ; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
339 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
340 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
341 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
342 ; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm0
343 ; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
344 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
345 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
346 ; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm1
347 ; SSE2-SSSE3-NEXT: retq
349 ; AVX1-LABEL: ext_i32_32i8:
351 ; AVX1-NEXT: vmovd %edi, %xmm0
352 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
353 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
354 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
355 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
356 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
357 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
358 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
359 ; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
360 ; AVX1-NEXT: # xmm2 = mem[0,0]
361 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
362 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
363 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
366 ; AVX2-LABEL: ext_i32_32i8:
368 ; AVX2-NEXT: vmovd %edi, %xmm0
369 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
370 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
371 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
372 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
373 ; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
376 ; AVX512-LABEL: ext_i32_32i8:
378 ; AVX512-NEXT: kmovd %edi, %k0
379 ; AVX512-NEXT: vpmovm2b %k0, %ymm0
381 %1 = bitcast i32 %a0 to <32 x i1>
382 %2 = sext <32 x i1> %1 to <32 x i8>
; First 512-bit-result case: bitcast i8 -> <8 x i1>, sign-extend to
; <8 x i64>. SSE needs four xmm result registers (masks [1,2], [4,8],
; [16,32], [64,128]); AVX512 collapses the whole thing to kmovd +
; a masked zeroing vpternlogq that sets selected lanes to all-ones.
390 define <8 x i64> @ext_i8_8i64(i8 %a0) {
391 ; SSE2-SSSE3-LABEL: ext_i8_8i64:
392 ; SSE2-SSSE3: # %bb.0:
393 ; SSE2-SSSE3-NEXT: movd %edi, %xmm0
394 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
395 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
396 ; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1
397 ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
398 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
399 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
400 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
401 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4,8]
402 ; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm2
403 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
404 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
405 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
406 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
407 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32]
408 ; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm3
409 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm3
410 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm3
411 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
412 ; SSE2-SSSE3-NEXT: pand %xmm3, %xmm2
413 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [64,128]
414 ; SSE2-SSSE3-NEXT: pand %xmm3, %xmm4
415 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm4
416 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
417 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
418 ; SSE2-SSSE3-NEXT: retq
420 ; AVX1-LABEL: ext_i8_8i64:
422 ; AVX1-NEXT: vmovd %edi, %xmm0
423 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
424 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
425 ; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8]
426 ; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm2
427 ; AVX1-NEXT: vpcmpeqq %xmm0, %xmm2, %xmm0
428 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
429 ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
430 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
431 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [16,32,64,128]
432 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
433 ; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm2
434 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
435 ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
436 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
439 ; AVX2-LABEL: ext_i8_8i64:
441 ; AVX2-NEXT: vmovd %edi, %xmm0
442 ; AVX2-NEXT: vpbroadcastb %xmm0, %ymm1
443 ; AVX2-NEXT: vpmovsxbq {{.*#+}} ymm0 = [1,2,4,8]
444 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
445 ; AVX2-NEXT: vpcmpeqq %ymm0, %ymm2, %ymm0
446 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm2 = [16,32,64,128]
447 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
448 ; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
451 ; AVX512-LABEL: ext_i8_8i64:
453 ; AVX512-NEXT: kmovd %edi, %k1
454 ; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
456 %1 = bitcast i8 %a0 to <8 x i1>
457 %2 = sext <8 x i1> %1 to <8 x i64>
; Bitcast i16 -> <16 x i1>, sign-extend to <16 x i32> (512-bit result).
; SSE uses four xmm halves with dword masks 1..32768; AVX1 again goes via
; vcvtdq2ps + vcmpeqps; AVX512 uses kmovd + masked zeroing vpternlogd.
460 define <16 x i32> @ext_i16_16i32(i16 %a0) {
461 ; SSE2-SSSE3-LABEL: ext_i16_16i32:
462 ; SSE2-SSSE3: # %bb.0:
463 ; SSE2-SSSE3-NEXT: movd %edi, %xmm0
464 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
465 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8]
466 ; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm0
467 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
468 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
469 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
470 ; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm1
471 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
472 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
473 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
474 ; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
475 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
476 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm2
477 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
478 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
479 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm3
480 ; SSE2-SSSE3-NEXT: retq
482 ; AVX1-LABEL: ext_i16_16i32:
484 ; AVX1-NEXT: vmovd %edi, %xmm0
485 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
486 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
487 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
488 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
489 ; AVX1-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
490 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
491 ; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1
492 ; AVX1-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
495 ; AVX2-LABEL: ext_i16_16i32:
497 ; AVX2-NEXT: vmovd %edi, %xmm0
498 ; AVX2-NEXT: vpbroadcastw %xmm0, %ymm1
499 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
500 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
501 ; AVX2-NEXT: vpcmpeqd %ymm0, %ymm2, %ymm0
502 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
503 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
504 ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
507 ; AVX512-LABEL: ext_i16_16i32:
509 ; AVX512-NEXT: kmovd %edi, %k1
510 ; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
512 %1 = bitcast i16 %a0 to <16 x i1>
513 %2 = sext <16 x i1> %1 to <16 x i32>
; Bitcast i32 -> <32 x i1>, sign-extend to <32 x i16> (512-bit result).
; AVX2 handles the upper 16 bits by shifting the scalar (shrl $16) and
; repeating the broadcast / AND / compare sequence for the second ymm.
517 define <32 x i16> @ext_i32_32i16(i32 %a0) {
518 ; SSE2-SSSE3-LABEL: ext_i32_32i16:
519 ; SSE2-SSSE3: # %bb.0:
520 ; SSE2-SSSE3-NEXT: movd %edi, %xmm2
521 ; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,0,0,4,5,6,7]
522 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
523 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
524 ; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
525 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
526 ; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm0
527 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
528 ; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
529 ; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm1
530 ; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,1,1,1,4,5,6,7]
531 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
532 ; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
533 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
534 ; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm2
535 ; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3
536 ; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm3
537 ; SSE2-SSSE3-NEXT: retq
539 ; AVX1-LABEL: ext_i32_32i16:
541 ; AVX1-NEXT: vmovd %edi, %xmm1
542 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,0,0,4,5,6,7]
543 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
544 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
545 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
546 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
547 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
548 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [256,512,1024,2048,4096,8192,16384,32768]
549 ; AVX1-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3
550 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
551 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
552 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
553 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
554 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
555 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
556 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
557 ; AVX1-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3
558 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
559 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
562 ; AVX2-LABEL: ext_i32_32i16:
564 ; AVX2-NEXT: vmovd %edi, %xmm0
565 ; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
566 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
567 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
568 ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
569 ; AVX2-NEXT: shrl $16, %edi
570 ; AVX2-NEXT: vmovd %edi, %xmm2
571 ; AVX2-NEXT: vpbroadcastw %xmm2, %ymm2
572 ; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm2
573 ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm2, %ymm1
576 ; AVX512-LABEL: ext_i32_32i16:
578 ; AVX512-NEXT: kmovd %edi, %k0
579 ; AVX512-NEXT: vpmovm2w %k0, %zmm0
581 %1 = bitcast i32 %a0 to <32 x i1>
582 %2 = sext <32 x i1> %1 to <32 x i16>
586 define <64 x i8> @ext_i64_64i8(i64 %a0) {
587 ; SSE2-SSSE3-LABEL: ext_i64_64i8:
588 ; SSE2-SSSE3: # %bb.0:
589 ; SSE2-SSSE3-NEXT: movq %rdi, %xmm3
590 ; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
591 ; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
592 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
593 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
594 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
595 ; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm0
596 ; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
597 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
598 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
599 ; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm1
600 ; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
601 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
602 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
603 ; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm2
604 ; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
605 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
606 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
607 ; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm3
608 ; SSE2-SSSE3-NEXT: retq
610 ; AVX1-LABEL: ext_i64_64i8:
612 ; AVX1-NEXT: vmovq %rdi, %xmm0
613 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
614 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
615 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
616 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
617 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
618 ; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
619 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
620 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
621 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
622 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
623 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
624 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
625 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
626 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
627 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[2,2,3,3,6,6,7,7]
628 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
629 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
630 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
631 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
632 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
635 ; AVX2-LABEL: ext_i64_64i8:
637 ; AVX2-NEXT: vmovq %rdi, %xmm0
638 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,1,0,1]
639 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
640 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
641 ; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
642 ; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
643 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23]
644 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
645 ; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
648 ; AVX512-LABEL: ext_i64_64i8:
650 ; AVX512-NEXT: kmovq %rdi, %k0
651 ; AVX512-NEXT: vpmovm2b %k0, %zmm0
653 %1 = bitcast i64 %a0 to <64 x i1>
654 %2 = sext <64 x i1> %1 to <64 x i8>