; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX512

;
; 128-bit vectors
;

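; Each function bitcasts an iN mask to <N x i1> and sign-extends it to a full
; vector, so every set bit becomes an all-ones lane. Without mask registers
; this lowers to: broadcast the scalar, AND each lane with a distinct
; power-of-two constant, then compare-equal against the same constant. The
; AVX512 runs move the mask into a k-register instead. Note that SSE2 has no
; 64-bit pcmpeqq, so the i64 equality below is emulated with pcmpeqd, a
; pshufd that swaps the 32-bit halves, and a pand of the two results.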
define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i2_2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = sext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}

define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i4_4i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i4_4i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i4_4i32:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}

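; With AVX512BW the mask can be expanded directly: kmovd moves the bits into
; a k-register and vpmovm2w turns each mask bit into an all-ones word, with
; no constant-pool load or compare.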
define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i8_8i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i8_8i16:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2w %k0, %xmm0
; AVX512-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}

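; Splatting the two mask bytes across 16 lanes costs punpcklbw plus two
; shuffles on SSE2, while SSSE3 and AVX do it with a single pshufb.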
define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-LABEL: ext_i16_16i8:
; SSE2: # %bb.0:
; SSE2-NEXT: movd %edi, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: ext_i16_16i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movd %edi, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i16_16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT: # xmm1 = mem[0,0]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i16_16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2b %k0, %xmm0
; AVX512-NEXT: retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; 256-bit vectors
;

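; AVX1 has no 256-bit integer compares: the AND is still done as one 256-bit
; vandps, but the compare is split into two 128-bit vpcmpeqq halves that are
; recombined with vinsertf128.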
define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i4_4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i8_8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}

define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqw %xmm2, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqw %xmm2, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i16_16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2w %k0, %ymm0
; AVX512-NEXT: retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}

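; vpshufb cannot move bytes across 128-bit lanes, so the AVX2 version first
; duplicates the source with vpermq and then uses in-lane indices (18/19 are
; bytes 2-3 of the upper lane) to splat mask bytes 2 and 3.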
define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i32_32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT: # xmm2 = mem[0,0]
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i32_32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i32_32i8:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2b %k0, %ymm0
; AVX512-NEXT: retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = sext <32 x i1> %1 to <32 x i8>
  ret <32 x i8> %2
}

;
; 512-bit vectors
;

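; A 512-bit result needs four xmm (SSE) or two ymm (AVX/AVX2) registers.
; AVX512 covers it with a single masked vpternlogq: ternary immediate $255 is
; the constant-true function, so it writes all-ones under the zeroing mask.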
define <8 x i64> @ext_i8_8i64(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i64:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32]
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm3
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm2
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [64,128]
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm4
; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm4
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vpcmpeqq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i8_8i64:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <16 x i32> @ext_i16_16i32(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i16_16i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vpcmpeqd %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i16_16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i32>
  ret <16 x i32> %2
}

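; vpbroadcastw only replicates one 16-bit chunk, so the AVX2 version handles
; the low 16 mask bits first, then shifts the scalar right by 16 and repeats
; the broadcast/and/compare sequence for the upper half.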
define <32 x i16> @ext_i32_32i16(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm2
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm1
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,1,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i32_32i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm1
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT: vpcmpeqw %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqw %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i32_32i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: shrl $16, %edi
; AVX2-NEXT: vmovd %edi, %xmm2
; AVX2-NEXT: vpbroadcastw %xmm2, %ymm2
; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm2
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i32_32i16:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2w %k0, %zmm0
; AVX512-NEXT: retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = sext <32 x i1> %1 to <32 x i16>
  ret <32 x i16> %2
}

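; The i64 mask no longer fits in a 32-bit GPR: the scalar arrives in %rdi via
; movq/vmovq, and the AVX512BW run uses kmovq to fill all 64 bits of the
; k-register before expanding with vpmovm2b.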
define <64 x i8> @ext_i64_64i8(i64 %a0) {
; SSE2-SSSE3-LABEL: ext_i64_64i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movq %rdi, %xmm3
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm1
; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i64_64i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[2,2,3,3,6,6,7,7]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i64_64i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovq %rdi, %xmm0
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,1,0,1]
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i64_64i8:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovq %rdi, %k0
; AVX512-NEXT: vpmovm2b %k0, %zmm0
; AVX512-NEXT: retq
  %1 = bitcast i64 %a0 to <64 x i1>
  %2 = sext <64 x i1> %1 to <64 x i8>
  ret <64 x i8> %2
}