; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefix=AVX512
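
; Each test bitcasts a scalar iN argument to an <N x i1> mask and sign-extends
; it to a vector whose lanes are all-ones or all-zeros. As the checks show,
; pre-AVX512 targets broadcast the scalar, AND each lane with a distinct
; power-of-two bit mask, and compare the result for equality against that same
; mask; AVX512 targets move the bits straight into a k-register instead. The
; assertions can be regenerated with utils/update_llc_test_checks.py.
;
; 128-bit vectors
;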

define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: # kill: def $edi killed $edi def $rdi
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: # kill: def $edi killed $edi def $rdi
; AVX2-NEXT: vmovq %rdi, %xmm0
; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i2_2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = sext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}

define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i4_4i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i4_4i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i4_4i32:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}

define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i8_8i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i8_8i16:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2w %k0, %xmm0
; AVX512-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}
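
; In ext_i16_16i8 below, the repeated byte mask [1,2,4,8,16,32,64,128] appears
; in three spellings of the same 64-bit pattern 0x8040201008040201: the element
; list in the movdqa comments, the raw double -1.7939930131212661E-307 loaded
; by the AVX1 vmovddup, and the integer 9241421688590303745 broadcast by AVX2.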

define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-LABEL: ext_i16_16i8:
; SSE2: # %bb.0:
; SSE2-NEXT: movd %edi, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: ext_i16_16i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movd %edi, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i16_16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [-1.7939930131212661E-307,-1.7939930131212661E-307]
; AVX1-NEXT: # xmm1 = mem[0,0]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i16_16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2b %k0, %xmm0
; AVX512-NEXT: retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; 256-bit vectors
;
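
; AVX1 has no 256-bit integer compare, so the 256-bit tests split the value
; into 128-bit halves, compare each half for equality with zero, and invert
; the result by XORing with all-ones; AVX2 keeps the broadcast/and/compare
; pattern at full ymm width.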

define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: # kill: def $edi killed $edi def $rdi
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: # kill: def $edi killed $edi def $rdi
; AVX2-NEXT: vmovq %rdi, %xmm0
; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i4_4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i8_8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}

define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqw %xmm2, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqw %xmm2, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i16_16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2w %k0, %ymm0
; AVX512-NEXT: retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}
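
; For the i8-lane cases below (ext_i32_32i8 and ext_i64_64i8), AVX2 first
; duplicates the scalar into both 128-bit lanes with vpermq so that the
; lane-local vpshufb can fan each source byte out to eight consecutive byte
; lanes (shuffle indices 18/19 and 22/23 address the copy in the upper lane).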

define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i32_32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i32_32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i32_32i8:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2b %k0, %ymm0
; AVX512-NEXT: retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = sext <32 x i1> %1 to <32 x i8>
  ret <32 x i8> %2
}

;
; 512-bit vectors
;
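
; At 512 bits, AVX512 materializes the result directly: vpternlog with an
; all-ones immediate under a zeroing k-mask for the i32/i64 element cases, and
; vpmovm2b/vpmovm2w (from avx512bw) for the byte/word cases. Targets without
; AVX512 return the four xmm or two ymm halves in separate registers.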

define <8 x i64> @ext_i8_8i64(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i64:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32]
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm3
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm2
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [64,128]
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm4
; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm4
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1: # %bb.0:
; AVX1-NEXT: # kill: def $edi killed $edi def $rdi
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i64:
; AVX2: # %bb.0:
; AVX2-NEXT: # kill: def $edi killed $edi def $rdi
; AVX2-NEXT: vmovq %rdi, %xmm0
; AVX2-NEXT: vpbroadcastq %xmm0, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vpcmpeqq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i8_8i64:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <16 x i32> @ext_i16_16i32(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i16_16i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vpcmpeqd %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i16_16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i32>
  ret <16 x i32> %2
}

define <32 x i16> @ext_i32_32i16(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm2
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm1
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,1,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i32_32i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm1
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i32_32i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: shrl $16, %edi
; AVX2-NEXT: vmovd %edi, %xmm2
; AVX2-NEXT: vpbroadcastw %xmm2, %ymm2
; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm2
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i32_32i16:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2w %k0, %zmm0
; AVX512-NEXT: retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = sext <32 x i1> %1 to <32 x i16>
  ret <32 x i16> %2
}

define <64 x i8> @ext_i64_64i8(i64 %a0) {
; SSE2-SSSE3-LABEL: ext_i64_64i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movq %rdi, %xmm3
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm1
; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i64_64i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [-1.7939930131212661E-307,-1.7939930131212661E-307,-1.7939930131212661E-307,-1.7939930131212661E-307]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpcmpeqb %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqb %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i64_64i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovq %rdi, %xmm0
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,1,0,1]
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i64_64i8:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovq %rdi, %k0
; AVX512-NEXT: vpmovm2b %k0, %zmm0
; AVX512-NEXT: retq
  %1 = bitcast i64 %a0 to <64 x i1>
  %2 = sext <64 x i1> %1 to <64 x i8>
  ret <64 x i8> %2
}