1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX2
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX2
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX512
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX512
; Test ext_i2_2i64 - bitcast an i2 to <2 x i1> and sign-extend the lanes to
; <2 x i64>. The autogenerated checks below expect the SSE and AVX1/AVX2 runs
; to replicate the scalar across the vector, AND it with the per-lane bit
; constants [1,2] and compare the result for equality with those constants.
; The AVX-512 runs instead move the scalar into a mask register and perform a
; zero-masked move of an all-ones vector.
15 define <2 x i64> @ext_i2_2i64(i2 %a0) {
16 ; SSE2-SSSE3-LABEL: ext_i2_2i64:
17 ; SSE2-SSSE3: # %bb.0:
18 ; SSE2-SSSE3-NEXT: movd %edi, %xmm0
19 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
20 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
21 ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
22 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
23 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
24 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
25 ; SSE2-SSSE3-NEXT: retq
27 ; AVX1-LABEL: ext_i2_2i64:
29 ; AVX1-NEXT: vmovd %edi, %xmm0
30 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
31 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
32 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
33 ; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
36 ; AVX2-LABEL: ext_i2_2i64:
38 ; AVX2-NEXT: vmovd %edi, %xmm0
39 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
40 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
41 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
42 ; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
45 ; AVX512-LABEL: ext_i2_2i64:
47 ; AVX512-NEXT: kmovd %edi, %k1
48 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
49 ; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; IR under test. The bitcast reinterprets the integer bits as i1 lanes and the
; sext widens every lane to the result element type.
51 %1 = bitcast i2 %a0 to <2 x i1>
52 %2 = sext <2 x i1> %1 to <2 x i64>
; Test ext_i4_4i32 - bitcast an i4 to <4 x i1> and sign-extend the lanes to
; <4 x i32>. The autogenerated checks expect the SSE and AVX1/AVX2 runs to
; replicate the scalar into every 32-bit lane, AND with [1,2,4,8] and compare
; for equality with the same constant. The AVX-512 runs use a mask register
; and a zero-masked move of an all-ones vector.
56 define <4 x i32> @ext_i4_4i32(i4 %a0) {
57 ; SSE2-SSSE3-LABEL: ext_i4_4i32:
58 ; SSE2-SSSE3: # %bb.0:
59 ; SSE2-SSSE3-NEXT: movd %edi, %xmm0
60 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
61 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8]
62 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
63 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
64 ; SSE2-SSSE3-NEXT: retq
66 ; AVX1-LABEL: ext_i4_4i32:
68 ; AVX1-NEXT: vmovd %edi, %xmm0
69 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
70 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
71 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
72 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
75 ; AVX2-LABEL: ext_i4_4i32:
77 ; AVX2-NEXT: vmovd %edi, %xmm0
78 ; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
79 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
80 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
81 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
84 ; AVX512-LABEL: ext_i4_4i32:
86 ; AVX512-NEXT: kmovd %edi, %k1
87 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
88 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; IR under test. The bitcast reinterprets the integer bits as i1 lanes and the
; sext widens every lane to the result element type.
90 %1 = bitcast i4 %a0 to <4 x i1>
91 %2 = sext <4 x i1> %1 to <4 x i32>
; Test ext_i8_8i16 - bitcast an i8 to <8 x i1> and sign-extend the lanes to
; <8 x i16>. The autogenerated checks expect the SSE and AVX1/AVX2 runs to
; replicate the scalar across the vector, AND with [1,2,4,8,16,32,64,128] and
; compare the 16-bit lanes for equality with the same constant. The AVX-512
; runs expect a mask-register move followed by vpmovm2w.
95 define <8 x i16> @ext_i8_8i16(i8 %a0) {
96 ; SSE2-SSSE3-LABEL: ext_i8_8i16:
97 ; SSE2-SSSE3: # %bb.0:
98 ; SSE2-SSSE3-NEXT: movd %edi, %xmm0
99 ; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
100 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
101 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
102 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
103 ; SSE2-SSSE3-NEXT: pcmpeqw %xmm1, %xmm0
104 ; SSE2-SSSE3-NEXT: retq
106 ; AVX1-LABEL: ext_i8_8i16:
108 ; AVX1-NEXT: vmovd %edi, %xmm0
109 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
110 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
111 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
112 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
113 ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
116 ; AVX2-LABEL: ext_i8_8i16:
118 ; AVX2-NEXT: vmovd %edi, %xmm0
119 ; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
120 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
121 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
122 ; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
125 ; AVX512-LABEL: ext_i8_8i16:
127 ; AVX512-NEXT: kmovd %edi, %k0
128 ; AVX512-NEXT: vpmovm2w %k0, %xmm0
; IR under test. The bitcast reinterprets the integer bits as i1 lanes and the
; sext widens every lane to the result element type.
130 %1 = bitcast i8 %a0 to <8 x i1>
131 %2 = sext <8 x i1> %1 to <8 x i16>
; Test ext_i16_16i8 - bitcast an i16 to <16 x i1> and sign-extend the lanes to
; <16 x i8>. SSE2 and SSSE3 have separate checks here because the byte
; replication differs (punpcklbw/pshuflw/pshufd versus a single pshufb). All
; non-AVX-512 runs then AND with the byte pattern 1,2,4,...,128 repeated twice
; and compare bytes for equality. The AVX1/AVX2 checks show that same pattern
; as the broadcast 64-bit value 9241421688590303745 (0x8040201008040201).
; The AVX-512 runs expect a mask-register move followed by vpmovm2b.
135 define <16 x i8> @ext_i16_16i8(i16 %a0) {
136 ; SSE2-LABEL: ext_i16_16i8:
138 ; SSE2-NEXT: movd %edi, %xmm0
139 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
140 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
141 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
142 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
143 ; SSE2-NEXT: pand %xmm1, %xmm0
144 ; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
147 ; SSSE3-LABEL: ext_i16_16i8:
149 ; SSSE3-NEXT: movd %edi, %xmm0
150 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
151 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
152 ; SSSE3-NEXT: pand %xmm1, %xmm0
153 ; SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
156 ; AVX1-LABEL: ext_i16_16i8:
158 ; AVX1-NEXT: vmovd %edi, %xmm0
159 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
160 ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
161 ; AVX1-NEXT: # xmm1 = mem[0,0]
162 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
163 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
166 ; AVX2-LABEL: ext_i16_16i8:
168 ; AVX2-NEXT: vmovd %edi, %xmm0
169 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
170 ; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
171 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
172 ; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
175 ; AVX512-LABEL: ext_i16_16i8:
177 ; AVX512-NEXT: kmovd %edi, %k0
178 ; AVX512-NEXT: vpmovm2b %k0, %xmm0
; IR under test. The bitcast reinterprets the integer bits as i1 lanes and the
; sext widens every lane to the result element type.
180 %1 = bitcast i16 %a0 to <16 x i1>
181 %2 = sext <16 x i1> %1 to <16 x i8>
; Test ext_i4_4i64 - bitcast an i4 to <4 x i1> and sign-extend the lanes to
; <4 x i64> (a 256-bit result). The autogenerated SSE checks build the result
; in two xmm registers, using the constants [1,2] for xmm0 and [4,8] for xmm1.
; The AVX1 checks AND a ymm value with a constant-pool operand, compare each
; 128-bit half separately and rejoin the halves with vinsertf128. The AVX2
; checks do the AND and compare on a single ymm register with [1,2,4,8]. The
; AVX-512 runs use a mask register and a zero-masked move of all-ones.
189 define <4 x i64> @ext_i4_4i64(i4 %a0) {
190 ; SSE2-SSSE3-LABEL: ext_i4_4i64:
191 ; SSE2-SSSE3: # %bb.0:
192 ; SSE2-SSSE3-NEXT: movd %edi, %xmm0
193 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
194 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
195 ; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm1
196 ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
197 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
198 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
199 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
200 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4,8]
201 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
202 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
203 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
204 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
205 ; SSE2-SSSE3-NEXT: retq
207 ; AVX1-LABEL: ext_i4_4i64:
209 ; AVX1-NEXT: vmovd %edi, %xmm0
210 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
211 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
212 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
213 ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
214 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
215 ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
216 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
219 ; AVX2-LABEL: ext_i4_4i64:
221 ; AVX2-NEXT: vmovd %edi, %xmm0
222 ; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
223 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
224 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
225 ; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
228 ; AVX512-LABEL: ext_i4_4i64:
230 ; AVX512-NEXT: kmovd %edi, %k1
231 ; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
232 ; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; IR under test. The bitcast reinterprets the integer bits as i1 lanes and the
; sext widens every lane to the result element type.
234 %1 = bitcast i4 %a0 to <4 x i1>
235 %2 = sext <4 x i1> %1 to <4 x i64>
; Test ext_i8_8i32 - bitcast an i8 to <8 x i1> and sign-extend the lanes to
; <8 x i32> (a 256-bit result). The autogenerated SSE checks build the result
; in two xmm registers, using [1,2,4,8] for xmm0 and [16,32,64,128] for xmm1.
; The AVX1 checks AND a ymm value with a constant-pool operand, compare each
; 128-bit half separately and rejoin the halves with vinsertf128. The AVX2
; checks use one ymm AND and compare. The AVX-512 runs use a mask register and
; a zero-masked move of all-ones.
239 define <8 x i32> @ext_i8_8i32(i8 %a0) {
240 ; SSE2-SSSE3-LABEL: ext_i8_8i32:
241 ; SSE2-SSSE3: # %bb.0:
242 ; SSE2-SSSE3-NEXT: movd %edi, %xmm0
243 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
244 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8]
245 ; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
246 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
247 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
248 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
249 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
250 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
251 ; SSE2-SSSE3-NEXT: retq
253 ; AVX1-LABEL: ext_i8_8i32:
255 ; AVX1-NEXT: vmovd %edi, %xmm0
256 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
257 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
258 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
259 ; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
260 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
261 ; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
262 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
265 ; AVX2-LABEL: ext_i8_8i32:
267 ; AVX2-NEXT: vmovd %edi, %xmm0
268 ; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
269 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
270 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
271 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
274 ; AVX512-LABEL: ext_i8_8i32:
276 ; AVX512-NEXT: kmovd %edi, %k1
277 ; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
278 ; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; IR under test. The bitcast reinterprets the integer bits as i1 lanes and the
; sext widens every lane to the result element type.
280 %1 = bitcast i8 %a0 to <8 x i1>
281 %2 = sext <8 x i1> %1 to <8 x i32>
; Test ext_i16_16i16 - bitcast an i16 to <16 x i1> and sign-extend the lanes
; to <16 x i16> (a 256-bit result). The autogenerated SSE checks build the
; result in two xmm registers, testing bits 1..128 for xmm0 and bits
; 256..32768 for xmm1. The AVX1 checks AND a ymm value with a constant-pool
; operand, compare each 128-bit half separately and rejoin the halves with
; vinsertf128. The AVX2 checks use one ymm AND and compare against all sixteen
; bit constants. The AVX-512 runs expect a mask-register move and vpmovm2w.
285 define <16 x i16> @ext_i16_16i16(i16 %a0) {
286 ; SSE2-SSSE3-LABEL: ext_i16_16i16:
287 ; SSE2-SSSE3: # %bb.0:
288 ; SSE2-SSSE3-NEXT: movd %edi, %xmm0
289 ; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
290 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
291 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
292 ; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
293 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
294 ; SSE2-SSSE3-NEXT: pcmpeqw %xmm2, %xmm0
295 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
296 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
297 ; SSE2-SSSE3-NEXT: pcmpeqw %xmm2, %xmm1
298 ; SSE2-SSSE3-NEXT: retq
300 ; AVX1-LABEL: ext_i16_16i16:
302 ; AVX1-NEXT: vmovd %edi, %xmm0
303 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
304 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
305 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
306 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
307 ; AVX1-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
308 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
309 ; AVX1-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
310 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
313 ; AVX2-LABEL: ext_i16_16i16:
315 ; AVX2-NEXT: vmovd %edi, %xmm0
316 ; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
317 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
318 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
319 ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
322 ; AVX512-LABEL: ext_i16_16i16:
324 ; AVX512-NEXT: kmovd %edi, %k0
325 ; AVX512-NEXT: vpmovm2w %k0, %ymm0
; IR under test. The bitcast reinterprets the integer bits as i1 lanes and the
; sext widens every lane to the result element type.
327 %1 = bitcast i16 %a0 to <16 x i1>
328 %2 = sext <16 x i1> %1 to <16 x i16>
; Test ext_i32_32i8 - bitcast an i32 to <32 x i1> and sign-extend the lanes to
; <32 x i8> (a 256-bit result). The autogenerated SSE checks build the result
; in xmm0 and xmm1, each from a different pair of replicated input bytes that
; is ANDed and compared with the byte pattern 1,2,4,...,128. The AVX1 checks
; combine two shuffled halves into a ymm register, AND once, then compare each
; 128-bit half. The AVX2 checks use vpermq plus vpshufb to place the bytes and
; a broadcast of 9241421688590303745 (0x8040201008040201) as the bit pattern.
; The AVX-512 runs expect a mask-register move followed by vpmovm2b.
332 define <32 x i8> @ext_i32_32i8(i32 %a0) {
333 ; SSE2-SSSE3-LABEL: ext_i32_32i8:
334 ; SSE2-SSSE3: # %bb.0:
335 ; SSE2-SSSE3-NEXT: movd %edi, %xmm1
336 ; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
337 ; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
338 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
339 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
340 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
341 ; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm0
342 ; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
343 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
344 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
345 ; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm1
346 ; SSE2-SSSE3-NEXT: retq
348 ; AVX1-LABEL: ext_i32_32i8:
350 ; AVX1-NEXT: vmovd %edi, %xmm0
351 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
352 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
353 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
354 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
355 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
356 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
357 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
358 ; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9241421688590303745,9241421688590303745]
359 ; AVX1-NEXT: # xmm2 = mem[0,0]
360 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
361 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
362 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
365 ; AVX2-LABEL: ext_i32_32i8:
367 ; AVX2-NEXT: vmovd %edi, %xmm0
368 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
369 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
370 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
371 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
372 ; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
375 ; AVX512-LABEL: ext_i32_32i8:
377 ; AVX512-NEXT: kmovd %edi, %k0
378 ; AVX512-NEXT: vpmovm2b %k0, %ymm0
; IR under test. The bitcast reinterprets the integer bits as i1 lanes and the
; sext widens every lane to the result element type.
380 %1 = bitcast i32 %a0 to <32 x i1>
381 %2 = sext <32 x i1> %1 to <32 x i8>
; Test ext_i8_8i64 - bitcast an i8 to <8 x i1> and sign-extend the lanes to
; <8 x i64> (a 512-bit result). The autogenerated SSE checks build the result
; in xmm0 through xmm3, using the constant pairs [1,2], [4,8], [16,32] and
; [64,128] in turn. The AVX1 and AVX2 checks build it in ymm0 and ymm1. The
; AVX-512 runs move the scalar into a mask register and use a zero-masked
; vpternlogq with immediate 255 on a zmm register.
389 define <8 x i64> @ext_i8_8i64(i8 %a0) {
390 ; SSE2-SSSE3-LABEL: ext_i8_8i64:
391 ; SSE2-SSSE3: # %bb.0:
392 ; SSE2-SSSE3-NEXT: movd %edi, %xmm0
393 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
394 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
395 ; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1
396 ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
397 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
398 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
399 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
400 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4,8]
401 ; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm2
402 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
403 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
404 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
405 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
406 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32]
407 ; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm3
408 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm3
409 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm3
410 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
411 ; SSE2-SSSE3-NEXT: pand %xmm3, %xmm2
412 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [64,128]
413 ; SSE2-SSSE3-NEXT: pand %xmm3, %xmm4
414 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm4
415 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
416 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
417 ; SSE2-SSSE3-NEXT: retq
419 ; AVX1-LABEL: ext_i8_8i64:
421 ; AVX1-NEXT: # kill: def $edi killed $edi def $rdi
422 ; AVX1-NEXT: vmovq %rdi, %xmm0
423 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
424 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
425 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
426 ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
427 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
428 ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
429 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
430 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
431 ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
432 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
433 ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
434 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
437 ; AVX2-LABEL: ext_i8_8i64:
439 ; AVX2-NEXT: vmovd %edi, %xmm0
440 ; AVX2-NEXT: vpbroadcastb %xmm0, %ymm1
441 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
442 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
443 ; AVX2-NEXT: vpcmpeqq %ymm0, %ymm2, %ymm0
444 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
445 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
446 ; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
449 ; AVX512-LABEL: ext_i8_8i64:
451 ; AVX512-NEXT: kmovd %edi, %k1
452 ; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; IR under test. The bitcast reinterprets the integer bits as i1 lanes and the
; sext widens every lane to the result element type.
454 %1 = bitcast i8 %a0 to <8 x i1>
455 %2 = sext <8 x i1> %1 to <8 x i64>
; Test ext_i16_16i32 - bitcast an i16 to <16 x i1> and sign-extend the lanes
; to <16 x i32> (a 512-bit result). The autogenerated SSE checks build the
; result in xmm0 through xmm3, each testing four consecutive bits of the
; replicated scalar (1..8, 16..128, 256..2048, 4096..32768). The AVX1 and AVX2
; checks build it in ymm0 and ymm1. The AVX-512 runs move the scalar into a
; mask register and use a zero-masked vpternlogd with immediate 255 on a zmm
; register.
459 define <16 x i32> @ext_i16_16i32(i16 %a0) {
460 ; SSE2-SSSE3-LABEL: ext_i16_16i32:
461 ; SSE2-SSSE3: # %bb.0:
462 ; SSE2-SSSE3-NEXT: movd %edi, %xmm0
463 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
464 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8]
465 ; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm0
466 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
467 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
468 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
469 ; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm1
470 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
471 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
472 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
473 ; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
474 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
475 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm2
476 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
477 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
478 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm3
479 ; SSE2-SSSE3-NEXT: retq
481 ; AVX1-LABEL: ext_i16_16i32:
483 ; AVX1-NEXT: vmovd %edi, %xmm0
484 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
485 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
486 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
487 ; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
488 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
489 ; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
490 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
491 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
492 ; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
493 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
494 ; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
495 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
498 ; AVX2-LABEL: ext_i16_16i32:
500 ; AVX2-NEXT: vmovd %edi, %xmm0
501 ; AVX2-NEXT: vpbroadcastw %xmm0, %ymm1
502 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
503 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
504 ; AVX2-NEXT: vpcmpeqd %ymm0, %ymm2, %ymm0
505 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
506 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
507 ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
510 ; AVX512-LABEL: ext_i16_16i32:
512 ; AVX512-NEXT: kmovd %edi, %k1
513 ; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; IR under test. The bitcast reinterprets the integer bits as i1 lanes and the
; sext widens every lane to the result element type.
515 %1 = bitcast i16 %a0 to <16 x i1>
516 %2 = sext <16 x i1> %1 to <16 x i32>
; Test ext_i32_32i16 - bitcast an i32 to <32 x i1> and sign-extend the lanes
; to <32 x i16> (a 512-bit result). The autogenerated SSE checks build the
; result in xmm0 through xmm3. The first pair replicates 16-bit element 0 of
; the input and the second pair replicates element 1, and both pairs reuse
; the same two bit-constant vectors held in xmm4 and xmm5. The AVX1 checks
; build ymm0 and ymm1 the same way with a shared ymm constant. The AVX2 checks
; shift edi right by 16 to obtain the second half. The AVX-512 runs expect a
; mask-register move followed by vpmovm2w on a zmm register.
520 define <32 x i16> @ext_i32_32i16(i32 %a0) {
521 ; SSE2-SSSE3-LABEL: ext_i32_32i16:
522 ; SSE2-SSSE3: # %bb.0:
523 ; SSE2-SSSE3-NEXT: movd %edi, %xmm2
524 ; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,0,0,4,5,6,7]
525 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
526 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
527 ; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
528 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
529 ; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm0
530 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
531 ; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
532 ; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm1
533 ; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,1,1,1,4,5,6,7]
534 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
535 ; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
536 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
537 ; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm2
538 ; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3
539 ; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm3
540 ; SSE2-SSSE3-NEXT: retq
542 ; AVX1-LABEL: ext_i32_32i16:
544 ; AVX1-NEXT: vmovd %edi, %xmm1
545 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,0,0,4,5,6,7]
546 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
547 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
548 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
549 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
550 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
551 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [256,512,1024,2048,4096,8192,16384,32768]
552 ; AVX1-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3
553 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [1,2,4,8,16,32,64,128]
554 ; AVX1-NEXT: vpcmpeqw %xmm5, %xmm0, %xmm0
555 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
556 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
557 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
558 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
559 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
560 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
561 ; AVX1-NEXT: vpcmpeqw %xmm4, %xmm2, %xmm2
562 ; AVX1-NEXT: vpcmpeqw %xmm5, %xmm1, %xmm1
563 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
566 ; AVX2-LABEL: ext_i32_32i16:
568 ; AVX2-NEXT: vmovd %edi, %xmm0
569 ; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
570 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
571 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
572 ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
573 ; AVX2-NEXT: shrl $16, %edi
574 ; AVX2-NEXT: vmovd %edi, %xmm2
575 ; AVX2-NEXT: vpbroadcastw %xmm2, %ymm2
576 ; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm2
577 ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm2, %ymm1
580 ; AVX512-LABEL: ext_i32_32i16:
582 ; AVX512-NEXT: kmovd %edi, %k0
583 ; AVX512-NEXT: vpmovm2w %k0, %zmm0
; IR under test. The bitcast reinterprets the integer bits as i1 lanes and the
; sext widens every lane to the result element type.
585 %1 = bitcast i32 %a0 to <32 x i1>
586 %2 = sext <32 x i1> %1 to <32 x i16>
; Test ext_i64_64i8 - bitcast an i64 to <64 x i1> and sign-extend the lanes to
; <64 x i8> (a 512-bit result). The scalar arrives in rdi, so the non-AVX-512
; checks start with a 64-bit move into an xmm register. The autogenerated SSE
; checks build the result in xmm0 through xmm3, each from a different pair of
; replicated input bytes that is ANDed and compared with the byte pattern
; 1,2,4,...,128. The AVX1 and AVX2 checks build it in ymm0 and ymm1, showing
; that same pattern as the broadcast 64-bit value 9241421688590303745
; (0x8040201008040201). The AVX-512 runs expect kmovq followed by vpmovm2b on
; a zmm register.
590 define <64 x i8> @ext_i64_64i8(i64 %a0) {
591 ; SSE2-SSSE3-LABEL: ext_i64_64i8:
592 ; SSE2-SSSE3: # %bb.0:
593 ; SSE2-SSSE3-NEXT: movq %rdi, %xmm3
594 ; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
595 ; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
596 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
597 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
598 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
599 ; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm0
600 ; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
601 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
602 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
603 ; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm1
604 ; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
605 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
606 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
607 ; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm2
608 ; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
609 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
610 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
611 ; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm3
612 ; SSE2-SSSE3-NEXT: retq
614 ; AVX1-LABEL: ext_i64_64i8:
616 ; AVX1-NEXT: vmovq %rdi, %xmm0
617 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
618 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
619 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
620 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
621 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
622 ; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
623 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
624 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
625 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
626 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
627 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
628 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
629 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
630 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
631 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[2,2,3,3,6,6,7,7]
632 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
633 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
634 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
635 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
636 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
639 ; AVX2-LABEL: ext_i64_64i8:
641 ; AVX2-NEXT: vmovq %rdi, %xmm0
642 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,1,0,1]
643 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
644 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
645 ; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
646 ; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
647 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23]
648 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
649 ; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
652 ; AVX512-LABEL: ext_i64_64i8:
654 ; AVX512-NEXT: kmovq %rdi, %k0
655 ; AVX512-NEXT: vpmovm2b %k0, %zmm0
; IR under test. The bitcast reinterprets the integer bits as i1 lanes and the
; sext widens every lane to the result element type.
657 %1 = bitcast i64 %a0 to <64 x i1>
658 %2 = sext <64 x i1> %1 to <64 x i8>