1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX2
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX2
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX512VLBW
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX512VLBW
; ext_i2_2i64: bitcast the i2 argument to a <2 x i1> mask, then zero-extend
; each lane to i64 (a 128-bit result whose lanes are each 0 or 1).
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
16 define <2 x i64> @ext_i2_2i64(i2 %a0) {
17 ; SSE2-SSSE3-LABEL: ext_i2_2i64:
18 ; SSE2-SSSE3: # %bb.0:
19 ; SSE2-SSSE3-NEXT: movd %edi, %xmm0
20 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
21 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
22 ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
23 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
24 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
25 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
26 ; SSE2-SSSE3-NEXT: psrlq $63, %xmm0
27 ; SSE2-SSSE3-NEXT: retq
29 ; AVX1-LABEL: ext_i2_2i64:
31 ; AVX1-NEXT: vmovd %edi, %xmm0
32 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
33 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
34 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
35 ; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
36 ; AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
39 ; AVX2-LABEL: ext_i2_2i64:
41 ; AVX2-NEXT: vmovd %edi, %xmm0
42 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
43 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
44 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
45 ; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
46 ; AVX2-NEXT: vpsrlq $63, %xmm0, %xmm0
49 ; AVX512F-LABEL: ext_i2_2i64:
51 ; AVX512F-NEXT: kmovw %edi, %k1
52 ; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
53 ; AVX512F-NEXT: vpsrlq $63, %xmm0, %xmm0
54 ; AVX512F-NEXT: vzeroupper
57 ; AVX512VLBW-LABEL: ext_i2_2i64:
58 ; AVX512VLBW: # %bb.0:
59 ; AVX512VLBW-NEXT: kmovd %edi, %k1
60 ; AVX512VLBW-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
61 ; AVX512VLBW-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
62 ; AVX512VLBW-NEXT: vpsrlq $63, %xmm0, %xmm0
63 ; AVX512VLBW-NEXT: retq
64 %1 = bitcast i2 %a0 to <2 x i1>
65 %2 = zext <2 x i1> %1 to <2 x i64>
; ext_i4_4i32: bitcast the i4 argument to a <4 x i1> mask, then zero-extend
; each lane to i32 (a 128-bit result whose lanes are each 0 or 1).
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
69 define <4 x i32> @ext_i4_4i32(i4 %a0) {
70 ; SSE2-SSSE3-LABEL: ext_i4_4i32:
71 ; SSE2-SSSE3: # %bb.0:
72 ; SSE2-SSSE3-NEXT: movd %edi, %xmm0
73 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
74 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8]
75 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
76 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
77 ; SSE2-SSSE3-NEXT: psrld $31, %xmm0
78 ; SSE2-SSSE3-NEXT: retq
80 ; AVX1-LABEL: ext_i4_4i32:
82 ; AVX1-NEXT: vmovd %edi, %xmm0
83 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
84 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
85 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
86 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
87 ; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
90 ; AVX2-LABEL: ext_i4_4i32:
92 ; AVX2-NEXT: vmovd %edi, %xmm0
93 ; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
94 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
95 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
96 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
97 ; AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
100 ; AVX512F-LABEL: ext_i4_4i32:
102 ; AVX512F-NEXT: kmovw %edi, %k1
103 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
104 ; AVX512F-NEXT: vpsrld $31, %xmm0, %xmm0
105 ; AVX512F-NEXT: vzeroupper
108 ; AVX512VLBW-LABEL: ext_i4_4i32:
109 ; AVX512VLBW: # %bb.0:
110 ; AVX512VLBW-NEXT: kmovd %edi, %k1
111 ; AVX512VLBW-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
112 ; AVX512VLBW-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
113 ; AVX512VLBW-NEXT: vpsrld $31, %xmm0, %xmm0
114 ; AVX512VLBW-NEXT: retq
115 %1 = bitcast i4 %a0 to <4 x i1>
116 %2 = zext <4 x i1> %1 to <4 x i32>
; ext_i8_8i16: bitcast the i8 argument to an <8 x i1> mask, then zero-extend
; each lane to i16 (a 128-bit result whose lanes are each 0 or 1).
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
120 define <8 x i16> @ext_i8_8i16(i8 %a0) {
121 ; SSE2-SSSE3-LABEL: ext_i8_8i16:
122 ; SSE2-SSSE3: # %bb.0:
123 ; SSE2-SSSE3-NEXT: movd %edi, %xmm0
124 ; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
125 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
126 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
127 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
128 ; SSE2-SSSE3-NEXT: pcmpeqw %xmm1, %xmm0
129 ; SSE2-SSSE3-NEXT: psrlw $15, %xmm0
130 ; SSE2-SSSE3-NEXT: retq
132 ; AVX1-LABEL: ext_i8_8i16:
134 ; AVX1-NEXT: vmovd %edi, %xmm0
135 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
136 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
137 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
138 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
139 ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
140 ; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
143 ; AVX2-LABEL: ext_i8_8i16:
145 ; AVX2-NEXT: vmovd %edi, %xmm0
146 ; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
147 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
148 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
149 ; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
150 ; AVX2-NEXT: vpsrlw $15, %xmm0, %xmm0
153 ; AVX512F-LABEL: ext_i8_8i16:
155 ; AVX512F-NEXT: kmovw %edi, %k1
156 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
157 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
158 ; AVX512F-NEXT: vpsrlw $15, %xmm0, %xmm0
159 ; AVX512F-NEXT: vzeroupper
162 ; AVX512VLBW-LABEL: ext_i8_8i16:
163 ; AVX512VLBW: # %bb.0:
164 ; AVX512VLBW-NEXT: kmovd %edi, %k0
165 ; AVX512VLBW-NEXT: vpmovm2w %k0, %xmm0
166 ; AVX512VLBW-NEXT: vpsrlw $15, %xmm0, %xmm0
167 ; AVX512VLBW-NEXT: retq
168 %1 = bitcast i8 %a0 to <8 x i1>
169 %2 = zext <8 x i1> %1 to <8 x i16>
; ext_i16_16i8: bitcast the i16 argument to a <16 x i1> mask, then zero-extend
; each lane to i8 (a 128-bit result whose lanes are each 0 or 1).
; Unlike the wider-element cases above, the SSE2 and SSSE3 runs are checked
; separately here because their expected instruction sequences differ.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
173 define <16 x i8> @ext_i16_16i8(i16 %a0) {
174 ; SSE2-LABEL: ext_i16_16i8:
176 ; SSE2-NEXT: movd %edi, %xmm0
177 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
178 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
179 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
180 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
181 ; SSE2-NEXT: pand %xmm1, %xmm0
182 ; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
183 ; SSE2-NEXT: psrlw $7, %xmm0
184 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
187 ; SSSE3-LABEL: ext_i16_16i8:
189 ; SSSE3-NEXT: movd %edi, %xmm0
190 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
191 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
192 ; SSSE3-NEXT: pand %xmm1, %xmm0
193 ; SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
194 ; SSSE3-NEXT: psrlw $7, %xmm0
195 ; SSSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
198 ; AVX1-LABEL: ext_i16_16i8:
200 ; AVX1-NEXT: vmovd %edi, %xmm0
201 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
202 ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
203 ; AVX1-NEXT: # xmm1 = mem[0,0]
204 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
205 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
206 ; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
207 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
210 ; AVX2-LABEL: ext_i16_16i8:
212 ; AVX2-NEXT: vmovd %edi, %xmm0
213 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
214 ; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
215 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
216 ; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
217 ; AVX2-NEXT: vpsrlw $7, %xmm0, %xmm0
218 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
221 ; AVX512F-LABEL: ext_i16_16i8:
223 ; AVX512F-NEXT: kmovw %edi, %k1
224 ; AVX512F-NEXT: vpbroadcastd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0 {%k1} {z}
225 ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
226 ; AVX512F-NEXT: vzeroupper
229 ; AVX512VLBW-LABEL: ext_i16_16i8:
230 ; AVX512VLBW: # %bb.0:
231 ; AVX512VLBW-NEXT: kmovd %edi, %k1
232 ; AVX512VLBW-NEXT: vmovdqu8 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1} {z}
233 ; AVX512VLBW-NEXT: retq
234 %1 = bitcast i16 %a0 to <16 x i1>
235 %2 = zext <16 x i1> %1 to <16 x i8>
; ext_i4_4i64: bitcast the i4 argument to a <4 x i1> mask, then zero-extend
; each lane to i64 (a 256-bit result whose lanes are each 0 or 1).
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
243 define <4 x i64> @ext_i4_4i64(i4 %a0) {
244 ; SSE2-SSSE3-LABEL: ext_i4_4i64:
245 ; SSE2-SSSE3: # %bb.0:
246 ; SSE2-SSSE3-NEXT: movd %edi, %xmm0
247 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
248 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
249 ; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm1
250 ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
251 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
252 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
253 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
254 ; SSE2-SSSE3-NEXT: psrlq $63, %xmm0
255 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4,8]
256 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
257 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
258 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
259 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
260 ; SSE2-SSSE3-NEXT: psrlq $63, %xmm1
261 ; SSE2-SSSE3-NEXT: retq
263 ; AVX1-LABEL: ext_i4_4i64:
265 ; AVX1-NEXT: vmovd %edi, %xmm0
266 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
267 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
268 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
269 ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
270 ; AVX1-NEXT: vpsrlq $63, %xmm1, %xmm1
271 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
272 ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
273 ; AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
274 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
277 ; AVX2-LABEL: ext_i4_4i64:
279 ; AVX2-NEXT: vmovd %edi, %xmm0
280 ; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
281 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
282 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
283 ; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
284 ; AVX2-NEXT: vpsrlq $63, %ymm0, %ymm0
287 ; AVX512F-LABEL: ext_i4_4i64:
289 ; AVX512F-NEXT: kmovw %edi, %k1
290 ; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
291 ; AVX512F-NEXT: vpsrlq $63, %ymm0, %ymm0
294 ; AVX512VLBW-LABEL: ext_i4_4i64:
295 ; AVX512VLBW: # %bb.0:
296 ; AVX512VLBW-NEXT: kmovd %edi, %k1
297 ; AVX512VLBW-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
298 ; AVX512VLBW-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
299 ; AVX512VLBW-NEXT: vpsrlq $63, %ymm0, %ymm0
300 ; AVX512VLBW-NEXT: retq
301 %1 = bitcast i4 %a0 to <4 x i1>
302 %2 = zext <4 x i1> %1 to <4 x i64>
; ext_i8_8i32: bitcast the i8 argument to an <8 x i1> mask, then zero-extend
; each lane to i32 (a 256-bit result whose lanes are each 0 or 1).
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
306 define <8 x i32> @ext_i8_8i32(i8 %a0) {
307 ; SSE2-SSSE3-LABEL: ext_i8_8i32:
308 ; SSE2-SSSE3: # %bb.0:
309 ; SSE2-SSSE3-NEXT: movd %edi, %xmm0
310 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
311 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8]
312 ; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
313 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
314 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
315 ; SSE2-SSSE3-NEXT: psrld $31, %xmm0
316 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
317 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
318 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
319 ; SSE2-SSSE3-NEXT: psrld $31, %xmm1
320 ; SSE2-SSSE3-NEXT: retq
322 ; AVX1-LABEL: ext_i8_8i32:
324 ; AVX1-NEXT: vmovd %edi, %xmm0
325 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
326 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
327 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
328 ; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
329 ; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
330 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
331 ; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
332 ; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
333 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
336 ; AVX2-LABEL: ext_i8_8i32:
338 ; AVX2-NEXT: vmovd %edi, %xmm0
339 ; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
340 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
341 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
342 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
343 ; AVX2-NEXT: vpsrld $31, %ymm0, %ymm0
346 ; AVX512F-LABEL: ext_i8_8i32:
348 ; AVX512F-NEXT: kmovw %edi, %k1
349 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
350 ; AVX512F-NEXT: vpsrld $31, %ymm0, %ymm0
353 ; AVX512VLBW-LABEL: ext_i8_8i32:
354 ; AVX512VLBW: # %bb.0:
355 ; AVX512VLBW-NEXT: kmovd %edi, %k1
356 ; AVX512VLBW-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
357 ; AVX512VLBW-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
358 ; AVX512VLBW-NEXT: vpsrld $31, %ymm0, %ymm0
359 ; AVX512VLBW-NEXT: retq
360 %1 = bitcast i8 %a0 to <8 x i1>
361 %2 = zext <8 x i1> %1 to <8 x i32>
; ext_i16_16i16: bitcast the i16 argument to a <16 x i1> mask, then zero-extend
; each lane to i16 (a 256-bit result whose lanes are each 0 or 1).
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
365 define <16 x i16> @ext_i16_16i16(i16 %a0) {
366 ; SSE2-SSSE3-LABEL: ext_i16_16i16:
367 ; SSE2-SSSE3: # %bb.0:
368 ; SSE2-SSSE3-NEXT: movd %edi, %xmm0
369 ; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
370 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
371 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
372 ; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
373 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
374 ; SSE2-SSSE3-NEXT: pcmpeqw %xmm2, %xmm0
375 ; SSE2-SSSE3-NEXT: psrlw $15, %xmm0
376 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
377 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
378 ; SSE2-SSSE3-NEXT: pcmpeqw %xmm2, %xmm1
379 ; SSE2-SSSE3-NEXT: psrlw $15, %xmm1
380 ; SSE2-SSSE3-NEXT: retq
382 ; AVX1-LABEL: ext_i16_16i16:
384 ; AVX1-NEXT: vmovd %edi, %xmm0
385 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
386 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
387 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
388 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
389 ; AVX1-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
390 ; AVX1-NEXT: vpsrlw $15, %xmm1, %xmm1
391 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
392 ; AVX1-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
393 ; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
394 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
397 ; AVX2-LABEL: ext_i16_16i16:
399 ; AVX2-NEXT: vmovd %edi, %xmm0
400 ; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
401 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
402 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
403 ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
404 ; AVX2-NEXT: vpsrlw $15, %ymm0, %ymm0
407 ; AVX512F-LABEL: ext_i16_16i16:
409 ; AVX512F-NEXT: kmovw %edi, %k1
410 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
411 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
412 ; AVX512F-NEXT: vpsrlw $15, %ymm0, %ymm0
415 ; AVX512VLBW-LABEL: ext_i16_16i16:
416 ; AVX512VLBW: # %bb.0:
417 ; AVX512VLBW-NEXT: kmovd %edi, %k0
418 ; AVX512VLBW-NEXT: vpmovm2w %k0, %ymm0
419 ; AVX512VLBW-NEXT: vpsrlw $15, %ymm0, %ymm0
420 ; AVX512VLBW-NEXT: retq
421 %1 = bitcast i16 %a0 to <16 x i1>
422 %2 = zext <16 x i1> %1 to <16 x i16>
; ext_i32_32i8: bitcast the i32 argument to a <32 x i1> mask, then zero-extend
; each lane to i8 (a 256-bit result whose lanes are each 0 or 1).
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
426 define <32 x i8> @ext_i32_32i8(i32 %a0) {
427 ; SSE2-SSSE3-LABEL: ext_i32_32i8:
428 ; SSE2-SSSE3: # %bb.0:
429 ; SSE2-SSSE3-NEXT: movd %edi, %xmm1
430 ; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
431 ; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
432 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
433 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
434 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
435 ; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm0
436 ; SSE2-SSSE3-NEXT: psrlw $7, %xmm0
437 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
438 ; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
439 ; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
440 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
441 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
442 ; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm1
443 ; SSE2-SSSE3-NEXT: psrlw $7, %xmm1
444 ; SSE2-SSSE3-NEXT: pand %xmm3, %xmm1
445 ; SSE2-SSSE3-NEXT: retq
447 ; AVX1-LABEL: ext_i32_32i8:
449 ; AVX1-NEXT: vmovd %edi, %xmm0
450 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
451 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
452 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
453 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
454 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
455 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
456 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
457 ; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9241421688590303745,9241421688590303745]
458 ; AVX1-NEXT: # xmm2 = mem[0,0]
459 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
460 ; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
461 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
462 ; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
463 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
464 ; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
465 ; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
466 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
469 ; AVX2-LABEL: ext_i32_32i8:
471 ; AVX2-NEXT: vmovd %edi, %xmm0
472 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
473 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
474 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
475 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
476 ; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
477 ; AVX2-NEXT: vpsrlw $7, %ymm0, %ymm0
478 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
481 ; AVX512F-LABEL: ext_i32_32i8:
483 ; AVX512F-NEXT: kmovw %edi, %k1
484 ; AVX512F-NEXT: shrl $16, %edi
485 ; AVX512F-NEXT: kmovw %edi, %k2
486 ; AVX512F-NEXT: vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
487 ; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z}
488 ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
489 ; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm0 {%k2} {z}
490 ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
491 ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
494 ; AVX512VLBW-LABEL: ext_i32_32i8:
495 ; AVX512VLBW: # %bb.0:
496 ; AVX512VLBW-NEXT: kmovd %edi, %k1
497 ; AVX512VLBW-NEXT: vmovdqu8 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0 {%k1} {z}
498 ; AVX512VLBW-NEXT: retq
499 %1 = bitcast i32 %a0 to <32 x i1>
500 %2 = zext <32 x i1> %1 to <32 x i8>
; ext_i8_8i64: bitcast the i8 argument to an <8 x i1> mask, then zero-extend
; each lane to i64 (a 512-bit result whose lanes are each 0 or 1).
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
508 define <8 x i64> @ext_i8_8i64(i8 %a0) {
509 ; SSE2-SSSE3-LABEL: ext_i8_8i64:
510 ; SSE2-SSSE3: # %bb.0:
511 ; SSE2-SSSE3-NEXT: movd %edi, %xmm0
512 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
513 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
514 ; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1
515 ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
516 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
517 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
518 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
519 ; SSE2-SSSE3-NEXT: psrlq $63, %xmm0
520 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4,8]
521 ; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm2
522 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
523 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
524 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
525 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
526 ; SSE2-SSSE3-NEXT: psrlq $63, %xmm1
527 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32]
528 ; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm3
529 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm3
530 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm3
531 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
532 ; SSE2-SSSE3-NEXT: pand %xmm3, %xmm2
533 ; SSE2-SSSE3-NEXT: psrlq $63, %xmm2
534 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [64,128]
535 ; SSE2-SSSE3-NEXT: pand %xmm3, %xmm4
536 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm4
537 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
538 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
539 ; SSE2-SSSE3-NEXT: psrlq $63, %xmm3
540 ; SSE2-SSSE3-NEXT: retq
542 ; AVX1-LABEL: ext_i8_8i64:
544 ; AVX1-NEXT: # kill: def $edi killed $edi def $rdi
545 ; AVX1-NEXT: vmovq %rdi, %xmm0
546 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
547 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
548 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
549 ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
550 ; AVX1-NEXT: vpsrlq $63, %xmm2, %xmm2
551 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
552 ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
553 ; AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
554 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
555 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
556 ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
557 ; AVX1-NEXT: vpsrlq $63, %xmm2, %xmm2
558 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
559 ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
560 ; AVX1-NEXT: vpsrlq $63, %xmm1, %xmm1
561 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
564 ; AVX2-LABEL: ext_i8_8i64:
566 ; AVX2-NEXT: vmovd %edi, %xmm0
567 ; AVX2-NEXT: vpbroadcastb %xmm0, %ymm1
568 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
569 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
570 ; AVX2-NEXT: vpcmpeqq %ymm0, %ymm2, %ymm0
571 ; AVX2-NEXT: vpsrlq $63, %ymm0, %ymm0
572 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
573 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
574 ; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
575 ; AVX2-NEXT: vpsrlq $63, %ymm1, %ymm1
578 ; AVX512F-LABEL: ext_i8_8i64:
580 ; AVX512F-NEXT: kmovw %edi, %k1
581 ; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
582 ; AVX512F-NEXT: vpsrlq $63, %zmm0, %zmm0
585 ; AVX512VLBW-LABEL: ext_i8_8i64:
586 ; AVX512VLBW: # %bb.0:
587 ; AVX512VLBW-NEXT: kmovd %edi, %k1
588 ; AVX512VLBW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
589 ; AVX512VLBW-NEXT: vpsrlq $63, %zmm0, %zmm0
590 ; AVX512VLBW-NEXT: retq
591 %1 = bitcast i8 %a0 to <8 x i1>
592 %2 = zext <8 x i1> %1 to <8 x i64>
; ext_i16_16i32: bitcast the i16 argument to a <16 x i1> mask, then zero-extend
; each lane to i32 (a 512-bit result whose lanes are each 0 or 1).
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
596 define <16 x i32> @ext_i16_16i32(i16 %a0) {
597 ; SSE2-SSSE3-LABEL: ext_i16_16i32:
598 ; SSE2-SSSE3: # %bb.0:
599 ; SSE2-SSSE3-NEXT: movd %edi, %xmm0
600 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
601 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8]
602 ; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm0
603 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
604 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
605 ; SSE2-SSSE3-NEXT: psrld $31, %xmm0
606 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
607 ; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm1
608 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
609 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
610 ; SSE2-SSSE3-NEXT: psrld $31, %xmm1
611 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
612 ; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
613 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
614 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm2
615 ; SSE2-SSSE3-NEXT: psrld $31, %xmm2
616 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
617 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
618 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm3
619 ; SSE2-SSSE3-NEXT: psrld $31, %xmm3
620 ; SSE2-SSSE3-NEXT: retq
622 ; AVX1-LABEL: ext_i16_16i32:
624 ; AVX1-NEXT: vmovd %edi, %xmm0
625 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
626 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
627 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
628 ; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
629 ; AVX1-NEXT: vpsrld $31, %xmm2, %xmm2
630 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
631 ; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
632 ; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
633 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
634 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
635 ; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
636 ; AVX1-NEXT: vpsrld $31, %xmm2, %xmm2
637 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
638 ; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
639 ; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
640 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
643 ; AVX2-LABEL: ext_i16_16i32:
645 ; AVX2-NEXT: vmovd %edi, %xmm0
646 ; AVX2-NEXT: vpbroadcastw %xmm0, %ymm1
647 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
648 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
649 ; AVX2-NEXT: vpcmpeqd %ymm0, %ymm2, %ymm0
650 ; AVX2-NEXT: vpsrld $31, %ymm0, %ymm0
651 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
652 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
653 ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
654 ; AVX2-NEXT: vpsrld $31, %ymm1, %ymm1
657 ; AVX512F-LABEL: ext_i16_16i32:
659 ; AVX512F-NEXT: kmovw %edi, %k1
660 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
661 ; AVX512F-NEXT: vpsrld $31, %zmm0, %zmm0
664 ; AVX512VLBW-LABEL: ext_i16_16i32:
665 ; AVX512VLBW: # %bb.0:
666 ; AVX512VLBW-NEXT: kmovd %edi, %k1
667 ; AVX512VLBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
668 ; AVX512VLBW-NEXT: vpsrld $31, %zmm0, %zmm0
669 ; AVX512VLBW-NEXT: retq
670 %1 = bitcast i16 %a0 to <16 x i1>
671 %2 = zext <16 x i1> %1 to <16 x i32>
; ext_i32_32i16: bitcast the i32 argument to a <32 x i1> mask, then zero-extend
; each lane to i16 (a 512-bit result whose lanes are each 0 or 1).
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
675 define <32 x i16> @ext_i32_32i16(i32 %a0) {
676 ; SSE2-SSSE3-LABEL: ext_i32_32i16:
677 ; SSE2-SSSE3: # %bb.0:
678 ; SSE2-SSSE3-NEXT: movd %edi, %xmm2
679 ; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,0,0,4,5,6,7]
680 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
681 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
682 ; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
683 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
684 ; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm0
685 ; SSE2-SSSE3-NEXT: psrlw $15, %xmm0
686 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
687 ; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
688 ; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm1
689 ; SSE2-SSSE3-NEXT: psrlw $15, %xmm1
690 ; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,1,1,1,4,5,6,7]
691 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
692 ; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
693 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
694 ; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm2
695 ; SSE2-SSSE3-NEXT: psrlw $15, %xmm2
696 ; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3
697 ; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm3
698 ; SSE2-SSSE3-NEXT: psrlw $15, %xmm3
699 ; SSE2-SSSE3-NEXT: retq
701 ; AVX1-LABEL: ext_i32_32i16:
703 ; AVX1-NEXT: vmovd %edi, %xmm1
704 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,0,0,4,5,6,7]
705 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
706 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
707 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
708 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
709 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128]
710 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm4
711 ; AVX1-NEXT: vpsrlw $15, %xmm4, %xmm4
712 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
713 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
714 ; AVX1-NEXT: vpcmpeqw %xmm5, %xmm0, %xmm0
715 ; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
716 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm4, %ymm0
717 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
718 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
719 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
720 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
721 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm2
722 ; AVX1-NEXT: vpsrlw $15, %xmm2, %xmm2
723 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
724 ; AVX1-NEXT: vpcmpeqw %xmm5, %xmm1, %xmm1
725 ; AVX1-NEXT: vpsrlw $15, %xmm1, %xmm1
726 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
729 ; AVX2-LABEL: ext_i32_32i16:
731 ; AVX2-NEXT: vmovd %edi, %xmm0
732 ; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
733 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
734 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
735 ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
736 ; AVX2-NEXT: vpsrlw $15, %ymm0, %ymm0
737 ; AVX2-NEXT: shrl $16, %edi
738 ; AVX2-NEXT: vmovd %edi, %xmm2
739 ; AVX2-NEXT: vpbroadcastw %xmm2, %ymm2
740 ; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm2
741 ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm2, %ymm1
742 ; AVX2-NEXT: vpsrlw $15, %ymm1, %ymm1
745 ; AVX512F-LABEL: ext_i32_32i16:
747 ; AVX512F-NEXT: kmovw %edi, %k1
748 ; AVX512F-NEXT: shrl $16, %edi
749 ; AVX512F-NEXT: kmovw %edi, %k2
750 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
751 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
752 ; AVX512F-NEXT: vpsrlw $15, %ymm0, %ymm0
753 ; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
754 ; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
755 ; AVX512F-NEXT: vpsrlw $15, %ymm1, %ymm1
756 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
759 ; AVX512VLBW-LABEL: ext_i32_32i16:
760 ; AVX512VLBW: # %bb.0:
761 ; AVX512VLBW-NEXT: kmovd %edi, %k0
762 ; AVX512VLBW-NEXT: vpmovm2w %k0, %zmm0
763 ; AVX512VLBW-NEXT: vpsrlw $15, %zmm0, %zmm0
764 ; AVX512VLBW-NEXT: retq
765 %1 = bitcast i32 %a0 to <32 x i1>
766 %2 = zext <32 x i1> %1 to <32 x i16>
; ext_i64_64i8: bitcast the i64 argument to a <64 x i1> mask, then zero-extend
; each lane to i8 (a 512-bit result whose lanes are each 0 or 1).
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
770 define <64 x i8> @ext_i64_64i8(i64 %a0) {
771 ; SSE2-SSSE3-LABEL: ext_i64_64i8:
772 ; SSE2-SSSE3: # %bb.0:
773 ; SSE2-SSSE3-NEXT: movq %rdi, %xmm3
774 ; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
775 ; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
776 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
777 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
778 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
779 ; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm0
780 ; SSE2-SSSE3-NEXT: psrlw $7, %xmm0
781 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
782 ; SSE2-SSSE3-NEXT: pand %xmm5, %xmm0
783 ; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
784 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
785 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
786 ; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm1
787 ; SSE2-SSSE3-NEXT: psrlw $7, %xmm1
788 ; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
789 ; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
790 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
791 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
792 ; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm2
793 ; SSE2-SSSE3-NEXT: psrlw $7, %xmm2
794 ; SSE2-SSSE3-NEXT: pand %xmm5, %xmm2
795 ; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
796 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
797 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
798 ; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm3
799 ; SSE2-SSSE3-NEXT: psrlw $7, %xmm3
800 ; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3
801 ; SSE2-SSSE3-NEXT: retq
803 ; AVX1-LABEL: ext_i64_64i8:
805 ; AVX1-NEXT: vmovq %rdi, %xmm0
806 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
807 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
808 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
809 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
810 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
811 ; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
812 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
813 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
814 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
815 ; AVX1-NEXT: vpsrlw $7, %xmm3, %xmm3
816 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
817 ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
818 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
819 ; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
820 ; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
821 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
822 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
823 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
824 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
825 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[2,2,3,3,6,6,7,7]
826 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
827 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
828 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
829 ; AVX1-NEXT: vpsrlw $7, %xmm3, %xmm3
830 ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
831 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
832 ; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
833 ; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
834 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
837 ; AVX2-LABEL: ext_i64_64i8:
839 ; AVX2-NEXT: vmovq %rdi, %xmm0
840 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,1,0,1]
841 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
842 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
843 ; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
844 ; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
845 ; AVX2-NEXT: vpsrlw $7, %ymm0, %ymm0
846 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
847 ; AVX2-NEXT: vpand %ymm3, %ymm0, %ymm0
848 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23]
849 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
850 ; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
851 ; AVX2-NEXT: vpsrlw $7, %ymm1, %ymm1
852 ; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
855 ; AVX512F-LABEL: ext_i64_64i8:
857 ; AVX512F-NEXT: movq %rdi, %rax
858 ; AVX512F-NEXT: movl %edi, %ecx
859 ; AVX512F-NEXT: kmovw %edi, %k1
860 ; AVX512F-NEXT: shrq $32, %rdi
861 ; AVX512F-NEXT: shrq $48, %rax
862 ; AVX512F-NEXT: shrl $16, %ecx
863 ; AVX512F-NEXT: kmovw %ecx, %k2
864 ; AVX512F-NEXT: kmovw %eax, %k3
865 ; AVX512F-NEXT: kmovw %edi, %k4
866 ; AVX512F-NEXT: vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
867 ; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k4} {z}
868 ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
869 ; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm2 {%k3} {z}
870 ; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
871 ; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
872 ; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm2 {%k1} {z}
873 ; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
874 ; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm0 {%k2} {z}
875 ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
876 ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0
877 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
880 ; AVX512VLBW-LABEL: ext_i64_64i8:
881 ; AVX512VLBW: # %bb.0:
882 ; AVX512VLBW-NEXT: kmovq %rdi, %k1
883 ; AVX512VLBW-NEXT: vmovdqu8 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0 {%k1} {z}
884 ; AVX512VLBW-NEXT: retq
885 %1 = bitcast i64 %a0 to <64 x i1>
886 %2 = zext <64 x i1> %1 to <64 x i8>