1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
10 ; Just two 32-bit runs to make sure we do reasonable things there.
11 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86-SSE,X86-SSE2
12 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X86-SSE,X86-SSE41
; Sign-extends the low eight i8 lanes of %A (selected by the shufflevector
; mask 0..7) to <8 x i16>.
; Per the checks below, the SSE2 and SSSE3 runs (and the i686 SSE2 run) are
; expected to unpack with punpcklbw and then arithmetic-shift with psraw $8,
; while the SSE4.1 and AVX-family runs are expected to use one (v)pmovsxbw.
14 define <8 x i16> @sext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
15 ; SSE2-LABEL: sext_16i8_to_8i16:
16 ; SSE2: # %bb.0: # %entry
17 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
18 ; SSE2-NEXT: psraw $8, %xmm0
21 ; SSSE3-LABEL: sext_16i8_to_8i16:
22 ; SSSE3: # %bb.0: # %entry
23 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
24 ; SSSE3-NEXT: psraw $8, %xmm0
27 ; SSE41-LABEL: sext_16i8_to_8i16:
28 ; SSE41: # %bb.0: # %entry
29 ; SSE41-NEXT: pmovsxbw %xmm0, %xmm0
32 ; AVX-LABEL: sext_16i8_to_8i16:
33 ; AVX: # %bb.0: # %entry
34 ; AVX-NEXT: vpmovsxbw %xmm0, %xmm0
37 ; X86-SSE2-LABEL: sext_16i8_to_8i16:
38 ; X86-SSE2: # %bb.0: # %entry
39 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
40 ; X86-SSE2-NEXT: psraw $8, %xmm0
43 ; X86-SSE41-LABEL: sext_16i8_to_8i16:
44 ; X86-SSE41: # %bb.0: # %entry
45 ; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm0
46 ; X86-SSE41-NEXT: retl
; Extract the low eight bytes, then widen each one to 16 bits with sign.
48 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
49 %C = sext <8 x i8> %B to <8 x i16>
; Sign-extends all sixteen i8 lanes of %A to <16 x i16> with a single sext.
; Per the checks below: the SSE2 and SSSE3 runs build the low and high halves
; with punpcklbw / punpckhbw followed by psraw $8; the SSE4.1 runs use two
; pmovsxbw with a pshufd to reach the upper eight bytes; the AVX1 run does the
; same and joins the halves with vinsertf128; the AVX2 and AVX512 runs use one
; vpmovsxbw from xmm to ymm.
53 define <16 x i16> @sext_16i8_to_16i16(<16 x i8> %A) nounwind uwtable readnone ssp {
54 ; SSE2-LABEL: sext_16i8_to_16i16:
55 ; SSE2: # %bb.0: # %entry
56 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
57 ; SSE2-NEXT: psraw $8, %xmm2
58 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
59 ; SSE2-NEXT: psraw $8, %xmm1
60 ; SSE2-NEXT: movdqa %xmm2, %xmm0
63 ; SSSE3-LABEL: sext_16i8_to_16i16:
64 ; SSSE3: # %bb.0: # %entry
65 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
66 ; SSSE3-NEXT: psraw $8, %xmm2
67 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
68 ; SSSE3-NEXT: psraw $8, %xmm1
69 ; SSSE3-NEXT: movdqa %xmm2, %xmm0
72 ; SSE41-LABEL: sext_16i8_to_16i16:
73 ; SSE41: # %bb.0: # %entry
74 ; SSE41-NEXT: pmovsxbw %xmm0, %xmm2
75 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
76 ; SSE41-NEXT: pmovsxbw %xmm0, %xmm1
77 ; SSE41-NEXT: movdqa %xmm2, %xmm0
80 ; AVX1-LABEL: sext_16i8_to_16i16:
81 ; AVX1: # %bb.0: # %entry
82 ; AVX1-NEXT: vpmovsxbw %xmm0, %xmm1
83 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
84 ; AVX1-NEXT: vpmovsxbw %xmm0, %xmm0
85 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
88 ; AVX2-LABEL: sext_16i8_to_16i16:
89 ; AVX2: # %bb.0: # %entry
90 ; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
93 ; AVX512-LABEL: sext_16i8_to_16i16:
94 ; AVX512: # %bb.0: # %entry
95 ; AVX512-NEXT: vpmovsxbw %xmm0, %ymm0
98 ; X86-SSE2-LABEL: sext_16i8_to_16i16:
99 ; X86-SSE2: # %bb.0: # %entry
100 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
101 ; X86-SSE2-NEXT: psraw $8, %xmm2
102 ; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
103 ; X86-SSE2-NEXT: psraw $8, %xmm1
104 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm0
105 ; X86-SSE2-NEXT: retl
107 ; X86-SSE41-LABEL: sext_16i8_to_16i16:
108 ; X86-SSE41: # %bb.0: # %entry
109 ; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm2
110 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
111 ; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm1
112 ; X86-SSE41-NEXT: movdqa %xmm2, %xmm0
113 ; X86-SSE41-NEXT: retl
115 %B = sext <16 x i8> %A to <16 x i16>
; Sign-extends all thirty-two i8 lanes of %A to <32 x i16> with a single sext.
; Per the checks below: the SSE2 and SSSE3 runs use four unpack + psraw $8
; pairs; the SSE4.1 runs use four pmovsxbw; the AVX1 run uses four vpmovsxbw
; joined pairwise by vinsertf128; the AVX2 run uses two xmm-to-ymm vpmovsxbw;
; the AVX512F run does the same and merges with vinserti64x4; only the
; AVX512BW run is expected to use a single ymm-to-zmm vpmovsxbw.
119 define <32 x i16> @sext_32i8_to_32i16(<32 x i8> %A) nounwind uwtable readnone ssp {
120 ; SSE2-LABEL: sext_32i8_to_32i16:
121 ; SSE2: # %bb.0: # %entry
122 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
123 ; SSE2-NEXT: psraw $8, %xmm4
124 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
125 ; SSE2-NEXT: psraw $8, %xmm5
126 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
127 ; SSE2-NEXT: psraw $8, %xmm2
128 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15]
129 ; SSE2-NEXT: psraw $8, %xmm3
130 ; SSE2-NEXT: movdqa %xmm4, %xmm0
131 ; SSE2-NEXT: movdqa %xmm5, %xmm1
134 ; SSSE3-LABEL: sext_32i8_to_32i16:
135 ; SSSE3: # %bb.0: # %entry
136 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
137 ; SSSE3-NEXT: psraw $8, %xmm4
138 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
139 ; SSSE3-NEXT: psraw $8, %xmm5
140 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
141 ; SSSE3-NEXT: psraw $8, %xmm2
142 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15]
143 ; SSSE3-NEXT: psraw $8, %xmm3
144 ; SSSE3-NEXT: movdqa %xmm4, %xmm0
145 ; SSSE3-NEXT: movdqa %xmm5, %xmm1
148 ; SSE41-LABEL: sext_32i8_to_32i16:
149 ; SSE41: # %bb.0: # %entry
150 ; SSE41-NEXT: pmovsxbw %xmm0, %xmm5
151 ; SSE41-NEXT: pmovsxbw %xmm1, %xmm2
152 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
153 ; SSE41-NEXT: pmovsxbw %xmm0, %xmm4
154 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
155 ; SSE41-NEXT: pmovsxbw %xmm0, %xmm3
156 ; SSE41-NEXT: movdqa %xmm5, %xmm0
157 ; SSE41-NEXT: movdqa %xmm4, %xmm1
160 ; AVX1-LABEL: sext_32i8_to_32i16:
161 ; AVX1: # %bb.0: # %entry
162 ; AVX1-NEXT: vpmovsxbw %xmm0, %xmm1
163 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
164 ; AVX1-NEXT: vpmovsxbw %xmm2, %xmm2
165 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
166 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
167 ; AVX1-NEXT: vpmovsxbw %xmm0, %xmm1
168 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
169 ; AVX1-NEXT: vpmovsxbw %xmm0, %xmm0
170 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
171 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
174 ; AVX2-LABEL: sext_32i8_to_32i16:
175 ; AVX2: # %bb.0: # %entry
176 ; AVX2-NEXT: vpmovsxbw %xmm0, %ymm2
177 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
178 ; AVX2-NEXT: vpmovsxbw %xmm0, %ymm1
179 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
182 ; AVX512F-LABEL: sext_32i8_to_32i16:
183 ; AVX512F: # %bb.0: # %entry
184 ; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm1
185 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
186 ; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm0
187 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
190 ; AVX512BW-LABEL: sext_32i8_to_32i16:
191 ; AVX512BW: # %bb.0: # %entry
192 ; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
193 ; AVX512BW-NEXT: retq
195 ; X86-SSE2-LABEL: sext_32i8_to_32i16:
196 ; X86-SSE2: # %bb.0: # %entry
197 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
198 ; X86-SSE2-NEXT: psraw $8, %xmm4
199 ; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
200 ; X86-SSE2-NEXT: psraw $8, %xmm5
201 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
202 ; X86-SSE2-NEXT: psraw $8, %xmm2
203 ; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15]
204 ; X86-SSE2-NEXT: psraw $8, %xmm3
205 ; X86-SSE2-NEXT: movdqa %xmm4, %xmm0
206 ; X86-SSE2-NEXT: movdqa %xmm5, %xmm1
207 ; X86-SSE2-NEXT: retl
209 ; X86-SSE41-LABEL: sext_32i8_to_32i16:
210 ; X86-SSE41: # %bb.0: # %entry
211 ; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm5
212 ; X86-SSE41-NEXT: pmovsxbw %xmm1, %xmm2
213 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
214 ; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm4
215 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
216 ; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm3
217 ; X86-SSE41-NEXT: movdqa %xmm5, %xmm0
218 ; X86-SSE41-NEXT: movdqa %xmm4, %xmm1
219 ; X86-SSE41-NEXT: retl
221 %B = sext <32 x i8> %A to <32 x i16>
; Sign-extends the low four i8 lanes of %A (shufflevector mask 0..3) to
; <4 x i32>.
; Per the checks below: the SSE2 and SSSE3 runs widen with punpcklbw and
; punpcklwd and then arithmetic-shift right by 24 with psrad; the SSE4.1 and
; AVX-family runs use one (v)pmovsxbd.
225 define <4 x i32> @sext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp {
226 ; SSE2-LABEL: sext_16i8_to_4i32:
227 ; SSE2: # %bb.0: # %entry
228 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
229 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
230 ; SSE2-NEXT: psrad $24, %xmm0
233 ; SSSE3-LABEL: sext_16i8_to_4i32:
234 ; SSSE3: # %bb.0: # %entry
235 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
236 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
237 ; SSSE3-NEXT: psrad $24, %xmm0
240 ; SSE41-LABEL: sext_16i8_to_4i32:
241 ; SSE41: # %bb.0: # %entry
242 ; SSE41-NEXT: pmovsxbd %xmm0, %xmm0
245 ; AVX-LABEL: sext_16i8_to_4i32:
246 ; AVX: # %bb.0: # %entry
247 ; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
250 ; X86-SSE2-LABEL: sext_16i8_to_4i32:
251 ; X86-SSE2: # %bb.0: # %entry
252 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
253 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
254 ; X86-SSE2-NEXT: psrad $24, %xmm0
255 ; X86-SSE2-NEXT: retl
257 ; X86-SSE41-LABEL: sext_16i8_to_4i32:
258 ; X86-SSE41: # %bb.0: # %entry
259 ; X86-SSE41-NEXT: pmovsxbd %xmm0, %xmm0
260 ; X86-SSE41-NEXT: retl
; Extract the low four bytes, then widen each one to 32 bits with sign.
262 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
263 %C = sext <4 x i8> %B to <4 x i32>
; Sign-extends the low eight i8 lanes of %A (shufflevector mask 0..7) to
; <8 x i32>.
; Per the checks below: the SSE2 and SSSE3 runs unpack bytes to dwords and
; use psrad $24 on each half; the SSE4.1 runs use two pmovsxbd with a pshufd
; to reach bytes 4..7; the AVX1 run does the same and joins the halves with
; vinsertf128; the AVX2 and AVX512 runs use one xmm-to-ymm vpmovsxbd.
267 define <8 x i32> @sext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp {
268 ; SSE2-LABEL: sext_16i8_to_8i32:
269 ; SSE2: # %bb.0: # %entry
270 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
271 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
272 ; SSE2-NEXT: psrad $24, %xmm0
273 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
274 ; SSE2-NEXT: psrad $24, %xmm1
277 ; SSSE3-LABEL: sext_16i8_to_8i32:
278 ; SSSE3: # %bb.0: # %entry
279 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
280 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
281 ; SSSE3-NEXT: psrad $24, %xmm0
282 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
283 ; SSSE3-NEXT: psrad $24, %xmm1
286 ; SSE41-LABEL: sext_16i8_to_8i32:
287 ; SSE41: # %bb.0: # %entry
288 ; SSE41-NEXT: pmovsxbd %xmm0, %xmm2
289 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
290 ; SSE41-NEXT: pmovsxbd %xmm0, %xmm1
291 ; SSE41-NEXT: movdqa %xmm2, %xmm0
294 ; AVX1-LABEL: sext_16i8_to_8i32:
295 ; AVX1: # %bb.0: # %entry
296 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1
297 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
298 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
299 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
302 ; AVX2-LABEL: sext_16i8_to_8i32:
303 ; AVX2: # %bb.0: # %entry
304 ; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
307 ; AVX512-LABEL: sext_16i8_to_8i32:
308 ; AVX512: # %bb.0: # %entry
309 ; AVX512-NEXT: vpmovsxbd %xmm0, %ymm0
312 ; X86-SSE2-LABEL: sext_16i8_to_8i32:
313 ; X86-SSE2: # %bb.0: # %entry
314 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
315 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
316 ; X86-SSE2-NEXT: psrad $24, %xmm0
317 ; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
318 ; X86-SSE2-NEXT: psrad $24, %xmm1
319 ; X86-SSE2-NEXT: retl
321 ; X86-SSE41-LABEL: sext_16i8_to_8i32:
322 ; X86-SSE41: # %bb.0: # %entry
323 ; X86-SSE41-NEXT: pmovsxbd %xmm0, %xmm2
324 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
325 ; X86-SSE41-NEXT: pmovsxbd %xmm0, %xmm1
326 ; X86-SSE41-NEXT: movdqa %xmm2, %xmm0
327 ; X86-SSE41-NEXT: retl
; Extract the low eight bytes, then widen each one to 32 bits with sign.
329 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
330 %C = sext <8 x i8> %B to <8 x i32>
; Sign-extends all sixteen i8 lanes of %A to <16 x i32> with a single sext.
; Per the checks below: the SSE2 and SSSE3 runs unpack bytes to dwords and
; use psrad $24 for each of the four result registers; the SSE4.1 runs use
; four pmovsxbd fed by pshufd; the AVX1 run uses four vpmovsxbd joined
; pairwise by vinsertf128; the AVX2 run uses two xmm-to-ymm vpmovsxbd; the
; AVX512 runs use one xmm-to-zmm vpmovsxbd.
334 define <16 x i32> @sext_16i8_to_16i32(<16 x i8> %A) nounwind uwtable readnone ssp {
335 ; SSE2-LABEL: sext_16i8_to_16i32:
336 ; SSE2: # %bb.0: # %entry
337 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
338 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3]
339 ; SSE2-NEXT: psrad $24, %xmm4
340 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
341 ; SSE2-NEXT: psrad $24, %xmm1
342 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
343 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
344 ; SSE2-NEXT: psrad $24, %xmm2
345 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
346 ; SSE2-NEXT: psrad $24, %xmm3
347 ; SSE2-NEXT: movdqa %xmm4, %xmm0
350 ; SSSE3-LABEL: sext_16i8_to_16i32:
351 ; SSSE3: # %bb.0: # %entry
352 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
353 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3]
354 ; SSSE3-NEXT: psrad $24, %xmm4
355 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
356 ; SSSE3-NEXT: psrad $24, %xmm1
357 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
358 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
359 ; SSSE3-NEXT: psrad $24, %xmm2
360 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
361 ; SSSE3-NEXT: psrad $24, %xmm3
362 ; SSSE3-NEXT: movdqa %xmm4, %xmm0
365 ; SSE41-LABEL: sext_16i8_to_16i32:
366 ; SSE41: # %bb.0: # %entry
367 ; SSE41-NEXT: pmovsxbd %xmm0, %xmm4
368 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
369 ; SSE41-NEXT: pmovsxbd %xmm1, %xmm1
370 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
371 ; SSE41-NEXT: pmovsxbd %xmm2, %xmm2
372 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
373 ; SSE41-NEXT: pmovsxbd %xmm0, %xmm3
374 ; SSE41-NEXT: movdqa %xmm4, %xmm0
377 ; AVX1-LABEL: sext_16i8_to_16i32:
378 ; AVX1: # %bb.0: # %entry
379 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1
380 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
381 ; AVX1-NEXT: vpmovsxbd %xmm2, %xmm2
382 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
383 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
384 ; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
385 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
386 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
387 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
388 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
391 ; AVX2-LABEL: sext_16i8_to_16i32:
392 ; AVX2: # %bb.0: # %entry
393 ; AVX2-NEXT: vpmovsxbd %xmm0, %ymm2
394 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
395 ; AVX2-NEXT: vpmovsxbd %xmm0, %ymm1
396 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
399 ; AVX512-LABEL: sext_16i8_to_16i32:
400 ; AVX512: # %bb.0: # %entry
401 ; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
404 ; X86-SSE2-LABEL: sext_16i8_to_16i32:
405 ; X86-SSE2: # %bb.0: # %entry
406 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
407 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3]
408 ; X86-SSE2-NEXT: psrad $24, %xmm4
409 ; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
410 ; X86-SSE2-NEXT: psrad $24, %xmm1
411 ; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
412 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
413 ; X86-SSE2-NEXT: psrad $24, %xmm2
414 ; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
415 ; X86-SSE2-NEXT: psrad $24, %xmm3
416 ; X86-SSE2-NEXT: movdqa %xmm4, %xmm0
417 ; X86-SSE2-NEXT: retl
419 ; X86-SSE41-LABEL: sext_16i8_to_16i32:
420 ; X86-SSE41: # %bb.0: # %entry
421 ; X86-SSE41-NEXT: pmovsxbd %xmm0, %xmm4
422 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
423 ; X86-SSE41-NEXT: pmovsxbd %xmm1, %xmm1
424 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
425 ; X86-SSE41-NEXT: pmovsxbd %xmm2, %xmm2
426 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
427 ; X86-SSE41-NEXT: pmovsxbd %xmm0, %xmm3
428 ; X86-SSE41-NEXT: movdqa %xmm4, %xmm0
429 ; X86-SSE41-NEXT: retl
431 %B = sext <16 x i8> %A to <16 x i32>
; Sign-extends the low two i8 lanes of %A (shufflevector mask 0..1) to
; <2 x i64>.
; Per the checks below: the SSE2 and SSSE3 runs unpack bytes to dwords, use
; pcmpgtd against a pxor-zeroed register alongside psrad $24, and interleave
; the two results with punpckldq; the SSE4.1 and AVX-family runs use one
; (v)pmovsxbq.
435 define <2 x i64> @sext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp {
436 ; SSE2-LABEL: sext_16i8_to_2i64:
437 ; SSE2: # %bb.0: # %entry
438 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
439 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
440 ; SSE2-NEXT: pxor %xmm1, %xmm1
441 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
442 ; SSE2-NEXT: psrad $24, %xmm0
443 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
446 ; SSSE3-LABEL: sext_16i8_to_2i64:
447 ; SSSE3: # %bb.0: # %entry
448 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
449 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
450 ; SSSE3-NEXT: pxor %xmm1, %xmm1
451 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
452 ; SSSE3-NEXT: psrad $24, %xmm0
453 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
456 ; SSE41-LABEL: sext_16i8_to_2i64:
457 ; SSE41: # %bb.0: # %entry
458 ; SSE41-NEXT: pmovsxbq %xmm0, %xmm0
461 ; AVX-LABEL: sext_16i8_to_2i64:
462 ; AVX: # %bb.0: # %entry
463 ; AVX-NEXT: vpmovsxbq %xmm0, %xmm0
466 ; X86-SSE2-LABEL: sext_16i8_to_2i64:
467 ; X86-SSE2: # %bb.0: # %entry
468 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
469 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
470 ; X86-SSE2-NEXT: pxor %xmm1, %xmm1
471 ; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
472 ; X86-SSE2-NEXT: psrad $24, %xmm0
473 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
474 ; X86-SSE2-NEXT: retl
476 ; X86-SSE41-LABEL: sext_16i8_to_2i64:
477 ; X86-SSE41: # %bb.0: # %entry
478 ; X86-SSE41-NEXT: pmovsxbq %xmm0, %xmm0
479 ; X86-SSE41-NEXT: retl
; Extract the low two bytes, then widen each one to 64 bits with sign.
481 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
482 %C = sext <2 x i8> %B to <2 x i64>
; Sign-extends the low four i8 lanes of %A (shufflevector mask 0..3) to
; <4 x i64>.
; Per the checks below: the SSE2 and SSSE3 runs first produce sign-extended
; dwords (unpack + psrad $24), then pair them with a pcmpgtd result using
; punpckldq / punpckhdq; the SSE4.1 runs use two pmovsxbq with a psrld $16 to
; reach bytes 2..3; the AVX1 run does the same and joins the halves with
; vinsertf128; the AVX2 and AVX512 runs use one xmm-to-ymm vpmovsxbq.
486 define <4 x i64> @sext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp {
487 ; SSE2-LABEL: sext_16i8_to_4i64:
488 ; SSE2: # %bb.0: # %entry
489 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
490 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
491 ; SSE2-NEXT: psrad $24, %xmm1
492 ; SSE2-NEXT: pxor %xmm2, %xmm2
493 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
494 ; SSE2-NEXT: movdqa %xmm1, %xmm0
495 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
496 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
499 ; SSSE3-LABEL: sext_16i8_to_4i64:
500 ; SSSE3: # %bb.0: # %entry
501 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
502 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
503 ; SSSE3-NEXT: psrad $24, %xmm1
504 ; SSSE3-NEXT: pxor %xmm2, %xmm2
505 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
506 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
507 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
508 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
511 ; SSE41-LABEL: sext_16i8_to_4i64:
512 ; SSE41: # %bb.0: # %entry
513 ; SSE41-NEXT: pmovsxbq %xmm0, %xmm2
514 ; SSE41-NEXT: psrld $16, %xmm0
515 ; SSE41-NEXT: pmovsxbq %xmm0, %xmm1
516 ; SSE41-NEXT: movdqa %xmm2, %xmm0
519 ; AVX1-LABEL: sext_16i8_to_4i64:
520 ; AVX1: # %bb.0: # %entry
521 ; AVX1-NEXT: vpmovsxbq %xmm0, %xmm1
522 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
523 ; AVX1-NEXT: vpmovsxbq %xmm0, %xmm0
524 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
527 ; AVX2-LABEL: sext_16i8_to_4i64:
528 ; AVX2: # %bb.0: # %entry
529 ; AVX2-NEXT: vpmovsxbq %xmm0, %ymm0
532 ; AVX512-LABEL: sext_16i8_to_4i64:
533 ; AVX512: # %bb.0: # %entry
534 ; AVX512-NEXT: vpmovsxbq %xmm0, %ymm0
537 ; X86-SSE2-LABEL: sext_16i8_to_4i64:
538 ; X86-SSE2: # %bb.0: # %entry
539 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
540 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
541 ; X86-SSE2-NEXT: psrad $24, %xmm1
542 ; X86-SSE2-NEXT: pxor %xmm2, %xmm2
543 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
544 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
545 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
546 ; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
547 ; X86-SSE2-NEXT: retl
549 ; X86-SSE41-LABEL: sext_16i8_to_4i64:
550 ; X86-SSE41: # %bb.0: # %entry
551 ; X86-SSE41-NEXT: pmovsxbq %xmm0, %xmm2
552 ; X86-SSE41-NEXT: psrld $16, %xmm0
553 ; X86-SSE41-NEXT: pmovsxbq %xmm0, %xmm1
554 ; X86-SSE41-NEXT: movdqa %xmm2, %xmm0
555 ; X86-SSE41-NEXT: retl
; Extract the low four bytes, then widen each one to 64 bits with sign.
557 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
558 %C = sext <4 x i8> %B to <4 x i64>
; Sign-extends the low eight i8 lanes of %A (shufflevector mask 0..7) to
; <8 x i64>.
; Per the checks below: the SSE2 and SSSE3 runs produce sign-extended dwords
; (unpack + psrad $24) and pair them with pcmpgtd results using punpckldq /
; punpckhdq for each of the four result registers; the SSE4.1 runs use four
; pmovsxbq fed by psrld / pshufd / psrlq; the AVX1 run uses four vpmovsxbq
; joined pairwise by vinsertf128; the AVX2 run uses two xmm-to-ymm vpmovsxbq;
; the AVX512 runs use one xmm-to-zmm vpmovsxbq.
562 define <8 x i64> @sext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp {
563 ; SSE2-LABEL: sext_16i8_to_8i64:
564 ; SSE2: # %bb.0: # %entry
565 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
566 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
567 ; SSE2-NEXT: psrad $24, %xmm1
568 ; SSE2-NEXT: pxor %xmm4, %xmm4
569 ; SSE2-NEXT: pxor %xmm3, %xmm3
570 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
571 ; SSE2-NEXT: movdqa %xmm1, %xmm0
572 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
573 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
574 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
575 ; SSE2-NEXT: psrad $24, %xmm3
576 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
577 ; SSE2-NEXT: movdqa %xmm3, %xmm2
578 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
579 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
582 ; SSSE3-LABEL: sext_16i8_to_8i64:
583 ; SSSE3: # %bb.0: # %entry
584 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
585 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
586 ; SSSE3-NEXT: psrad $24, %xmm1
587 ; SSSE3-NEXT: pxor %xmm4, %xmm4
588 ; SSSE3-NEXT: pxor %xmm3, %xmm3
589 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
590 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
591 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
592 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
593 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
594 ; SSSE3-NEXT: psrad $24, %xmm3
595 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4
596 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
597 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
598 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
601 ; SSE41-LABEL: sext_16i8_to_8i64:
602 ; SSE41: # %bb.0: # %entry
603 ; SSE41-NEXT: pmovsxbq %xmm0, %xmm4
604 ; SSE41-NEXT: movdqa %xmm0, %xmm1
605 ; SSE41-NEXT: psrld $16, %xmm1
606 ; SSE41-NEXT: pmovsxbq %xmm1, %xmm1
607 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
608 ; SSE41-NEXT: pmovsxbq %xmm2, %xmm2
609 ; SSE41-NEXT: psrlq $48, %xmm0
610 ; SSE41-NEXT: pmovsxbq %xmm0, %xmm3
611 ; SSE41-NEXT: movdqa %xmm4, %xmm0
614 ; AVX1-LABEL: sext_16i8_to_8i64:
615 ; AVX1: # %bb.0: # %entry
616 ; AVX1-NEXT: vpmovsxbq %xmm0, %xmm1
617 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm2
618 ; AVX1-NEXT: vpmovsxbq %xmm2, %xmm2
619 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
620 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
621 ; AVX1-NEXT: vpmovsxbq %xmm0, %xmm1
622 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
623 ; AVX1-NEXT: vpmovsxbq %xmm0, %xmm0
624 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
625 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
628 ; AVX2-LABEL: sext_16i8_to_8i64:
629 ; AVX2: # %bb.0: # %entry
630 ; AVX2-NEXT: vpmovsxbq %xmm0, %ymm2
631 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
632 ; AVX2-NEXT: vpmovsxbq %xmm0, %ymm1
633 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
636 ; AVX512-LABEL: sext_16i8_to_8i64:
637 ; AVX512: # %bb.0: # %entry
638 ; AVX512-NEXT: vpmovsxbq %xmm0, %zmm0
641 ; X86-SSE2-LABEL: sext_16i8_to_8i64:
642 ; X86-SSE2: # %bb.0: # %entry
643 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
644 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
645 ; X86-SSE2-NEXT: psrad $24, %xmm1
646 ; X86-SSE2-NEXT: pxor %xmm4, %xmm4
647 ; X86-SSE2-NEXT: pxor %xmm3, %xmm3
648 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
649 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
650 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
651 ; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
652 ; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
653 ; X86-SSE2-NEXT: psrad $24, %xmm3
654 ; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
655 ; X86-SSE2-NEXT: movdqa %xmm3, %xmm2
656 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
657 ; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
658 ; X86-SSE2-NEXT: retl
660 ; X86-SSE41-LABEL: sext_16i8_to_8i64:
661 ; X86-SSE41: # %bb.0: # %entry
662 ; X86-SSE41-NEXT: pmovsxbq %xmm0, %xmm4
663 ; X86-SSE41-NEXT: movdqa %xmm0, %xmm1
664 ; X86-SSE41-NEXT: psrld $16, %xmm1
665 ; X86-SSE41-NEXT: pmovsxbq %xmm1, %xmm1
666 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
667 ; X86-SSE41-NEXT: pmovsxbq %xmm2, %xmm2
668 ; X86-SSE41-NEXT: psrlq $48, %xmm0
669 ; X86-SSE41-NEXT: pmovsxbq %xmm0, %xmm3
670 ; X86-SSE41-NEXT: movdqa %xmm4, %xmm0
671 ; X86-SSE41-NEXT: retl
; Extract the low eight bytes, then widen each one to 64 bits with sign.
673 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
674 %C = sext <8 x i8> %B to <8 x i64>
; Sign-extends the low four i16 lanes of %A (shufflevector mask 0..3) to
; <4 x i32>.
; Per the checks below: the SSE2 and SSSE3 runs unpack with punpcklwd and
; arithmetic-shift right by 16 with psrad; the SSE4.1 and AVX-family runs use
; one (v)pmovsxwd.
678 define <4 x i32> @sext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp {
679 ; SSE2-LABEL: sext_8i16_to_4i32:
680 ; SSE2: # %bb.0: # %entry
681 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
682 ; SSE2-NEXT: psrad $16, %xmm0
685 ; SSSE3-LABEL: sext_8i16_to_4i32:
686 ; SSSE3: # %bb.0: # %entry
687 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
688 ; SSSE3-NEXT: psrad $16, %xmm0
691 ; SSE41-LABEL: sext_8i16_to_4i32:
692 ; SSE41: # %bb.0: # %entry
693 ; SSE41-NEXT: pmovsxwd %xmm0, %xmm0
696 ; AVX-LABEL: sext_8i16_to_4i32:
697 ; AVX: # %bb.0: # %entry
698 ; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
701 ; X86-SSE2-LABEL: sext_8i16_to_4i32:
702 ; X86-SSE2: # %bb.0: # %entry
703 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
704 ; X86-SSE2-NEXT: psrad $16, %xmm0
705 ; X86-SSE2-NEXT: retl
707 ; X86-SSE41-LABEL: sext_8i16_to_4i32:
708 ; X86-SSE41: # %bb.0: # %entry
709 ; X86-SSE41-NEXT: pmovsxwd %xmm0, %xmm0
710 ; X86-SSE41-NEXT: retl
; Extract the low four words, then widen each one to 32 bits with sign.
712 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
713 %C = sext <4 x i16> %B to <4 x i32>
; Sign-extends all eight i16 lanes of %A to <8 x i32> with a single sext.
; Per the checks below: the SSE2 and SSSE3 runs build the low and high halves
; with punpcklwd / punpckhwd followed by psrad $16; the SSE4.1 runs use two
; pmovsxwd with a pshufd to reach the upper four words; the AVX1 run does the
; same and joins the halves with vinsertf128; the AVX2 and AVX512 runs use
; one xmm-to-ymm vpmovsxwd.
717 define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
718 ; SSE2-LABEL: sext_8i16_to_8i32:
719 ; SSE2: # %bb.0: # %entry
720 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
721 ; SSE2-NEXT: psrad $16, %xmm2
722 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
723 ; SSE2-NEXT: psrad $16, %xmm1
724 ; SSE2-NEXT: movdqa %xmm2, %xmm0
727 ; SSSE3-LABEL: sext_8i16_to_8i32:
728 ; SSSE3: # %bb.0: # %entry
729 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
730 ; SSSE3-NEXT: psrad $16, %xmm2
731 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
732 ; SSSE3-NEXT: psrad $16, %xmm1
733 ; SSSE3-NEXT: movdqa %xmm2, %xmm0
736 ; SSE41-LABEL: sext_8i16_to_8i32:
737 ; SSE41: # %bb.0: # %entry
738 ; SSE41-NEXT: pmovsxwd %xmm0, %xmm2
739 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
740 ; SSE41-NEXT: pmovsxwd %xmm0, %xmm1
741 ; SSE41-NEXT: movdqa %xmm2, %xmm0
744 ; AVX1-LABEL: sext_8i16_to_8i32:
745 ; AVX1: # %bb.0: # %entry
746 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
747 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
748 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
749 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
752 ; AVX2-LABEL: sext_8i16_to_8i32:
753 ; AVX2: # %bb.0: # %entry
754 ; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
757 ; AVX512-LABEL: sext_8i16_to_8i32:
758 ; AVX512: # %bb.0: # %entry
759 ; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
762 ; X86-SSE2-LABEL: sext_8i16_to_8i32:
763 ; X86-SSE2: # %bb.0: # %entry
764 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
765 ; X86-SSE2-NEXT: psrad $16, %xmm2
766 ; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
767 ; X86-SSE2-NEXT: psrad $16, %xmm1
768 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm0
769 ; X86-SSE2-NEXT: retl
771 ; X86-SSE41-LABEL: sext_8i16_to_8i32:
772 ; X86-SSE41: # %bb.0: # %entry
773 ; X86-SSE41-NEXT: pmovsxwd %xmm0, %xmm2
774 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
775 ; X86-SSE41-NEXT: pmovsxwd %xmm0, %xmm1
776 ; X86-SSE41-NEXT: movdqa %xmm2, %xmm0
777 ; X86-SSE41-NEXT: retl
779 %B = sext <8 x i16> %A to <8 x i32>
; Full-width sign extension of <16 x i16> to <16 x i32>.
; Per the autogenerated checks below, the SSE runs produce four xmm results
; (xmm0 through xmm3), the AVX1 and AVX2 runs produce two ymm results, and the
; AVX-512 runs widen ymm0 into zmm0 with a single vpmovsxwd.
783 define <16 x i32> @sext_16i16_to_16i32(<16 x i16> %A) nounwind uwtable readnone ssp {
784 ; SSE2-LABEL: sext_16i16_to_16i32:
785 ; SSE2: # %bb.0: # %entry
786 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
787 ; SSE2-NEXT: psrad $16, %xmm4
788 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
789 ; SSE2-NEXT: psrad $16, %xmm5
790 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
791 ; SSE2-NEXT: psrad $16, %xmm2
792 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
793 ; SSE2-NEXT: psrad $16, %xmm3
794 ; SSE2-NEXT: movdqa %xmm4, %xmm0
795 ; SSE2-NEXT: movdqa %xmm5, %xmm1
798 ; SSSE3-LABEL: sext_16i16_to_16i32:
799 ; SSSE3: # %bb.0: # %entry
800 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
801 ; SSSE3-NEXT: psrad $16, %xmm4
802 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
803 ; SSSE3-NEXT: psrad $16, %xmm5
804 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
805 ; SSSE3-NEXT: psrad $16, %xmm2
806 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
807 ; SSSE3-NEXT: psrad $16, %xmm3
808 ; SSSE3-NEXT: movdqa %xmm4, %xmm0
809 ; SSSE3-NEXT: movdqa %xmm5, %xmm1
812 ; SSE41-LABEL: sext_16i16_to_16i32:
813 ; SSE41: # %bb.0: # %entry
814 ; SSE41-NEXT: pmovsxwd %xmm0, %xmm5
815 ; SSE41-NEXT: pmovsxwd %xmm1, %xmm2
816 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
817 ; SSE41-NEXT: pmovsxwd %xmm0, %xmm4
818 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
819 ; SSE41-NEXT: pmovsxwd %xmm0, %xmm3
820 ; SSE41-NEXT: movdqa %xmm5, %xmm0
821 ; SSE41-NEXT: movdqa %xmm4, %xmm1
824 ; AVX1-LABEL: sext_16i16_to_16i32:
825 ; AVX1: # %bb.0: # %entry
826 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
827 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
828 ; AVX1-NEXT: vpmovsxwd %xmm2, %xmm2
829 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
830 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
831 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
832 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
833 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
834 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
835 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
838 ; AVX2-LABEL: sext_16i16_to_16i32:
839 ; AVX2: # %bb.0: # %entry
840 ; AVX2-NEXT: vpmovsxwd %xmm0, %ymm2
841 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
842 ; AVX2-NEXT: vpmovsxwd %xmm0, %ymm1
843 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
846 ; AVX512-LABEL: sext_16i16_to_16i32:
847 ; AVX512: # %bb.0: # %entry
848 ; AVX512-NEXT: vpmovsxwd %ymm0, %zmm0
851 ; X86-SSE2-LABEL: sext_16i16_to_16i32:
852 ; X86-SSE2: # %bb.0: # %entry
853 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
854 ; X86-SSE2-NEXT: psrad $16, %xmm4
855 ; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
856 ; X86-SSE2-NEXT: psrad $16, %xmm5
857 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
858 ; X86-SSE2-NEXT: psrad $16, %xmm2
859 ; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
860 ; X86-SSE2-NEXT: psrad $16, %xmm3
861 ; X86-SSE2-NEXT: movdqa %xmm4, %xmm0
862 ; X86-SSE2-NEXT: movdqa %xmm5, %xmm1
863 ; X86-SSE2-NEXT: retl
865 ; X86-SSE41-LABEL: sext_16i16_to_16i32:
866 ; X86-SSE41: # %bb.0: # %entry
867 ; X86-SSE41-NEXT: pmovsxwd %xmm0, %xmm5
868 ; X86-SSE41-NEXT: pmovsxwd %xmm1, %xmm2
869 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
870 ; X86-SSE41-NEXT: pmovsxwd %xmm0, %xmm4
871 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
872 ; X86-SSE41-NEXT: pmovsxwd %xmm0, %xmm3
873 ; X86-SSE41-NEXT: movdqa %xmm5, %xmm0
874 ; X86-SSE41-NEXT: movdqa %xmm4, %xmm1
875 ; X86-SSE41-NEXT: retl
877 %B = sext <16 x i16> %A to <16 x i32>
; Sign extension of the two lowest i16 lanes of an <8 x i16> input to
; <2 x i64> (an in-register partial extension).
; Per the autogenerated checks below, SSE2 and SSSE3 builds widen in two steps
; (punpcklwd with psrad $16, then a pcmpgtd-against-zero sign mask interleaved
; via punpckldq), while SSE4.1 and AVX builds use a single pmovsxwq.
881 define <2 x i64> @sext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp {
882 ; SSE2-LABEL: sext_8i16_to_2i64:
883 ; SSE2: # %bb.0: # %entry
884 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
885 ; SSE2-NEXT: pxor %xmm1, %xmm1
886 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
887 ; SSE2-NEXT: psrad $16, %xmm0
888 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
891 ; SSSE3-LABEL: sext_8i16_to_2i64:
892 ; SSSE3: # %bb.0: # %entry
893 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
894 ; SSSE3-NEXT: pxor %xmm1, %xmm1
895 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
896 ; SSSE3-NEXT: psrad $16, %xmm0
897 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
900 ; SSE41-LABEL: sext_8i16_to_2i64:
901 ; SSE41: # %bb.0: # %entry
902 ; SSE41-NEXT: pmovsxwq %xmm0, %xmm0
905 ; AVX-LABEL: sext_8i16_to_2i64:
906 ; AVX: # %bb.0: # %entry
907 ; AVX-NEXT: vpmovsxwq %xmm0, %xmm0
910 ; X86-SSE2-LABEL: sext_8i16_to_2i64:
911 ; X86-SSE2: # %bb.0: # %entry
912 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
913 ; X86-SSE2-NEXT: pxor %xmm1, %xmm1
914 ; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
915 ; X86-SSE2-NEXT: psrad $16, %xmm0
916 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
917 ; X86-SSE2-NEXT: retl
919 ; X86-SSE41-LABEL: sext_8i16_to_2i64:
920 ; X86-SSE41: # %bb.0: # %entry
921 ; X86-SSE41-NEXT: pmovsxwq %xmm0, %xmm0
922 ; X86-SSE41-NEXT: retl
; Keep only lanes 0 and 1 of %A, then widen each from i16 to i64.
924 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
925 %C = sext <2 x i16> %B to <2 x i64>
; Sign extension of the four lowest i16 lanes of an <8 x i16> input to
; <4 x i64>.
; Per the autogenerated checks below, SSE2 and SSSE3 builds first widen to i32
; (punpcklwd with psrad $16) and then interleave a pcmpgtd sign mask with
; punpckldq / punpckhdq. SSE4.1 and AVX1 builds use two pmovsxwq operations,
; and AVX2 / AVX-512 builds use one vpmovsxwq into ymm0.
929 define <4 x i64> @sext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp {
930 ; SSE2-LABEL: sext_8i16_to_4i64:
931 ; SSE2: # %bb.0: # %entry
932 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
933 ; SSE2-NEXT: psrad $16, %xmm1
934 ; SSE2-NEXT: pxor %xmm2, %xmm2
935 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
936 ; SSE2-NEXT: movdqa %xmm1, %xmm0
937 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
938 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
941 ; SSSE3-LABEL: sext_8i16_to_4i64:
942 ; SSSE3: # %bb.0: # %entry
943 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
944 ; SSSE3-NEXT: psrad $16, %xmm1
945 ; SSSE3-NEXT: pxor %xmm2, %xmm2
946 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
947 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
948 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
949 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
952 ; SSE41-LABEL: sext_8i16_to_4i64:
953 ; SSE41: # %bb.0: # %entry
954 ; SSE41-NEXT: pmovsxwq %xmm0, %xmm2
955 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
956 ; SSE41-NEXT: pmovsxwq %xmm0, %xmm1
957 ; SSE41-NEXT: movdqa %xmm2, %xmm0
960 ; AVX1-LABEL: sext_8i16_to_4i64:
961 ; AVX1: # %bb.0: # %entry
962 ; AVX1-NEXT: vpmovsxwq %xmm0, %xmm1
963 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
964 ; AVX1-NEXT: vpmovsxwq %xmm0, %xmm0
965 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
968 ; AVX2-LABEL: sext_8i16_to_4i64:
969 ; AVX2: # %bb.0: # %entry
970 ; AVX2-NEXT: vpmovsxwq %xmm0, %ymm0
973 ; AVX512-LABEL: sext_8i16_to_4i64:
974 ; AVX512: # %bb.0: # %entry
975 ; AVX512-NEXT: vpmovsxwq %xmm0, %ymm0
978 ; X86-SSE2-LABEL: sext_8i16_to_4i64:
979 ; X86-SSE2: # %bb.0: # %entry
980 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
981 ; X86-SSE2-NEXT: psrad $16, %xmm1
982 ; X86-SSE2-NEXT: pxor %xmm2, %xmm2
983 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
984 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
985 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
986 ; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
987 ; X86-SSE2-NEXT: retl
989 ; X86-SSE41-LABEL: sext_8i16_to_4i64:
990 ; X86-SSE41: # %bb.0: # %entry
991 ; X86-SSE41-NEXT: pmovsxwq %xmm0, %xmm2
992 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
993 ; X86-SSE41-NEXT: pmovsxwq %xmm0, %xmm1
994 ; X86-SSE41-NEXT: movdqa %xmm2, %xmm0
995 ; X86-SSE41-NEXT: retl
; Keep only lanes 0 through 3 of %A, then widen each from i16 to i64.
997 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
998 %C = sext <4 x i16> %B to <4 x i64>
; Full-width sign extension of <8 x i16> to <8 x i64> (a 4x widening).
; Per the autogenerated checks below, the SSE runs fill xmm0 through xmm3.
; SSE2 and SSSE3 builds widen to i32 first and then interleave pcmpgtd sign
; masks, SSE4.1 builds use four pmovsxwq operations fed by pshufd, AVX1 builds
; pair those up with vinsertf128, AVX2 builds use two vpmovsxwq into ymm
; registers, and AVX-512 builds use one vpmovsxwq from xmm0 into zmm0.
1002 define <8 x i64> @sext_8i16_to_8i64(<8 x i16> %A) nounwind uwtable readnone ssp {
1003 ; SSE2-LABEL: sext_8i16_to_8i64:
1004 ; SSE2: # %bb.0: # %entry
1005 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1006 ; SSE2-NEXT: psrad $16, %xmm1
1007 ; SSE2-NEXT: pxor %xmm5, %xmm5
1008 ; SSE2-NEXT: pxor %xmm2, %xmm2
1009 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
1010 ; SSE2-NEXT: movdqa %xmm1, %xmm4
1011 ; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
1012 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1013 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
1014 ; SSE2-NEXT: psrad $16, %xmm3
1015 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
1016 ; SSE2-NEXT: movdqa %xmm3, %xmm2
1017 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
1018 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm5[2],xmm3[3],xmm5[3]
1019 ; SSE2-NEXT: movdqa %xmm4, %xmm0
1022 ; SSSE3-LABEL: sext_8i16_to_8i64:
1023 ; SSSE3: # %bb.0: # %entry
1024 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1025 ; SSSE3-NEXT: psrad $16, %xmm1
1026 ; SSSE3-NEXT: pxor %xmm5, %xmm5
1027 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1028 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
1029 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
1030 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
1031 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1032 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
1033 ; SSSE3-NEXT: psrad $16, %xmm3
1034 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm5
1035 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
1036 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
1037 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm5[2],xmm3[3],xmm5[3]
1038 ; SSSE3-NEXT: movdqa %xmm4, %xmm0
1041 ; SSE41-LABEL: sext_8i16_to_8i64:
1042 ; SSE41: # %bb.0: # %entry
1043 ; SSE41-NEXT: pmovsxwq %xmm0, %xmm4
1044 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1045 ; SSE41-NEXT: pmovsxwq %xmm1, %xmm1
1046 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
1047 ; SSE41-NEXT: pmovsxwq %xmm2, %xmm2
1048 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
1049 ; SSE41-NEXT: pmovsxwq %xmm0, %xmm3
1050 ; SSE41-NEXT: movdqa %xmm4, %xmm0
1053 ; AVX1-LABEL: sext_8i16_to_8i64:
1054 ; AVX1: # %bb.0: # %entry
1055 ; AVX1-NEXT: vpmovsxwq %xmm0, %xmm1
1056 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
1057 ; AVX1-NEXT: vpmovsxwq %xmm2, %xmm2
1058 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
1059 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1060 ; AVX1-NEXT: vpmovsxwq %xmm1, %xmm1
1061 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
1062 ; AVX1-NEXT: vpmovsxwq %xmm0, %xmm0
1063 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
1064 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
1067 ; AVX2-LABEL: sext_8i16_to_8i64:
1068 ; AVX2: # %bb.0: # %entry
1069 ; AVX2-NEXT: vpmovsxwq %xmm0, %ymm2
1070 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1071 ; AVX2-NEXT: vpmovsxwq %xmm0, %ymm1
1072 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
1075 ; AVX512-LABEL: sext_8i16_to_8i64:
1076 ; AVX512: # %bb.0: # %entry
1077 ; AVX512-NEXT: vpmovsxwq %xmm0, %zmm0
1080 ; X86-SSE2-LABEL: sext_8i16_to_8i64:
1081 ; X86-SSE2: # %bb.0: # %entry
1082 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1083 ; X86-SSE2-NEXT: psrad $16, %xmm1
1084 ; X86-SSE2-NEXT: pxor %xmm5, %xmm5
1085 ; X86-SSE2-NEXT: pxor %xmm2, %xmm2
1086 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
1087 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm4
1088 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
1089 ; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1090 ; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
1091 ; X86-SSE2-NEXT: psrad $16, %xmm3
1092 ; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm5
1093 ; X86-SSE2-NEXT: movdqa %xmm3, %xmm2
1094 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
1095 ; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm5[2],xmm3[3],xmm5[3]
1096 ; X86-SSE2-NEXT: movdqa %xmm4, %xmm0
1097 ; X86-SSE2-NEXT: retl
1099 ; X86-SSE41-LABEL: sext_8i16_to_8i64:
1100 ; X86-SSE41: # %bb.0: # %entry
1101 ; X86-SSE41-NEXT: pmovsxwq %xmm0, %xmm4
1102 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1103 ; X86-SSE41-NEXT: pmovsxwq %xmm1, %xmm1
1104 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
1105 ; X86-SSE41-NEXT: pmovsxwq %xmm2, %xmm2
1106 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
1107 ; X86-SSE41-NEXT: pmovsxwq %xmm0, %xmm3
1108 ; X86-SSE41-NEXT: movdqa %xmm4, %xmm0
1109 ; X86-SSE41-NEXT: retl
1111 %B = sext <8 x i16> %A to <8 x i64>
; Sign extension of the two lowest i32 lanes of a <4 x i32> input to
; <2 x i64>.
; Per the autogenerated checks below, SSE2 and SSSE3 builds compute a sign mask
; (pxor to zero a register, then pcmpgtd against the input) and interleave it
; with the data via punpckldq, while SSE4.1 and AVX builds use a single
; pmovsxdq.
1115 define <2 x i64> @sext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp {
1116 ; SSE2-LABEL: sext_4i32_to_2i64:
1117 ; SSE2: # %bb.0: # %entry
1118 ; SSE2-NEXT: pxor %xmm1, %xmm1
1119 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
1120 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1123 ; SSSE3-LABEL: sext_4i32_to_2i64:
1124 ; SSSE3: # %bb.0: # %entry
1125 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1126 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
1127 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1130 ; SSE41-LABEL: sext_4i32_to_2i64:
1131 ; SSE41: # %bb.0: # %entry
1132 ; SSE41-NEXT: pmovsxdq %xmm0, %xmm0
1135 ; AVX-LABEL: sext_4i32_to_2i64:
1136 ; AVX: # %bb.0: # %entry
1137 ; AVX-NEXT: vpmovsxdq %xmm0, %xmm0
1140 ; X86-SSE2-LABEL: sext_4i32_to_2i64:
1141 ; X86-SSE2: # %bb.0: # %entry
1142 ; X86-SSE2-NEXT: pxor %xmm1, %xmm1
1143 ; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
1144 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1145 ; X86-SSE2-NEXT: retl
1147 ; X86-SSE41-LABEL: sext_4i32_to_2i64:
1148 ; X86-SSE41: # %bb.0: # %entry
1149 ; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm0
1150 ; X86-SSE41-NEXT: retl
; Keep only lanes 0 and 1 of %A, then widen each from i32 to i64.
1152 %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
1153 %C = sext <2 x i32> %B to <2 x i64>
; Full-width sign extension of <4 x i32> to <4 x i64>.
; Per the autogenerated checks below, SSE2 and SSSE3 builds build a pcmpgtd
; sign mask for each half and interleave it with punpckldq, SSE4.1 and AVX1
; builds use pmovsxdq once per half (the upper half selected by pshufd), and
; AVX2 / AVX-512 builds use a single vpmovsxdq from xmm0 into ymm0.
1157 define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
1158 ; SSE2-LABEL: sext_4i32_to_4i64:
1159 ; SSE2: # %bb.0: # %entry
1160 ; SSE2-NEXT: pxor %xmm2, %xmm2
1161 ; SSE2-NEXT: pxor %xmm3, %xmm3
1162 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
1163 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1164 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
1165 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
1166 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1169 ; SSSE3-LABEL: sext_4i32_to_4i64:
1170 ; SSSE3: # %bb.0: # %entry
1171 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1172 ; SSSE3-NEXT: pxor %xmm3, %xmm3
1173 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3
1174 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1175 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
1176 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
1177 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1180 ; SSE41-LABEL: sext_4i32_to_4i64:
1181 ; SSE41: # %bb.0: # %entry
1182 ; SSE41-NEXT: pmovsxdq %xmm0, %xmm2
1183 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1184 ; SSE41-NEXT: pmovsxdq %xmm0, %xmm1
1185 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1188 ; AVX1-LABEL: sext_4i32_to_4i64:
1189 ; AVX1: # %bb.0: # %entry
1190 ; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1
1191 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1192 ; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
1193 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1196 ; AVX2-LABEL: sext_4i32_to_4i64:
1197 ; AVX2: # %bb.0: # %entry
1198 ; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
1201 ; AVX512-LABEL: sext_4i32_to_4i64:
1202 ; AVX512: # %bb.0: # %entry
1203 ; AVX512-NEXT: vpmovsxdq %xmm0, %ymm0
1206 ; X86-SSE2-LABEL: sext_4i32_to_4i64:
1207 ; X86-SSE2: # %bb.0: # %entry
1208 ; X86-SSE2-NEXT: pxor %xmm2, %xmm2
1209 ; X86-SSE2-NEXT: pxor %xmm3, %xmm3
1210 ; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3
1211 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1212 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
1213 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
1214 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1215 ; X86-SSE2-NEXT: retl
1217 ; X86-SSE41-LABEL: sext_4i32_to_4i64:
1218 ; X86-SSE41: # %bb.0: # %entry
1219 ; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm2
1220 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1221 ; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm1
1222 ; X86-SSE41-NEXT: movdqa %xmm2, %xmm0
1223 ; X86-SSE41-NEXT: retl
1225 %B = sext <4 x i32> %A to <4 x i64>
; Full-width sign extension of <8 x i32> to <8 x i64>.
; Per the autogenerated checks below, the SSE runs take the input in xmm0 and
; xmm1 and fill xmm0 through xmm3. SSE2 and SSSE3 builds interleave pcmpgtd
; sign masks with punpckldq, SSE4.1 builds use four pmovsxdq operations, AVX1
; and AVX2 builds produce two ymm results, and AVX-512 builds widen ymm0 into
; zmm0 with a single vpmovsxdq.
1229 define <8 x i64> @sext_8i32_to_8i64(<8 x i32> %A) nounwind uwtable readnone ssp {
1230 ; SSE2-LABEL: sext_8i32_to_8i64:
1231 ; SSE2: # %bb.0: # %entry
1232 ; SSE2-NEXT: movdqa %xmm1, %xmm2
1233 ; SSE2-NEXT: pxor %xmm4, %xmm4
1234 ; SSE2-NEXT: pxor %xmm3, %xmm3
1235 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
1236 ; SSE2-NEXT: pxor %xmm5, %xmm5
1237 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm5
1238 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1239 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
1240 ; SSE2-NEXT: pxor %xmm3, %xmm3
1241 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
1242 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
1243 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3]
1244 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
1245 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
1246 ; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
1249 ; SSSE3-LABEL: sext_8i32_to_8i64:
1250 ; SSSE3: # %bb.0: # %entry
1251 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
1252 ; SSSE3-NEXT: pxor %xmm4, %xmm4
1253 ; SSSE3-NEXT: pxor %xmm3, %xmm3
1254 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3
1255 ; SSSE3-NEXT: pxor %xmm5, %xmm5
1256 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm5
1257 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1258 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
1259 ; SSSE3-NEXT: pxor %xmm3, %xmm3
1260 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
1261 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
1262 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3]
1263 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
1264 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4
1265 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
1268 ; SSE41-LABEL: sext_8i32_to_8i64:
1269 ; SSE41: # %bb.0: # %entry
1270 ; SSE41-NEXT: pmovsxdq %xmm0, %xmm5
1271 ; SSE41-NEXT: pmovsxdq %xmm1, %xmm2
1272 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1273 ; SSE41-NEXT: pmovsxdq %xmm0, %xmm4
1274 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
1275 ; SSE41-NEXT: pmovsxdq %xmm0, %xmm3
1276 ; SSE41-NEXT: movdqa %xmm5, %xmm0
1277 ; SSE41-NEXT: movdqa %xmm4, %xmm1
1280 ; AVX1-LABEL: sext_8i32_to_8i64:
1281 ; AVX1: # %bb.0: # %entry
1282 ; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1
1283 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
1284 ; AVX1-NEXT: vpmovsxdq %xmm2, %xmm2
1285 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
1286 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1287 ; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1
1288 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1289 ; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
1290 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
1291 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
1294 ; AVX2-LABEL: sext_8i32_to_8i64:
1295 ; AVX2: # %bb.0: # %entry
1296 ; AVX2-NEXT: vpmovsxdq %xmm0, %ymm2
1297 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1298 ; AVX2-NEXT: vpmovsxdq %xmm0, %ymm1
1299 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
1302 ; AVX512-LABEL: sext_8i32_to_8i64:
1303 ; AVX512: # %bb.0: # %entry
1304 ; AVX512-NEXT: vpmovsxdq %ymm0, %zmm0
1307 ; X86-SSE2-LABEL: sext_8i32_to_8i64:
1308 ; X86-SSE2: # %bb.0: # %entry
1309 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
1310 ; X86-SSE2-NEXT: pxor %xmm4, %xmm4
1311 ; X86-SSE2-NEXT: pxor %xmm3, %xmm3
1312 ; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3
1313 ; X86-SSE2-NEXT: pxor %xmm5, %xmm5
1314 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm5
1315 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1316 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
1317 ; X86-SSE2-NEXT: pxor %xmm3, %xmm3
1318 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
1319 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
1320 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3]
1321 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
1322 ; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
1323 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
1324 ; X86-SSE2-NEXT: retl
1326 ; X86-SSE41-LABEL: sext_8i32_to_8i64:
1327 ; X86-SSE41: # %bb.0: # %entry
1328 ; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm5
1329 ; X86-SSE41-NEXT: pmovsxdq %xmm1, %xmm2
1330 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1331 ; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm4
1332 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
1333 ; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm3
1334 ; X86-SSE41-NEXT: movdqa %xmm5, %xmm0
1335 ; X86-SSE41-NEXT: movdqa %xmm4, %xmm1
1336 ; X86-SSE41-NEXT: retl
1338 %B = sext <8 x i32> %A to <8 x i64>
; Loads a <2 x i1> value through %ptr and sign-extends it to <2 x i64>, so each
; set bit becomes an all-ones 64-bit lane.
; Per the autogenerated checks below, the 64-bit SSE, AVX1 and AVX2 runs load
; one byte, isolate each bit in a scalar register, negate it and assemble the
; vector with movq and punpcklqdq. The AVX-512 runs move the loaded byte into
; mask register k1 and materialise the lanes with a zero-masked vpternlogq $255.
; The 32-bit runs negate 32-bit scalars and duplicate each one into both
; halves of its 64-bit lane (pshufd on SSE2, pinsrd on SSE4.1).
1342 define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
1343 ; SSE-LABEL: load_sext_2i1_to_2i64:
1344 ; SSE: # %bb.0: # %entry
1345 ; SSE-NEXT: movb (%rdi), %al
1346 ; SSE-NEXT: movzbl %al, %ecx
1347 ; SSE-NEXT: shrb %al
1348 ; SSE-NEXT: movzbl %al, %eax
1349 ; SSE-NEXT: negq %rax
1350 ; SSE-NEXT: movq %rax, %xmm1
1351 ; SSE-NEXT: andl $1, %ecx
1352 ; SSE-NEXT: negq %rcx
1353 ; SSE-NEXT: movq %rcx, %xmm0
1354 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1357 ; AVX1-LABEL: load_sext_2i1_to_2i64:
1358 ; AVX1: # %bb.0: # %entry
1359 ; AVX1-NEXT: movb (%rdi), %al
1360 ; AVX1-NEXT: movzbl %al, %ecx
1361 ; AVX1-NEXT: shrb %al
1362 ; AVX1-NEXT: movzbl %al, %eax
1363 ; AVX1-NEXT: negq %rax
1364 ; AVX1-NEXT: vmovq %rax, %xmm0
1365 ; AVX1-NEXT: andl $1, %ecx
1366 ; AVX1-NEXT: negq %rcx
1367 ; AVX1-NEXT: vmovq %rcx, %xmm1
1368 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1371 ; AVX2-LABEL: load_sext_2i1_to_2i64:
1372 ; AVX2: # %bb.0: # %entry
1373 ; AVX2-NEXT: movb (%rdi), %al
1374 ; AVX2-NEXT: movzbl %al, %ecx
1375 ; AVX2-NEXT: shrb %al
1376 ; AVX2-NEXT: movzbl %al, %eax
1377 ; AVX2-NEXT: negq %rax
1378 ; AVX2-NEXT: vmovq %rax, %xmm0
1379 ; AVX2-NEXT: andl $1, %ecx
1380 ; AVX2-NEXT: negq %rcx
1381 ; AVX2-NEXT: vmovq %rcx, %xmm1
1382 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1385 ; AVX512F-LABEL: load_sext_2i1_to_2i64:
1386 ; AVX512F: # %bb.0: # %entry
1387 ; AVX512F-NEXT: movzbl (%rdi), %eax
1388 ; AVX512F-NEXT: kmovw %eax, %k1
1389 ; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1390 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1391 ; AVX512F-NEXT: vzeroupper
1392 ; AVX512F-NEXT: retq
1394 ; AVX512BW-LABEL: load_sext_2i1_to_2i64:
1395 ; AVX512BW: # %bb.0: # %entry
1396 ; AVX512BW-NEXT: movzbl (%rdi), %eax
1397 ; AVX512BW-NEXT: kmovd %eax, %k1
1398 ; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1399 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1400 ; AVX512BW-NEXT: vzeroupper
1401 ; AVX512BW-NEXT: retq
1403 ; X86-SSE2-LABEL: load_sext_2i1_to_2i64:
1404 ; X86-SSE2: # %bb.0: # %entry
1405 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
1406 ; X86-SSE2-NEXT: movb (%eax), %al
1407 ; X86-SSE2-NEXT: movzbl %al, %ecx
1408 ; X86-SSE2-NEXT: shrb %al
1409 ; X86-SSE2-NEXT: movzbl %al, %eax
1410 ; X86-SSE2-NEXT: negl %eax
1411 ; X86-SSE2-NEXT: movd %eax, %xmm0
1412 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
1413 ; X86-SSE2-NEXT: andl $1, %ecx
1414 ; X86-SSE2-NEXT: negl %ecx
1415 ; X86-SSE2-NEXT: movd %ecx, %xmm0
1416 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
1417 ; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1418 ; X86-SSE2-NEXT: retl
1420 ; X86-SSE41-LABEL: load_sext_2i1_to_2i64:
1421 ; X86-SSE41: # %bb.0: # %entry
1422 ; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
1423 ; X86-SSE41-NEXT: movb (%eax), %al
1424 ; X86-SSE41-NEXT: movzbl %al, %ecx
1425 ; X86-SSE41-NEXT: andl $1, %ecx
1426 ; X86-SSE41-NEXT: negl %ecx
1427 ; X86-SSE41-NEXT: movd %ecx, %xmm0
1428 ; X86-SSE41-NEXT: pinsrd $1, %ecx, %xmm0
1429 ; X86-SSE41-NEXT: shrb %al
1430 ; X86-SSE41-NEXT: movzbl %al, %eax
1431 ; X86-SSE41-NEXT: negl %eax
1432 ; X86-SSE41-NEXT: pinsrd $2, %eax, %xmm0
1433 ; X86-SSE41-NEXT: pinsrd $3, %eax, %xmm0
1434 ; X86-SSE41-NEXT: retl
1436 %X = load <2 x i1>, <2 x i1>* %ptr
1437 %Y = sext <2 x i1> %X to <2 x i64>
; Loads a <2 x i8> value through %ptr and sign-extends it to <2 x i64>.
; Per the autogenerated checks below, SSE2 and SSSE3 builds perform a 16-bit
; scalar load (movzwl), move it into xmm0, unpack bytes up to dwords, shift
; right arithmetically by 24 and interleave a pcmpgtd sign mask. SSE4.1 and
; AVX builds fold the load into a single pmovsxbq with a memory operand.
1441 define <2 x i64> @load_sext_2i8_to_2i64(<2 x i8> *%ptr) {
1442 ; SSE2-LABEL: load_sext_2i8_to_2i64:
1443 ; SSE2: # %bb.0: # %entry
1444 ; SSE2-NEXT: movzwl (%rdi), %eax
1445 ; SSE2-NEXT: movd %eax, %xmm0
1446 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1447 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1448 ; SSE2-NEXT: pxor %xmm1, %xmm1
1449 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
1450 ; SSE2-NEXT: psrad $24, %xmm0
1451 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1454 ; SSSE3-LABEL: load_sext_2i8_to_2i64:
1455 ; SSSE3: # %bb.0: # %entry
1456 ; SSSE3-NEXT: movzwl (%rdi), %eax
1457 ; SSSE3-NEXT: movd %eax, %xmm0
1458 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1459 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1460 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1461 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
1462 ; SSSE3-NEXT: psrad $24, %xmm0
1463 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1466 ; SSE41-LABEL: load_sext_2i8_to_2i64:
1467 ; SSE41: # %bb.0: # %entry
1468 ; SSE41-NEXT: pmovsxbq (%rdi), %xmm0
1471 ; AVX-LABEL: load_sext_2i8_to_2i64:
1472 ; AVX: # %bb.0: # %entry
1473 ; AVX-NEXT: vpmovsxbq (%rdi), %xmm0
1476 ; X86-SSE2-LABEL: load_sext_2i8_to_2i64:
1477 ; X86-SSE2: # %bb.0: # %entry
1478 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
1479 ; X86-SSE2-NEXT: movzwl (%eax), %eax
1480 ; X86-SSE2-NEXT: movd %eax, %xmm0
1481 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1482 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1483 ; X86-SSE2-NEXT: pxor %xmm1, %xmm1
1484 ; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
1485 ; X86-SSE2-NEXT: psrad $24, %xmm0
1486 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1487 ; X86-SSE2-NEXT: retl
1489 ; X86-SSE41-LABEL: load_sext_2i8_to_2i64:
1490 ; X86-SSE41: # %bb.0: # %entry
1491 ; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
1492 ; X86-SSE41-NEXT: pmovsxbq (%eax), %xmm0
1493 ; X86-SSE41-NEXT: retl
1495 %X = load <2 x i8>, <2 x i8>* %ptr
1496 %Y = sext <2 x i8> %X to <2 x i64>
1500 define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
1501 ; SSE2-LABEL: load_sext_4i1_to_4i32:
1502 ; SSE2: # %bb.0: # %entry
1503 ; SSE2-NEXT: movb (%rdi), %al
1504 ; SSE2-NEXT: movl %eax, %ecx
1505 ; SSE2-NEXT: shrb $3, %cl
1506 ; SSE2-NEXT: movzbl %cl, %ecx
1507 ; SSE2-NEXT: negl %ecx
1508 ; SSE2-NEXT: movd %ecx, %xmm0
1509 ; SSE2-NEXT: movzbl %al, %ecx
1510 ; SSE2-NEXT: shrb $2, %al
1511 ; SSE2-NEXT: movzbl %al, %eax
1512 ; SSE2-NEXT: andl $1, %eax
1513 ; SSE2-NEXT: negl %eax
1514 ; SSE2-NEXT: movd %eax, %xmm1
1515 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1516 ; SSE2-NEXT: movl %ecx, %eax
1517 ; SSE2-NEXT: andl $1, %eax
1518 ; SSE2-NEXT: negl %eax
1519 ; SSE2-NEXT: movd %eax, %xmm0
1520 ; SSE2-NEXT: shrb %cl
1521 ; SSE2-NEXT: movzbl %cl, %eax
1522 ; SSE2-NEXT: andl $1, %eax
1523 ; SSE2-NEXT: negl %eax
1524 ; SSE2-NEXT: movd %eax, %xmm2
1525 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1526 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1529 ; SSSE3-LABEL: load_sext_4i1_to_4i32:
1530 ; SSSE3: # %bb.0: # %entry
1531 ; SSSE3-NEXT: movb (%rdi), %al
1532 ; SSSE3-NEXT: movl %eax, %ecx
1533 ; SSSE3-NEXT: shrb $3, %cl
1534 ; SSSE3-NEXT: movzbl %cl, %ecx
1535 ; SSSE3-NEXT: negl %ecx
1536 ; SSSE3-NEXT: movd %ecx, %xmm0
1537 ; SSSE3-NEXT: movzbl %al, %ecx
1538 ; SSSE3-NEXT: shrb $2, %al
1539 ; SSSE3-NEXT: movzbl %al, %eax
1540 ; SSSE3-NEXT: andl $1, %eax
1541 ; SSSE3-NEXT: negl %eax
1542 ; SSSE3-NEXT: movd %eax, %xmm1
1543 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1544 ; SSSE3-NEXT: movl %ecx, %eax
1545 ; SSSE3-NEXT: andl $1, %eax
1546 ; SSSE3-NEXT: negl %eax
1547 ; SSSE3-NEXT: movd %eax, %xmm0
1548 ; SSSE3-NEXT: shrb %cl
1549 ; SSSE3-NEXT: movzbl %cl, %eax
1550 ; SSSE3-NEXT: andl $1, %eax
1551 ; SSSE3-NEXT: negl %eax
1552 ; SSSE3-NEXT: movd %eax, %xmm2
1553 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1554 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1557 ; SSE41-LABEL: load_sext_4i1_to_4i32:
1558 ; SSE41: # %bb.0: # %entry
1559 ; SSE41-NEXT: movb (%rdi), %al
1560 ; SSE41-NEXT: movzbl %al, %ecx
1561 ; SSE41-NEXT: shrb %al
1562 ; SSE41-NEXT: movzbl %al, %eax
1563 ; SSE41-NEXT: andl $1, %eax
1564 ; SSE41-NEXT: negl %eax
1565 ; SSE41-NEXT: movl %ecx, %edx
1566 ; SSE41-NEXT: andl $1, %edx
1567 ; SSE41-NEXT: negl %edx
1568 ; SSE41-NEXT: movd %edx, %xmm0
1569 ; SSE41-NEXT: pinsrd $1, %eax, %xmm0
1570 ; SSE41-NEXT: movl %ecx, %eax
1571 ; SSE41-NEXT: shrb $2, %al
1572 ; SSE41-NEXT: movzbl %al, %eax
1573 ; SSE41-NEXT: andl $1, %eax
1574 ; SSE41-NEXT: negl %eax
1575 ; SSE41-NEXT: pinsrd $2, %eax, %xmm0
1576 ; SSE41-NEXT: shrb $3, %cl
1577 ; SSE41-NEXT: movzbl %cl, %eax
1578 ; SSE41-NEXT: negl %eax
1579 ; SSE41-NEXT: pinsrd $3, %eax, %xmm0
1582 ; AVX1-LABEL: load_sext_4i1_to_4i32:
1583 ; AVX1: # %bb.0: # %entry
1584 ; AVX1-NEXT: movb (%rdi), %al
1585 ; AVX1-NEXT: movzbl %al, %ecx
1586 ; AVX1-NEXT: shrb %al
1587 ; AVX1-NEXT: movzbl %al, %eax
1588 ; AVX1-NEXT: andl $1, %eax
1589 ; AVX1-NEXT: negl %eax
1590 ; AVX1-NEXT: movl %ecx, %edx
1591 ; AVX1-NEXT: andl $1, %edx
1592 ; AVX1-NEXT: negl %edx
1593 ; AVX1-NEXT: vmovd %edx, %xmm0
1594 ; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
1595 ; AVX1-NEXT: movl %ecx, %eax
1596 ; AVX1-NEXT: shrb $2, %al
1597 ; AVX1-NEXT: movzbl %al, %eax
1598 ; AVX1-NEXT: andl $1, %eax
1599 ; AVX1-NEXT: negl %eax
1600 ; AVX1-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
1601 ; AVX1-NEXT: shrb $3, %cl
1602 ; AVX1-NEXT: movzbl %cl, %eax
1603 ; AVX1-NEXT: negl %eax
1604 ; AVX1-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
1607 ; AVX2-LABEL: load_sext_4i1_to_4i32:
1608 ; AVX2: # %bb.0: # %entry
1609 ; AVX2-NEXT: movb (%rdi), %al
1610 ; AVX2-NEXT: movzbl %al, %ecx
1611 ; AVX2-NEXT: shrb %al
1612 ; AVX2-NEXT: movzbl %al, %eax
1613 ; AVX2-NEXT: andl $1, %eax
1614 ; AVX2-NEXT: negl %eax
1615 ; AVX2-NEXT: movl %ecx, %edx
1616 ; AVX2-NEXT: andl $1, %edx
1617 ; AVX2-NEXT: negl %edx
1618 ; AVX2-NEXT: vmovd %edx, %xmm0
1619 ; AVX2-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
1620 ; AVX2-NEXT: movl %ecx, %eax
1621 ; AVX2-NEXT: shrb $2, %al
1622 ; AVX2-NEXT: movzbl %al, %eax
1623 ; AVX2-NEXT: andl $1, %eax
1624 ; AVX2-NEXT: negl %eax
1625 ; AVX2-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
1626 ; AVX2-NEXT: shrb $3, %cl
1627 ; AVX2-NEXT: movzbl %cl, %eax
1628 ; AVX2-NEXT: negl %eax
1629 ; AVX2-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
1632 ; AVX512F-LABEL: load_sext_4i1_to_4i32:
1633 ; AVX512F: # %bb.0: # %entry
1634 ; AVX512F-NEXT: movzbl (%rdi), %eax
1635 ; AVX512F-NEXT: kmovw %eax, %k1
1636 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1637 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1638 ; AVX512F-NEXT: vzeroupper
1639 ; AVX512F-NEXT: retq
1641 ; AVX512BW-LABEL: load_sext_4i1_to_4i32:
1642 ; AVX512BW: # %bb.0: # %entry
1643 ; AVX512BW-NEXT: movzbl (%rdi), %eax
1644 ; AVX512BW-NEXT: kmovd %eax, %k1
1645 ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1646 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1647 ; AVX512BW-NEXT: vzeroupper
1648 ; AVX512BW-NEXT: retq
1650 ; X86-SSE2-LABEL: load_sext_4i1_to_4i32:
1651 ; X86-SSE2: # %bb.0: # %entry
1652 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
1653 ; X86-SSE2-NEXT: movb (%eax), %al
1654 ; X86-SSE2-NEXT: movl %eax, %ecx
1655 ; X86-SSE2-NEXT: shrb $3, %cl
1656 ; X86-SSE2-NEXT: movzbl %cl, %ecx
1657 ; X86-SSE2-NEXT: negl %ecx
1658 ; X86-SSE2-NEXT: movd %ecx, %xmm0
1659 ; X86-SSE2-NEXT: movl %eax, %ecx
1660 ; X86-SSE2-NEXT: shrb $2, %cl
1661 ; X86-SSE2-NEXT: movzbl %cl, %ecx
1662 ; X86-SSE2-NEXT: andl $1, %ecx
1663 ; X86-SSE2-NEXT: negl %ecx
1664 ; X86-SSE2-NEXT: movd %ecx, %xmm1
1665 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1666 ; X86-SSE2-NEXT: movzbl %al, %ecx
1667 ; X86-SSE2-NEXT: andl $1, %ecx
1668 ; X86-SSE2-NEXT: negl %ecx
1669 ; X86-SSE2-NEXT: movd %ecx, %xmm0
1670 ; X86-SSE2-NEXT: shrb %al
1671 ; X86-SSE2-NEXT: movzbl %al, %eax
1672 ; X86-SSE2-NEXT: andl $1, %eax
1673 ; X86-SSE2-NEXT: negl %eax
1674 ; X86-SSE2-NEXT: movd %eax, %xmm2
1675 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1676 ; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1677 ; X86-SSE2-NEXT: retl
1679 ; X86-SSE41-LABEL: load_sext_4i1_to_4i32:
1680 ; X86-SSE41: # %bb.0: # %entry
1681 ; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
1682 ; X86-SSE41-NEXT: movb (%eax), %al
1683 ; X86-SSE41-NEXT: movl %eax, %ecx
1684 ; X86-SSE41-NEXT: shrb %cl
1685 ; X86-SSE41-NEXT: movzbl %cl, %ecx
1686 ; X86-SSE41-NEXT: andl $1, %ecx
1687 ; X86-SSE41-NEXT: negl %ecx
1688 ; X86-SSE41-NEXT: movzbl %al, %edx
1689 ; X86-SSE41-NEXT: andl $1, %edx
1690 ; X86-SSE41-NEXT: negl %edx
1691 ; X86-SSE41-NEXT: movd %edx, %xmm0
1692 ; X86-SSE41-NEXT: pinsrd $1, %ecx, %xmm0
1693 ; X86-SSE41-NEXT: movl %eax, %ecx
1694 ; X86-SSE41-NEXT: shrb $2, %cl
1695 ; X86-SSE41-NEXT: movzbl %cl, %ecx
1696 ; X86-SSE41-NEXT: andl $1, %ecx
1697 ; X86-SSE41-NEXT: negl %ecx
1698 ; X86-SSE41-NEXT: pinsrd $2, %ecx, %xmm0
1699 ; X86-SSE41-NEXT: shrb $3, %al
1700 ; X86-SSE41-NEXT: movzbl %al, %eax
1701 ; X86-SSE41-NEXT: negl %eax
1702 ; X86-SSE41-NEXT: pinsrd $3, %eax, %xmm0
1703 ; X86-SSE41-NEXT: retl
1705 %X = load <4 x i1>, <4 x i1>* %ptr
1706 %Y = sext <4 x i1> %X to <4 x i32>
; Loads a <4 x i8> vector through %ptr and sign-extends it to <4 x i32>.
; Per the autogenerated checks below, SSE4.1 and AVX targets are expected to
; fold the load into a single pmovsxbd/vpmovsxbd, while SSE2 and SSSE3 targets
; widen with punpcklbw + punpcklwd and then shift right arithmetically by 24.
; Do not hand-edit the check lines; regenerate them with the update script.
1710 define <4 x i32> @load_sext_4i8_to_4i32(<4 x i8> *%ptr) {
1711 ; SSE2-LABEL: load_sext_4i8_to_4i32:
1712 ; SSE2: # %bb.0: # %entry
1713 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1714 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1715 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1716 ; SSE2-NEXT: psrad $24, %xmm0
1719 ; SSSE3-LABEL: load_sext_4i8_to_4i32:
1720 ; SSSE3: # %bb.0: # %entry
1721 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1722 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1723 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1724 ; SSSE3-NEXT: psrad $24, %xmm0
1727 ; SSE41-LABEL: load_sext_4i8_to_4i32:
1728 ; SSE41: # %bb.0: # %entry
1729 ; SSE41-NEXT: pmovsxbd (%rdi), %xmm0
1732 ; AVX-LABEL: load_sext_4i8_to_4i32:
1733 ; AVX: # %bb.0: # %entry
1734 ; AVX-NEXT: vpmovsxbd (%rdi), %xmm0
1737 ; X86-SSE2-LABEL: load_sext_4i8_to_4i32:
1738 ; X86-SSE2: # %bb.0: # %entry
1739 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
1740 ; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1741 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1742 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1743 ; X86-SSE2-NEXT: psrad $24, %xmm0
1744 ; X86-SSE2-NEXT: retl
1746 ; X86-SSE41-LABEL: load_sext_4i8_to_4i32:
1747 ; X86-SSE41: # %bb.0: # %entry
1748 ; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
1749 ; X86-SSE41-NEXT: pmovsxbd (%eax), %xmm0
1750 ; X86-SSE41-NEXT: retl
1752 %X = load <4 x i8>, <4 x i8>* %ptr
1753 %Y = sext <4 x i8> %X to <4 x i32>
; Loads a <4 x i1> mask vector through %ptr and sign-extends it to <4 x i64>,
; so each lane becomes either zero or all-ones.
; Per the autogenerated checks below, the AVX512 configurations are expected
; to move the loaded byte into a mask register (kmovw/kmovd) and materialize
; the result with a zero-masked vpternlogq $255. The other configurations
; load one byte and pull the four bits apart with scalar shifts and masks
; before assembling the vector.
; Do not hand-edit the check lines; regenerate them with the update script.
1757 define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) {
1758 ; SSE2-LABEL: load_sext_4i1_to_4i64:
1759 ; SSE2: # %bb.0: # %entry
1760 ; SSE2-NEXT: movb (%rdi), %al
1761 ; SSE2-NEXT: movl %eax, %ecx
1762 ; SSE2-NEXT: shrb %cl
1763 ; SSE2-NEXT: andb $1, %cl
1764 ; SSE2-NEXT: movzbl %cl, %ecx
1765 ; SSE2-NEXT: movl %eax, %edx
1766 ; SSE2-NEXT: andb $1, %dl
1767 ; SSE2-NEXT: movzbl %dl, %edx
1768 ; SSE2-NEXT: movd %edx, %xmm1
1769 ; SSE2-NEXT: pinsrw $2, %ecx, %xmm1
1770 ; SSE2-NEXT: movl %eax, %ecx
1771 ; SSE2-NEXT: shrb $2, %cl
1772 ; SSE2-NEXT: andb $1, %cl
1773 ; SSE2-NEXT: movzbl %cl, %ecx
1774 ; SSE2-NEXT: pinsrw $4, %ecx, %xmm1
1775 ; SSE2-NEXT: shrb $3, %al
1776 ; SSE2-NEXT: movzbl %al, %eax
1777 ; SSE2-NEXT: pinsrw $6, %eax, %xmm1
1778 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
1779 ; SSE2-NEXT: psllq $63, %xmm0
1780 ; SSE2-NEXT: psrad $31, %xmm0
1781 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1782 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3]
1783 ; SSE2-NEXT: psllq $63, %xmm1
1784 ; SSE2-NEXT: psrad $31, %xmm1
1785 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1788 ; SSSE3-LABEL: load_sext_4i1_to_4i64:
1789 ; SSSE3: # %bb.0: # %entry
1790 ; SSSE3-NEXT: movb (%rdi), %al
1791 ; SSSE3-NEXT: movl %eax, %ecx
1792 ; SSSE3-NEXT: shrb %cl
1793 ; SSSE3-NEXT: andb $1, %cl
1794 ; SSSE3-NEXT: movzbl %cl, %ecx
1795 ; SSSE3-NEXT: movl %eax, %edx
1796 ; SSSE3-NEXT: andb $1, %dl
1797 ; SSSE3-NEXT: movzbl %dl, %edx
1798 ; SSSE3-NEXT: movd %edx, %xmm1
1799 ; SSSE3-NEXT: pinsrw $2, %ecx, %xmm1
1800 ; SSSE3-NEXT: movl %eax, %ecx
1801 ; SSSE3-NEXT: shrb $2, %cl
1802 ; SSSE3-NEXT: andb $1, %cl
1803 ; SSSE3-NEXT: movzbl %cl, %ecx
1804 ; SSSE3-NEXT: pinsrw $4, %ecx, %xmm1
1805 ; SSSE3-NEXT: shrb $3, %al
1806 ; SSSE3-NEXT: movzbl %al, %eax
1807 ; SSSE3-NEXT: pinsrw $6, %eax, %xmm1
1808 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
1809 ; SSSE3-NEXT: psllq $63, %xmm0
1810 ; SSSE3-NEXT: psrad $31, %xmm0
1811 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1812 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3]
1813 ; SSSE3-NEXT: psllq $63, %xmm1
1814 ; SSSE3-NEXT: psrad $31, %xmm1
1815 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1818 ; SSE41-LABEL: load_sext_4i1_to_4i64:
1819 ; SSE41: # %bb.0: # %entry
1820 ; SSE41-NEXT: movb (%rdi), %al
1821 ; SSE41-NEXT: movl %eax, %ecx
1822 ; SSE41-NEXT: shrb %cl
1823 ; SSE41-NEXT: andb $1, %cl
1824 ; SSE41-NEXT: movzbl %cl, %ecx
1825 ; SSE41-NEXT: movl %eax, %edx
1826 ; SSE41-NEXT: andb $1, %dl
1827 ; SSE41-NEXT: movzbl %dl, %edx
1828 ; SSE41-NEXT: movd %edx, %xmm1
1829 ; SSE41-NEXT: pinsrb $4, %ecx, %xmm1
1830 ; SSE41-NEXT: movl %eax, %ecx
1831 ; SSE41-NEXT: shrb $2, %cl
1832 ; SSE41-NEXT: andb $1, %cl
1833 ; SSE41-NEXT: movzbl %cl, %ecx
1834 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
1835 ; SSE41-NEXT: pinsrb $8, %ecx, %xmm1
1836 ; SSE41-NEXT: shrb $3, %al
1837 ; SSE41-NEXT: movzbl %al, %eax
1838 ; SSE41-NEXT: pinsrb $12, %eax, %xmm1
1839 ; SSE41-NEXT: psllq $63, %xmm0
1840 ; SSE41-NEXT: psrad $31, %xmm0
1841 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1842 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1843 ; SSE41-NEXT: psllq $63, %xmm1
1844 ; SSE41-NEXT: psrad $31, %xmm1
1845 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1848 ; AVX1-LABEL: load_sext_4i1_to_4i64:
1849 ; AVX1: # %bb.0: # %entry
1850 ; AVX1-NEXT: movb (%rdi), %al
1851 ; AVX1-NEXT: movzbl %al, %ecx
1852 ; AVX1-NEXT: shrb %al
1853 ; AVX1-NEXT: movzbl %al, %eax
1854 ; AVX1-NEXT: andl $1, %eax
1855 ; AVX1-NEXT: negl %eax
1856 ; AVX1-NEXT: movl %ecx, %edx
1857 ; AVX1-NEXT: andl $1, %edx
1858 ; AVX1-NEXT: negl %edx
1859 ; AVX1-NEXT: vmovd %edx, %xmm0
1860 ; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
1861 ; AVX1-NEXT: movl %ecx, %eax
1862 ; AVX1-NEXT: shrb $2, %al
1863 ; AVX1-NEXT: movzbl %al, %eax
1864 ; AVX1-NEXT: andl $1, %eax
1865 ; AVX1-NEXT: negl %eax
1866 ; AVX1-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
1867 ; AVX1-NEXT: shrb $3, %cl
1868 ; AVX1-NEXT: movzbl %cl, %eax
1869 ; AVX1-NEXT: negl %eax
1870 ; AVX1-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
1871 ; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1
1872 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1873 ; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
1874 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1877 ; AVX2-LABEL: load_sext_4i1_to_4i64:
1878 ; AVX2: # %bb.0: # %entry
1879 ; AVX2-NEXT: movb (%rdi), %al
1880 ; AVX2-NEXT: movl %eax, %ecx
1881 ; AVX2-NEXT: shrb $3, %cl
1882 ; AVX2-NEXT: movzbl %cl, %ecx
1883 ; AVX2-NEXT: negq %rcx
1884 ; AVX2-NEXT: vmovq %rcx, %xmm0
1885 ; AVX2-NEXT: movzbl %al, %ecx
1886 ; AVX2-NEXT: shrb $2, %al
1887 ; AVX2-NEXT: movzbl %al, %eax
1888 ; AVX2-NEXT: andl $1, %eax
1889 ; AVX2-NEXT: negq %rax
1890 ; AVX2-NEXT: vmovq %rax, %xmm1
1891 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1892 ; AVX2-NEXT: movl %ecx, %eax
1893 ; AVX2-NEXT: andl $1, %eax
1894 ; AVX2-NEXT: negq %rax
1895 ; AVX2-NEXT: vmovq %rax, %xmm1
1896 ; AVX2-NEXT: shrb %cl
1897 ; AVX2-NEXT: movzbl %cl, %eax
1898 ; AVX2-NEXT: andl $1, %eax
1899 ; AVX2-NEXT: negq %rax
1900 ; AVX2-NEXT: vmovq %rax, %xmm2
1901 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1902 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1905 ; AVX512F-LABEL: load_sext_4i1_to_4i64:
1906 ; AVX512F: # %bb.0: # %entry
1907 ; AVX512F-NEXT: movzbl (%rdi), %eax
1908 ; AVX512F-NEXT: kmovw %eax, %k1
1909 ; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1910 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1911 ; AVX512F-NEXT: retq
1913 ; AVX512BW-LABEL: load_sext_4i1_to_4i64:
1914 ; AVX512BW: # %bb.0: # %entry
1915 ; AVX512BW-NEXT: movzbl (%rdi), %eax
1916 ; AVX512BW-NEXT: kmovd %eax, %k1
1917 ; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1918 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1919 ; AVX512BW-NEXT: retq
1921 ; X86-SSE2-LABEL: load_sext_4i1_to_4i64:
1922 ; X86-SSE2: # %bb.0: # %entry
1923 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
1924 ; X86-SSE2-NEXT: movb (%eax), %al
1925 ; X86-SSE2-NEXT: movl %eax, %ecx
1926 ; X86-SSE2-NEXT: shrb %cl
1927 ; X86-SSE2-NEXT: andb $1, %cl
1928 ; X86-SSE2-NEXT: movzbl %cl, %ecx
1929 ; X86-SSE2-NEXT: movl %eax, %edx
1930 ; X86-SSE2-NEXT: andb $1, %dl
1931 ; X86-SSE2-NEXT: movzbl %dl, %edx
1932 ; X86-SSE2-NEXT: movd %edx, %xmm1
1933 ; X86-SSE2-NEXT: pinsrw $2, %ecx, %xmm1
1934 ; X86-SSE2-NEXT: movl %eax, %ecx
1935 ; X86-SSE2-NEXT: shrb $2, %cl
1936 ; X86-SSE2-NEXT: andb $1, %cl
1937 ; X86-SSE2-NEXT: movzbl %cl, %ecx
1938 ; X86-SSE2-NEXT: pinsrw $4, %ecx, %xmm1
1939 ; X86-SSE2-NEXT: shrb $3, %al
1940 ; X86-SSE2-NEXT: movzbl %al, %eax
1941 ; X86-SSE2-NEXT: pinsrw $6, %eax, %xmm1
1942 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
1943 ; X86-SSE2-NEXT: psllq $63, %xmm0
1944 ; X86-SSE2-NEXT: psrad $31, %xmm0
1945 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1946 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3]
1947 ; X86-SSE2-NEXT: psllq $63, %xmm1
1948 ; X86-SSE2-NEXT: psrad $31, %xmm1
1949 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1950 ; X86-SSE2-NEXT: retl
1952 ; X86-SSE41-LABEL: load_sext_4i1_to_4i64:
1953 ; X86-SSE41: # %bb.0: # %entry
1954 ; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
1955 ; X86-SSE41-NEXT: movb (%eax), %al
1956 ; X86-SSE41-NEXT: movl %eax, %ecx
1957 ; X86-SSE41-NEXT: shrb %cl
1958 ; X86-SSE41-NEXT: andb $1, %cl
1959 ; X86-SSE41-NEXT: movzbl %cl, %ecx
1960 ; X86-SSE41-NEXT: movl %eax, %edx
1961 ; X86-SSE41-NEXT: andb $1, %dl
1962 ; X86-SSE41-NEXT: movzbl %dl, %edx
1963 ; X86-SSE41-NEXT: movd %edx, %xmm1
1964 ; X86-SSE41-NEXT: pinsrb $4, %ecx, %xmm1
1965 ; X86-SSE41-NEXT: movl %eax, %ecx
1966 ; X86-SSE41-NEXT: shrb $2, %cl
1967 ; X86-SSE41-NEXT: andb $1, %cl
1968 ; X86-SSE41-NEXT: movzbl %cl, %ecx
1969 ; X86-SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
1970 ; X86-SSE41-NEXT: pinsrb $8, %ecx, %xmm1
1971 ; X86-SSE41-NEXT: shrb $3, %al
1972 ; X86-SSE41-NEXT: movzbl %al, %eax
1973 ; X86-SSE41-NEXT: pinsrb $12, %eax, %xmm1
1974 ; X86-SSE41-NEXT: psllq $63, %xmm0
1975 ; X86-SSE41-NEXT: psrad $31, %xmm0
1976 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1977 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1978 ; X86-SSE41-NEXT: psllq $63, %xmm1
1979 ; X86-SSE41-NEXT: psrad $31, %xmm1
1980 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1981 ; X86-SSE41-NEXT: retl
1983 %X = load <4 x i1>, <4 x i1>* %ptr
1984 %Y = sext <4 x i1> %X to <4 x i64>
; Loads a <4 x i8> vector through %ptr and sign-extends it to <4 x i64>.
; Per the autogenerated checks below, SSE4.1 targets are expected to use two
; memory-folded pmovsxbq at byte offsets 0 and 2, AVX1 two vpmovsxbq joined
; with vinsertf128, and AVX2 / AVX512 a single vpmovsxbq into ymm0. SSE2 and
; SSSE3 targets widen with unpacks, psrad $24 and a pcmpgtd-built sign mask.
; Do not hand-edit the check lines; regenerate them with the update script.
1988 define <4 x i64> @load_sext_4i8_to_4i64(<4 x i8> *%ptr) {
1989 ; SSE2-LABEL: load_sext_4i8_to_4i64:
1990 ; SSE2: # %bb.0: # %entry
1991 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1992 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1993 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1994 ; SSE2-NEXT: psrad $24, %xmm1
1995 ; SSE2-NEXT: pxor %xmm2, %xmm2
1996 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
1997 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1998 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1999 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2002 ; SSSE3-LABEL: load_sext_4i8_to_4i64:
2003 ; SSSE3: # %bb.0: # %entry
2004 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2005 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2006 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2007 ; SSSE3-NEXT: psrad $24, %xmm1
2008 ; SSSE3-NEXT: pxor %xmm2, %xmm2
2009 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
2010 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
2011 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2012 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2015 ; SSE41-LABEL: load_sext_4i8_to_4i64:
2016 ; SSE41: # %bb.0: # %entry
2017 ; SSE41-NEXT: pmovsxbq (%rdi), %xmm0
2018 ; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm1
2021 ; AVX1-LABEL: load_sext_4i8_to_4i64:
2022 ; AVX1: # %bb.0: # %entry
2023 ; AVX1-NEXT: vpmovsxbq 2(%rdi), %xmm0
2024 ; AVX1-NEXT: vpmovsxbq (%rdi), %xmm1
2025 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2028 ; AVX2-LABEL: load_sext_4i8_to_4i64:
2029 ; AVX2: # %bb.0: # %entry
2030 ; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0
2033 ; AVX512-LABEL: load_sext_4i8_to_4i64:
2034 ; AVX512: # %bb.0: # %entry
2035 ; AVX512-NEXT: vpmovsxbq (%rdi), %ymm0
2038 ; X86-SSE2-LABEL: load_sext_4i8_to_4i64:
2039 ; X86-SSE2: # %bb.0: # %entry
2040 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
2041 ; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2042 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2043 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2044 ; X86-SSE2-NEXT: psrad $24, %xmm1
2045 ; X86-SSE2-NEXT: pxor %xmm2, %xmm2
2046 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
2047 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
2048 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2049 ; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2050 ; X86-SSE2-NEXT: retl
2052 ; X86-SSE41-LABEL: load_sext_4i8_to_4i64:
2053 ; X86-SSE41: # %bb.0: # %entry
2054 ; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
2055 ; X86-SSE41-NEXT: pmovsxbq (%eax), %xmm0
2056 ; X86-SSE41-NEXT: pmovsxbq 2(%eax), %xmm1
2057 ; X86-SSE41-NEXT: retl
2059 %X = load <4 x i8>, <4 x i8>* %ptr
2060 %Y = sext <4 x i8> %X to <4 x i64>
; Loads a <4 x i8> vector through %ptr, sign-extends it to <4 x i64>, then
; uses a shufflevector to keep only elements 2 and 3 and returns them as
; <2 x i64>.
; Per the autogenerated checks below, SSE4.1 and AVX1 targets are expected to
; extend just the needed half with one pmovsxbq/vpmovsxbq from byte offset 2.
; The AVX2 and AVX512 checks currently record a full-width vpmovsxbq into
; ymm0 followed by vextracti128 of the upper half.
; Do not hand-edit the check lines; regenerate them with the update script.
2064 define <2 x i64> @load_sext_4i8_to_4i64_extract(<4 x i8> *%ptr) {
2065 ; SSE2-LABEL: load_sext_4i8_to_4i64_extract:
2067 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2068 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2069 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2070 ; SSE2-NEXT: psrad $24, %xmm0
2071 ; SSE2-NEXT: pxor %xmm1, %xmm1
2072 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
2073 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2076 ; SSSE3-LABEL: load_sext_4i8_to_4i64_extract:
2078 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2079 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2080 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2081 ; SSSE3-NEXT: psrad $24, %xmm0
2082 ; SSSE3-NEXT: pxor %xmm1, %xmm1
2083 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
2084 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2087 ; SSE41-LABEL: load_sext_4i8_to_4i64_extract:
2089 ; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm0
2092 ; AVX1-LABEL: load_sext_4i8_to_4i64_extract:
2094 ; AVX1-NEXT: vpmovsxbq 2(%rdi), %xmm0
2097 ; AVX2-LABEL: load_sext_4i8_to_4i64_extract:
2099 ; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0
2100 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
2101 ; AVX2-NEXT: vzeroupper
2104 ; AVX512-LABEL: load_sext_4i8_to_4i64_extract:
2106 ; AVX512-NEXT: vpmovsxbq (%rdi), %ymm0
2107 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
2108 ; AVX512-NEXT: vzeroupper
2111 ; X86-SSE2-LABEL: load_sext_4i8_to_4i64_extract:
2112 ; X86-SSE2: # %bb.0:
2113 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
2114 ; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2115 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2116 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2117 ; X86-SSE2-NEXT: psrad $24, %xmm0
2118 ; X86-SSE2-NEXT: pxor %xmm1, %xmm1
2119 ; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
2120 ; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2121 ; X86-SSE2-NEXT: retl
2123 ; X86-SSE41-LABEL: load_sext_4i8_to_4i64_extract:
2124 ; X86-SSE41: # %bb.0:
2125 ; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
2126 ; X86-SSE41-NEXT: pmovsxbq 2(%eax), %xmm0
2127 ; X86-SSE41-NEXT: retl
2128 %ld = load <4 x i8>, <4 x i8>* %ptr
2129 %sext = sext <4 x i8> %ld to <4 x i64>
2130 %extract = shufflevector <4 x i64> %sext, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
2131 ret <2 x i64> %extract
; Loads a <8 x i1> mask vector through %ptr and sign-extends it to <8 x i16>,
; so each lane becomes either zero or all-ones.
; Per the autogenerated checks below, the SSE, AVX1 and AVX2 configurations
; broadcast the loaded byte across the vector, AND it with the per-lane bit
; constants [1,2,4,8,16,32,64,128] and compare for equality against the same
; constants (pcmpeqw). AVX512F goes through a mask register, a zero-masked
; vpternlogd $255 and a vpmovdw truncate, while AVX512BW uses kmovd followed
; by vpmovm2w.
; Do not hand-edit the check lines; regenerate them with the update script.
2134 define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
2135 ; SSE-LABEL: load_sext_8i1_to_8i16:
2136 ; SSE: # %bb.0: # %entry
2137 ; SSE-NEXT: movzbl (%rdi), %eax
2138 ; SSE-NEXT: movd %eax, %xmm0
2139 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2140 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2141 ; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
2142 ; SSE-NEXT: pand %xmm1, %xmm0
2143 ; SSE-NEXT: pcmpeqw %xmm1, %xmm0
2146 ; AVX1-LABEL: load_sext_8i1_to_8i16:
2147 ; AVX1: # %bb.0: # %entry
2148 ; AVX1-NEXT: movzbl (%rdi), %eax
2149 ; AVX1-NEXT: vmovd %eax, %xmm0
2150 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2151 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2152 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
2153 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
2154 ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
2157 ; AVX2-LABEL: load_sext_8i1_to_8i16:
2158 ; AVX2: # %bb.0: # %entry
2159 ; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0
2160 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
2161 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
2162 ; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
2165 ; AVX512F-LABEL: load_sext_8i1_to_8i16:
2166 ; AVX512F: # %bb.0: # %entry
2167 ; AVX512F-NEXT: movzbl (%rdi), %eax
2168 ; AVX512F-NEXT: kmovw %eax, %k1
2169 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2170 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
2171 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2172 ; AVX512F-NEXT: vzeroupper
2173 ; AVX512F-NEXT: retq
2175 ; AVX512BW-LABEL: load_sext_8i1_to_8i16:
2176 ; AVX512BW: # %bb.0: # %entry
2177 ; AVX512BW-NEXT: movzbl (%rdi), %eax
2178 ; AVX512BW-NEXT: kmovd %eax, %k0
2179 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
2180 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
2181 ; AVX512BW-NEXT: vzeroupper
2182 ; AVX512BW-NEXT: retq
2184 ; X86-SSE-LABEL: load_sext_8i1_to_8i16:
2185 ; X86-SSE: # %bb.0: # %entry
2186 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
2187 ; X86-SSE-NEXT: movzbl (%eax), %eax
2188 ; X86-SSE-NEXT: movd %eax, %xmm0
2189 ; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2190 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2191 ; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
2192 ; X86-SSE-NEXT: pand %xmm1, %xmm0
2193 ; X86-SSE-NEXT: pcmpeqw %xmm1, %xmm0
2194 ; X86-SSE-NEXT: retl
2196 %X = load <8 x i1>, <8 x i1>* %ptr
2197 %Y = sext <8 x i1> %X to <8 x i16>
; Loads a <8 x i8> vector through %ptr and sign-extends it to <8 x i16>.
; Per the autogenerated checks below, SSE4.1 and AVX targets are expected to
; fold the load into a single pmovsxbw/vpmovsxbw, while SSE2 and SSSE3 targets
; load with movq, duplicate bytes with punpcklbw and shift right
; arithmetically by 8 (psraw).
; Do not hand-edit the check lines; regenerate them with the update script.
2201 define <8 x i16> @load_sext_8i8_to_8i16(<8 x i8> *%ptr) {
2202 ; SSE2-LABEL: load_sext_8i8_to_8i16:
2203 ; SSE2: # %bb.0: # %entry
2204 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
2205 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2206 ; SSE2-NEXT: psraw $8, %xmm0
2209 ; SSSE3-LABEL: load_sext_8i8_to_8i16:
2210 ; SSSE3: # %bb.0: # %entry
2211 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
2212 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2213 ; SSSE3-NEXT: psraw $8, %xmm0
2216 ; SSE41-LABEL: load_sext_8i8_to_8i16:
2217 ; SSE41: # %bb.0: # %entry
2218 ; SSE41-NEXT: pmovsxbw (%rdi), %xmm0
2221 ; AVX-LABEL: load_sext_8i8_to_8i16:
2222 ; AVX: # %bb.0: # %entry
2223 ; AVX-NEXT: vpmovsxbw (%rdi), %xmm0
2226 ; X86-SSE2-LABEL: load_sext_8i8_to_8i16:
2227 ; X86-SSE2: # %bb.0: # %entry
2228 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
2229 ; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
2230 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2231 ; X86-SSE2-NEXT: psraw $8, %xmm0
2232 ; X86-SSE2-NEXT: retl
2234 ; X86-SSE41-LABEL: load_sext_8i8_to_8i16:
2235 ; X86-SSE41: # %bb.0: # %entry
2236 ; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
2237 ; X86-SSE41-NEXT: pmovsxbw (%eax), %xmm0
2238 ; X86-SSE41-NEXT: retl
2240 %X = load <8 x i8>, <8 x i8>* %ptr
2241 %Y = sext <8 x i8> %X to <8 x i16>
; Loads a <8 x i8> vector through %ptr and sign-extends it to <8 x i64>.
; Per the autogenerated checks below, SSE4.1 targets are expected to use four
; memory-folded pmovsxbq at byte offsets 0, 2, 4 and 6, AVX1 four vpmovsxbq
; joined pairwise with vinsertf128, AVX2 two vpmovsxbq at offsets 0 and 4, and
; AVX512 a single vpmovsxbq into zmm0. SSE2 and SSSE3 targets widen with
; unpacks, psrad $24 and pcmpgtd-built sign masks.
; Do not hand-edit the check lines; regenerate them with the update script.
2245 define <8 x i64> @load_sext_8i8_to_8i64(<8 x i8> *%ptr) {
2246 ; SSE2-LABEL: load_sext_8i8_to_8i64:
2247 ; SSE2: # %bb.0: # %entry
2248 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
2249 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2250 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2251 ; SSE2-NEXT: psrad $24, %xmm1
2252 ; SSE2-NEXT: pxor %xmm4, %xmm4
2253 ; SSE2-NEXT: pxor %xmm3, %xmm3
2254 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
2255 ; SSE2-NEXT: movdqa %xmm1, %xmm0
2256 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
2257 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
2258 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2259 ; SSE2-NEXT: psrad $24, %xmm3
2260 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
2261 ; SSE2-NEXT: movdqa %xmm3, %xmm2
2262 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
2263 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
2266 ; SSSE3-LABEL: load_sext_8i8_to_8i64:
2267 ; SSSE3: # %bb.0: # %entry
2268 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
2269 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2270 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2271 ; SSSE3-NEXT: psrad $24, %xmm1
2272 ; SSSE3-NEXT: pxor %xmm4, %xmm4
2273 ; SSSE3-NEXT: pxor %xmm3, %xmm3
2274 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
2275 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
2276 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
2277 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
2278 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2279 ; SSSE3-NEXT: psrad $24, %xmm3
2280 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4
2281 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
2282 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
2283 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
2286 ; SSE41-LABEL: load_sext_8i8_to_8i64:
2287 ; SSE41: # %bb.0: # %entry
2288 ; SSE41-NEXT: pmovsxbq (%rdi), %xmm0
2289 ; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm1
2290 ; SSE41-NEXT: pmovsxbq 4(%rdi), %xmm2
2291 ; SSE41-NEXT: pmovsxbq 6(%rdi), %xmm3
2294 ; AVX1-LABEL: load_sext_8i8_to_8i64:
2295 ; AVX1: # %bb.0: # %entry
2296 ; AVX1-NEXT: vpmovsxbq 6(%rdi), %xmm1
2297 ; AVX1-NEXT: vpmovsxbq 4(%rdi), %xmm2
2298 ; AVX1-NEXT: vpmovsxbq 2(%rdi), %xmm0
2299 ; AVX1-NEXT: vpmovsxbq (%rdi), %xmm3
2300 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
2301 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
2304 ; AVX2-LABEL: load_sext_8i8_to_8i64:
2305 ; AVX2: # %bb.0: # %entry
2306 ; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0
2307 ; AVX2-NEXT: vpmovsxbq 4(%rdi), %ymm1
2310 ; AVX512-LABEL: load_sext_8i8_to_8i64:
2311 ; AVX512: # %bb.0: # %entry
2312 ; AVX512-NEXT: vpmovsxbq (%rdi), %zmm0
2315 ; X86-SSE2-LABEL: load_sext_8i8_to_8i64:
2316 ; X86-SSE2: # %bb.0: # %entry
2317 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
2318 ; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
2319 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2320 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2321 ; X86-SSE2-NEXT: psrad $24, %xmm1
2322 ; X86-SSE2-NEXT: pxor %xmm4, %xmm4
2323 ; X86-SSE2-NEXT: pxor %xmm3, %xmm3
2324 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
2325 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
2326 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
2327 ; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
2328 ; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2329 ; X86-SSE2-NEXT: psrad $24, %xmm3
2330 ; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
2331 ; X86-SSE2-NEXT: movdqa %xmm3, %xmm2
2332 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
2333 ; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
2334 ; X86-SSE2-NEXT: retl
2336 ; X86-SSE41-LABEL: load_sext_8i8_to_8i64:
2337 ; X86-SSE41: # %bb.0: # %entry
2338 ; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
2339 ; X86-SSE41-NEXT: pmovsxbq (%eax), %xmm0
2340 ; X86-SSE41-NEXT: pmovsxbq 2(%eax), %xmm1
2341 ; X86-SSE41-NEXT: pmovsxbq 4(%eax), %xmm2
2342 ; X86-SSE41-NEXT: pmovsxbq 6(%eax), %xmm3
2343 ; X86-SSE41-NEXT: retl
2345 %X = load <8 x i8>, <8 x i8>* %ptr
2346 %Y = sext <8 x i8> %X to <8 x i64>
; Loads a <8 x i1> mask vector through %ptr and sign-extends it to <8 x i32>,
; so each lane becomes either zero or all-ones.
; Per the autogenerated checks below, the SSE configurations broadcast the
; loaded byte, AND it with the per-lane bit constants [1,2,4,8] and
; [16,32,64,128] and compare for equality with pcmpeqd. AVX1 masks with a
; constant-pool vandps and compares each 128-bit half separately. AVX2 does
; the same on a whole ymm register after vpbroadcastb. The AVX512
; configurations move the byte into a mask register and use a zero-masked
; vpternlogd $255.
; Do not hand-edit the check lines; regenerate them with the update script.
2350 define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) {
2351 ; SSE-LABEL: load_sext_8i1_to_8i32:
2352 ; SSE: # %bb.0: # %entry
2353 ; SSE-NEXT: movzbl (%rdi), %eax
2354 ; SSE-NEXT: movd %eax, %xmm0
2355 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
2356 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8]
2357 ; SSE-NEXT: movdqa %xmm1, %xmm0
2358 ; SSE-NEXT: pand %xmm2, %xmm0
2359 ; SSE-NEXT: pcmpeqd %xmm2, %xmm0
2360 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
2361 ; SSE-NEXT: pand %xmm2, %xmm1
2362 ; SSE-NEXT: pcmpeqd %xmm2, %xmm1
2365 ; AVX1-LABEL: load_sext_8i1_to_8i32:
2366 ; AVX1: # %bb.0: # %entry
2367 ; AVX1-NEXT: movzbl (%rdi), %eax
2368 ; AVX1-NEXT: vmovd %eax, %xmm0
2369 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2370 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2371 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
2372 ; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
2373 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2374 ; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2375 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2378 ; AVX2-LABEL: load_sext_8i1_to_8i32:
2379 ; AVX2: # %bb.0: # %entry
2380 ; AVX2-NEXT: vpbroadcastb (%rdi), %ymm0
2381 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
2382 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
2383 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
2386 ; AVX512F-LABEL: load_sext_8i1_to_8i32:
2387 ; AVX512F: # %bb.0: # %entry
2388 ; AVX512F-NEXT: movzbl (%rdi), %eax
2389 ; AVX512F-NEXT: kmovw %eax, %k1
2390 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2391 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
2392 ; AVX512F-NEXT: retq
2394 ; AVX512BW-LABEL: load_sext_8i1_to_8i32:
2395 ; AVX512BW: # %bb.0: # %entry
2396 ; AVX512BW-NEXT: movzbl (%rdi), %eax
2397 ; AVX512BW-NEXT: kmovd %eax, %k1
2398 ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2399 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
2400 ; AVX512BW-NEXT: retq
2402 ; X86-SSE-LABEL: load_sext_8i1_to_8i32:
2403 ; X86-SSE: # %bb.0: # %entry
2404 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
2405 ; X86-SSE-NEXT: movzbl (%eax), %eax
2406 ; X86-SSE-NEXT: movd %eax, %xmm0
2407 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
2408 ; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8]
2409 ; X86-SSE-NEXT: movdqa %xmm1, %xmm0
2410 ; X86-SSE-NEXT: pand %xmm2, %xmm0
2411 ; X86-SSE-NEXT: pcmpeqd %xmm2, %xmm0
2412 ; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
2413 ; X86-SSE-NEXT: pand %xmm2, %xmm1
2414 ; X86-SSE-NEXT: pcmpeqd %xmm2, %xmm1
2415 ; X86-SSE-NEXT: retl
2417 %X = load <8 x i1>, <8 x i1>* %ptr
2418 %Y = sext <8 x i1> %X to <8 x i32>
; Loads a <8 x i8> vector through %ptr and sign-extends it to <8 x i32>.
; Per the autogenerated checks below, SSE4.1 targets are expected to use two
; memory-folded pmovsxbd at byte offsets 0 and 4, AVX1 two vpmovsxbd joined
; with vinsertf128, and AVX2 / AVX512 a single vpmovsxbd into ymm0. SSE2 and
; SSSE3 targets widen with punpcklbw plus punpcklwd/punpckhwd and then shift
; right arithmetically by 24.
; Do not hand-edit the check lines; regenerate them with the update script.
2422 define <8 x i32> @load_sext_8i8_to_8i32(<8 x i8> *%ptr) {
2423 ; SSE2-LABEL: load_sext_8i8_to_8i32:
2424 ; SSE2: # %bb.0: # %entry
2425 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
2426 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2427 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2428 ; SSE2-NEXT: psrad $24, %xmm0
2429 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
2430 ; SSE2-NEXT: psrad $24, %xmm1
2433 ; SSSE3-LABEL: load_sext_8i8_to_8i32:
2434 ; SSSE3: # %bb.0: # %entry
2435 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
2436 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2437 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2438 ; SSSE3-NEXT: psrad $24, %xmm0
2439 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
2440 ; SSSE3-NEXT: psrad $24, %xmm1
2443 ; SSE41-LABEL: load_sext_8i8_to_8i32:
2444 ; SSE41: # %bb.0: # %entry
2445 ; SSE41-NEXT: pmovsxbd (%rdi), %xmm0
2446 ; SSE41-NEXT: pmovsxbd 4(%rdi), %xmm1
2449 ; AVX1-LABEL: load_sext_8i8_to_8i32:
2450 ; AVX1: # %bb.0: # %entry
2451 ; AVX1-NEXT: vpmovsxbd 4(%rdi), %xmm0
2452 ; AVX1-NEXT: vpmovsxbd (%rdi), %xmm1
2453 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2456 ; AVX2-LABEL: load_sext_8i8_to_8i32:
2457 ; AVX2: # %bb.0: # %entry
2458 ; AVX2-NEXT: vpmovsxbd (%rdi), %ymm0
2461 ; AVX512-LABEL: load_sext_8i8_to_8i32:
2462 ; AVX512: # %bb.0: # %entry
2463 ; AVX512-NEXT: vpmovsxbd (%rdi), %ymm0
2466 ; X86-SSE2-LABEL: load_sext_8i8_to_8i32:
2467 ; X86-SSE2: # %bb.0: # %entry
2468 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
2469 ; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
2470 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2471 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2472 ; X86-SSE2-NEXT: psrad $24, %xmm0
2473 ; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
2474 ; X86-SSE2-NEXT: psrad $24, %xmm1
2475 ; X86-SSE2-NEXT: retl
2477 ; X86-SSE41-LABEL: load_sext_8i8_to_8i32:
2478 ; X86-SSE41: # %bb.0: # %entry
2479 ; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
2480 ; X86-SSE41-NEXT: pmovsxbd (%eax), %xmm0
2481 ; X86-SSE41-NEXT: pmovsxbd 4(%eax), %xmm1
2482 ; X86-SSE41-NEXT: retl
2484 %X = load <8 x i8>, <8 x i8>* %ptr
2485 %Y = sext <8 x i8> %X to <8 x i32>
; Test case: load a <16 x i1> mask from %ptr and sign-extend every lane to i8,
; producing a <16 x i8> result.
; The FileCheck lines in this function are autogenerated (see the file header);
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
; NOTE(review): the function is declared readnone but its body loads from
; %ptr -- confirm the attribute is intentional for this test.
2489 define <16 x i8> @load_sext_16i1_to_16i8(<16 x i1> *%ptr) nounwind readnone {
2490 ; SSE2-LABEL: load_sext_16i1_to_16i8:
2491 ; SSE2: # %bb.0: # %entry
2492 ; SSE2-NEXT: movzwl (%rdi), %eax
2493 ; SSE2-NEXT: movd %eax, %xmm0
2494 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2495 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
2496 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2497 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
2498 ; SSE2-NEXT: pand %xmm1, %xmm0
2499 ; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
2502 ; SSSE3-LABEL: load_sext_16i1_to_16i8:
2503 ; SSSE3: # %bb.0: # %entry
2504 ; SSSE3-NEXT: movzwl (%rdi), %eax
2505 ; SSSE3-NEXT: movd %eax, %xmm0
2506 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
2507 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
2508 ; SSSE3-NEXT: pand %xmm1, %xmm0
2509 ; SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
2512 ; SSE41-LABEL: load_sext_16i1_to_16i8:
2513 ; SSE41: # %bb.0: # %entry
2514 ; SSE41-NEXT: movzwl (%rdi), %eax
2515 ; SSE41-NEXT: movd %eax, %xmm0
2516 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
2517 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
2518 ; SSE41-NEXT: pand %xmm1, %xmm0
2519 ; SSE41-NEXT: pcmpeqb %xmm1, %xmm0
2522 ; AVX1-LABEL: load_sext_16i1_to_16i8:
2523 ; AVX1: # %bb.0: # %entry
2524 ; AVX1-NEXT: movzwl (%rdi), %eax
2525 ; AVX1-NEXT: vmovd %eax, %xmm0
2526 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
2527 ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
2528 ; AVX1-NEXT: # xmm1 = mem[0,0]
2529 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
2530 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
2533 ; AVX2-LABEL: load_sext_16i1_to_16i8:
2534 ; AVX2: # %bb.0: # %entry
2535 ; AVX2-NEXT: movzwl (%rdi), %eax
2536 ; AVX2-NEXT: vmovd %eax, %xmm0
2537 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
2538 ; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
2539 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
2540 ; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
2543 ; AVX512F-LABEL: load_sext_16i1_to_16i8:
2544 ; AVX512F: # %bb.0: # %entry
2545 ; AVX512F-NEXT: kmovw (%rdi), %k1
2546 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2547 ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
2548 ; AVX512F-NEXT: vzeroupper
2549 ; AVX512F-NEXT: retq
2551 ; AVX512BW-LABEL: load_sext_16i1_to_16i8:
2552 ; AVX512BW: # %bb.0: # %entry
2553 ; AVX512BW-NEXT: kmovw (%rdi), %k0
2554 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
2555 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
2556 ; AVX512BW-NEXT: vzeroupper
2557 ; AVX512BW-NEXT: retq
2559 ; X86-SSE2-LABEL: load_sext_16i1_to_16i8:
2560 ; X86-SSE2: # %bb.0: # %entry
2561 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
2562 ; X86-SSE2-NEXT: movzwl (%eax), %eax
2563 ; X86-SSE2-NEXT: movd %eax, %xmm0
2564 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2565 ; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
2566 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2567 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
2568 ; X86-SSE2-NEXT: pand %xmm1, %xmm0
2569 ; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
2570 ; X86-SSE2-NEXT: retl
2572 ; X86-SSE41-LABEL: load_sext_16i1_to_16i8:
2573 ; X86-SSE41: # %bb.0: # %entry
2574 ; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
2575 ; X86-SSE41-NEXT: movzwl (%eax), %eax
2576 ; X86-SSE41-NEXT: movd %eax, %xmm0
2577 ; X86-SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
2578 ; X86-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
2579 ; X86-SSE41-NEXT: pand %xmm1, %xmm0
2580 ; X86-SSE41-NEXT: pcmpeqb %xmm1, %xmm0
2581 ; X86-SSE41-NEXT: retl
2583 %X = load <16 x i1>, <16 x i1>* %ptr
2584 %Y = sext <16 x i1> %X to <16 x i8>
; Test case: load a <16 x i1> mask from %ptr and sign-extend every lane to
; i16, producing a <16 x i16> result.
; The FileCheck lines in this function are autogenerated (see the file header);
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
2588 define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
2589 ; SSE-LABEL: load_sext_16i1_to_16i16:
2590 ; SSE: # %bb.0: # %entry
2591 ; SSE-NEXT: movzwl (%rdi), %eax
2592 ; SSE-NEXT: movd %eax, %xmm0
2593 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2594 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
2595 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
2596 ; SSE-NEXT: movdqa %xmm1, %xmm0
2597 ; SSE-NEXT: pand %xmm2, %xmm0
2598 ; SSE-NEXT: pcmpeqw %xmm2, %xmm0
2599 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
2600 ; SSE-NEXT: pand %xmm2, %xmm1
2601 ; SSE-NEXT: pcmpeqw %xmm2, %xmm1
2604 ; AVX1-LABEL: load_sext_16i1_to_16i16:
2605 ; AVX1: # %bb.0: # %entry
2606 ; AVX1-NEXT: movzwl (%rdi), %eax
2607 ; AVX1-NEXT: vmovd %eax, %xmm0
2608 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2609 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2610 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2611 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
2612 ; AVX1-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
2613 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2614 ; AVX1-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2615 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2618 ; AVX2-LABEL: load_sext_16i1_to_16i16:
2619 ; AVX2: # %bb.0: # %entry
2620 ; AVX2-NEXT: vpbroadcastw (%rdi), %ymm0
2621 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
2622 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
2623 ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
2626 ; AVX512F-LABEL: load_sext_16i1_to_16i16:
2627 ; AVX512F: # %bb.0: # %entry
2628 ; AVX512F-NEXT: kmovw (%rdi), %k1
2629 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2630 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
2631 ; AVX512F-NEXT: retq
2633 ; AVX512BW-LABEL: load_sext_16i1_to_16i16:
2634 ; AVX512BW: # %bb.0: # %entry
2635 ; AVX512BW-NEXT: kmovw (%rdi), %k0
2636 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
2637 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
2638 ; AVX512BW-NEXT: retq
2640 ; X86-SSE-LABEL: load_sext_16i1_to_16i16:
2641 ; X86-SSE: # %bb.0: # %entry
2642 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
2643 ; X86-SSE-NEXT: movzwl (%eax), %eax
2644 ; X86-SSE-NEXT: movd %eax, %xmm0
2645 ; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2646 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
2647 ; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
2648 ; X86-SSE-NEXT: movdqa %xmm1, %xmm0
2649 ; X86-SSE-NEXT: pand %xmm2, %xmm0
2650 ; X86-SSE-NEXT: pcmpeqw %xmm2, %xmm0
2651 ; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
2652 ; X86-SSE-NEXT: pand %xmm2, %xmm1
2653 ; X86-SSE-NEXT: pcmpeqw %xmm2, %xmm1
2654 ; X86-SSE-NEXT: retl
2656 %X = load <16 x i1>, <16 x i1>* %ptr
2657 %Y = sext <16 x i1> %X to <16 x i16>
; Test case: load a <32 x i1> mask from %ptr and sign-extend every lane to i8,
; producing a <32 x i8> result.
; The FileCheck lines in this function are autogenerated (see the file header);
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
; NOTE(review): the function is declared readnone but its body loads from
; %ptr -- confirm the attribute is intentional for this test.
2661 define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone {
2662 ; SSE-LABEL: load_sext_32i1_to_32i8:
2663 ; SSE: # %bb.0: # %entry
2664 ; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2665 ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2666 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
2667 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2668 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
2669 ; SSE-NEXT: pand %xmm2, %xmm0
2670 ; SSE-NEXT: pcmpeqb %xmm2, %xmm0
2671 ; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
2672 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
2673 ; SSE-NEXT: pand %xmm2, %xmm1
2674 ; SSE-NEXT: pcmpeqb %xmm2, %xmm1
2677 ; AVX1-LABEL: load_sext_32i1_to_32i8:
2678 ; AVX1: # %bb.0: # %entry
2679 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2680 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2681 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
2682 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
2683 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2684 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
2685 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
2686 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2687 ; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9241421688590303745,9241421688590303745]
2688 ; AVX1-NEXT: # xmm2 = mem[0,0]
2689 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
2690 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
2691 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2694 ; AVX2-LABEL: load_sext_32i1_to_32i8:
2695 ; AVX2: # %bb.0: # %entry
2696 ; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2697 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2698 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
2699 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
2700 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
2701 ; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
2704 ; AVX512F-LABEL: load_sext_32i1_to_32i8:
2705 ; AVX512F: # %bb.0: # %entry
2706 ; AVX512F-NEXT: kmovw (%rdi), %k1
2707 ; AVX512F-NEXT: kmovw 2(%rdi), %k2
2708 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2709 ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
2710 ; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
2711 ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
2712 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2713 ; AVX512F-NEXT: retq
2715 ; AVX512BW-LABEL: load_sext_32i1_to_32i8:
2716 ; AVX512BW: # %bb.0: # %entry
2717 ; AVX512BW-NEXT: kmovd (%rdi), %k0
2718 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
2719 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
2720 ; AVX512BW-NEXT: retq
2722 ; X86-SSE-LABEL: load_sext_32i1_to_32i8:
2723 ; X86-SSE: # %bb.0: # %entry
2724 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
2725 ; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2726 ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2727 ; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
2728 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2729 ; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
2730 ; X86-SSE-NEXT: pand %xmm2, %xmm0
2731 ; X86-SSE-NEXT: pcmpeqb %xmm2, %xmm0
2732 ; X86-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
2733 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
2734 ; X86-SSE-NEXT: pand %xmm2, %xmm1
2735 ; X86-SSE-NEXT: pcmpeqb %xmm2, %xmm1
2736 ; X86-SSE-NEXT: retl
2738 %X = load <32 x i1>, <32 x i1>* %ptr
2739 %Y = sext <32 x i1> %X to <32 x i8>
; Test case: load a <16 x i8> vector from %ptr and sign-extend every lane to
; i16, producing a <16 x i16> result.
; The FileCheck lines in this function are autogenerated (see the file header);
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
2743 define <16 x i16> @load_sext_16i8_to_16i16(<16 x i8> *%ptr) {
2744 ; SSE2-LABEL: load_sext_16i8_to_16i16:
2745 ; SSE2: # %bb.0: # %entry
2746 ; SSE2-NEXT: movdqa (%rdi), %xmm1
2747 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2748 ; SSE2-NEXT: psraw $8, %xmm0
2749 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
2750 ; SSE2-NEXT: psraw $8, %xmm1
2753 ; SSSE3-LABEL: load_sext_16i8_to_16i16:
2754 ; SSSE3: # %bb.0: # %entry
2755 ; SSSE3-NEXT: movdqa (%rdi), %xmm1
2756 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2757 ; SSSE3-NEXT: psraw $8, %xmm0
2758 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
2759 ; SSSE3-NEXT: psraw $8, %xmm1
2762 ; SSE41-LABEL: load_sext_16i8_to_16i16:
2763 ; SSE41: # %bb.0: # %entry
2764 ; SSE41-NEXT: pmovsxbw (%rdi), %xmm0
2765 ; SSE41-NEXT: pmovsxbw 8(%rdi), %xmm1
2768 ; AVX1-LABEL: load_sext_16i8_to_16i16:
2769 ; AVX1: # %bb.0: # %entry
2770 ; AVX1-NEXT: vpmovsxbw 8(%rdi), %xmm0
2771 ; AVX1-NEXT: vpmovsxbw (%rdi), %xmm1
2772 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2775 ; AVX2-LABEL: load_sext_16i8_to_16i16:
2776 ; AVX2: # %bb.0: # %entry
2777 ; AVX2-NEXT: vpmovsxbw (%rdi), %ymm0
2780 ; AVX512-LABEL: load_sext_16i8_to_16i16:
2781 ; AVX512: # %bb.0: # %entry
2782 ; AVX512-NEXT: vpmovsxbw (%rdi), %ymm0
2785 ; X86-SSE2-LABEL: load_sext_16i8_to_16i16:
2786 ; X86-SSE2: # %bb.0: # %entry
2787 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
2788 ; X86-SSE2-NEXT: movdqa (%eax), %xmm1
2789 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2790 ; X86-SSE2-NEXT: psraw $8, %xmm0
2791 ; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
2792 ; X86-SSE2-NEXT: psraw $8, %xmm1
2793 ; X86-SSE2-NEXT: retl
2795 ; X86-SSE41-LABEL: load_sext_16i8_to_16i16:
2796 ; X86-SSE41: # %bb.0: # %entry
2797 ; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
2798 ; X86-SSE41-NEXT: pmovsxbw (%eax), %xmm0
2799 ; X86-SSE41-NEXT: pmovsxbw 8(%eax), %xmm1
2800 ; X86-SSE41-NEXT: retl
2802 %X = load <16 x i8>, <16 x i8>* %ptr
2803 %Y = sext <16 x i8> %X to <16 x i16>
; Test case: load a <2 x i16> vector from %ptr and sign-extend every lane to
; i64, producing a <2 x i64> result.
; The FileCheck lines in this function are autogenerated (see the file header);
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
2807 define <2 x i64> @load_sext_2i16_to_2i64(<2 x i16> *%ptr) {
2808 ; SSE2-LABEL: load_sext_2i16_to_2i64:
2809 ; SSE2: # %bb.0: # %entry
2810 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2811 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
2812 ; SSE2-NEXT: pxor %xmm1, %xmm1
2813 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
2814 ; SSE2-NEXT: psrad $16, %xmm0
2815 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2818 ; SSSE3-LABEL: load_sext_2i16_to_2i64:
2819 ; SSSE3: # %bb.0: # %entry
2820 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2821 ; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
2822 ; SSSE3-NEXT: pxor %xmm1, %xmm1
2823 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
2824 ; SSSE3-NEXT: psrad $16, %xmm0
2825 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2828 ; SSE41-LABEL: load_sext_2i16_to_2i64:
2829 ; SSE41: # %bb.0: # %entry
2830 ; SSE41-NEXT: pmovsxwq (%rdi), %xmm0
2833 ; AVX-LABEL: load_sext_2i16_to_2i64:
2834 ; AVX: # %bb.0: # %entry
2835 ; AVX-NEXT: vpmovsxwq (%rdi), %xmm0
2838 ; X86-SSE2-LABEL: load_sext_2i16_to_2i64:
2839 ; X86-SSE2: # %bb.0: # %entry
2840 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
2841 ; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2842 ; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
2843 ; X86-SSE2-NEXT: pxor %xmm1, %xmm1
2844 ; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
2845 ; X86-SSE2-NEXT: psrad $16, %xmm0
2846 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2847 ; X86-SSE2-NEXT: retl
2849 ; X86-SSE41-LABEL: load_sext_2i16_to_2i64:
2850 ; X86-SSE41: # %bb.0: # %entry
2851 ; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
2852 ; X86-SSE41-NEXT: pmovsxwq (%eax), %xmm0
2853 ; X86-SSE41-NEXT: retl
2855 %X = load <2 x i16>, <2 x i16>* %ptr
2856 %Y = sext <2 x i16> %X to <2 x i64>
; Test case: load a <4 x i16> vector from %ptr and sign-extend every lane to
; i32, producing a <4 x i32> result.
; The FileCheck lines in this function are autogenerated (see the file header);
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
2860 define <4 x i32> @load_sext_4i16_to_4i32(<4 x i16> *%ptr) {
2861 ; SSE2-LABEL: load_sext_4i16_to_4i32:
2862 ; SSE2: # %bb.0: # %entry
2863 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
2864 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2865 ; SSE2-NEXT: psrad $16, %xmm0
2868 ; SSSE3-LABEL: load_sext_4i16_to_4i32:
2869 ; SSSE3: # %bb.0: # %entry
2870 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
2871 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2872 ; SSSE3-NEXT: psrad $16, %xmm0
2875 ; SSE41-LABEL: load_sext_4i16_to_4i32:
2876 ; SSE41: # %bb.0: # %entry
2877 ; SSE41-NEXT: pmovsxwd (%rdi), %xmm0
2880 ; AVX-LABEL: load_sext_4i16_to_4i32:
2881 ; AVX: # %bb.0: # %entry
2882 ; AVX-NEXT: vpmovsxwd (%rdi), %xmm0
2885 ; X86-SSE2-LABEL: load_sext_4i16_to_4i32:
2886 ; X86-SSE2: # %bb.0: # %entry
2887 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
2888 ; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
2889 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2890 ; X86-SSE2-NEXT: psrad $16, %xmm0
2891 ; X86-SSE2-NEXT: retl
2893 ; X86-SSE41-LABEL: load_sext_4i16_to_4i32:
2894 ; X86-SSE41: # %bb.0: # %entry
2895 ; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
2896 ; X86-SSE41-NEXT: pmovsxwd (%eax), %xmm0
2897 ; X86-SSE41-NEXT: retl
2899 %X = load <4 x i16>, <4 x i16>* %ptr
2900 %Y = sext <4 x i16> %X to <4 x i32>
; Test case: load a <4 x i16> vector from %ptr and sign-extend every lane to
; i64, producing a <4 x i64> result.
; The FileCheck lines in this function are autogenerated (see the file header);
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
2904 define <4 x i64> @load_sext_4i16_to_4i64(<4 x i16> *%ptr) {
2905 ; SSE2-LABEL: load_sext_4i16_to_4i64:
2906 ; SSE2: # %bb.0: # %entry
2907 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
2908 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2909 ; SSE2-NEXT: psrad $16, %xmm1
2910 ; SSE2-NEXT: pxor %xmm2, %xmm2
2911 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
2912 ; SSE2-NEXT: movdqa %xmm1, %xmm0
2913 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2914 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2917 ; SSSE3-LABEL: load_sext_4i16_to_4i64:
2918 ; SSSE3: # %bb.0: # %entry
2919 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
2920 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2921 ; SSSE3-NEXT: psrad $16, %xmm1
2922 ; SSSE3-NEXT: pxor %xmm2, %xmm2
2923 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
2924 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
2925 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2926 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2929 ; SSE41-LABEL: load_sext_4i16_to_4i64:
2930 ; SSE41: # %bb.0: # %entry
2931 ; SSE41-NEXT: pmovsxwq (%rdi), %xmm0
2932 ; SSE41-NEXT: pmovsxwq 4(%rdi), %xmm1
2935 ; AVX1-LABEL: load_sext_4i16_to_4i64:
2936 ; AVX1: # %bb.0: # %entry
2937 ; AVX1-NEXT: vpmovsxwq 4(%rdi), %xmm0
2938 ; AVX1-NEXT: vpmovsxwq (%rdi), %xmm1
2939 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2942 ; AVX2-LABEL: load_sext_4i16_to_4i64:
2943 ; AVX2: # %bb.0: # %entry
2944 ; AVX2-NEXT: vpmovsxwq (%rdi), %ymm0
2947 ; AVX512-LABEL: load_sext_4i16_to_4i64:
2948 ; AVX512: # %bb.0: # %entry
2949 ; AVX512-NEXT: vpmovsxwq (%rdi), %ymm0
2952 ; X86-SSE2-LABEL: load_sext_4i16_to_4i64:
2953 ; X86-SSE2: # %bb.0: # %entry
2954 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
2955 ; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
2956 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2957 ; X86-SSE2-NEXT: psrad $16, %xmm1
2958 ; X86-SSE2-NEXT: pxor %xmm2, %xmm2
2959 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
2960 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
2961 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2962 ; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2963 ; X86-SSE2-NEXT: retl
2965 ; X86-SSE41-LABEL: load_sext_4i16_to_4i64:
2966 ; X86-SSE41: # %bb.0: # %entry
2967 ; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
2968 ; X86-SSE41-NEXT: pmovsxwq (%eax), %xmm0
2969 ; X86-SSE41-NEXT: pmovsxwq 4(%eax), %xmm1
2970 ; X86-SSE41-NEXT: retl
2972 %X = load <4 x i16>, <4 x i16>* %ptr
2973 %Y = sext <4 x i16> %X to <4 x i64>
; Test case: load an <8 x i16> vector from %ptr and sign-extend every lane to
; i32, producing an <8 x i32> result.
; The FileCheck lines in this function are autogenerated (see the file header);
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
2977 define <8 x i32> @load_sext_8i16_to_8i32(<8 x i16> *%ptr) {
2978 ; SSE2-LABEL: load_sext_8i16_to_8i32:
2979 ; SSE2: # %bb.0: # %entry
2980 ; SSE2-NEXT: movdqa (%rdi), %xmm1
2981 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2982 ; SSE2-NEXT: psrad $16, %xmm0
2983 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
2984 ; SSE2-NEXT: psrad $16, %xmm1
2987 ; SSSE3-LABEL: load_sext_8i16_to_8i32:
2988 ; SSSE3: # %bb.0: # %entry
2989 ; SSSE3-NEXT: movdqa (%rdi), %xmm1
2990 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2991 ; SSSE3-NEXT: psrad $16, %xmm0
2992 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
2993 ; SSSE3-NEXT: psrad $16, %xmm1
2996 ; SSE41-LABEL: load_sext_8i16_to_8i32:
2997 ; SSE41: # %bb.0: # %entry
2998 ; SSE41-NEXT: pmovsxwd (%rdi), %xmm0
2999 ; SSE41-NEXT: pmovsxwd 8(%rdi), %xmm1
3002 ; AVX1-LABEL: load_sext_8i16_to_8i32:
3003 ; AVX1: # %bb.0: # %entry
3004 ; AVX1-NEXT: vpmovsxwd 8(%rdi), %xmm0
3005 ; AVX1-NEXT: vpmovsxwd (%rdi), %xmm1
3006 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3009 ; AVX2-LABEL: load_sext_8i16_to_8i32:
3010 ; AVX2: # %bb.0: # %entry
3011 ; AVX2-NEXT: vpmovsxwd (%rdi), %ymm0
3014 ; AVX512-LABEL: load_sext_8i16_to_8i32:
3015 ; AVX512: # %bb.0: # %entry
3016 ; AVX512-NEXT: vpmovsxwd (%rdi), %ymm0
3019 ; X86-SSE2-LABEL: load_sext_8i16_to_8i32:
3020 ; X86-SSE2: # %bb.0: # %entry
3021 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
3022 ; X86-SSE2-NEXT: movdqa (%eax), %xmm1
3023 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3024 ; X86-SSE2-NEXT: psrad $16, %xmm0
3025 ; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
3026 ; X86-SSE2-NEXT: psrad $16, %xmm1
3027 ; X86-SSE2-NEXT: retl
3029 ; X86-SSE41-LABEL: load_sext_8i16_to_8i32:
3030 ; X86-SSE41: # %bb.0: # %entry
3031 ; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
3032 ; X86-SSE41-NEXT: pmovsxwd (%eax), %xmm0
3033 ; X86-SSE41-NEXT: pmovsxwd 8(%eax), %xmm1
3034 ; X86-SSE41-NEXT: retl
3036 %X = load <8 x i16>, <8 x i16>* %ptr
3037 %Y = sext <8 x i16> %X to <8 x i32>
; Test case: load a <2 x i32> vector from %ptr and sign-extend every lane to
; i64, producing a <2 x i64> result.
; The FileCheck lines in this function are autogenerated (see the file header);
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
3041 define <2 x i64> @load_sext_2i32_to_2i64(<2 x i32> *%ptr) {
3042 ; SSE2-LABEL: load_sext_2i32_to_2i64:
3043 ; SSE2: # %bb.0: # %entry
3044 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
3045 ; SSE2-NEXT: pxor %xmm1, %xmm1
3046 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
3047 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3050 ; SSSE3-LABEL: load_sext_2i32_to_2i64:
3051 ; SSSE3: # %bb.0: # %entry
3052 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
3053 ; SSSE3-NEXT: pxor %xmm1, %xmm1
3054 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
3055 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3058 ; SSE41-LABEL: load_sext_2i32_to_2i64:
3059 ; SSE41: # %bb.0: # %entry
3060 ; SSE41-NEXT: pmovsxdq (%rdi), %xmm0
3063 ; AVX-LABEL: load_sext_2i32_to_2i64:
3064 ; AVX: # %bb.0: # %entry
3065 ; AVX-NEXT: vpmovsxdq (%rdi), %xmm0
3068 ; X86-SSE2-LABEL: load_sext_2i32_to_2i64:
3069 ; X86-SSE2: # %bb.0: # %entry
3070 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
3071 ; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
3072 ; X86-SSE2-NEXT: pxor %xmm1, %xmm1
3073 ; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
3074 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3075 ; X86-SSE2-NEXT: retl
3077 ; X86-SSE41-LABEL: load_sext_2i32_to_2i64:
3078 ; X86-SSE41: # %bb.0: # %entry
3079 ; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
3080 ; X86-SSE41-NEXT: pmovsxdq (%eax), %xmm0
3081 ; X86-SSE41-NEXT: retl
3083 %X = load <2 x i32>, <2 x i32>* %ptr
3084 %Y = sext <2 x i32> %X to <2 x i64>
; Test case: load a <4 x i32> vector from %ptr and sign-extend every lane to
; i64, producing a <4 x i64> result.
; The FileCheck lines in this function are autogenerated (see the file header);
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
3088 define <4 x i64> @load_sext_4i32_to_4i64(<4 x i32> *%ptr) {
3089 ; SSE2-LABEL: load_sext_4i32_to_4i64:
3090 ; SSE2: # %bb.0: # %entry
3091 ; SSE2-NEXT: movdqa (%rdi), %xmm0
3092 ; SSE2-NEXT: pxor %xmm2, %xmm2
3093 ; SSE2-NEXT: pxor %xmm3, %xmm3
3094 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
3095 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
3096 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
3097 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
3098 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3101 ; SSSE3-LABEL: load_sext_4i32_to_4i64:
3102 ; SSSE3: # %bb.0: # %entry
3103 ; SSSE3-NEXT: movdqa (%rdi), %xmm0
3104 ; SSSE3-NEXT: pxor %xmm2, %xmm2
3105 ; SSSE3-NEXT: pxor %xmm3, %xmm3
3106 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3
3107 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
3108 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
3109 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
3110 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3113 ; SSE41-LABEL: load_sext_4i32_to_4i64:
3114 ; SSE41: # %bb.0: # %entry
3115 ; SSE41-NEXT: pmovsxdq (%rdi), %xmm0
3116 ; SSE41-NEXT: pmovsxdq 8(%rdi), %xmm1
3119 ; AVX1-LABEL: load_sext_4i32_to_4i64:
3120 ; AVX1: # %bb.0: # %entry
3121 ; AVX1-NEXT: vpmovsxdq 8(%rdi), %xmm0
3122 ; AVX1-NEXT: vpmovsxdq (%rdi), %xmm1
3123 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3126 ; AVX2-LABEL: load_sext_4i32_to_4i64:
3127 ; AVX2: # %bb.0: # %entry
3128 ; AVX2-NEXT: vpmovsxdq (%rdi), %ymm0
3131 ; AVX512-LABEL: load_sext_4i32_to_4i64:
3132 ; AVX512: # %bb.0: # %entry
3133 ; AVX512-NEXT: vpmovsxdq (%rdi), %ymm0
3136 ; X86-SSE2-LABEL: load_sext_4i32_to_4i64:
3137 ; X86-SSE2: # %bb.0: # %entry
3138 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
3139 ; X86-SSE2-NEXT: movdqa (%eax), %xmm0
3140 ; X86-SSE2-NEXT: pxor %xmm2, %xmm2
3141 ; X86-SSE2-NEXT: pxor %xmm3, %xmm3
3142 ; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3
3143 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
3144 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
3145 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
3146 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3147 ; X86-SSE2-NEXT: retl
3149 ; X86-SSE41-LABEL: load_sext_4i32_to_4i64:
3150 ; X86-SSE41: # %bb.0: # %entry
3151 ; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
3152 ; X86-SSE41-NEXT: pmovsxdq (%eax), %xmm0
3153 ; X86-SSE41-NEXT: pmovsxdq 8(%eax), %xmm1
3154 ; X86-SSE41-NEXT: retl
3156 %X = load <4 x i32>, <4 x i32>* %ptr
3157 %Y = sext <4 x i32> %X to <4 x i64>
; Test case: take elements 0 and 1 of the <16 x i8> argument %A (via
; shufflevector), sign-extend them to <2 x i16>, and bitcast that pair to a
; single i32 value (%Bc).
; The FileCheck lines in this function are autogenerated (see the file header);
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
3161 define i32 @sext_2i8_to_i32(<16 x i8> %A) nounwind uwtable readnone ssp {
3162 ; SSE2-LABEL: sext_2i8_to_i32:
3163 ; SSE2: # %bb.0: # %entry
3164 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
3165 ; SSE2-NEXT: psraw $8, %xmm0
3166 ; SSE2-NEXT: movd %xmm0, %eax
3169 ; SSSE3-LABEL: sext_2i8_to_i32:
3170 ; SSSE3: # %bb.0: # %entry
3171 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
3172 ; SSSE3-NEXT: psraw $8, %xmm0
3173 ; SSSE3-NEXT: movd %xmm0, %eax
3176 ; SSE41-LABEL: sext_2i8_to_i32:
3177 ; SSE41: # %bb.0: # %entry
3178 ; SSE41-NEXT: pmovsxbw %xmm0, %xmm0
3179 ; SSE41-NEXT: movd %xmm0, %eax
3182 ; AVX-LABEL: sext_2i8_to_i32:
3183 ; AVX: # %bb.0: # %entry
3184 ; AVX-NEXT: vpmovsxbw %xmm0, %xmm0
3185 ; AVX-NEXT: vmovd %xmm0, %eax
3188 ; X86-SSE2-LABEL: sext_2i8_to_i32:
3189 ; X86-SSE2: # %bb.0: # %entry
3190 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
3191 ; X86-SSE2-NEXT: psraw $8, %xmm0
3192 ; X86-SSE2-NEXT: movd %xmm0, %eax
3193 ; X86-SSE2-NEXT: retl
3195 ; X86-SSE41-LABEL: sext_2i8_to_i32:
3196 ; X86-SSE41: # %bb.0: # %entry
3197 ; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm0
3198 ; X86-SSE41-NEXT: movd %xmm0, %eax
3199 ; X86-SSE41-NEXT: retl
3201 %Shuf = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
3202 %Ex = sext <2 x i8> %Shuf to <2 x i16>
3203 %Bc = bitcast <2 x i16> %Ex to i32
; Test case: sign-extend a <4 x i1> mask passed by value to <4 x i64> and
; return it. No load is involved; the argument arrives in a vector register.
; The FileCheck lines in this function are autogenerated (see the file header);
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
3207 define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
3208 ; SSE2-LABEL: sext_4i1_to_4i64:
3210 ; SSE2-NEXT: pslld $31, %xmm0
3211 ; SSE2-NEXT: psrad $31, %xmm0
3212 ; SSE2-NEXT: pxor %xmm2, %xmm2
3213 ; SSE2-NEXT: pxor %xmm3, %xmm3
3214 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
3215 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
3216 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
3217 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
3218 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3221 ; SSSE3-LABEL: sext_4i1_to_4i64:
3223 ; SSSE3-NEXT: pslld $31, %xmm0
3224 ; SSSE3-NEXT: psrad $31, %xmm0
3225 ; SSSE3-NEXT: pxor %xmm2, %xmm2
3226 ; SSSE3-NEXT: pxor %xmm3, %xmm3
3227 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3
3228 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
3229 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
3230 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
3231 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3234 ; SSE41-LABEL: sext_4i1_to_4i64:
3236 ; SSE41-NEXT: pslld $31, %xmm0
3237 ; SSE41-NEXT: psrad $31, %xmm0
3238 ; SSE41-NEXT: pmovsxdq %xmm0, %xmm2
3239 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
3240 ; SSE41-NEXT: pmovsxdq %xmm0, %xmm1
3241 ; SSE41-NEXT: movdqa %xmm2, %xmm0
3244 ; AVX1-LABEL: sext_4i1_to_4i64:
3246 ; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
3247 ; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
3248 ; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1
3249 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
3250 ; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
3251 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3254 ; AVX2-LABEL: sext_4i1_to_4i64:
3256 ; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
3257 ; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
3258 ; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
3261 ; AVX512-LABEL: sext_4i1_to_4i64:
3263 ; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
3264 ; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0
3265 ; AVX512-NEXT: vpmovsxdq %xmm0, %ymm0
3268 ; X86-SSE2-LABEL: sext_4i1_to_4i64:
3269 ; X86-SSE2: # %bb.0:
3270 ; X86-SSE2-NEXT: pslld $31, %xmm0
3271 ; X86-SSE2-NEXT: psrad $31, %xmm0
3272 ; X86-SSE2-NEXT: pxor %xmm2, %xmm2
3273 ; X86-SSE2-NEXT: pxor %xmm3, %xmm3
3274 ; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3
3275 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
3276 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
3277 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
3278 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3279 ; X86-SSE2-NEXT: retl
3281 ; X86-SSE41-LABEL: sext_4i1_to_4i64:
3282 ; X86-SSE41: # %bb.0:
3283 ; X86-SSE41-NEXT: pslld $31, %xmm0
3284 ; X86-SSE41-NEXT: psrad $31, %xmm0
3285 ; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm2
3286 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
3287 ; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm1
3288 ; X86-SSE41-NEXT: movdqa %xmm2, %xmm0
3289 ; X86-SSE41-NEXT: retl
3290 %extmask = sext <4 x i1> %mask to <4 x i64>
3291 ret <4 x i64> %extmask
; Test case: sign-extend a <4 x i8> vector passed by value to <4 x i64> and
; return it. The argument is named %mask here although its element type is
; i8 rather than i1.
; The FileCheck lines in this function are autogenerated (see the file header);
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
3294 define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) {
3295 ; SSE2-LABEL: sext_4i8_to_4i64:
3297 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
3298 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
3299 ; SSE2-NEXT: psrad $24, %xmm1
3300 ; SSE2-NEXT: pxor %xmm2, %xmm2
3301 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
3302 ; SSE2-NEXT: movdqa %xmm1, %xmm0
3303 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3304 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
3307 ; SSSE3-LABEL: sext_4i8_to_4i64:
3309 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
3310 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
3311 ; SSSE3-NEXT: psrad $24, %xmm1
3312 ; SSSE3-NEXT: pxor %xmm2, %xmm2
3313 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
3314 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
3315 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3316 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
3319 ; SSE41-LABEL: sext_4i8_to_4i64:
3321 ; SSE41-NEXT: pmovsxbq %xmm0, %xmm2
3322 ; SSE41-NEXT: psrld $16, %xmm0
3323 ; SSE41-NEXT: pmovsxbq %xmm0, %xmm1
3324 ; SSE41-NEXT: movdqa %xmm2, %xmm0
3327 ; AVX1-LABEL: sext_4i8_to_4i64:
3329 ; AVX1-NEXT: vpmovsxbq %xmm0, %xmm1
3330 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
3331 ; AVX1-NEXT: vpmovsxbq %xmm0, %xmm0
3332 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3335 ; AVX2-LABEL: sext_4i8_to_4i64:
3337 ; AVX2-NEXT: vpmovsxbq %xmm0, %ymm0
3340 ; AVX512-LABEL: sext_4i8_to_4i64:
3342 ; AVX512-NEXT: vpmovsxbq %xmm0, %ymm0
3345 ; X86-SSE2-LABEL: sext_4i8_to_4i64:
3346 ; X86-SSE2: # %bb.0:
3347 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
3348 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
3349 ; X86-SSE2-NEXT: psrad $24, %xmm1
3350 ; X86-SSE2-NEXT: pxor %xmm2, %xmm2
3351 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
3352 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
3353 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3354 ; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
3355 ; X86-SSE2-NEXT: retl
3357 ; X86-SSE41-LABEL: sext_4i8_to_4i64:
3358 ; X86-SSE41: # %bb.0:
3359 ; X86-SSE41-NEXT: pmovsxbq %xmm0, %xmm2
3360 ; X86-SSE41-NEXT: psrld $16, %xmm0
3361 ; X86-SSE41-NEXT: pmovsxbq %xmm0, %xmm1
3362 ; X86-SSE41-NEXT: movdqa %xmm2, %xmm0
3363 ; X86-SSE41-NEXT: retl
3364 %extmask = sext <4 x i8> %mask to <4 x i64>
3365 ret <4 x i64> %extmask
; sext_32xi1_to_32xi8 - compares the two <32 x i16> arguments for equality
; (icmp eq) and sign-extends the resulting <32 x i1> mask to <32 x i8>, which
; matches the declared return type.
; NOTE(review): the return instruction is not visible in this excerpt;
; presumably %b is returned - confirm against the full file.
; The expectations below show a word compare followed by a pack (packsswb /
; vpacksswb) for the SSE, AVX1 and AVX2 configurations, and a mask compare
; into k0 followed by vpmovm2b for the AVX512BW configuration.
3368 define <32 x i8> @sext_32xi1_to_32xi8(<32 x i16> %c1, <32 x i16> %c2)nounwind {
3369 ; SSE-LABEL: sext_32xi1_to_32xi8:
3371 ; SSE-NEXT: pcmpeqw %xmm5, %xmm1
3372 ; SSE-NEXT: pcmpeqw %xmm4, %xmm0
3373 ; SSE-NEXT: packsswb %xmm1, %xmm0
3374 ; SSE-NEXT: pcmpeqw %xmm7, %xmm3
3375 ; SSE-NEXT: pcmpeqw %xmm6, %xmm2
3376 ; SSE-NEXT: packsswb %xmm3, %xmm2
3377 ; SSE-NEXT: movdqa %xmm2, %xmm1
3380 ; AVX1-LABEL: sext_32xi1_to_32xi8:
3382 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
3383 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
3384 ; AVX1-NEXT: vpcmpeqw %xmm4, %xmm5, %xmm4
3385 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1
3386 ; AVX1-NEXT: vpacksswb %xmm4, %xmm1, %xmm1
3387 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
3388 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
3389 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm4, %xmm3
3390 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
3391 ; AVX1-NEXT: vpacksswb %xmm3, %xmm0, %xmm0
3392 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3395 ; AVX2-LABEL: sext_32xi1_to_32xi8:
3397 ; AVX2-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1
3398 ; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
3399 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
3400 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
3403 ; AVX512F-LABEL: sext_32xi1_to_32xi8:
3405 ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
3406 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
3407 ; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2
3408 ; AVX512F-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
3409 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
3410 ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
3411 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
3412 ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
3413 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3414 ; AVX512F-NEXT: retq
3416 ; AVX512BW-LABEL: sext_32xi1_to_32xi8:
3417 ; AVX512BW: # %bb.0:
3418 ; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
3419 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
3420 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
3421 ; AVX512BW-NEXT: retq
3423 ; X86-SSE-LABEL: sext_32xi1_to_32xi8:
3425 ; X86-SSE-NEXT: pushl %ebp
3426 ; X86-SSE-NEXT: movl %esp, %ebp
3427 ; X86-SSE-NEXT: andl $-16, %esp
3428 ; X86-SSE-NEXT: subl $16, %esp
3429 ; X86-SSE-NEXT: movdqa 8(%ebp), %xmm3
3430 ; X86-SSE-NEXT: pcmpeqw 40(%ebp), %xmm1
3431 ; X86-SSE-NEXT: pcmpeqw 24(%ebp), %xmm0
3432 ; X86-SSE-NEXT: packsswb %xmm1, %xmm0
3433 ; X86-SSE-NEXT: pcmpeqw 72(%ebp), %xmm3
3434 ; X86-SSE-NEXT: pcmpeqw 56(%ebp), %xmm2
3435 ; X86-SSE-NEXT: packsswb %xmm3, %xmm2
3436 ; X86-SSE-NEXT: movdqa %xmm2, %xmm1
3437 ; X86-SSE-NEXT: movl %ebp, %esp
3438 ; X86-SSE-NEXT: popl %ebp
3439 ; X86-SSE-NEXT: retl
3440 %a = icmp eq <32 x i16> %c1, %c2
3441 %b = sext <32 x i1> %a to <32 x i8>
; sext_2i8_to_2i32 - loads a <2 x i8> vector from %addr (align 1),
; sign-extends it to <2 x i32>, and adds the extended vector to itself.
; NOTE(review): the return instruction is not visible in this excerpt;
; presumably %z is returned - confirm against the full file.
; The expectations below load the two bytes with a single 16-bit movzwl and
; finish with paddd / vpaddd of the extended register with itself.
3445 define <2 x i32> @sext_2i8_to_2i32(<2 x i8>* %addr) {
3446 ; SSE2-LABEL: sext_2i8_to_2i32:
3448 ; SSE2-NEXT: movzwl (%rdi), %eax
3449 ; SSE2-NEXT: movd %eax, %xmm0
3450 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
3451 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
3452 ; SSE2-NEXT: psrad $24, %xmm0
3453 ; SSE2-NEXT: paddd %xmm0, %xmm0
3456 ; SSSE3-LABEL: sext_2i8_to_2i32:
3458 ; SSSE3-NEXT: movzwl (%rdi), %eax
3459 ; SSSE3-NEXT: movd %eax, %xmm0
3460 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
3461 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
3462 ; SSSE3-NEXT: psrad $24, %xmm0
3463 ; SSSE3-NEXT: paddd %xmm0, %xmm0
3466 ; SSE41-LABEL: sext_2i8_to_2i32:
3468 ; SSE41-NEXT: movzwl (%rdi), %eax
3469 ; SSE41-NEXT: movd %eax, %xmm0
3470 ; SSE41-NEXT: pmovsxbd %xmm0, %xmm0
3471 ; SSE41-NEXT: paddd %xmm0, %xmm0
3474 ; AVX-LABEL: sext_2i8_to_2i32:
3476 ; AVX-NEXT: movzwl (%rdi), %eax
3477 ; AVX-NEXT: vmovd %eax, %xmm0
3478 ; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
3479 ; AVX-NEXT: vpaddd %xmm0, %xmm0, %xmm0
3482 ; X86-SSE2-LABEL: sext_2i8_to_2i32:
3483 ; X86-SSE2: # %bb.0:
3484 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
3485 ; X86-SSE2-NEXT: movzwl (%eax), %eax
3486 ; X86-SSE2-NEXT: movd %eax, %xmm0
3487 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
3488 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
3489 ; X86-SSE2-NEXT: psrad $24, %xmm0
3490 ; X86-SSE2-NEXT: paddd %xmm0, %xmm0
3491 ; X86-SSE2-NEXT: retl
3493 ; X86-SSE41-LABEL: sext_2i8_to_2i32:
3494 ; X86-SSE41: # %bb.0:
3495 ; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
3496 ; X86-SSE41-NEXT: movzwl (%eax), %eax
3497 ; X86-SSE41-NEXT: movd %eax, %xmm0
3498 ; X86-SSE41-NEXT: pmovsxbd %xmm0, %xmm0
3499 ; X86-SSE41-NEXT: paddd %xmm0, %xmm0
3500 ; X86-SSE41-NEXT: retl
3501 %x = load <2 x i8>, <2 x i8>* %addr, align 1
3502 %y = sext <2 x i8> %x to <2 x i32>
3503 %z = add <2 x i32>%y, %y
; sext_4i17_to_4i32 - loads a <4 x i17> vector (non-power-of-two element
; width) from %ptr and sign-extends it to <4 x i32>.
; NOTE(review): the return instruction is not visible in this excerpt;
; presumably %b is returned - confirm against the full file.
; The expectations below extract each 17-bit field with scalar shifts and
; sign-extend it in a 32-bit register, typically with a shll $15 / sarl $15
; pair (15 = 32 - 17), before inserting the lanes into xmm0.
3507 define <4 x i32> @sext_4i17_to_4i32(<4 x i17>* %ptr) {
3508 ; SSE2-LABEL: sext_4i17_to_4i32:
3510 ; SSE2-NEXT: movq (%rdi), %rax
3511 ; SSE2-NEXT: movl %eax, %ecx
3512 ; SSE2-NEXT: shll $15, %ecx
3513 ; SSE2-NEXT: sarl $15, %ecx
3514 ; SSE2-NEXT: movd %ecx, %xmm0
3515 ; SSE2-NEXT: movq %rax, %rcx
3516 ; SSE2-NEXT: shrq $17, %rcx
3517 ; SSE2-NEXT: shll $15, %ecx
3518 ; SSE2-NEXT: sarl $15, %ecx
3519 ; SSE2-NEXT: movd %ecx, %xmm1
3520 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3521 ; SSE2-NEXT: movl 8(%rdi), %ecx
3522 ; SSE2-NEXT: shll $13, %ecx
3523 ; SSE2-NEXT: movq %rax, %rdx
3524 ; SSE2-NEXT: shrq $51, %rdx
3525 ; SSE2-NEXT: orl %ecx, %edx
3526 ; SSE2-NEXT: shll $15, %edx
3527 ; SSE2-NEXT: sarl $15, %edx
3528 ; SSE2-NEXT: movd %edx, %xmm1
3529 ; SSE2-NEXT: shrq $34, %rax
3530 ; SSE2-NEXT: shll $15, %eax
3531 ; SSE2-NEXT: sarl $15, %eax
3532 ; SSE2-NEXT: movd %eax, %xmm2
3533 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
3534 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
3537 ; SSSE3-LABEL: sext_4i17_to_4i32:
3539 ; SSSE3-NEXT: movq (%rdi), %rax
3540 ; SSSE3-NEXT: movl %eax, %ecx
3541 ; SSSE3-NEXT: shll $15, %ecx
3542 ; SSSE3-NEXT: sarl $15, %ecx
3543 ; SSSE3-NEXT: movd %ecx, %xmm0
3544 ; SSSE3-NEXT: movq %rax, %rcx
3545 ; SSSE3-NEXT: shrq $17, %rcx
3546 ; SSSE3-NEXT: shll $15, %ecx
3547 ; SSSE3-NEXT: sarl $15, %ecx
3548 ; SSSE3-NEXT: movd %ecx, %xmm1
3549 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3550 ; SSSE3-NEXT: movl 8(%rdi), %ecx
3551 ; SSSE3-NEXT: shll $13, %ecx
3552 ; SSSE3-NEXT: movq %rax, %rdx
3553 ; SSSE3-NEXT: shrq $51, %rdx
3554 ; SSSE3-NEXT: orl %ecx, %edx
3555 ; SSSE3-NEXT: shll $15, %edx
3556 ; SSSE3-NEXT: sarl $15, %edx
3557 ; SSSE3-NEXT: movd %edx, %xmm1
3558 ; SSSE3-NEXT: shrq $34, %rax
3559 ; SSSE3-NEXT: shll $15, %eax
3560 ; SSSE3-NEXT: sarl $15, %eax
3561 ; SSSE3-NEXT: movd %eax, %xmm2
3562 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
3563 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
3566 ; SSE41-LABEL: sext_4i17_to_4i32:
3568 ; SSE41-NEXT: movq (%rdi), %rax
3569 ; SSE41-NEXT: movq %rax, %rcx
3570 ; SSE41-NEXT: shrq $17, %rcx
3571 ; SSE41-NEXT: shll $15, %ecx
3572 ; SSE41-NEXT: sarl $15, %ecx
3573 ; SSE41-NEXT: movl %eax, %edx
3574 ; SSE41-NEXT: shll $15, %edx
3575 ; SSE41-NEXT: sarl $15, %edx
3576 ; SSE41-NEXT: movd %edx, %xmm0
3577 ; SSE41-NEXT: pinsrd $1, %ecx, %xmm0
3578 ; SSE41-NEXT: movq %rax, %rcx
3579 ; SSE41-NEXT: shrq $34, %rcx
3580 ; SSE41-NEXT: shll $15, %ecx
3581 ; SSE41-NEXT: sarl $15, %ecx
3582 ; SSE41-NEXT: pinsrd $2, %ecx, %xmm0
3583 ; SSE41-NEXT: movl 8(%rdi), %ecx
3584 ; SSE41-NEXT: shll $13, %ecx
3585 ; SSE41-NEXT: shrq $51, %rax
3586 ; SSE41-NEXT: orl %ecx, %eax
3587 ; SSE41-NEXT: shll $15, %eax
3588 ; SSE41-NEXT: sarl $15, %eax
3589 ; SSE41-NEXT: pinsrd $3, %eax, %xmm0
3592 ; AVX-LABEL: sext_4i17_to_4i32:
3594 ; AVX-NEXT: movq (%rdi), %rax
3595 ; AVX-NEXT: movq %rax, %rcx
3596 ; AVX-NEXT: shrq $17, %rcx
3597 ; AVX-NEXT: shll $15, %ecx
3598 ; AVX-NEXT: sarl $15, %ecx
3599 ; AVX-NEXT: movl %eax, %edx
3600 ; AVX-NEXT: shll $15, %edx
3601 ; AVX-NEXT: sarl $15, %edx
3602 ; AVX-NEXT: vmovd %edx, %xmm0
3603 ; AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
3604 ; AVX-NEXT: movq %rax, %rcx
3605 ; AVX-NEXT: shrq $34, %rcx
3606 ; AVX-NEXT: shll $15, %ecx
3607 ; AVX-NEXT: sarl $15, %ecx
3608 ; AVX-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
3609 ; AVX-NEXT: movl 8(%rdi), %ecx
3610 ; AVX-NEXT: shll $13, %ecx
3611 ; AVX-NEXT: shrq $51, %rax
3612 ; AVX-NEXT: orl %ecx, %eax
3613 ; AVX-NEXT: shll $15, %eax
3614 ; AVX-NEXT: sarl $15, %eax
3615 ; AVX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
3618 ; X86-SSE2-LABEL: sext_4i17_to_4i32:
3619 ; X86-SSE2: # %bb.0:
3620 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
3621 ; X86-SSE2-NEXT: movl (%eax), %ecx
3622 ; X86-SSE2-NEXT: movl 4(%eax), %edx
3623 ; X86-SSE2-NEXT: movl 8(%eax), %eax
3624 ; X86-SSE2-NEXT: shldl $13, %edx, %eax
3625 ; X86-SSE2-NEXT: shll $15, %eax
3626 ; X86-SSE2-NEXT: sarl $15, %eax
3627 ; X86-SSE2-NEXT: movd %eax, %xmm0
3628 ; X86-SSE2-NEXT: movl %edx, %eax
3629 ; X86-SSE2-NEXT: shll $13, %eax
3630 ; X86-SSE2-NEXT: sarl $15, %eax
3631 ; X86-SSE2-NEXT: movd %eax, %xmm1
3632 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
3633 ; X86-SSE2-NEXT: shldl $15, %ecx, %edx
3634 ; X86-SSE2-NEXT: shll $15, %ecx
3635 ; X86-SSE2-NEXT: sarl $15, %ecx
3636 ; X86-SSE2-NEXT: movd %ecx, %xmm0
3637 ; X86-SSE2-NEXT: shll $15, %edx
3638 ; X86-SSE2-NEXT: sarl $15, %edx
3639 ; X86-SSE2-NEXT: movd %edx, %xmm2
3640 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3641 ; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3642 ; X86-SSE2-NEXT: retl
3644 ; X86-SSE41-LABEL: sext_4i17_to_4i32:
3645 ; X86-SSE41: # %bb.0:
3646 ; X86-SSE41-NEXT: pushl %esi
3647 ; X86-SSE41-NEXT: .cfi_def_cfa_offset 8
3648 ; X86-SSE41-NEXT: .cfi_offset %esi, -8
3649 ; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
3650 ; X86-SSE41-NEXT: movl (%eax), %ecx
3651 ; X86-SSE41-NEXT: movl 4(%eax), %edx
3652 ; X86-SSE41-NEXT: movl %edx, %esi
3653 ; X86-SSE41-NEXT: movl 8(%eax), %eax
3654 ; X86-SSE41-NEXT: shldl $13, %edx, %eax
3655 ; X86-SSE41-NEXT: shldl $15, %ecx, %edx
3656 ; X86-SSE41-NEXT: shll $15, %edx
3657 ; X86-SSE41-NEXT: sarl $15, %edx
3658 ; X86-SSE41-NEXT: shll $15, %ecx
3659 ; X86-SSE41-NEXT: sarl $15, %ecx
3660 ; X86-SSE41-NEXT: movd %ecx, %xmm0
3661 ; X86-SSE41-NEXT: pinsrd $1, %edx, %xmm0
3662 ; X86-SSE41-NEXT: shll $13, %esi
3663 ; X86-SSE41-NEXT: sarl $15, %esi
3664 ; X86-SSE41-NEXT: pinsrd $2, %esi, %xmm0
3665 ; X86-SSE41-NEXT: shll $15, %eax
3666 ; X86-SSE41-NEXT: sarl $15, %eax
3667 ; X86-SSE41-NEXT: pinsrd $3, %eax, %xmm0
3668 ; X86-SSE41-NEXT: popl %esi
3669 ; X86-SSE41-NEXT: .cfi_def_cfa_offset 4
3670 ; X86-SSE41-NEXT: retl
3671 %a = load <4 x i17>, <4 x i17>* %ptr
3672 %b = sext <4 x i17> %a to <4 x i32>
; sext_8i6_to_8i64 - truncates the i32 argument %x to i6, splats it across an
; <8 x i6> vector (insertelement into lane 0 + zero-mask shufflevector), adds
; the constant vector <0, 1, ..., 7>, and sign-extends the sum to <8 x i64>.
; NOTE(review): the return instruction is not visible in this excerpt;
; presumably %e is returned - confirm against the full file.
; The expectations below perform the 6-bit sign extension with shift pairs;
; for example psllq $58 (58 = 64 - 6) followed by arithmetic right shifts in
; the SSE configurations, vpsllw $10 / vpsraw $10 (10 = 16 - 6) in the AVX1
; and AVX2 configurations, and vpsllq $58 / vpsraq $58 in the AVX512 one.
3676 define <8 x i64> @sext_8i6_to_8i64(i32 %x) nounwind uwtable readnone ssp {
3677 ; SSE2-LABEL: sext_8i6_to_8i64:
3678 ; SSE2: # %bb.0: # %entry
3679 ; SSE2-NEXT: movd %edi, %xmm0
3680 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3681 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
3682 ; SSE2-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
3683 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,0,0]
3684 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
3685 ; SSE2-NEXT: psllq $58, %xmm0
3686 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3687 ; SSE2-NEXT: psrad $31, %xmm1
3688 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
3689 ; SSE2-NEXT: psrad $26, %xmm0
3690 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
3691 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3692 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
3693 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,5]
3694 ; SSE2-NEXT: psllq $58, %xmm1
3695 ; SSE2-NEXT: movdqa %xmm1, %xmm2
3696 ; SSE2-NEXT: psrad $31, %xmm2
3697 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
3698 ; SSE2-NEXT: psrad $26, %xmm1
3699 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
3700 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3701 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,2,2,2]
3702 ; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,5,5]
3703 ; SSE2-NEXT: psllq $58, %xmm2
3704 ; SSE2-NEXT: movdqa %xmm2, %xmm4
3705 ; SSE2-NEXT: psrad $31, %xmm4
3706 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
3707 ; SSE2-NEXT: psrad $26, %xmm2
3708 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
3709 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
3710 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
3711 ; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,5,5]
3712 ; SSE2-NEXT: psllq $58, %xmm3
3713 ; SSE2-NEXT: movdqa %xmm3, %xmm4
3714 ; SSE2-NEXT: psrad $31, %xmm4
3715 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
3716 ; SSE2-NEXT: psrad $26, %xmm3
3717 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
3718 ; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
3721 ; SSSE3-LABEL: sext_8i6_to_8i64:
3722 ; SSSE3: # %bb.0: # %entry
3723 ; SSSE3-NEXT: movd %edi, %xmm0
3724 ; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3725 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
3726 ; SSSE3-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
3727 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,0,0]
3728 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
3729 ; SSSE3-NEXT: psllq $58, %xmm0
3730 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
3731 ; SSSE3-NEXT: psrad $31, %xmm1
3732 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
3733 ; SSSE3-NEXT: psrad $26, %xmm0
3734 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
3735 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3736 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
3737 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,5]
3738 ; SSSE3-NEXT: psllq $58, %xmm1
3739 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
3740 ; SSSE3-NEXT: psrad $31, %xmm2
3741 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
3742 ; SSSE3-NEXT: psrad $26, %xmm1
3743 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
3744 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3745 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,2,2,2]
3746 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,5,5]
3747 ; SSSE3-NEXT: psllq $58, %xmm2
3748 ; SSSE3-NEXT: movdqa %xmm2, %xmm4
3749 ; SSSE3-NEXT: psrad $31, %xmm4
3750 ; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
3751 ; SSSE3-NEXT: psrad $26, %xmm2
3752 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
3753 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
3754 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
3755 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,5,5]
3756 ; SSSE3-NEXT: psllq $58, %xmm3
3757 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
3758 ; SSSE3-NEXT: psrad $31, %xmm4
3759 ; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
3760 ; SSSE3-NEXT: psrad $26, %xmm3
3761 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
3762 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
3765 ; SSE41-LABEL: sext_8i6_to_8i64:
3766 ; SSE41: # %bb.0: # %entry
3767 ; SSE41-NEXT: movd %edi, %xmm0
3768 ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3769 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
3770 ; SSE41-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
3771 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
3772 ; SSE41-NEXT: psllq $58, %xmm0
3773 ; SSE41-NEXT: movdqa %xmm0, %xmm1
3774 ; SSE41-NEXT: psrad $31, %xmm1
3775 ; SSE41-NEXT: psrad $26, %xmm0
3776 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
3777 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
3778 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
3779 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
3780 ; SSE41-NEXT: psllq $58, %xmm1
3781 ; SSE41-NEXT: movdqa %xmm1, %xmm2
3782 ; SSE41-NEXT: psrad $31, %xmm2
3783 ; SSE41-NEXT: psrad $26, %xmm1
3784 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
3785 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
3786 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
3787 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
3788 ; SSE41-NEXT: psllq $58, %xmm2
3789 ; SSE41-NEXT: movdqa %xmm2, %xmm4
3790 ; SSE41-NEXT: psrad $31, %xmm4
3791 ; SSE41-NEXT: psrad $26, %xmm2
3792 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
3793 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7]
3794 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
3795 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
3796 ; SSE41-NEXT: psllq $58, %xmm3
3797 ; SSE41-NEXT: movdqa %xmm3, %xmm4
3798 ; SSE41-NEXT: psrad $31, %xmm4
3799 ; SSE41-NEXT: psrad $26, %xmm3
3800 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
3801 ; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2,3],xmm3[4,5],xmm4[6,7]
3804 ; AVX1-LABEL: sext_8i6_to_8i64:
3805 ; AVX1: # %bb.0: # %entry
3806 ; AVX1-NEXT: vmovd %edi, %xmm0
3807 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3808 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3809 ; AVX1-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3810 ; AVX1-NEXT: vpsllw $10, %xmm0, %xmm0
3811 ; AVX1-NEXT: vpsraw $10, %xmm0, %xmm1
3812 ; AVX1-NEXT: vpmovsxwq %xmm1, %xmm0
3813 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
3814 ; AVX1-NEXT: vpmovsxwq %xmm2, %xmm2
3815 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3816 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
3817 ; AVX1-NEXT: vpmovsxwq %xmm2, %xmm2
3818 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
3819 ; AVX1-NEXT: vpmovsxwq %xmm1, %xmm1
3820 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
3823 ; AVX2-LABEL: sext_8i6_to_8i64:
3824 ; AVX2: # %bb.0: # %entry
3825 ; AVX2-NEXT: vmovd %edi, %xmm0
3826 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
3827 ; AVX2-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3828 ; AVX2-NEXT: vpsllw $10, %xmm0, %xmm0
3829 ; AVX2-NEXT: vpsraw $10, %xmm0, %xmm1
3830 ; AVX2-NEXT: vpmovsxwq %xmm1, %ymm0
3831 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
3832 ; AVX2-NEXT: vpmovsxwq %xmm1, %ymm1
3835 ; AVX512-LABEL: sext_8i6_to_8i64:
3836 ; AVX512: # %bb.0: # %entry
3837 ; AVX512-NEXT: vmovd %edi, %xmm0
3838 ; AVX512-NEXT: vpbroadcastw %xmm0, %xmm0
3839 ; AVX512-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3840 ; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
3841 ; AVX512-NEXT: vpsllq $58, %zmm0, %zmm0
3842 ; AVX512-NEXT: vpsraq $58, %zmm0, %zmm0
3845 ; X86-SSE2-LABEL: sext_8i6_to_8i64:
3846 ; X86-SSE2: # %bb.0: # %entry
3847 ; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3848 ; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3849 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
3850 ; X86-SSE2-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3
3851 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,0,0]
3852 ; X86-SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
3853 ; X86-SSE2-NEXT: psllq $58, %xmm0
3854 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
3855 ; X86-SSE2-NEXT: psrad $31, %xmm1
3856 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
3857 ; X86-SSE2-NEXT: psrad $26, %xmm0
3858 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
3859 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3860 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
3861 ; X86-SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,5]
3862 ; X86-SSE2-NEXT: psllq $58, %xmm1
3863 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
3864 ; X86-SSE2-NEXT: psrad $31, %xmm2
3865 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
3866 ; X86-SSE2-NEXT: psrad $26, %xmm1
3867 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
3868 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3869 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,2,2,2]
3870 ; X86-SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,5,5]
3871 ; X86-SSE2-NEXT: psllq $58, %xmm2
3872 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
3873 ; X86-SSE2-NEXT: psrad $31, %xmm4
3874 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
3875 ; X86-SSE2-NEXT: psrad $26, %xmm2
3876 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
3877 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
3878 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
3879 ; X86-SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,5,5]
3880 ; X86-SSE2-NEXT: psllq $58, %xmm3
3881 ; X86-SSE2-NEXT: movdqa %xmm3, %xmm4
3882 ; X86-SSE2-NEXT: psrad $31, %xmm4
3883 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
3884 ; X86-SSE2-NEXT: psrad $26, %xmm3
3885 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
3886 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
3887 ; X86-SSE2-NEXT: retl
3889 ; X86-SSE41-LABEL: sext_8i6_to_8i64:
3890 ; X86-SSE41: # %bb.0: # %entry
3891 ; X86-SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3892 ; X86-SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3893 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
3894 ; X86-SSE41-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3
3895 ; X86-SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
3896 ; X86-SSE41-NEXT: psllq $58, %xmm0
3897 ; X86-SSE41-NEXT: movdqa %xmm0, %xmm1
3898 ; X86-SSE41-NEXT: psrad $31, %xmm1
3899 ; X86-SSE41-NEXT: psrad $26, %xmm0
3900 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
3901 ; X86-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
3902 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
3903 ; X86-SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
3904 ; X86-SSE41-NEXT: psllq $58, %xmm1
3905 ; X86-SSE41-NEXT: movdqa %xmm1, %xmm2
3906 ; X86-SSE41-NEXT: psrad $31, %xmm2
3907 ; X86-SSE41-NEXT: psrad $26, %xmm1
3908 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
3909 ; X86-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
3910 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
3911 ; X86-SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
3912 ; X86-SSE41-NEXT: psllq $58, %xmm2
3913 ; X86-SSE41-NEXT: movdqa %xmm2, %xmm4
3914 ; X86-SSE41-NEXT: psrad $31, %xmm4
3915 ; X86-SSE41-NEXT: psrad $26, %xmm2
3916 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
3917 ; X86-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7]
3918 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
3919 ; X86-SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
3920 ; X86-SSE41-NEXT: psllq $58, %xmm3
3921 ; X86-SSE41-NEXT: movdqa %xmm3, %xmm4
3922 ; X86-SSE41-NEXT: psrad $31, %xmm4
3923 ; X86-SSE41-NEXT: psrad $26, %xmm3
3924 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
3925 ; X86-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2,3],xmm3[4,5],xmm4[6,7]
3926 ; X86-SSE41-NEXT: retl
3928 %a = trunc i32 %x to i6
3929 %b = insertelement <8 x i6> undef, i6 %a, i32 0
3930 %c = shufflevector <8 x i6> %b, <8 x i6> undef, <8 x i32> zeroinitializer
3931 %d = add <8 x i6> %c, <i6 0, i6 1, i6 2, i6 3, i6 4, i6 5, i6 6, i6 7>
3932 %e = sext <8 x i6> %d to <8 x i64>
; zext_negate_sext - zero-extends the <8 x i8> argument to <8 x i16>, negates
; it (sub nsw from zeroinitializer), and sign-extends the negated value to
; <8 x i32>.
; NOTE(review): the return instruction is not visible in this excerpt;
; presumably %r is returned - confirm against the full file.
; The expectations below show that the AVX1, AVX2 and AVX512 configurations
; zero-extend straight to 32-bit lanes (vpmovzxbd) and subtract from zero with
; vpsubd, while the SSE configurations negate at 16 bits (psubw) and then
; sign-extend the result.
3936 define <8 x i32> @zext_negate_sext(<8 x i8> %x) {
3937 ; SSE2-LABEL: zext_negate_sext:
3939 ; SSE2-NEXT: pxor %xmm1, %xmm1
3940 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3941 ; SSE2-NEXT: psubw %xmm0, %xmm1
3942 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3943 ; SSE2-NEXT: psrad $16, %xmm0
3944 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
3945 ; SSE2-NEXT: psrad $16, %xmm1
3948 ; SSSE3-LABEL: zext_negate_sext:
3950 ; SSSE3-NEXT: pxor %xmm1, %xmm1
3951 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3952 ; SSSE3-NEXT: psubw %xmm0, %xmm1
3953 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3954 ; SSSE3-NEXT: psrad $16, %xmm0
3955 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
3956 ; SSSE3-NEXT: psrad $16, %xmm1
3959 ; SSE41-LABEL: zext_negate_sext:
3961 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3962 ; SSE41-NEXT: pxor %xmm1, %xmm1
3963 ; SSE41-NEXT: psubw %xmm0, %xmm1
3964 ; SSE41-NEXT: pmovsxwd %xmm1, %xmm0
3965 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
3966 ; SSE41-NEXT: pmovsxwd %xmm1, %xmm1
3969 ; AVX1-LABEL: zext_negate_sext:
3971 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
3972 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
3973 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
3974 ; AVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
3975 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3976 ; AVX1-NEXT: vpsubd %xmm0, %xmm2, %xmm0
3977 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3980 ; AVX2-LABEL: zext_negate_sext:
3982 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
3983 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
3984 ; AVX2-NEXT: vpsubd %ymm0, %ymm1, %ymm0
3987 ; AVX512-LABEL: zext_negate_sext:
3989 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
3990 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
3991 ; AVX512-NEXT: vpsubd %ymm0, %ymm1, %ymm0
3994 ; X86-SSE2-LABEL: zext_negate_sext:
3995 ; X86-SSE2: # %bb.0:
3996 ; X86-SSE2-NEXT: pxor %xmm1, %xmm1
3997 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3998 ; X86-SSE2-NEXT: psubw %xmm0, %xmm1
3999 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
4000 ; X86-SSE2-NEXT: psrad $16, %xmm0
4001 ; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
4002 ; X86-SSE2-NEXT: psrad $16, %xmm1
4003 ; X86-SSE2-NEXT: retl
4005 ; X86-SSE41-LABEL: zext_negate_sext:
4006 ; X86-SSE41: # %bb.0:
4007 ; X86-SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4008 ; X86-SSE41-NEXT: pxor %xmm1, %xmm1
4009 ; X86-SSE41-NEXT: psubw %xmm0, %xmm1
4010 ; X86-SSE41-NEXT: pmovsxwd %xmm1, %xmm0
4011 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
4012 ; X86-SSE41-NEXT: pmovsxwd %xmm1, %xmm1
4013 ; X86-SSE41-NEXT: retl
4014 %z = zext <8 x i8> %x to <8 x i16>
4015 %neg = sub nsw <8 x i16> zeroinitializer, %z
4016 %r = sext <8 x i16> %neg to <8 x i32>
4020 define <8 x i32> @zext_decremenet_sext(<8 x i8> %x) {
4021 ; SSE2-LABEL: zext_decremenet_sext:
4023 ; SSE2-NEXT: pxor %xmm1, %xmm1
4024 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
4025 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
4026 ; SSE2-NEXT: paddw %xmm0, %xmm1
4027 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
4028 ; SSE2-NEXT: psrad $16, %xmm0
4029 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
4030 ; SSE2-NEXT: psrad $16, %xmm1
4033 ; SSSE3-LABEL: zext_decremenet_sext:
4035 ; SSSE3-NEXT: pxor %xmm1, %xmm1
4036 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
4037 ; SSSE3-NEXT: pcmpeqd %xmm1, %xmm1
4038 ; SSSE3-NEXT: paddw %xmm0, %xmm1
4039 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
4040 ; SSSE3-NEXT: psrad $16, %xmm0
4041 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
4042 ; SSSE3-NEXT: psrad $16, %xmm1
4045 ; SSE41-LABEL: zext_decremenet_sext:
4047 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4048 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
4049 ; SSE41-NEXT: paddw %xmm0, %xmm1
4050 ; SSE41-NEXT: pmovsxwd %xmm1, %xmm0
4051 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
4052 ; SSE41-NEXT: pmovsxwd %xmm1, %xmm1
4055 ; AVX1-LABEL: zext_decremenet_sext:
4057 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
4058 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
4059 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
4060 ; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
4061 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
4062 ; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0
4063 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4066 ; AVX2-LABEL: zext_decremenet_sext:
4068 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
4069 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
4070 ; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
4073 ; AVX512-LABEL: zext_decremenet_sext:
4075 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
4076 ; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
4077 ; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0
4080 ; X86-SSE2-LABEL: zext_decremenet_sext:
4081 ; X86-SSE2: # %bb.0:
4082 ; X86-SSE2-NEXT: pxor %xmm1, %xmm1
4083 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
4084 ; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
4085 ; X86-SSE2-NEXT: paddw %xmm0, %xmm1
4086 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
4087 ; X86-SSE2-NEXT: psrad $16, %xmm0
4088 ; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
4089 ; X86-SSE2-NEXT: psrad $16, %xmm1
4090 ; X86-SSE2-NEXT: retl
4092 ; X86-SSE41-LABEL: zext_decremenet_sext:
4093 ; X86-SSE41: # %bb.0:
4094 ; X86-SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4095 ; X86-SSE41-NEXT: pcmpeqd %xmm1, %xmm1
4096 ; X86-SSE41-NEXT: paddw %xmm0, %xmm1
4097 ; X86-SSE41-NEXT: pmovsxwd %xmm1, %xmm0
4098 ; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
4099 ; X86-SSE41-NEXT: pmovsxwd %xmm1, %xmm1
4100 ; X86-SSE41-NEXT: retl
4101 %z = zext <8 x i8> %x to <8 x i16>
4102 %dec = add <8 x i16> %z, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
4103 %r = sext <8 x i16> %dec to <8 x i32>