1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
4 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
5 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
6 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2-SLOW
7 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2-FAST
8 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
9 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
; Codegen test: zero-extends the low eight i8 elements of %A to <8 x i16>.
; The check lines in this function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
11 define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
12 ; SSE2-LABEL: zext_16i8_to_8i16:
13 ; SSE2: # %bb.0: # %entry
14 ; SSE2-NEXT: pxor %xmm1, %xmm1
15 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
18 ; SSSE3-LABEL: zext_16i8_to_8i16:
19 ; SSSE3: # %bb.0: # %entry
20 ; SSSE3-NEXT: pxor %xmm1, %xmm1
21 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
24 ; SSE41-LABEL: zext_16i8_to_8i16:
25 ; SSE41: # %bb.0: # %entry
26 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
29 ; AVX-LABEL: zext_16i8_to_8i16:
30 ; AVX: # %bb.0: # %entry
31 ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; Select elements 0-7 of %A; the mask never indexes the undef second operand.
34 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Widen each selected i8 to i16, filling the upper bits with zero.
35 %C = zext <8 x i8> %B to <8 x i16>
; Codegen test: zero-extends all sixteen i8 elements of %A to <16 x i16>.
; The result is twice the width of the 128-bit input vector.
; The check lines in this function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
40 define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %A) {
41 ; SSE2-LABEL: zext_16i8_to_16i16:
42 ; SSE2: # %bb.0: # %entry
43 ; SSE2-NEXT: movdqa %xmm0, %xmm1
44 ; SSE2-NEXT: pxor %xmm2, %xmm2
45 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
46 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
49 ; SSSE3-LABEL: zext_16i8_to_16i16:
50 ; SSSE3: # %bb.0: # %entry
51 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
52 ; SSSE3-NEXT: pxor %xmm2, %xmm2
53 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
54 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
57 ; SSE41-LABEL: zext_16i8_to_16i16:
58 ; SSE41: # %bb.0: # %entry
59 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
60 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
61 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
62 ; SSE41-NEXT: movdqa %xmm2, %xmm0
65 ; AVX1-LABEL: zext_16i8_to_16i16:
66 ; AVX1: # %bb.0: # %entry
67 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
68 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
69 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
70 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
73 ; AVX2-LABEL: zext_16i8_to_16i16:
74 ; AVX2: # %bb.0: # %entry
75 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
78 ; AVX512-LABEL: zext_16i8_to_16i16:
79 ; AVX512: # %bb.0: # %entry
80 ; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; Every input element is used, so the zext applies directly to %A with no shuffle.
83 %B = zext <16 x i8> %A to <16 x i16>
; Codegen test: zero-extends all thirty-two i8 elements of %A to <32 x i16>.
; The check lines in this function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
87 define <32 x i16> @zext_32i8_to_32i16(<32 x i8> %A) {
88 ; SSE2-LABEL: zext_32i8_to_32i16:
89 ; SSE2: # %bb.0: # %entry
90 ; SSE2-NEXT: movdqa %xmm1, %xmm3
91 ; SSE2-NEXT: movdqa %xmm0, %xmm1
92 ; SSE2-NEXT: pxor %xmm4, %xmm4
93 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
94 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15]
95 ; SSE2-NEXT: movdqa %xmm3, %xmm2
96 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
97 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
100 ; SSSE3-LABEL: zext_32i8_to_32i16:
101 ; SSSE3: # %bb.0: # %entry
102 ; SSSE3-NEXT: movdqa %xmm1, %xmm3
103 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
104 ; SSSE3-NEXT: pxor %xmm4, %xmm4
105 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
106 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15]
107 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
108 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
109 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
112 ; SSE41-LABEL: zext_32i8_to_32i16:
113 ; SSE41: # %bb.0: # %entry
114 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
115 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
116 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
117 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
118 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
119 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
120 ; SSE41-NEXT: movdqa %xmm5, %xmm0
121 ; SSE41-NEXT: movdqa %xmm4, %xmm1
124 ; AVX1-LABEL: zext_32i8_to_32i16:
125 ; AVX1: # %bb.0: # %entry
126 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
127 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
128 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
129 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
130 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
131 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
132 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
133 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
134 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
137 ; AVX2-LABEL: zext_32i8_to_32i16:
138 ; AVX2: # %bb.0: # %entry
139 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
140 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
141 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
142 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
145 ; AVX512F-LABEL: zext_32i8_to_32i16:
146 ; AVX512F: # %bb.0: # %entry
147 ; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
148 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
149 ; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
150 ; AVX512F-NEXT: vmovdqa %ymm2, %ymm0
153 ; AVX512BW-LABEL: zext_32i8_to_32i16:
154 ; AVX512BW: # %bb.0: # %entry
155 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
156 ; AVX512BW-NEXT: retq
; Every input element is used, so the zext applies directly to %A with no shuffle.
158 %B = zext <32 x i8> %A to <32 x i16>
; Codegen test: zero-extends the low four i8 elements of %A to <4 x i32>.
; The check lines in this function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
162 define <4 x i32> @zext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp {
163 ; SSE2-LABEL: zext_16i8_to_4i32:
164 ; SSE2: # %bb.0: # %entry
165 ; SSE2-NEXT: pxor %xmm1, %xmm1
166 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
167 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
170 ; SSSE3-LABEL: zext_16i8_to_4i32:
171 ; SSSE3: # %bb.0: # %entry
172 ; SSSE3-NEXT: pxor %xmm1, %xmm1
173 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
174 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
177 ; SSE41-LABEL: zext_16i8_to_4i32:
178 ; SSE41: # %bb.0: # %entry
179 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
182 ; AVX-LABEL: zext_16i8_to_4i32:
183 ; AVX: # %bb.0: # %entry
184 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; Select elements 0-3 of %A; the mask never indexes the undef second operand.
187 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Widen each selected i8 to i32, filling the upper bits with zero.
188 %C = zext <4 x i8> %B to <4 x i32>
; Codegen test: zero-extends the low eight i8 elements of %A to <8 x i32>.
; The check lines in this function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
192 define <8 x i32> @zext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp {
193 ; SSE2-LABEL: zext_16i8_to_8i32:
194 ; SSE2: # %bb.0: # %entry
195 ; SSE2-NEXT: movdqa %xmm0, %xmm1
196 ; SSE2-NEXT: pxor %xmm2, %xmm2
197 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
198 ; SSE2-NEXT: movdqa %xmm1, %xmm0
199 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
200 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
203 ; SSSE3-LABEL: zext_16i8_to_8i32:
204 ; SSSE3: # %bb.0: # %entry
205 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
206 ; SSSE3-NEXT: pxor %xmm2, %xmm2
207 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
208 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
209 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
210 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
213 ; SSE41-LABEL: zext_16i8_to_8i32:
214 ; SSE41: # %bb.0: # %entry
215 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
216 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
217 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
218 ; SSE41-NEXT: movdqa %xmm2, %xmm0
221 ; AVX1-LABEL: zext_16i8_to_8i32:
222 ; AVX1: # %bb.0: # %entry
223 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
224 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
225 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
226 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
229 ; AVX2-LABEL: zext_16i8_to_8i32:
230 ; AVX2: # %bb.0: # %entry
231 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
234 ; AVX512-LABEL: zext_16i8_to_8i32:
235 ; AVX512: # %bb.0: # %entry
236 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; Select elements 0-7 of %A; the mask never indexes the undef second operand.
239 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Widen each selected i8 to i32, filling the upper bits with zero.
240 %C = zext <8 x i8> %B to <8 x i32>
; Codegen test: zero-extends all sixteen i8 elements of %A to <16 x i32>.
; The result is four times the width of the 128-bit input vector.
; The check lines in this function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
244 define <16 x i32> @zext_16i8_to_16i32(<16 x i8> %A) nounwind uwtable readnone ssp {
245 ; SSE2-LABEL: zext_16i8_to_16i32:
246 ; SSE2: # %bb.0: # %entry
247 ; SSE2-NEXT: movdqa %xmm0, %xmm3
248 ; SSE2-NEXT: pxor %xmm4, %xmm4
249 ; SSE2-NEXT: movdqa %xmm0, %xmm1
250 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
251 ; SSE2-NEXT: movdqa %xmm1, %xmm0
252 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
253 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
254 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
255 ; SSE2-NEXT: movdqa %xmm3, %xmm2
256 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
257 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
260 ; SSSE3-LABEL: zext_16i8_to_16i32:
261 ; SSSE3: # %bb.0: # %entry
262 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
263 ; SSSE3-NEXT: pxor %xmm4, %xmm4
264 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
265 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
266 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
267 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
268 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
269 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
270 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
271 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
272 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
275 ; SSE41-LABEL: zext_16i8_to_16i32:
276 ; SSE41: # %bb.0: # %entry
277 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
278 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
279 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
280 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
281 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
282 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
283 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
284 ; SSE41-NEXT: movdqa %xmm4, %xmm0
287 ; AVX1-LABEL: zext_16i8_to_16i32:
288 ; AVX1: # %bb.0: # %entry
289 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
290 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
291 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
292 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
293 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
294 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
295 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,0,1]
296 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
297 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
298 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
301 ; AVX2-LABEL: zext_16i8_to_16i32:
302 ; AVX2: # %bb.0: # %entry
303 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
304 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
305 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
306 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
309 ; AVX512-LABEL: zext_16i8_to_16i32:
310 ; AVX512: # %bb.0: # %entry
311 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; Every input element is used, so the zext applies directly to %A with no shuffle.
314 %B = zext <16 x i8> %A to <16 x i32>
; Codegen test: zero-extends the low two i8 elements of %A to <2 x i64>.
; The check lines in this function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
318 define <2 x i64> @zext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp {
319 ; SSE2-LABEL: zext_16i8_to_2i64:
320 ; SSE2: # %bb.0: # %entry
321 ; SSE2-NEXT: pxor %xmm1, %xmm1
322 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
323 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
324 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
327 ; SSSE3-LABEL: zext_16i8_to_2i64:
328 ; SSSE3: # %bb.0: # %entry
329 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
332 ; SSE41-LABEL: zext_16i8_to_2i64:
333 ; SSE41: # %bb.0: # %entry
334 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
337 ; AVX-LABEL: zext_16i8_to_2i64:
338 ; AVX: # %bb.0: # %entry
339 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; Select elements 0-1 of %A; the mask never indexes the undef second operand.
342 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
; Widen each selected i8 to i64, filling the upper bits with zero.
343 %C = zext <2 x i8> %B to <2 x i64>
; Codegen test: zero-extends the low four i8 elements of %A to <4 x i64>.
; The check lines in this function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
347 define <4 x i64> @zext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp {
348 ; SSE2-LABEL: zext_16i8_to_4i64:
349 ; SSE2: # %bb.0: # %entry
350 ; SSE2-NEXT: movdqa %xmm0, %xmm1
351 ; SSE2-NEXT: pxor %xmm2, %xmm2
352 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
353 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
354 ; SSE2-NEXT: movdqa %xmm1, %xmm0
355 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
356 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
359 ; SSSE3-LABEL: zext_16i8_to_4i64:
360 ; SSSE3: # %bb.0: # %entry
361 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
362 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
363 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
366 ; SSE41-LABEL: zext_16i8_to_4i64:
367 ; SSE41: # %bb.0: # %entry
368 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
369 ; SSE41-NEXT: psrld $16, %xmm0
370 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
371 ; SSE41-NEXT: movdqa %xmm2, %xmm0
374 ; AVX1-LABEL: zext_16i8_to_4i64:
375 ; AVX1: # %bb.0: # %entry
376 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
377 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
378 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
379 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
382 ; AVX2-LABEL: zext_16i8_to_4i64:
383 ; AVX2: # %bb.0: # %entry
384 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
387 ; AVX512-LABEL: zext_16i8_to_4i64:
388 ; AVX512: # %bb.0: # %entry
389 ; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; Select elements 0-3 of %A; the mask never indexes the undef second operand.
392 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Widen each selected i8 to i64, filling the upper bits with zero.
393 %C = zext <4 x i8> %B to <4 x i64>
; Codegen test: zero-extends the low eight i8 elements of %A to <8 x i64>.
; The check lines in this function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
397 define <8 x i64> @zext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp {
398 ; SSE2-LABEL: zext_16i8_to_8i64:
399 ; SSE2: # %bb.0: # %entry
400 ; SSE2-NEXT: movdqa %xmm0, %xmm3
401 ; SSE2-NEXT: pxor %xmm4, %xmm4
402 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
403 ; SSE2-NEXT: movdqa %xmm3, %xmm1
404 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
405 ; SSE2-NEXT: movdqa %xmm1, %xmm0
406 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
407 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
408 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
409 ; SSE2-NEXT: movdqa %xmm3, %xmm2
410 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
411 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
414 ; SSSE3-LABEL: zext_16i8_to_8i64:
415 ; SSSE3: # %bb.0: # %entry
416 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
417 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
418 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
419 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
420 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
421 ; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[4],zero,zero,zero,zero,zero,zero,zero,xmm2[5],zero,zero,zero,zero,zero,zero,zero
422 ; SSSE3-NEXT: pshufb {{.*#+}} xmm3 = xmm3[6],zero,zero,zero,zero,zero,zero,zero,xmm3[7],zero,zero,zero,zero,zero,zero,zero
425 ; SSE41-LABEL: zext_16i8_to_8i64:
426 ; SSE41: # %bb.0: # %entry
427 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
428 ; SSE41-NEXT: movdqa %xmm0, %xmm1
429 ; SSE41-NEXT: psrld $16, %xmm1
430 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
431 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
432 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
433 ; SSE41-NEXT: psrlq $48, %xmm0
434 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
435 ; SSE41-NEXT: movdqa %xmm4, %xmm0
438 ; AVX1-LABEL: zext_16i8_to_8i64:
439 ; AVX1: # %bb.0: # %entry
440 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
441 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm2
442 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
443 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
444 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
445 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
446 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
447 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
448 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
449 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
452 ; AVX2-LABEL: zext_16i8_to_8i64:
453 ; AVX2: # %bb.0: # %entry
454 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
455 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
456 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
457 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
460 ; AVX512-LABEL: zext_16i8_to_8i64:
461 ; AVX512: # %bb.0: # %entry
462 ; AVX512-NEXT: vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; Select elements 0-7 of %A; the mask never indexes the undef second operand.
465 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Widen each selected i8 to i64, filling the upper bits with zero.
466 %C = zext <8 x i8> %B to <8 x i64>
; Codegen test: zero-extends the low four i16 elements of %A to <4 x i32>.
; The check lines in this function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
470 define <4 x i32> @zext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp {
471 ; SSE2-LABEL: zext_8i16_to_4i32:
472 ; SSE2: # %bb.0: # %entry
473 ; SSE2-NEXT: pxor %xmm1, %xmm1
474 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
477 ; SSSE3-LABEL: zext_8i16_to_4i32:
478 ; SSSE3: # %bb.0: # %entry
479 ; SSSE3-NEXT: pxor %xmm1, %xmm1
480 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
483 ; SSE41-LABEL: zext_8i16_to_4i32:
484 ; SSE41: # %bb.0: # %entry
485 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
488 ; AVX-LABEL: zext_8i16_to_4i32:
489 ; AVX: # %bb.0: # %entry
490 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; Select elements 0-3 of %A; the mask never indexes the undef second operand.
493 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Widen each selected i16 to i32, filling the upper bits with zero.
494 %C = zext <4 x i16> %B to <4 x i32>
; Codegen test: zero-extends all eight i16 elements of %A to <8 x i32>.
; The result is twice the width of the 128-bit input vector.
; The check lines in this function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
498 define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
499 ; SSE2-LABEL: zext_8i16_to_8i32:
500 ; SSE2: # %bb.0: # %entry
501 ; SSE2-NEXT: movdqa %xmm0, %xmm1
502 ; SSE2-NEXT: pxor %xmm2, %xmm2
503 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
504 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
507 ; SSSE3-LABEL: zext_8i16_to_8i32:
508 ; SSSE3: # %bb.0: # %entry
509 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
510 ; SSSE3-NEXT: pxor %xmm2, %xmm2
511 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
512 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
515 ; SSE41-LABEL: zext_8i16_to_8i32:
516 ; SSE41: # %bb.0: # %entry
517 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
518 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
519 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
520 ; SSE41-NEXT: movdqa %xmm2, %xmm0
523 ; AVX1-LABEL: zext_8i16_to_8i32:
524 ; AVX1: # %bb.0: # %entry
525 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
526 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
527 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
528 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
531 ; AVX2-LABEL: zext_8i16_to_8i32:
532 ; AVX2: # %bb.0: # %entry
533 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
536 ; AVX512-LABEL: zext_8i16_to_8i32:
537 ; AVX512: # %bb.0: # %entry
538 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; Every input element is used, so the zext applies directly to %A with no shuffle.
541 %B = zext <8 x i16> %A to <8 x i32>
; Test: zero-extend a whole <16 x i16> argument to <16 x i32> (%B below).
; Per the checks: SSE2 and SSSE3 interleave each input register with a zeroed
; register (punpcklwd/punpckhwd); SSE4.1 uses pmovzxwd, with pshufd [2,3,0,1]
; moving each upper half down first; AVX1 pairs vpmovzxwd with vpunpckhwd and
; joins them with vinsertf128; AVX2 uses two ymm vpmovzxwd; AVX512 uses a
; single zmm vpmovzxwd.
; The assertions are autogenerated (see the note on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
545 define <16 x i32> @zext_16i16_to_16i32(<16 x i16> %A) nounwind uwtable readnone ssp {
546 ; SSE2-LABEL: zext_16i16_to_16i32:
547 ; SSE2: # %bb.0: # %entry
548 ; SSE2-NEXT: movdqa %xmm1, %xmm3
549 ; SSE2-NEXT: movdqa %xmm0, %xmm1
550 ; SSE2-NEXT: pxor %xmm4, %xmm4
551 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
552 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
553 ; SSE2-NEXT: movdqa %xmm3, %xmm2
554 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
555 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
558 ; SSSE3-LABEL: zext_16i16_to_16i32:
559 ; SSSE3: # %bb.0: # %entry
560 ; SSSE3-NEXT: movdqa %xmm1, %xmm3
561 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
562 ; SSSE3-NEXT: pxor %xmm4, %xmm4
563 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
564 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
565 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
566 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
567 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
570 ; SSE41-LABEL: zext_16i16_to_16i32:
571 ; SSE41: # %bb.0: # %entry
572 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
573 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
574 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
575 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
576 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
577 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
578 ; SSE41-NEXT: movdqa %xmm5, %xmm0
579 ; SSE41-NEXT: movdqa %xmm4, %xmm1
582 ; AVX1-LABEL: zext_16i16_to_16i32:
583 ; AVX1: # %bb.0: # %entry
584 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
585 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
586 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
587 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
588 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
589 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
590 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
591 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
592 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
595 ; AVX2-LABEL: zext_16i16_to_16i32:
596 ; AVX2: # %bb.0: # %entry
597 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
598 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
599 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
600 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
603 ; AVX512-LABEL: zext_16i16_to_16i32:
604 ; AVX512: # %bb.0: # %entry
605 ; AVX512-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
608 %B = zext <16 x i16> %A to <16 x i32>
; Test: take elements 0 and 1 of an <8 x i16> argument (shufflevector %B) and
; zero-extend them to <2 x i64> (%C).
; Per the checks: SSE2 and SSSE3 widen in two steps against a zeroed register
; (punpcklwd, then punpckldq); SSE4.1 uses one pmovzxwq; every AVX run line
; shares the common AVX prefix here and expects one vpmovzxwq.
; The assertions are autogenerated; regenerate with
; utils/update_llc_test_checks.py instead of editing them by hand.
612 define <2 x i64> @zext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp {
613 ; SSE2-LABEL: zext_8i16_to_2i64:
614 ; SSE2: # %bb.0: # %entry
615 ; SSE2-NEXT: pxor %xmm1, %xmm1
616 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
617 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
620 ; SSSE3-LABEL: zext_8i16_to_2i64:
621 ; SSSE3: # %bb.0: # %entry
622 ; SSSE3-NEXT: pxor %xmm1, %xmm1
623 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
624 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
627 ; SSE41-LABEL: zext_8i16_to_2i64:
628 ; SSE41: # %bb.0: # %entry
629 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
632 ; AVX-LABEL: zext_8i16_to_2i64:
633 ; AVX: # %bb.0: # %entry
634 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
637 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
638 %C = zext <2 x i16> %B to <2 x i64>
; Test: take elements 0-3 of an <8 x i16> argument (shufflevector %B) and
; zero-extend them to <4 x i64> (%C).
; Per the checks: SSE2 and SSSE3 use punpcklwd followed by punpckldq/punpckhdq
; against a zeroed register; SSE4.1 and AVX1 use two pmovzxwq with a pshufd
; [1,1,2,3] feeding the second one; AVX2 and AVX512 use one ymm vpmovzxwq.
; The assertions are autogenerated; regenerate with
; utils/update_llc_test_checks.py instead of editing them by hand.
642 define <4 x i64> @zext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp {
643 ; SSE2-LABEL: zext_8i16_to_4i64:
644 ; SSE2: # %bb.0: # %entry
645 ; SSE2-NEXT: movdqa %xmm0, %xmm1
646 ; SSE2-NEXT: pxor %xmm2, %xmm2
647 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
648 ; SSE2-NEXT: movdqa %xmm1, %xmm0
649 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
650 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
653 ; SSSE3-LABEL: zext_8i16_to_4i64:
654 ; SSSE3: # %bb.0: # %entry
655 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
656 ; SSSE3-NEXT: pxor %xmm2, %xmm2
657 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
658 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
659 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
660 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
663 ; SSE41-LABEL: zext_8i16_to_4i64:
664 ; SSE41: # %bb.0: # %entry
665 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
666 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
667 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
668 ; SSE41-NEXT: movdqa %xmm2, %xmm0
671 ; AVX1-LABEL: zext_8i16_to_4i64:
672 ; AVX1: # %bb.0: # %entry
673 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
674 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
675 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
676 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
679 ; AVX2-LABEL: zext_8i16_to_4i64:
680 ; AVX2: # %bb.0: # %entry
681 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
684 ; AVX512-LABEL: zext_8i16_to_4i64:
685 ; AVX512: # %bb.0: # %entry
686 ; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
689 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
690 %C = zext <4 x i16> %B to <4 x i64>
; Test: zero-extend a whole <8 x i16> argument to <8 x i64> (%B below), i.e. a
; 4x widening of every lane.
; Per the checks: SSE2 and SSSE3 widen in two unpack stages against a zeroed
; register (punpck{l,h}wd, then punpck{l,h}dq); SSE4.1 and AVX1 use four
; pmovzxwq, each fed by a pshufd that moves the next pair of elements into the
; low lanes; AVX2 uses two ymm vpmovzxwq; AVX512 uses a single zmm vpmovzxwq.
; The assertions are autogenerated; regenerate with
; utils/update_llc_test_checks.py instead of editing them by hand.
694 define <8 x i64> @zext_8i16_to_8i64(<8 x i16> %A) nounwind uwtable readnone ssp {
695 ; SSE2-LABEL: zext_8i16_to_8i64:
696 ; SSE2: # %bb.0: # %entry
697 ; SSE2-NEXT: movdqa %xmm0, %xmm3
698 ; SSE2-NEXT: pxor %xmm4, %xmm4
699 ; SSE2-NEXT: movdqa %xmm0, %xmm1
700 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
701 ; SSE2-NEXT: movdqa %xmm1, %xmm0
702 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
703 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
704 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
705 ; SSE2-NEXT: movdqa %xmm3, %xmm2
706 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
707 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
710 ; SSSE3-LABEL: zext_8i16_to_8i64:
711 ; SSSE3: # %bb.0: # %entry
712 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
713 ; SSSE3-NEXT: pxor %xmm4, %xmm4
714 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
715 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
716 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
717 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
718 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
719 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
720 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
721 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
722 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
725 ; SSE41-LABEL: zext_8i16_to_8i64:
726 ; SSE41: # %bb.0: # %entry
727 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
728 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
729 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
730 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
731 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
732 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
733 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
734 ; SSE41-NEXT: movdqa %xmm4, %xmm0
737 ; AVX1-LABEL: zext_8i16_to_8i64:
738 ; AVX1: # %bb.0: # %entry
739 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
740 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
741 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
742 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
743 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
744 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
745 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,0,1]
746 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
747 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
748 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
751 ; AVX2-LABEL: zext_8i16_to_8i64:
752 ; AVX2: # %bb.0: # %entry
753 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
754 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
755 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
756 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
759 ; AVX512-LABEL: zext_8i16_to_8i64:
760 ; AVX512: # %bb.0: # %entry
761 ; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
764 %B = zext <8 x i16> %A to <8 x i64>
; Test: take elements 0 and 1 of a <4 x i32> argument (shufflevector %B) and
; zero-extend them to <2 x i64> (%C).
; Per the checks: SSE2 and SSSE3 interleave with a zeroed register using the
; float-domain xorps/unpcklps; SSE4.1 uses pmovzxdq; every AVX run line shares
; the common AVX prefix here and expects one vpmovzxdq.
; The assertions are autogenerated; regenerate with
; utils/update_llc_test_checks.py instead of editing them by hand.
768 define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp {
769 ; SSE2-LABEL: zext_4i32_to_2i64:
770 ; SSE2: # %bb.0: # %entry
771 ; SSE2-NEXT: xorps %xmm1, %xmm1
772 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
775 ; SSSE3-LABEL: zext_4i32_to_2i64:
776 ; SSSE3: # %bb.0: # %entry
777 ; SSSE3-NEXT: xorps %xmm1, %xmm1
778 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
781 ; SSE41-LABEL: zext_4i32_to_2i64:
782 ; SSE41: # %bb.0: # %entry
783 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
786 ; AVX-LABEL: zext_4i32_to_2i64:
787 ; AVX: # %bb.0: # %entry
788 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
791 %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
792 %C = zext <2 x i32> %B to <2 x i64>
; Test: zero-extend a whole <4 x i32> argument to <4 x i64> (%B below).
; Per the checks: SSE2 and SSSE3 use unpcklps/unpckhps against a zeroed
; register; SSE4.1 uses two pmovzxdq with a pshufd [2,3,0,1] in between; AVX1
; pairs vpmovzxdq (low half) with vpunpckhdq (high half) and joins them with
; vinsertf128; AVX2 and AVX512 use one ymm vpmovzxdq.
; The assertions are autogenerated; regenerate with
; utils/update_llc_test_checks.py instead of editing them by hand.
796 define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
797 ; SSE2-LABEL: zext_4i32_to_4i64:
798 ; SSE2: # %bb.0: # %entry
799 ; SSE2-NEXT: movaps %xmm0, %xmm1
800 ; SSE2-NEXT: xorps %xmm2, %xmm2
801 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
802 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
805 ; SSSE3-LABEL: zext_4i32_to_4i64:
806 ; SSSE3: # %bb.0: # %entry
807 ; SSSE3-NEXT: movaps %xmm0, %xmm1
808 ; SSSE3-NEXT: xorps %xmm2, %xmm2
809 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
810 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
813 ; SSE41-LABEL: zext_4i32_to_4i64:
814 ; SSE41: # %bb.0: # %entry
815 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
816 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
817 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
818 ; SSE41-NEXT: movdqa %xmm2, %xmm0
821 ; AVX1-LABEL: zext_4i32_to_4i64:
822 ; AVX1: # %bb.0: # %entry
823 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
824 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
825 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
826 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
829 ; AVX2-LABEL: zext_4i32_to_4i64:
830 ; AVX2: # %bb.0: # %entry
831 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
834 ; AVX512-LABEL: zext_4i32_to_4i64:
835 ; AVX512: # %bb.0: # %entry
836 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
839 %B = zext <4 x i32> %A to <4 x i64>
; Test: zero-extend a whole <8 x i32> argument to <8 x i64> (%B below).
; Per the checks: SSE2 and SSSE3 use unpcklps/unpckhps against a zeroed
; register for each input register; SSE4.1 uses four pmovzxdq, with pshufd
; [2,3,0,1] moving each upper half down; AVX1 builds each ymm result from a
; vpmovzxdq/vpunpckhdq pair; AVX2 uses two ymm vpmovzxdq; AVX512 uses a single
; zmm vpmovzxdq.
; The assertions are autogenerated; regenerate with
; utils/update_llc_test_checks.py instead of editing them by hand.
843 define <8 x i64> @zext_8i32_to_8i64(<8 x i32> %A) nounwind uwtable readnone ssp {
844 ; SSE2-LABEL: zext_8i32_to_8i64:
845 ; SSE2: # %bb.0: # %entry
846 ; SSE2-NEXT: movaps %xmm1, %xmm3
847 ; SSE2-NEXT: movaps %xmm0, %xmm1
848 ; SSE2-NEXT: xorps %xmm4, %xmm4
849 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
850 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
851 ; SSE2-NEXT: movaps %xmm3, %xmm2
852 ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
853 ; SSE2-NEXT: unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
856 ; SSSE3-LABEL: zext_8i32_to_8i64:
857 ; SSSE3: # %bb.0: # %entry
858 ; SSSE3-NEXT: movaps %xmm1, %xmm3
859 ; SSSE3-NEXT: movaps %xmm0, %xmm1
860 ; SSSE3-NEXT: xorps %xmm4, %xmm4
861 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
862 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
863 ; SSSE3-NEXT: movaps %xmm3, %xmm2
864 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
865 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
868 ; SSE41-LABEL: zext_8i32_to_8i64:
869 ; SSE41: # %bb.0: # %entry
870 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero
871 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
872 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
873 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero
874 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
875 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero
876 ; SSE41-NEXT: movdqa %xmm5, %xmm0
877 ; SSE41-NEXT: movdqa %xmm4, %xmm1
880 ; AVX1-LABEL: zext_8i32_to_8i64:
881 ; AVX1: # %bb.0: # %entry
882 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
883 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
884 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero
885 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
886 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
887 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
888 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
889 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
890 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
893 ; AVX2-LABEL: zext_8i32_to_8i64:
894 ; AVX2: # %bb.0: # %entry
895 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
896 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
897 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
898 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
901 ; AVX512-LABEL: zext_8i32_to_8i64:
902 ; AVX512: # %bb.0: # %entry
903 ; AVX512-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
906 %B = zext <8 x i32> %A to <8 x i64>
; Test: load a <2 x i8> from %ptr (%X) and zero-extend it to <2 x i64> (%Y).
; Per the checks: SSE2 loads 16 bits via movzwl/movd and widens through three
; unpack stages (punpcklbw, punpcklwd, punpckldq) against a zeroed register;
; SSSE3 replaces the unpack chain with one pshufb; SSE4.1 and all AVX variants
; (common AVX prefix) expect a single pmovzxbq whose shuffle comment shows a
; memory source, i.e. the load is folded into the extend.
; The assertions are autogenerated; regenerate with
; utils/update_llc_test_checks.py instead of editing them by hand.
910 define <2 x i64> @load_zext_2i8_to_2i64(<2 x i8> *%ptr) {
911 ; SSE2-LABEL: load_zext_2i8_to_2i64:
912 ; SSE2: # %bb.0: # %entry
913 ; SSE2-NEXT: movzwl (%rdi), %eax
914 ; SSE2-NEXT: movd %eax, %xmm0
915 ; SSE2-NEXT: pxor %xmm1, %xmm1
916 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
917 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
918 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
921 ; SSSE3-LABEL: load_zext_2i8_to_2i64:
922 ; SSSE3: # %bb.0: # %entry
923 ; SSSE3-NEXT: movzwl (%rdi), %eax
924 ; SSSE3-NEXT: movd %eax, %xmm0
925 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
928 ; SSE41-LABEL: load_zext_2i8_to_2i64:
929 ; SSE41: # %bb.0: # %entry
930 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
933 ; AVX-LABEL: load_zext_2i8_to_2i64:
934 ; AVX: # %bb.0: # %entry
935 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
938 %X = load <2 x i8>, <2 x i8>* %ptr
939 %Y = zext <2 x i8> %X to <2 x i64>
; Test: load a <4 x i8> from %ptr (%X) and zero-extend it to <4 x i32> (%Y).
; Per the checks: SSE2 and SSSE3 load 32 bits with movd and widen through
; punpcklbw then punpcklwd against a zeroed register; SSE4.1 and all AVX
; variants (common AVX prefix) expect a single pmovzxbd whose shuffle comment
; shows a memory source, i.e. the load is folded into the extend.
; The assertions are autogenerated; regenerate with
; utils/update_llc_test_checks.py instead of editing them by hand.
943 define <4 x i32> @load_zext_4i8_to_4i32(<4 x i8> *%ptr) {
944 ; SSE2-LABEL: load_zext_4i8_to_4i32:
945 ; SSE2: # %bb.0: # %entry
946 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
947 ; SSE2-NEXT: pxor %xmm1, %xmm1
948 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
949 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
952 ; SSSE3-LABEL: load_zext_4i8_to_4i32:
953 ; SSSE3: # %bb.0: # %entry
954 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
955 ; SSSE3-NEXT: pxor %xmm1, %xmm1
956 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
957 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
960 ; SSE41-LABEL: load_zext_4i8_to_4i32:
961 ; SSE41: # %bb.0: # %entry
962 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
965 ; AVX-LABEL: load_zext_4i8_to_4i32:
966 ; AVX: # %bb.0: # %entry
967 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
970 %X = load <4 x i8>, <4 x i8>* %ptr
971 %Y = zext <4 x i8> %X to <4 x i32>
; Test: load a <4 x i8> from %ptr (%X) and zero-extend it to <4 x i64> (%Y).
; Per the checks: SSE2 loads 32 bits with movd and widens through punpcklbw,
; punpcklwd and punpckldq/punpckhdq against a zeroed register; SSSE3 uses two
; pshufb on copies of the loaded value; SSE4.1 and AVX1 use two pmovzxbq with
; memory sources; AVX2 and AVX512 use one ymm vpmovzxbq from memory.
; The paired (v)pmovzxbq check lines read identically because `{{.*#+}}`
; wildcards the real operands and only the decoded shuffle comment is matched.
; Presumably the two loads use different address offsets -- TODO confirm
; against the actual llc output.
; The assertions are autogenerated; regenerate with
; utils/update_llc_test_checks.py instead of editing them by hand.
975 define <4 x i64> @load_zext_4i8_to_4i64(<4 x i8> *%ptr) {
976 ; SSE2-LABEL: load_zext_4i8_to_4i64:
977 ; SSE2: # %bb.0: # %entry
978 ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
979 ; SSE2-NEXT: pxor %xmm2, %xmm2
980 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
981 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
982 ; SSE2-NEXT: movdqa %xmm1, %xmm0
983 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
984 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
987 ; SSSE3-LABEL: load_zext_4i8_to_4i64:
988 ; SSSE3: # %bb.0: # %entry
989 ; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
990 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
991 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
992 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
995 ; SSE41-LABEL: load_zext_4i8_to_4i64:
996 ; SSE41: # %bb.0: # %entry
997 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
998 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1001 ; AVX1-LABEL: load_zext_4i8_to_4i64:
1002 ; AVX1: # %bb.0: # %entry
1003 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1004 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1005 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1008 ; AVX2-LABEL: load_zext_4i8_to_4i64:
1009 ; AVX2: # %bb.0: # %entry
1010 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1013 ; AVX512-LABEL: load_zext_4i8_to_4i64:
1014 ; AVX512: # %bb.0: # %entry
1015 ; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1018 %X = load <4 x i8>, <4 x i8>* %ptr
1019 %Y = zext <4 x i8> %X to <4 x i64>
; Test: load an <8 x i8> from %ptr (%X) and zero-extend it to <8 x i16> (%Y).
; Per the checks: SSE2 and SSSE3 load 64 bits with movq and interleave with a
; zeroed register (punpcklbw); SSE4.1 and all AVX variants (common AVX prefix)
; expect a single pmovzxbw whose shuffle comment shows a memory source, i.e.
; the load is folded into the extend.
; The assertions are autogenerated; regenerate with
; utils/update_llc_test_checks.py instead of editing them by hand.
1023 define <8 x i16> @load_zext_8i8_to_8i16(<8 x i8> *%ptr) {
1024 ; SSE2-LABEL: load_zext_8i8_to_8i16:
1025 ; SSE2: # %bb.0: # %entry
1026 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1027 ; SSE2-NEXT: pxor %xmm1, %xmm1
1028 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1031 ; SSSE3-LABEL: load_zext_8i8_to_8i16:
1032 ; SSSE3: # %bb.0: # %entry
1033 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1034 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1035 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1038 ; SSE41-LABEL: load_zext_8i8_to_8i16:
1039 ; SSE41: # %bb.0: # %entry
1040 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1043 ; AVX-LABEL: load_zext_8i8_to_8i16:
1044 ; AVX: # %bb.0: # %entry
1045 ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1048 %X = load <8 x i8>, <8 x i8>* %ptr
1049 %Y = zext <8 x i8> %X to <8 x i16>
; Test: load an <8 x i8> from %ptr (%X) and zero-extend it to <8 x i32> (%Y).
; Per the checks: SSE2 and SSSE3 load 64 bits with movq, widen bytes with
; punpcklbw, then split into two results with punpcklwd/punpckhwd; SSE4.1 and
; AVX1 use two pmovzxbd with memory sources; AVX2 and AVX512 use one ymm
; vpmovzxbd from memory.
; The paired (v)pmovzxbd check lines read identically because `{{.*#+}}`
; wildcards the real operands and only the decoded shuffle comment is matched.
; Presumably the two loads use different address offsets -- TODO confirm
; against the actual llc output.
; The assertions are autogenerated; regenerate with
; utils/update_llc_test_checks.py instead of editing them by hand.
1053 define <8 x i32> @load_zext_8i8_to_8i32(<8 x i8> *%ptr) {
1054 ; SSE2-LABEL: load_zext_8i8_to_8i32:
1055 ; SSE2: # %bb.0: # %entry
1056 ; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1057 ; SSE2-NEXT: pxor %xmm2, %xmm2
1058 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1059 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1060 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1061 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1064 ; SSSE3-LABEL: load_zext_8i8_to_8i32:
1065 ; SSSE3: # %bb.0: # %entry
1066 ; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1067 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1068 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1069 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1070 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1071 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1074 ; SSE41-LABEL: load_zext_8i8_to_8i32:
1075 ; SSE41: # %bb.0: # %entry
1076 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1077 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1080 ; AVX1-LABEL: load_zext_8i8_to_8i32:
1081 ; AVX1: # %bb.0: # %entry
1082 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1083 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1084 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1087 ; AVX2-LABEL: load_zext_8i8_to_8i32:
1088 ; AVX2: # %bb.0: # %entry
1089 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1092 ; AVX512-LABEL: load_zext_8i8_to_8i32:
1093 ; AVX512: # %bb.0: # %entry
1094 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1097 %X = load <8 x i8>, <8 x i8>* %ptr
1098 %Y = zext <8 x i8> %X to <8 x i32>
; Test: load a full <16 x i8> from %ptr (%X), keep elements 0-7 (shufflevector
; %Y) and zero-extend those to <8 x i32> (%Z).
; Per the checks: SSE2 and SSSE3 load the whole vector with movdqa, widen with
; punpcklbw, then split with punpcklwd/punpckhwd; SSE4.1 and AVX1 also load
; the whole vector into a register and apply pmovzxbd twice, with a pshufd
; [1,1,2,3] feeding the second one; AVX2 and AVX512 expect one ymm vpmovzxbd
; with a memory source instead of a separate full-width load.
; The assertions are autogenerated; regenerate with
; utils/update_llc_test_checks.py instead of editing them by hand.
1102 define <8 x i32> @load_zext_16i8_to_8i32(<16 x i8> *%ptr) {
1103 ; SSE2-LABEL: load_zext_16i8_to_8i32:
1104 ; SSE2: # %bb.0: # %entry
1105 ; SSE2-NEXT: movdqa (%rdi), %xmm1
1106 ; SSE2-NEXT: pxor %xmm2, %xmm2
1107 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1108 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1109 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1110 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1113 ; SSSE3-LABEL: load_zext_16i8_to_8i32:
1114 ; SSSE3: # %bb.0: # %entry
1115 ; SSSE3-NEXT: movdqa (%rdi), %xmm1
1116 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1117 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1118 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1119 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1120 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1123 ; SSE41-LABEL: load_zext_16i8_to_8i32:
1124 ; SSE41: # %bb.0: # %entry
1125 ; SSE41-NEXT: movdqa (%rdi), %xmm1
1126 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1127 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
1128 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1131 ; AVX1-LABEL: load_zext_16i8_to_8i32:
1132 ; AVX1: # %bb.0: # %entry
1133 ; AVX1-NEXT: vmovdqa (%rdi), %xmm0
1134 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1135 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1136 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1137 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1140 ; AVX2-LABEL: load_zext_16i8_to_8i32:
1141 ; AVX2: # %bb.0: # %entry
1142 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1145 ; AVX512-LABEL: load_zext_16i8_to_8i32:
1146 ; AVX512: # %bb.0: # %entry
1147 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1150 %X = load <16 x i8>, <16 x i8>* %ptr
1151 %Y = shufflevector <16 x i8> %X, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1152 %Z = zext <8 x i8> %Y to <8 x i32>
; Test: load an <8 x i8> from %ptr (%X) and zero-extend it to <8 x i64> (%Y),
; i.e. an 8x widening of every lane.
; Per the checks: SSE2 loads 64 bits with movq and widens through three unpack
; stages (bytes, words, dwords) against a zeroed register; SSSE3 uses four
; pshufb on copies of the loaded value; SSE4.1 and AVX1 use four pmovzxbq with
; memory sources; AVX2 uses two ymm vpmovzxbq from memory; AVX512 uses a
; single zmm vpmovzxbq from memory.
; The repeated (v)pmovzxbq check lines read identically because `{{.*#+}}`
; wildcards the real operands and only the decoded shuffle comment is matched.
; Presumably each load uses a different address offset -- TODO confirm
; against the actual llc output.
; The assertions are autogenerated; regenerate with
; utils/update_llc_test_checks.py instead of editing them by hand.
1156 define <8 x i64> @load_zext_8i8_to_8i64(<8 x i8> *%ptr) {
1157 ; SSE2-LABEL: load_zext_8i8_to_8i64:
1158 ; SSE2: # %bb.0: # %entry
1159 ; SSE2-NEXT: movq {{.*#+}} xmm3 = mem[0],zero
1160 ; SSE2-NEXT: pxor %xmm4, %xmm4
1161 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
1162 ; SSE2-NEXT: movdqa %xmm3, %xmm1
1163 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
1164 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1165 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
1166 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
1167 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
1168 ; SSE2-NEXT: movdqa %xmm3, %xmm2
1169 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
1170 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
1173 ; SSSE3-LABEL: load_zext_8i8_to_8i64:
1174 ; SSSE3: # %bb.0: # %entry
1175 ; SSSE3-NEXT: movq {{.*#+}} xmm3 = mem[0],zero
1176 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
1177 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1178 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
1179 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
1180 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
1181 ; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[4],zero,zero,zero,zero,zero,zero,zero,xmm2[5],zero,zero,zero,zero,zero,zero,zero
1182 ; SSSE3-NEXT: pshufb {{.*#+}} xmm3 = xmm3[6],zero,zero,zero,zero,zero,zero,zero,xmm3[7],zero,zero,zero,zero,zero,zero,zero
1185 ; SSE41-LABEL: load_zext_8i8_to_8i64:
1186 ; SSE41: # %bb.0: # %entry
1187 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1188 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1189 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1190 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1193 ; AVX1-LABEL: load_zext_8i8_to_8i64:
1194 ; AVX1: # %bb.0: # %entry
1195 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1196 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1197 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1198 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1199 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
1200 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
1203 ; AVX2-LABEL: load_zext_8i8_to_8i64:
1204 ; AVX2: # %bb.0: # %entry
1205 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1206 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1209 ; AVX512-LABEL: load_zext_8i8_to_8i64:
1210 ; AVX512: # %bb.0: # %entry
1211 ; AVX512-NEXT: vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
1214 %X = load <8 x i8>, <8 x i8>* %ptr
1215 %Y = zext <8 x i8> %X to <8 x i64>
; Test: load a <16 x i8> vector from %ptr and zero-extend every lane to i16.
; The SSE2 and SSSE3 runs expect the loaded bytes to be interleaved with a
; zeroed register (punpcklbw / punpckhbw); the SSE4.1 and AVX runs expect the
; load to be folded into (v)pmovzxbw, with AVX1 joining two xmm halves via
; vinsertf128 and AVX2 / AVX512 producing the ymm result directly.
1219 define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) {
1220 ; SSE2-LABEL: load_zext_16i8_to_16i16:
1221 ; SSE2: # %bb.0: # %entry
1222 ; SSE2-NEXT: movdqa (%rdi), %xmm1
1223 ; SSE2-NEXT: pxor %xmm2, %xmm2
1224 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1225 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1226 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
1229 ; SSSE3-LABEL: load_zext_16i8_to_16i16:
1230 ; SSSE3: # %bb.0: # %entry
1231 ; SSSE3-NEXT: movdqa (%rdi), %xmm1
1232 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1233 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1234 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1235 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
1238 ; SSE41-LABEL: load_zext_16i8_to_16i16:
1239 ; SSE41: # %bb.0: # %entry
1240 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1241 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1244 ; AVX1-LABEL: load_zext_16i8_to_16i16:
1245 ; AVX1: # %bb.0: # %entry
1246 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1247 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1248 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1251 ; AVX2-LABEL: load_zext_16i8_to_16i16:
1252 ; AVX2: # %bb.0: # %entry
1253 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1256 ; AVX512-LABEL: load_zext_16i8_to_16i16:
1257 ; AVX512: # %bb.0: # %entry
1258 ; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1261 %X = load <16 x i8>, <16 x i8>* %ptr
1262 %Y = zext <16 x i8> %X to <16 x i16>
; Test: load a <2 x i16> vector from %ptr and zero-extend both lanes to i64.
; The SSE2 and SSSE3 runs expect a 32-bit movd load followed by two unpacks
; against a zeroed register; the SSE4.1 and AVX runs expect a single
; (v)pmovzxwq that reads its source from memory.
1266 define <2 x i64> @load_zext_2i16_to_2i64(<2 x i16> *%ptr) {
1267 ; SSE2-LABEL: load_zext_2i16_to_2i64:
1268 ; SSE2: # %bb.0: # %entry
1269 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1270 ; SSE2-NEXT: pxor %xmm1, %xmm1
1271 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1272 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1275 ; SSSE3-LABEL: load_zext_2i16_to_2i64:
1276 ; SSSE3: # %bb.0: # %entry
1277 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1278 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1279 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1280 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1283 ; SSE41-LABEL: load_zext_2i16_to_2i64:
1284 ; SSE41: # %bb.0: # %entry
1285 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1288 ; AVX-LABEL: load_zext_2i16_to_2i64:
1289 ; AVX: # %bb.0: # %entry
1290 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1293 %X = load <2 x i16>, <2 x i16>* %ptr
1294 %Y = zext <2 x i16> %X to <2 x i64>
; Test: load a <4 x i16> vector from %ptr and zero-extend every lane to i32.
; The SSE2 and SSSE3 runs expect a 64-bit movq load followed by punpcklwd
; against a zeroed register; the SSE4.1 and AVX runs expect a single
; (v)pmovzxwd that reads its source from memory.
1298 define <4 x i32> @load_zext_4i16_to_4i32(<4 x i16> *%ptr) {
1299 ; SSE2-LABEL: load_zext_4i16_to_4i32:
1300 ; SSE2: # %bb.0: # %entry
1301 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1302 ; SSE2-NEXT: pxor %xmm1, %xmm1
1303 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1306 ; SSSE3-LABEL: load_zext_4i16_to_4i32:
1307 ; SSSE3: # %bb.0: # %entry
1308 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1309 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1310 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1313 ; SSE41-LABEL: load_zext_4i16_to_4i32:
1314 ; SSE41: # %bb.0: # %entry
1315 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1318 ; AVX-LABEL: load_zext_4i16_to_4i32:
1319 ; AVX: # %bb.0: # %entry
1320 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1323 %X = load <4 x i16>, <4 x i16>* %ptr
1324 %Y = zext <4 x i16> %X to <4 x i32>
; Test: load a <4 x i16> vector from %ptr and zero-extend every lane to i64,
; giving a 256-bit result. The SSE runs expect the result split across xmm0 and
; xmm1 (unpacks against zero on SSE2 / SSSE3, two pmovzxwq on SSE4.1); the AVX1
; run expects two xmm vpmovzxwq joined by vinsertf128; the AVX2 and AVX512 runs
; expect a single ymm vpmovzxwq that reads its source from memory.
1328 define <4 x i64> @load_zext_4i16_to_4i64(<4 x i16> *%ptr) {
1329 ; SSE2-LABEL: load_zext_4i16_to_4i64:
1330 ; SSE2: # %bb.0: # %entry
1331 ; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1332 ; SSE2-NEXT: pxor %xmm2, %xmm2
1333 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1334 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1335 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1336 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1339 ; SSSE3-LABEL: load_zext_4i16_to_4i64:
1340 ; SSSE3: # %bb.0: # %entry
1341 ; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1342 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1343 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1344 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1345 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1346 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1349 ; SSE41-LABEL: load_zext_4i16_to_4i64:
1350 ; SSE41: # %bb.0: # %entry
1351 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1352 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1355 ; AVX1-LABEL: load_zext_4i16_to_4i64:
1356 ; AVX1: # %bb.0: # %entry
1357 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1358 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1359 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1362 ; AVX2-LABEL: load_zext_4i16_to_4i64:
1363 ; AVX2: # %bb.0: # %entry
1364 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1367 ; AVX512-LABEL: load_zext_4i16_to_4i64:
1368 ; AVX512: # %bb.0: # %entry
1369 ; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1372 %X = load <4 x i16>, <4 x i16>* %ptr
1373 %Y = zext <4 x i16> %X to <4 x i64>
; Test: load an <8 x i16> vector from %ptr and zero-extend every lane to i32,
; giving a 256-bit result. The SSE2 and SSSE3 runs expect a full movdqa load
; plus punpcklwd / punpckhwd against a zeroed register; the SSE4.1 and AVX1
; runs expect two (v)pmovzxwd from memory (AVX1 joins them with vinsertf128);
; the AVX2 and AVX512 runs expect one ymm vpmovzxwd from memory.
1377 define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {
1378 ; SSE2-LABEL: load_zext_8i16_to_8i32:
1379 ; SSE2: # %bb.0: # %entry
1380 ; SSE2-NEXT: movdqa (%rdi), %xmm1
1381 ; SSE2-NEXT: pxor %xmm2, %xmm2
1382 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1383 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1384 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1387 ; SSSE3-LABEL: load_zext_8i16_to_8i32:
1388 ; SSSE3: # %bb.0: # %entry
1389 ; SSSE3-NEXT: movdqa (%rdi), %xmm1
1390 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1391 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1392 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1393 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1396 ; SSE41-LABEL: load_zext_8i16_to_8i32:
1397 ; SSE41: # %bb.0: # %entry
1398 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1399 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1402 ; AVX1-LABEL: load_zext_8i16_to_8i32:
1403 ; AVX1: # %bb.0: # %entry
1404 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1405 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1406 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1409 ; AVX2-LABEL: load_zext_8i16_to_8i32:
1410 ; AVX2: # %bb.0: # %entry
1411 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1414 ; AVX512-LABEL: load_zext_8i16_to_8i32:
1415 ; AVX512: # %bb.0: # %entry
1416 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1419 %X = load <8 x i16>, <8 x i16>* %ptr
1420 %Y = zext <8 x i16> %X to <8 x i32>
; Test: load a <2 x i32> vector from %ptr and zero-extend both lanes to i64.
; The SSE2 and SSSE3 runs expect a 64-bit movsd load followed by unpcklps
; against a zeroed register; the SSE4.1 and AVX runs expect a single
; (v)pmovzxdq that reads its source from memory.
1424 define <2 x i64> @load_zext_2i32_to_2i64(<2 x i32> *%ptr) {
1425 ; SSE2-LABEL: load_zext_2i32_to_2i64:
1426 ; SSE2: # %bb.0: # %entry
1427 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1428 ; SSE2-NEXT: xorps %xmm1, %xmm1
1429 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1432 ; SSSE3-LABEL: load_zext_2i32_to_2i64:
1433 ; SSSE3: # %bb.0: # %entry
1434 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1435 ; SSSE3-NEXT: xorps %xmm1, %xmm1
1436 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1439 ; SSE41-LABEL: load_zext_2i32_to_2i64:
1440 ; SSE41: # %bb.0: # %entry
1441 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1444 ; AVX-LABEL: load_zext_2i32_to_2i64:
1445 ; AVX: # %bb.0: # %entry
1446 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1449 %X = load <2 x i32>, <2 x i32>* %ptr
1450 %Y = zext <2 x i32> %X to <2 x i64>
; Test: load a <4 x i32> vector from %ptr and zero-extend every lane to i64,
; giving a 256-bit result. The SSE2 and SSSE3 runs expect a movaps load plus
; unpcklps / unpckhps against a zeroed register; the SSE4.1 and AVX1 runs
; expect two (v)pmovzxdq from memory (AVX1 joins them with vinsertf128); the
; AVX2 and AVX512 runs expect one ymm vpmovzxdq from memory.
1454 define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {
1455 ; SSE2-LABEL: load_zext_4i32_to_4i64:
1456 ; SSE2: # %bb.0: # %entry
1457 ; SSE2-NEXT: movaps (%rdi), %xmm1
1458 ; SSE2-NEXT: xorps %xmm2, %xmm2
1459 ; SSE2-NEXT: movaps %xmm1, %xmm0
1460 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1461 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1464 ; SSSE3-LABEL: load_zext_4i32_to_4i64:
1465 ; SSSE3: # %bb.0: # %entry
1466 ; SSSE3-NEXT: movaps (%rdi), %xmm1
1467 ; SSSE3-NEXT: xorps %xmm2, %xmm2
1468 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1469 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1470 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1473 ; SSE41-LABEL: load_zext_4i32_to_4i64:
1474 ; SSE41: # %bb.0: # %entry
1475 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1476 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
1479 ; AVX1-LABEL: load_zext_4i32_to_4i64:
1480 ; AVX1: # %bb.0: # %entry
1481 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1482 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
1483 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1486 ; AVX2-LABEL: load_zext_4i32_to_4i64:
1487 ; AVX2: # %bb.0: # %entry
1488 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1491 ; AVX512-LABEL: load_zext_4i32_to_4i64:
1492 ; AVX512: # %bb.0: # %entry
1493 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1496 %X = load <4 x i32>, <4 x i32>* %ptr
1497 %Y = zext <4 x i32> %X to <4 x i64>
; Test: zero-extend an <8 x i8> argument (passed in xmm0 per the checks below)
; to <8 x i32>. The SSE2 and SSSE3 runs expect a byte unpack followed by word
; unpacks against a zeroed register; the SSE4.1 and AVX1 runs expect two
; (v)pmovzxbd with a (v)pshufd in between to bring bytes 4..7 to the bottom;
; the AVX2 and AVX512 runs expect a single ymm vpmovzxbd.
1501 define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
1502 ; SSE2-LABEL: zext_8i8_to_8i32:
1503 ; SSE2: # %bb.0: # %entry
1504 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1505 ; SSE2-NEXT: pxor %xmm2, %xmm2
1506 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1507 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1508 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1509 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1512 ; SSSE3-LABEL: zext_8i8_to_8i32:
1513 ; SSSE3: # %bb.0: # %entry
1514 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1515 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1516 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1517 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1518 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1519 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1522 ; SSE41-LABEL: zext_8i8_to_8i32:
1523 ; SSE41: # %bb.0: # %entry
1524 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1525 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1526 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1527 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1530 ; AVX1-LABEL: zext_8i8_to_8i32:
1531 ; AVX1: # %bb.0: # %entry
1532 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1533 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1534 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1535 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1538 ; AVX2-LABEL: zext_8i8_to_8i32:
1539 ; AVX2: # %bb.0: # %entry
1540 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1543 ; AVX512-LABEL: zext_8i8_to_8i32:
1544 ; AVX512: # %bb.0: # %entry
1545 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1548 %t = zext <8 x i8> %z to <8 x i32>
; Test: a zero-extension of <8 x i16> to <8 x i32> written as a shuffle that
; interleaves each lane of %A with a zero lane, then bitcasts the 16 x i16
; result to 8 x i32. The AVX2 and AVX512 runs expect this to be recognised as
; a single vpmovzxwd; the SSE4.1 and AVX1 runs expect (v)pmovzxwd for the low
; half and (v)punpckhwd against zero for the high half; the SSE2 and SSSE3
; runs expect punpcklwd / punpckhwd against zero.
1552 define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
1553 ; SSE2-LABEL: shuf_zext_8i16_to_8i32:
1554 ; SSE2: # %bb.0: # %entry
1555 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1556 ; SSE2-NEXT: pxor %xmm2, %xmm2
1557 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1558 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1561 ; SSSE3-LABEL: shuf_zext_8i16_to_8i32:
1562 ; SSSE3: # %bb.0: # %entry
1563 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1564 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1565 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1566 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1569 ; SSE41-LABEL: shuf_zext_8i16_to_8i32:
1570 ; SSE41: # %bb.0: # %entry
1571 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1572 ; SSE41-NEXT: pxor %xmm2, %xmm2
1573 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1574 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1577 ; AVX1-LABEL: shuf_zext_8i16_to_8i32:
1578 ; AVX1: # %bb.0: # %entry
1579 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1580 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1581 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1582 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1585 ; AVX2-LABEL: shuf_zext_8i16_to_8i32:
1586 ; AVX2: # %bb.0: # %entry
1587 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1590 ; AVX512-LABEL: shuf_zext_8i16_to_8i32:
1591 ; AVX512: # %bb.0: # %entry
1592 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; Mask index 8 selects lane 0 of the zeroinitializer operand, i.e. a zero lane.
1595 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
1596 %Z = bitcast <16 x i16> %B to <8 x i32>
; Test: a zero-extension of <4 x i32> to <4 x i64> written as a shuffle that
; interleaves each lane of %A with a zero lane, then bitcasts the 8 x i32
; result to 4 x i64. The AVX2 and AVX512 runs expect a single vpmovzxdq; the
; SSE4.1 and AVX1 runs expect (v)pmovzxdq for the low half and (v)punpckhdq
; against zero for the high half; the SSE2 and SSSE3 runs expect
; unpcklps / unpckhps against zero.
1600 define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
1601 ; SSE2-LABEL: shuf_zext_4i32_to_4i64:
1602 ; SSE2: # %bb.0: # %entry
1603 ; SSE2-NEXT: movaps %xmm0, %xmm1
1604 ; SSE2-NEXT: xorps %xmm2, %xmm2
1605 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1606 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1609 ; SSSE3-LABEL: shuf_zext_4i32_to_4i64:
1610 ; SSSE3: # %bb.0: # %entry
1611 ; SSSE3-NEXT: movaps %xmm0, %xmm1
1612 ; SSSE3-NEXT: xorps %xmm2, %xmm2
1613 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1614 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1617 ; SSE41-LABEL: shuf_zext_4i32_to_4i64:
1618 ; SSE41: # %bb.0: # %entry
1619 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1620 ; SSE41-NEXT: pxor %xmm2, %xmm2
1621 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1622 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1625 ; AVX1-LABEL: shuf_zext_4i32_to_4i64:
1626 ; AVX1: # %bb.0: # %entry
1627 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1628 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1629 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1630 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1633 ; AVX2-LABEL: shuf_zext_4i32_to_4i64:
1634 ; AVX2: # %bb.0: # %entry
1635 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1638 ; AVX512-LABEL: shuf_zext_4i32_to_4i64:
1639 ; AVX512: # %bb.0: # %entry
1640 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; Mask index 4 selects lane 0 of the zeroinitializer operand, i.e. a zero lane.
1643 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
1644 %Z = bitcast <8 x i32> %B to <4 x i64>
; Test: a zero-extension of <8 x i8> to <8 x i32> written as a shuffle that
; follows each byte of %A with three zero bytes, then bitcasts the 32 x i8
; result to 8 x i32. The expected code in every run is the same instruction
; sequence as for the plain zext form in @zext_8i8_to_8i32.
1648 define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) {
1649 ; SSE2-LABEL: shuf_zext_8i8_to_8i32:
1650 ; SSE2: # %bb.0: # %entry
1651 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1652 ; SSE2-NEXT: pxor %xmm2, %xmm2
1653 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1654 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1655 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1656 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1659 ; SSSE3-LABEL: shuf_zext_8i8_to_8i32:
1660 ; SSSE3: # %bb.0: # %entry
1661 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1662 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1663 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1664 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1665 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1666 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1669 ; SSE41-LABEL: shuf_zext_8i8_to_8i32:
1670 ; SSE41: # %bb.0: # %entry
1671 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1672 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1673 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1674 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1677 ; AVX1-LABEL: shuf_zext_8i8_to_8i32:
1678 ; AVX1: # %bb.0: # %entry
1679 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1680 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1681 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1682 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1685 ; AVX2-LABEL: shuf_zext_8i8_to_8i32:
1686 ; AVX2: # %bb.0: # %entry
1687 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1690 ; AVX512-LABEL: shuf_zext_8i8_to_8i32:
1691 ; AVX512: # %bb.0: # %entry
1692 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; Mask index 8 selects lane 0 of the zeroinitializer operand, i.e. a zero byte.
1695 %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8>
1696 %Z = bitcast <32 x i8> %B to <8 x i32>
; Test: a shuffle-based zero-extension of two bytes of %A to i64 lanes,
; starting at byte element 6 rather than element 0 (bytes 6 and 7 are each
; followed by seven zero bytes). The SSE2 run expects a chain of unpacks
; against zero; the SSSE3 run expects one pshufb; the SSE4.1 and AVX runs
; expect a 48-bit right shift (psrlq) to move byte 6 to the bottom, followed
; by (v)pmovzxbq.
1700 define <2 x i64> @shuf_zext_16i8_to_2i64_offset6(<16 x i8> %A) nounwind uwtable readnone ssp {
1701 ; SSE2-LABEL: shuf_zext_16i8_to_2i64_offset6:
1702 ; SSE2: # %bb.0: # %entry
1703 ; SSE2-NEXT: pxor %xmm1, %xmm1
1704 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1705 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1706 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1709 ; SSSE3-LABEL: shuf_zext_16i8_to_2i64_offset6:
1710 ; SSSE3: # %bb.0: # %entry
1711 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
1714 ; SSE41-LABEL: shuf_zext_16i8_to_2i64_offset6:
1715 ; SSE41: # %bb.0: # %entry
1716 ; SSE41-NEXT: psrlq $48, %xmm0
1717 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1720 ; AVX-LABEL: shuf_zext_16i8_to_2i64_offset6:
1721 ; AVX: # %bb.0: # %entry
1722 ; AVX-NEXT: vpsrlq $48, %xmm0, %xmm0
1723 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; Mask index 16 selects lane 0 of the zeroinitializer operand, i.e. a zero byte.
1726 %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1727 %Z = bitcast <16 x i8> %B to <2 x i64>
; Test: a shuffle-based zero-extension of four bytes of %A to i64 lanes,
; starting at byte element 11 (bytes 11..14 are each followed by seven zero
; bytes), giving a 256-bit result. The SSE2 run expects a byte shift plus
; unpacks against zero; the SSSE3 run expects two pshufb; the SSE4.1 and AVX1
; runs expect two psrldq + pmovzxbq pairs (one per 128-bit half); the AVX2 and
; AVX512 runs expect one vpsrldq followed by a ymm vpmovzxbq.
1731 define <4 x i64> @shuf_zext_16i8_to_4i64_offset11(<16 x i8> %A) nounwind uwtable readnone ssp {
1732 ; SSE2-LABEL: shuf_zext_16i8_to_4i64_offset11:
1733 ; SSE2: # %bb.0: # %entry
1734 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1735 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
1736 ; SSE2-NEXT: pxor %xmm2, %xmm2
1737 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
1738 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1739 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1740 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1741 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1742 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1745 ; SSSE3-LABEL: shuf_zext_16i8_to_4i64_offset11:
1746 ; SSSE3: # %bb.0: # %entry
1747 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1748 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[11],zero,zero,zero,zero,zero,zero,zero,xmm0[12],zero,zero,zero,zero,zero,zero,zero
1749 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[13],zero,zero,zero,zero,zero,zero,zero,xmm1[14],zero,zero,zero,zero,zero,zero,zero
1752 ; SSE41-LABEL: shuf_zext_16i8_to_4i64_offset11:
1753 ; SSE41: # %bb.0: # %entry
1754 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1755 ; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1756 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1757 ; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1758 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1759 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1762 ; AVX1-LABEL: shuf_zext_16i8_to_4i64_offset11:
1763 ; AVX1: # %bb.0: # %entry
1764 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1765 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1766 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1767 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1768 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1771 ; AVX2-LABEL: shuf_zext_16i8_to_4i64_offset11:
1772 ; AVX2: # %bb.0: # %entry
1773 ; AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1774 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
1777 ; AVX512-LABEL: shuf_zext_16i8_to_4i64_offset11:
1778 ; AVX512: # %bb.0: # %entry
1779 ; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1780 ; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; Mask index 16 selects lane 0 of the zeroinitializer operand, i.e. a zero byte.
1783 %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <32 x i32> <i32 11, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1784 %Z = bitcast <32 x i8> %B to <4 x i64>
; Test: a shuffle-based zero-extension of two i16 lanes of %A to i64 lanes,
; taking elements 3 and 4 (each followed by three zero lanes). The SSE2 run
; expects a byte shift plus unpacks against zero; the SSSE3 run expects one
; pshufb; the SSE4.1 and AVX runs expect a 6-byte psrldq followed by
; (v)pmovzxwq.
; NOTE(review): the mask starts at i16 element 3, which is byte offset 6; the
; other 8i16 "offsetN" tests in this file use the element index as N. The
; name presumably counts bytes here - confirm before relying on the suffix.
1788 define <2 x i64> @shuf_zext_8i16_to_2i64_offset6(<8 x i16> %A) nounwind uwtable readnone ssp {
1789 ; SSE2-LABEL: shuf_zext_8i16_to_2i64_offset6:
1790 ; SSE2: # %bb.0: # %entry
1791 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1792 ; SSE2-NEXT: pxor %xmm1, %xmm1
1793 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1794 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1797 ; SSSE3-LABEL: shuf_zext_8i16_to_2i64_offset6:
1798 ; SSSE3: # %bb.0: # %entry
1799 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero
1802 ; SSE41-LABEL: shuf_zext_8i16_to_2i64_offset6:
1803 ; SSE41: # %bb.0: # %entry
1804 ; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1805 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1808 ; AVX-LABEL: shuf_zext_8i16_to_2i64_offset6:
1809 ; AVX: # %bb.0: # %entry
1810 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1811 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; Mask index 8 selects lane 0 of the zeroinitializer operand, i.e. a zero lane.
1814 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8>
1815 %Z = bitcast <8 x i16> %B to <2 x i64>
; Test: a shuffle-based zero-extension of four i16 lanes of %A to i64 lanes,
; starting at element 2 (elements 2..5 are each followed by three zero lanes),
; giving a 256-bit result. The SSE2 and SSSE3 runs expect unpacks against
; zero; the SSE4.1 and AVX1 runs expect two (v)pshufd + (v)pmovzxwq pairs (one
; per 128-bit half); the AVX2 and AVX512 runs expect one vpshufd followed by a
; ymm vpmovzxwq.
1819 define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable readnone ssp {
1820 ; SSE2-LABEL: shuf_zext_8i16_to_4i64_offset2:
1821 ; SSE2: # %bb.0: # %entry
1822 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1823 ; SSE2-NEXT: pxor %xmm2, %xmm2
1824 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1825 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1826 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1827 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1830 ; SSSE3-LABEL: shuf_zext_8i16_to_4i64_offset2:
1831 ; SSSE3: # %bb.0: # %entry
1832 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1833 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1834 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1835 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1836 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1837 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1840 ; SSE41-LABEL: shuf_zext_8i16_to_4i64_offset2:
1841 ; SSE41: # %bb.0: # %entry
1842 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1843 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1844 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1845 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1846 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1849 ; AVX1-LABEL: shuf_zext_8i16_to_4i64_offset2:
1850 ; AVX1: # %bb.0: # %entry
1851 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1852 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1853 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1854 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1855 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1858 ; AVX2-LABEL: shuf_zext_8i16_to_4i64_offset2:
1859 ; AVX2: # %bb.0: # %entry
1860 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
1861 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1864 ; AVX512-LABEL: shuf_zext_8i16_to_4i64_offset2:
1865 ; AVX512: # %bb.0: # %entry
1866 ; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
1867 ; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; Mask index 8 selects lane 0 of the zeroinitializer operand, i.e. a zero lane.
1870 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8>
1871 %Z = bitcast <16 x i16> %B to <4 x i64>
; Test: a shuffle-based zero-extension of four i16 lanes of %A to i32 lanes,
; starting at element 1 (elements 1..4 are each followed by one zero lane).
; Most runs expect a 2-byte psrldq followed by a zero-extend (punpcklwd
; against zero on SSE2 / SSSE3, (v)pmovzxwd elsewhere). The two runs that
; enable +fast-variable-shuffle (the AVX2-FAST and AVX512BW prefixes) instead
; expect a single vpshufb, which is why this function is checked with the
; finer-grained AVX2-SLOW / AVX2-FAST / AVX512F / AVX512BW prefixes.
1875 define <4 x i32> @shuf_zext_8i16_to_4i32_offset1(<8 x i16> %A) nounwind uwtable readnone ssp {
1876 ; SSE2-LABEL: shuf_zext_8i16_to_4i32_offset1:
1877 ; SSE2: # %bb.0: # %entry
1878 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1879 ; SSE2-NEXT: pxor %xmm1, %xmm1
1880 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1883 ; SSSE3-LABEL: shuf_zext_8i16_to_4i32_offset1:
1884 ; SSSE3: # %bb.0: # %entry
1885 ; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1886 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1887 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1890 ; SSE41-LABEL: shuf_zext_8i16_to_4i32_offset1:
1891 ; SSE41: # %bb.0: # %entry
1892 ; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1893 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1896 ; AVX1-LABEL: shuf_zext_8i16_to_4i32_offset1:
1897 ; AVX1: # %bb.0: # %entry
1898 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1899 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1902 ; AVX2-SLOW-LABEL: shuf_zext_8i16_to_4i32_offset1:
1903 ; AVX2-SLOW: # %bb.0: # %entry
1904 ; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1905 ; AVX2-SLOW-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1906 ; AVX2-SLOW-NEXT: retq
1908 ; AVX2-FAST-LABEL: shuf_zext_8i16_to_4i32_offset1:
1909 ; AVX2-FAST: # %bb.0: # %entry
1910 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,xmm0[4,5],zero,zero,xmm0[6,7],zero,zero,xmm0[8,9],zero,zero
1911 ; AVX2-FAST-NEXT: retq
1913 ; AVX512F-LABEL: shuf_zext_8i16_to_4i32_offset1:
1914 ; AVX512F: # %bb.0: # %entry
1915 ; AVX512F-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1916 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1917 ; AVX512F-NEXT: retq
1919 ; AVX512BW-LABEL: shuf_zext_8i16_to_4i32_offset1:
1920 ; AVX512BW: # %bb.0: # %entry
1921 ; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,xmm0[4,5],zero,zero,xmm0[6,7],zero,zero,xmm0[8,9],zero,zero
1922 ; AVX512BW-NEXT: retq
; Mask index 8 selects lane 0 of the zeroinitializer operand, i.e. a zero lane.
1924 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8>
1925 %Z = bitcast <8 x i16> %B to <4 x i32>
; Shuffle-based zero-extension starting at element 3: elements 3..7 of %A are
; each paired with zero (mask index 8 is element 0 of the zeroinitializer
; operand), the last three source lanes of the mask are undef, and the
; <16 x i16> result is bitcast to <8 x i32>.
; Per the assertions below, the AVX2 and AVX512 runs shift right by 6 bytes and
; widen with one ymm vpmovzxwd, while the SSE and AVX1 runs compute the two
; 128-bit halves separately.
1929 define <8 x i32> @shuf_zext_8i16_to_8i32_offset3(<8 x i16> %A) nounwind uwtable readnone ssp {
1930 ; SSE2-LABEL: shuf_zext_8i16_to_8i32_offset3:
1931 ; SSE2: # %bb.0: # %entry
1932 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1933 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1934 ; SSE2-NEXT: pxor %xmm2, %xmm2
1935 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1936 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1939 ; SSSE3-LABEL: shuf_zext_8i16_to_8i32_offset3:
1940 ; SSSE3: # %bb.0: # %entry
1941 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1942 ; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1943 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1944 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1945 ; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1948 ; SSE41-LABEL: shuf_zext_8i16_to_8i32_offset3:
1949 ; SSE41: # %bb.0: # %entry
1950 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1951 ; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1952 ; SSE41-NEXT: pxor %xmm2, %xmm2
1953 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1954 ; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1957 ; AVX1-LABEL: shuf_zext_8i16_to_8i32_offset3:
1958 ; AVX1: # %bb.0: # %entry
1959 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1960 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1961 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1962 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1963 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1966 ; AVX2-LABEL: shuf_zext_8i16_to_8i32_offset3:
1967 ; AVX2: # %bb.0: # %entry
1968 ; AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1969 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1972 ; AVX512-LABEL: shuf_zext_8i16_to_8i32_offset3:
1973 ; AVX512: # %bb.0: # %entry
1974 ; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1975 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1978 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8, i32 undef, i32 8, i32 undef, i32 8, i32 undef, i32 8>
1979 %Z = bitcast <16 x i16> %B to <8 x i32>
; Shuffle-based zero-extension of the upper half of a <16 x i16>: elements
; 8..12 and 14 of %A are each paired with zero (mask index 16 is element 0 of
; the zeroinitializer operand); the lanes where elements 13 and 15 would go are
; undef in the mask. The <16 x i16> result is bitcast to <8 x i32>.
; Per the assertions below, the AVX2 and AVX512 runs extract the upper 128 bits
; and widen with one ymm vpmovzxwd; the SSE2 and SSSE3 runs unpack xmm1 against
; a zeroed register; the SSE41 and AVX1 runs mix pmovzxwd with a blend.
1983 define <8 x i32> @shuf_zext_16i16_to_8i32_offset8(<16 x i16> %A) nounwind uwtable readnone ssp {
1984 ; SSE2-LABEL: shuf_zext_16i16_to_8i32_offset8:
1985 ; SSE2: # %bb.0: # %entry
1986 ; SSE2-NEXT: pxor %xmm2, %xmm2
1987 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1988 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1989 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1992 ; SSSE3-LABEL: shuf_zext_16i16_to_8i32_offset8:
1993 ; SSSE3: # %bb.0: # %entry
1994 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1995 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1996 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1997 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2000 ; SSE41-LABEL: shuf_zext_16i16_to_8i32_offset8:
2001 ; SSE41: # %bb.0: # %entry
2002 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
2003 ; SSE41-NEXT: pxor %xmm2, %xmm2
2004 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
2005 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
2006 ; SSE41-NEXT: movdqa %xmm2, %xmm1
2009 ; AVX1-LABEL: shuf_zext_16i16_to_8i32_offset8:
2010 ; AVX1: # %bb.0: # %entry
2011 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2012 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
2013 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
2014 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
2015 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2016 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2019 ; AVX2-LABEL: shuf_zext_16i16_to_8i32_offset8:
2020 ; AVX2: # %bb.0: # %entry
2021 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
2022 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2025 ; AVX512-LABEL: shuf_zext_16i16_to_8i32_offset8:
2026 ; AVX512: # %bb.0: # %entry
2027 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
2028 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2031 %B = shufflevector <16 x i16> %A, <16 x i16> zeroinitializer, <16 x i32> <i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 undef, i32 16, i32 14, i32 16, i32 undef, i32 16>
2032 %Z = bitcast <16 x i16> %B to <8 x i32>
; Shuffle-based zero-extension of the upper two i32 elements: elements 2 and 3
; of %A are each paired with zero (mask index 4 is element 0 of the
; zeroinitializer operand) and the <4 x i32> result is bitcast to <2 x i64>.
; Every SSE and AVX run shares one expectation: zero a register, then
; unpckhps/vunpckhps the input against it.
2036 define <2 x i64> @shuf_zext_4i32_to_2i64_offset2(<4 x i32> %A) nounwind uwtable readnone ssp {
2037 ; SSE-LABEL: shuf_zext_4i32_to_2i64_offset2:
2038 ; SSE: # %bb.0: # %entry
2039 ; SSE-NEXT: xorps %xmm1, %xmm1
2040 ; SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2043 ; AVX-LABEL: shuf_zext_4i32_to_2i64_offset2:
2044 ; AVX: # %bb.0: # %entry
2045 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
2046 ; AVX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2049 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 3, i32 4>
2050 %Z = bitcast <4 x i32> %B to <2 x i64>
; Shuffle-based zero-extension with undef lanes: the mask pins only elements 2
; and 3 of %A (each paired with zero via mask index 4, element 0 of the
; zeroinitializer operand); the first and last source lanes are undef. The
; <8 x i32> result is bitcast to <4 x i64>.
; Per the assertions below, the AVX2 and AVX512 runs use vpshufd [1,2,3,3]
; followed by one ymm vpmovzxdq; the SSE and AVX1 runs form the low half with a
; mask or blend and the high half with a 12-byte right shift.
2054 define <4 x i64> @shuf_zext_4i32_to_4i64_offset1(<4 x i32> %A) nounwind uwtable readnone ssp {
2055 ; SSE2-LABEL: shuf_zext_4i32_to_4i64_offset1:
2056 ; SSE2: # %bb.0: # %entry
2057 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2058 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
2059 ; SSE2-NEXT: pand %xmm1, %xmm0
2060 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2063 ; SSSE3-LABEL: shuf_zext_4i32_to_4i64_offset1:
2064 ; SSSE3: # %bb.0: # %entry
2065 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
2066 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
2067 ; SSSE3-NEXT: pand %xmm1, %xmm0
2068 ; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2071 ; SSE41-LABEL: shuf_zext_4i32_to_4i64_offset1:
2072 ; SSE41: # %bb.0: # %entry
2073 ; SSE41-NEXT: movdqa %xmm0, %xmm1
2074 ; SSE41-NEXT: pxor %xmm0, %xmm0
2075 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
2076 ; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2079 ; AVX1-LABEL: shuf_zext_4i32_to_4i64_offset1:
2080 ; AVX1: # %bb.0: # %entry
2081 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
2082 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
2083 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2084 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2087 ; AVX2-LABEL: shuf_zext_4i32_to_4i64_offset1:
2088 ; AVX2: # %bb.0: # %entry
2089 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
2090 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2093 ; AVX512-LABEL: shuf_zext_4i32_to_4i64_offset1:
2094 ; AVX512: # %bb.0: # %entry
2095 ; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
2096 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2099 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 undef, i32 4, i32 2, i32 4, i32 3, i32 4, i32 undef, i32 4>
2100 %Z = bitcast <8 x i32> %B to <4 x i64>
; Plain zext of <32 x i8> to <32 x i32>; the result is 1024 bits wide, so it
; spans several vector registers or memory depending on the run.
; Per the assertions below: the SSE runs copy %rdi to %rax and store eight
; 16-byte pieces at offsets 0..112 from %rdi; the AVX1 and AVX2 runs leave the
; result in ymm0..ymm3; the AVX512 runs leave it in zmm0 and zmm1.
2104 define <32 x i32> @zext_32i8_to_32i32(<32 x i8> %x) {
2105 ; SSE2-LABEL: zext_32i8_to_32i32:
2107 ; SSE2-NEXT: movq %rdi, %rax
2108 ; SSE2-NEXT: pxor %xmm2, %xmm2
2109 ; SSE2-NEXT: movdqa %xmm0, %xmm3
2110 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2111 ; SSE2-NEXT: movdqa %xmm3, %xmm8
2112 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm2[0],xmm8[1],xmm2[1],xmm8[2],xmm2[2],xmm8[3],xmm2[3]
2113 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2114 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
2115 ; SSE2-NEXT: movdqa %xmm0, %xmm5
2116 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
2117 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2118 ; SSE2-NEXT: movdqa %xmm1, %xmm6
2119 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2120 ; SSE2-NEXT: movdqa %xmm6, %xmm7
2121 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm2[0],xmm7[1],xmm2[1],xmm7[2],xmm2[2],xmm7[3],xmm2[3]
2122 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2123 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
2124 ; SSE2-NEXT: movdqa %xmm1, %xmm4
2125 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2126 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2127 ; SSE2-NEXT: movdqa %xmm1, 112(%rdi)
2128 ; SSE2-NEXT: movdqa %xmm4, 96(%rdi)
2129 ; SSE2-NEXT: movdqa %xmm6, 80(%rdi)
2130 ; SSE2-NEXT: movdqa %xmm7, 64(%rdi)
2131 ; SSE2-NEXT: movdqa %xmm0, 48(%rdi)
2132 ; SSE2-NEXT: movdqa %xmm5, 32(%rdi)
2133 ; SSE2-NEXT: movdqa %xmm3, 16(%rdi)
2134 ; SSE2-NEXT: movdqa %xmm8, (%rdi)
2137 ; SSSE3-LABEL: zext_32i8_to_32i32:
2139 ; SSSE3-NEXT: movq %rdi, %rax
2140 ; SSSE3-NEXT: pxor %xmm2, %xmm2
2141 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
2142 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2143 ; SSSE3-NEXT: movdqa %xmm3, %xmm8
2144 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm2[0],xmm8[1],xmm2[1],xmm8[2],xmm2[2],xmm8[3],xmm2[3]
2145 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2146 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
2147 ; SSSE3-NEXT: movdqa %xmm0, %xmm5
2148 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
2149 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2150 ; SSSE3-NEXT: movdqa %xmm1, %xmm6
2151 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2152 ; SSSE3-NEXT: movdqa %xmm6, %xmm7
2153 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm2[0],xmm7[1],xmm2[1],xmm7[2],xmm2[2],xmm7[3],xmm2[3]
2154 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2155 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
2156 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
2157 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2158 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2159 ; SSSE3-NEXT: movdqa %xmm1, 112(%rdi)
2160 ; SSSE3-NEXT: movdqa %xmm4, 96(%rdi)
2161 ; SSSE3-NEXT: movdqa %xmm6, 80(%rdi)
2162 ; SSSE3-NEXT: movdqa %xmm7, 64(%rdi)
2163 ; SSSE3-NEXT: movdqa %xmm0, 48(%rdi)
2164 ; SSSE3-NEXT: movdqa %xmm5, 32(%rdi)
2165 ; SSSE3-NEXT: movdqa %xmm3, 16(%rdi)
2166 ; SSSE3-NEXT: movdqa %xmm8, (%rdi)
2169 ; SSE41-LABEL: zext_32i8_to_32i32:
2171 ; SSE41-NEXT: movq %rdi, %rax
2172 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2173 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
2174 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
2175 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
2176 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
2177 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
2178 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2179 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm5 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2180 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm1[1,1,2,3]
2181 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero
2182 ; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm1[2,3,0,1]
2183 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
2184 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,3]
2185 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2186 ; SSE41-NEXT: movdqa %xmm1, 112(%rdi)
2187 ; SSE41-NEXT: movdqa %xmm7, 96(%rdi)
2188 ; SSE41-NEXT: movdqa %xmm6, 80(%rdi)
2189 ; SSE41-NEXT: movdqa %xmm5, 64(%rdi)
2190 ; SSE41-NEXT: movdqa %xmm0, 48(%rdi)
2191 ; SSE41-NEXT: movdqa %xmm4, 32(%rdi)
2192 ; SSE41-NEXT: movdqa %xmm3, 16(%rdi)
2193 ; SSE41-NEXT: movdqa %xmm2, (%rdi)
2196 ; AVX1-LABEL: zext_32i8_to_32i32:
2198 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2199 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
2200 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
2201 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm4
2202 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2203 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
2204 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm3[1,1,2,3]
2205 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
2206 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
2207 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
2208 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2209 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,0,1]
2210 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2211 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
2212 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
2213 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2214 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[3,3,0,1]
2215 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
2216 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm3
2217 ; AVX1-NEXT: vmovaps %ymm4, %ymm0
2220 ; AVX2-LABEL: zext_32i8_to_32i32:
2222 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2223 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
2224 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero
2225 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
2226 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2227 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
2228 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2229 ; AVX2-NEXT: vmovdqa %ymm4, %ymm0
2232 ; AVX512-LABEL: zext_32i8_to_32i32:
2234 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
2235 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
2236 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
2237 ; AVX512-NEXT: vmovdqa64 %zmm2, %zmm0
2239 %res = zext <32 x i8>%x to <32 x i32>
2243 define <2 x i32> @zext_2i8_to_2i32(<2 x i8>* %addr) {
2244 ; SSE2-LABEL: zext_2i8_to_2i32:
2246 ; SSE2-NEXT: movzwl (%rdi), %eax
2247 ; SSE2-NEXT: movd %eax, %xmm0
2248 ; SSE2-NEXT: pxor %xmm1, %xmm1
2249 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2250 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2251 ; SSE2-NEXT: paddd %xmm0, %xmm0
2254 ; SSSE3-LABEL: zext_2i8_to_2i32:
2256 ; SSSE3-NEXT: movzwl (%rdi), %eax
2257 ; SSSE3-NEXT: movd %eax, %xmm0
2258 ; SSSE3-NEXT: pxor %xmm1, %xmm1
2259 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2260 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2261 ; SSSE3-NEXT: paddd %xmm0, %xmm0
2264 ; SSE41-LABEL: zext_2i8_to_2i32:
2266 ; SSE41-NEXT: movzwl (%rdi), %eax
2267 ; SSE41-NEXT: movd %eax, %xmm0
2268 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2269 ; SSE41-NEXT: paddd %xmm0, %xmm0
2272 ; AVX-LABEL: zext_2i8_to_2i32:
2274 ; AVX-NEXT: movzwl (%rdi), %eax
2275 ; AVX-NEXT: vmovd %eax, %xmm0
2276 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2277 ; AVX-NEXT: vpaddd %xmm0, %xmm0, %xmm0
2279 %x = load <2 x i8>, <2 x i8>* %addr, align 1
2280 %y = zext <2 x i8> %x to <2 x i32>
2281 %z = add <2 x i32>%y, %y