1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2-SLOW
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2-FAST
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
; Test: zero-extend the low eight i8 lanes of %A to <8 x i16>.
; The shufflevector mask <0..7> selects the low half of %A before the zext.
; Per the checks, SSE2/SSSE3 interleave with a zeroed register (punpcklbw),
; while SSE4.1 and all AVX configurations use a single (v)pmovzxbw.
11 define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
12 ; SSE2-LABEL: zext_16i8_to_8i16:
13 ; SSE2: # %bb.0: # %entry
14 ; SSE2-NEXT: pxor %xmm1, %xmm1
15 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
18 ; SSSE3-LABEL: zext_16i8_to_8i16:
19 ; SSSE3: # %bb.0: # %entry
20 ; SSSE3-NEXT: pxor %xmm1, %xmm1
21 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
24 ; SSE41-LABEL: zext_16i8_to_8i16:
25 ; SSE41: # %bb.0: # %entry
26 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
29 ; AVX-LABEL: zext_16i8_to_8i16:
30 ; AVX: # %bb.0: # %entry
31 ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
34 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
35 %C = zext <8 x i8> %B to <8 x i16>
; Test: full-width zero-extension of <16 x i8> to <16 x i16> (no shuffle).
; Per the checks, SSE2/SSSE3 split into punpcklbw/punpckhbw against zero,
; SSE4.1 uses two pmovzxbw with a pshufd to bring the high half down, AVX1
; builds the result from two 128-bit halves joined by vinsertf128, and
; AVX2/AVX512 use one vpmovzxbw into ymm0.
40 define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %A) {
41 ; SSE2-LABEL: zext_16i8_to_16i16:
42 ; SSE2: # %bb.0: # %entry
43 ; SSE2-NEXT: movdqa %xmm0, %xmm1
44 ; SSE2-NEXT: pxor %xmm2, %xmm2
45 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
46 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
49 ; SSSE3-LABEL: zext_16i8_to_16i16:
50 ; SSSE3: # %bb.0: # %entry
51 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
52 ; SSSE3-NEXT: pxor %xmm2, %xmm2
53 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
54 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
57 ; SSE41-LABEL: zext_16i8_to_16i16:
58 ; SSE41: # %bb.0: # %entry
59 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
60 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
61 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
62 ; SSE41-NEXT: movdqa %xmm2, %xmm0
65 ; AVX1-LABEL: zext_16i8_to_16i16:
66 ; AVX1: # %bb.0: # %entry
67 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
68 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
69 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
70 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
73 ; AVX2-LABEL: zext_16i8_to_16i16:
74 ; AVX2: # %bb.0: # %entry
75 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
78 ; AVX512-LABEL: zext_16i8_to_16i16:
79 ; AVX512: # %bb.0: # %entry
80 ; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
83 %B = zext <16 x i8> %A to <16 x i16>
; Test: full-width zero-extension of <32 x i8> to <32 x i16>.
; This is the first function in the file whose checks differ between the two
; AVX-512 configurations: the AVX512F checks widen each 128-bit half to ymm
; and join them with vinserti64x4, while the AVX512BW checks use a single
; vpmovzxbw from ymm0 into zmm0.
87 define <32 x i16> @zext_32i8_to_32i16(<32 x i8> %A) {
88 ; SSE2-LABEL: zext_32i8_to_32i16:
89 ; SSE2: # %bb.0: # %entry
90 ; SSE2-NEXT: movdqa %xmm1, %xmm3
91 ; SSE2-NEXT: movdqa %xmm0, %xmm1
92 ; SSE2-NEXT: pxor %xmm4, %xmm4
93 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
94 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15]
95 ; SSE2-NEXT: movdqa %xmm3, %xmm2
96 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
97 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
100 ; SSSE3-LABEL: zext_32i8_to_32i16:
101 ; SSSE3: # %bb.0: # %entry
102 ; SSSE3-NEXT: movdqa %xmm1, %xmm3
103 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
104 ; SSSE3-NEXT: pxor %xmm4, %xmm4
105 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
106 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15]
107 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
108 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
109 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
112 ; SSE41-LABEL: zext_32i8_to_32i16:
113 ; SSE41: # %bb.0: # %entry
114 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
115 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
116 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
117 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
118 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
119 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
120 ; SSE41-NEXT: movdqa %xmm5, %xmm0
121 ; SSE41-NEXT: movdqa %xmm4, %xmm1
124 ; AVX1-LABEL: zext_32i8_to_32i16:
125 ; AVX1: # %bb.0: # %entry
126 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
127 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
128 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
129 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
130 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
131 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
132 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
133 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
134 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
137 ; AVX2-LABEL: zext_32i8_to_32i16:
138 ; AVX2: # %bb.0: # %entry
139 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
140 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
141 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
142 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
145 ; AVX512F-LABEL: zext_32i8_to_32i16:
146 ; AVX512F: # %bb.0: # %entry
147 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
148 ; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
149 ; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
150 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
153 ; AVX512BW-LABEL: zext_32i8_to_32i16:
154 ; AVX512BW: # %bb.0: # %entry
155 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
156 ; AVX512BW-NEXT: retq
158 %B = zext <32 x i8> %A to <32 x i16>
; Test: zero-extend the low four i8 lanes of %A to <4 x i32>.
; The shufflevector mask <0..3> selects bytes 0-3 before the zext.
; Per the checks, SSE2/SSSE3 widen in two interleave steps against zero
; (punpcklbw, then punpcklwd); SSE4.1 and AVX use a single (v)pmovzxbd.
162 define <4 x i32> @zext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp {
163 ; SSE2-LABEL: zext_16i8_to_4i32:
164 ; SSE2: # %bb.0: # %entry
165 ; SSE2-NEXT: pxor %xmm1, %xmm1
166 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
167 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
170 ; SSSE3-LABEL: zext_16i8_to_4i32:
171 ; SSSE3: # %bb.0: # %entry
172 ; SSSE3-NEXT: pxor %xmm1, %xmm1
173 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
174 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
177 ; SSE41-LABEL: zext_16i8_to_4i32:
178 ; SSE41: # %bb.0: # %entry
179 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
182 ; AVX-LABEL: zext_16i8_to_4i32:
183 ; AVX: # %bb.0: # %entry
184 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
187 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
188 %C = zext <4 x i8> %B to <4 x i32>
; Test: zero-extend the low eight i8 lanes of %A to <8 x i32>.
; The shufflevector mask <0..7> selects bytes 0-7 before the zext.
; Per the checks, SSE targets produce the result in xmm0/xmm1, AVX1 joins two
; 128-bit vpmovzxbd results with vinsertf128, and AVX2/AVX512 use a single
; vpmovzxbd into ymm0.
192 define <8 x i32> @zext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp {
193 ; SSE2-LABEL: zext_16i8_to_8i32:
194 ; SSE2: # %bb.0: # %entry
195 ; SSE2-NEXT: movdqa %xmm0, %xmm1
196 ; SSE2-NEXT: pxor %xmm2, %xmm2
197 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
198 ; SSE2-NEXT: movdqa %xmm1, %xmm0
199 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
200 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
203 ; SSSE3-LABEL: zext_16i8_to_8i32:
204 ; SSSE3: # %bb.0: # %entry
205 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
206 ; SSSE3-NEXT: pxor %xmm2, %xmm2
207 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
208 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
209 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
210 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
213 ; SSE41-LABEL: zext_16i8_to_8i32:
214 ; SSE41: # %bb.0: # %entry
215 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
216 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
217 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
218 ; SSE41-NEXT: movdqa %xmm2, %xmm0
221 ; AVX1-LABEL: zext_16i8_to_8i32:
222 ; AVX1: # %bb.0: # %entry
223 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
224 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
225 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
226 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
229 ; AVX2-LABEL: zext_16i8_to_8i32:
230 ; AVX2: # %bb.0: # %entry
231 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
234 ; AVX512-LABEL: zext_16i8_to_8i32:
235 ; AVX512: # %bb.0: # %entry
236 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
239 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
240 %C = zext <8 x i8> %B to <8 x i32>
; Test: full-width zero-extension of <16 x i8> to <16 x i32> (no shuffle).
; Per the checks, SSE targets produce four xmm results (xmm0-xmm3), AVX1 and
; AVX2 produce two ymm results (ymm0/ymm1), and AVX512 uses a single vpmovzxbd
; into zmm0.
244 define <16 x i32> @zext_16i8_to_16i32(<16 x i8> %A) nounwind uwtable readnone ssp {
245 ; SSE2-LABEL: zext_16i8_to_16i32:
246 ; SSE2: # %bb.0: # %entry
247 ; SSE2-NEXT: movdqa %xmm0, %xmm3
248 ; SSE2-NEXT: pxor %xmm4, %xmm4
249 ; SSE2-NEXT: movdqa %xmm0, %xmm1
250 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
251 ; SSE2-NEXT: movdqa %xmm1, %xmm0
252 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
253 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
254 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
255 ; SSE2-NEXT: movdqa %xmm3, %xmm2
256 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
257 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
260 ; SSSE3-LABEL: zext_16i8_to_16i32:
261 ; SSSE3: # %bb.0: # %entry
262 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
263 ; SSSE3-NEXT: pxor %xmm4, %xmm4
264 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
265 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
266 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
267 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
268 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
269 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
270 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
271 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
272 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
275 ; SSE41-LABEL: zext_16i8_to_16i32:
276 ; SSE41: # %bb.0: # %entry
277 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
278 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
279 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
280 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
281 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
282 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
283 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
284 ; SSE41-NEXT: movdqa %xmm4, %xmm0
287 ; AVX1-LABEL: zext_16i8_to_16i32:
288 ; AVX1: # %bb.0: # %entry
289 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
290 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
291 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
292 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
293 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
294 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
295 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,0,1]
296 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
297 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
298 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
301 ; AVX2-LABEL: zext_16i8_to_16i32:
302 ; AVX2: # %bb.0: # %entry
303 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
304 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
305 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
306 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
309 ; AVX512-LABEL: zext_16i8_to_16i32:
310 ; AVX512: # %bb.0: # %entry
311 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
314 %B = zext <16 x i8> %A to <16 x i32>
; Test: zero-extend the low two i8 lanes of %A to <2 x i64>.
; The shufflevector mask <0, 1> selects bytes 0-1 before the zext.
; Per the checks, SSE2 widens in three interleave steps against zero, SSSE3
; uses a single pshufb, and SSE4.1/AVX use a single (v)pmovzxbq.
318 define <2 x i64> @zext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp {
319 ; SSE2-LABEL: zext_16i8_to_2i64:
320 ; SSE2: # %bb.0: # %entry
321 ; SSE2-NEXT: pxor %xmm1, %xmm1
322 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
323 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
324 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
327 ; SSSE3-LABEL: zext_16i8_to_2i64:
328 ; SSSE3: # %bb.0: # %entry
329 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
332 ; SSE41-LABEL: zext_16i8_to_2i64:
333 ; SSE41: # %bb.0: # %entry
334 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
337 ; AVX-LABEL: zext_16i8_to_2i64:
338 ; AVX: # %bb.0: # %entry
339 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
342 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
343 %C = zext <2 x i8> %B to <2 x i64>
; Test: zero-extend the low four i8 lanes of %A to <4 x i64>.
; The shufflevector mask <0..3> selects bytes 0-3 before the zext.
; Per the checks, SSE4.1 and AVX1 shift each dword right by 16 (psrld/vpsrld)
; to move bytes 2-3 into position for the second (v)pmovzxbq, while
; AVX2/AVX512 use a single vpmovzxbq into ymm0.
347 define <4 x i64> @zext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp {
348 ; SSE2-LABEL: zext_16i8_to_4i64:
349 ; SSE2: # %bb.0: # %entry
350 ; SSE2-NEXT: movdqa %xmm0, %xmm1
351 ; SSE2-NEXT: pxor %xmm2, %xmm2
352 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
353 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
354 ; SSE2-NEXT: movdqa %xmm1, %xmm0
355 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
356 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
359 ; SSSE3-LABEL: zext_16i8_to_4i64:
360 ; SSSE3: # %bb.0: # %entry
361 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
362 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
363 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
366 ; SSE41-LABEL: zext_16i8_to_4i64:
367 ; SSE41: # %bb.0: # %entry
368 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
369 ; SSE41-NEXT: psrld $16, %xmm0
370 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
371 ; SSE41-NEXT: movdqa %xmm2, %xmm0
374 ; AVX1-LABEL: zext_16i8_to_4i64:
375 ; AVX1: # %bb.0: # %entry
376 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
377 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
378 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
379 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
382 ; AVX2-LABEL: zext_16i8_to_4i64:
383 ; AVX2: # %bb.0: # %entry
384 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
387 ; AVX512-LABEL: zext_16i8_to_4i64:
388 ; AVX512: # %bb.0: # %entry
389 ; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
392 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
393 %C = zext <4 x i8> %B to <4 x i64>
; Test: zero-extend the low eight i8 lanes of %A to <8 x i64>.
; The shufflevector mask <0..7> selects bytes 0-7 before the zext.
; Per the checks, SSE targets produce four xmm results (xmm0-xmm3), AVX1 and
; AVX2 produce two ymm results (ymm0/ymm1), and AVX512 uses a single vpmovzxbq
; into zmm0.
397 define <8 x i64> @zext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp {
398 ; SSE2-LABEL: zext_16i8_to_8i64:
399 ; SSE2: # %bb.0: # %entry
400 ; SSE2-NEXT: movdqa %xmm0, %xmm3
401 ; SSE2-NEXT: pxor %xmm4, %xmm4
402 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
403 ; SSE2-NEXT: movdqa %xmm3, %xmm1
404 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
405 ; SSE2-NEXT: movdqa %xmm1, %xmm0
406 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
407 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
408 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
409 ; SSE2-NEXT: movdqa %xmm3, %xmm2
410 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
411 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
414 ; SSSE3-LABEL: zext_16i8_to_8i64:
415 ; SSSE3: # %bb.0: # %entry
416 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
417 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
418 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
419 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
420 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
421 ; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[4],zero,zero,zero,zero,zero,zero,zero,xmm2[5],zero,zero,zero,zero,zero,zero,zero
422 ; SSSE3-NEXT: pshufb {{.*#+}} xmm3 = xmm3[6],zero,zero,zero,zero,zero,zero,zero,xmm3[7],zero,zero,zero,zero,zero,zero,zero
425 ; SSE41-LABEL: zext_16i8_to_8i64:
426 ; SSE41: # %bb.0: # %entry
427 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
428 ; SSE41-NEXT: movdqa %xmm0, %xmm1
429 ; SSE41-NEXT: psrld $16, %xmm1
430 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
431 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
432 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
433 ; SSE41-NEXT: psrlq $48, %xmm0
434 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
435 ; SSE41-NEXT: movdqa %xmm4, %xmm0
438 ; AVX1-LABEL: zext_16i8_to_8i64:
439 ; AVX1: # %bb.0: # %entry
440 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
441 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm2
442 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
443 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
444 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
445 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
446 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
447 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
448 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
449 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
452 ; AVX2-LABEL: zext_16i8_to_8i64:
453 ; AVX2: # %bb.0: # %entry
454 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
455 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
456 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
457 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
460 ; AVX512-LABEL: zext_16i8_to_8i64:
461 ; AVX512: # %bb.0: # %entry
462 ; AVX512-NEXT: vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
465 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
466 %C = zext <8 x i8> %B to <8 x i64>
; Test: zero-extend the low four i16 lanes of %A to <4 x i32>.
; The shufflevector mask <0..3> selects words 0-3 before the zext.
; Per the checks, SSE2/SSSE3 interleave with a zeroed register (punpcklwd),
; while SSE4.1 and all AVX configurations use a single (v)pmovzxwd.
470 define <4 x i32> @zext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp {
471 ; SSE2-LABEL: zext_8i16_to_4i32:
472 ; SSE2: # %bb.0: # %entry
473 ; SSE2-NEXT: pxor %xmm1, %xmm1
474 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
477 ; SSSE3-LABEL: zext_8i16_to_4i32:
478 ; SSSE3: # %bb.0: # %entry
479 ; SSSE3-NEXT: pxor %xmm1, %xmm1
480 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
483 ; SSE41-LABEL: zext_8i16_to_4i32:
484 ; SSE41: # %bb.0: # %entry
485 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
488 ; AVX-LABEL: zext_8i16_to_4i32:
489 ; AVX: # %bb.0: # %entry
490 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
493 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
494 %C = zext <4 x i16> %B to <4 x i32>
; Test: full-width zero-extension of <8 x i16> to <8 x i32> (no shuffle).
; Per the checks, SSE2/SSSE3 split into punpcklwd/punpckhwd against zero,
; SSE4.1 uses two pmovzxwd with a pshufd to bring the high half down, AVX1
; joins vpunpckhwd and vpmovzxwd results with vinsertf128, and AVX2/AVX512
; use a single vpmovzxwd into ymm0.
498 define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
499 ; SSE2-LABEL: zext_8i16_to_8i32:
500 ; SSE2: # %bb.0: # %entry
501 ; SSE2-NEXT: movdqa %xmm0, %xmm1
502 ; SSE2-NEXT: pxor %xmm2, %xmm2
503 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
504 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
507 ; SSSE3-LABEL: zext_8i16_to_8i32:
508 ; SSSE3: # %bb.0: # %entry
509 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
510 ; SSSE3-NEXT: pxor %xmm2, %xmm2
511 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
512 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
515 ; SSE41-LABEL: zext_8i16_to_8i32:
516 ; SSE41: # %bb.0: # %entry
517 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
518 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
519 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
520 ; SSE41-NEXT: movdqa %xmm2, %xmm0
523 ; AVX1-LABEL: zext_8i16_to_8i32:
524 ; AVX1: # %bb.0: # %entry
525 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
526 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
527 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
528 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
531 ; AVX2-LABEL: zext_8i16_to_8i32:
532 ; AVX2: # %bb.0: # %entry
533 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
536 ; AVX512-LABEL: zext_8i16_to_8i32:
537 ; AVX512: # %bb.0: # %entry
538 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
541 %B = zext <8 x i16> %A to <8 x i32>
; Zero-extends all sixteen i16 lanes of %A to i32 (<16 x i16> to <16 x i32>).
; Lowering pinned by the assertions below: SSE2 and SSSE3 interleave each input
; register with a zeroed register (punpcklwd/punpckhwd); SSE4.1 uses pmovzxwd,
; with pshufd moving the upper half down first; AVX1 pairs vpunpckhwd with
; vpmovzxwd per 128-bit half; AVX2 uses two ymm vpmovzxwd; AVX-512 uses a
; single zmm vpmovzxwd.
545 define <16 x i32> @zext_16i16_to_16i32(<16 x i16> %A) nounwind uwtable readnone ssp {
546 ; SSE2-LABEL: zext_16i16_to_16i32:
547 ; SSE2: # %bb.0: # %entry
548 ; SSE2-NEXT: movdqa %xmm1, %xmm3
549 ; SSE2-NEXT: movdqa %xmm0, %xmm1
550 ; SSE2-NEXT: pxor %xmm4, %xmm4
551 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
552 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
553 ; SSE2-NEXT: movdqa %xmm3, %xmm2
554 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
555 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
558 ; SSSE3-LABEL: zext_16i16_to_16i32:
559 ; SSSE3: # %bb.0: # %entry
560 ; SSSE3-NEXT: movdqa %xmm1, %xmm3
561 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
562 ; SSSE3-NEXT: pxor %xmm4, %xmm4
563 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
564 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
565 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
566 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
567 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
570 ; SSE41-LABEL: zext_16i16_to_16i32:
571 ; SSE41: # %bb.0: # %entry
572 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
573 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
574 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
575 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
576 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
577 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
578 ; SSE41-NEXT: movdqa %xmm5, %xmm0
579 ; SSE41-NEXT: movdqa %xmm4, %xmm1
582 ; AVX1-LABEL: zext_16i16_to_16i32:
583 ; AVX1: # %bb.0: # %entry
584 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
585 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
586 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
587 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
588 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
589 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
590 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
591 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
592 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
595 ; AVX2-LABEL: zext_16i16_to_16i32:
596 ; AVX2: # %bb.0: # %entry
597 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
598 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
599 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
600 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
603 ; AVX512-LABEL: zext_16i16_to_16i32:
604 ; AVX512: # %bb.0: # %entry
605 ; AVX512-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
608 %B = zext <16 x i16> %A to <16 x i32>
; Zero-extends the two lowest i16 lanes of %A to i64: the shufflevector mask
; <0, 1> narrows <8 x i16> to <2 x i16>, which is then widened to <2 x i64>.
; Lowering pinned by the assertions below: SSE2 and SSSE3 use punpcklwd then
; punpckldq against a zeroed register; SSE4.1 and every AVX configuration use
; a single (v)pmovzxwq.
612 define <2 x i64> @zext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp {
613 ; SSE2-LABEL: zext_8i16_to_2i64:
614 ; SSE2: # %bb.0: # %entry
615 ; SSE2-NEXT: pxor %xmm1, %xmm1
616 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
617 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
620 ; SSSE3-LABEL: zext_8i16_to_2i64:
621 ; SSSE3: # %bb.0: # %entry
622 ; SSSE3-NEXT: pxor %xmm1, %xmm1
623 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
624 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
627 ; SSE41-LABEL: zext_8i16_to_2i64:
628 ; SSE41: # %bb.0: # %entry
629 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
632 ; AVX-LABEL: zext_8i16_to_2i64:
633 ; AVX: # %bb.0: # %entry
634 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
637 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
638 %C = zext <2 x i16> %B to <2 x i64>
; Zero-extends the four lowest i16 lanes of %A to i64: the shufflevector mask
; <0, 1, 2, 3> narrows <8 x i16> to <4 x i16>, which is widened to <4 x i64>.
; Lowering pinned by the assertions below: SSE2 and SSSE3 use punpcklwd and
; then punpckldq/punpckhdq against a zeroed register; SSE4.1 and AVX1 use two
; (v)pmovzxwq with a pshufd in between; AVX2 and AVX-512 use a single ymm
; vpmovzxwq.
642 define <4 x i64> @zext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp {
643 ; SSE2-LABEL: zext_8i16_to_4i64:
644 ; SSE2: # %bb.0: # %entry
645 ; SSE2-NEXT: movdqa %xmm0, %xmm1
646 ; SSE2-NEXT: pxor %xmm2, %xmm2
647 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
648 ; SSE2-NEXT: movdqa %xmm1, %xmm0
649 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
650 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
653 ; SSSE3-LABEL: zext_8i16_to_4i64:
654 ; SSSE3: # %bb.0: # %entry
655 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
656 ; SSSE3-NEXT: pxor %xmm2, %xmm2
657 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
658 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
659 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
660 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
663 ; SSE41-LABEL: zext_8i16_to_4i64:
664 ; SSE41: # %bb.0: # %entry
665 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
666 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
667 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
668 ; SSE41-NEXT: movdqa %xmm2, %xmm0
671 ; AVX1-LABEL: zext_8i16_to_4i64:
672 ; AVX1: # %bb.0: # %entry
673 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
674 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
675 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
676 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
679 ; AVX2-LABEL: zext_8i16_to_4i64:
680 ; AVX2: # %bb.0: # %entry
681 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
684 ; AVX512-LABEL: zext_8i16_to_4i64:
685 ; AVX512: # %bb.0: # %entry
686 ; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
689 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
690 %C = zext <4 x i16> %B to <4 x i64>
; Zero-extends all eight i16 lanes of %A to i64 (<8 x i16> to <8 x i64>).
; Lowering pinned by the assertions below: SSE2 and SSSE3 use a word unpack
; followed by dword unpacks against a zeroed register; SSE4.1 and AVX1 use
; four (v)pmovzxwq, with pshufd supplying the higher lane pairs; AVX2 uses two
; ymm vpmovzxwq; AVX-512 uses a single zmm vpmovzxwq.
694 define <8 x i64> @zext_8i16_to_8i64(<8 x i16> %A) nounwind uwtable readnone ssp {
695 ; SSE2-LABEL: zext_8i16_to_8i64:
696 ; SSE2: # %bb.0: # %entry
697 ; SSE2-NEXT: movdqa %xmm0, %xmm3
698 ; SSE2-NEXT: pxor %xmm4, %xmm4
699 ; SSE2-NEXT: movdqa %xmm0, %xmm1
700 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
701 ; SSE2-NEXT: movdqa %xmm1, %xmm0
702 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
703 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
704 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
705 ; SSE2-NEXT: movdqa %xmm3, %xmm2
706 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
707 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
710 ; SSSE3-LABEL: zext_8i16_to_8i64:
711 ; SSSE3: # %bb.0: # %entry
712 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
713 ; SSSE3-NEXT: pxor %xmm4, %xmm4
714 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
715 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
716 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
717 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
718 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
719 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
720 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
721 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
722 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
725 ; SSE41-LABEL: zext_8i16_to_8i64:
726 ; SSE41: # %bb.0: # %entry
727 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
728 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
729 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
730 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
731 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
732 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
733 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
734 ; SSE41-NEXT: movdqa %xmm4, %xmm0
737 ; AVX1-LABEL: zext_8i16_to_8i64:
738 ; AVX1: # %bb.0: # %entry
739 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
740 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
741 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
742 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
743 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
744 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
745 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,0,1]
746 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
747 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
748 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
751 ; AVX2-LABEL: zext_8i16_to_8i64:
752 ; AVX2: # %bb.0: # %entry
753 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
754 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
755 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
756 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
759 ; AVX512-LABEL: zext_8i16_to_8i64:
760 ; AVX512: # %bb.0: # %entry
761 ; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
764 %B = zext <8 x i16> %A to <8 x i64>
; Zero-extends the two lowest i32 lanes of %A to i64: the shufflevector mask
; <0, 1> narrows <4 x i32> to <2 x i32>, which is then widened to <2 x i64>.
; Lowering pinned by the assertions below: SSE2 and SSSE3 use unpcklps against
; a register zeroed with xorps; SSE4.1 and every AVX configuration use a
; single (v)pmovzxdq.
768 define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp {
769 ; SSE2-LABEL: zext_4i32_to_2i64:
770 ; SSE2: # %bb.0: # %entry
771 ; SSE2-NEXT: xorps %xmm1, %xmm1
772 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
775 ; SSSE3-LABEL: zext_4i32_to_2i64:
776 ; SSSE3: # %bb.0: # %entry
777 ; SSSE3-NEXT: xorps %xmm1, %xmm1
778 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
781 ; SSE41-LABEL: zext_4i32_to_2i64:
782 ; SSE41: # %bb.0: # %entry
783 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
786 ; AVX-LABEL: zext_4i32_to_2i64:
787 ; AVX: # %bb.0: # %entry
788 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
791 %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
792 %C = zext <2 x i32> %B to <2 x i64>
; Zero-extends all four i32 lanes of %A to i64 (<4 x i32> to <4 x i64>).
; Lowering pinned by the assertions below: SSE2 and SSSE3 use unpcklps and
; unpckhps against a zeroed register; SSE4.1 uses two pmovzxdq with a pshufd
; in between; AVX1 joins vpunpckhdq and vpmovzxdq results with vinsertf128;
; AVX2 and AVX-512 use a single ymm vpmovzxdq.
796 define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
797 ; SSE2-LABEL: zext_4i32_to_4i64:
798 ; SSE2: # %bb.0: # %entry
799 ; SSE2-NEXT: movaps %xmm0, %xmm1
800 ; SSE2-NEXT: xorps %xmm2, %xmm2
801 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
802 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
805 ; SSSE3-LABEL: zext_4i32_to_4i64:
806 ; SSSE3: # %bb.0: # %entry
807 ; SSSE3-NEXT: movaps %xmm0, %xmm1
808 ; SSSE3-NEXT: xorps %xmm2, %xmm2
809 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
810 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
813 ; SSE41-LABEL: zext_4i32_to_4i64:
814 ; SSE41: # %bb.0: # %entry
815 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
816 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
817 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
818 ; SSE41-NEXT: movdqa %xmm2, %xmm0
821 ; AVX1-LABEL: zext_4i32_to_4i64:
822 ; AVX1: # %bb.0: # %entry
823 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
824 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
825 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
826 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
829 ; AVX2-LABEL: zext_4i32_to_4i64:
830 ; AVX2: # %bb.0: # %entry
831 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
834 ; AVX512-LABEL: zext_4i32_to_4i64:
835 ; AVX512: # %bb.0: # %entry
836 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
839 %B = zext <4 x i32> %A to <4 x i64>
; Zero-extends all eight i32 lanes of %A to i64 (<8 x i32> to <8 x i64>).
; Lowering pinned by the assertions below: SSE2 and SSSE3 use unpcklps and
; unpckhps of each input register against a zeroed register; SSE4.1 uses
; pmovzxdq, with pshufd moving the upper halves down first; AVX1 pairs
; vpunpckhdq with vpmovzxdq per 128-bit half; AVX2 uses two ymm vpmovzxdq;
; AVX-512 uses a single zmm vpmovzxdq.
843 define <8 x i64> @zext_8i32_to_8i64(<8 x i32> %A) nounwind uwtable readnone ssp {
844 ; SSE2-LABEL: zext_8i32_to_8i64:
845 ; SSE2: # %bb.0: # %entry
846 ; SSE2-NEXT: movaps %xmm1, %xmm3
847 ; SSE2-NEXT: movaps %xmm0, %xmm1
848 ; SSE2-NEXT: xorps %xmm4, %xmm4
849 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
850 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
851 ; SSE2-NEXT: movaps %xmm3, %xmm2
852 ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
853 ; SSE2-NEXT: unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
856 ; SSSE3-LABEL: zext_8i32_to_8i64:
857 ; SSSE3: # %bb.0: # %entry
858 ; SSSE3-NEXT: movaps %xmm1, %xmm3
859 ; SSSE3-NEXT: movaps %xmm0, %xmm1
860 ; SSSE3-NEXT: xorps %xmm4, %xmm4
861 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
862 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
863 ; SSSE3-NEXT: movaps %xmm3, %xmm2
864 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
865 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
868 ; SSE41-LABEL: zext_8i32_to_8i64:
869 ; SSE41: # %bb.0: # %entry
870 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero
871 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
872 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
873 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero
874 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
875 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero
876 ; SSE41-NEXT: movdqa %xmm5, %xmm0
877 ; SSE41-NEXT: movdqa %xmm4, %xmm1
880 ; AVX1-LABEL: zext_8i32_to_8i64:
881 ; AVX1: # %bb.0: # %entry
882 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
883 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
884 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero
885 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
886 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
887 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
888 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
889 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
890 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
893 ; AVX2-LABEL: zext_8i32_to_8i64:
894 ; AVX2: # %bb.0: # %entry
895 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
896 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
897 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
898 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
901 ; AVX512-LABEL: zext_8i32_to_8i64:
902 ; AVX512: # %bb.0: # %entry
903 ; AVX512-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
906 %B = zext <8 x i32> %A to <8 x i64>
; Loads a <2 x i8> vector through %ptr and zero-extends it to <2 x i64>.
; Lowering pinned by the assertions below: SSE2 loads 16 bits with movzwl,
; moves them into xmm0 with movd, then widens with a punpcklbw/punpcklwd/
; punpckldq chain against a zeroed register; SSSE3 replaces that chain with
; one pshufb; SSE4.1 and every AVX configuration use a single (v)pmovzxbq
; with a memory operand.
910 define <2 x i64> @load_zext_2i8_to_2i64(<2 x i8> *%ptr) {
911 ; SSE2-LABEL: load_zext_2i8_to_2i64:
912 ; SSE2: # %bb.0: # %entry
913 ; SSE2-NEXT: movzwl (%rdi), %eax
914 ; SSE2-NEXT: movd %eax, %xmm0
915 ; SSE2-NEXT: pxor %xmm1, %xmm1
916 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
917 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
918 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
921 ; SSSE3-LABEL: load_zext_2i8_to_2i64:
922 ; SSSE3: # %bb.0: # %entry
923 ; SSSE3-NEXT: movzwl (%rdi), %eax
924 ; SSSE3-NEXT: movd %eax, %xmm0
925 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
928 ; SSE41-LABEL: load_zext_2i8_to_2i64:
929 ; SSE41: # %bb.0: # %entry
930 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
933 ; AVX-LABEL: load_zext_2i8_to_2i64:
934 ; AVX: # %bb.0: # %entry
935 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
938 %X = load <2 x i8>, <2 x i8>* %ptr
939 %Y = zext <2 x i8> %X to <2 x i64>
; Loads a <4 x i8> vector through %ptr and zero-extends it to <4 x i32>.
; Lowering pinned by the assertions below: SSE2 and SSSE3 load with movd and
; widen with punpcklbw then punpcklwd against a zeroed register; SSE4.1 and
; every AVX configuration use a single (v)pmovzxbd with a memory operand.
943 define <4 x i32> @load_zext_4i8_to_4i32(<4 x i8> *%ptr) {
944 ; SSE2-LABEL: load_zext_4i8_to_4i32:
945 ; SSE2: # %bb.0: # %entry
946 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
947 ; SSE2-NEXT: pxor %xmm1, %xmm1
948 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
949 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
952 ; SSSE3-LABEL: load_zext_4i8_to_4i32:
953 ; SSSE3: # %bb.0: # %entry
954 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
955 ; SSSE3-NEXT: pxor %xmm1, %xmm1
956 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
957 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
960 ; SSE41-LABEL: load_zext_4i8_to_4i32:
961 ; SSE41: # %bb.0: # %entry
962 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
965 ; AVX-LABEL: load_zext_4i8_to_4i32:
966 ; AVX: # %bb.0: # %entry
967 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
970 %X = load <4 x i8>, <4 x i8>* %ptr
971 %Y = zext <4 x i8> %X to <4 x i32>
; Loads a <4 x i8> vector through %ptr and zero-extends it to <4 x i64>.
; Lowering pinned by the assertions below: SSE2 loads with movd and widens
; through byte, word and dword unpacks against a zeroed register; SSSE3 loads
; with movd and uses two pshufb; SSE4.1 and AVX1 use two (v)pmovzxbq with
; memory operands (AVX1 joins them with vinsertf128); AVX2 and AVX-512 use a
; single ymm vpmovzxbq with a memory operand.
975 define <4 x i64> @load_zext_4i8_to_4i64(<4 x i8> *%ptr) {
976 ; SSE2-LABEL: load_zext_4i8_to_4i64:
977 ; SSE2: # %bb.0: # %entry
978 ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
979 ; SSE2-NEXT: pxor %xmm2, %xmm2
980 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
981 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
982 ; SSE2-NEXT: movdqa %xmm1, %xmm0
983 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
984 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
987 ; SSSE3-LABEL: load_zext_4i8_to_4i64:
988 ; SSSE3: # %bb.0: # %entry
989 ; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
990 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
991 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
992 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
995 ; SSE41-LABEL: load_zext_4i8_to_4i64:
996 ; SSE41: # %bb.0: # %entry
997 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
998 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1001 ; AVX1-LABEL: load_zext_4i8_to_4i64:
1002 ; AVX1: # %bb.0: # %entry
1003 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1004 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1005 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1008 ; AVX2-LABEL: load_zext_4i8_to_4i64:
1009 ; AVX2: # %bb.0: # %entry
1010 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1013 ; AVX512-LABEL: load_zext_4i8_to_4i64:
1014 ; AVX512: # %bb.0: # %entry
1015 ; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1018 %X = load <4 x i8>, <4 x i8>* %ptr
1019 %Y = zext <4 x i8> %X to <4 x i64>
; Loads an <8 x i8> vector through %ptr and zero-extends it to <8 x i16>.
; Lowering pinned by the assertions below: SSE2 and SSSE3 load with movq and
; widen with punpcklbw against a zeroed register; SSE4.1 and every AVX
; configuration use a single (v)pmovzxbw with a memory operand.
1023 define <8 x i16> @load_zext_8i8_to_8i16(<8 x i8> *%ptr) {
1024 ; SSE2-LABEL: load_zext_8i8_to_8i16:
1025 ; SSE2: # %bb.0: # %entry
1026 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1027 ; SSE2-NEXT: pxor %xmm1, %xmm1
1028 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1031 ; SSSE3-LABEL: load_zext_8i8_to_8i16:
1032 ; SSSE3: # %bb.0: # %entry
1033 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1034 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1035 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1038 ; SSE41-LABEL: load_zext_8i8_to_8i16:
1039 ; SSE41: # %bb.0: # %entry
1040 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1043 ; AVX-LABEL: load_zext_8i8_to_8i16:
1044 ; AVX: # %bb.0: # %entry
1045 ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1048 %X = load <8 x i8>, <8 x i8>* %ptr
1049 %Y = zext <8 x i8> %X to <8 x i16>
; Loads an <8 x i8> vector through %ptr and zero-extends it to <8 x i32>.
; Lowering pinned by the assertions below: SSE2 and SSSE3 load with movq,
; widen bytes with punpcklbw, then split with punpcklwd/punpckhwd against a
; zeroed register; SSE4.1 and AVX1 use two (v)pmovzxbd with memory operands
; (AVX1 joins them with vinsertf128); AVX2 and AVX-512 use a single ymm
; vpmovzxbd with a memory operand.
1053 define <8 x i32> @load_zext_8i8_to_8i32(<8 x i8> *%ptr) {
1054 ; SSE2-LABEL: load_zext_8i8_to_8i32:
1055 ; SSE2: # %bb.0: # %entry
1056 ; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1057 ; SSE2-NEXT: pxor %xmm2, %xmm2
1058 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1059 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1060 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1061 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1064 ; SSSE3-LABEL: load_zext_8i8_to_8i32:
1065 ; SSSE3: # %bb.0: # %entry
1066 ; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1067 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1068 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1069 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1070 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1071 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1074 ; SSE41-LABEL: load_zext_8i8_to_8i32:
1075 ; SSE41: # %bb.0: # %entry
1076 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1077 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1080 ; AVX1-LABEL: load_zext_8i8_to_8i32:
1081 ; AVX1: # %bb.0: # %entry
1082 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1083 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1084 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1087 ; AVX2-LABEL: load_zext_8i8_to_8i32:
1088 ; AVX2: # %bb.0: # %entry
1089 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1092 ; AVX512-LABEL: load_zext_8i8_to_8i32:
1093 ; AVX512: # %bb.0: # %entry
1094 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1097 %X = load <8 x i8>, <8 x i8>* %ptr
1098 %Y = zext <8 x i8> %X to <8 x i32>
; Loads a full <16 x i8> vector through %ptr, keeps its eight lowest bytes
; (shufflevector mask <0..7>) and zero-extends them to <8 x i32>.
; Lowering pinned by the assertions below: SSE2 and SSSE3 load with movdqa and
; widen through punpcklbw then punpcklwd/punpckhwd against a zeroed register;
; SSE4.1 and AVX1 load the whole vector with (v)movdqa and use register-form
; (v)pmovzxbd with a pshufd in between; AVX2 and AVX-512 use a single ymm
; vpmovzxbd with a memory operand.
1102 define <8 x i32> @load_zext_16i8_to_8i32(<16 x i8> *%ptr) {
1103 ; SSE2-LABEL: load_zext_16i8_to_8i32:
1104 ; SSE2: # %bb.0: # %entry
1105 ; SSE2-NEXT: movdqa (%rdi), %xmm1
1106 ; SSE2-NEXT: pxor %xmm2, %xmm2
1107 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1108 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1109 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1110 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1113 ; SSSE3-LABEL: load_zext_16i8_to_8i32:
1114 ; SSSE3: # %bb.0: # %entry
1115 ; SSSE3-NEXT: movdqa (%rdi), %xmm1
1116 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1117 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1118 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1119 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1120 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1123 ; SSE41-LABEL: load_zext_16i8_to_8i32:
1124 ; SSE41: # %bb.0: # %entry
1125 ; SSE41-NEXT: movdqa (%rdi), %xmm1
1126 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1127 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
1128 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1131 ; AVX1-LABEL: load_zext_16i8_to_8i32:
1132 ; AVX1: # %bb.0: # %entry
1133 ; AVX1-NEXT: vmovdqa (%rdi), %xmm0
1134 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1135 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1136 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1137 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1140 ; AVX2-LABEL: load_zext_16i8_to_8i32:
1141 ; AVX2: # %bb.0: # %entry
1142 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1145 ; AVX512-LABEL: load_zext_16i8_to_8i32:
1146 ; AVX512: # %bb.0: # %entry
1147 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1150 %X = load <16 x i8>, <16 x i8>* %ptr
1151 %Y = shufflevector <16 x i8> %X, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1152 %Z = zext <8 x i8> %Y to <8 x i32>
; Loads an <8 x i8> vector through %ptr and zero-extends it to <8 x i64>.
; Lowering pinned by the assertions below: SSE2 loads with movq and widens
; through byte, word and dword unpacks against a zeroed register; SSSE3 loads
; with movq and uses four pshufb, one per output register; SSE4.1 uses four
; pmovzxbq with memory operands; AVX1 uses four vpmovzxbq joined pairwise by
; vinsertf128; AVX2 uses two ymm vpmovzxbq; AVX-512 uses a single zmm
; vpmovzxbq with a memory operand.
1156 define <8 x i64> @load_zext_8i8_to_8i64(<8 x i8> *%ptr) {
1157 ; SSE2-LABEL: load_zext_8i8_to_8i64:
1158 ; SSE2: # %bb.0: # %entry
1159 ; SSE2-NEXT: movq {{.*#+}} xmm3 = mem[0],zero
1160 ; SSE2-NEXT: pxor %xmm4, %xmm4
1161 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
1162 ; SSE2-NEXT: movdqa %xmm3, %xmm1
1163 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
1164 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1165 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
1166 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
1167 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
1168 ; SSE2-NEXT: movdqa %xmm3, %xmm2
1169 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
1170 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
1173 ; SSSE3-LABEL: load_zext_8i8_to_8i64:
1174 ; SSSE3: # %bb.0: # %entry
1175 ; SSSE3-NEXT: movq {{.*#+}} xmm3 = mem[0],zero
1176 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
1177 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1178 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
1179 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
1180 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
1181 ; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[4],zero,zero,zero,zero,zero,zero,zero,xmm2[5],zero,zero,zero,zero,zero,zero,zero
1182 ; SSSE3-NEXT: pshufb {{.*#+}} xmm3 = xmm3[6],zero,zero,zero,zero,zero,zero,zero,xmm3[7],zero,zero,zero,zero,zero,zero,zero
1185 ; SSE41-LABEL: load_zext_8i8_to_8i64:
1186 ; SSE41: # %bb.0: # %entry
1187 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1188 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1189 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1190 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1193 ; AVX1-LABEL: load_zext_8i8_to_8i64:
1194 ; AVX1: # %bb.0: # %entry
1195 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1196 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1197 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1198 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1199 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
1200 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
1203 ; AVX2-LABEL: load_zext_8i8_to_8i64:
1204 ; AVX2: # %bb.0: # %entry
1205 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1206 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1209 ; AVX512-LABEL: load_zext_8i8_to_8i64:
1210 ; AVX512: # %bb.0: # %entry
1211 ; AVX512-NEXT: vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
1214 %X = load <8 x i8>, <8 x i8>* %ptr
1215 %Y = zext <8 x i8> %X to <8 x i64>
; Loads a <16 x i8> vector through %ptr and zero-extends it to <16 x i16>.
; Lowering pinned by the assertions below: SSE2 and SSSE3 load with movdqa and
; split with punpcklbw/punpckhbw against a zeroed register; SSE4.1 and AVX1
; use two (v)pmovzxbw with memory operands (AVX1 joins them with vinsertf128);
; AVX2 and AVX-512 use a single ymm vpmovzxbw with a memory operand.
1219 define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) {
1220 ; SSE2-LABEL: load_zext_16i8_to_16i16:
1221 ; SSE2: # %bb.0: # %entry
1222 ; SSE2-NEXT: movdqa (%rdi), %xmm1
1223 ; SSE2-NEXT: pxor %xmm2, %xmm2
1224 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1225 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1226 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
1229 ; SSSE3-LABEL: load_zext_16i8_to_16i16:
1230 ; SSSE3: # %bb.0: # %entry
1231 ; SSSE3-NEXT: movdqa (%rdi), %xmm1
1232 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1233 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1234 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1235 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
1238 ; SSE41-LABEL: load_zext_16i8_to_16i16:
1239 ; SSE41: # %bb.0: # %entry
1240 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1241 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1244 ; AVX1-LABEL: load_zext_16i8_to_16i16:
1245 ; AVX1: # %bb.0: # %entry
1246 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1247 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1248 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1251 ; AVX2-LABEL: load_zext_16i8_to_16i16:
1252 ; AVX2: # %bb.0: # %entry
1253 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1256 ; AVX512-LABEL: load_zext_16i8_to_16i16:
1257 ; AVX512: # %bb.0: # %entry
1258 ; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1261 %X = load <16 x i8>, <16 x i8>* %ptr
1262 %Y = zext <16 x i8> %X to <16 x i16>
; Loads a <2 x i16> vector from %ptr and zero-extends every element to i64.
; Expected lowering per check prefix:
;   SSE2/SSSE3 - movd load, then punpcklwd and punpckldq against a zeroed register.
;   SSE41/AVX  - a single (v)pmovzxwq that reads straight from memory.
1266 define <2 x i64> @load_zext_2i16_to_2i64(<2 x i16> *%ptr) {
1267 ; SSE2-LABEL: load_zext_2i16_to_2i64:
1268 ; SSE2: # %bb.0: # %entry
1269 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1270 ; SSE2-NEXT: pxor %xmm1, %xmm1
1271 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1272 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1275 ; SSSE3-LABEL: load_zext_2i16_to_2i64:
1276 ; SSSE3: # %bb.0: # %entry
1277 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1278 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1279 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1280 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1283 ; SSE41-LABEL: load_zext_2i16_to_2i64:
1284 ; SSE41: # %bb.0: # %entry
1285 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1288 ; AVX-LABEL: load_zext_2i16_to_2i64:
1289 ; AVX: # %bb.0: # %entry
1290 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1293 %X = load <2 x i16>, <2 x i16>* %ptr
1294 %Y = zext <2 x i16> %X to <2 x i64>
; Loads a <4 x i16> vector from %ptr and zero-extends every element to i32.
; Expected lowering per check prefix:
;   SSE2/SSSE3 - movq load, then punpcklwd against a zeroed register.
;   SSE41/AVX  - a single (v)pmovzxwd that reads straight from memory.
1298 define <4 x i32> @load_zext_4i16_to_4i32(<4 x i16> *%ptr) {
1299 ; SSE2-LABEL: load_zext_4i16_to_4i32:
1300 ; SSE2: # %bb.0: # %entry
1301 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1302 ; SSE2-NEXT: pxor %xmm1, %xmm1
1303 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1306 ; SSSE3-LABEL: load_zext_4i16_to_4i32:
1307 ; SSSE3: # %bb.0: # %entry
1308 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1309 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1310 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1313 ; SSE41-LABEL: load_zext_4i16_to_4i32:
1314 ; SSE41: # %bb.0: # %entry
1315 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1318 ; AVX-LABEL: load_zext_4i16_to_4i32:
1319 ; AVX: # %bb.0: # %entry
1320 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1323 %X = load <4 x i16>, <4 x i16>* %ptr
1324 %Y = zext <4 x i16> %X to <4 x i32>
; Loads a <4 x i16> vector from %ptr and zero-extends every element to i64.
; Expected lowering per check prefix:
;   SSE2/SSSE3   - movq load, punpcklwd against zero, then punpckldq/punpckhdq
;                  to produce the low and high result halves.
;   SSE41/AVX1   - two 128-bit pmovzxwq loads (AVX1 joins them with vinsertf128).
;   AVX2/AVX512  - a single 256-bit vpmovzxwq load.
1328 define <4 x i64> @load_zext_4i16_to_4i64(<4 x i16> *%ptr) {
1329 ; SSE2-LABEL: load_zext_4i16_to_4i64:
1330 ; SSE2: # %bb.0: # %entry
1331 ; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1332 ; SSE2-NEXT: pxor %xmm2, %xmm2
1333 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1334 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1335 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1336 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1339 ; SSSE3-LABEL: load_zext_4i16_to_4i64:
1340 ; SSSE3: # %bb.0: # %entry
1341 ; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1342 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1343 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1344 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1345 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1346 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1349 ; SSE41-LABEL: load_zext_4i16_to_4i64:
1350 ; SSE41: # %bb.0: # %entry
1351 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1352 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1355 ; AVX1-LABEL: load_zext_4i16_to_4i64:
1356 ; AVX1: # %bb.0: # %entry
1357 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1358 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1359 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1362 ; AVX2-LABEL: load_zext_4i16_to_4i64:
1363 ; AVX2: # %bb.0: # %entry
1364 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1367 ; AVX512-LABEL: load_zext_4i16_to_4i64:
1368 ; AVX512: # %bb.0: # %entry
1369 ; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1372 %X = load <4 x i16>, <4 x i16>* %ptr
1373 %Y = zext <4 x i16> %X to <4 x i64>
; Loads a <8 x i16> vector from %ptr and zero-extends every element to i32.
; Expected lowering per check prefix:
;   SSE2/SSSE3   - one full load, then punpcklwd/punpckhwd against a zeroed register.
;   SSE41/AVX1   - two 128-bit pmovzxwd loads (AVX1 joins them with vinsertf128).
;   AVX2/AVX512  - a single 256-bit vpmovzxwd load.
1377 define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {
1378 ; SSE2-LABEL: load_zext_8i16_to_8i32:
1379 ; SSE2: # %bb.0: # %entry
1380 ; SSE2-NEXT: movdqa (%rdi), %xmm1
1381 ; SSE2-NEXT: pxor %xmm2, %xmm2
1382 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1383 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1384 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1387 ; SSSE3-LABEL: load_zext_8i16_to_8i32:
1388 ; SSSE3: # %bb.0: # %entry
1389 ; SSSE3-NEXT: movdqa (%rdi), %xmm1
1390 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1391 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1392 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1393 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1396 ; SSE41-LABEL: load_zext_8i16_to_8i32:
1397 ; SSE41: # %bb.0: # %entry
1398 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1399 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1402 ; AVX1-LABEL: load_zext_8i16_to_8i32:
1403 ; AVX1: # %bb.0: # %entry
1404 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1405 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1406 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1409 ; AVX2-LABEL: load_zext_8i16_to_8i32:
1410 ; AVX2: # %bb.0: # %entry
1411 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1414 ; AVX512-LABEL: load_zext_8i16_to_8i32:
1415 ; AVX512: # %bb.0: # %entry
1416 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1419 %X = load <8 x i16>, <8 x i16>* %ptr
1420 %Y = zext <8 x i16> %X to <8 x i32>
; Loads a <2 x i32> vector from %ptr and zero-extends every element to i64.
; Expected lowering per check prefix:
;   SSE2/SSSE3 - movsd load, then unpcklps against a register zeroed with xorps.
;   SSE41/AVX  - a single (v)pmovzxdq that reads straight from memory.
1424 define <2 x i64> @load_zext_2i32_to_2i64(<2 x i32> *%ptr) {
1425 ; SSE2-LABEL: load_zext_2i32_to_2i64:
1426 ; SSE2: # %bb.0: # %entry
1427 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1428 ; SSE2-NEXT: xorps %xmm1, %xmm1
1429 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1432 ; SSSE3-LABEL: load_zext_2i32_to_2i64:
1433 ; SSSE3: # %bb.0: # %entry
1434 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1435 ; SSSE3-NEXT: xorps %xmm1, %xmm1
1436 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1439 ; SSE41-LABEL: load_zext_2i32_to_2i64:
1440 ; SSE41: # %bb.0: # %entry
1441 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1444 ; AVX-LABEL: load_zext_2i32_to_2i64:
1445 ; AVX: # %bb.0: # %entry
1446 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1449 %X = load <2 x i32>, <2 x i32>* %ptr
1450 %Y = zext <2 x i32> %X to <2 x i64>
; Loads a <4 x i32> vector from %ptr and zero-extends every element to i64.
; Expected lowering per check prefix:
;   SSE2/SSSE3   - one full load, then unpcklps/unpckhps against a zeroed register.
;   SSE41/AVX1   - two 128-bit pmovzxdq loads (AVX1 joins them with vinsertf128).
;   AVX2/AVX512  - a single 256-bit vpmovzxdq load.
1454 define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {
1455 ; SSE2-LABEL: load_zext_4i32_to_4i64:
1456 ; SSE2: # %bb.0: # %entry
1457 ; SSE2-NEXT: movaps (%rdi), %xmm1
1458 ; SSE2-NEXT: xorps %xmm2, %xmm2
1459 ; SSE2-NEXT: movaps %xmm1, %xmm0
1460 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1461 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1464 ; SSSE3-LABEL: load_zext_4i32_to_4i64:
1465 ; SSSE3: # %bb.0: # %entry
1466 ; SSSE3-NEXT: movaps (%rdi), %xmm1
1467 ; SSSE3-NEXT: xorps %xmm2, %xmm2
1468 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1469 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1470 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1473 ; SSE41-LABEL: load_zext_4i32_to_4i64:
1474 ; SSE41: # %bb.0: # %entry
1475 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1476 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
1479 ; AVX1-LABEL: load_zext_4i32_to_4i64:
1480 ; AVX1: # %bb.0: # %entry
1481 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1482 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
1483 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1486 ; AVX2-LABEL: load_zext_4i32_to_4i64:
1487 ; AVX2: # %bb.0: # %entry
1488 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1491 ; AVX512-LABEL: load_zext_4i32_to_4i64:
1492 ; AVX512: # %bb.0: # %entry
1493 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1496 %X = load <4 x i32>, <4 x i32>* %ptr
1497 %Y = zext <4 x i32> %X to <4 x i64>
; Zero-extends the <8 x i8> argument %z (passed in a register, not loaded) to
; <8 x i32>.
; Expected lowering per check prefix:
;   SSE2/SSSE3   - punpcklbw against zero, then punpcklwd/punpckhwd against zero.
;   SSE41/AVX1   - pmovzxbd on the low four bytes, pshufd to move the next four
;                  bytes down, then a second pmovzxbd.
;   AVX2/AVX512  - a single 256-bit vpmovzxbd.
1501 define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
1502 ; SSE2-LABEL: zext_8i8_to_8i32:
1503 ; SSE2: # %bb.0: # %entry
1504 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1505 ; SSE2-NEXT: pxor %xmm2, %xmm2
1506 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1507 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1508 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1509 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1512 ; SSSE3-LABEL: zext_8i8_to_8i32:
1513 ; SSSE3: # %bb.0: # %entry
1514 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1515 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1516 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1517 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1518 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1519 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1522 ; SSE41-LABEL: zext_8i8_to_8i32:
1523 ; SSE41: # %bb.0: # %entry
1524 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1525 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1526 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1527 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1530 ; AVX1-LABEL: zext_8i8_to_8i32:
1531 ; AVX1: # %bb.0: # %entry
1532 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1533 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1534 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1535 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1538 ; AVX2-LABEL: zext_8i8_to_8i32:
1539 ; AVX2: # %bb.0: # %entry
1540 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1543 ; AVX512-LABEL: zext_8i8_to_8i32:
1544 ; AVX512: # %bb.0: # %entry
1545 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1548 %t = zext <8 x i8> %z to <8 x i32>
; Zero extension spelled as a shuffle instead of a zext instruction: each i16
; lane of %A is followed by one zero lane (mask index 8 selects lane 0 of the
; zeroinitializer operand), and the <16 x i16> result is bitcast to <8 x i32>.
; The checks expect this to be recognised as a zero extension: SSE4.1/AVX1 use
; pmovzxwd for the low half and punpckhwd for the high half, and AVX2/AVX512
; use a single 256-bit vpmovzxwd.
1552 define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
1553 ; SSE2-LABEL: shuf_zext_8i16_to_8i32:
1554 ; SSE2: # %bb.0: # %entry
1555 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1556 ; SSE2-NEXT: pxor %xmm2, %xmm2
1557 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1558 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1561 ; SSSE3-LABEL: shuf_zext_8i16_to_8i32:
1562 ; SSSE3: # %bb.0: # %entry
1563 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1564 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1565 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1566 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1569 ; SSE41-LABEL: shuf_zext_8i16_to_8i32:
1570 ; SSE41: # %bb.0: # %entry
1571 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1572 ; SSE41-NEXT: pxor %xmm2, %xmm2
1573 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1574 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1577 ; AVX1-LABEL: shuf_zext_8i16_to_8i32:
1578 ; AVX1: # %bb.0: # %entry
1579 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1580 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1581 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1582 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1585 ; AVX2-LABEL: shuf_zext_8i16_to_8i32:
1586 ; AVX2: # %bb.0: # %entry
1587 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1590 ; AVX512-LABEL: shuf_zext_8i16_to_8i32:
1591 ; AVX512: # %bb.0: # %entry
1592 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1595 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
1596 %Z = bitcast <16 x i16> %B to <8 x i32>
; Zero extension spelled as a shuffle instead of a zext instruction: each i32
; lane of %A is followed by one zero lane (mask index 4 selects lane 0 of the
; zeroinitializer operand), and the <8 x i32> result is bitcast to <4 x i64>.
; The checks expect SSE2/SSSE3 to use unpcklps/unpckhps against zero, SSE4.1/AVX1
; to use pmovzxdq for the low half and punpckhdq for the high half, and
; AVX2/AVX512 to use a single 256-bit vpmovzxdq.
1600 define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
1601 ; SSE2-LABEL: shuf_zext_4i32_to_4i64:
1602 ; SSE2: # %bb.0: # %entry
1603 ; SSE2-NEXT: movaps %xmm0, %xmm1
1604 ; SSE2-NEXT: xorps %xmm2, %xmm2
1605 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1606 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1609 ; SSSE3-LABEL: shuf_zext_4i32_to_4i64:
1610 ; SSSE3: # %bb.0: # %entry
1611 ; SSSE3-NEXT: movaps %xmm0, %xmm1
1612 ; SSSE3-NEXT: xorps %xmm2, %xmm2
1613 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1614 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1617 ; SSE41-LABEL: shuf_zext_4i32_to_4i64:
1618 ; SSE41: # %bb.0: # %entry
1619 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1620 ; SSE41-NEXT: pxor %xmm2, %xmm2
1621 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1622 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1625 ; AVX1-LABEL: shuf_zext_4i32_to_4i64:
1626 ; AVX1: # %bb.0: # %entry
1627 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1628 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1629 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1630 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1633 ; AVX2-LABEL: shuf_zext_4i32_to_4i64:
1634 ; AVX2: # %bb.0: # %entry
1635 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1638 ; AVX512-LABEL: shuf_zext_4i32_to_4i64:
1639 ; AVX512: # %bb.0: # %entry
1640 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1643 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
1644 %Z = bitcast <8 x i32> %B to <4 x i64>
; Zero extension spelled as a shuffle instead of a zext instruction: each i8
; lane of %A is followed by three zero lanes (mask index 8 selects lane 0 of the
; zeroinitializer operand), and the <32 x i8> result is bitcast to <8 x i32>.
; The expected assembly for every prefix is the same instruction sequence that
; the checks for @zext_8i8_to_8i32 in this file expect.
1648 define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) {
1649 ; SSE2-LABEL: shuf_zext_8i8_to_8i32:
1650 ; SSE2: # %bb.0: # %entry
1651 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1652 ; SSE2-NEXT: pxor %xmm2, %xmm2
1653 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1654 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1655 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1656 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1659 ; SSSE3-LABEL: shuf_zext_8i8_to_8i32:
1660 ; SSSE3: # %bb.0: # %entry
1661 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1662 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1663 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1664 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1665 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1666 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1669 ; SSE41-LABEL: shuf_zext_8i8_to_8i32:
1670 ; SSE41: # %bb.0: # %entry
1671 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1672 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1673 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1674 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1677 ; AVX1-LABEL: shuf_zext_8i8_to_8i32:
1678 ; AVX1: # %bb.0: # %entry
1679 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1680 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1681 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1682 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1685 ; AVX2-LABEL: shuf_zext_8i8_to_8i32:
1686 ; AVX2: # %bb.0: # %entry
1687 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1690 ; AVX512-LABEL: shuf_zext_8i8_to_8i32:
1691 ; AVX512: # %bb.0: # %entry
1692 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1695 %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8>
1696 %Z = bitcast <32 x i8> %B to <8 x i32>
; Shuffle-based zero extension that starts part-way into the source vector:
; bytes 6 and 7 of %A are each followed by seven zero bytes (mask index 16
; selects lane 0 of the zeroinitializer operand), and the result is bitcast to
; <2 x i64>.
; Expected lowering per check prefix:
;   SSE2                           - chain of punpck* instructions against zero.
;   SSSE3, AVX2-FAST, AVX512BW     - a single (v)pshufb.
;   SSE41, AVX1, AVX2-SLOW, AVX512F - shift right by 48 bits, then (v)pmovzxbq.
; AVX2-FAST and AVX512BW are the two RUN lines that enable
; +fast-variable-shuffle.
1700 define <2 x i64> @shuf_zext_16i8_to_2i64_offset6(<16 x i8> %A) nounwind uwtable readnone ssp {
1701 ; SSE2-LABEL: shuf_zext_16i8_to_2i64_offset6:
1702 ; SSE2: # %bb.0: # %entry
1703 ; SSE2-NEXT: pxor %xmm1, %xmm1
1704 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1705 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1706 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1709 ; SSSE3-LABEL: shuf_zext_16i8_to_2i64_offset6:
1710 ; SSSE3: # %bb.0: # %entry
1711 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
1714 ; SSE41-LABEL: shuf_zext_16i8_to_2i64_offset6:
1715 ; SSE41: # %bb.0: # %entry
1716 ; SSE41-NEXT: psrlq $48, %xmm0
1717 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1720 ; AVX1-LABEL: shuf_zext_16i8_to_2i64_offset6:
1721 ; AVX1: # %bb.0: # %entry
1722 ; AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0
1723 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1726 ; AVX2-SLOW-LABEL: shuf_zext_16i8_to_2i64_offset6:
1727 ; AVX2-SLOW: # %bb.0: # %entry
1728 ; AVX2-SLOW-NEXT: vpsrlq $48, %xmm0, %xmm0
1729 ; AVX2-SLOW-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1730 ; AVX2-SLOW-NEXT: retq
1732 ; AVX2-FAST-LABEL: shuf_zext_16i8_to_2i64_offset6:
1733 ; AVX2-FAST: # %bb.0: # %entry
1734 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
1735 ; AVX2-FAST-NEXT: retq
1737 ; AVX512F-LABEL: shuf_zext_16i8_to_2i64_offset6:
1738 ; AVX512F: # %bb.0: # %entry
1739 ; AVX512F-NEXT: vpsrlq $48, %xmm0, %xmm0
1740 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1741 ; AVX512F-NEXT: retq
1743 ; AVX512BW-LABEL: shuf_zext_16i8_to_2i64_offset6:
1744 ; AVX512BW: # %bb.0: # %entry
1745 ; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
1746 ; AVX512BW-NEXT: retq
1748 %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1749 %Z = bitcast <16 x i8> %B to <2 x i64>
; Shuffle-based zero extension that starts part-way into the source vector:
; bytes 11, 12, 13 and 14 of %A are each followed by seven zero bytes (mask
; index 16 selects lane 0 of the zeroinitializer operand), and the <32 x i8>
; result is bitcast to <4 x i64>.
; Expected lowering per check prefix:
;   SSE2         - byte shift plus a chain of punpck* instructions against zero.
;   SSSE3        - two pshufb instructions, one per 128-bit result half.
;   SSE41/AVX1   - two psrldq + pmovzxbq pairs (AVX1 joins them with vinsertf128).
;   AVX2/AVX512  - one vpsrldq followed by a 256-bit vpmovzxbq.
1753 define <4 x i64> @shuf_zext_16i8_to_4i64_offset11(<16 x i8> %A) nounwind uwtable readnone ssp {
1754 ; SSE2-LABEL: shuf_zext_16i8_to_4i64_offset11:
1755 ; SSE2: # %bb.0: # %entry
1756 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1757 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
1758 ; SSE2-NEXT: pxor %xmm2, %xmm2
1759 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
1760 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1761 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1762 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1763 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1764 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1767 ; SSSE3-LABEL: shuf_zext_16i8_to_4i64_offset11:
1768 ; SSSE3: # %bb.0: # %entry
1769 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1770 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[11],zero,zero,zero,zero,zero,zero,zero,xmm0[12],zero,zero,zero,zero,zero,zero,zero
1771 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[13],zero,zero,zero,zero,zero,zero,zero,xmm1[14],zero,zero,zero,zero,zero,zero,zero
1774 ; SSE41-LABEL: shuf_zext_16i8_to_4i64_offset11:
1775 ; SSE41: # %bb.0: # %entry
1776 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1777 ; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1778 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1779 ; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1780 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1781 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1784 ; AVX1-LABEL: shuf_zext_16i8_to_4i64_offset11:
1785 ; AVX1: # %bb.0: # %entry
1786 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1787 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1788 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1789 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1790 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1793 ; AVX2-LABEL: shuf_zext_16i8_to_4i64_offset11:
1794 ; AVX2: # %bb.0: # %entry
1795 ; AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1796 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
1799 ; AVX512-LABEL: shuf_zext_16i8_to_4i64_offset11:
1800 ; AVX512: # %bb.0: # %entry
1801 ; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1802 ; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
1805 %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <32 x i32> <i32 11, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1806 %Z = bitcast <32 x i8> %B to <4 x i64>
; Shuffle-based zero extension that starts part-way into the source vector:
; i16 lanes 3 and 4 of %A are each followed by three zero lanes (mask index 8
; selects lane 0 of the zeroinitializer operand), and the result is bitcast to
; <2 x i64>.
; NOTE(review): the "offset6" in the name looks like a byte offset (lane 3 of an
; i16 vector); the pshufb checks do address the data as bytes 6-7 and 8-9.
; Expected lowering per check prefix:
;   SSE2                            - psrldq, then punpcklwd/punpckhdq against zero.
;   SSSE3, AVX2-FAST, AVX512BW      - a single (v)pshufb.
;   SSE41, AVX1, AVX2-SLOW, AVX512F - (v)psrldq by 6 bytes, then (v)pmovzxwq.
1810 define <2 x i64> @shuf_zext_8i16_to_2i64_offset6(<8 x i16> %A) nounwind uwtable readnone ssp {
1811 ; SSE2-LABEL: shuf_zext_8i16_to_2i64_offset6:
1812 ; SSE2: # %bb.0: # %entry
1813 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1814 ; SSE2-NEXT: pxor %xmm1, %xmm1
1815 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1816 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1819 ; SSSE3-LABEL: shuf_zext_8i16_to_2i64_offset6:
1820 ; SSSE3: # %bb.0: # %entry
1821 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero
1824 ; SSE41-LABEL: shuf_zext_8i16_to_2i64_offset6:
1825 ; SSE41: # %bb.0: # %entry
1826 ; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1827 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1830 ; AVX1-LABEL: shuf_zext_8i16_to_2i64_offset6:
1831 ; AVX1: # %bb.0: # %entry
1832 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1833 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1836 ; AVX2-SLOW-LABEL: shuf_zext_8i16_to_2i64_offset6:
1837 ; AVX2-SLOW: # %bb.0: # %entry
1838 ; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1839 ; AVX2-SLOW-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1840 ; AVX2-SLOW-NEXT: retq
1842 ; AVX2-FAST-LABEL: shuf_zext_8i16_to_2i64_offset6:
1843 ; AVX2-FAST: # %bb.0: # %entry
1844 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero
1845 ; AVX2-FAST-NEXT: retq
1847 ; AVX512F-LABEL: shuf_zext_8i16_to_2i64_offset6:
1848 ; AVX512F: # %bb.0: # %entry
1849 ; AVX512F-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1850 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1851 ; AVX512F-NEXT: retq
1853 ; AVX512BW-LABEL: shuf_zext_8i16_to_2i64_offset6:
1854 ; AVX512BW: # %bb.0: # %entry
1855 ; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero
1856 ; AVX512BW-NEXT: retq
1858 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8>
1859 %Z = bitcast <8 x i16> %B to <2 x i64>
; Shuffle-based zero extension that starts part-way into the source vector:
; i16 lanes 2, 3, 4 and 5 of %A are each followed by three zero lanes (mask
; index 8 selects lane 0 of the zeroinitializer operand), and the <16 x i16>
; result is bitcast to <4 x i64>.
; NOTE(review): here "offset2" matches the starting lane index, whereas the
; sibling @shuf_zext_8i16_to_2i64_offset6 appears to count bytes - confirm the
; naming convention before adding similar tests.
; Expected lowering per check prefix:
;   SSE2/SSSE3   - punpcklwd/punpckhwd against zero, then punpckhdq/punpckldq.
;   SSE41/AVX1   - two pshufd + pmovzxwq pairs (AVX1 joins them with vinsertf128).
;   AVX2/AVX512  - one vpshufd followed by a 256-bit vpmovzxwq.
1863 define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable readnone ssp {
1864 ; SSE2-LABEL: shuf_zext_8i16_to_4i64_offset2:
1865 ; SSE2: # %bb.0: # %entry
1866 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1867 ; SSE2-NEXT: pxor %xmm2, %xmm2
1868 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1869 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1870 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1871 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1874 ; SSSE3-LABEL: shuf_zext_8i16_to_4i64_offset2:
1875 ; SSSE3: # %bb.0: # %entry
1876 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1877 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1878 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1879 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1880 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1881 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1884 ; SSE41-LABEL: shuf_zext_8i16_to_4i64_offset2:
1885 ; SSE41: # %bb.0: # %entry
1886 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1887 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1888 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1889 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1890 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1893 ; AVX1-LABEL: shuf_zext_8i16_to_4i64_offset2:
1894 ; AVX1: # %bb.0: # %entry
1895 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1896 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1897 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1898 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1899 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1902 ; AVX2-LABEL: shuf_zext_8i16_to_4i64_offset2:
1903 ; AVX2: # %bb.0: # %entry
1904 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
1905 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1908 ; AVX512-LABEL: shuf_zext_8i16_to_4i64_offset2:
1909 ; AVX512: # %bb.0: # %entry
1910 ; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
1911 ; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1914 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8>
1915 %Z = bitcast <16 x i16> %B to <4 x i64>
; Zero extension expressed as a shuffle: the mask takes i16 lanes 1, 2, 3 and 4
; of %A and follows each with mask index 8, which is lane 0 of the
; zeroinitializer operand. The <8 x i16> result is then bitcast to <4 x i32>.
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
1919 define <4 x i32> @shuf_zext_8i16_to_4i32_offset1(<8 x i16> %A) nounwind uwtable readnone ssp {
1920 ; SSE2-LABEL: shuf_zext_8i16_to_4i32_offset1:
1921 ; SSE2: # %bb.0: # %entry
1922 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1923 ; SSE2-NEXT: pxor %xmm1, %xmm1
1924 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1927 ; SSSE3-LABEL: shuf_zext_8i16_to_4i32_offset1:
1928 ; SSSE3: # %bb.0: # %entry
1929 ; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1930 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1931 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1934 ; SSE41-LABEL: shuf_zext_8i16_to_4i32_offset1:
1935 ; SSE41: # %bb.0: # %entry
1936 ; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1937 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1940 ; AVX1-LABEL: shuf_zext_8i16_to_4i32_offset1:
1941 ; AVX1: # %bb.0: # %entry
1942 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1943 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1946 ; AVX2-SLOW-LABEL: shuf_zext_8i16_to_4i32_offset1:
1947 ; AVX2-SLOW: # %bb.0: # %entry
1948 ; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1949 ; AVX2-SLOW-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1950 ; AVX2-SLOW-NEXT: retq
1952 ; AVX2-FAST-LABEL: shuf_zext_8i16_to_4i32_offset1:
1953 ; AVX2-FAST: # %bb.0: # %entry
1954 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,xmm0[4,5],zero,zero,xmm0[6,7],zero,zero,xmm0[8,9],zero,zero
1955 ; AVX2-FAST-NEXT: retq
1957 ; AVX512F-LABEL: shuf_zext_8i16_to_4i32_offset1:
1958 ; AVX512F: # %bb.0: # %entry
1959 ; AVX512F-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1960 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1961 ; AVX512F-NEXT: retq
1963 ; AVX512BW-LABEL: shuf_zext_8i16_to_4i32_offset1:
1964 ; AVX512BW: # %bb.0: # %entry
1965 ; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,xmm0[4,5],zero,zero,xmm0[6,7],zero,zero,xmm0[8,9],zero,zero
1966 ; AVX512BW-NEXT: retq
1968 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8>
1969 %Z = bitcast <8 x i16> %B to <4 x i32>
; Zero extension expressed as a shuffle: the mask takes i16 lanes 3 through 7
; of %A followed by three undef lanes, and follows each with mask index 8,
; which is lane 0 of the zeroinitializer operand. The <16 x i16> result is then
; bitcast to <8 x i32>.
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
1973 define <8 x i32> @shuf_zext_8i16_to_8i32_offset3(<8 x i16> %A) nounwind uwtable readnone ssp {
1974 ; SSE2-LABEL: shuf_zext_8i16_to_8i32_offset3:
1975 ; SSE2: # %bb.0: # %entry
1976 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1977 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1978 ; SSE2-NEXT: pxor %xmm2, %xmm2
1979 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1980 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1983 ; SSSE3-LABEL: shuf_zext_8i16_to_8i32_offset3:
1984 ; SSSE3: # %bb.0: # %entry
1985 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1986 ; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1987 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1988 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1989 ; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1992 ; SSE41-LABEL: shuf_zext_8i16_to_8i32_offset3:
1993 ; SSE41: # %bb.0: # %entry
1994 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1995 ; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1996 ; SSE41-NEXT: pxor %xmm2, %xmm2
1997 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1998 ; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2001 ; AVX1-LABEL: shuf_zext_8i16_to_8i32_offset3:
2002 ; AVX1: # %bb.0: # %entry
2003 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2004 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
2005 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2006 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2007 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2010 ; AVX2-LABEL: shuf_zext_8i16_to_8i32_offset3:
2011 ; AVX2: # %bb.0: # %entry
2012 ; AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
2013 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2016 ; AVX512-LABEL: shuf_zext_8i16_to_8i32_offset3:
2017 ; AVX512: # %bb.0: # %entry
2018 ; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
2019 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2022 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8, i32 undef, i32 8, i32 undef, i32 8, i32 undef, i32 8>
2023 %Z = bitcast <16 x i16> %B to <8 x i32>
; Zero extension expressed as a shuffle on a 256-bit input: the mask takes i16
; lanes 8 through 12 and lane 14 of %A (the two remaining even positions are
; undef) and follows each with mask index 16, which is lane 0 of the
; zeroinitializer operand. The <16 x i16> result is then bitcast to <8 x i32>.
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
2027 define <8 x i32> @shuf_zext_16i16_to_8i32_offset8(<16 x i16> %A) nounwind uwtable readnone ssp {
2028 ; SSE2-LABEL: shuf_zext_16i16_to_8i32_offset8:
2029 ; SSE2: # %bb.0: # %entry
2030 ; SSE2-NEXT: pxor %xmm2, %xmm2
2031 ; SSE2-NEXT: movdqa %xmm1, %xmm0
2032 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2033 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2036 ; SSSE3-LABEL: shuf_zext_16i16_to_8i32_offset8:
2037 ; SSSE3: # %bb.0: # %entry
2038 ; SSSE3-NEXT: pxor %xmm2, %xmm2
2039 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
2040 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2041 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2044 ; SSE41-LABEL: shuf_zext_16i16_to_8i32_offset8:
2045 ; SSE41: # %bb.0: # %entry
2046 ; SSE41-NEXT: pxor %xmm2, %xmm2
2047 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
2048 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2051 ; AVX1-LABEL: shuf_zext_16i16_to_8i32_offset8:
2052 ; AVX1: # %bb.0: # %entry
2053 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2054 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
2055 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2056 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2057 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2060 ; AVX2-LABEL: shuf_zext_16i16_to_8i32_offset8:
2061 ; AVX2: # %bb.0: # %entry
2062 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
2063 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2066 ; AVX512-LABEL: shuf_zext_16i16_to_8i32_offset8:
2067 ; AVX512: # %bb.0: # %entry
2068 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
2069 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2072 %B = shufflevector <16 x i16> %A, <16 x i16> zeroinitializer, <16 x i32> <i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 undef, i32 16, i32 14, i32 16, i32 undef, i32 16>
2073 %Z = bitcast <16 x i16> %B to <8 x i32>
; Zero extension expressed as a shuffle: the mask takes i32 lanes 2 and 3 of %A
; and follows each with mask index 4, which is lane 0 of the zeroinitializer
; operand. The <4 x i32> result is then bitcast to <2 x i64>.
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
2077 define <2 x i64> @shuf_zext_4i32_to_2i64_offset2(<4 x i32> %A) nounwind uwtable readnone ssp {
2078 ; SSE-LABEL: shuf_zext_4i32_to_2i64_offset2:
2079 ; SSE: # %bb.0: # %entry
2080 ; SSE-NEXT: xorps %xmm1, %xmm1
2081 ; SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2084 ; AVX-LABEL: shuf_zext_4i32_to_2i64_offset2:
2085 ; AVX: # %bb.0: # %entry
2086 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
2087 ; AVX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2090 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 3, i32 4>
2091 %Z = bitcast <4 x i32> %B to <2 x i64>
; Zero extension expressed as a shuffle with undef lanes: the even mask
; positions are undef, 2, 3, undef (i32 lanes of %A), and every odd position is
; mask index 4, which is lane 0 of the zeroinitializer operand. The <8 x i32>
; result is then bitcast to <4 x i64>.
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
2095 define <4 x i64> @shuf_zext_4i32_to_4i64_offset1(<4 x i32> %A) nounwind uwtable readnone ssp {
2096 ; SSE2-LABEL: shuf_zext_4i32_to_4i64_offset1:
2097 ; SSE2: # %bb.0: # %entry
2098 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2099 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
2100 ; SSE2-NEXT: pand %xmm1, %xmm0
2101 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2104 ; SSSE3-LABEL: shuf_zext_4i32_to_4i64_offset1:
2105 ; SSSE3: # %bb.0: # %entry
2106 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
2107 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
2108 ; SSSE3-NEXT: pand %xmm1, %xmm0
2109 ; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2112 ; SSE41-LABEL: shuf_zext_4i32_to_4i64_offset1:
2113 ; SSE41: # %bb.0: # %entry
2114 ; SSE41-NEXT: movdqa %xmm0, %xmm1
2115 ; SSE41-NEXT: pxor %xmm0, %xmm0
2116 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
2117 ; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2120 ; AVX1-LABEL: shuf_zext_4i32_to_4i64_offset1:
2121 ; AVX1: # %bb.0: # %entry
2122 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
2123 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
2124 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2125 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2128 ; AVX2-LABEL: shuf_zext_4i32_to_4i64_offset1:
2129 ; AVX2: # %bb.0: # %entry
2130 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
2131 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2134 ; AVX512-LABEL: shuf_zext_4i32_to_4i64_offset1:
2135 ; AVX512: # %bb.0: # %entry
2136 ; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
2137 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2140 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 undef, i32 4, i32 2, i32 4, i32 3, i32 4, i32 undef, i32 4>
2141 %Z = bitcast <8 x i32> %B to <4 x i64>
; Direct zext of a <32 x i8> argument to <32 x i32>. In the SSE variants the
; expected code copies %rdi to %rax and stores the eight 16-byte result pieces
; at offsets 0 through 112 from %rdi.
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
2145 define <32 x i32> @zext_32i8_to_32i32(<32 x i8> %x) {
2146 ; SSE2-LABEL: zext_32i8_to_32i32:
2148 ; SSE2-NEXT: movq %rdi, %rax
2149 ; SSE2-NEXT: pxor %xmm2, %xmm2
2150 ; SSE2-NEXT: movdqa %xmm0, %xmm3
2151 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2152 ; SSE2-NEXT: movdqa %xmm3, %xmm8
2153 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm2[0],xmm8[1],xmm2[1],xmm8[2],xmm2[2],xmm8[3],xmm2[3]
2154 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2155 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
2156 ; SSE2-NEXT: movdqa %xmm0, %xmm5
2157 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
2158 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2159 ; SSE2-NEXT: movdqa %xmm1, %xmm6
2160 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2161 ; SSE2-NEXT: movdqa %xmm6, %xmm7
2162 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm2[0],xmm7[1],xmm2[1],xmm7[2],xmm2[2],xmm7[3],xmm2[3]
2163 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2164 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
2165 ; SSE2-NEXT: movdqa %xmm1, %xmm4
2166 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2167 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2168 ; SSE2-NEXT: movdqa %xmm1, 112(%rdi)
2169 ; SSE2-NEXT: movdqa %xmm4, 96(%rdi)
2170 ; SSE2-NEXT: movdqa %xmm6, 80(%rdi)
2171 ; SSE2-NEXT: movdqa %xmm7, 64(%rdi)
2172 ; SSE2-NEXT: movdqa %xmm0, 48(%rdi)
2173 ; SSE2-NEXT: movdqa %xmm5, 32(%rdi)
2174 ; SSE2-NEXT: movdqa %xmm3, 16(%rdi)
2175 ; SSE2-NEXT: movdqa %xmm8, (%rdi)
2178 ; SSSE3-LABEL: zext_32i8_to_32i32:
2180 ; SSSE3-NEXT: movq %rdi, %rax
2181 ; SSSE3-NEXT: pxor %xmm2, %xmm2
2182 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
2183 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2184 ; SSSE3-NEXT: movdqa %xmm3, %xmm8
2185 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm2[0],xmm8[1],xmm2[1],xmm8[2],xmm2[2],xmm8[3],xmm2[3]
2186 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2187 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
2188 ; SSSE3-NEXT: movdqa %xmm0, %xmm5
2189 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
2190 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2191 ; SSSE3-NEXT: movdqa %xmm1, %xmm6
2192 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2193 ; SSSE3-NEXT: movdqa %xmm6, %xmm7
2194 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm2[0],xmm7[1],xmm2[1],xmm7[2],xmm2[2],xmm7[3],xmm2[3]
2195 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2196 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
2197 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
2198 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2199 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2200 ; SSSE3-NEXT: movdqa %xmm1, 112(%rdi)
2201 ; SSSE3-NEXT: movdqa %xmm4, 96(%rdi)
2202 ; SSSE3-NEXT: movdqa %xmm6, 80(%rdi)
2203 ; SSSE3-NEXT: movdqa %xmm7, 64(%rdi)
2204 ; SSSE3-NEXT: movdqa %xmm0, 48(%rdi)
2205 ; SSSE3-NEXT: movdqa %xmm5, 32(%rdi)
2206 ; SSSE3-NEXT: movdqa %xmm3, 16(%rdi)
2207 ; SSSE3-NEXT: movdqa %xmm8, (%rdi)
2210 ; SSE41-LABEL: zext_32i8_to_32i32:
2212 ; SSE41-NEXT: movq %rdi, %rax
2213 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2214 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
2215 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
2216 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
2217 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
2218 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
2219 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2220 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm5 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2221 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm1[1,1,2,3]
2222 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero
2223 ; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm1[2,3,0,1]
2224 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
2225 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,3]
2226 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2227 ; SSE41-NEXT: movdqa %xmm1, 112(%rdi)
2228 ; SSE41-NEXT: movdqa %xmm7, 96(%rdi)
2229 ; SSE41-NEXT: movdqa %xmm6, 80(%rdi)
2230 ; SSE41-NEXT: movdqa %xmm5, 64(%rdi)
2231 ; SSE41-NEXT: movdqa %xmm0, 48(%rdi)
2232 ; SSE41-NEXT: movdqa %xmm4, 32(%rdi)
2233 ; SSE41-NEXT: movdqa %xmm3, 16(%rdi)
2234 ; SSE41-NEXT: movdqa %xmm2, (%rdi)
2237 ; AVX1-LABEL: zext_32i8_to_32i32:
2239 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2240 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
2241 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
2242 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm4
2243 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2244 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
2245 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm3[1,1,2,3]
2246 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
2247 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
2248 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
2249 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2250 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,0,1]
2251 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2252 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
2253 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
2254 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2255 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[3,3,0,1]
2256 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
2257 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm3
2258 ; AVX1-NEXT: vmovaps %ymm4, %ymm0
2261 ; AVX2-LABEL: zext_32i8_to_32i32:
2263 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2264 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
2265 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero
2266 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
2267 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2268 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
2269 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2270 ; AVX2-NEXT: vmovdqa %ymm4, %ymm0
2273 ; AVX512-LABEL: zext_32i8_to_32i32:
2275 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
2276 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
2277 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
2278 ; AVX512-NEXT: vmovdqa64 %zmm2, %zmm0
2280 %res = zext <32 x i8>%x to <32 x i32>
; Loads a <2 x i8> vector from %addr with alignment 1, zero-extends it to
; <2 x i32>, and adds the extended vector to itself. The expected code reads
; the two bytes with a single 16-bit scalar load (movzwl).
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
2284 define <2 x i32> @zext_2i8_to_2i32(<2 x i8>* %addr) {
2285 ; SSE2-LABEL: zext_2i8_to_2i32:
2287 ; SSE2-NEXT: movzwl (%rdi), %eax
2288 ; SSE2-NEXT: movd %eax, %xmm0
2289 ; SSE2-NEXT: pxor %xmm1, %xmm1
2290 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2291 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2292 ; SSE2-NEXT: paddd %xmm0, %xmm0
2295 ; SSSE3-LABEL: zext_2i8_to_2i32:
2297 ; SSSE3-NEXT: movzwl (%rdi), %eax
2298 ; SSSE3-NEXT: movd %eax, %xmm0
2299 ; SSSE3-NEXT: pxor %xmm1, %xmm1
2300 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2301 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2302 ; SSSE3-NEXT: paddd %xmm0, %xmm0
2305 ; SSE41-LABEL: zext_2i8_to_2i32:
2307 ; SSE41-NEXT: movzwl (%rdi), %eax
2308 ; SSE41-NEXT: movd %eax, %xmm0
2309 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2310 ; SSE41-NEXT: paddd %xmm0, %xmm0
2313 ; AVX-LABEL: zext_2i8_to_2i32:
2315 ; AVX-NEXT: movzwl (%rdi), %eax
2316 ; AVX-NEXT: vmovd %eax, %xmm0
2317 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2318 ; AVX-NEXT: vpaddd %xmm0, %xmm0, %xmm0
2320 %x = load <2 x i8>, <2 x i8>* %addr, align 1
2321 %y = zext <2 x i8> %x to <2 x i32>
2322 %z = add <2 x i32>%y, %y
; Loads a <4 x i17> vector (a non-power-of-two element width) from %ptr and
; zero-extends it to <4 x i32>. The expected code extracts each field with
; scalar shifts by 17, 34 and 51 bits, then masks the vector; the AVX2 and
; AVX512 variants show the mask value 131071 (2^17 - 1) in every lane.
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
2326 define <4 x i32> @zext_4i17_to_4i32(<4 x i17>* %ptr) {
2327 ; SSE2-LABEL: zext_4i17_to_4i32:
2329 ; SSE2-NEXT: movq (%rdi), %rax
2330 ; SSE2-NEXT: movd %eax, %xmm0
2331 ; SSE2-NEXT: movq %rax, %rcx
2332 ; SSE2-NEXT: shrq $17, %rcx
2333 ; SSE2-NEXT: movd %ecx, %xmm1
2334 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2335 ; SSE2-NEXT: movl 8(%rdi), %ecx
2336 ; SSE2-NEXT: shll $13, %ecx
2337 ; SSE2-NEXT: movq %rax, %rdx
2338 ; SSE2-NEXT: shrq $51, %rdx
2339 ; SSE2-NEXT: orl %ecx, %edx
2340 ; SSE2-NEXT: movd %edx, %xmm1
2341 ; SSE2-NEXT: shrq $34, %rax
2342 ; SSE2-NEXT: movd %eax, %xmm2
2343 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2344 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2345 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
2348 ; SSSE3-LABEL: zext_4i17_to_4i32:
2350 ; SSSE3-NEXT: movq (%rdi), %rax
2351 ; SSSE3-NEXT: movd %eax, %xmm0
2352 ; SSSE3-NEXT: movq %rax, %rcx
2353 ; SSSE3-NEXT: shrq $17, %rcx
2354 ; SSSE3-NEXT: movd %ecx, %xmm1
2355 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2356 ; SSSE3-NEXT: movl 8(%rdi), %ecx
2357 ; SSSE3-NEXT: shll $13, %ecx
2358 ; SSSE3-NEXT: movq %rax, %rdx
2359 ; SSSE3-NEXT: shrq $51, %rdx
2360 ; SSSE3-NEXT: orl %ecx, %edx
2361 ; SSSE3-NEXT: movd %edx, %xmm1
2362 ; SSSE3-NEXT: shrq $34, %rax
2363 ; SSSE3-NEXT: movd %eax, %xmm2
2364 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2365 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2366 ; SSSE3-NEXT: pand {{.*}}(%rip), %xmm0
2369 ; SSE41-LABEL: zext_4i17_to_4i32:
2371 ; SSE41-NEXT: movl 8(%rdi), %eax
2372 ; SSE41-NEXT: shll $13, %eax
2373 ; SSE41-NEXT: movq (%rdi), %rcx
2374 ; SSE41-NEXT: movq %rcx, %rdx
2375 ; SSE41-NEXT: shrq $51, %rdx
2376 ; SSE41-NEXT: orl %eax, %edx
2377 ; SSE41-NEXT: movq %rcx, %rax
2378 ; SSE41-NEXT: shrq $17, %rax
2379 ; SSE41-NEXT: movd %ecx, %xmm0
2380 ; SSE41-NEXT: pinsrd $1, %eax, %xmm0
2381 ; SSE41-NEXT: shrq $34, %rcx
2382 ; SSE41-NEXT: pinsrd $2, %ecx, %xmm0
2383 ; SSE41-NEXT: pinsrd $3, %edx, %xmm0
2384 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm0
2387 ; AVX1-LABEL: zext_4i17_to_4i32:
2389 ; AVX1-NEXT: movl 8(%rdi), %eax
2390 ; AVX1-NEXT: shll $13, %eax
2391 ; AVX1-NEXT: movq (%rdi), %rcx
2392 ; AVX1-NEXT: movq %rcx, %rdx
2393 ; AVX1-NEXT: shrq $51, %rdx
2394 ; AVX1-NEXT: orl %eax, %edx
2395 ; AVX1-NEXT: movq %rcx, %rax
2396 ; AVX1-NEXT: shrq $17, %rax
2397 ; AVX1-NEXT: vmovd %ecx, %xmm0
2398 ; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
2399 ; AVX1-NEXT: shrq $34, %rcx
2400 ; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
2401 ; AVX1-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
2402 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
2405 ; AVX2-LABEL: zext_4i17_to_4i32:
2407 ; AVX2-NEXT: movl 8(%rdi), %eax
2408 ; AVX2-NEXT: shll $13, %eax
2409 ; AVX2-NEXT: movq (%rdi), %rcx
2410 ; AVX2-NEXT: movq %rcx, %rdx
2411 ; AVX2-NEXT: shrq $51, %rdx
2412 ; AVX2-NEXT: orl %eax, %edx
2413 ; AVX2-NEXT: movq %rcx, %rax
2414 ; AVX2-NEXT: shrq $17, %rax
2415 ; AVX2-NEXT: vmovd %ecx, %xmm0
2416 ; AVX2-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
2417 ; AVX2-NEXT: shrq $34, %rcx
2418 ; AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
2419 ; AVX2-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
2420 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [131071,131071,131071,131071]
2421 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
2424 ; AVX512-LABEL: zext_4i17_to_4i32:
2426 ; AVX512-NEXT: movl 8(%rdi), %eax
2427 ; AVX512-NEXT: shll $13, %eax
2428 ; AVX512-NEXT: movq (%rdi), %rcx
2429 ; AVX512-NEXT: movq %rcx, %rdx
2430 ; AVX512-NEXT: shrq $51, %rdx
2431 ; AVX512-NEXT: orl %eax, %edx
2432 ; AVX512-NEXT: movq %rcx, %rax
2433 ; AVX512-NEXT: shrq $17, %rax
2434 ; AVX512-NEXT: vmovd %ecx, %xmm0
2435 ; AVX512-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
2436 ; AVX512-NEXT: shrq $34, %rcx
2437 ; AVX512-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
2438 ; AVX512-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
2439 ; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [131071,131071,131071,131071]
2440 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
2442 %a = load <4 x i17>, <4 x i17>* %ptr
2443 %b = zext <4 x i17> %a to <4 x i32>
; Truncates the scalar %x to i6, splats it across an <8 x i6> vector (insert
; into lane 0, then shuffle with an all-zero mask), adds the constant vector
; <0, 1, ..., 7>, and zero-extends the sum to <8 x i64>. The SSE variants show
; the lanes being masked with 63 (2^6 - 1).
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
2447 define <8 x i64> @zext_8i6_to_8i64(i32 %x) nounwind uwtable readnone ssp {
2448 ; SSE2-LABEL: zext_8i6_to_8i64:
2449 ; SSE2: # %bb.0: # %entry
2450 ; SSE2-NEXT: movd %edi, %xmm0
2451 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
2452 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
2453 ; SSE2-NEXT: paddw {{.*}}(%rip), %xmm3
2454 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,1,0,3]
2455 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
2456 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [63,63]
2457 ; SSE2-NEXT: pand %xmm4, %xmm0
2458 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,3]
2459 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,6,7]
2460 ; SSE2-NEXT: pand %xmm4, %xmm1
2461 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,1,2,3]
2462 ; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,6,7]
2463 ; SSE2-NEXT: pand %xmm4, %xmm2
2464 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,1,3,3]
2465 ; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,6,7]
2466 ; SSE2-NEXT: pand %xmm4, %xmm3
2469 ; SSSE3-LABEL: zext_8i6_to_8i64:
2470 ; SSSE3: # %bb.0: # %entry
2471 ; SSSE3-NEXT: movd %edi, %xmm0
2472 ; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
2473 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
2474 ; SSSE3-NEXT: paddw {{.*}}(%rip), %xmm3
2475 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,1,0,3]
2476 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
2477 ; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [63,63]
2478 ; SSSE3-NEXT: pand %xmm4, %xmm0
2479 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,3]
2480 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,6,7]
2481 ; SSSE3-NEXT: pand %xmm4, %xmm1
2482 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,1,2,3]
2483 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,6,7]
2484 ; SSSE3-NEXT: pand %xmm4, %xmm2
2485 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,1,3,3]
2486 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,6,7]
2487 ; SSSE3-NEXT: pand %xmm4, %xmm3
2490 ; SSE41-LABEL: zext_8i6_to_8i64:
2491 ; SSE41: # %bb.0: # %entry
2492 ; SSE41-NEXT: movd %edi, %xmm0
2493 ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
2494 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
2495 ; SSE41-NEXT: paddw {{.*}}(%rip), %xmm3
2496 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
2497 ; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [63,63]
2498 ; SSE41-NEXT: pand %xmm4, %xmm0
2499 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,2,3]
2500 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
2501 ; SSE41-NEXT: pand %xmm4, %xmm1
2502 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
2503 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
2504 ; SSE41-NEXT: pand %xmm4, %xmm2
2505 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,1,2,3]
2506 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
2507 ; SSE41-NEXT: pand %xmm4, %xmm3
2510 ; AVX1-LABEL: zext_8i6_to_8i64:
2511 ; AVX1: # %bb.0: # %entry
2512 ; AVX1-NEXT: vmovd %edi, %xmm0
2513 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
2514 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2515 ; AVX1-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
2516 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm1
2517 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
2518 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,2,3]
2519 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
2520 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2521 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
2522 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
2523 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
2524 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
2525 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
2528 ; AVX2-LABEL: zext_8i6_to_8i64:
2529 ; AVX2: # %bb.0: # %entry
2530 ; AVX2-NEXT: vmovd %edi, %xmm0
2531 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
2532 ; AVX2-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
2533 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm1
2534 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2535 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
2536 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2539 ; AVX512-LABEL: zext_8i6_to_8i64:
2540 ; AVX512: # %bb.0: # %entry
2541 ; AVX512-NEXT: vmovd %edi, %xmm0
2542 ; AVX512-NEXT: vpbroadcastw %xmm0, %xmm0
2543 ; AVX512-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
2544 ; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2545 ; AVX512-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
2548 %a = trunc i32 %x to i6
2549 %b = insertelement <8 x i6> undef, i6 %a, i32 0
2550 %c = shufflevector <8 x i6> %b, <8 x i6> undef, <8 x i32> zeroinitializer
2551 %d = add <8 x i6> %c, <i6 0, i6 1, i6 2, i6 3, i6 4, i6 5, i6 6, i6 7>
2552 %e = zext <8 x i6> %d to <8 x i64>
; Splat of element 0 of the <4 x i32> input (zeroinitializer shuffle mask),
; then zero-extended to <4 x i64>. Every result lane holds the same value, so
; the expected SSE and AVX1 sequences extend one 128-bit half and reuse it for
; the other half (movdqa copy of xmm0 / vinsertf128 of xmm0 into ymm0), while
; the AVX2 and AVX512 sequences broadcast first and widen with one vpmovzxdq.
2556 define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) {
2557 ; SSE2-LABEL: splatshuf_zext_v4i64:
2559 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2560 ; SSE2-NEXT: pxor %xmm1, %xmm1
2561 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2562 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2565 ; SSSE3-LABEL: splatshuf_zext_v4i64:
2567 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2568 ; SSSE3-NEXT: pxor %xmm1, %xmm1
2569 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2570 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
2573 ; SSE41-LABEL: splatshuf_zext_v4i64:
2575 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2576 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
2577 ; SSE41-NEXT: movdqa %xmm0, %xmm1
2580 ; AVX1-LABEL: splatshuf_zext_v4i64:
2582 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2583 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
2584 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2587 ; AVX2-LABEL: splatshuf_zext_v4i64:
2589 ; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
2590 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2593 ; AVX512-LABEL: splatshuf_zext_v4i64:
2595 ; AVX512-NEXT: vpbroadcastd %xmm0, %xmm0
2596 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2598 %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer
2599 %ext = zext <4 x i32> %shuf to <4 x i64>
; The shuffle mask repeats <0, undef, 3, 7> for both halves of the <8 x i16>
; value, so the undef lane sits in the same position of each half, and the
; result is then zero-extended to <8 x i32>. The expected SSE and AVX1
; sequences build a single 128-bit half and duplicate it (movdqa copy of xmm0 /
; vinsertf128 of xmm0 into ymm0).
2603 define <8 x i32> @splatshuf_zext_v8i32_matching_undefs(<8 x i16> %x) {
2604 ; SSE2-LABEL: splatshuf_zext_v8i32_matching_undefs:
2606 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
2607 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7]
2608 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
2609 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2612 ; SSSE3-LABEL: splatshuf_zext_v8i32_matching_undefs:
2614 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[u,u],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
2615 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
2618 ; SSE41-LABEL: splatshuf_zext_v8i32_matching_undefs:
2620 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,6,7,14,15,8,9,10,11,12,13,14,15]
2621 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2622 ; SSE41-NEXT: movdqa %xmm0, %xmm1
2625 ; AVX1-LABEL: splatshuf_zext_v8i32_matching_undefs:
2627 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[6,7],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
2628 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2631 ; AVX2-LABEL: splatshuf_zext_v8i32_matching_undefs:
2633 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,6,7,14,15,0,1,6,7,6,7,14,15]
2634 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2637 ; AVX512-LABEL: splatshuf_zext_v8i32_matching_undefs:
2639 ; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,6,7,14,15,0,1,6,7,6,7,14,15]
2640 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2642 %shuf = shufflevector <8 x i16> %x, <8 x i16> undef, <8 x i32> <i32 0, i32 undef, i32 3, i32 7, i32 0, i32 undef, i32 3, i32 7>
2643 %ext = zext <8 x i16> %shuf to <8 x i32>
; The shuffle mask's low half is <0, 1, 3, 7> and its high half is
; <0, undef, 3, 7>, so the two halves agree only where the high half is not
; undef; the result is then zero-extended to <8 x i32>. In the expected AVX1
; sequence the halves are extended separately (vpunpckhwd against a zeroed
; register for the high half, vpmovzxwd for the low half) and then joined with
; vinsertf128, rather than one half being duplicated.
2647 define <8 x i32> @splatshuf_zext_v8i32_unmatched_undef(<8 x i16> %x) {
2648 ; SSE2-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2650 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
2651 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,7]
2652 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2653 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,3,2,4,5,6,7]
2654 ; SSE2-NEXT: pxor %xmm1, %xmm1
2655 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2656 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2659 ; SSSE3-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2661 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[2,3],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
2662 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
2665 ; SSE41-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2667 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,14,15,6,7,12,13,14,15]
2668 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2669 ; SSE41-NEXT: movdqa %xmm0, %xmm1
2672 ; AVX1-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2674 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15]
2675 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
2676 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2677 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2678 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2681 ; AVX2-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2683 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15]
2684 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2687 ; AVX512-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2689 ; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15]
2690 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2692 %shuf = shufflevector <8 x i16> %x, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 3, i32 7, i32 0, i32 undef, i32 3, i32 7>
2693 %ext = zext <8 x i16> %shuf to <8 x i32>
; Splat of byte 14 of the <16 x i8> input (all sixteen mask entries are 14),
; then zero-extended to <16 x i16>. The expected SSE and AVX1 sequences produce
; one 128-bit half and reuse it for the other (movdqa copy of xmm0 /
; vinsertf128 of xmm0 into ymm0); the AVX2 and AVX512 sequences splat with
; vpshufb and widen with one vpmovzxbw.
2697 define <16 x i16> @splatshuf_zext_v16i16(<16 x i8> %x) {
2698 ; SSE2-LABEL: splatshuf_zext_v16i16:
2700 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
2701 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,7]
2702 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,2,3]
2703 ; SSE2-NEXT: pxor %xmm1, %xmm1
2704 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2705 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2708 ; SSSE3-LABEL: splatshuf_zext_v16i16:
2710 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero
2711 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
2714 ; SSE41-LABEL: splatshuf_zext_v16i16:
2716 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,14,14,14,14,14,14,14,14,14,14,14,14,14,15,15]
2717 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2718 ; SSE41-NEXT: movdqa %xmm0, %xmm1
2721 ; AVX1-LABEL: splatshuf_zext_v16i16:
2723 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero
2724 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2727 ; AVX2-LABEL: splatshuf_zext_v16i16:
2729 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14]
2730 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2733 ; AVX512-LABEL: splatshuf_zext_v16i16:
2735 ; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14]
2736 ; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2738 %shuf = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14>
2739 %ext = zext <16 x i8> %shuf to <16 x i16>