; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
; Zero-extend the low 8 bytes of a v16i8 to v8i16 (SSE2/SSSE3 unpack with zero; SSE4.1+/AVX use pmovzxbw).
define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_8i16:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_8i16:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_8i16:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_16i8_to_8i16:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %C = zext <8 x i8> %B to <8 x i16>
  ret <8 x i16> %C
}
; Zero-extend a full v16i8 to v16i16 (pre-AVX2 needs two halves; AVX2+ is a single vpmovzxbw to ymm).
define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %A) {
; SSE2-LABEL: zext_16i8_to_16i16:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_16i16:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_16i16:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_16i8_to_16i16:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_16i8_to_16i16:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: zext_16i8_to_16i16:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512-NEXT: retq
entry:
  %B = zext <16 x i8> %A to <16 x i16>
  ret <16 x i16> %B
}
; Zero-extend v32i8 to v32i16 (four xmm results pre-AVX; two ymm on AVX2/AVX512F; one zmm vpmovzxbw with AVX512BW).
define <32 x i16> @zext_32i8_to_32i16(<32 x i8> %A) {
; SSE2-LABEL: zext_32i8_to_32i16:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15]
; SSE2-NEXT: movdqa %xmm3, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_32i8_to_32i16:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa %xmm1, %xmm3
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm4, %xmm4
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15]
; SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_32i8_to_32i16:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: movdqa %xmm5, %xmm0
; SSE41-NEXT: movdqa %xmm4, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_32i8_to_32i16:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT: vmovaps %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_32i8_to_32i16:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT: vmovdqa %ymm2, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: zext_32i8_to_32i16:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512F-NEXT: vmovdqa %ymm2, %ymm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: zext_32i8_to_32i16:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BW-NEXT: retq
entry:
  %B = zext <32 x i8> %A to <32 x i16>
  ret <32 x i16> %B
}
; Zero-extend the low 4 bytes of a v16i8 to v4i32 (SSE2/SSSE3: two unpacks with zero; SSE4.1+/AVX: pmovzxbd).
define <4 x i32> @zext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_4i32:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_4i32:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_4i32:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_16i8_to_4i32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i8> %B to <4 x i32>
  ret <4 x i32> %C
}
; Zero-extend the low 8 bytes of a v16i8 to v8i32 (two xmm halves pre-AVX2; single vpmovzxbd to ymm on AVX2+).
define <8 x i32> @zext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_8i32:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_8i32:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_8i32:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_16i8_to_8i32:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_16i8_to_8i32:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: zext_16i8_to_8i32:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %C = zext <8 x i8> %B to <8 x i32>
  ret <8 x i32> %C
}
; Zero-extend a full v16i8 to v16i32 (four xmm quarters pre-AVX2; two ymm on AVX2; one zmm vpmovzxbd on AVX512).
define <16 x i32> @zext_16i8_to_16i32(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_16i32:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
; SSE2-NEXT: movdqa %xmm3, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_16i32:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm3
; SSSE3-NEXT: pxor %xmm4, %xmm4
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
; SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_16i32:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: movdqa %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_16i8_to_16i32:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,0,1]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; AVX1-NEXT: vmovaps %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_16i8_to_16i32:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT: vmovdqa %ymm2, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: zext_16i8_to_16i32:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512-NEXT: retq
entry:
  %B = zext <16 x i8> %A to <16 x i32>
  ret <16 x i32> %B
}
; Zero-extend the low 2 bytes of a v16i8 to v2i64 (SSE2: chain of unpacks; SSSE3: pshufb; SSE4.1+: pmovzxbq).
define <2 x i64> @zext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_2i64:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_2i64:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_2i64:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_16i8_to_2i64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %C = zext <2 x i8> %B to <2 x i64>
  ret <2 x i64> %C
}
; Zero-extend the low 4 bytes of a v16i8 to v4i64 (two xmm halves pre-AVX2; single vpmovzxbq to ymm on AVX2+).
define <4 x i64> @zext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_4i64:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_4i64:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_4i64:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: psrld $16, %xmm0
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_16i8_to_4i64:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_16i8_to_4i64:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: zext_16i8_to_4i64:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i8> %B to <4 x i64>
  ret <4 x i64> %C
}
; Zero-extend the low 8 bytes of a v16i8 to v8i64 (four xmm quarters pre-AVX2; two ymm on AVX2; one zmm vpmovzxbq on AVX512).
define <8 x i64> @zext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_8i64:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSE2-NEXT: movdqa %xmm3, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_8i64:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [0,128,128,128,128,128,128,128,1,128,128,128,128,128,128,128]
; SSSE3-NEXT: pshufb %xmm4, %xmm0
; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2,128,128,128,128,128,128,128,3,128,128,128,128,128,128,128]
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
; SSSE3-NEXT: pshufb %xmm5, %xmm1
; SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSSE3-NEXT: pshufb %xmm4, %xmm2
; SSSE3-NEXT: pshufb %xmm5, %xmm3
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_8i64:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrld $16, %xmm1
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: psrlq $48, %xmm0
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: movdqa %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_16i8_to_8i64:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm2
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; AVX1-NEXT: vmovaps %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_16i8_to_8i64:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vmovdqa %ymm2, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: zext_16i8_to_8i64:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %C = zext <8 x i8> %B to <8 x i64>
  ret <8 x i64> %C
}
; Zero-extend the low 4 words of a v8i16 to v4i32 (SSE2/SSSE3: punpcklwd with zero; SSE4.1+/AVX: pmovzxwd).
define <4 x i32> @zext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_4i32:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_8i16_to_4i32:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_8i16_to_4i32:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_8i16_to_4i32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i16> %B to <4 x i32>
  ret <4 x i32> %C
}
; Zero-extend a full v8i16 to v8i32 (two xmm halves pre-AVX2; single vpmovzxwd to ymm on AVX2+).
define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_8i32:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_8i16_to_8i32:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_8i16_to_8i32:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_8i16_to_8i32:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_8i16_to_8i32:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: zext_8i16_to_8i32:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT: retq
entry:
  %B = zext <8 x i16> %A to <8 x i32>
  ret <8 x i32> %B
}
; NOTE(review): zext <16 x i16> -> <16 x i32>. Pre-SSE4.1 interleaves each input
; xmm with a zeroed register (punpcklwd/punpckhwd); SSE4.1 uses pmovzxwd per half;
; AVX512 widens the whole vector with one ymm->zmm vpmovzxwd. Comments here are
; hand-written and will be dropped if checks are regenerated.
548 define <16 x i32> @zext_16i16_to_16i32(<16 x i16> %A) nounwind uwtable readnone ssp {
549 ; SSE2-LABEL: zext_16i16_to_16i32:
550 ; SSE2: # %bb.0: # %entry
551 ; SSE2-NEXT: movdqa %xmm1, %xmm3
552 ; SSE2-NEXT: movdqa %xmm0, %xmm1
553 ; SSE2-NEXT: pxor %xmm4, %xmm4
554 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
555 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
556 ; SSE2-NEXT: movdqa %xmm3, %xmm2
557 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
558 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
561 ; SSSE3-LABEL: zext_16i16_to_16i32:
562 ; SSSE3: # %bb.0: # %entry
563 ; SSSE3-NEXT: movdqa %xmm1, %xmm3
564 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
565 ; SSSE3-NEXT: pxor %xmm4, %xmm4
566 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
567 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
568 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
569 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
570 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
573 ; SSE41-LABEL: zext_16i16_to_16i32:
574 ; SSE41: # %bb.0: # %entry
575 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
576 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
577 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
578 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
579 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
580 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
581 ; SSE41-NEXT: movdqa %xmm5, %xmm0
582 ; SSE41-NEXT: movdqa %xmm4, %xmm1
585 ; AVX1-LABEL: zext_16i16_to_16i32:
586 ; AVX1: # %bb.0: # %entry
587 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
588 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
589 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
590 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
591 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
592 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
593 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
594 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
595 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
598 ; AVX2-LABEL: zext_16i16_to_16i32:
599 ; AVX2: # %bb.0: # %entry
600 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
601 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
602 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
603 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
606 ; AVX512-LABEL: zext_16i16_to_16i32:
607 ; AVX512: # %bb.0: # %entry
608 ; AVX512-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
611 %B = zext <16 x i16> %A to <16 x i32>
; NOTE(review): zero-extend the low two i16 lanes to i64. Pre-SSE4.1 chains
; punpcklwd + punpckldq against a zeroed register; SSE4.1/AVX fold it into a
; single pmovzxwq.
615 define <2 x i64> @zext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp {
616 ; SSE2-LABEL: zext_8i16_to_2i64:
617 ; SSE2: # %bb.0: # %entry
618 ; SSE2-NEXT: pxor %xmm1, %xmm1
619 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
620 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
623 ; SSSE3-LABEL: zext_8i16_to_2i64:
624 ; SSSE3: # %bb.0: # %entry
625 ; SSSE3-NEXT: pxor %xmm1, %xmm1
626 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
627 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
630 ; SSE41-LABEL: zext_8i16_to_2i64:
631 ; SSE41: # %bb.0: # %entry
632 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
635 ; AVX-LABEL: zext_8i16_to_2i64:
636 ; AVX: # %bb.0: # %entry
637 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
640 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
641 %C = zext <2 x i16> %B to <2 x i64>
; NOTE(review): zero-extend the low four i16 lanes to i64 (two xmm results on
; SSE, one ymm on AVX2/AVX512). SSE4.1 splits via pshufd + two pmovzxwq.
645 define <4 x i64> @zext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp {
646 ; SSE2-LABEL: zext_8i16_to_4i64:
647 ; SSE2: # %bb.0: # %entry
648 ; SSE2-NEXT: movdqa %xmm0, %xmm1
649 ; SSE2-NEXT: pxor %xmm2, %xmm2
650 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
651 ; SSE2-NEXT: movdqa %xmm1, %xmm0
652 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
653 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
656 ; SSSE3-LABEL: zext_8i16_to_4i64:
657 ; SSSE3: # %bb.0: # %entry
658 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
659 ; SSSE3-NEXT: pxor %xmm2, %xmm2
660 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
661 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
662 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
663 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
666 ; SSE41-LABEL: zext_8i16_to_4i64:
667 ; SSE41: # %bb.0: # %entry
668 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
669 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
670 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
671 ; SSE41-NEXT: movdqa %xmm2, %xmm0
674 ; AVX1-LABEL: zext_8i16_to_4i64:
675 ; AVX1: # %bb.0: # %entry
676 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
677 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
678 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
679 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
682 ; AVX2-LABEL: zext_8i16_to_4i64:
683 ; AVX2: # %bb.0: # %entry
684 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
687 ; AVX512-LABEL: zext_8i16_to_4i64:
688 ; AVX512: # %bb.0: # %entry
689 ; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
692 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
693 %C = zext <4 x i16> %B to <4 x i64>
; NOTE(review): zext <8 x i16> -> <8 x i64> (four xmm results on SSE, two ymm on
; AVX1/AVX2). AVX512 collapses the whole widening into one xmm->zmm vpmovzxwq.
697 define <8 x i64> @zext_8i16_to_8i64(<8 x i16> %A) nounwind uwtable readnone ssp {
698 ; SSE2-LABEL: zext_8i16_to_8i64:
699 ; SSE2: # %bb.0: # %entry
700 ; SSE2-NEXT: movdqa %xmm0, %xmm3
701 ; SSE2-NEXT: pxor %xmm4, %xmm4
702 ; SSE2-NEXT: movdqa %xmm0, %xmm1
703 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
704 ; SSE2-NEXT: movdqa %xmm1, %xmm0
705 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
706 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
707 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
708 ; SSE2-NEXT: movdqa %xmm3, %xmm2
709 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
710 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
713 ; SSSE3-LABEL: zext_8i16_to_8i64:
714 ; SSSE3: # %bb.0: # %entry
715 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
716 ; SSSE3-NEXT: pxor %xmm4, %xmm4
717 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
718 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
719 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
720 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
721 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
722 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
723 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
724 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
725 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
728 ; SSE41-LABEL: zext_8i16_to_8i64:
729 ; SSE41: # %bb.0: # %entry
730 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
731 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
732 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
733 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
734 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
735 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
736 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
737 ; SSE41-NEXT: movdqa %xmm4, %xmm0
740 ; AVX1-LABEL: zext_8i16_to_8i64:
741 ; AVX1: # %bb.0: # %entry
742 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
743 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
744 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
745 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
746 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
747 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
748 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,0,1]
749 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
750 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
751 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
754 ; AVX2-LABEL: zext_8i16_to_8i64:
755 ; AVX2: # %bb.0: # %entry
756 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
757 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
758 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
759 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
762 ; AVX512-LABEL: zext_8i16_to_8i64:
763 ; AVX512: # %bb.0: # %entry
764 ; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
767 %B = zext <8 x i16> %A to <8 x i64>
; NOTE(review): zero-extend the low two i32 lanes to i64. Pre-SSE4.1 this is a
; single unpcklps with a zeroed register; SSE4.1/AVX use pmovzxdq.
771 define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp {
772 ; SSE2-LABEL: zext_4i32_to_2i64:
773 ; SSE2: # %bb.0: # %entry
774 ; SSE2-NEXT: xorps %xmm1, %xmm1
775 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
778 ; SSSE3-LABEL: zext_4i32_to_2i64:
779 ; SSSE3: # %bb.0: # %entry
780 ; SSSE3-NEXT: xorps %xmm1, %xmm1
781 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
784 ; SSE41-LABEL: zext_4i32_to_2i64:
785 ; SSE41: # %bb.0: # %entry
786 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
789 ; AVX-LABEL: zext_4i32_to_2i64:
790 ; AVX: # %bb.0: # %entry
791 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
794 %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
795 %C = zext <2 x i32> %B to <2 x i64>
; NOTE(review): zext <4 x i32> -> <4 x i64>. SSE2/SSSE3 use the float-domain
; unpcklps/unpckhps with zero; AVX2/AVX512 emit one xmm->ymm vpmovzxdq.
799 define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
800 ; SSE2-LABEL: zext_4i32_to_4i64:
801 ; SSE2: # %bb.0: # %entry
802 ; SSE2-NEXT: movaps %xmm0, %xmm1
803 ; SSE2-NEXT: xorps %xmm2, %xmm2
804 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
805 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
808 ; SSSE3-LABEL: zext_4i32_to_4i64:
809 ; SSSE3: # %bb.0: # %entry
810 ; SSSE3-NEXT: movaps %xmm0, %xmm1
811 ; SSSE3-NEXT: xorps %xmm2, %xmm2
812 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
813 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
816 ; SSE41-LABEL: zext_4i32_to_4i64:
817 ; SSE41: # %bb.0: # %entry
818 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
819 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
820 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
821 ; SSE41-NEXT: movdqa %xmm2, %xmm0
824 ; AVX1-LABEL: zext_4i32_to_4i64:
825 ; AVX1: # %bb.0: # %entry
826 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
827 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
828 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
829 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
832 ; AVX2-LABEL: zext_4i32_to_4i64:
833 ; AVX2: # %bb.0: # %entry
834 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
837 ; AVX512-LABEL: zext_4i32_to_4i64:
838 ; AVX512: # %bb.0: # %entry
839 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
842 %B = zext <4 x i32> %A to <4 x i64>
; NOTE(review): zext <8 x i32> -> <8 x i64> (two xmm inputs / four xmm outputs on
; SSE). AVX512 performs the whole widening with one ymm->zmm vpmovzxdq.
846 define <8 x i64> @zext_8i32_to_8i64(<8 x i32> %A) nounwind uwtable readnone ssp {
847 ; SSE2-LABEL: zext_8i32_to_8i64:
848 ; SSE2: # %bb.0: # %entry
849 ; SSE2-NEXT: movaps %xmm1, %xmm3
850 ; SSE2-NEXT: movaps %xmm0, %xmm1
851 ; SSE2-NEXT: xorps %xmm4, %xmm4
852 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
853 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
854 ; SSE2-NEXT: movaps %xmm3, %xmm2
855 ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
856 ; SSE2-NEXT: unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
859 ; SSSE3-LABEL: zext_8i32_to_8i64:
860 ; SSSE3: # %bb.0: # %entry
861 ; SSSE3-NEXT: movaps %xmm1, %xmm3
862 ; SSSE3-NEXT: movaps %xmm0, %xmm1
863 ; SSSE3-NEXT: xorps %xmm4, %xmm4
864 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
865 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
866 ; SSSE3-NEXT: movaps %xmm3, %xmm2
867 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
868 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
871 ; SSE41-LABEL: zext_8i32_to_8i64:
872 ; SSE41: # %bb.0: # %entry
873 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero
874 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
875 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
876 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero
877 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
878 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero
879 ; SSE41-NEXT: movdqa %xmm5, %xmm0
880 ; SSE41-NEXT: movdqa %xmm4, %xmm1
883 ; AVX1-LABEL: zext_8i32_to_8i64:
884 ; AVX1: # %bb.0: # %entry
885 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
886 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
887 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero
888 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
889 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
890 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
891 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
892 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
893 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
896 ; AVX2-LABEL: zext_8i32_to_8i64:
897 ; AVX2: # %bb.0: # %entry
898 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
899 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
900 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
901 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
904 ; AVX512-LABEL: zext_8i32_to_8i64:
905 ; AVX512: # %bb.0: # %entry
906 ; AVX512-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
909 %B = zext <8 x i32> %A to <8 x i64>
; NOTE(review): load two i8 from memory and zero-extend to i64. SSE2 loads via
; movzwl into a GPR then movd + punpck chain; SSSE3 uses pshufb; SSE4.1/AVX fold
; the load into pmovzxbq.
913 define <2 x i64> @load_zext_2i8_to_2i64(<2 x i8> *%ptr) {
914 ; SSE2-LABEL: load_zext_2i8_to_2i64:
915 ; SSE2: # %bb.0: # %entry
916 ; SSE2-NEXT: movzwl (%rdi), %eax
917 ; SSE2-NEXT: movd %eax, %xmm0
918 ; SSE2-NEXT: pxor %xmm1, %xmm1
919 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
920 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
921 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
924 ; SSSE3-LABEL: load_zext_2i8_to_2i64:
925 ; SSSE3: # %bb.0: # %entry
926 ; SSSE3-NEXT: movzwl (%rdi), %eax
927 ; SSSE3-NEXT: movd %eax, %xmm0
928 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
931 ; SSE41-LABEL: load_zext_2i8_to_2i64:
932 ; SSE41: # %bb.0: # %entry
933 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
936 ; AVX-LABEL: load_zext_2i8_to_2i64:
937 ; AVX: # %bb.0: # %entry
938 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
941 %X = load <2 x i8>, <2 x i8>* %ptr
942 %Y = zext <2 x i8> %X to <2 x i64>
; NOTE(review): load four i8 and zero-extend to i32. SSE2/SSSE3 do movd +
; punpcklbw/punpcklwd with zero; SSE4.1/AVX use a folded-load pmovzxbd.
946 define <4 x i32> @load_zext_4i8_to_4i32(<4 x i8> *%ptr) {
947 ; SSE2-LABEL: load_zext_4i8_to_4i32:
948 ; SSE2: # %bb.0: # %entry
949 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
950 ; SSE2-NEXT: pxor %xmm1, %xmm1
951 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
952 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
955 ; SSSE3-LABEL: load_zext_4i8_to_4i32:
956 ; SSSE3: # %bb.0: # %entry
957 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
958 ; SSSE3-NEXT: pxor %xmm1, %xmm1
959 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
960 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
963 ; SSE41-LABEL: load_zext_4i8_to_4i32:
964 ; SSE41: # %bb.0: # %entry
965 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
968 ; AVX-LABEL: load_zext_4i8_to_4i32:
969 ; AVX: # %bb.0: # %entry
970 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
973 %X = load <4 x i8>, <4 x i8>* %ptr
974 %Y = zext <4 x i8> %X to <4 x i32>
; NOTE(review): load four i8 and zero-extend to i64. SSE4.1/AVX1 issue two
; folded-load pmovzxbq ops; AVX2/AVX512 need a single xmm->ymm vpmovzxbq.
978 define <4 x i64> @load_zext_4i8_to_4i64(<4 x i8> *%ptr) {
979 ; SSE2-LABEL: load_zext_4i8_to_4i64:
980 ; SSE2: # %bb.0: # %entry
981 ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
982 ; SSE2-NEXT: pxor %xmm2, %xmm2
983 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
984 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
985 ; SSE2-NEXT: movdqa %xmm1, %xmm0
986 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
987 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
990 ; SSSE3-LABEL: load_zext_4i8_to_4i64:
991 ; SSSE3: # %bb.0: # %entry
992 ; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
993 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
994 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
995 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
998 ; SSE41-LABEL: load_zext_4i8_to_4i64:
999 ; SSE41: # %bb.0: # %entry
1000 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1001 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1004 ; AVX1-LABEL: load_zext_4i8_to_4i64:
1005 ; AVX1: # %bb.0: # %entry
1006 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1007 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1008 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1011 ; AVX2-LABEL: load_zext_4i8_to_4i64:
1012 ; AVX2: # %bb.0: # %entry
1013 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1016 ; AVX512-LABEL: load_zext_4i8_to_4i64:
1017 ; AVX512: # %bb.0: # %entry
1018 ; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1021 %X = load <4 x i8>, <4 x i8>* %ptr
1022 %Y = zext <4 x i8> %X to <4 x i64>
; NOTE(review): load eight i8 and zero-extend to i16. SSE2/SSSE3 use movq +
; punpcklbw with zero; SSE4.1/AVX use a folded-load pmovzxbw.
1026 define <8 x i16> @load_zext_8i8_to_8i16(<8 x i8> *%ptr) {
1027 ; SSE2-LABEL: load_zext_8i8_to_8i16:
1028 ; SSE2: # %bb.0: # %entry
1029 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1030 ; SSE2-NEXT: pxor %xmm1, %xmm1
1031 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1034 ; SSSE3-LABEL: load_zext_8i8_to_8i16:
1035 ; SSSE3: # %bb.0: # %entry
1036 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1037 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1038 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1041 ; SSE41-LABEL: load_zext_8i8_to_8i16:
1042 ; SSE41: # %bb.0: # %entry
1043 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1046 ; AVX-LABEL: load_zext_8i8_to_8i16:
1047 ; AVX: # %bb.0: # %entry
1048 ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1051 %X = load <8 x i8>, <8 x i8>* %ptr
1052 %Y = zext <8 x i8> %X to <8 x i16>
; NOTE(review): load eight i8 and zero-extend to i32 (two xmm results on SSE).
; SSE4.1/AVX1 use two folded-load pmovzxbd ops; AVX2/AVX512 use a single
; mem->ymm vpmovzxbd.
1056 define <8 x i32> @load_zext_8i8_to_8i32(<8 x i8> *%ptr) {
1057 ; SSE2-LABEL: load_zext_8i8_to_8i32:
1058 ; SSE2: # %bb.0: # %entry
1059 ; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1060 ; SSE2-NEXT: pxor %xmm2, %xmm2
1061 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1062 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1063 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1064 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1067 ; SSSE3-LABEL: load_zext_8i8_to_8i32:
1068 ; SSSE3: # %bb.0: # %entry
1069 ; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1070 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1071 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1072 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1073 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1074 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1077 ; SSE41-LABEL: load_zext_8i8_to_8i32:
1078 ; SSE41: # %bb.0: # %entry
1079 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1080 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1083 ; AVX1-LABEL: load_zext_8i8_to_8i32:
1084 ; AVX1: # %bb.0: # %entry
1085 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1086 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1087 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1090 ; AVX2-LABEL: load_zext_8i8_to_8i32:
1091 ; AVX2: # %bb.0: # %entry
1092 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1095 ; AVX512-LABEL: load_zext_8i8_to_8i32:
1096 ; AVX512: # %bb.0: # %entry
1097 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1100 %X = load <8 x i8>, <8 x i8>* %ptr
1101 %Y = zext <8 x i8> %X to <8 x i32>
; NOTE(review): load a full <16 x i8> vector but only the low eight lanes are
; widened (shufflevector narrows before the zext). AVX2/AVX512 still manage a
; single folded-load vpmovzxbd because only 8 bytes are consumed.
1105 define <8 x i32> @load_zext_16i8_to_8i32(<16 x i8> *%ptr) {
1106 ; SSE2-LABEL: load_zext_16i8_to_8i32:
1107 ; SSE2: # %bb.0: # %entry
1108 ; SSE2-NEXT: movdqa (%rdi), %xmm1
1109 ; SSE2-NEXT: pxor %xmm2, %xmm2
1110 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1111 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1112 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1113 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1116 ; SSSE3-LABEL: load_zext_16i8_to_8i32:
1117 ; SSSE3: # %bb.0: # %entry
1118 ; SSSE3-NEXT: movdqa (%rdi), %xmm1
1119 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1120 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1121 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1122 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1123 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1126 ; SSE41-LABEL: load_zext_16i8_to_8i32:
1127 ; SSE41: # %bb.0: # %entry
1128 ; SSE41-NEXT: movdqa (%rdi), %xmm1
1129 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1130 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
1131 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1134 ; AVX1-LABEL: load_zext_16i8_to_8i32:
1135 ; AVX1: # %bb.0: # %entry
1136 ; AVX1-NEXT: vmovdqa (%rdi), %xmm0
1137 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1138 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1139 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1140 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1143 ; AVX2-LABEL: load_zext_16i8_to_8i32:
1144 ; AVX2: # %bb.0: # %entry
1145 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1148 ; AVX512-LABEL: load_zext_16i8_to_8i32:
1149 ; AVX512: # %bb.0: # %entry
1150 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1153 %X = load <16 x i8>, <16 x i8>* %ptr
1154 %Y = shufflevector <16 x i8> %X, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1155 %Z = zext <8 x i8> %Y to <8 x i32>
; NOTE(review): load eight i8 and zero-extend to i64 (four xmm results on SSE).
; SSSE3 reuses two pshufb mask constants across both halves; SSE4.1 issues four
; folded-load pmovzxbq ops; AVX512 collapses everything into one mem->zmm
; vpmovzxbq.
1159 define <8 x i64> @load_zext_8i8_to_8i64(<8 x i8> *%ptr) {
1160 ; SSE2-LABEL: load_zext_8i8_to_8i64:
1161 ; SSE2: # %bb.0: # %entry
1162 ; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1163 ; SSE2-NEXT: pxor %xmm4, %xmm4
1164 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
1165 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
1166 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
1167 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1168 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
1169 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
1170 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
1171 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
1172 ; SSE2-NEXT: movdqa %xmm3, %xmm2
1173 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
1174 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
1177 ; SSSE3-LABEL: load_zext_8i8_to_8i64:
1178 ; SSSE3: # %bb.0: # %entry
1179 ; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1180 ; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [0,128,128,128,128,128,128,128,1,128,128,128,128,128,128,128]
1181 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1182 ; SSSE3-NEXT: pshufb %xmm4, %xmm0
1183 ; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2,128,128,128,128,128,128,128,3,128,128,128,128,128,128,128]
1184 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
1185 ; SSSE3-NEXT: pshufb %xmm5, %xmm1
1186 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
1187 ; SSSE3-NEXT: pshufb %xmm4, %xmm2
1188 ; SSSE3-NEXT: pshufb %xmm5, %xmm3
1191 ; SSE41-LABEL: load_zext_8i8_to_8i64:
1192 ; SSE41: # %bb.0: # %entry
1193 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1194 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1195 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1196 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1199 ; AVX1-LABEL: load_zext_8i8_to_8i64:
1200 ; AVX1: # %bb.0: # %entry
1201 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1202 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1203 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1204 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1205 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
1206 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
1209 ; AVX2-LABEL: load_zext_8i8_to_8i64:
1210 ; AVX2: # %bb.0: # %entry
1211 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1212 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1215 ; AVX512-LABEL: load_zext_8i8_to_8i64:
1216 ; AVX512: # %bb.0: # %entry
1217 ; AVX512-NEXT: vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
1220 %X = load <8 x i8>, <8 x i8>* %ptr
1221 %Y = zext <8 x i8> %X to <8 x i64>
1225 define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) {
1226 ; SSE2-LABEL: load_zext_16i8_to_16i16:
1227 ; SSE2: # %bb.0: # %entry
1228 ; SSE2-NEXT: movdqa (%rdi), %xmm1
1229 ; SSE2-NEXT: pxor %xmm2, %xmm2
1230 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1231 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1232 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
1235 ; SSSE3-LABEL: load_zext_16i8_to_16i16:
1236 ; SSSE3: # %bb.0: # %entry
1237 ; SSSE3-NEXT: movdqa (%rdi), %xmm1
1238 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1239 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1240 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1241 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
1244 ; SSE41-LABEL: load_zext_16i8_to_16i16:
1245 ; SSE41: # %bb.0: # %entry
1246 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1247 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1250 ; AVX1-LABEL: load_zext_16i8_to_16i16:
1251 ; AVX1: # %bb.0: # %entry
1252 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1253 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1254 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1257 ; AVX2-LABEL: load_zext_16i8_to_16i16:
1258 ; AVX2: # %bb.0: # %entry
1259 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1262 ; AVX512-LABEL: load_zext_16i8_to_16i16:
1263 ; AVX512: # %bb.0: # %entry
1264 ; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1267 %X = load <16 x i8>, <16 x i8>* %ptr
1268 %Y = zext <16 x i8> %X to <16 x i16>
1272 define <2 x i64> @load_zext_2i16_to_2i64(<2 x i16> *%ptr) {
1273 ; SSE2-LABEL: load_zext_2i16_to_2i64:
1274 ; SSE2: # %bb.0: # %entry
1275 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1276 ; SSE2-NEXT: pxor %xmm1, %xmm1
1277 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1278 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1281 ; SSSE3-LABEL: load_zext_2i16_to_2i64:
1282 ; SSSE3: # %bb.0: # %entry
1283 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1284 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1285 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1286 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1289 ; SSE41-LABEL: load_zext_2i16_to_2i64:
1290 ; SSE41: # %bb.0: # %entry
1291 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1294 ; AVX-LABEL: load_zext_2i16_to_2i64:
1295 ; AVX: # %bb.0: # %entry
1296 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1299 %X = load <2 x i16>, <2 x i16>* %ptr
1300 %Y = zext <2 x i16> %X to <2 x i64>
1304 define <4 x i32> @load_zext_4i16_to_4i32(<4 x i16> *%ptr) {
1305 ; SSE2-LABEL: load_zext_4i16_to_4i32:
1306 ; SSE2: # %bb.0: # %entry
1307 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1308 ; SSE2-NEXT: pxor %xmm1, %xmm1
1309 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1312 ; SSSE3-LABEL: load_zext_4i16_to_4i32:
1313 ; SSSE3: # %bb.0: # %entry
1314 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1315 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1316 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1319 ; SSE41-LABEL: load_zext_4i16_to_4i32:
1320 ; SSE41: # %bb.0: # %entry
1321 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1324 ; AVX-LABEL: load_zext_4i16_to_4i32:
1325 ; AVX: # %bb.0: # %entry
1326 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1329 %X = load <4 x i16>, <4 x i16>* %ptr
1330 %Y = zext <4 x i16> %X to <4 x i32>
1334 define <4 x i64> @load_zext_4i16_to_4i64(<4 x i16> *%ptr) {
1335 ; SSE2-LABEL: load_zext_4i16_to_4i64:
1336 ; SSE2: # %bb.0: # %entry
1337 ; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1338 ; SSE2-NEXT: pxor %xmm2, %xmm2
1339 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1340 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1341 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1342 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1345 ; SSSE3-LABEL: load_zext_4i16_to_4i64:
1346 ; SSSE3: # %bb.0: # %entry
1347 ; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1348 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1349 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1350 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1351 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1352 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1355 ; SSE41-LABEL: load_zext_4i16_to_4i64:
1356 ; SSE41: # %bb.0: # %entry
1357 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1358 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1361 ; AVX1-LABEL: load_zext_4i16_to_4i64:
1362 ; AVX1: # %bb.0: # %entry
1363 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1364 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1365 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1368 ; AVX2-LABEL: load_zext_4i16_to_4i64:
1369 ; AVX2: # %bb.0: # %entry
1370 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1373 ; AVX512-LABEL: load_zext_4i16_to_4i64:
1374 ; AVX512: # %bb.0: # %entry
1375 ; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1378 %X = load <4 x i16>, <4 x i16>* %ptr
1379 %Y = zext <4 x i16> %X to <4 x i64>
1383 define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {
1384 ; SSE2-LABEL: load_zext_8i16_to_8i32:
1385 ; SSE2: # %bb.0: # %entry
1386 ; SSE2-NEXT: movdqa (%rdi), %xmm1
1387 ; SSE2-NEXT: pxor %xmm2, %xmm2
1388 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1389 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1390 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1393 ; SSSE3-LABEL: load_zext_8i16_to_8i32:
1394 ; SSSE3: # %bb.0: # %entry
1395 ; SSSE3-NEXT: movdqa (%rdi), %xmm1
1396 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1397 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1398 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1399 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1402 ; SSE41-LABEL: load_zext_8i16_to_8i32:
1403 ; SSE41: # %bb.0: # %entry
1404 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1405 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1408 ; AVX1-LABEL: load_zext_8i16_to_8i32:
1409 ; AVX1: # %bb.0: # %entry
1410 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1411 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1412 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1415 ; AVX2-LABEL: load_zext_8i16_to_8i32:
1416 ; AVX2: # %bb.0: # %entry
1417 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1420 ; AVX512-LABEL: load_zext_8i16_to_8i32:
1421 ; AVX512: # %bb.0: # %entry
1422 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1425 %X = load <8 x i16>, <8 x i16>* %ptr
1426 %Y = zext <8 x i16> %X to <8 x i32>
1430 define <2 x i64> @load_zext_2i32_to_2i64(<2 x i32> *%ptr) {
1431 ; SSE2-LABEL: load_zext_2i32_to_2i64:
1432 ; SSE2: # %bb.0: # %entry
1433 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1434 ; SSE2-NEXT: xorps %xmm1, %xmm1
1435 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1438 ; SSSE3-LABEL: load_zext_2i32_to_2i64:
1439 ; SSSE3: # %bb.0: # %entry
1440 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1441 ; SSSE3-NEXT: xorps %xmm1, %xmm1
1442 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1445 ; SSE41-LABEL: load_zext_2i32_to_2i64:
1446 ; SSE41: # %bb.0: # %entry
1447 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1450 ; AVX-LABEL: load_zext_2i32_to_2i64:
1451 ; AVX: # %bb.0: # %entry
1452 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1455 %X = load <2 x i32>, <2 x i32>* %ptr
1456 %Y = zext <2 x i32> %X to <2 x i64>
1460 define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {
1461 ; SSE2-LABEL: load_zext_4i32_to_4i64:
1462 ; SSE2: # %bb.0: # %entry
1463 ; SSE2-NEXT: movaps (%rdi), %xmm1
1464 ; SSE2-NEXT: xorps %xmm2, %xmm2
1465 ; SSE2-NEXT: movaps %xmm1, %xmm0
1466 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1467 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1470 ; SSSE3-LABEL: load_zext_4i32_to_4i64:
1471 ; SSSE3: # %bb.0: # %entry
1472 ; SSSE3-NEXT: movaps (%rdi), %xmm1
1473 ; SSSE3-NEXT: xorps %xmm2, %xmm2
1474 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1475 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1476 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1479 ; SSE41-LABEL: load_zext_4i32_to_4i64:
1480 ; SSE41: # %bb.0: # %entry
1481 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1482 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
1485 ; AVX1-LABEL: load_zext_4i32_to_4i64:
1486 ; AVX1: # %bb.0: # %entry
1487 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1488 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
1489 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1492 ; AVX2-LABEL: load_zext_4i32_to_4i64:
1493 ; AVX2: # %bb.0: # %entry
1494 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1497 ; AVX512-LABEL: load_zext_4i32_to_4i64:
1498 ; AVX512: # %bb.0: # %entry
1499 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1502 %X = load <4 x i32>, <4 x i32>* %ptr
1503 %Y = zext <4 x i32> %X to <4 x i64>
1507 define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
1508 ; SSE2-LABEL: zext_8i8_to_8i32:
1509 ; SSE2: # %bb.0: # %entry
1510 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1511 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
1512 ; SSE2-NEXT: pxor %xmm2, %xmm2
1513 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1514 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1515 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1518 ; SSSE3-LABEL: zext_8i8_to_8i32:
1519 ; SSSE3: # %bb.0: # %entry
1520 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1521 ; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1
1522 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1523 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1524 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1525 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1528 ; SSE41-LABEL: zext_8i8_to_8i32:
1529 ; SSE41: # %bb.0: # %entry
1530 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm0
1531 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1532 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1533 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1534 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1537 ; AVX1-LABEL: zext_8i8_to_8i32:
1538 ; AVX1: # %bb.0: # %entry
1539 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
1540 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1541 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1542 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1543 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1546 ; AVX2-LABEL: zext_8i8_to_8i32:
1547 ; AVX2: # %bb.0: # %entry
1548 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
1549 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1552 ; AVX512-LABEL: zext_8i8_to_8i32:
1553 ; AVX512: # %bb.0: # %entry
1554 ; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
1555 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1558 %t = zext <8 x i8> %z to <8 x i32>
1562 define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
1563 ; SSE2-LABEL: shuf_zext_8i16_to_8i32:
1564 ; SSE2: # %bb.0: # %entry
1565 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1566 ; SSE2-NEXT: pxor %xmm2, %xmm2
1567 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1568 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1571 ; SSSE3-LABEL: shuf_zext_8i16_to_8i32:
1572 ; SSSE3: # %bb.0: # %entry
1573 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1574 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1575 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1576 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1579 ; SSE41-LABEL: shuf_zext_8i16_to_8i32:
1580 ; SSE41: # %bb.0: # %entry
1581 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1582 ; SSE41-NEXT: pxor %xmm2, %xmm2
1583 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1584 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1587 ; AVX1-LABEL: shuf_zext_8i16_to_8i32:
1588 ; AVX1: # %bb.0: # %entry
1589 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1590 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1591 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1592 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1595 ; AVX2-LABEL: shuf_zext_8i16_to_8i32:
1596 ; AVX2: # %bb.0: # %entry
1597 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1600 ; AVX512-LABEL: shuf_zext_8i16_to_8i32:
1601 ; AVX512: # %bb.0: # %entry
1602 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1605 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
1606 %Z = bitcast <16 x i16> %B to <8 x i32>
1610 define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
1611 ; SSE2-LABEL: shuf_zext_4i32_to_4i64:
1612 ; SSE2: # %bb.0: # %entry
1613 ; SSE2-NEXT: movaps %xmm0, %xmm1
1614 ; SSE2-NEXT: xorps %xmm2, %xmm2
1615 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1616 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1619 ; SSSE3-LABEL: shuf_zext_4i32_to_4i64:
1620 ; SSSE3: # %bb.0: # %entry
1621 ; SSSE3-NEXT: movaps %xmm0, %xmm1
1622 ; SSSE3-NEXT: xorps %xmm2, %xmm2
1623 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1624 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1627 ; SSE41-LABEL: shuf_zext_4i32_to_4i64:
1628 ; SSE41: # %bb.0: # %entry
1629 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1630 ; SSE41-NEXT: pxor %xmm2, %xmm2
1631 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1632 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1635 ; AVX1-LABEL: shuf_zext_4i32_to_4i64:
1636 ; AVX1: # %bb.0: # %entry
1637 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1638 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1639 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1640 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1643 ; AVX2-LABEL: shuf_zext_4i32_to_4i64:
1644 ; AVX2: # %bb.0: # %entry
1645 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1648 ; AVX512-LABEL: shuf_zext_4i32_to_4i64:
1649 ; AVX512: # %bb.0: # %entry
1650 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1653 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
1654 %Z = bitcast <8 x i32> %B to <4 x i64>
1658 define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) {
1659 ; SSE2-LABEL: shuf_zext_8i8_to_8i32:
1660 ; SSE2: # %bb.0: # %entry
1661 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1662 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
1663 ; SSE2-NEXT: pxor %xmm2, %xmm2
1664 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1665 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1666 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1669 ; SSSE3-LABEL: shuf_zext_8i8_to_8i32:
1670 ; SSSE3: # %bb.0: # %entry
1671 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1672 ; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1
1673 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1674 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1675 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1676 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1679 ; SSE41-LABEL: shuf_zext_8i8_to_8i32:
1680 ; SSE41: # %bb.0: # %entry
1681 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1682 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1683 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1684 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1685 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1688 ; AVX1-LABEL: shuf_zext_8i8_to_8i32:
1689 ; AVX1: # %bb.0: # %entry
1690 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1691 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1692 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1693 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1694 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1697 ; AVX2-LABEL: shuf_zext_8i8_to_8i32:
1698 ; AVX2: # %bb.0: # %entry
1699 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1700 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1703 ; AVX512-LABEL: shuf_zext_8i8_to_8i32:
1704 ; AVX512: # %bb.0: # %entry
1705 ; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1706 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1709 %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8>
1710 %Z = bitcast <32 x i8> %B to <8 x i32>
1714 define <2 x i64> @shuf_zext_16i8_to_2i64_offset6(<16 x i8> %A) nounwind uwtable readnone ssp {
1715 ; SSE2-LABEL: shuf_zext_16i8_to_2i64_offset6:
1716 ; SSE2: # %bb.0: # %entry
1717 ; SSE2-NEXT: pxor %xmm1, %xmm1
1718 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1719 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1720 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1723 ; SSSE3-LABEL: shuf_zext_16i8_to_2i64_offset6:
1724 ; SSSE3: # %bb.0: # %entry
1725 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
1728 ; SSE41-LABEL: shuf_zext_16i8_to_2i64_offset6:
1729 ; SSE41: # %bb.0: # %entry
1730 ; SSE41-NEXT: psrlq $48, %xmm0
1731 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1734 ; AVX1-LABEL: shuf_zext_16i8_to_2i64_offset6:
1735 ; AVX1: # %bb.0: # %entry
1736 ; AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0
1737 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1740 ; AVX2-SLOW-LABEL: shuf_zext_16i8_to_2i64_offset6:
1741 ; AVX2-SLOW: # %bb.0: # %entry
1742 ; AVX2-SLOW-NEXT: vpsrlq $48, %xmm0, %xmm0
1743 ; AVX2-SLOW-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1744 ; AVX2-SLOW-NEXT: retq
1746 ; AVX2-FAST-LABEL: shuf_zext_16i8_to_2i64_offset6:
1747 ; AVX2-FAST: # %bb.0: # %entry
1748 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
1749 ; AVX2-FAST-NEXT: retq
1751 ; AVX512F-LABEL: shuf_zext_16i8_to_2i64_offset6:
1752 ; AVX512F: # %bb.0: # %entry
1753 ; AVX512F-NEXT: vpsrlq $48, %xmm0, %xmm0
1754 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1755 ; AVX512F-NEXT: retq
1757 ; AVX512BW-LABEL: shuf_zext_16i8_to_2i64_offset6:
1758 ; AVX512BW: # %bb.0: # %entry
1759 ; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
1760 ; AVX512BW-NEXT: retq
1762 %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1763 %Z = bitcast <16 x i8> %B to <2 x i64>
1767 define <4 x i64> @shuf_zext_16i8_to_4i64_offset11(<16 x i8> %A) nounwind uwtable readnone ssp {
1768 ; SSE2-LABEL: shuf_zext_16i8_to_4i64_offset11:
1769 ; SSE2: # %bb.0: # %entry
1770 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1771 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
1772 ; SSE2-NEXT: pxor %xmm2, %xmm2
1773 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
1774 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1775 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1776 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1777 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1778 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1781 ; SSSE3-LABEL: shuf_zext_16i8_to_4i64_offset11:
1782 ; SSSE3: # %bb.0: # %entry
1783 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1784 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[11],zero,zero,zero,zero,zero,zero,zero,xmm0[12],zero,zero,zero,zero,zero,zero,zero
1785 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[13],zero,zero,zero,zero,zero,zero,zero,xmm1[14],zero,zero,zero,zero,zero,zero,zero
1788 ; SSE41-LABEL: shuf_zext_16i8_to_4i64_offset11:
1789 ; SSE41: # %bb.0: # %entry
1790 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1791 ; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1792 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1793 ; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1794 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1795 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1798 ; AVX1-LABEL: shuf_zext_16i8_to_4i64_offset11:
1799 ; AVX1: # %bb.0: # %entry
1800 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1801 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1802 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1803 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1804 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1807 ; AVX2-LABEL: shuf_zext_16i8_to_4i64_offset11:
1808 ; AVX2: # %bb.0: # %entry
1809 ; AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1810 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
1813 ; AVX512-LABEL: shuf_zext_16i8_to_4i64_offset11:
1814 ; AVX512: # %bb.0: # %entry
1815 ; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1816 ; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
1819 %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <32 x i32> <i32 11, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1820 %Z = bitcast <32 x i8> %B to <4 x i64>
1824 define <2 x i64> @shuf_zext_8i16_to_2i64_offset6(<8 x i16> %A) nounwind uwtable readnone ssp {
1825 ; SSE2-LABEL: shuf_zext_8i16_to_2i64_offset6:
1826 ; SSE2: # %bb.0: # %entry
1827 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1828 ; SSE2-NEXT: pxor %xmm1, %xmm1
1829 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1830 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1833 ; SSSE3-LABEL: shuf_zext_8i16_to_2i64_offset6:
1834 ; SSSE3: # %bb.0: # %entry
1835 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero
1838 ; SSE41-LABEL: shuf_zext_8i16_to_2i64_offset6:
1839 ; SSE41: # %bb.0: # %entry
1840 ; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1841 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1844 ; AVX1-LABEL: shuf_zext_8i16_to_2i64_offset6:
1845 ; AVX1: # %bb.0: # %entry
1846 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1847 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1850 ; AVX2-SLOW-LABEL: shuf_zext_8i16_to_2i64_offset6:
1851 ; AVX2-SLOW: # %bb.0: # %entry
1852 ; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1853 ; AVX2-SLOW-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1854 ; AVX2-SLOW-NEXT: retq
1856 ; AVX2-FAST-LABEL: shuf_zext_8i16_to_2i64_offset6:
1857 ; AVX2-FAST: # %bb.0: # %entry
1858 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero
1859 ; AVX2-FAST-NEXT: retq
1861 ; AVX512F-LABEL: shuf_zext_8i16_to_2i64_offset6:
1862 ; AVX512F: # %bb.0: # %entry
1863 ; AVX512F-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1864 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1865 ; AVX512F-NEXT: retq
1867 ; AVX512BW-LABEL: shuf_zext_8i16_to_2i64_offset6:
1868 ; AVX512BW: # %bb.0: # %entry
1869 ; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero
1870 ; AVX512BW-NEXT: retq
1872 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8>
1873 %Z = bitcast <8 x i16> %B to <2 x i64>
1877 define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable readnone ssp {
1878 ; SSE2-LABEL: shuf_zext_8i16_to_4i64_offset2:
1879 ; SSE2: # %bb.0: # %entry
1880 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1881 ; SSE2-NEXT: pxor %xmm2, %xmm2
1882 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1883 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1884 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1885 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1888 ; SSSE3-LABEL: shuf_zext_8i16_to_4i64_offset2:
1889 ; SSSE3: # %bb.0: # %entry
1890 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1891 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1892 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1893 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1894 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1895 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1898 ; SSE41-LABEL: shuf_zext_8i16_to_4i64_offset2:
1899 ; SSE41: # %bb.0: # %entry
1900 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1901 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1902 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1903 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1904 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1907 ; AVX1-LABEL: shuf_zext_8i16_to_4i64_offset2:
1908 ; AVX1: # %bb.0: # %entry
1909 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1910 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1911 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1912 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1913 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1916 ; AVX2-LABEL: shuf_zext_8i16_to_4i64_offset2:
1917 ; AVX2: # %bb.0: # %entry
1918 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
1919 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1922 ; AVX512-LABEL: shuf_zext_8i16_to_4i64_offset2:
1923 ; AVX512: # %bb.0: # %entry
1924 ; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
1925 ; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1928 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8>
1929 %Z = bitcast <16 x i16> %B to <4 x i64>
; Zero-extension expressed as a shuffle: lanes 1..4 of %A interleaved with zeros
; (element 8 of the zeroinitializer operand), then bitcast to <4 x i32>.
; Expected lowering: shift the vector right by one i16 (psrldq) and zero-extend
; (punpcklwd with zero / pmovzxwd), or a single pshufb on AVX2-FAST/AVX512BW.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py — regenerate
; them with that script instead of editing by hand.
1933 define <4 x i32> @shuf_zext_8i16_to_4i32_offset1(<8 x i16> %A) nounwind uwtable readnone ssp {
1934 ; SSE2-LABEL: shuf_zext_8i16_to_4i32_offset1:
1935 ; SSE2: # %bb.0: # %entry
1936 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1937 ; SSE2-NEXT: pxor %xmm1, %xmm1
1938 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1941 ; SSSE3-LABEL: shuf_zext_8i16_to_4i32_offset1:
1942 ; SSSE3: # %bb.0: # %entry
1943 ; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1944 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1945 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1948 ; SSE41-LABEL: shuf_zext_8i16_to_4i32_offset1:
1949 ; SSE41: # %bb.0: # %entry
1950 ; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1951 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1954 ; AVX1-LABEL: shuf_zext_8i16_to_4i32_offset1:
1955 ; AVX1: # %bb.0: # %entry
1956 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1957 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1960 ; AVX2-SLOW-LABEL: shuf_zext_8i16_to_4i32_offset1:
1961 ; AVX2-SLOW: # %bb.0: # %entry
1962 ; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1963 ; AVX2-SLOW-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1964 ; AVX2-SLOW-NEXT: retq
1966 ; AVX2-FAST-LABEL: shuf_zext_8i16_to_4i32_offset1:
1967 ; AVX2-FAST: # %bb.0: # %entry
1968 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,xmm0[4,5],zero,zero,xmm0[6,7],zero,zero,xmm0[8,9],zero,zero
1969 ; AVX2-FAST-NEXT: retq
1971 ; AVX512F-LABEL: shuf_zext_8i16_to_4i32_offset1:
1972 ; AVX512F: # %bb.0: # %entry
1973 ; AVX512F-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1974 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1975 ; AVX512F-NEXT: retq
1977 ; AVX512BW-LABEL: shuf_zext_8i16_to_4i32_offset1:
1978 ; AVX512BW: # %bb.0: # %entry
1979 ; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,xmm0[4,5],zero,zero,xmm0[6,7],zero,zero,xmm0[8,9],zero,zero
1980 ; AVX512BW-NEXT: retq
1982 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8>
1983 %Z = bitcast <8 x i16> %B to <4 x i32>
; Shuffle-as-zext starting at element 3, with trailing undef source lanes
; (indices 10/12/14 are undef in the mask), bitcast to <8 x i32>.
; AVX2/AVX512 fold this to psrldq + a single 256-bit vpmovzxwd.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py — regenerate
; them with that script instead of editing by hand.
1987 define <8 x i32> @shuf_zext_8i16_to_8i32_offset3(<8 x i16> %A) nounwind uwtable readnone ssp {
1988 ; SSE2-LABEL: shuf_zext_8i16_to_8i32_offset3:
1989 ; SSE2: # %bb.0: # %entry
1990 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1991 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1992 ; SSE2-NEXT: pxor %xmm2, %xmm2
1993 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1994 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1997 ; SSSE3-LABEL: shuf_zext_8i16_to_8i32_offset3:
1998 ; SSSE3: # %bb.0: # %entry
1999 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
2000 ; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
2001 ; SSSE3-NEXT: pxor %xmm2, %xmm2
2002 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2003 ; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2006 ; SSE41-LABEL: shuf_zext_8i16_to_8i32_offset3:
2007 ; SSE41: # %bb.0: # %entry
2008 ; SSE41-NEXT: movdqa %xmm0, %xmm1
2009 ; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2010 ; SSE41-NEXT: pxor %xmm2, %xmm2
2011 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2012 ; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2015 ; AVX1-LABEL: shuf_zext_8i16_to_8i32_offset3:
2016 ; AVX1: # %bb.0: # %entry
2017 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2018 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
2019 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2020 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2021 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2024 ; AVX2-LABEL: shuf_zext_8i16_to_8i32_offset3:
2025 ; AVX2: # %bb.0: # %entry
2026 ; AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
2027 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2030 ; AVX512-LABEL: shuf_zext_8i16_to_8i32_offset3:
2031 ; AVX512: # %bb.0: # %entry
2032 ; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
2033 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2036 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8, i32 undef, i32 8, i32 undef, i32 8, i32 undef, i32 8>
2037 %Z = bitcast <16 x i16> %B to <8 x i32>
; Shuffle-as-zext of the upper half of a <16 x i16> (elements 8..15, with some
; undef lanes), bitcast to <8 x i32>. AVX2/AVX512 should extract the high
; 128 bits and emit a single vpmovzxwd.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py — regenerate
; them with that script instead of editing by hand.
2041 define <8 x i32> @shuf_zext_16i16_to_8i32_offset8(<16 x i16> %A) nounwind uwtable readnone ssp {
2042 ; SSE2-LABEL: shuf_zext_16i16_to_8i32_offset8:
2043 ; SSE2: # %bb.0: # %entry
2044 ; SSE2-NEXT: pxor %xmm2, %xmm2
2045 ; SSE2-NEXT: movdqa %xmm1, %xmm0
2046 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2047 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2050 ; SSSE3-LABEL: shuf_zext_16i16_to_8i32_offset8:
2051 ; SSSE3: # %bb.0: # %entry
2052 ; SSSE3-NEXT: pxor %xmm2, %xmm2
2053 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
2054 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2055 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2058 ; SSE41-LABEL: shuf_zext_16i16_to_8i32_offset8:
2059 ; SSE41: # %bb.0: # %entry
2060 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
2061 ; SSE41-NEXT: pxor %xmm2, %xmm2
2062 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
2063 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
2064 ; SSE41-NEXT: movdqa %xmm2, %xmm1
2067 ; AVX1-LABEL: shuf_zext_16i16_to_8i32_offset8:
2068 ; AVX1: # %bb.0: # %entry
2069 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2070 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
2071 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
2072 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
2073 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2074 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2077 ; AVX2-LABEL: shuf_zext_16i16_to_8i32_offset8:
2078 ; AVX2: # %bb.0: # %entry
2079 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
2080 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2083 ; AVX512-LABEL: shuf_zext_16i16_to_8i32_offset8:
2084 ; AVX512: # %bb.0: # %entry
2085 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
2086 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2089 %B = shufflevector <16 x i16> %A, <16 x i16> zeroinitializer, <16 x i32> <i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 undef, i32 16, i32 14, i32 16, i32 undef, i32 16>
2090 %Z = bitcast <16 x i16> %B to <8 x i32>
2094 define <2 x i64> @shuf_zext_4i32_to_2i64_offset2(<4 x i32> %A) nounwind uwtable readnone ssp {
2095 ; SSE-LABEL: shuf_zext_4i32_to_2i64_offset2:
2096 ; SSE: # %bb.0: # %entry
2097 ; SSE-NEXT: xorps %xmm1, %xmm1
2098 ; SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2101 ; AVX-LABEL: shuf_zext_4i32_to_2i64_offset2:
2102 ; AVX: # %bb.0: # %entry
2103 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
2104 ; AVX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2107 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 3, i32 4>
2108 %Z = bitcast <4 x i32> %B to <2 x i64>
; Shuffle-as-zext at offset 1 with undef lanes at both ends of the mask,
; bitcast to <4 x i64>. AVX2/AVX512 fold to pshufd + 256-bit vpmovzxdq.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py — regenerate
; them with that script instead of editing by hand.
2112 define <4 x i64> @shuf_zext_4i32_to_4i64_offset1(<4 x i32> %A) nounwind uwtable readnone ssp {
2113 ; SSE2-LABEL: shuf_zext_4i32_to_4i64_offset1:
2114 ; SSE2: # %bb.0: # %entry
2115 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2116 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
2117 ; SSE2-NEXT: pand %xmm1, %xmm0
2118 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2121 ; SSSE3-LABEL: shuf_zext_4i32_to_4i64_offset1:
2122 ; SSSE3: # %bb.0: # %entry
2123 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
2124 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
2125 ; SSSE3-NEXT: pand %xmm1, %xmm0
2126 ; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2129 ; SSE41-LABEL: shuf_zext_4i32_to_4i64_offset1:
2130 ; SSE41: # %bb.0: # %entry
2131 ; SSE41-NEXT: movdqa %xmm0, %xmm1
2132 ; SSE41-NEXT: pxor %xmm0, %xmm0
2133 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
2134 ; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2137 ; AVX1-LABEL: shuf_zext_4i32_to_4i64_offset1:
2138 ; AVX1: # %bb.0: # %entry
2139 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
2140 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
2141 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2142 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2145 ; AVX2-LABEL: shuf_zext_4i32_to_4i64_offset1:
2146 ; AVX2: # %bb.0: # %entry
2147 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
2148 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2151 ; AVX512-LABEL: shuf_zext_4i32_to_4i64_offset1:
2152 ; AVX512: # %bb.0: # %entry
2153 ; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
2154 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2157 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 undef, i32 4, i32 2, i32 4, i32 3, i32 4, i32 undef, i32 4>
2158 %Z = bitcast <8 x i32> %B to <4 x i64>
; Wide zext <32 x i8> -> <32 x i32> (128 bytes of result). On SSE targets the
; result is returned via a memory pointer in %rdi (note the movq %rdi, %rax and
; the eight stores); AVX512 produces two zmm registers.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py — regenerate
; them with that script instead of editing by hand.
2162 define <32 x i32> @zext_32i8_to_32i32(<32 x i8> %x) {
2163 ; SSE2-LABEL: zext_32i8_to_32i32:
2165 ; SSE2-NEXT: movq %rdi, %rax
2166 ; SSE2-NEXT: pxor %xmm2, %xmm2
2167 ; SSE2-NEXT: movdqa %xmm0, %xmm3
2168 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2169 ; SSE2-NEXT: movdqa %xmm3, %xmm8
2170 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm2[0],xmm8[1],xmm2[1],xmm8[2],xmm2[2],xmm8[3],xmm2[3]
2171 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2172 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
2173 ; SSE2-NEXT: movdqa %xmm0, %xmm5
2174 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
2175 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2176 ; SSE2-NEXT: movdqa %xmm1, %xmm6
2177 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2178 ; SSE2-NEXT: movdqa %xmm6, %xmm7
2179 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm2[0],xmm7[1],xmm2[1],xmm7[2],xmm2[2],xmm7[3],xmm2[3]
2180 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2181 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
2182 ; SSE2-NEXT: movdqa %xmm1, %xmm4
2183 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2184 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2185 ; SSE2-NEXT: movdqa %xmm1, 112(%rdi)
2186 ; SSE2-NEXT: movdqa %xmm4, 96(%rdi)
2187 ; SSE2-NEXT: movdqa %xmm6, 80(%rdi)
2188 ; SSE2-NEXT: movdqa %xmm7, 64(%rdi)
2189 ; SSE2-NEXT: movdqa %xmm0, 48(%rdi)
2190 ; SSE2-NEXT: movdqa %xmm5, 32(%rdi)
2191 ; SSE2-NEXT: movdqa %xmm3, 16(%rdi)
2192 ; SSE2-NEXT: movdqa %xmm8, (%rdi)
2195 ; SSSE3-LABEL: zext_32i8_to_32i32:
2197 ; SSSE3-NEXT: movq %rdi, %rax
2198 ; SSSE3-NEXT: pxor %xmm2, %xmm2
2199 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
2200 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2201 ; SSSE3-NEXT: movdqa %xmm3, %xmm8
2202 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm2[0],xmm8[1],xmm2[1],xmm8[2],xmm2[2],xmm8[3],xmm2[3]
2203 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2204 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
2205 ; SSSE3-NEXT: movdqa %xmm0, %xmm5
2206 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
2207 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2208 ; SSSE3-NEXT: movdqa %xmm1, %xmm6
2209 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2210 ; SSSE3-NEXT: movdqa %xmm6, %xmm7
2211 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm2[0],xmm7[1],xmm2[1],xmm7[2],xmm2[2],xmm7[3],xmm2[3]
2212 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2213 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
2214 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
2215 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2216 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2217 ; SSSE3-NEXT: movdqa %xmm1, 112(%rdi)
2218 ; SSSE3-NEXT: movdqa %xmm4, 96(%rdi)
2219 ; SSSE3-NEXT: movdqa %xmm6, 80(%rdi)
2220 ; SSSE3-NEXT: movdqa %xmm7, 64(%rdi)
2221 ; SSSE3-NEXT: movdqa %xmm0, 48(%rdi)
2222 ; SSSE3-NEXT: movdqa %xmm5, 32(%rdi)
2223 ; SSSE3-NEXT: movdqa %xmm3, 16(%rdi)
2224 ; SSSE3-NEXT: movdqa %xmm8, (%rdi)
2227 ; SSE41-LABEL: zext_32i8_to_32i32:
2229 ; SSE41-NEXT: movq %rdi, %rax
2230 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2231 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
2232 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
2233 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
2234 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
2235 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
2236 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2237 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm5 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2238 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm1[1,1,2,3]
2239 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero
2240 ; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm1[2,3,0,1]
2241 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
2242 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,3]
2243 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2244 ; SSE41-NEXT: movdqa %xmm1, 112(%rdi)
2245 ; SSE41-NEXT: movdqa %xmm7, 96(%rdi)
2246 ; SSE41-NEXT: movdqa %xmm6, 80(%rdi)
2247 ; SSE41-NEXT: movdqa %xmm5, 64(%rdi)
2248 ; SSE41-NEXT: movdqa %xmm0, 48(%rdi)
2249 ; SSE41-NEXT: movdqa %xmm4, 32(%rdi)
2250 ; SSE41-NEXT: movdqa %xmm3, 16(%rdi)
2251 ; SSE41-NEXT: movdqa %xmm2, (%rdi)
2254 ; AVX1-LABEL: zext_32i8_to_32i32:
2256 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2257 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
2258 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
2259 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm4
2260 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2261 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
2262 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm3[1,1,2,3]
2263 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
2264 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
2265 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
2266 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2267 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,0,1]
2268 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2269 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
2270 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
2271 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2272 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[3,3,0,1]
2273 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
2274 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm3
2275 ; AVX1-NEXT: vmovaps %ymm4, %ymm0
2278 ; AVX2-LABEL: zext_32i8_to_32i32:
2280 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2281 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2282 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
2283 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
2284 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
2285 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,2,3]
2286 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2287 ; AVX2-NEXT: vmovdqa %ymm4, %ymm0
2290 ; AVX512-LABEL: zext_32i8_to_32i32:
2292 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
2293 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
2294 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
2295 ; AVX512-NEXT: vmovdqa64 %zmm2, %zmm0
2297 %res = zext <32 x i8>%x to <32 x i32>
; Load <2 x i8> from memory, zext to <2 x i32>, then add to itself so the
; extension is not dead. SSE4.1+/AVX should fold load+extend into a single
; pmovzxbq from memory; SSE2/SSSE3 go through a movzwl scalar load.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py — regenerate
; them with that script instead of editing by hand.
2301 define <2 x i32> @zext_2i8_to_2i32(<2 x i8>* %addr) {
2302 ; SSE2-LABEL: zext_2i8_to_2i32:
2304 ; SSE2-NEXT: movzwl (%rdi), %eax
2305 ; SSE2-NEXT: movd %eax, %xmm0
2306 ; SSE2-NEXT: pxor %xmm1, %xmm1
2307 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2308 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2309 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
2310 ; SSE2-NEXT: paddq %xmm0, %xmm0
2313 ; SSSE3-LABEL: zext_2i8_to_2i32:
2315 ; SSSE3-NEXT: movzwl (%rdi), %eax
2316 ; SSSE3-NEXT: movd %eax, %xmm0
2317 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[3],zero,zero,zero
2318 ; SSSE3-NEXT: paddq %xmm0, %xmm0
2321 ; SSE41-LABEL: zext_2i8_to_2i32:
2323 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
2324 ; SSE41-NEXT: paddq %xmm0, %xmm0
2327 ; AVX-LABEL: zext_2i8_to_2i32:
2329 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
2330 ; AVX-NEXT: vpaddq %xmm0, %xmm0, %xmm0
2332 %x = load <2 x i8>, <2 x i8>* %addr, align 1
2333 %y = zext <2 x i8> %x to <2 x i32>
2334 %z = add <2 x i32>%y, %y
; Non-byte-sized element zext: load <4 x i17> (68 bits packed in memory) and
; zext to <4 x i32>. Lowered as scalar shifts/or to unpack the four 17-bit
; fields from the 64-bit + 32-bit loads, then insert and mask with 131071
; (2^17 - 1).
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py — regenerate
; them with that script instead of editing by hand.
2338 define <4 x i32> @zext_4i17_to_4i32(<4 x i17>* %ptr) {
2339 ; SSE2-LABEL: zext_4i17_to_4i32:
2341 ; SSE2-NEXT: movq (%rdi), %rax
2342 ; SSE2-NEXT: movd %eax, %xmm0
2343 ; SSE2-NEXT: movq %rax, %rcx
2344 ; SSE2-NEXT: shrq $17, %rcx
2345 ; SSE2-NEXT: movd %ecx, %xmm1
2346 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2347 ; SSE2-NEXT: movl 8(%rdi), %ecx
2348 ; SSE2-NEXT: shll $13, %ecx
2349 ; SSE2-NEXT: movq %rax, %rdx
2350 ; SSE2-NEXT: shrq $51, %rdx
2351 ; SSE2-NEXT: orl %ecx, %edx
2352 ; SSE2-NEXT: movd %edx, %xmm1
2353 ; SSE2-NEXT: shrq $34, %rax
2354 ; SSE2-NEXT: movd %eax, %xmm2
2355 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2356 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2357 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
2360 ; SSSE3-LABEL: zext_4i17_to_4i32:
2362 ; SSSE3-NEXT: movq (%rdi), %rax
2363 ; SSSE3-NEXT: movd %eax, %xmm0
2364 ; SSSE3-NEXT: movq %rax, %rcx
2365 ; SSSE3-NEXT: shrq $17, %rcx
2366 ; SSSE3-NEXT: movd %ecx, %xmm1
2367 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2368 ; SSSE3-NEXT: movl 8(%rdi), %ecx
2369 ; SSSE3-NEXT: shll $13, %ecx
2370 ; SSSE3-NEXT: movq %rax, %rdx
2371 ; SSSE3-NEXT: shrq $51, %rdx
2372 ; SSSE3-NEXT: orl %ecx, %edx
2373 ; SSSE3-NEXT: movd %edx, %xmm1
2374 ; SSSE3-NEXT: shrq $34, %rax
2375 ; SSSE3-NEXT: movd %eax, %xmm2
2376 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2377 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2378 ; SSSE3-NEXT: pand {{.*}}(%rip), %xmm0
2381 ; SSE41-LABEL: zext_4i17_to_4i32:
2383 ; SSE41-NEXT: movl 8(%rdi), %eax
2384 ; SSE41-NEXT: shll $13, %eax
2385 ; SSE41-NEXT: movq (%rdi), %rcx
2386 ; SSE41-NEXT: movq %rcx, %rdx
2387 ; SSE41-NEXT: shrq $51, %rdx
2388 ; SSE41-NEXT: orl %eax, %edx
2389 ; SSE41-NEXT: movq %rcx, %rax
2390 ; SSE41-NEXT: shrq $17, %rax
2391 ; SSE41-NEXT: movd %ecx, %xmm0
2392 ; SSE41-NEXT: pinsrd $1, %eax, %xmm0
2393 ; SSE41-NEXT: shrq $34, %rcx
2394 ; SSE41-NEXT: pinsrd $2, %ecx, %xmm0
2395 ; SSE41-NEXT: pinsrd $3, %edx, %xmm0
2396 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm0
2399 ; AVX1-LABEL: zext_4i17_to_4i32:
2401 ; AVX1-NEXT: movl 8(%rdi), %eax
2402 ; AVX1-NEXT: shll $13, %eax
2403 ; AVX1-NEXT: movq (%rdi), %rcx
2404 ; AVX1-NEXT: movq %rcx, %rdx
2405 ; AVX1-NEXT: shrq $51, %rdx
2406 ; AVX1-NEXT: orl %eax, %edx
2407 ; AVX1-NEXT: movq %rcx, %rax
2408 ; AVX1-NEXT: shrq $17, %rax
2409 ; AVX1-NEXT: vmovd %ecx, %xmm0
2410 ; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
2411 ; AVX1-NEXT: shrq $34, %rcx
2412 ; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
2413 ; AVX1-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
2414 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
2417 ; AVX2-LABEL: zext_4i17_to_4i32:
2419 ; AVX2-NEXT: movl 8(%rdi), %eax
2420 ; AVX2-NEXT: shll $13, %eax
2421 ; AVX2-NEXT: movq (%rdi), %rcx
2422 ; AVX2-NEXT: movq %rcx, %rdx
2423 ; AVX2-NEXT: shrq $51, %rdx
2424 ; AVX2-NEXT: orl %eax, %edx
2425 ; AVX2-NEXT: movq %rcx, %rax
2426 ; AVX2-NEXT: shrq $17, %rax
2427 ; AVX2-NEXT: vmovd %ecx, %xmm0
2428 ; AVX2-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
2429 ; AVX2-NEXT: shrq $34, %rcx
2430 ; AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
2431 ; AVX2-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
2432 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [131071,131071,131071,131071]
2433 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
2436 ; AVX512-LABEL: zext_4i17_to_4i32:
2438 ; AVX512-NEXT: movl 8(%rdi), %eax
2439 ; AVX512-NEXT: shll $13, %eax
2440 ; AVX512-NEXT: movq (%rdi), %rcx
2441 ; AVX512-NEXT: movq %rcx, %rdx
2442 ; AVX512-NEXT: shrq $51, %rdx
2443 ; AVX512-NEXT: orl %eax, %edx
2444 ; AVX512-NEXT: movq %rcx, %rax
2445 ; AVX512-NEXT: shrq $17, %rax
2446 ; AVX512-NEXT: vmovd %ecx, %xmm0
2447 ; AVX512-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
2448 ; AVX512-NEXT: shrq $34, %rcx
2449 ; AVX512-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
2450 ; AVX512-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
2451 ; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [131071,131071,131071,131071]
2452 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
2454 %a = load <4 x i17>, <4 x i17>* %ptr
2455 %b = zext <4 x i17> %a to <4 x i32>
; Another non-byte-sized element case: splat an i6 into <8 x i6>, add the
; constant <0..7>, then zext to <8 x i64>. The i6 lanes live in i16 lanes, so
; lowering combines word-granularity extends with a mask of 63 (2^6 - 1);
; AVX512 uses a single 512-bit vpmovzxwq plus a broadcast vpandq.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py — regenerate
; them with that script instead of editing by hand.
2459 define <8 x i64> @zext_8i6_to_8i64(i32 %x) nounwind uwtable readnone ssp {
2460 ; SSE2-LABEL: zext_8i6_to_8i64:
2461 ; SSE2: # %bb.0: # %entry
2462 ; SSE2-NEXT: movd %edi, %xmm0
2463 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
2464 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
2465 ; SSE2-NEXT: paddw {{.*}}(%rip), %xmm3
2466 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,1,0,3]
2467 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
2468 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [63,63]
2469 ; SSE2-NEXT: pand %xmm4, %xmm0
2470 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,3]
2471 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,6,7]
2472 ; SSE2-NEXT: pand %xmm4, %xmm1
2473 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,1,2,3]
2474 ; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,6,7]
2475 ; SSE2-NEXT: pand %xmm4, %xmm2
2476 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,1,3,3]
2477 ; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,6,7]
2478 ; SSE2-NEXT: pand %xmm4, %xmm3
2481 ; SSSE3-LABEL: zext_8i6_to_8i64:
2482 ; SSSE3: # %bb.0: # %entry
2483 ; SSSE3-NEXT: movd %edi, %xmm0
2484 ; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
2485 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
2486 ; SSSE3-NEXT: paddw {{.*}}(%rip), %xmm3
2487 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,1,0,3]
2488 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
2489 ; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [63,63]
2490 ; SSSE3-NEXT: pand %xmm4, %xmm0
2491 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,3]
2492 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,6,7]
2493 ; SSSE3-NEXT: pand %xmm4, %xmm1
2494 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,1,2,3]
2495 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,6,7]
2496 ; SSSE3-NEXT: pand %xmm4, %xmm2
2497 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,1,3,3]
2498 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,6,7]
2499 ; SSSE3-NEXT: pand %xmm4, %xmm3
2502 ; SSE41-LABEL: zext_8i6_to_8i64:
2503 ; SSE41: # %bb.0: # %entry
2504 ; SSE41-NEXT: movd %edi, %xmm0
2505 ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
2506 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
2507 ; SSE41-NEXT: paddw {{.*}}(%rip), %xmm3
2508 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
2509 ; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [63,63]
2510 ; SSE41-NEXT: pand %xmm4, %xmm0
2511 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,2,3]
2512 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
2513 ; SSE41-NEXT: pand %xmm4, %xmm1
2514 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
2515 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
2516 ; SSE41-NEXT: pand %xmm4, %xmm2
2517 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,1,2,3]
2518 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
2519 ; SSE41-NEXT: pand %xmm4, %xmm3
2522 ; AVX1-LABEL: zext_8i6_to_8i64:
2523 ; AVX1: # %bb.0: # %entry
2524 ; AVX1-NEXT: vmovd %edi, %xmm0
2525 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
2526 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2527 ; AVX1-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
2528 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm1
2529 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
2530 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,2,3]
2531 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
2532 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2533 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
2534 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
2535 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
2536 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
2537 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
2540 ; AVX2-LABEL: zext_8i6_to_8i64:
2541 ; AVX2: # %bb.0: # %entry
2542 ; AVX2-NEXT: vmovd %edi, %xmm0
2543 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
2544 ; AVX2-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
2545 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm1
2546 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2547 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
2548 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2551 ; AVX512-LABEL: zext_8i6_to_8i64:
2552 ; AVX512: # %bb.0: # %entry
2553 ; AVX512-NEXT: vmovd %edi, %xmm0
2554 ; AVX512-NEXT: vpbroadcastw %xmm0, %xmm0
2555 ; AVX512-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
2556 ; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2557 ; AVX512-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
2560 %a = trunc i32 %x to i6
2561 %b = insertelement <8 x i6> undef, i6 %a, i32 0
2562 %c = shufflevector <8 x i6> %b, <8 x i6> undef, <8 x i32> zeroinitializer
2563 %d = add <8 x i6> %c, <i6 0, i6 1, i6 2, i6 3, i6 4, i6 5, i6 6, i6 7>
2564 %e = zext <8 x i6> %d to <8 x i64>
2568 define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) {
2569 ; SSE2-LABEL: splatshuf_zext_v4i64:
2571 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2572 ; SSE2-NEXT: pxor %xmm1, %xmm1
2573 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2574 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2577 ; SSSE3-LABEL: splatshuf_zext_v4i64:
2579 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2580 ; SSSE3-NEXT: pxor %xmm1, %xmm1
2581 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2582 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
2585 ; SSE41-LABEL: splatshuf_zext_v4i64:
2587 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2588 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
2589 ; SSE41-NEXT: movdqa %xmm0, %xmm1
2592 ; AVX1-LABEL: splatshuf_zext_v4i64:
2594 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2595 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
2596 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2599 ; AVX2-LABEL: splatshuf_zext_v4i64:
2601 ; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
2602 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2605 ; AVX512-LABEL: splatshuf_zext_v4i64:
2607 ; AVX512-NEXT: vpbroadcastd %xmm0, %xmm0
2608 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2610 %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer
2611 %ext = zext <4 x i32> %shuf to <4 x i64>
2615 define <8 x i32> @splatshuf_zext_v8i32_matching_undefs(<8 x i16> %x) {
2616 ; SSE2-LABEL: splatshuf_zext_v8i32_matching_undefs:
2618 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
2619 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7]
2620 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
2621 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2624 ; SSSE3-LABEL: splatshuf_zext_v8i32_matching_undefs:
2626 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[u,u],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
2627 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
2630 ; SSE41-LABEL: splatshuf_zext_v8i32_matching_undefs:
2632 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,6,7,14,15,8,9,10,11,12,13,14,15]
2633 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2634 ; SSE41-NEXT: movdqa %xmm0, %xmm1
2637 ; AVX1-LABEL: splatshuf_zext_v8i32_matching_undefs:
2639 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[6,7],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
2640 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2643 ; AVX2-LABEL: splatshuf_zext_v8i32_matching_undefs:
2645 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,6,7,14,15,0,1,6,7,6,7,14,15]
2646 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2649 ; AVX512-LABEL: splatshuf_zext_v8i32_matching_undefs:
2651 ; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,6,7,14,15,0,1,6,7,6,7,14,15]
2652 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2654 %shuf = shufflevector <8 x i16> %x, <8 x i16> undef, <8 x i32> <i32 0, i32 undef, i32 3, i32 7, i32 0, i32 undef, i32 3, i32 7>
2655 %ext = zext <8 x i16> %shuf to <8 x i32>
2659 define <8 x i32> @splatshuf_zext_v8i32_unmatched_undef(<8 x i16> %x) {
2660 ; SSE2-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2662 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
2663 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,7]
2664 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2665 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,3,2,4,5,6,7]
2666 ; SSE2-NEXT: pxor %xmm1, %xmm1
2667 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2668 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2671 ; SSSE3-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2673 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[2,3],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
2674 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
2677 ; SSE41-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2679 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,14,15,6,7,12,13,14,15]
2680 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2681 ; SSE41-NEXT: movdqa %xmm0, %xmm1
2684 ; AVX1-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2686 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15]
2687 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
2688 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2689 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2690 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2693 ; AVX2-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2695 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15]
2696 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2699 ; AVX512-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2701 ; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15]
2702 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2704 %shuf = shufflevector <8 x i16> %x, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 3, i32 7, i32 0, i32 undef, i32 3, i32 7>
2705 %ext = zext <8 x i16> %shuf to <8 x i32>
2709 define <16 x i16> @splatshuf_zext_v16i16(<16 x i8> %x) {
2710 ; SSE2-LABEL: splatshuf_zext_v16i16:
2712 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
2713 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,7]
2714 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,2,3]
2715 ; SSE2-NEXT: pxor %xmm1, %xmm1
2716 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2717 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2720 ; SSSE3-LABEL: splatshuf_zext_v16i16:
2722 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero
2723 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
2726 ; SSE41-LABEL: splatshuf_zext_v16i16:
2728 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,14,14,14,14,14,14,14,14,14,14,14,14,14,15,15]
2729 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2730 ; SSE41-NEXT: movdqa %xmm0, %xmm1
2733 ; AVX1-LABEL: splatshuf_zext_v16i16:
2735 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero
2736 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2739 ; AVX2-LABEL: splatshuf_zext_v16i16:
2741 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14]
2742 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2745 ; AVX512-LABEL: splatshuf_zext_v16i16:
2747 ; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14]
2748 ; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2750 %shuf = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14>
2751 %ext = zext <16 x i8> %shuf to <16 x i16>