1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2-SLOW
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2-FAST
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
; Zero-extend the low eight i8 lanes of %A (picked by the shufflevector mask
; 0..7) to <8 x i16>. The SSE2 and SSSE3 checks expect an unpack against a
; zeroed register (pxor + punpcklbw); the SSE4.1 and AVX checks expect a single
; pmovzxbw / vpmovzxbw.
11 define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
12 ; SSE2-LABEL: zext_16i8_to_8i16:
13 ; SSE2: # %bb.0: # %entry
14 ; SSE2-NEXT: pxor %xmm1, %xmm1
15 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
18 ; SSSE3-LABEL: zext_16i8_to_8i16:
19 ; SSSE3: # %bb.0: # %entry
20 ; SSSE3-NEXT: pxor %xmm1, %xmm1
21 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
24 ; SSE41-LABEL: zext_16i8_to_8i16:
25 ; SSE41: # %bb.0: # %entry
26 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
29 ; AVX-LABEL: zext_16i8_to_8i16:
30 ; AVX: # %bb.0: # %entry
31 ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
34 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
35 %C = zext <8 x i8> %B to <8 x i16>
; Zero-extend all sixteen i8 lanes of %A to <16 x i16>. Pre-AVX2 targets are
; checked for two 128-bit halves (unpack low/high on SSE2 and SSSE3, pmovzxbw
; plus a pshufd on SSE4.1, vpunpckhbw + vpmovzxbw + vinsertf128 on AVX1); the
; AVX2 and AVX512 checks expect one ymm-wide vpmovzxbw.
40 define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %A) {
41 ; SSE2-LABEL: zext_16i8_to_16i16:
42 ; SSE2: # %bb.0: # %entry
43 ; SSE2-NEXT: movdqa %xmm0, %xmm1
44 ; SSE2-NEXT: pxor %xmm2, %xmm2
45 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
46 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
49 ; SSSE3-LABEL: zext_16i8_to_16i16:
50 ; SSSE3: # %bb.0: # %entry
51 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
52 ; SSSE3-NEXT: pxor %xmm2, %xmm2
53 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
54 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
57 ; SSE41-LABEL: zext_16i8_to_16i16:
58 ; SSE41: # %bb.0: # %entry
59 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
60 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
61 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
62 ; SSE41-NEXT: movdqa %xmm2, %xmm0
65 ; AVX1-LABEL: zext_16i8_to_16i16:
66 ; AVX1: # %bb.0: # %entry
67 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
68 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
69 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
70 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
73 ; AVX2-LABEL: zext_16i8_to_16i16:
74 ; AVX2: # %bb.0: # %entry
75 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
78 ; AVX512-LABEL: zext_16i8_to_16i16:
79 ; AVX512: # %bb.0: # %entry
80 ; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
83 %B = zext <16 x i8> %A to <16 x i16>
; Zero-extend all thirty-two i8 lanes of %A to <32 x i16>. This is the first
; test whose checks differ between the two AVX512 RUN lines. The AVX512F
; checks split the input into two ymm-wide vpmovzxbw (same shape as the AVX2
; checks), while the AVX512BW checks expect a single zmm-wide vpmovzxbw.
87 define <32 x i16> @zext_32i8_to_32i16(<32 x i8> %A) {
88 ; SSE2-LABEL: zext_32i8_to_32i16:
89 ; SSE2: # %bb.0: # %entry
90 ; SSE2-NEXT: movdqa %xmm1, %xmm3
91 ; SSE2-NEXT: movdqa %xmm0, %xmm1
92 ; SSE2-NEXT: pxor %xmm4, %xmm4
93 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
94 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15]
95 ; SSE2-NEXT: movdqa %xmm3, %xmm2
96 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
97 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
100 ; SSSE3-LABEL: zext_32i8_to_32i16:
101 ; SSSE3: # %bb.0: # %entry
102 ; SSSE3-NEXT: movdqa %xmm1, %xmm3
103 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
104 ; SSSE3-NEXT: pxor %xmm4, %xmm4
105 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
106 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15]
107 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
108 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
109 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
112 ; SSE41-LABEL: zext_32i8_to_32i16:
113 ; SSE41: # %bb.0: # %entry
114 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
115 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
116 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
117 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
118 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
119 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
120 ; SSE41-NEXT: movdqa %xmm5, %xmm0
121 ; SSE41-NEXT: movdqa %xmm4, %xmm1
124 ; AVX1-LABEL: zext_32i8_to_32i16:
125 ; AVX1: # %bb.0: # %entry
126 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
127 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
128 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
129 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
130 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
131 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
132 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
133 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
134 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
135 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
138 ; AVX2-LABEL: zext_32i8_to_32i16:
139 ; AVX2: # %bb.0: # %entry
140 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
141 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
142 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
143 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
146 ; AVX512F-LABEL: zext_32i8_to_32i16:
147 ; AVX512F: # %bb.0: # %entry
148 ; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
149 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
150 ; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
151 ; AVX512F-NEXT: vmovdqa %ymm2, %ymm0
154 ; AVX512BW-LABEL: zext_32i8_to_32i16:
155 ; AVX512BW: # %bb.0: # %entry
156 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
157 ; AVX512BW-NEXT: retq
159 %B = zext <32 x i8> %A to <32 x i16>
; Zero-extend the low four i8 lanes of %A (shufflevector mask 0..3) to
; <4 x i32>. The SSE2 and SSSE3 checks expect punpcklbw followed by punpcklwd
; against a zeroed register; the SSE4.1 and AVX checks expect a single
; pmovzxbd / vpmovzxbd.
163 define <4 x i32> @zext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp {
164 ; SSE2-LABEL: zext_16i8_to_4i32:
165 ; SSE2: # %bb.0: # %entry
166 ; SSE2-NEXT: pxor %xmm1, %xmm1
167 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
168 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
171 ; SSSE3-LABEL: zext_16i8_to_4i32:
172 ; SSSE3: # %bb.0: # %entry
173 ; SSSE3-NEXT: pxor %xmm1, %xmm1
174 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
175 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
178 ; SSE41-LABEL: zext_16i8_to_4i32:
179 ; SSE41: # %bb.0: # %entry
180 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
183 ; AVX-LABEL: zext_16i8_to_4i32:
184 ; AVX: # %bb.0: # %entry
185 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
188 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
189 %C = zext <4 x i8> %B to <4 x i32>
; Zero-extend the low eight i8 lanes of %A (shufflevector mask 0..7) to
; <8 x i32>. The result needs two xmm registers before AVX2, so the SSE and
; AVX1 checks extend lanes 0..3 and lanes 4..7 separately; the AVX2 and AVX512
; checks expect one ymm-wide vpmovzxbd.
193 define <8 x i32> @zext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp {
194 ; SSE2-LABEL: zext_16i8_to_8i32:
195 ; SSE2: # %bb.0: # %entry
196 ; SSE2-NEXT: movdqa %xmm0, %xmm1
197 ; SSE2-NEXT: pxor %xmm2, %xmm2
198 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
199 ; SSE2-NEXT: movdqa %xmm1, %xmm0
200 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
201 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
204 ; SSSE3-LABEL: zext_16i8_to_8i32:
205 ; SSSE3: # %bb.0: # %entry
206 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
207 ; SSSE3-NEXT: pxor %xmm2, %xmm2
208 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
209 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
210 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
211 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
214 ; SSE41-LABEL: zext_16i8_to_8i32:
215 ; SSE41: # %bb.0: # %entry
216 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
217 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
218 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
219 ; SSE41-NEXT: movdqa %xmm2, %xmm0
222 ; AVX1-LABEL: zext_16i8_to_8i32:
223 ; AVX1: # %bb.0: # %entry
224 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
225 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
226 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
227 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
230 ; AVX2-LABEL: zext_16i8_to_8i32:
231 ; AVX2: # %bb.0: # %entry
232 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
235 ; AVX512-LABEL: zext_16i8_to_8i32:
236 ; AVX512: # %bb.0: # %entry
237 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
240 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
241 %C = zext <8 x i8> %B to <8 x i32>
; Zero-extend all sixteen i8 lanes of %A to <16 x i32>. The SSE checks produce
; four xmm results, the AVX1 and AVX2 checks produce two ymm results, and the
; AVX512 checks expect a single zmm-wide vpmovzxbd.
245 define <16 x i32> @zext_16i8_to_16i32(<16 x i8> %A) nounwind uwtable readnone ssp {
246 ; SSE2-LABEL: zext_16i8_to_16i32:
247 ; SSE2: # %bb.0: # %entry
248 ; SSE2-NEXT: movdqa %xmm0, %xmm3
249 ; SSE2-NEXT: pxor %xmm4, %xmm4
250 ; SSE2-NEXT: movdqa %xmm0, %xmm1
251 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
252 ; SSE2-NEXT: movdqa %xmm1, %xmm0
253 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
254 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
255 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
256 ; SSE2-NEXT: movdqa %xmm3, %xmm2
257 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
258 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
261 ; SSSE3-LABEL: zext_16i8_to_16i32:
262 ; SSSE3: # %bb.0: # %entry
263 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
264 ; SSSE3-NEXT: pxor %xmm4, %xmm4
265 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
266 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
267 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
268 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
269 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
270 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
271 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
272 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
273 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
276 ; SSE41-LABEL: zext_16i8_to_16i32:
277 ; SSE41: # %bb.0: # %entry
278 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
279 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
280 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
281 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
282 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
283 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
284 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
285 ; SSE41-NEXT: movdqa %xmm4, %xmm0
288 ; AVX1-LABEL: zext_16i8_to_16i32:
289 ; AVX1: # %bb.0: # %entry
290 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
291 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
292 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
293 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
294 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
295 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
296 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,0,1]
297 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
298 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
299 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
302 ; AVX2-LABEL: zext_16i8_to_16i32:
303 ; AVX2: # %bb.0: # %entry
304 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
305 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
306 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
307 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
310 ; AVX512-LABEL: zext_16i8_to_16i32:
311 ; AVX512: # %bb.0: # %entry
312 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
315 %B = zext <16 x i8> %A to <16 x i32>
; Zero-extend the low two i8 lanes of %A (shufflevector mask 0..1) to
; <2 x i64>. Here the SSSE3 checks diverge from the SSE2 ones. SSE2 expects the
; three-step unpack chain (punpcklbw, punpcklwd, punpckldq) and SSSE3 expects
; one zeroing pshufb. The SSE4.1 and AVX checks expect pmovzxbq / vpmovzxbq.
319 define <2 x i64> @zext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp {
320 ; SSE2-LABEL: zext_16i8_to_2i64:
321 ; SSE2: # %bb.0: # %entry
322 ; SSE2-NEXT: pxor %xmm1, %xmm1
323 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
324 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
325 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
328 ; SSSE3-LABEL: zext_16i8_to_2i64:
329 ; SSSE3: # %bb.0: # %entry
330 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
333 ; SSE41-LABEL: zext_16i8_to_2i64:
334 ; SSE41: # %bb.0: # %entry
335 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
338 ; AVX-LABEL: zext_16i8_to_2i64:
339 ; AVX: # %bb.0: # %entry
340 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
343 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
344 %C = zext <2 x i8> %B to <2 x i64>
; Zero-extend the low four i8 lanes of %A (shufflevector mask 0..3) to
; <4 x i64>. The SSSE3 checks expect two zeroing pshufb (lanes 0..1 and lanes
; 2..3). The SSE4.1 and AVX1 checks shift the source right by 16 bits so that
; lanes 2..3 can be fed to a second pmovzxbq. The AVX2 and AVX512 checks
; expect one ymm-wide vpmovzxbq.
348 define <4 x i64> @zext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp {
349 ; SSE2-LABEL: zext_16i8_to_4i64:
350 ; SSE2: # %bb.0: # %entry
351 ; SSE2-NEXT: movdqa %xmm0, %xmm1
352 ; SSE2-NEXT: pxor %xmm2, %xmm2
353 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
354 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
355 ; SSE2-NEXT: movdqa %xmm1, %xmm0
356 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
357 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
360 ; SSSE3-LABEL: zext_16i8_to_4i64:
361 ; SSSE3: # %bb.0: # %entry
362 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
363 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
364 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
367 ; SSE41-LABEL: zext_16i8_to_4i64:
368 ; SSE41: # %bb.0: # %entry
369 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
370 ; SSE41-NEXT: psrld $16, %xmm0
371 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
372 ; SSE41-NEXT: movdqa %xmm2, %xmm0
375 ; AVX1-LABEL: zext_16i8_to_4i64:
376 ; AVX1: # %bb.0: # %entry
377 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
378 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
379 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
380 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
383 ; AVX2-LABEL: zext_16i8_to_4i64:
384 ; AVX2: # %bb.0: # %entry
385 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
388 ; AVX512-LABEL: zext_16i8_to_4i64:
389 ; AVX512: # %bb.0: # %entry
390 ; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
393 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
394 %C = zext <4 x i8> %B to <4 x i64>
; Zero-extend the low eight i8 lanes of %A (shufflevector mask 0..7) to
; <8 x i64>. The SSSE3 checks load two pshufb masks into registers and reuse
; each of them twice; the AVX2 checks expect two ymm-wide vpmovzxbq, and the
; AVX512 checks expect a single zmm-wide vpmovzxbq.
398 define <8 x i64> @zext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp {
399 ; SSE2-LABEL: zext_16i8_to_8i64:
400 ; SSE2: # %bb.0: # %entry
401 ; SSE2-NEXT: movdqa %xmm0, %xmm1
402 ; SSE2-NEXT: pxor %xmm4, %xmm4
403 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
404 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
405 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
406 ; SSE2-NEXT: movdqa %xmm1, %xmm0
407 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
408 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
409 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
410 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
411 ; SSE2-NEXT: movdqa %xmm3, %xmm2
412 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
413 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
416 ; SSSE3-LABEL: zext_16i8_to_8i64:
417 ; SSSE3: # %bb.0: # %entry
418 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
419 ; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [0,128,128,128,128,128,128,128,1,128,128,128,128,128,128,128]
420 ; SSSE3-NEXT: pshufb %xmm4, %xmm0
421 ; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2,128,128,128,128,128,128,128,3,128,128,128,128,128,128,128]
422 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
423 ; SSSE3-NEXT: pshufb %xmm5, %xmm1
424 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
425 ; SSSE3-NEXT: pshufb %xmm4, %xmm2
426 ; SSSE3-NEXT: pshufb %xmm5, %xmm3
429 ; SSE41-LABEL: zext_16i8_to_8i64:
430 ; SSE41: # %bb.0: # %entry
431 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
432 ; SSE41-NEXT: movdqa %xmm0, %xmm1
433 ; SSE41-NEXT: psrld $16, %xmm1
434 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
435 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
436 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
437 ; SSE41-NEXT: psrlq $48, %xmm0
438 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
439 ; SSE41-NEXT: movdqa %xmm4, %xmm0
442 ; AVX1-LABEL: zext_16i8_to_8i64:
443 ; AVX1: # %bb.0: # %entry
444 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
445 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm2
446 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
447 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
448 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
449 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
450 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
451 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
452 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
453 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
456 ; AVX2-LABEL: zext_16i8_to_8i64:
457 ; AVX2: # %bb.0: # %entry
458 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
459 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
460 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
461 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
464 ; AVX512-LABEL: zext_16i8_to_8i64:
465 ; AVX512: # %bb.0: # %entry
466 ; AVX512-NEXT: vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
469 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
470 %C = zext <8 x i8> %B to <8 x i64>
; Zero-extend the low four i16 lanes of %A (shufflevector mask 0..3) to
; <4 x i32>. The SSE2 and SSSE3 checks expect pxor + punpcklwd; the SSE4.1 and
; AVX checks expect a single pmovzxwd / vpmovzxwd.
474 define <4 x i32> @zext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp {
475 ; SSE2-LABEL: zext_8i16_to_4i32:
476 ; SSE2: # %bb.0: # %entry
477 ; SSE2-NEXT: pxor %xmm1, %xmm1
478 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
481 ; SSSE3-LABEL: zext_8i16_to_4i32:
482 ; SSSE3: # %bb.0: # %entry
483 ; SSSE3-NEXT: pxor %xmm1, %xmm1
484 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
487 ; SSE41-LABEL: zext_8i16_to_4i32:
488 ; SSE41: # %bb.0: # %entry
489 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
492 ; AVX-LABEL: zext_8i16_to_4i32:
493 ; AVX: # %bb.0: # %entry
494 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
497 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
498 %C = zext <4 x i16> %B to <4 x i32>
; Zero-extend all eight i16 lanes of %A to <8 x i32>. The SSE2 and SSSE3
; checks unpack the low and high halves against a zeroed register; the SSE4.1
; checks use pmovzxwd twice with a pshufd for the upper half. The AVX1 checks
; combine vpunpckhwd and vpmovzxwd with vinsertf128, and the AVX2 and AVX512
; checks expect one ymm-wide vpmovzxwd.
502 define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
503 ; SSE2-LABEL: zext_8i16_to_8i32:
504 ; SSE2: # %bb.0: # %entry
505 ; SSE2-NEXT: movdqa %xmm0, %xmm1
506 ; SSE2-NEXT: pxor %xmm2, %xmm2
507 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
508 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
511 ; SSSE3-LABEL: zext_8i16_to_8i32:
512 ; SSSE3: # %bb.0: # %entry
513 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
514 ; SSSE3-NEXT: pxor %xmm2, %xmm2
515 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
516 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
519 ; SSE41-LABEL: zext_8i16_to_8i32:
520 ; SSE41: # %bb.0: # %entry
521 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
522 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
523 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
524 ; SSE41-NEXT: movdqa %xmm2, %xmm0
527 ; AVX1-LABEL: zext_8i16_to_8i32:
528 ; AVX1: # %bb.0: # %entry
529 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
530 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
531 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
532 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
535 ; AVX2-LABEL: zext_8i16_to_8i32:
536 ; AVX2: # %bb.0: # %entry
537 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
540 ; AVX512-LABEL: zext_8i16_to_8i32:
541 ; AVX512: # %bb.0: # %entry
542 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
545 %B = zext <8 x i16> %A to <8 x i32>
; zext_16i16_to_16i32 applies a plain `zext` to all sixteen i16 lanes of %A,
; producing a <16 x i32> value. The assertion blocks are autogenerated (see the
; NOTE on the first line of this file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
; Expected lowering according to the assertions in this function -
;  * SSE2 and SSSE3 interleave each input register with a zeroed xmm4 using
;    punpcklwd / punpckhwd.
;  * SSE4.1 uses pmovzxwd, with pshufd moving the upper lanes down first.
;  * AVX1 assembles each ymm result from two vpmovzxwd halves (vinsertf128).
;  * AVX2 needs two ymm-wide vpmovzxwd, AVX512 a single zmm-wide vpmovzxwd.
549 define <16 x i32> @zext_16i16_to_16i32(<16 x i16> %A) nounwind uwtable readnone ssp {
550 ; SSE2-LABEL: zext_16i16_to_16i32:
551 ; SSE2: # %bb.0: # %entry
552 ; SSE2-NEXT: movdqa %xmm1, %xmm3
553 ; SSE2-NEXT: movdqa %xmm0, %xmm1
554 ; SSE2-NEXT: pxor %xmm4, %xmm4
555 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
556 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
557 ; SSE2-NEXT: movdqa %xmm3, %xmm2
558 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
559 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
562 ; SSSE3-LABEL: zext_16i16_to_16i32:
563 ; SSSE3: # %bb.0: # %entry
564 ; SSSE3-NEXT: movdqa %xmm1, %xmm3
565 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
566 ; SSSE3-NEXT: pxor %xmm4, %xmm4
567 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
568 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
569 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
570 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
571 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
574 ; SSE41-LABEL: zext_16i16_to_16i32:
575 ; SSE41: # %bb.0: # %entry
576 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
577 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
578 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
579 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
580 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
581 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
582 ; SSE41-NEXT: movdqa %xmm5, %xmm0
583 ; SSE41-NEXT: movdqa %xmm4, %xmm1
586 ; AVX1-LABEL: zext_16i16_to_16i32:
587 ; AVX1: # %bb.0: # %entry
588 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
589 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
590 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
591 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
592 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
593 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
594 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
595 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
596 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
597 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
600 ; AVX2-LABEL: zext_16i16_to_16i32:
601 ; AVX2: # %bb.0: # %entry
602 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
603 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
604 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
605 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
608 ; AVX512-LABEL: zext_16i16_to_16i32:
609 ; AVX512: # %bb.0: # %entry
610 ; AVX512-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
613 %B = zext <16 x i16> %A to <16 x i32>
; zext_8i16_to_2i64 selects lanes 0 and 1 of %A with a shufflevector and
; zero-extends that <2 x i16> to <2 x i64>.
; Expected lowering according to the autogenerated assertions -
;  * SSE2 and SSSE3 unpack twice against a zeroed xmm1 (punpcklwd, punpckldq).
;  * SSE4.1 and every AVX configuration use a single pmovzxwq / vpmovzxwq.
617 define <2 x i64> @zext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp {
618 ; SSE2-LABEL: zext_8i16_to_2i64:
619 ; SSE2: # %bb.0: # %entry
620 ; SSE2-NEXT: pxor %xmm1, %xmm1
621 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
622 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
625 ; SSSE3-LABEL: zext_8i16_to_2i64:
626 ; SSSE3: # %bb.0: # %entry
627 ; SSSE3-NEXT: pxor %xmm1, %xmm1
628 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
629 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
632 ; SSE41-LABEL: zext_8i16_to_2i64:
633 ; SSE41: # %bb.0: # %entry
634 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
637 ; AVX-LABEL: zext_8i16_to_2i64:
638 ; AVX: # %bb.0: # %entry
639 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
642 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
643 %C = zext <2 x i16> %B to <2 x i64>
; zext_8i16_to_4i64 selects lanes 0-3 of %A with a shufflevector and
; zero-extends that <4 x i16> to <4 x i64>.
; Expected lowering according to the autogenerated assertions -
;  * SSE2 and SSSE3 unpack against a zeroed xmm2 (punpcklwd, then
;    punpckldq / punpckhdq for the two result registers).
;  * SSE4.1 and AVX1 use two 128-bit pmovzxwq / vpmovzxwq, the second one fed
;    by a pshufd / vpshufd of the input.
;  * AVX2 and AVX512 use a single ymm-wide vpmovzxwq.
647 define <4 x i64> @zext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp {
648 ; SSE2-LABEL: zext_8i16_to_4i64:
649 ; SSE2: # %bb.0: # %entry
650 ; SSE2-NEXT: movdqa %xmm0, %xmm1
651 ; SSE2-NEXT: pxor %xmm2, %xmm2
652 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
653 ; SSE2-NEXT: movdqa %xmm1, %xmm0
654 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
655 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
658 ; SSSE3-LABEL: zext_8i16_to_4i64:
659 ; SSSE3: # %bb.0: # %entry
660 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
661 ; SSSE3-NEXT: pxor %xmm2, %xmm2
662 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
663 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
664 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
665 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
668 ; SSE41-LABEL: zext_8i16_to_4i64:
669 ; SSE41: # %bb.0: # %entry
670 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
671 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
672 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
673 ; SSE41-NEXT: movdqa %xmm2, %xmm0
676 ; AVX1-LABEL: zext_8i16_to_4i64:
677 ; AVX1: # %bb.0: # %entry
678 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
679 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
680 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
681 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
684 ; AVX2-LABEL: zext_8i16_to_4i64:
685 ; AVX2: # %bb.0: # %entry
686 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
689 ; AVX512-LABEL: zext_8i16_to_4i64:
690 ; AVX512: # %bb.0: # %entry
691 ; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
694 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
695 %C = zext <4 x i16> %B to <4 x i64>
; zext_8i16_to_8i64 applies a plain `zext` to all eight i16 lanes of %A,
; producing a <8 x i64> value.
; Expected lowering according to the autogenerated assertions -
;  * SSE2 and SSSE3 unpack against a zeroed xmm4 in two stages (word unpacks,
;    then dword unpacks) to fill xmm0-xmm3.
;  * SSE4.1 issues four pmovzxwq; three of them read a pshufd of the input
;    that moves the wanted pair of lanes to the bottom of the register.
;  * AVX1 does the same with vpmovzxwq and pairs the halves via vinsertf128.
;  * AVX2 needs two ymm-wide vpmovzxwq, AVX512 a single zmm-wide vpmovzxwq.
699 define <8 x i64> @zext_8i16_to_8i64(<8 x i16> %A) nounwind uwtable readnone ssp {
700 ; SSE2-LABEL: zext_8i16_to_8i64:
701 ; SSE2: # %bb.0: # %entry
702 ; SSE2-NEXT: movdqa %xmm0, %xmm3
703 ; SSE2-NEXT: pxor %xmm4, %xmm4
704 ; SSE2-NEXT: movdqa %xmm0, %xmm1
705 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
706 ; SSE2-NEXT: movdqa %xmm1, %xmm0
707 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
708 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
709 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
710 ; SSE2-NEXT: movdqa %xmm3, %xmm2
711 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
712 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
715 ; SSSE3-LABEL: zext_8i16_to_8i64:
716 ; SSSE3: # %bb.0: # %entry
717 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
718 ; SSSE3-NEXT: pxor %xmm4, %xmm4
719 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
720 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
721 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
722 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
723 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
724 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
725 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
726 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
727 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
730 ; SSE41-LABEL: zext_8i16_to_8i64:
731 ; SSE41: # %bb.0: # %entry
732 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
733 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
734 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
735 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
736 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
737 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
738 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
739 ; SSE41-NEXT: movdqa %xmm4, %xmm0
742 ; AVX1-LABEL: zext_8i16_to_8i64:
743 ; AVX1: # %bb.0: # %entry
744 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
745 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
746 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
747 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
748 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
749 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
750 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,0,1]
751 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
752 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
753 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
756 ; AVX2-LABEL: zext_8i16_to_8i64:
757 ; AVX2: # %bb.0: # %entry
758 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
759 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
760 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
761 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
764 ; AVX512-LABEL: zext_8i16_to_8i64:
765 ; AVX512: # %bb.0: # %entry
766 ; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
769 %B = zext <8 x i16> %A to <8 x i64>
; zext_4i32_to_2i64 selects lanes 0 and 1 of %A with a shufflevector and
; zero-extends that <2 x i32> to <2 x i64>.
; Expected lowering according to the autogenerated assertions -
;  * SSE2 and SSSE3 interleave with a zeroed xmm1 (xorps + unpcklps).
;  * SSE4.1 and every AVX configuration use a single pmovzxdq / vpmovzxdq.
773 define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp {
774 ; SSE2-LABEL: zext_4i32_to_2i64:
775 ; SSE2: # %bb.0: # %entry
776 ; SSE2-NEXT: xorps %xmm1, %xmm1
777 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
780 ; SSSE3-LABEL: zext_4i32_to_2i64:
781 ; SSSE3: # %bb.0: # %entry
782 ; SSSE3-NEXT: xorps %xmm1, %xmm1
783 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
786 ; SSE41-LABEL: zext_4i32_to_2i64:
787 ; SSE41: # %bb.0: # %entry
788 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
791 ; AVX-LABEL: zext_4i32_to_2i64:
792 ; AVX: # %bb.0: # %entry
793 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
796 %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
797 %C = zext <2 x i32> %B to <2 x i64>
; zext_4i32_to_4i64 applies a plain `zext` to all four i32 lanes of %A,
; producing a <4 x i64> value.
; Expected lowering according to the autogenerated assertions -
;  * SSE2 and SSSE3 interleave with a zeroed xmm2 (unpcklps / unpckhps).
;  * SSE4.1 uses two pmovzxdq, the second on a pshufd of the input.
;  * AVX1 mixes vpunpckhdq against zero (upper half) with vpmovzxdq (lower
;    half) and joins them via vinsertf128.
;  * AVX2 and AVX512 use a single ymm-wide vpmovzxdq.
801 define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
802 ; SSE2-LABEL: zext_4i32_to_4i64:
803 ; SSE2: # %bb.0: # %entry
804 ; SSE2-NEXT: movaps %xmm0, %xmm1
805 ; SSE2-NEXT: xorps %xmm2, %xmm2
806 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
807 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
810 ; SSSE3-LABEL: zext_4i32_to_4i64:
811 ; SSSE3: # %bb.0: # %entry
812 ; SSSE3-NEXT: movaps %xmm0, %xmm1
813 ; SSSE3-NEXT: xorps %xmm2, %xmm2
814 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
815 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
818 ; SSE41-LABEL: zext_4i32_to_4i64:
819 ; SSE41: # %bb.0: # %entry
820 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
821 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
822 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
823 ; SSE41-NEXT: movdqa %xmm2, %xmm0
826 ; AVX1-LABEL: zext_4i32_to_4i64:
827 ; AVX1: # %bb.0: # %entry
828 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
829 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
830 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
831 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
834 ; AVX2-LABEL: zext_4i32_to_4i64:
835 ; AVX2: # %bb.0: # %entry
836 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
839 ; AVX512-LABEL: zext_4i32_to_4i64:
840 ; AVX512: # %bb.0: # %entry
841 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
844 %B = zext <4 x i32> %A to <4 x i64>
; zext_8i32_to_8i64 applies a plain `zext` to all eight i32 lanes of %A,
; producing a <8 x i64> value.
; Expected lowering according to the autogenerated assertions -
;  * SSE2 and SSSE3 interleave each input register with a zeroed xmm4
;    (unpcklps / unpckhps).
;  * SSE4.1 uses pmovzxdq, with pshufd moving the upper lanes down first.
;  * AVX1 assembles each ymm result from two vpmovzxdq halves (vinsertf128).
;  * AVX2 needs two ymm-wide vpmovzxdq, AVX512 a single zmm-wide vpmovzxdq.
848 define <8 x i64> @zext_8i32_to_8i64(<8 x i32> %A) nounwind uwtable readnone ssp {
849 ; SSE2-LABEL: zext_8i32_to_8i64:
850 ; SSE2: # %bb.0: # %entry
851 ; SSE2-NEXT: movaps %xmm1, %xmm3
852 ; SSE2-NEXT: movaps %xmm0, %xmm1
853 ; SSE2-NEXT: xorps %xmm4, %xmm4
854 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
855 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
856 ; SSE2-NEXT: movaps %xmm3, %xmm2
857 ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
858 ; SSE2-NEXT: unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
861 ; SSSE3-LABEL: zext_8i32_to_8i64:
862 ; SSSE3: # %bb.0: # %entry
863 ; SSSE3-NEXT: movaps %xmm1, %xmm3
864 ; SSSE3-NEXT: movaps %xmm0, %xmm1
865 ; SSSE3-NEXT: xorps %xmm4, %xmm4
866 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
867 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
868 ; SSSE3-NEXT: movaps %xmm3, %xmm2
869 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
870 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
873 ; SSE41-LABEL: zext_8i32_to_8i64:
874 ; SSE41: # %bb.0: # %entry
875 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero
876 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
877 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
878 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero
879 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
880 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero
881 ; SSE41-NEXT: movdqa %xmm5, %xmm0
882 ; SSE41-NEXT: movdqa %xmm4, %xmm1
885 ; AVX1-LABEL: zext_8i32_to_8i64:
886 ; AVX1: # %bb.0: # %entry
887 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
888 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
889 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
890 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
891 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
892 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
893 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
894 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
895 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
896 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
899 ; AVX2-LABEL: zext_8i32_to_8i64:
900 ; AVX2: # %bb.0: # %entry
901 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
902 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
903 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
904 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
907 ; AVX512-LABEL: zext_8i32_to_8i64:
908 ; AVX512: # %bb.0: # %entry
909 ; AVX512-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
912 %B = zext <8 x i32> %A to <8 x i64>
; load_zext_2i8_to_2i64 loads a <2 x i8> from %ptr and zero-extends it to
; <2 x i64>.
; Expected lowering according to the autogenerated assertions -
;  * SSE2 loads 16 bits via movzwl + movd, then unpacks three times against a
;    zeroed xmm1 (bytes, words, dwords).
;  * SSSE3 performs the same load but widens with a single pshufb.
;  * SSE4.1 and every AVX configuration use pmovzxbq / vpmovzxbq with a
;    memory operand (printed as `mem`), i.e. no separate load instruction.
916 define <2 x i64> @load_zext_2i8_to_2i64(<2 x i8> *%ptr) {
917 ; SSE2-LABEL: load_zext_2i8_to_2i64:
918 ; SSE2: # %bb.0: # %entry
919 ; SSE2-NEXT: movzwl (%rdi), %eax
920 ; SSE2-NEXT: movd %eax, %xmm0
921 ; SSE2-NEXT: pxor %xmm1, %xmm1
922 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
923 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
924 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
927 ; SSSE3-LABEL: load_zext_2i8_to_2i64:
928 ; SSSE3: # %bb.0: # %entry
929 ; SSSE3-NEXT: movzwl (%rdi), %eax
930 ; SSSE3-NEXT: movd %eax, %xmm0
931 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
934 ; SSE41-LABEL: load_zext_2i8_to_2i64:
935 ; SSE41: # %bb.0: # %entry
936 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
939 ; AVX-LABEL: load_zext_2i8_to_2i64:
940 ; AVX: # %bb.0: # %entry
941 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
944 %X = load <2 x i8>, <2 x i8>* %ptr
945 %Y = zext <2 x i8> %X to <2 x i64>
; load_zext_4i8_to_4i32 loads a <4 x i8> from %ptr and zero-extends it to
; <4 x i32>.
; Expected lowering according to the autogenerated assertions -
;  * SSE2 and SSSE3 load 32 bits with movd, then unpack twice against a
;    zeroed xmm1 (punpcklbw, punpcklwd).
;  * SSE4.1 and every AVX configuration use pmovzxbd / vpmovzxbd with a
;    memory operand (printed as `mem`).
949 define <4 x i32> @load_zext_4i8_to_4i32(<4 x i8> *%ptr) {
950 ; SSE2-LABEL: load_zext_4i8_to_4i32:
951 ; SSE2: # %bb.0: # %entry
952 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
953 ; SSE2-NEXT: pxor %xmm1, %xmm1
954 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
955 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
958 ; SSSE3-LABEL: load_zext_4i8_to_4i32:
959 ; SSSE3: # %bb.0: # %entry
960 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
961 ; SSSE3-NEXT: pxor %xmm1, %xmm1
962 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
963 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
966 ; SSE41-LABEL: load_zext_4i8_to_4i32:
967 ; SSE41: # %bb.0: # %entry
968 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
971 ; AVX-LABEL: load_zext_4i8_to_4i32:
972 ; AVX: # %bb.0: # %entry
973 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
976 %X = load <4 x i8>, <4 x i8>* %ptr
977 %Y = zext <4 x i8> %X to <4 x i32>
; load_zext_4i8_to_4i64 loads a <4 x i8> from %ptr and zero-extends it to
; <4 x i64>.
; Expected lowering according to the autogenerated assertions -
;  * SSE2 loads 32 bits with movd and unpacks against a zeroed xmm2 in three
;    stages (bytes, words, then punpckldq / punpckhdq for the two results).
;  * SSSE3 loads with movd and widens each half with its own pshufb.
;  * SSE4.1 and AVX1 use two 128-bit pmovzxbq / vpmovzxbq with memory
;    operands; the `mem` placeholder hides the address, so both lines print
;    identically. AVX1 then joins the halves via vinsertf128.
;  * AVX2 and AVX512 use a single ymm-wide vpmovzxbq from memory.
981 define <4 x i64> @load_zext_4i8_to_4i64(<4 x i8> *%ptr) {
982 ; SSE2-LABEL: load_zext_4i8_to_4i64:
983 ; SSE2: # %bb.0: # %entry
984 ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
985 ; SSE2-NEXT: pxor %xmm2, %xmm2
986 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
987 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
988 ; SSE2-NEXT: movdqa %xmm1, %xmm0
989 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
990 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
993 ; SSSE3-LABEL: load_zext_4i8_to_4i64:
994 ; SSSE3: # %bb.0: # %entry
995 ; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
996 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
997 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
998 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
1001 ; SSE41-LABEL: load_zext_4i8_to_4i64:
1002 ; SSE41: # %bb.0: # %entry
1003 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1004 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1007 ; AVX1-LABEL: load_zext_4i8_to_4i64:
1008 ; AVX1: # %bb.0: # %entry
1009 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1010 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1011 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1014 ; AVX2-LABEL: load_zext_4i8_to_4i64:
1015 ; AVX2: # %bb.0: # %entry
1016 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1019 ; AVX512-LABEL: load_zext_4i8_to_4i64:
1020 ; AVX512: # %bb.0: # %entry
1021 ; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1024 %X = load <4 x i8>, <4 x i8>* %ptr
1025 %Y = zext <4 x i8> %X to <4 x i64>
; load_zext_8i8_to_8i16 loads a <8 x i8> from %ptr and zero-extends it to
; <8 x i16>.
; Expected lowering according to the autogenerated assertions -
;  * SSE2 and SSSE3 load 64 bits with movq, then punpcklbw against a zeroed
;    xmm1.
;  * SSE4.1 and every AVX configuration use pmovzxbw / vpmovzxbw with a
;    memory operand (printed as `mem`).
1029 define <8 x i16> @load_zext_8i8_to_8i16(<8 x i8> *%ptr) {
1030 ; SSE2-LABEL: load_zext_8i8_to_8i16:
1031 ; SSE2: # %bb.0: # %entry
1032 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1033 ; SSE2-NEXT: pxor %xmm1, %xmm1
1034 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1037 ; SSSE3-LABEL: load_zext_8i8_to_8i16:
1038 ; SSSE3: # %bb.0: # %entry
1039 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1040 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1041 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1044 ; SSE41-LABEL: load_zext_8i8_to_8i16:
1045 ; SSE41: # %bb.0: # %entry
1046 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1049 ; AVX-LABEL: load_zext_8i8_to_8i16:
1050 ; AVX: # %bb.0: # %entry
1051 ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1054 %X = load <8 x i8>, <8 x i8>* %ptr
1055 %Y = zext <8 x i8> %X to <8 x i16>
; load_zext_8i8_to_8i32 loads a <8 x i8> from %ptr and zero-extends it to
; <8 x i32>.
; Expected lowering according to the autogenerated assertions -
;  * SSE2 and SSSE3 load 64 bits with movq, widen bytes to words with
;    punpcklbw against a zeroed xmm2, then split into two results with
;    punpcklwd / punpckhwd.
;  * SSE4.1 and AVX1 use two 128-bit pmovzxbd / vpmovzxbd with memory
;    operands (the `mem` placeholder hides the address); AVX1 joins the
;    halves via vinsertf128.
;  * AVX2 and AVX512 use a single ymm-wide vpmovzxbd from memory.
1059 define <8 x i32> @load_zext_8i8_to_8i32(<8 x i8> *%ptr) {
1060 ; SSE2-LABEL: load_zext_8i8_to_8i32:
1061 ; SSE2: # %bb.0: # %entry
1062 ; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1063 ; SSE2-NEXT: pxor %xmm2, %xmm2
1064 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1065 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1066 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1067 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1070 ; SSSE3-LABEL: load_zext_8i8_to_8i32:
1071 ; SSSE3: # %bb.0: # %entry
1072 ; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1073 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1074 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1075 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1076 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1077 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1080 ; SSE41-LABEL: load_zext_8i8_to_8i32:
1081 ; SSE41: # %bb.0: # %entry
1082 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1083 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1086 ; AVX1-LABEL: load_zext_8i8_to_8i32:
1087 ; AVX1: # %bb.0: # %entry
1088 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1089 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1090 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1093 ; AVX2-LABEL: load_zext_8i8_to_8i32:
1094 ; AVX2: # %bb.0: # %entry
1095 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1098 ; AVX512-LABEL: load_zext_8i8_to_8i32:
1099 ; AVX512: # %bb.0: # %entry
1100 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1103 %X = load <8 x i8>, <8 x i8>* %ptr
1104 %Y = zext <8 x i8> %X to <8 x i32>
; load_zext_16i8_to_8i32 loads a full <16 x i8> from %ptr, keeps lanes 0-7
; via a shufflevector, and zero-extends that <8 x i8> to <8 x i32>.
; Expected lowering according to the autogenerated assertions -
;  * SSE2 and SSSE3 load the whole vector with movdqa, widen the low bytes
;    with punpcklbw against a zeroed xmm2, then split with punpcklwd /
;    punpckhwd.
;  * SSE4.1 and AVX1 also load the whole vector into a register first and
;    extend from registers (pmovzxbd, pshufd, pmovzxbd); AVX1 joins the halves
;    via vinsertf128.
;  * AVX2 and AVX512 use a single ymm-wide vpmovzxbd with a memory operand.
1108 define <8 x i32> @load_zext_16i8_to_8i32(<16 x i8> *%ptr) {
1109 ; SSE2-LABEL: load_zext_16i8_to_8i32:
1110 ; SSE2: # %bb.0: # %entry
1111 ; SSE2-NEXT: movdqa (%rdi), %xmm1
1112 ; SSE2-NEXT: pxor %xmm2, %xmm2
1113 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1114 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1115 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1116 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1119 ; SSSE3-LABEL: load_zext_16i8_to_8i32:
1120 ; SSSE3: # %bb.0: # %entry
1121 ; SSSE3-NEXT: movdqa (%rdi), %xmm1
1122 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1123 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1124 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1125 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1126 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1129 ; SSE41-LABEL: load_zext_16i8_to_8i32:
1130 ; SSE41: # %bb.0: # %entry
1131 ; SSE41-NEXT: movdqa (%rdi), %xmm1
1132 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1133 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
1134 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1137 ; AVX1-LABEL: load_zext_16i8_to_8i32:
1138 ; AVX1: # %bb.0: # %entry
1139 ; AVX1-NEXT: vmovdqa (%rdi), %xmm0
1140 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1141 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1142 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1143 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1146 ; AVX2-LABEL: load_zext_16i8_to_8i32:
1147 ; AVX2: # %bb.0: # %entry
1148 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1151 ; AVX512-LABEL: load_zext_16i8_to_8i32:
1152 ; AVX512: # %bb.0: # %entry
1153 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1156 %X = load <16 x i8>, <16 x i8>* %ptr
1157 %Y = shufflevector <16 x i8> %X, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1158 %Z = zext <8 x i8> %Y to <8 x i32>
; load_zext_8i8_to_8i64 loads a <8 x i8> from %ptr and zero-extends it to
; <8 x i64>.
; Expected lowering according to the autogenerated assertions -
;  * SSE2 loads 64 bits with movq, copies the upper four bytes aside with
;    pshufd, and widens both copies through byte, word and dword unpacks
;    against a zeroed xmm4.
;  * SSSE3 loads with movq and widens with pshufb, using two mask constants
;    (held in xmm4 and xmm5) that are each applied twice.
;  * SSE4.1 and AVX1 use four 128-bit pmovzxbq / vpmovzxbq with memory
;    operands (the `mem` placeholder hides the addresses); AVX1 pairs them
;    via vinsertf128.
;  * AVX2 needs two ymm-wide vpmovzxbq from memory, AVX512 a single zmm-wide
;    vpmovzxbq.
1162 define <8 x i64> @load_zext_8i8_to_8i64(<8 x i8> *%ptr) {
1163 ; SSE2-LABEL: load_zext_8i8_to_8i64:
1164 ; SSE2: # %bb.0: # %entry
1165 ; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1166 ; SSE2-NEXT: pxor %xmm4, %xmm4
1167 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
1168 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
1169 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
1170 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1171 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
1172 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
1173 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
1174 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
1175 ; SSE2-NEXT: movdqa %xmm3, %xmm2
1176 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
1177 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
1180 ; SSSE3-LABEL: load_zext_8i8_to_8i64:
1181 ; SSSE3: # %bb.0: # %entry
1182 ; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1183 ; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [0,128,128,128,128,128,128,128,1,128,128,128,128,128,128,128]
1184 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1185 ; SSSE3-NEXT: pshufb %xmm4, %xmm0
1186 ; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2,128,128,128,128,128,128,128,3,128,128,128,128,128,128,128]
1187 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
1188 ; SSSE3-NEXT: pshufb %xmm5, %xmm1
1189 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
1190 ; SSSE3-NEXT: pshufb %xmm4, %xmm2
1191 ; SSSE3-NEXT: pshufb %xmm5, %xmm3
1194 ; SSE41-LABEL: load_zext_8i8_to_8i64:
1195 ; SSE41: # %bb.0: # %entry
1196 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1197 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1198 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1199 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1202 ; AVX1-LABEL: load_zext_8i8_to_8i64:
1203 ; AVX1: # %bb.0: # %entry
1204 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1205 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1206 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1207 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1208 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
1209 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
1212 ; AVX2-LABEL: load_zext_8i8_to_8i64:
1213 ; AVX2: # %bb.0: # %entry
1214 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1215 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1218 ; AVX512-LABEL: load_zext_8i8_to_8i64:
1219 ; AVX512: # %bb.0: # %entry
1220 ; AVX512-NEXT: vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
1223 %X = load <8 x i8>, <8 x i8>* %ptr
1224 %Y = zext <8 x i8> %X to <8 x i64>
; Load a <16 x i8> vector from %ptr and zero-extend every lane to i16.
; Expected lowering per the checks below: the SSE2 and SSSE3 runs interleave the
; loaded bytes with a zeroed register (punpcklbw / punpckhbw); the SSE4.1 and
; AVX1 runs use two 128-bit pmovzxbw loads (AVX1 then joins them with
; vinsertf128); the AVX2 and AVX512 runs use one 256-bit vpmovzxbw load.
1228 define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) {
1229 ; SSE2-LABEL: load_zext_16i8_to_16i16:
1230 ; SSE2: # %bb.0: # %entry
1231 ; SSE2-NEXT: movdqa (%rdi), %xmm1
1232 ; SSE2-NEXT: pxor %xmm2, %xmm2
1233 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1234 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1235 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
1238 ; SSSE3-LABEL: load_zext_16i8_to_16i16:
1239 ; SSSE3: # %bb.0: # %entry
1240 ; SSSE3-NEXT: movdqa (%rdi), %xmm1
1241 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1242 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1243 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1244 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
1247 ; SSE41-LABEL: load_zext_16i8_to_16i16:
1248 ; SSE41: # %bb.0: # %entry
1249 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1250 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1253 ; AVX1-LABEL: load_zext_16i8_to_16i16:
1254 ; AVX1: # %bb.0: # %entry
1255 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1256 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1257 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1260 ; AVX2-LABEL: load_zext_16i8_to_16i16:
1261 ; AVX2: # %bb.0: # %entry
1262 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1265 ; AVX512-LABEL: load_zext_16i8_to_16i16:
1266 ; AVX512: # %bb.0: # %entry
1267 ; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1270 %X = load <16 x i8>, <16 x i8>* %ptr
1271 %Y = zext <16 x i8> %X to <16 x i16>
; Load a <2 x i16> vector from %ptr and zero-extend each lane to i64.
; The SSE2 and SSSE3 runs expect a 32-bit movd load widened by two unpacks
; against a zeroed register (punpcklwd, then punpckldq). The SSE4.1 run and
; every AVX-family run expect a single pmovzxwq / vpmovzxwq that reads its
; source directly from memory.
1275 define <2 x i64> @load_zext_2i16_to_2i64(<2 x i16> *%ptr) {
1276 ; SSE2-LABEL: load_zext_2i16_to_2i64:
1277 ; SSE2: # %bb.0: # %entry
1278 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1279 ; SSE2-NEXT: pxor %xmm1, %xmm1
1280 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1281 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1284 ; SSSE3-LABEL: load_zext_2i16_to_2i64:
1285 ; SSSE3: # %bb.0: # %entry
1286 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1287 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1288 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1289 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1292 ; SSE41-LABEL: load_zext_2i16_to_2i64:
1293 ; SSE41: # %bb.0: # %entry
1294 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1297 ; AVX-LABEL: load_zext_2i16_to_2i64:
1298 ; AVX: # %bb.0: # %entry
1299 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1302 %X = load <2 x i16>, <2 x i16>* %ptr
1303 %Y = zext <2 x i16> %X to <2 x i64>
; Load a <4 x i16> vector from %ptr and zero-extend each lane to i32.
; The SSE2 and SSSE3 runs expect a 64-bit movq load interleaved with a zeroed
; register via punpcklwd. The SSE4.1 run and every AVX-family run expect a
; single pmovzxwd / vpmovzxwd that reads its source directly from memory.
1307 define <4 x i32> @load_zext_4i16_to_4i32(<4 x i16> *%ptr) {
1308 ; SSE2-LABEL: load_zext_4i16_to_4i32:
1309 ; SSE2: # %bb.0: # %entry
1310 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1311 ; SSE2-NEXT: pxor %xmm1, %xmm1
1312 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1315 ; SSSE3-LABEL: load_zext_4i16_to_4i32:
1316 ; SSSE3: # %bb.0: # %entry
1317 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1318 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1319 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1322 ; SSE41-LABEL: load_zext_4i16_to_4i32:
1323 ; SSE41: # %bb.0: # %entry
1324 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1327 ; AVX-LABEL: load_zext_4i16_to_4i32:
1328 ; AVX: # %bb.0: # %entry
1329 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1332 %X = load <4 x i16>, <4 x i16>* %ptr
1333 %Y = zext <4 x i16> %X to <4 x i32>
; Load a <4 x i16> vector from %ptr and zero-extend each lane to i64.
; The 256-bit result occupies xmm0 and xmm1 in the SSE runs. The SSE2 and SSSE3
; runs expect a movq load, a punpcklwd against zero, then punpckldq / punpckhdq
; to produce the low and high halves. The SSE4.1 run expects two pmovzxwq
; loads; AVX1 expects two vpmovzxwq loads joined with vinsertf128; the AVX2 and
; AVX512 runs expect one 256-bit vpmovzxwq load.
1337 define <4 x i64> @load_zext_4i16_to_4i64(<4 x i16> *%ptr) {
1338 ; SSE2-LABEL: load_zext_4i16_to_4i64:
1339 ; SSE2: # %bb.0: # %entry
1340 ; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1341 ; SSE2-NEXT: pxor %xmm2, %xmm2
1342 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1343 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1344 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1345 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1348 ; SSSE3-LABEL: load_zext_4i16_to_4i64:
1349 ; SSSE3: # %bb.0: # %entry
1350 ; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1351 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1352 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1353 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1354 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1355 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1358 ; SSE41-LABEL: load_zext_4i16_to_4i64:
1359 ; SSE41: # %bb.0: # %entry
1360 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1361 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1364 ; AVX1-LABEL: load_zext_4i16_to_4i64:
1365 ; AVX1: # %bb.0: # %entry
1366 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1367 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1368 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1371 ; AVX2-LABEL: load_zext_4i16_to_4i64:
1372 ; AVX2: # %bb.0: # %entry
1373 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1376 ; AVX512-LABEL: load_zext_4i16_to_4i64:
1377 ; AVX512: # %bb.0: # %entry
1378 ; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1381 %X = load <4 x i16>, <4 x i16>* %ptr
1382 %Y = zext <4 x i16> %X to <4 x i64>
; Load an <8 x i16> vector from %ptr and zero-extend each lane to i32.
; The SSE2 and SSSE3 runs expect a full movdqa load split into low and high
; halves with punpcklwd / punpckhwd against a zeroed register. The SSE4.1 run
; expects two pmovzxwd loads; AVX1 expects two vpmovzxwd loads joined with
; vinsertf128; the AVX2 and AVX512 runs expect one 256-bit vpmovzxwd load.
1386 define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {
1387 ; SSE2-LABEL: load_zext_8i16_to_8i32:
1388 ; SSE2: # %bb.0: # %entry
1389 ; SSE2-NEXT: movdqa (%rdi), %xmm1
1390 ; SSE2-NEXT: pxor %xmm2, %xmm2
1391 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1392 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1393 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1396 ; SSSE3-LABEL: load_zext_8i16_to_8i32:
1397 ; SSSE3: # %bb.0: # %entry
1398 ; SSSE3-NEXT: movdqa (%rdi), %xmm1
1399 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1400 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1401 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1402 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1405 ; SSE41-LABEL: load_zext_8i16_to_8i32:
1406 ; SSE41: # %bb.0: # %entry
1407 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1408 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1411 ; AVX1-LABEL: load_zext_8i16_to_8i32:
1412 ; AVX1: # %bb.0: # %entry
1413 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1414 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1415 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1418 ; AVX2-LABEL: load_zext_8i16_to_8i32:
1419 ; AVX2: # %bb.0: # %entry
1420 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1423 ; AVX512-LABEL: load_zext_8i16_to_8i32:
1424 ; AVX512: # %bb.0: # %entry
1425 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1428 %X = load <8 x i16>, <8 x i16>* %ptr
1429 %Y = zext <8 x i16> %X to <8 x i32>
; Load a <2 x i32> vector from %ptr and zero-extend each lane to i64.
; The SSE2 and SSSE3 runs expect the floating-point-typed forms movsd, xorps
; and unpcklps to interleave the loaded dwords with zero. The SSE4.1 run and
; every AVX-family run expect a single pmovzxdq / vpmovzxdq from memory.
1433 define <2 x i64> @load_zext_2i32_to_2i64(<2 x i32> *%ptr) {
1434 ; SSE2-LABEL: load_zext_2i32_to_2i64:
1435 ; SSE2: # %bb.0: # %entry
1436 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1437 ; SSE2-NEXT: xorps %xmm1, %xmm1
1438 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1441 ; SSSE3-LABEL: load_zext_2i32_to_2i64:
1442 ; SSSE3: # %bb.0: # %entry
1443 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1444 ; SSSE3-NEXT: xorps %xmm1, %xmm1
1445 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1448 ; SSE41-LABEL: load_zext_2i32_to_2i64:
1449 ; SSE41: # %bb.0: # %entry
1450 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1453 ; AVX-LABEL: load_zext_2i32_to_2i64:
1454 ; AVX: # %bb.0: # %entry
1455 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1458 %X = load <2 x i32>, <2 x i32>* %ptr
1459 %Y = zext <2 x i32> %X to <2 x i64>
; Load a <4 x i32> vector from %ptr and zero-extend each lane to i64.
; The SSE2 and SSSE3 runs expect a movaps load split with unpcklps / unpckhps
; against a register zeroed by xorps. The SSE4.1 run expects two pmovzxdq
; loads; AVX1 expects two vpmovzxdq loads joined with vinsertf128; the AVX2 and
; AVX512 runs expect one 256-bit vpmovzxdq load.
1463 define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {
1464 ; SSE2-LABEL: load_zext_4i32_to_4i64:
1465 ; SSE2: # %bb.0: # %entry
1466 ; SSE2-NEXT: movaps (%rdi), %xmm1
1467 ; SSE2-NEXT: xorps %xmm2, %xmm2
1468 ; SSE2-NEXT: movaps %xmm1, %xmm0
1469 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1470 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1473 ; SSSE3-LABEL: load_zext_4i32_to_4i64:
1474 ; SSSE3: # %bb.0: # %entry
1475 ; SSSE3-NEXT: movaps (%rdi), %xmm1
1476 ; SSSE3-NEXT: xorps %xmm2, %xmm2
1477 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1478 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1479 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1482 ; SSE41-LABEL: load_zext_4i32_to_4i64:
1483 ; SSE41: # %bb.0: # %entry
1484 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1485 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
1488 ; AVX1-LABEL: load_zext_4i32_to_4i64:
1489 ; AVX1: # %bb.0: # %entry
1490 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1491 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
1492 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1495 ; AVX2-LABEL: load_zext_4i32_to_4i64:
1496 ; AVX2: # %bb.0: # %entry
1497 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1500 ; AVX512-LABEL: load_zext_4i32_to_4i64:
1501 ; AVX512: # %bb.0: # %entry
1502 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1505 %X = load <4 x i32>, <4 x i32>* %ptr
1506 %Y = zext <4 x i32> %X to <4 x i64>
; Zero-extend an <8 x i8> register argument %z to <8 x i32>.
; Every run is expected to apply pand / vpand with a rip-relative constant
; and then widen 16-bit lanes to 32 bits (punpcklwd / punpckhwd or pmovzxwd).
; NOTE(review): the constant is not visible in this file; presumably it clears
; the upper byte of each 16-bit lane holding an i8 element - confirm against
; the generated constant pool.
1510 define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
1511 ; SSE2-LABEL: zext_8i8_to_8i32:
1512 ; SSE2: # %bb.0: # %entry
1513 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1514 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
1515 ; SSE2-NEXT: pxor %xmm2, %xmm2
1516 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1517 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1518 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1521 ; SSSE3-LABEL: zext_8i8_to_8i32:
1522 ; SSSE3: # %bb.0: # %entry
1523 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1524 ; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1
1525 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1526 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1527 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1528 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1531 ; SSE41-LABEL: zext_8i8_to_8i32:
1532 ; SSE41: # %bb.0: # %entry
1533 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm0
1534 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1535 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1536 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1537 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1540 ; AVX1-LABEL: zext_8i8_to_8i32:
1541 ; AVX1: # %bb.0: # %entry
1542 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
1543 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1544 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1545 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1546 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1549 ; AVX2-LABEL: zext_8i8_to_8i32:
1550 ; AVX2: # %bb.0: # %entry
1551 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
1552 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1555 ; AVX512-LABEL: zext_8i8_to_8i32:
1556 ; AVX512: # %bb.0: # %entry
1557 ; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
1558 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1561 %t = zext <8 x i8> %z to <8 x i32>
; Zero extension of <8 x i16> to <8 x i32> written as a shuffle plus bitcast
; instead of a zext instruction. The mask alternates each element of %A
; (indices 0-7) with index 8, which selects element 0 of the zeroinitializer
; operand, so every second i16 lane is zero; the <16 x i16> result is then
; reinterpreted as <8 x i32>.
; The SSE2 and SSSE3 runs expect punpcklwd / punpckhwd against a zeroed
; register; SSE4.1 and AVX1 expect pmovzxwd for the low half and punpckhwd for
; the high half; the AVX2 and AVX512 runs expect a single 256-bit vpmovzxwd.
1565 define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
1566 ; SSE2-LABEL: shuf_zext_8i16_to_8i32:
1567 ; SSE2: # %bb.0: # %entry
1568 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1569 ; SSE2-NEXT: pxor %xmm2, %xmm2
1570 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1571 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1574 ; SSSE3-LABEL: shuf_zext_8i16_to_8i32:
1575 ; SSSE3: # %bb.0: # %entry
1576 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1577 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1578 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1579 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1582 ; SSE41-LABEL: shuf_zext_8i16_to_8i32:
1583 ; SSE41: # %bb.0: # %entry
1584 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1585 ; SSE41-NEXT: pxor %xmm2, %xmm2
1586 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1587 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1590 ; AVX1-LABEL: shuf_zext_8i16_to_8i32:
1591 ; AVX1: # %bb.0: # %entry
1592 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1593 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1594 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1595 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1598 ; AVX2-LABEL: shuf_zext_8i16_to_8i32:
1599 ; AVX2: # %bb.0: # %entry
1600 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1603 ; AVX512-LABEL: shuf_zext_8i16_to_8i32:
1604 ; AVX512: # %bb.0: # %entry
1605 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1608 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
1609 %Z = bitcast <16 x i16> %B to <8 x i32>
; Zero extension of <4 x i32> to <4 x i64> written as a shuffle plus bitcast.
; The mask alternates each element of %A (indices 0-3) with index 4, which
; selects element 0 of the zeroinitializer operand; the <8 x i32> result is
; then reinterpreted as <4 x i64>.
; The SSE2 and SSSE3 runs expect unpcklps / unpckhps against a register zeroed
; by xorps; SSE4.1 and AVX1 expect pmovzxdq for the low half and punpckhdq for
; the high half; the AVX2 and AVX512 runs expect a single 256-bit vpmovzxdq.
1613 define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
1614 ; SSE2-LABEL: shuf_zext_4i32_to_4i64:
1615 ; SSE2: # %bb.0: # %entry
1616 ; SSE2-NEXT: movaps %xmm0, %xmm1
1617 ; SSE2-NEXT: xorps %xmm2, %xmm2
1618 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1619 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1622 ; SSSE3-LABEL: shuf_zext_4i32_to_4i64:
1623 ; SSSE3: # %bb.0: # %entry
1624 ; SSSE3-NEXT: movaps %xmm0, %xmm1
1625 ; SSSE3-NEXT: xorps %xmm2, %xmm2
1626 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1627 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1630 ; SSE41-LABEL: shuf_zext_4i32_to_4i64:
1631 ; SSE41: # %bb.0: # %entry
1632 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1633 ; SSE41-NEXT: pxor %xmm2, %xmm2
1634 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1635 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1638 ; AVX1-LABEL: shuf_zext_4i32_to_4i64:
1639 ; AVX1: # %bb.0: # %entry
1640 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1641 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1642 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1643 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1646 ; AVX2-LABEL: shuf_zext_4i32_to_4i64:
1647 ; AVX2: # %bb.0: # %entry
1648 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1651 ; AVX512-LABEL: shuf_zext_4i32_to_4i64:
1652 ; AVX512: # %bb.0: # %entry
1653 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1656 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
1657 %Z = bitcast <8 x i32> %B to <4 x i64>
; Zero extension of <8 x i8> to <8 x i32> written as a shuffle plus bitcast.
; The mask follows each element of %A (indices 0-7) with three copies of
; index 8, which selects element 0 of the zeroinitializer operand; the
; <32 x i8> result is then reinterpreted as <8 x i32>.
; The SSE2 run expects pand, packuswb and punpcklbw before the final
; punpcklwd / punpckhwd widening; the SSSE3 run expects pand followed directly
; by punpcklwd / punpckhwd. The SSE4.1 and AVX-family runs expect a pshufb
; that gathers the even-numbered bytes, followed by pmovzxbd (split in two
; halves for SSE4.1 and AVX1, one 256-bit instruction for AVX2 and AVX512).
1661 define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) {
1662 ; SSE2-LABEL: shuf_zext_8i8_to_8i32:
1663 ; SSE2: # %bb.0: # %entry
1664 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1665 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
1666 ; SSE2-NEXT: packuswb %xmm1, %xmm1
1667 ; SSE2-NEXT: pxor %xmm2, %xmm2
1668 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1669 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1670 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1671 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1674 ; SSSE3-LABEL: shuf_zext_8i8_to_8i32:
1675 ; SSSE3: # %bb.0: # %entry
1676 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1677 ; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1
1678 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1679 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1680 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1681 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1684 ; SSE41-LABEL: shuf_zext_8i8_to_8i32:
1685 ; SSE41: # %bb.0: # %entry
1686 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1687 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1688 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1689 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1690 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1693 ; AVX1-LABEL: shuf_zext_8i8_to_8i32:
1694 ; AVX1: # %bb.0: # %entry
1695 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1696 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1697 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1698 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1699 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1702 ; AVX2-LABEL: shuf_zext_8i8_to_8i32:
1703 ; AVX2: # %bb.0: # %entry
1704 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1705 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1708 ; AVX512-LABEL: shuf_zext_8i8_to_8i32:
1709 ; AVX512: # %bb.0: # %entry
1710 ; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1711 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1714 %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8>
1715 %Z = bitcast <32 x i8> %B to <8 x i32>
; Zero-extend two bytes taken from the middle of a <16 x i8> vector to i64.
; The mask selects elements 6 and 7 of %A, each followed by seven copies of
; index 16 (element 0 of the zeroinitializer operand); the <16 x i8> result is
; then reinterpreted as <2 x i64>.
; The SSE2 run expects a chain of three unpacks against a zeroed register; the
; SSSE3 run expects one pshufb; the SSE4.1 and AVX-family runs expect a 48-bit
; psrlq to bring byte 6 down to position 0, followed by pmovzxbq.
1719 define <2 x i64> @shuf_zext_16i8_to_2i64_offset6(<16 x i8> %A) nounwind uwtable readnone ssp {
1720 ; SSE2-LABEL: shuf_zext_16i8_to_2i64_offset6:
1721 ; SSE2: # %bb.0: # %entry
1722 ; SSE2-NEXT: pxor %xmm1, %xmm1
1723 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1724 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1725 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1728 ; SSSE3-LABEL: shuf_zext_16i8_to_2i64_offset6:
1729 ; SSSE3: # %bb.0: # %entry
1730 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
1733 ; SSE41-LABEL: shuf_zext_16i8_to_2i64_offset6:
1734 ; SSE41: # %bb.0: # %entry
1735 ; SSE41-NEXT: psrlq $48, %xmm0
1736 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1739 ; AVX-LABEL: shuf_zext_16i8_to_2i64_offset6:
1740 ; AVX: # %bb.0: # %entry
1741 ; AVX-NEXT: vpsrlq $48, %xmm0, %xmm0
1742 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1745 %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1746 %Z = bitcast <16 x i8> %B to <2 x i64>
; Zero-extend four bytes taken from the middle of a <16 x i8> vector to i64.
; The mask selects elements 11, 12, 13 and 14 of %A, each followed by seven
; copies of index 16 (element 0 of the zeroinitializer operand); the <32 x i8>
; result is then reinterpreted as <4 x i64>.
; The SSE2 run expects a one-byte psrldq followed by a chain of unpacks; the
; SSSE3 run expects one pshufb per 128-bit half; the SSE4.1 and AVX1 runs
; expect a psrldq + pmovzxbq pair per half; the AVX2 and AVX512 runs expect one
; 11-byte vpsrldq followed by a single 256-bit vpmovzxbq.
1750 define <4 x i64> @shuf_zext_16i8_to_4i64_offset11(<16 x i8> %A) nounwind uwtable readnone ssp {
1751 ; SSE2-LABEL: shuf_zext_16i8_to_4i64_offset11:
1752 ; SSE2: # %bb.0: # %entry
1753 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1754 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
1755 ; SSE2-NEXT: pxor %xmm2, %xmm2
1756 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
1757 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1758 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1759 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1760 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1761 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1764 ; SSSE3-LABEL: shuf_zext_16i8_to_4i64_offset11:
1765 ; SSSE3: # %bb.0: # %entry
1766 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1767 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[11],zero,zero,zero,zero,zero,zero,zero,xmm0[12],zero,zero,zero,zero,zero,zero,zero
1768 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[13],zero,zero,zero,zero,zero,zero,zero,xmm1[14],zero,zero,zero,zero,zero,zero,zero
1771 ; SSE41-LABEL: shuf_zext_16i8_to_4i64_offset11:
1772 ; SSE41: # %bb.0: # %entry
1773 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1774 ; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1775 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1776 ; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1777 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1778 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1781 ; AVX1-LABEL: shuf_zext_16i8_to_4i64_offset11:
1782 ; AVX1: # %bb.0: # %entry
1783 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1784 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1785 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1786 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1787 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1790 ; AVX2-LABEL: shuf_zext_16i8_to_4i64_offset11:
1791 ; AVX2: # %bb.0: # %entry
1792 ; AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1793 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
1796 ; AVX512-LABEL: shuf_zext_16i8_to_4i64_offset11:
1797 ; AVX512: # %bb.0: # %entry
1798 ; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1799 ; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
1802 %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <32 x i32> <i32 11, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1803 %Z = bitcast <32 x i8> %B to <4 x i64>
; Zero-extend two i16 lanes taken from the middle of an <8 x i16> vector to
; i64. The mask selects elements 3 and 4 of %A, each followed by three copies
; of index 8 (element 0 of the zeroinitializer operand); the <8 x i16> result
; is then reinterpreted as <2 x i64>.
; NOTE(review): the first selected element is i16 index 3, i.e. byte 6, so
; "offset6" in this name matches a byte offset. Sibling tests such as
; shuf_zext_8i16_to_4i64_offset2 (mask starts at element 2) and
; shuf_zext_8i16_to_4i32_offset1 (mask starts at element 1) appear to count in
; elements instead - confirm which convention is intended.
; The SSE2 run expects a 2-byte psrldq plus two unpacks; the SSSE3 run expects
; one pshufb; the SSE4.1 and AVX-family runs expect a 6-byte psrldq followed by
; pmovzxwq.
1807 define <2 x i64> @shuf_zext_8i16_to_2i64_offset6(<8 x i16> %A) nounwind uwtable readnone ssp {
1808 ; SSE2-LABEL: shuf_zext_8i16_to_2i64_offset6:
1809 ; SSE2: # %bb.0: # %entry
1810 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1811 ; SSE2-NEXT: pxor %xmm1, %xmm1
1812 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1813 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1816 ; SSSE3-LABEL: shuf_zext_8i16_to_2i64_offset6:
1817 ; SSSE3: # %bb.0: # %entry
1818 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero
1821 ; SSE41-LABEL: shuf_zext_8i16_to_2i64_offset6:
1822 ; SSE41: # %bb.0: # %entry
1823 ; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1824 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1827 ; AVX-LABEL: shuf_zext_8i16_to_2i64_offset6:
1828 ; AVX: # %bb.0: # %entry
1829 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1830 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1833 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8>
1834 %Z = bitcast <8 x i16> %B to <2 x i64>
; Zero-extend four i16 lanes taken from the middle of an <8 x i16> vector to
; i64. The mask selects elements 2, 3, 4 and 5 of %A, each followed by three
; copies of index 8 (element 0 of the zeroinitializer operand); the
; <16 x i16> result is then reinterpreted as <4 x i64>.
; The SSE2 and SSSE3 runs expect two punpck?wd / punpck?dq pairs against a
; zeroed register, one pair per 128-bit half. The SSE4.1 and AVX1 runs expect
; a pshufd + pmovzxwq pair per half; the AVX2 and AVX512 runs expect one
; vpshufd followed by a single 256-bit vpmovzxwq.
1838 define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable readnone ssp {
1839 ; SSE2-LABEL: shuf_zext_8i16_to_4i64_offset2:
1840 ; SSE2: # %bb.0: # %entry
1841 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1842 ; SSE2-NEXT: pxor %xmm2, %xmm2
1843 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1844 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1845 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1846 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1849 ; SSSE3-LABEL: shuf_zext_8i16_to_4i64_offset2:
1850 ; SSSE3: # %bb.0: # %entry
1851 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1852 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1853 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1854 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1855 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1856 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1859 ; SSE41-LABEL: shuf_zext_8i16_to_4i64_offset2:
1860 ; SSE41: # %bb.0: # %entry
1861 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1862 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1863 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1864 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1865 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1868 ; AVX1-LABEL: shuf_zext_8i16_to_4i64_offset2:
1869 ; AVX1: # %bb.0: # %entry
1870 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1871 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1872 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1873 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1874 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1877 ; AVX2-LABEL: shuf_zext_8i16_to_4i64_offset2:
1878 ; AVX2: # %bb.0: # %entry
1879 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
1880 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1883 ; AVX512-LABEL: shuf_zext_8i16_to_4i64_offset2:
1884 ; AVX512: # %bb.0: # %entry
1885 ; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
1886 ; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1889 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8>
1890 %Z = bitcast <16 x i16> %B to <4 x i64>
; Zero-extend four i16 lanes starting at element 1 of an <8 x i16> vector to
; i32. The mask alternates elements 1, 2, 3 and 4 of %A with index 8 (element
; 0 of the zeroinitializer operand); the <8 x i16> result is then
; reinterpreted as <4 x i32>.
; This is the one test in this group whose AVX2 and AVX512 expectations are
; split by sub-prefix: the two RUN lines that enable +fast-variable-shuffle
; (the AVX2-FAST and AVX512BW prefixes) expect a single vpshufb, while the
; remaining runs expect a 2-byte psrldq followed by punpcklwd (SSE2, SSSE3) or
; pmovzxwd (SSE4.1, AVX1, AVX2-SLOW, AVX512F).
1894 define <4 x i32> @shuf_zext_8i16_to_4i32_offset1(<8 x i16> %A) nounwind uwtable readnone ssp {
1895 ; SSE2-LABEL: shuf_zext_8i16_to_4i32_offset1:
1896 ; SSE2: # %bb.0: # %entry
1897 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1898 ; SSE2-NEXT: pxor %xmm1, %xmm1
1899 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1902 ; SSSE3-LABEL: shuf_zext_8i16_to_4i32_offset1:
1903 ; SSSE3: # %bb.0: # %entry
1904 ; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1905 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1906 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1909 ; SSE41-LABEL: shuf_zext_8i16_to_4i32_offset1:
1910 ; SSE41: # %bb.0: # %entry
1911 ; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1912 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1915 ; AVX1-LABEL: shuf_zext_8i16_to_4i32_offset1:
1916 ; AVX1: # %bb.0: # %entry
1917 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1918 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1921 ; AVX2-SLOW-LABEL: shuf_zext_8i16_to_4i32_offset1:
1922 ; AVX2-SLOW: # %bb.0: # %entry
1923 ; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1924 ; AVX2-SLOW-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1925 ; AVX2-SLOW-NEXT: retq
1927 ; AVX2-FAST-LABEL: shuf_zext_8i16_to_4i32_offset1:
1928 ; AVX2-FAST: # %bb.0: # %entry
1929 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,xmm0[4,5],zero,zero,xmm0[6,7],zero,zero,xmm0[8,9],zero,zero
1930 ; AVX2-FAST-NEXT: retq
1932 ; AVX512F-LABEL: shuf_zext_8i16_to_4i32_offset1:
1933 ; AVX512F: # %bb.0: # %entry
1934 ; AVX512F-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1935 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1936 ; AVX512F-NEXT: retq
1938 ; AVX512BW-LABEL: shuf_zext_8i16_to_4i32_offset1:
1939 ; AVX512BW: # %bb.0: # %entry
1940 ; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,xmm0[4,5],zero,zero,xmm0[6,7],zero,zero,xmm0[8,9],zero,zero
1941 ; AVX512BW-NEXT: retq
1943 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8>
1944 %Z = bitcast <8 x i16> %B to <4 x i32>
; Shuffle-as-zext with a lane offset of 3, widening to a 256-bit result. The mask
; selects lanes 3..7 of %A for the first five even positions, and every odd
; position is index 8 (lane 0 of the zeroinitializer operand). The last three
; even positions are undef, so only their high halves are pinned to zero.
; After the bitcast to <8 x i32>, the low five elements are i16 lanes 3..7
; zero-extended to i32.
; Per the assertions below, the SSE levels build the result as two xmm halves,
; AVX1 joins two halves with vinsertf128, and AVX2 and AVX512 shift right by
; 6 bytes and then use one ymm vpmovzxwd.
; The assertions are autogenerated (see the NOTE at the top of the file).
1948 define <8 x i32> @shuf_zext_8i16_to_8i32_offset3(<8 x i16> %A) nounwind uwtable readnone ssp {
1949 ; SSE2-LABEL: shuf_zext_8i16_to_8i32_offset3:
1950 ; SSE2: # %bb.0: # %entry
1951 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1952 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1953 ; SSE2-NEXT: pxor %xmm2, %xmm2
1954 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1955 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1958 ; SSSE3-LABEL: shuf_zext_8i16_to_8i32_offset3:
1959 ; SSSE3: # %bb.0: # %entry
1960 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1961 ; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1962 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1963 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1964 ; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1967 ; SSE41-LABEL: shuf_zext_8i16_to_8i32_offset3:
1968 ; SSE41: # %bb.0: # %entry
1969 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1970 ; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1971 ; SSE41-NEXT: pxor %xmm2, %xmm2
1972 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1973 ; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1976 ; AVX1-LABEL: shuf_zext_8i16_to_8i32_offset3:
1977 ; AVX1: # %bb.0: # %entry
1978 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1979 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1980 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1981 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1982 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1985 ; AVX2-LABEL: shuf_zext_8i16_to_8i32_offset3:
1986 ; AVX2: # %bb.0: # %entry
1987 ; AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1988 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1991 ; AVX512-LABEL: shuf_zext_8i16_to_8i32_offset3:
1992 ; AVX512: # %bb.0: # %entry
1993 ; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1994 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1997 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8, i32 undef, i32 8, i32 undef, i32 8, i32 undef, i32 8>
1998 %Z = bitcast <16 x i16> %B to <8 x i32>
; Shuffle-as-zext that reads the upper half of a 256-bit source. The even mask
; positions select lanes 8..12 and 14 of %A, with the slots for lanes 13 and 15
; left undef. Every odd position is index 16, which is lane 0 of the
; zeroinitializer operand. After the bitcast to <8 x i32>, the defined elements
; are those i16 lanes zero-extended to i32.
; Per the assertions below, AVX2 and AVX512 extract the high 128 bits and use one
; ymm vpmovzxwd. The SSE levels work from %xmm1, where the upper half of the
; argument arrives, and return two xmm halves.
; The assertions are autogenerated (see the NOTE at the top of the file).
2002 define <8 x i32> @shuf_zext_16i16_to_8i32_offset8(<16 x i16> %A) nounwind uwtable readnone ssp {
2003 ; SSE2-LABEL: shuf_zext_16i16_to_8i32_offset8:
2004 ; SSE2: # %bb.0: # %entry
2005 ; SSE2-NEXT: pxor %xmm2, %xmm2
2006 ; SSE2-NEXT: movdqa %xmm1, %xmm0
2007 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2008 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2011 ; SSSE3-LABEL: shuf_zext_16i16_to_8i32_offset8:
2012 ; SSSE3: # %bb.0: # %entry
2013 ; SSSE3-NEXT: pxor %xmm2, %xmm2
2014 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
2015 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2016 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2019 ; SSE41-LABEL: shuf_zext_16i16_to_8i32_offset8:
2020 ; SSE41: # %bb.0: # %entry
2021 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
2022 ; SSE41-NEXT: pxor %xmm2, %xmm2
2023 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
2024 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
2025 ; SSE41-NEXT: movdqa %xmm2, %xmm1
2028 ; AVX1-LABEL: shuf_zext_16i16_to_8i32_offset8:
2029 ; AVX1: # %bb.0: # %entry
2030 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2031 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
2032 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
2033 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
2034 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2035 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2038 ; AVX2-LABEL: shuf_zext_16i16_to_8i32_offset8:
2039 ; AVX2: # %bb.0: # %entry
2040 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
2041 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2044 ; AVX512-LABEL: shuf_zext_16i16_to_8i32_offset8:
2045 ; AVX512: # %bb.0: # %entry
2046 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
2047 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2050 %B = shufflevector <16 x i16> %A, <16 x i16> zeroinitializer, <16 x i32> <i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 undef, i32 16, i32 14, i32 16, i32 undef, i32 16>
2051 %Z = bitcast <16 x i16> %B to <8 x i32>
; Shuffle-as-zext with a lane offset of 2. The mask <2, 4, 3, 4> interleaves
; lanes 2 and 3 of %A with index 4, which is lane 0 of the zeroinitializer
; operand. The bitcast to <2 x i64> then gives those two i32 lanes zero-extended
; to i64.
; All SSE levels share one expected sequence and all AVX levels share another.
; Both are a zeroed register followed by an unpack-high of %xmm0 with it.
; The assertions are autogenerated (see the NOTE at the top of the file).
2055 define <2 x i64> @shuf_zext_4i32_to_2i64_offset2(<4 x i32> %A) nounwind uwtable readnone ssp {
2056 ; SSE-LABEL: shuf_zext_4i32_to_2i64_offset2:
2057 ; SSE: # %bb.0: # %entry
2058 ; SSE-NEXT: xorps %xmm1, %xmm1
2059 ; SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2062 ; AVX-LABEL: shuf_zext_4i32_to_2i64_offset2:
2063 ; AVX: # %bb.0: # %entry
2064 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
2065 ; AVX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2068 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 3, i32 4>
2069 %Z = bitcast <4 x i32> %B to <2 x i64>
; Shuffle-as-zext with a lane offset of 1 and undef end lanes. The even mask
; positions are <undef, 2, 3, undef> and every odd position is index 4, which is
; lane 0 of the zeroinitializer operand. After the bitcast to <4 x i64>,
; elements 1 and 2 are lanes 2 and 3 of %A zero-extended to i64. Elements 0 and 3
; have an undef low half and a zero high half.
; Per the assertions below, AVX2 and AVX512 use vpshufd [1,2,3,3] followed by a
; ymm vpmovzxdq. The SSE levels mask or blend the low half and byte-shift the
; high half. AVX1 joins two such halves with vinsertf128.
; The assertions are autogenerated (see the NOTE at the top of the file).
2073 define <4 x i64> @shuf_zext_4i32_to_4i64_offset1(<4 x i32> %A) nounwind uwtable readnone ssp {
2074 ; SSE2-LABEL: shuf_zext_4i32_to_4i64_offset1:
2075 ; SSE2: # %bb.0: # %entry
2076 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2077 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
2078 ; SSE2-NEXT: pand %xmm1, %xmm0
2079 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2082 ; SSSE3-LABEL: shuf_zext_4i32_to_4i64_offset1:
2083 ; SSSE3: # %bb.0: # %entry
2084 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
2085 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
2086 ; SSSE3-NEXT: pand %xmm1, %xmm0
2087 ; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2090 ; SSE41-LABEL: shuf_zext_4i32_to_4i64_offset1:
2091 ; SSE41: # %bb.0: # %entry
2092 ; SSE41-NEXT: movdqa %xmm0, %xmm1
2093 ; SSE41-NEXT: pxor %xmm0, %xmm0
2094 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
2095 ; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2098 ; AVX1-LABEL: shuf_zext_4i32_to_4i64_offset1:
2099 ; AVX1: # %bb.0: # %entry
2100 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
2101 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
2102 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2103 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2106 ; AVX2-LABEL: shuf_zext_4i32_to_4i64_offset1:
2107 ; AVX2: # %bb.0: # %entry
2108 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
2109 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2112 ; AVX512-LABEL: shuf_zext_4i32_to_4i64_offset1:
2113 ; AVX512: # %bb.0: # %entry
2114 ; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
2115 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2118 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 undef, i32 4, i32 2, i32 4, i32 3, i32 4, i32 undef, i32 4>
2119 %Z = bitcast <8 x i32> %B to <4 x i64>
; Plain zext of <32 x i8> to <32 x i32>, a 1024-bit result that no single vector
; register holds on any configuration tested here.
; What the assertions below expect:
;  - SSE levels copy %rdi to %rax and write the result as eight 16-byte stores
;    through %rdi at offsets 0..112. SSE2 and SSSE3 widen by unpacking against a
;    zeroed register, and SSE4.1 uses pshufd plus pmovzxbd.
;  - AVX1 builds four ymm values from xmm pmovzxbd pairs joined by vinsertf128.
;  - AVX2 uses four ymm vpmovzxbd, leaving the result in ymm0..ymm3.
;  - AVX512 uses two zmm vpmovzxbd, leaving the result in zmm0 and zmm1.
; The assertions are autogenerated (see the NOTE at the top of the file).
2123 define <32 x i32> @zext_32i8_to_32i32(<32 x i8> %x) {
2124 ; SSE2-LABEL: zext_32i8_to_32i32:
2126 ; SSE2-NEXT: movq %rdi, %rax
2127 ; SSE2-NEXT: pxor %xmm2, %xmm2
2128 ; SSE2-NEXT: movdqa %xmm0, %xmm3
2129 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2130 ; SSE2-NEXT: movdqa %xmm3, %xmm8
2131 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm2[0],xmm8[1],xmm2[1],xmm8[2],xmm2[2],xmm8[3],xmm2[3]
2132 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2133 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
2134 ; SSE2-NEXT: movdqa %xmm0, %xmm5
2135 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
2136 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2137 ; SSE2-NEXT: movdqa %xmm1, %xmm6
2138 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2139 ; SSE2-NEXT: movdqa %xmm6, %xmm7
2140 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm2[0],xmm7[1],xmm2[1],xmm7[2],xmm2[2],xmm7[3],xmm2[3]
2141 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2142 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
2143 ; SSE2-NEXT: movdqa %xmm1, %xmm4
2144 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2145 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2146 ; SSE2-NEXT: movdqa %xmm1, 112(%rdi)
2147 ; SSE2-NEXT: movdqa %xmm4, 96(%rdi)
2148 ; SSE2-NEXT: movdqa %xmm6, 80(%rdi)
2149 ; SSE2-NEXT: movdqa %xmm7, 64(%rdi)
2150 ; SSE2-NEXT: movdqa %xmm0, 48(%rdi)
2151 ; SSE2-NEXT: movdqa %xmm5, 32(%rdi)
2152 ; SSE2-NEXT: movdqa %xmm3, 16(%rdi)
2153 ; SSE2-NEXT: movdqa %xmm8, (%rdi)
2156 ; SSSE3-LABEL: zext_32i8_to_32i32:
2158 ; SSSE3-NEXT: movq %rdi, %rax
2159 ; SSSE3-NEXT: pxor %xmm2, %xmm2
2160 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
2161 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2162 ; SSSE3-NEXT: movdqa %xmm3, %xmm8
2163 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm2[0],xmm8[1],xmm2[1],xmm8[2],xmm2[2],xmm8[3],xmm2[3]
2164 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2165 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
2166 ; SSSE3-NEXT: movdqa %xmm0, %xmm5
2167 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
2168 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2169 ; SSSE3-NEXT: movdqa %xmm1, %xmm6
2170 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2171 ; SSSE3-NEXT: movdqa %xmm6, %xmm7
2172 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm2[0],xmm7[1],xmm2[1],xmm7[2],xmm2[2],xmm7[3],xmm2[3]
2173 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2174 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
2175 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
2176 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2177 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2178 ; SSSE3-NEXT: movdqa %xmm1, 112(%rdi)
2179 ; SSSE3-NEXT: movdqa %xmm4, 96(%rdi)
2180 ; SSSE3-NEXT: movdqa %xmm6, 80(%rdi)
2181 ; SSSE3-NEXT: movdqa %xmm7, 64(%rdi)
2182 ; SSSE3-NEXT: movdqa %xmm0, 48(%rdi)
2183 ; SSSE3-NEXT: movdqa %xmm5, 32(%rdi)
2184 ; SSSE3-NEXT: movdqa %xmm3, 16(%rdi)
2185 ; SSSE3-NEXT: movdqa %xmm8, (%rdi)
2188 ; SSE41-LABEL: zext_32i8_to_32i32:
2190 ; SSE41-NEXT: movq %rdi, %rax
2191 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2192 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
2193 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
2194 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
2195 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
2196 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
2197 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2198 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm5 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2199 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm1[1,1,2,3]
2200 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero
2201 ; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm1[2,3,0,1]
2202 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
2203 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,3]
2204 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2205 ; SSE41-NEXT: movdqa %xmm1, 112(%rdi)
2206 ; SSE41-NEXT: movdqa %xmm7, 96(%rdi)
2207 ; SSE41-NEXT: movdqa %xmm6, 80(%rdi)
2208 ; SSE41-NEXT: movdqa %xmm5, 64(%rdi)
2209 ; SSE41-NEXT: movdqa %xmm0, 48(%rdi)
2210 ; SSE41-NEXT: movdqa %xmm4, 32(%rdi)
2211 ; SSE41-NEXT: movdqa %xmm3, 16(%rdi)
2212 ; SSE41-NEXT: movdqa %xmm2, (%rdi)
2215 ; AVX1-LABEL: zext_32i8_to_32i32:
2217 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2218 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
2219 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
2220 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm4
2221 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2222 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
2223 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm3[1,1,2,3]
2224 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
2225 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
2226 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
2227 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2228 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,0,1]
2229 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2230 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
2231 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
2232 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2233 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[3,3,0,1]
2234 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
2235 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm3
2236 ; AVX1-NEXT: vmovaps %ymm4, %ymm0
2239 ; AVX2-LABEL: zext_32i8_to_32i32:
2241 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2242 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2243 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
2244 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
2245 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
2246 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,2,3]
2247 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2248 ; AVX2-NEXT: vmovdqa %ymm4, %ymm0
2251 ; AVX512-LABEL: zext_32i8_to_32i32:
2253 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
2254 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
2255 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
2256 ; AVX512-NEXT: vmovdqa64 %zmm2, %zmm0
2258 %res = zext <32 x i8>%x to <32 x i32>
; Loads a <2 x i8> from %addr (align 1), zero-extends it to <2 x i32>, and adds
; the extended value to itself.
; The assertions below expect 64-bit lane operations (pmovzxbq from memory on
; SSE4.1 and AVX, and paddq on every configuration) even though the IR element
; type is i32.
; NOTE(review): presumably <2 x i32> is widened to 64-bit lanes during type
; legalization in the compiler revision that produced these assertions. Confirm
; before regenerating.
; The assertions are autogenerated (see the NOTE at the top of the file).
2262 define <2 x i32> @zext_2i8_to_2i32(<2 x i8>* %addr) {
2263 ; SSE2-LABEL: zext_2i8_to_2i32:
2265 ; SSE2-NEXT: movzwl (%rdi), %eax
2266 ; SSE2-NEXT: movd %eax, %xmm0
2267 ; SSE2-NEXT: pxor %xmm1, %xmm1
2268 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2269 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2270 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
2271 ; SSE2-NEXT: paddq %xmm0, %xmm0
2274 ; SSSE3-LABEL: zext_2i8_to_2i32:
2276 ; SSSE3-NEXT: movzwl (%rdi), %eax
2277 ; SSSE3-NEXT: movd %eax, %xmm0
2278 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[3],zero,zero,zero
2279 ; SSSE3-NEXT: paddq %xmm0, %xmm0
2282 ; SSE41-LABEL: zext_2i8_to_2i32:
2284 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
2285 ; SSE41-NEXT: paddq %xmm0, %xmm0
2288 ; AVX-LABEL: zext_2i8_to_2i32:
2290 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
2291 ; AVX-NEXT: vpaddq %xmm0, %xmm0, %xmm0
2293 %x = load <2 x i8>, <2 x i8>* %addr, align 1
2294 %y = zext <2 x i8> %x to <2 x i32>
2295 %z = add <2 x i32>%y, %y