1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; The configurations below run llc for x86_64 with different -mattr feature sets.
; Prefix lists use the comma-separated --check-prefixes form throughout; it is an
; alias for repeating --check-prefix, so the active prefixes per configuration are
; unchanged. Configurations that list identical prefixes (the two AVX2-FAST ones and
; the two AVX512BW ones) are verified against one shared set of assertions.
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
; Zero-extends the low 8 bytes of a <16 x i8> argument to <8 x i16>.
; Expected lowering per the assertions below - the SSE2 and SSSE3 runs interleave
; the input with a zeroed register (pxor + punpcklbw), while the SSE41 run and all
; AVX runs use a single pmovzxbw / vpmovzxbw.
13 define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
14 ; SSE2-LABEL: zext_16i8_to_8i16:
15 ; SSE2: # %bb.0: # %entry
16 ; SSE2-NEXT: pxor %xmm1, %xmm1
17 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
20 ; SSSE3-LABEL: zext_16i8_to_8i16:
21 ; SSSE3: # %bb.0: # %entry
22 ; SSSE3-NEXT: pxor %xmm1, %xmm1
23 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
26 ; SSE41-LABEL: zext_16i8_to_8i16:
27 ; SSE41: # %bb.0: # %entry
28 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
31 ; AVX-LABEL: zext_16i8_to_8i16:
32 ; AVX: # %bb.0: # %entry
33 ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; Keep only elements 0-7 of %A; the second shuffle operand is never referenced.
36 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Widen each i8 lane to i16 with zero extension.
37 %C = zext <8 x i8> %B to <8 x i16>
; Zero-extends all 16 bytes of a <16 x i8> argument to <16 x i16>.
; Expected lowering per the assertions below - the SSE runs produce the low half in
; xmm0 and the high half in xmm1 (punpckhbw against a zeroed register for the high
; half), the AVX1 run builds ymm0 from two 128-bit halves with vinsertf128, and
; the AVX2 and AVX512 runs use a single ymm vpmovzxbw.
42 define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %A) {
43 ; SSE2-LABEL: zext_16i8_to_16i16:
44 ; SSE2: # %bb.0: # %entry
45 ; SSE2-NEXT: movdqa %xmm0, %xmm1
46 ; SSE2-NEXT: pxor %xmm2, %xmm2
47 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
48 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
51 ; SSSE3-LABEL: zext_16i8_to_16i16:
52 ; SSSE3: # %bb.0: # %entry
53 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
54 ; SSSE3-NEXT: pxor %xmm2, %xmm2
55 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
56 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
59 ; SSE41-LABEL: zext_16i8_to_16i16:
60 ; SSE41: # %bb.0: # %entry
61 ; SSE41-NEXT: movdqa %xmm0, %xmm1
62 ; SSE41-NEXT: pxor %xmm2, %xmm2
63 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
64 ; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
67 ; AVX1-LABEL: zext_16i8_to_16i16:
68 ; AVX1: # %bb.0: # %entry
69 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
70 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
71 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
72 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
75 ; AVX2-LABEL: zext_16i8_to_16i16:
76 ; AVX2: # %bb.0: # %entry
77 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
80 ; AVX512-LABEL: zext_16i8_to_16i16:
81 ; AVX512: # %bb.0: # %entry
82 ; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; Widen every i8 lane of %A to i16 with zero extension (no shuffle needed).
85 %B = zext <16 x i8> %A to <16 x i16>
; Zero-extends all 32 bytes of a <32 x i8> argument to <32 x i16>.
; Expected lowering per the assertions below - the SSE runs produce four results in
; xmm0-xmm3, the AVX1 and AVX2 runs produce two ymm results (ymm0 and ymm1), the
; AVX512F run widens two 128-bit halves separately and joins them with
; vinserti64x4, and the AVX512BW run uses a single zmm vpmovzxbw.
89 define <32 x i16> @zext_32i8_to_32i16(<32 x i8> %A) {
90 ; SSE2-LABEL: zext_32i8_to_32i16:
91 ; SSE2: # %bb.0: # %entry
92 ; SSE2-NEXT: movdqa %xmm1, %xmm3
93 ; SSE2-NEXT: movdqa %xmm0, %xmm1
94 ; SSE2-NEXT: pxor %xmm4, %xmm4
95 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
96 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15]
97 ; SSE2-NEXT: movdqa %xmm3, %xmm2
98 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
99 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
102 ; SSSE3-LABEL: zext_32i8_to_32i16:
103 ; SSSE3: # %bb.0: # %entry
104 ; SSSE3-NEXT: movdqa %xmm1, %xmm3
105 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
106 ; SSSE3-NEXT: pxor %xmm4, %xmm4
107 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
108 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15]
109 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
110 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
111 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
114 ; SSE41-LABEL: zext_32i8_to_32i16:
115 ; SSE41: # %bb.0: # %entry
116 ; SSE41-NEXT: movdqa %xmm1, %xmm3
117 ; SSE41-NEXT: movdqa %xmm0, %xmm1
118 ; SSE41-NEXT: pxor %xmm4, %xmm4
119 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
120 ; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15]
121 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
122 ; SSE41-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
125 ; AVX1-LABEL: zext_32i8_to_32i16:
126 ; AVX1: # %bb.0: # %entry
127 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
128 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
129 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
130 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
131 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
132 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
133 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
134 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
135 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
138 ; AVX2-LABEL: zext_32i8_to_32i16:
139 ; AVX2: # %bb.0: # %entry
140 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
141 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
142 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
143 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
146 ; AVX512F-LABEL: zext_32i8_to_32i16:
147 ; AVX512F: # %bb.0: # %entry
148 ; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
149 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
150 ; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
151 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
154 ; AVX512BW-LABEL: zext_32i8_to_32i16:
155 ; AVX512BW: # %bb.0: # %entry
156 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
157 ; AVX512BW-NEXT: retq
; Widen every i8 lane of %A to i16 with zero extension (no shuffle needed).
159 %B = zext <32 x i8> %A to <32 x i16>
; Zero-extends the low 4 bytes of a <16 x i8> argument to <4 x i32>.
; Expected lowering per the assertions below - the SSE2 and SSSE3 runs interleave
; twice with a zeroed register (punpcklbw then punpcklwd), while the SSE41 run and
; all AVX runs use a single pmovzxbd / vpmovzxbd.
163 define <4 x i32> @zext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp {
164 ; SSE2-LABEL: zext_16i8_to_4i32:
165 ; SSE2: # %bb.0: # %entry
166 ; SSE2-NEXT: pxor %xmm1, %xmm1
167 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
168 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
171 ; SSSE3-LABEL: zext_16i8_to_4i32:
172 ; SSSE3: # %bb.0: # %entry
173 ; SSSE3-NEXT: pxor %xmm1, %xmm1
174 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
175 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
178 ; SSE41-LABEL: zext_16i8_to_4i32:
179 ; SSE41: # %bb.0: # %entry
180 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
183 ; AVX-LABEL: zext_16i8_to_4i32:
184 ; AVX: # %bb.0: # %entry
185 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; Keep only elements 0-3 of %A; the second shuffle operand is never referenced.
188 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Widen each i8 lane to i32 with zero extension.
189 %C = zext <4 x i8> %B to <4 x i32>
; Zero-extends the low 8 bytes of a <16 x i8> argument to <8 x i32>.
; Expected lowering per the assertions below - the SSE runs produce two results in
; xmm0 and xmm1, the AVX1 run widens two 4-byte groups separately and joins them
; with vinsertf128, and the AVX2 and AVX512 runs use a single ymm vpmovzxbd.
193 define <8 x i32> @zext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp {
194 ; SSE2-LABEL: zext_16i8_to_8i32:
195 ; SSE2: # %bb.0: # %entry
196 ; SSE2-NEXT: movdqa %xmm0, %xmm1
197 ; SSE2-NEXT: pxor %xmm2, %xmm2
198 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
199 ; SSE2-NEXT: movdqa %xmm1, %xmm0
200 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
201 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
204 ; SSSE3-LABEL: zext_16i8_to_8i32:
205 ; SSSE3: # %bb.0: # %entry
206 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
207 ; SSSE3-NEXT: pxor %xmm2, %xmm2
208 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
209 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
210 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
211 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
214 ; SSE41-LABEL: zext_16i8_to_8i32:
215 ; SSE41: # %bb.0: # %entry
216 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
217 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
218 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
219 ; SSE41-NEXT: movdqa %xmm2, %xmm0
222 ; AVX1-LABEL: zext_16i8_to_8i32:
223 ; AVX1: # %bb.0: # %entry
224 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
225 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
226 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
227 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
230 ; AVX2-LABEL: zext_16i8_to_8i32:
231 ; AVX2: # %bb.0: # %entry
232 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
235 ; AVX512-LABEL: zext_16i8_to_8i32:
236 ; AVX512: # %bb.0: # %entry
237 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; Keep only elements 0-7 of %A; the second shuffle operand is never referenced.
240 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Widen each i8 lane to i32 with zero extension.
241 %C = zext <8 x i8> %B to <8 x i32>
; Zero-extends all 16 bytes of a <16 x i8> argument to <16 x i32>.
; Expected lowering per the assertions below - the SSE runs produce four results in
; xmm0-xmm3, the AVX1 and AVX2 runs produce two ymm results (ymm0 and ymm1), and
; the AVX512 runs use a single zmm vpmovzxbd.
245 define <16 x i32> @zext_16i8_to_16i32(<16 x i8> %A) nounwind uwtable readnone ssp {
246 ; SSE2-LABEL: zext_16i8_to_16i32:
247 ; SSE2: # %bb.0: # %entry
248 ; SSE2-NEXT: movdqa %xmm0, %xmm3
249 ; SSE2-NEXT: pxor %xmm4, %xmm4
250 ; SSE2-NEXT: movdqa %xmm0, %xmm1
251 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
252 ; SSE2-NEXT: movdqa %xmm1, %xmm0
253 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
254 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
255 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
256 ; SSE2-NEXT: movdqa %xmm3, %xmm2
257 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
258 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
261 ; SSSE3-LABEL: zext_16i8_to_16i32:
262 ; SSSE3: # %bb.0: # %entry
263 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
264 ; SSSE3-NEXT: pxor %xmm4, %xmm4
265 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
266 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
267 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
268 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
269 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
270 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
271 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
272 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
273 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
276 ; SSE41-LABEL: zext_16i8_to_16i32:
277 ; SSE41: # %bb.0: # %entry
278 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
279 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
280 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
281 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
282 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
283 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
284 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
285 ; SSE41-NEXT: movdqa %xmm4, %xmm0
288 ; AVX1-LABEL: zext_16i8_to_16i32:
289 ; AVX1: # %bb.0: # %entry
290 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
291 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
292 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
293 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
294 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
295 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
296 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
297 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
298 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
299 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
302 ; AVX2-LABEL: zext_16i8_to_16i32:
303 ; AVX2: # %bb.0: # %entry
304 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
305 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
306 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
307 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
310 ; AVX512-LABEL: zext_16i8_to_16i32:
311 ; AVX512: # %bb.0: # %entry
312 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; Widen every i8 lane of %A to i32 with zero extension (no shuffle needed).
315 %B = zext <16 x i8> %A to <16 x i32>
; Zero-extends the low 2 bytes of a <16 x i8> argument to <2 x i64>.
; Expected lowering per the assertions below - the SSE2 run interleaves three times
; with a zeroed register (punpcklbw, punpcklwd, punpckldq), the SSSE3 run uses a
; single pshufb, and the SSE41 run and all AVX runs use a single
; pmovzxbq / vpmovzxbq.
319 define <2 x i64> @zext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp {
320 ; SSE2-LABEL: zext_16i8_to_2i64:
321 ; SSE2: # %bb.0: # %entry
322 ; SSE2-NEXT: pxor %xmm1, %xmm1
323 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
324 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
325 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
328 ; SSSE3-LABEL: zext_16i8_to_2i64:
329 ; SSSE3: # %bb.0: # %entry
330 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
333 ; SSE41-LABEL: zext_16i8_to_2i64:
334 ; SSE41: # %bb.0: # %entry
335 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
338 ; AVX-LABEL: zext_16i8_to_2i64:
339 ; AVX: # %bb.0: # %entry
340 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; Keep only elements 0-1 of %A; the second shuffle operand is never referenced.
343 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
; Widen each i8 lane to i64 with zero extension.
344 %C = zext <2 x i8> %B to <2 x i64>
; Zero-extends the low 4 bytes of a <16 x i8> argument to <4 x i64>.
; Expected lowering per the assertions below - the SSE runs produce two results in
; xmm0 and xmm1 (unpack chain on SSE2, two pshufb on SSSE3, two pmovzxbq with a
; psrld $16 in between on SSE41), the AVX1 run joins two vpmovzxbq results with
; vinsertf128, and the AVX2 and AVX512 runs use a single ymm vpmovzxbq.
348 define <4 x i64> @zext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp {
349 ; SSE2-LABEL: zext_16i8_to_4i64:
350 ; SSE2: # %bb.0: # %entry
351 ; SSE2-NEXT: movdqa %xmm0, %xmm1
352 ; SSE2-NEXT: pxor %xmm2, %xmm2
353 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
354 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
355 ; SSE2-NEXT: movdqa %xmm1, %xmm0
356 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
357 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
360 ; SSSE3-LABEL: zext_16i8_to_4i64:
361 ; SSSE3: # %bb.0: # %entry
362 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
363 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
364 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
367 ; SSE41-LABEL: zext_16i8_to_4i64:
368 ; SSE41: # %bb.0: # %entry
369 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
370 ; SSE41-NEXT: psrld $16, %xmm0
371 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
372 ; SSE41-NEXT: movdqa %xmm2, %xmm0
375 ; AVX1-LABEL: zext_16i8_to_4i64:
376 ; AVX1: # %bb.0: # %entry
377 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
378 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
379 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
380 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
383 ; AVX2-LABEL: zext_16i8_to_4i64:
384 ; AVX2: # %bb.0: # %entry
385 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
388 ; AVX512-LABEL: zext_16i8_to_4i64:
389 ; AVX512: # %bb.0: # %entry
390 ; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; Keep only elements 0-3 of %A; the second shuffle operand is never referenced.
393 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Widen each i8 lane to i64 with zero extension.
394 %C = zext <4 x i8> %B to <4 x i64>
; Zero-extends the low 8 bytes of a <16 x i8> argument to <8 x i64>.
; Expected lowering per the assertions below - the SSE runs produce four results in
; xmm0-xmm3 (unpack chain on SSE2, four pshufb on SSSE3, four pmovzxbq fed by
; shifts and a pshufd on SSE41), the AVX1 and AVX2 runs produce two ymm results
; (ymm0 and ymm1), and the AVX512 runs use a single zmm vpmovzxbq.
398 define <8 x i64> @zext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp {
399 ; SSE2-LABEL: zext_16i8_to_8i64:
400 ; SSE2: # %bb.0: # %entry
401 ; SSE2-NEXT: movdqa %xmm0, %xmm3
402 ; SSE2-NEXT: pxor %xmm4, %xmm4
403 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
404 ; SSE2-NEXT: movdqa %xmm3, %xmm1
405 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
406 ; SSE2-NEXT: movdqa %xmm1, %xmm0
407 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
408 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
409 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
410 ; SSE2-NEXT: movdqa %xmm3, %xmm2
411 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
412 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
415 ; SSSE3-LABEL: zext_16i8_to_8i64:
416 ; SSSE3: # %bb.0: # %entry
417 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
418 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
419 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
420 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
421 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
422 ; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[4],zero,zero,zero,zero,zero,zero,zero,xmm2[5],zero,zero,zero,zero,zero,zero,zero
423 ; SSSE3-NEXT: pshufb {{.*#+}} xmm3 = xmm3[6],zero,zero,zero,zero,zero,zero,zero,xmm3[7],zero,zero,zero,zero,zero,zero,zero
426 ; SSE41-LABEL: zext_16i8_to_8i64:
427 ; SSE41: # %bb.0: # %entry
428 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
429 ; SSE41-NEXT: movdqa %xmm0, %xmm1
430 ; SSE41-NEXT: psrld $16, %xmm1
431 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
432 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
433 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
434 ; SSE41-NEXT: psrlq $48, %xmm0
435 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
436 ; SSE41-NEXT: movdqa %xmm4, %xmm0
439 ; AVX1-LABEL: zext_16i8_to_8i64:
440 ; AVX1: # %bb.0: # %entry
441 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
442 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm2
443 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
444 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
445 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
446 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
447 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
448 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
449 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
450 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
453 ; AVX2-LABEL: zext_16i8_to_8i64:
454 ; AVX2: # %bb.0: # %entry
455 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
456 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
457 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
458 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
461 ; AVX512-LABEL: zext_16i8_to_8i64:
462 ; AVX512: # %bb.0: # %entry
463 ; AVX512-NEXT: vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; Keep only elements 0-7 of %A; the second shuffle operand is never referenced.
466 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Widen each i8 lane to i64 with zero extension.
467 %C = zext <8 x i8> %B to <8 x i64>
; Zero-extends the low 4 words of an <8 x i16> argument to <4 x i32>.
; Expected lowering per the assertions below - the SSE2 and SSSE3 runs interleave
; the input with a zeroed register (pxor + punpcklwd), while the SSE41 run and all
; AVX runs use a single pmovzxwd / vpmovzxwd.
471 define <4 x i32> @zext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp {
472 ; SSE2-LABEL: zext_8i16_to_4i32:
473 ; SSE2: # %bb.0: # %entry
474 ; SSE2-NEXT: pxor %xmm1, %xmm1
475 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
478 ; SSSE3-LABEL: zext_8i16_to_4i32:
479 ; SSSE3: # %bb.0: # %entry
480 ; SSSE3-NEXT: pxor %xmm1, %xmm1
481 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
484 ; SSE41-LABEL: zext_8i16_to_4i32:
485 ; SSE41: # %bb.0: # %entry
486 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
489 ; AVX-LABEL: zext_8i16_to_4i32:
490 ; AVX: # %bb.0: # %entry
491 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; Keep only elements 0-3 of %A; the second shuffle operand is never referenced.
494 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Widen each i16 lane to i32 with zero extension.
495 %C = zext <4 x i16> %B to <4 x i32>
; Zero-extends all 8 words of an <8 x i16> argument to <8 x i32>.
; Expected lowering per the assertions below - the SSE runs produce the low half in
; xmm0 and the high half in xmm1 (punpckhwd against a zeroed register for the high
; half), the AVX1 run builds ymm0 from two 128-bit halves with vinsertf128, and
; the AVX2 and AVX512 runs use a single ymm vpmovzxwd.
499 define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
500 ; SSE2-LABEL: zext_8i16_to_8i32:
501 ; SSE2: # %bb.0: # %entry
502 ; SSE2-NEXT: movdqa %xmm0, %xmm1
503 ; SSE2-NEXT: pxor %xmm2, %xmm2
504 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
505 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
508 ; SSSE3-LABEL: zext_8i16_to_8i32:
509 ; SSSE3: # %bb.0: # %entry
510 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
511 ; SSSE3-NEXT: pxor %xmm2, %xmm2
512 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
513 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
516 ; SSE41-LABEL: zext_8i16_to_8i32:
517 ; SSE41: # %bb.0: # %entry
518 ; SSE41-NEXT: movdqa %xmm0, %xmm1
519 ; SSE41-NEXT: pxor %xmm2, %xmm2
520 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
521 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
524 ; AVX1-LABEL: zext_8i16_to_8i32:
525 ; AVX1: # %bb.0: # %entry
526 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
527 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
528 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
529 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
532 ; AVX2-LABEL: zext_8i16_to_8i32:
533 ; AVX2: # %bb.0: # %entry
534 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
537 ; AVX512-LABEL: zext_8i16_to_8i32:
538 ; AVX512: # %bb.0: # %entry
539 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; Widen every i16 lane of %A to i32 with zero extension (no shuffle needed).
542 %B = zext <8 x i16> %A to <8 x i32>
546 define <16 x i32> @zext_16i16_to_16i32(<16 x i16> %A) nounwind uwtable readnone ssp {
547 ; SSE2-LABEL: zext_16i16_to_16i32:
548 ; SSE2: # %bb.0: # %entry
549 ; SSE2-NEXT: movdqa %xmm1, %xmm3
550 ; SSE2-NEXT: movdqa %xmm0, %xmm1
551 ; SSE2-NEXT: pxor %xmm4, %xmm4
552 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
553 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
554 ; SSE2-NEXT: movdqa %xmm3, %xmm2
555 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
556 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
559 ; SSSE3-LABEL: zext_16i16_to_16i32:
560 ; SSSE3: # %bb.0: # %entry
561 ; SSSE3-NEXT: movdqa %xmm1, %xmm3
562 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
563 ; SSSE3-NEXT: pxor %xmm4, %xmm4
564 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
565 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
566 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
567 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
568 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
571 ; SSE41-LABEL: zext_16i16_to_16i32:
572 ; SSE41: # %bb.0: # %entry
573 ; SSE41-NEXT: movdqa %xmm1, %xmm3
574 ; SSE41-NEXT: movdqa %xmm0, %xmm1
575 ; SSE41-NEXT: pxor %xmm4, %xmm4
576 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
577 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
578 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
579 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
582 ; AVX1-LABEL: zext_16i16_to_16i32:
583 ; AVX1: # %bb.0: # %entry
584 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
585 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
586 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
587 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
588 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
589 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
590 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
591 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
592 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
595 ; AVX2-LABEL: zext_16i16_to_16i32:
596 ; AVX2: # %bb.0: # %entry
597 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
598 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
599 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
600 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
603 ; AVX512-LABEL: zext_16i16_to_16i32:
604 ; AVX512: # %bb.0: # %entry
605 ; AVX512-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
608 %B = zext <16 x i16> %A to <16 x i32>
; Extracts elements 0 and 1 of a <8 x i16> argument with a shufflevector and
; zero-extends that <2 x i16> to <2 x i64>.
; Expected lowering per the assertions below: SSE2 and SSSE3 unpack twice
; against a zeroed register (punpcklwd then punpckldq); SSE4.1 and all AVX
; configurations use a single (v)pmovzxwq.
612 define <2 x i64> @zext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp {
613 ; SSE2-LABEL: zext_8i16_to_2i64:
614 ; SSE2: # %bb.0: # %entry
615 ; SSE2-NEXT: pxor %xmm1, %xmm1
616 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
617 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
620 ; SSSE3-LABEL: zext_8i16_to_2i64:
621 ; SSSE3: # %bb.0: # %entry
622 ; SSSE3-NEXT: pxor %xmm1, %xmm1
623 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
624 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
627 ; SSE41-LABEL: zext_8i16_to_2i64:
628 ; SSE41: # %bb.0: # %entry
629 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
632 ; AVX-LABEL: zext_8i16_to_2i64:
633 ; AVX: # %bb.0: # %entry
634 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
637 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
638 %C = zext <2 x i16> %B to <2 x i64>
; Extracts elements 0..3 of a <8 x i16> argument with a shufflevector and
; zero-extends that <4 x i16> to <4 x i64>.
; Expected lowering per the assertions below: SSE2 and SSSE3 unpack against a
; zeroed register (punpcklwd, then punpckldq/punpckhdq); SSE4.1 and AVX1 issue
; two (v)pmovzxwq with a pshufd [1,1,1,1] feeding the second one; AVX2 and
; AVX-512 use a single ymm vpmovzxwq.
642 define <4 x i64> @zext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp {
643 ; SSE2-LABEL: zext_8i16_to_4i64:
644 ; SSE2: # %bb.0: # %entry
645 ; SSE2-NEXT: movdqa %xmm0, %xmm1
646 ; SSE2-NEXT: pxor %xmm2, %xmm2
647 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
648 ; SSE2-NEXT: movdqa %xmm1, %xmm0
649 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
650 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
653 ; SSSE3-LABEL: zext_8i16_to_4i64:
654 ; SSSE3: # %bb.0: # %entry
655 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
656 ; SSSE3-NEXT: pxor %xmm2, %xmm2
657 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
658 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
659 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
660 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
663 ; SSE41-LABEL: zext_8i16_to_4i64:
664 ; SSE41: # %bb.0: # %entry
665 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
666 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
667 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
668 ; SSE41-NEXT: movdqa %xmm2, %xmm0
671 ; AVX1-LABEL: zext_8i16_to_4i64:
672 ; AVX1: # %bb.0: # %entry
673 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
674 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
675 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
676 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
679 ; AVX2-LABEL: zext_8i16_to_4i64:
680 ; AVX2: # %bb.0: # %entry
681 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
684 ; AVX512-LABEL: zext_8i16_to_4i64:
685 ; AVX512: # %bb.0: # %entry
686 ; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
689 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
690 %C = zext <4 x i16> %B to <4 x i64>
; Zero-extends every lane of a <8 x i16> argument to i64 with a single
; `zext <8 x i16> ... to <8 x i64>`.
; Expected lowering per the assertions below: the SSE configurations produce
; four xmm results (xmm0..xmm3), via unpacks against zero on SSE2/SSSE3 and via
; pshufd + pmovzxwq on SSE4.1; AVX1 pairs xmm results with vinsertf128; AVX2
; uses two ymm vpmovzxwq; AVX-512 uses one zmm vpmovzxwq.
694 define <8 x i64> @zext_8i16_to_8i64(<8 x i16> %A) nounwind uwtable readnone ssp {
695 ; SSE2-LABEL: zext_8i16_to_8i64:
696 ; SSE2: # %bb.0: # %entry
697 ; SSE2-NEXT: movdqa %xmm0, %xmm3
698 ; SSE2-NEXT: pxor %xmm4, %xmm4
699 ; SSE2-NEXT: movdqa %xmm0, %xmm1
700 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
701 ; SSE2-NEXT: movdqa %xmm1, %xmm0
702 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
703 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
704 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
705 ; SSE2-NEXT: movdqa %xmm3, %xmm2
706 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
707 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
710 ; SSSE3-LABEL: zext_8i16_to_8i64:
711 ; SSSE3: # %bb.0: # %entry
712 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
713 ; SSSE3-NEXT: pxor %xmm4, %xmm4
714 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
715 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
716 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
717 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
718 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
719 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
720 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
721 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
722 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
725 ; SSE41-LABEL: zext_8i16_to_8i64:
726 ; SSE41: # %bb.0: # %entry
727 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
728 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
729 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
730 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
731 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
732 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
733 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
734 ; SSE41-NEXT: movdqa %xmm4, %xmm0
737 ; AVX1-LABEL: zext_8i16_to_8i64:
738 ; AVX1: # %bb.0: # %entry
739 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
740 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
741 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
742 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
743 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
744 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
745 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
746 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
747 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
748 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
751 ; AVX2-LABEL: zext_8i16_to_8i64:
752 ; AVX2: # %bb.0: # %entry
753 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
754 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
755 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
756 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
759 ; AVX512-LABEL: zext_8i16_to_8i64:
760 ; AVX512: # %bb.0: # %entry
761 ; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
764 %B = zext <8 x i16> %A to <8 x i64>
; Extracts elements 0 and 1 of a <4 x i32> argument with a shufflevector and
; zero-extends that <2 x i32> to <2 x i64>.
; Expected lowering per the assertions below: SSE2 and SSSE3 use xorps +
; unpcklps against the zeroed register; SSE4.1 and all AVX configurations use a
; single (v)pmovzxdq.
768 define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp {
769 ; SSE2-LABEL: zext_4i32_to_2i64:
770 ; SSE2: # %bb.0: # %entry
771 ; SSE2-NEXT: xorps %xmm1, %xmm1
772 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
775 ; SSSE3-LABEL: zext_4i32_to_2i64:
776 ; SSSE3: # %bb.0: # %entry
777 ; SSSE3-NEXT: xorps %xmm1, %xmm1
778 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
781 ; SSE41-LABEL: zext_4i32_to_2i64:
782 ; SSE41: # %bb.0: # %entry
783 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
786 ; AVX-LABEL: zext_4i32_to_2i64:
787 ; AVX: # %bb.0: # %entry
788 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
791 %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
792 %C = zext <2 x i32> %B to <2 x i64>
; Zero-extends every lane of a <4 x i32> argument to i64 with a single
; `zext <4 x i32> ... to <4 x i64>`.
; Expected lowering per the assertions below: SSE2 and SSSE3 use
; unpcklps/unpckhps against a zeroed register; SSE4.1 uses pmovzxdq for the low
; half and punpckhdq for the high half; AVX1 does the same and joins the halves
; with vinsertf128; AVX2 and AVX-512 use a single ymm vpmovzxdq.
796 define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
797 ; SSE2-LABEL: zext_4i32_to_4i64:
798 ; SSE2: # %bb.0: # %entry
799 ; SSE2-NEXT: movaps %xmm0, %xmm1
800 ; SSE2-NEXT: xorps %xmm2, %xmm2
801 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
802 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
805 ; SSSE3-LABEL: zext_4i32_to_4i64:
806 ; SSSE3: # %bb.0: # %entry
807 ; SSSE3-NEXT: movaps %xmm0, %xmm1
808 ; SSSE3-NEXT: xorps %xmm2, %xmm2
809 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
810 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
813 ; SSE41-LABEL: zext_4i32_to_4i64:
814 ; SSE41: # %bb.0: # %entry
815 ; SSE41-NEXT: movdqa %xmm0, %xmm1
816 ; SSE41-NEXT: pxor %xmm2, %xmm2
817 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
818 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
821 ; AVX1-LABEL: zext_4i32_to_4i64:
822 ; AVX1: # %bb.0: # %entry
823 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
824 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
825 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
826 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
829 ; AVX2-LABEL: zext_4i32_to_4i64:
830 ; AVX2: # %bb.0: # %entry
831 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
834 ; AVX512-LABEL: zext_4i32_to_4i64:
835 ; AVX512: # %bb.0: # %entry
836 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
839 %B = zext <4 x i32> %A to <4 x i64>
; Zero-extends every lane of a <8 x i32> argument to i64 with a single
; `zext <8 x i32> ... to <8 x i64>`.
; Expected lowering per the assertions below: SSE2 and SSSE3 use
; unpcklps/unpckhps against a zeroed register for each input half; SSE4.1 mixes
; pmovzxdq (low halves) with punpckhdq (high halves); AVX1 builds two ymm
; results with vinsertf128; AVX2 uses two ymm vpmovzxdq; AVX-512 uses one zmm
; vpmovzxdq.
843 define <8 x i64> @zext_8i32_to_8i64(<8 x i32> %A) nounwind uwtable readnone ssp {
844 ; SSE2-LABEL: zext_8i32_to_8i64:
845 ; SSE2: # %bb.0: # %entry
846 ; SSE2-NEXT: movaps %xmm1, %xmm3
847 ; SSE2-NEXT: movaps %xmm0, %xmm1
848 ; SSE2-NEXT: xorps %xmm4, %xmm4
849 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
850 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
851 ; SSE2-NEXT: movaps %xmm3, %xmm2
852 ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
853 ; SSE2-NEXT: unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
856 ; SSSE3-LABEL: zext_8i32_to_8i64:
857 ; SSSE3: # %bb.0: # %entry
858 ; SSSE3-NEXT: movaps %xmm1, %xmm3
859 ; SSSE3-NEXT: movaps %xmm0, %xmm1
860 ; SSSE3-NEXT: xorps %xmm4, %xmm4
861 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
862 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
863 ; SSSE3-NEXT: movaps %xmm3, %xmm2
864 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
865 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
868 ; SSE41-LABEL: zext_8i32_to_8i64:
869 ; SSE41: # %bb.0: # %entry
870 ; SSE41-NEXT: movdqa %xmm1, %xmm3
871 ; SSE41-NEXT: movdqa %xmm0, %xmm1
872 ; SSE41-NEXT: pxor %xmm4, %xmm4
873 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
874 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
875 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero
876 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
879 ; AVX1-LABEL: zext_8i32_to_8i64:
880 ; AVX1: # %bb.0: # %entry
881 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
882 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
883 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero
884 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
885 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
886 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
887 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
888 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
889 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
892 ; AVX2-LABEL: zext_8i32_to_8i64:
893 ; AVX2: # %bb.0: # %entry
894 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
895 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
896 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
897 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
900 ; AVX512-LABEL: zext_8i32_to_8i64:
901 ; AVX512: # %bb.0: # %entry
902 ; AVX512-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
905 %B = zext <8 x i32> %A to <8 x i64>
; Loads a <2 x i8> from %ptr and zero-extends it to <2 x i64>.
; Expected lowering per the assertions below: SSE2 loads 16 bits with movzwl,
; moves them into xmm0 and unpacks three times against zero; SSSE3 replaces the
; unpacks with one pshufb; SSE4.1 and all AVX configurations use a single
; (v)pmovzxbq whose source is shown as a memory operand.
909 define <2 x i64> @load_zext_2i8_to_2i64(<2 x i8> *%ptr) {
910 ; SSE2-LABEL: load_zext_2i8_to_2i64:
911 ; SSE2: # %bb.0: # %entry
912 ; SSE2-NEXT: movzwl (%rdi), %eax
913 ; SSE2-NEXT: movd %eax, %xmm0
914 ; SSE2-NEXT: pxor %xmm1, %xmm1
915 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
916 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
917 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
920 ; SSSE3-LABEL: load_zext_2i8_to_2i64:
921 ; SSSE3: # %bb.0: # %entry
922 ; SSSE3-NEXT: movzwl (%rdi), %eax
923 ; SSSE3-NEXT: movd %eax, %xmm0
924 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
927 ; SSE41-LABEL: load_zext_2i8_to_2i64:
928 ; SSE41: # %bb.0: # %entry
929 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
932 ; AVX-LABEL: load_zext_2i8_to_2i64:
933 ; AVX: # %bb.0: # %entry
934 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
937 %X = load <2 x i8>, <2 x i8>* %ptr
938 %Y = zext <2 x i8> %X to <2 x i64>
; Loads a <4 x i8> from %ptr and zero-extends it to <4 x i32>.
; Expected lowering per the assertions below: SSE2 and SSSE3 load with movd and
; unpack twice against a zeroed register (punpcklbw then punpcklwd); SSE4.1 and
; all AVX configurations use a single (v)pmovzxbd whose source is shown as a
; memory operand.
942 define <4 x i32> @load_zext_4i8_to_4i32(<4 x i8> *%ptr) {
943 ; SSE2-LABEL: load_zext_4i8_to_4i32:
944 ; SSE2: # %bb.0: # %entry
945 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
946 ; SSE2-NEXT: pxor %xmm1, %xmm1
947 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
948 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
951 ; SSSE3-LABEL: load_zext_4i8_to_4i32:
952 ; SSSE3: # %bb.0: # %entry
953 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
954 ; SSSE3-NEXT: pxor %xmm1, %xmm1
955 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
956 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
959 ; SSE41-LABEL: load_zext_4i8_to_4i32:
960 ; SSE41: # %bb.0: # %entry
961 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
964 ; AVX-LABEL: load_zext_4i8_to_4i32:
965 ; AVX: # %bb.0: # %entry
966 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
969 %X = load <4 x i8>, <4 x i8>* %ptr
970 %Y = zext <4 x i8> %X to <4 x i32>
; Loads a <4 x i8> from %ptr and zero-extends it to <4 x i64>.
; Expected lowering per the assertions below: SSE2 loads with movd and unpacks
; against a zeroed register; SSSE3 loads with movd and uses two pshufb; SSE4.1
; and AVX1 issue two (v)pmovzxbq with memory sources (AVX1 then joins them with
; vinsertf128); AVX2 and AVX-512 use a single ymm vpmovzxbq from memory.
974 define <4 x i64> @load_zext_4i8_to_4i64(<4 x i8> *%ptr) {
975 ; SSE2-LABEL: load_zext_4i8_to_4i64:
976 ; SSE2: # %bb.0: # %entry
977 ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
978 ; SSE2-NEXT: pxor %xmm2, %xmm2
979 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
980 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
981 ; SSE2-NEXT: movdqa %xmm1, %xmm0
982 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
983 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
986 ; SSSE3-LABEL: load_zext_4i8_to_4i64:
987 ; SSSE3: # %bb.0: # %entry
988 ; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
989 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
990 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
991 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
994 ; SSE41-LABEL: load_zext_4i8_to_4i64:
995 ; SSE41: # %bb.0: # %entry
996 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
997 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1000 ; AVX1-LABEL: load_zext_4i8_to_4i64:
1001 ; AVX1: # %bb.0: # %entry
1002 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1003 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1004 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1007 ; AVX2-LABEL: load_zext_4i8_to_4i64:
1008 ; AVX2: # %bb.0: # %entry
1009 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1012 ; AVX512-LABEL: load_zext_4i8_to_4i64:
1013 ; AVX512: # %bb.0: # %entry
1014 ; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1017 %X = load <4 x i8>, <4 x i8>* %ptr
1018 %Y = zext <4 x i8> %X to <4 x i64>
; Loads a <8 x i8> from %ptr and zero-extends it to <8 x i16>.
; Expected lowering per the assertions below: SSE2 and SSSE3 load with movq and
; interleave with a zeroed register (punpcklbw); SSE4.1 and all AVX
; configurations use a single (v)pmovzxbw whose source is shown as a memory
; operand.
1022 define <8 x i16> @load_zext_8i8_to_8i16(<8 x i8> *%ptr) {
1023 ; SSE2-LABEL: load_zext_8i8_to_8i16:
1024 ; SSE2: # %bb.0: # %entry
1025 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1026 ; SSE2-NEXT: pxor %xmm1, %xmm1
1027 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1030 ; SSSE3-LABEL: load_zext_8i8_to_8i16:
1031 ; SSSE3: # %bb.0: # %entry
1032 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1033 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1034 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1037 ; SSE41-LABEL: load_zext_8i8_to_8i16:
1038 ; SSE41: # %bb.0: # %entry
1039 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1042 ; AVX-LABEL: load_zext_8i8_to_8i16:
1043 ; AVX: # %bb.0: # %entry
1044 ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1047 %X = load <8 x i8>, <8 x i8>* %ptr
1048 %Y = zext <8 x i8> %X to <8 x i16>
; Loads a <8 x i8> from %ptr and zero-extends it to <8 x i32>.
; Expected lowering per the assertions below: SSE2 and SSSE3 load with movq and
; unpack against a zeroed register (punpcklbw, then punpcklwd/punpckhwd);
; SSE4.1 and AVX1 issue two (v)pmovzxbd with memory sources (AVX1 then joins
; them with vinsertf128); AVX2 and AVX-512 use a single ymm vpmovzxbd from
; memory.
1052 define <8 x i32> @load_zext_8i8_to_8i32(<8 x i8> *%ptr) {
1053 ; SSE2-LABEL: load_zext_8i8_to_8i32:
1054 ; SSE2: # %bb.0: # %entry
1055 ; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1056 ; SSE2-NEXT: pxor %xmm2, %xmm2
1057 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1058 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1059 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1060 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1063 ; SSSE3-LABEL: load_zext_8i8_to_8i32:
1064 ; SSSE3: # %bb.0: # %entry
1065 ; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1066 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1067 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1068 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1069 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1070 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1073 ; SSE41-LABEL: load_zext_8i8_to_8i32:
1074 ; SSE41: # %bb.0: # %entry
1075 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1076 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1079 ; AVX1-LABEL: load_zext_8i8_to_8i32:
1080 ; AVX1: # %bb.0: # %entry
1081 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1082 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1083 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1086 ; AVX2-LABEL: load_zext_8i8_to_8i32:
1087 ; AVX2: # %bb.0: # %entry
1088 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1091 ; AVX512-LABEL: load_zext_8i8_to_8i32:
1092 ; AVX512: # %bb.0: # %entry
1093 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1096 %X = load <8 x i8>, <8 x i8>* %ptr
1097 %Y = zext <8 x i8> %X to <8 x i32>
; Loads a full <16 x i8> from %ptr, keeps elements 0..7 via a shufflevector,
; and zero-extends that <8 x i8> to <8 x i32>.
; Expected lowering per the assertions below: SSE2 and SSSE3 load with movdqa
; and unpack against a zeroed register; SSE4.1 and AVX1 also load the whole
; vector with (v)movdqa first and then use (v)pmovzxbd on registers with a
; pshufd in between; AVX2 and AVX-512 use a single ymm vpmovzxbd whose source
; is shown as a memory operand.
1101 define <8 x i32> @load_zext_16i8_to_8i32(<16 x i8> *%ptr) {
1102 ; SSE2-LABEL: load_zext_16i8_to_8i32:
1103 ; SSE2: # %bb.0: # %entry
1104 ; SSE2-NEXT: movdqa (%rdi), %xmm1
1105 ; SSE2-NEXT: pxor %xmm2, %xmm2
1106 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1107 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1108 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1109 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1112 ; SSSE3-LABEL: load_zext_16i8_to_8i32:
1113 ; SSSE3: # %bb.0: # %entry
1114 ; SSSE3-NEXT: movdqa (%rdi), %xmm1
1115 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1116 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1117 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1118 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1119 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1122 ; SSE41-LABEL: load_zext_16i8_to_8i32:
1123 ; SSE41: # %bb.0: # %entry
1124 ; SSE41-NEXT: movdqa (%rdi), %xmm1
1125 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1126 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
1127 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1130 ; AVX1-LABEL: load_zext_16i8_to_8i32:
1131 ; AVX1: # %bb.0: # %entry
1132 ; AVX1-NEXT: vmovdqa (%rdi), %xmm0
1133 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1134 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1135 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1136 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1139 ; AVX2-LABEL: load_zext_16i8_to_8i32:
1140 ; AVX2: # %bb.0: # %entry
1141 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1144 ; AVX512-LABEL: load_zext_16i8_to_8i32:
1145 ; AVX512: # %bb.0: # %entry
1146 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1149 %X = load <16 x i8>, <16 x i8>* %ptr
1150 %Y = shufflevector <16 x i8> %X, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1151 %Z = zext <8 x i8> %Y to <8 x i32>
; Loads a <8 x i8> from %ptr and zero-extends it to <8 x i64>.
; Expected lowering per the assertions below: SSE2 loads with movq and unpacks
; repeatedly against a zeroed register; SSSE3 loads with movq and uses four
; pshufb; SSE4.1 issues four pmovzxbq with memory sources; AVX1 issues four
; vpmovzxbq from memory and pairs them with vinsertf128; AVX2 uses two ymm
; vpmovzxbq from memory; AVX-512 uses one zmm vpmovzxbq from memory.
1155 define <8 x i64> @load_zext_8i8_to_8i64(<8 x i8> *%ptr) {
1156 ; SSE2-LABEL: load_zext_8i8_to_8i64:
1157 ; SSE2: # %bb.0: # %entry
1158 ; SSE2-NEXT: movq {{.*#+}} xmm3 = mem[0],zero
1159 ; SSE2-NEXT: pxor %xmm4, %xmm4
1160 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
1161 ; SSE2-NEXT: movdqa %xmm3, %xmm1
1162 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
1163 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1164 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
1165 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
1166 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
1167 ; SSE2-NEXT: movdqa %xmm3, %xmm2
1168 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
1169 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
1172 ; SSSE3-LABEL: load_zext_8i8_to_8i64:
1173 ; SSSE3: # %bb.0: # %entry
1174 ; SSSE3-NEXT: movq {{.*#+}} xmm3 = mem[0],zero
1175 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
1176 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1177 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
1178 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
1179 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
1180 ; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[4],zero,zero,zero,zero,zero,zero,zero,xmm2[5],zero,zero,zero,zero,zero,zero,zero
1181 ; SSSE3-NEXT: pshufb {{.*#+}} xmm3 = xmm3[6],zero,zero,zero,zero,zero,zero,zero,xmm3[7],zero,zero,zero,zero,zero,zero,zero
1184 ; SSE41-LABEL: load_zext_8i8_to_8i64:
1185 ; SSE41: # %bb.0: # %entry
1186 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1187 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1188 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1189 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1192 ; AVX1-LABEL: load_zext_8i8_to_8i64:
1193 ; AVX1: # %bb.0: # %entry
1194 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1195 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1196 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1197 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1198 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
1199 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
1202 ; AVX2-LABEL: load_zext_8i8_to_8i64:
1203 ; AVX2: # %bb.0: # %entry
1204 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1205 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1208 ; AVX512-LABEL: load_zext_8i8_to_8i64:
1209 ; AVX512: # %bb.0: # %entry
1210 ; AVX512-NEXT: vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
1213 %X = load <8 x i8>, <8 x i8>* %ptr
1214 %Y = zext <8 x i8> %X to <8 x i64>
; Loads a <16 x i8> vector from %ptr and zero-extends it to <16 x i16>.
; The SSE2 and SSSE3 checks expect the loaded value to be unpacked against a
; zeroed register (punpcklbw/punpckhbw); the SSE4.1 and AVX checks expect the
; load to be folded into pmovzxbw, with AVX2 and AVX512 using one 256-bit op.
1218 define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) {
1219 ; SSE2-LABEL: load_zext_16i8_to_16i16:
1220 ; SSE2: # %bb.0: # %entry
1221 ; SSE2-NEXT: movdqa (%rdi), %xmm1
1222 ; SSE2-NEXT: pxor %xmm2, %xmm2
1223 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1224 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1225 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
1228 ; SSSE3-LABEL: load_zext_16i8_to_16i16:
1229 ; SSSE3: # %bb.0: # %entry
1230 ; SSSE3-NEXT: movdqa (%rdi), %xmm1
1231 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1232 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1233 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1234 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
1237 ; SSE41-LABEL: load_zext_16i8_to_16i16:
1238 ; SSE41: # %bb.0: # %entry
1239 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1240 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1243 ; AVX1-LABEL: load_zext_16i8_to_16i16:
1244 ; AVX1: # %bb.0: # %entry
1245 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1246 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1247 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1250 ; AVX2-LABEL: load_zext_16i8_to_16i16:
1251 ; AVX2: # %bb.0: # %entry
1252 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1255 ; AVX512-LABEL: load_zext_16i8_to_16i16:
1256 ; AVX512: # %bb.0: # %entry
1257 ; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1260 %X = load <16 x i8>, <16 x i8>* %ptr
1261 %Y = zext <16 x i8> %X to <16 x i16>
; Loads a <2 x i16> vector from %ptr and zero-extends it to <2 x i64>.
; The SSE2 and SSSE3 checks expect a movd load followed by two unpacks against
; zero (punpcklwd, punpckldq); the SSE4.1 and AVX checks expect a single
; pmovzxwq with the load folded in.
1265 define <2 x i64> @load_zext_2i16_to_2i64(<2 x i16> *%ptr) {
1266 ; SSE2-LABEL: load_zext_2i16_to_2i64:
1267 ; SSE2: # %bb.0: # %entry
1268 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1269 ; SSE2-NEXT: pxor %xmm1, %xmm1
1270 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1271 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1274 ; SSSE3-LABEL: load_zext_2i16_to_2i64:
1275 ; SSSE3: # %bb.0: # %entry
1276 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1277 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1278 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1279 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1282 ; SSE41-LABEL: load_zext_2i16_to_2i64:
1283 ; SSE41: # %bb.0: # %entry
1284 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1287 ; AVX-LABEL: load_zext_2i16_to_2i64:
1288 ; AVX: # %bb.0: # %entry
1289 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1292 %X = load <2 x i16>, <2 x i16>* %ptr
1293 %Y = zext <2 x i16> %X to <2 x i64>
; Loads a <4 x i16> vector from %ptr and zero-extends it to <4 x i32>.
; The SSE2 and SSSE3 checks expect a movq load plus punpcklwd against zero;
; the SSE4.1 and AVX checks expect a single pmovzxwd with the load folded in.
1297 define <4 x i32> @load_zext_4i16_to_4i32(<4 x i16> *%ptr) {
1298 ; SSE2-LABEL: load_zext_4i16_to_4i32:
1299 ; SSE2: # %bb.0: # %entry
1300 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1301 ; SSE2-NEXT: pxor %xmm1, %xmm1
1302 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1305 ; SSSE3-LABEL: load_zext_4i16_to_4i32:
1306 ; SSSE3: # %bb.0: # %entry
1307 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1308 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1309 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1312 ; SSE41-LABEL: load_zext_4i16_to_4i32:
1313 ; SSE41: # %bb.0: # %entry
1314 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1317 ; AVX-LABEL: load_zext_4i16_to_4i32:
1318 ; AVX: # %bb.0: # %entry
1319 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1322 %X = load <4 x i16>, <4 x i16>* %ptr
1323 %Y = zext <4 x i16> %X to <4 x i64>
; Loads a <4 x i16> vector from %ptr and zero-extends it to <4 x i64>.
; The SSE2 and SSSE3 checks expect a movq load widened in two unpack steps
; (punpcklwd, then punpckldq/punpckhdq) into xmm0 and xmm1; the SSE4.1 and
; AVX1 checks expect two pmovzxwq loads (AVX1 then joins them with
; vinsertf128); AVX2 and AVX512 expect a single 256-bit vpmovzxwq.
1327 define <4 x i64> @load_zext_4i16_to_4i64(<4 x i16> *%ptr) {
1328 ; SSE2-LABEL: load_zext_4i16_to_4i64:
1329 ; SSE2: # %bb.0: # %entry
1330 ; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1331 ; SSE2-NEXT: pxor %xmm2, %xmm2
1332 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1333 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1334 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1335 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1338 ; SSSE3-LABEL: load_zext_4i16_to_4i64:
1339 ; SSSE3: # %bb.0: # %entry
1340 ; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1341 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1342 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1343 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1344 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1345 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1348 ; SSE41-LABEL: load_zext_4i16_to_4i64:
1349 ; SSE41: # %bb.0: # %entry
1350 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1351 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1354 ; AVX1-LABEL: load_zext_4i16_to_4i64:
1355 ; AVX1: # %bb.0: # %entry
1356 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1357 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1358 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1361 ; AVX2-LABEL: load_zext_4i16_to_4i64:
1362 ; AVX2: # %bb.0: # %entry
1363 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1366 ; AVX512-LABEL: load_zext_4i16_to_4i64:
1367 ; AVX512: # %bb.0: # %entry
1368 ; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1371 %X = load <4 x i16>, <4 x i16>* %ptr
1372 %Y = zext <4 x i16> %X to <4 x i64>
; Loads an <8 x i16> vector from %ptr and zero-extends it to <8 x i32>.
; The SSE2 and SSSE3 checks expect a full movdqa load split with
; punpcklwd/punpckhwd against zero; the SSE4.1 and AVX1 checks expect two
; pmovzxwd loads; AVX2 and AVX512 expect a single 256-bit vpmovzxwd.
1376 define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {
1377 ; SSE2-LABEL: load_zext_8i16_to_8i32:
1378 ; SSE2: # %bb.0: # %entry
1379 ; SSE2-NEXT: movdqa (%rdi), %xmm1
1380 ; SSE2-NEXT: pxor %xmm2, %xmm2
1381 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1382 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1383 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1386 ; SSSE3-LABEL: load_zext_8i16_to_8i32:
1387 ; SSSE3: # %bb.0: # %entry
1388 ; SSSE3-NEXT: movdqa (%rdi), %xmm1
1389 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1390 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1391 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1392 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1395 ; SSE41-LABEL: load_zext_8i16_to_8i32:
1396 ; SSE41: # %bb.0: # %entry
1397 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1398 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1401 ; AVX1-LABEL: load_zext_8i16_to_8i32:
1402 ; AVX1: # %bb.0: # %entry
1403 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1404 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1405 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1408 ; AVX2-LABEL: load_zext_8i16_to_8i32:
1409 ; AVX2: # %bb.0: # %entry
1410 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1413 ; AVX512-LABEL: load_zext_8i16_to_8i32:
1414 ; AVX512: # %bb.0: # %entry
1415 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1418 %X = load <8 x i16>, <8 x i16>* %ptr
1419 %Y = zext <8 x i16> %X to <8 x i32>
; Loads a <2 x i32> vector from %ptr and zero-extends it to <2 x i64>.
; The SSE2 and SSSE3 checks expect a movsd load interleaved with zero via
; unpcklps; the SSE4.1 and AVX checks expect a single pmovzxdq with the load
; folded in.
1423 define <2 x i64> @load_zext_2i32_to_2i64(<2 x i32> *%ptr) {
1424 ; SSE2-LABEL: load_zext_2i32_to_2i64:
1425 ; SSE2: # %bb.0: # %entry
1426 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1427 ; SSE2-NEXT: xorps %xmm1, %xmm1
1428 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1431 ; SSSE3-LABEL: load_zext_2i32_to_2i64:
1432 ; SSSE3: # %bb.0: # %entry
1433 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1434 ; SSSE3-NEXT: xorps %xmm1, %xmm1
1435 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1438 ; SSE41-LABEL: load_zext_2i32_to_2i64:
1439 ; SSE41: # %bb.0: # %entry
1440 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1443 ; AVX-LABEL: load_zext_2i32_to_2i64:
1444 ; AVX: # %bb.0: # %entry
1445 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1448 %X = load <2 x i32>, <2 x i32>* %ptr
1449 %Y = zext <2 x i32> %X to <2 x i64>
; Loads a <4 x i32> vector from %ptr and zero-extends it to <4 x i64>.
; The SSE2 and SSSE3 checks expect a movaps load split with unpcklps/unpckhps
; against zero; the SSE4.1 and AVX1 checks expect two pmovzxdq loads; AVX2 and
; AVX512 expect a single 256-bit vpmovzxdq.
1453 define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {
1454 ; SSE2-LABEL: load_zext_4i32_to_4i64:
1455 ; SSE2: # %bb.0: # %entry
1456 ; SSE2-NEXT: movaps (%rdi), %xmm1
1457 ; SSE2-NEXT: xorps %xmm2, %xmm2
1458 ; SSE2-NEXT: movaps %xmm1, %xmm0
1459 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1460 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1463 ; SSSE3-LABEL: load_zext_4i32_to_4i64:
1464 ; SSSE3: # %bb.0: # %entry
1465 ; SSSE3-NEXT: movaps (%rdi), %xmm1
1466 ; SSSE3-NEXT: xorps %xmm2, %xmm2
1467 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1468 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1469 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1472 ; SSE41-LABEL: load_zext_4i32_to_4i64:
1473 ; SSE41: # %bb.0: # %entry
1474 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1475 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
1478 ; AVX1-LABEL: load_zext_4i32_to_4i64:
1479 ; AVX1: # %bb.0: # %entry
1480 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1481 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
1482 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1485 ; AVX2-LABEL: load_zext_4i32_to_4i64:
1486 ; AVX2: # %bb.0: # %entry
1487 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1490 ; AVX512-LABEL: load_zext_4i32_to_4i64:
1491 ; AVX512: # %bb.0: # %entry
1492 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1495 %X = load <4 x i32>, <4 x i32>* %ptr
1496 %Y = zext <4 x i32> %X to <4 x i64>
; Zero-extends a <8 x i8> register argument to <8 x i32>.
; The SSE2 and SSSE3 checks expect two unpack stages against zero (punpcklbw,
; then punpcklwd/punpckhwd). The SSE4.1 and AVX1 checks expect pmovzxbd on the
; low four bytes, a pshufd that moves dword 1 (bytes 4-7) to the bottom, and a
; second pmovzxbd. AVX2 and AVX512 expect a single 256-bit vpmovzxbd.
1500 define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
1501 ; SSE2-LABEL: zext_8i8_to_8i32:
1502 ; SSE2: # %bb.0: # %entry
1503 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1504 ; SSE2-NEXT: pxor %xmm2, %xmm2
1505 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1506 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1507 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1508 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1511 ; SSSE3-LABEL: zext_8i8_to_8i32:
1512 ; SSSE3: # %bb.0: # %entry
1513 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1514 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1515 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1516 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1517 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1518 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1521 ; SSE41-LABEL: zext_8i8_to_8i32:
1522 ; SSE41: # %bb.0: # %entry
1523 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1524 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1525 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1526 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1529 ; AVX1-LABEL: zext_8i8_to_8i32:
1530 ; AVX1: # %bb.0: # %entry
1531 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1532 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1533 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1534 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1537 ; AVX2-LABEL: zext_8i8_to_8i32:
1538 ; AVX2: # %bb.0: # %entry
1539 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1542 ; AVX512-LABEL: zext_8i8_to_8i32:
1543 ; AVX512: # %bb.0: # %entry
1544 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1547 %t = zext <8 x i8> %z to <8 x i32>
; Zero extension of <8 x i16> to <8 x i32> written as a shufflevector that
; interleaves each element of %A with a zero element (mask index 8), followed
; by a bitcast. The AVX2 and AVX512 checks expect this to become a single
; vpmovzxwd; the SSE4.1 and AVX1 checks expect pmovzxwd for the low half and
; punpckhwd against zero for the high half.
1551 define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
1552 ; SSE2-LABEL: shuf_zext_8i16_to_8i32:
1553 ; SSE2: # %bb.0: # %entry
1554 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1555 ; SSE2-NEXT: pxor %xmm2, %xmm2
1556 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1557 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1560 ; SSSE3-LABEL: shuf_zext_8i16_to_8i32:
1561 ; SSSE3: # %bb.0: # %entry
1562 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1563 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1564 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1565 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1568 ; SSE41-LABEL: shuf_zext_8i16_to_8i32:
1569 ; SSE41: # %bb.0: # %entry
1570 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1571 ; SSE41-NEXT: pxor %xmm2, %xmm2
1572 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1573 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1576 ; AVX1-LABEL: shuf_zext_8i16_to_8i32:
1577 ; AVX1: # %bb.0: # %entry
1578 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1579 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1580 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1581 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1584 ; AVX2-LABEL: shuf_zext_8i16_to_8i32:
1585 ; AVX2: # %bb.0: # %entry
1586 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1589 ; AVX512-LABEL: shuf_zext_8i16_to_8i32:
1590 ; AVX512: # %bb.0: # %entry
1591 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1594 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
1595 %Z = bitcast <16 x i16> %B to <8 x i32>
; Zero extension of <4 x i32> to <4 x i64> written as a shufflevector that
; interleaves each element of %A with a zero element (mask index 4), followed
; by a bitcast. The AVX2 and AVX512 checks expect a single vpmovzxdq; the
; SSE4.1 and AVX1 checks expect pmovzxdq for the low half and punpckhdq
; against zero for the high half; SSE2 and SSSE3 use unpcklps/unpckhps.
1599 define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
1600 ; SSE2-LABEL: shuf_zext_4i32_to_4i64:
1601 ; SSE2: # %bb.0: # %entry
1602 ; SSE2-NEXT: movaps %xmm0, %xmm1
1603 ; SSE2-NEXT: xorps %xmm2, %xmm2
1604 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1605 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1608 ; SSSE3-LABEL: shuf_zext_4i32_to_4i64:
1609 ; SSSE3: # %bb.0: # %entry
1610 ; SSSE3-NEXT: movaps %xmm0, %xmm1
1611 ; SSSE3-NEXT: xorps %xmm2, %xmm2
1612 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1613 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1616 ; SSE41-LABEL: shuf_zext_4i32_to_4i64:
1617 ; SSE41: # %bb.0: # %entry
1618 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1619 ; SSE41-NEXT: pxor %xmm2, %xmm2
1620 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1621 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1624 ; AVX1-LABEL: shuf_zext_4i32_to_4i64:
1625 ; AVX1: # %bb.0: # %entry
1626 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1627 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1628 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1629 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1632 ; AVX2-LABEL: shuf_zext_4i32_to_4i64:
1633 ; AVX2: # %bb.0: # %entry
1634 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1637 ; AVX512-LABEL: shuf_zext_4i32_to_4i64:
1638 ; AVX512: # %bb.0: # %entry
1639 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1642 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
1643 %Z = bitcast <8 x i32> %B to <4 x i64>
; Zero extension of <8 x i8> to <8 x i32> written as a shufflevector that
; follows each byte of %A with three zero bytes (mask index 8), then bitcasts
; the <32 x i8> result. The expected instruction sequences are the same as
; those checked for the plain zext form in zext_8i8_to_8i32.
1647 define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) {
1648 ; SSE2-LABEL: shuf_zext_8i8_to_8i32:
1649 ; SSE2: # %bb.0: # %entry
1650 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1651 ; SSE2-NEXT: pxor %xmm2, %xmm2
1652 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1653 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1654 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1655 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1658 ; SSSE3-LABEL: shuf_zext_8i8_to_8i32:
1659 ; SSSE3: # %bb.0: # %entry
1660 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1661 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1662 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1663 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1664 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1665 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1668 ; SSE41-LABEL: shuf_zext_8i8_to_8i32:
1669 ; SSE41: # %bb.0: # %entry
1670 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1671 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1672 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1673 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1676 ; AVX1-LABEL: shuf_zext_8i8_to_8i32:
1677 ; AVX1: # %bb.0: # %entry
1678 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1679 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1680 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1681 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1684 ; AVX2-LABEL: shuf_zext_8i8_to_8i32:
1685 ; AVX2: # %bb.0: # %entry
1686 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1689 ; AVX512-LABEL: shuf_zext_8i8_to_8i32:
1690 ; AVX512: # %bb.0: # %entry
1691 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1694 %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8>
1695 %Z = bitcast <32 x i8> %B to <8 x i32>
; Zero-extends bytes 6 and 7 of %A into the two i64 lanes, written as a
; shufflevector against zero (mask index 16) plus a bitcast.
; Expected lowering differs per run: SSE2 uses a three-step unpack chain,
; SSSE3 a single pshufb, and SSE4.1 / AVX1 / AVX2-SLOW / AVX512F a psrlq $48
; followed by pmovzxbq. The AVX2-FAST and AVX512BW runs, which enable
; fast-variable-perlane-shuffle, expect a single vpshufb instead.
1699 define <2 x i64> @shuf_zext_16i8_to_2i64_offset6(<16 x i8> %A) nounwind uwtable readnone ssp {
1700 ; SSE2-LABEL: shuf_zext_16i8_to_2i64_offset6:
1701 ; SSE2: # %bb.0: # %entry
1702 ; SSE2-NEXT: pxor %xmm1, %xmm1
1703 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1704 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1705 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1708 ; SSSE3-LABEL: shuf_zext_16i8_to_2i64_offset6:
1709 ; SSSE3: # %bb.0: # %entry
1710 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
1713 ; SSE41-LABEL: shuf_zext_16i8_to_2i64_offset6:
1714 ; SSE41: # %bb.0: # %entry
1715 ; SSE41-NEXT: psrlq $48, %xmm0
1716 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1719 ; AVX1-LABEL: shuf_zext_16i8_to_2i64_offset6:
1720 ; AVX1: # %bb.0: # %entry
1721 ; AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0
1722 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1725 ; AVX2-SLOW-LABEL: shuf_zext_16i8_to_2i64_offset6:
1726 ; AVX2-SLOW: # %bb.0: # %entry
1727 ; AVX2-SLOW-NEXT: vpsrlq $48, %xmm0, %xmm0
1728 ; AVX2-SLOW-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1729 ; AVX2-SLOW-NEXT: retq
1731 ; AVX2-FAST-LABEL: shuf_zext_16i8_to_2i64_offset6:
1732 ; AVX2-FAST: # %bb.0: # %entry
1733 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
1734 ; AVX2-FAST-NEXT: retq
1736 ; AVX512F-LABEL: shuf_zext_16i8_to_2i64_offset6:
1737 ; AVX512F: # %bb.0: # %entry
1738 ; AVX512F-NEXT: vpsrlq $48, %xmm0, %xmm0
1739 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1740 ; AVX512F-NEXT: retq
1742 ; AVX512BW-LABEL: shuf_zext_16i8_to_2i64_offset6:
1743 ; AVX512BW: # %bb.0: # %entry
1744 ; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
1745 ; AVX512BW-NEXT: retq
1747 %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1748 %Z = bitcast <16 x i8> %B to <2 x i64>
; Zero-extends bytes 11 through 14 of %A into four i64 lanes, written as a
; shufflevector against zero (mask index 16) plus a bitcast.
; SSE2 is expected to shift each 64-bit lane right by one byte (psrlq $8) and
; then unpack against zero; SSSE3 uses two pshufb; SSE4.1 and AVX1 use a
; psrldq + pmovzxbq pair per 128-bit half; AVX2 and AVX512 expect one vpsrldq
; followed by one 256-bit vpmovzxbq.
1752 define <4 x i64> @shuf_zext_16i8_to_4i64_offset11(<16 x i8> %A) nounwind uwtable readnone ssp {
1753 ; SSE2-LABEL: shuf_zext_16i8_to_4i64_offset11:
1754 ; SSE2: # %bb.0: # %entry
1755 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1756 ; SSE2-NEXT: psrlq $8, %xmm1
1757 ; SSE2-NEXT: pxor %xmm2, %xmm2
1758 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
1759 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1760 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1761 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1762 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1763 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1766 ; SSSE3-LABEL: shuf_zext_16i8_to_4i64_offset11:
1767 ; SSSE3: # %bb.0: # %entry
1768 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1769 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[11],zero,zero,zero,zero,zero,zero,zero,xmm0[12],zero,zero,zero,zero,zero,zero,zero
1770 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[13],zero,zero,zero,zero,zero,zero,zero,xmm1[14],zero,zero,zero,zero,zero,zero,zero
1773 ; SSE41-LABEL: shuf_zext_16i8_to_4i64_offset11:
1774 ; SSE41: # %bb.0: # %entry
1775 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1776 ; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1777 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1778 ; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1779 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1780 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1783 ; AVX1-LABEL: shuf_zext_16i8_to_4i64_offset11:
1784 ; AVX1: # %bb.0: # %entry
1785 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1786 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1787 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1788 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1789 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1792 ; AVX2-LABEL: shuf_zext_16i8_to_4i64_offset11:
1793 ; AVX2: # %bb.0: # %entry
1794 ; AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1795 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
1798 ; AVX512-LABEL: shuf_zext_16i8_to_4i64_offset11:
1799 ; AVX512: # %bb.0: # %entry
1800 ; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1801 ; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
1804 %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <32 x i32> <i32 11, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1805 %Z = bitcast <32 x i8> %B to <4 x i64>
; Zero-extends i16 elements 3 and 4 of %A into the two i64 lanes, written as
; a shufflevector against zero (mask index 8) plus a bitcast.
; The "offset6" in the name matches the byte offset of element 3 (the pshufb
; checks select source bytes 6-7 and 8-9), not an element index.
; SSE4.1 / AVX1 / AVX2-SLOW / AVX512F expect psrldq + pmovzxwq, while the
; AVX2-FAST and AVX512BW runs (and SSSE3) expect a single pshufb.
1809 define <2 x i64> @shuf_zext_8i16_to_2i64_offset6(<8 x i16> %A) nounwind uwtable readnone ssp {
1810 ; SSE2-LABEL: shuf_zext_8i16_to_2i64_offset6:
1811 ; SSE2: # %bb.0: # %entry
1812 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1813 ; SSE2-NEXT: pxor %xmm1, %xmm1
1814 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1815 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1818 ; SSSE3-LABEL: shuf_zext_8i16_to_2i64_offset6:
1819 ; SSSE3: # %bb.0: # %entry
1820 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero
1823 ; SSE41-LABEL: shuf_zext_8i16_to_2i64_offset6:
1824 ; SSE41: # %bb.0: # %entry
1825 ; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1826 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1829 ; AVX1-LABEL: shuf_zext_8i16_to_2i64_offset6:
1830 ; AVX1: # %bb.0: # %entry
1831 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1832 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1835 ; AVX2-SLOW-LABEL: shuf_zext_8i16_to_2i64_offset6:
1836 ; AVX2-SLOW: # %bb.0: # %entry
1837 ; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1838 ; AVX2-SLOW-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1839 ; AVX2-SLOW-NEXT: retq
1841 ; AVX2-FAST-LABEL: shuf_zext_8i16_to_2i64_offset6:
1842 ; AVX2-FAST: # %bb.0: # %entry
1843 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero
1844 ; AVX2-FAST-NEXT: retq
1846 ; AVX512F-LABEL: shuf_zext_8i16_to_2i64_offset6:
1847 ; AVX512F: # %bb.0: # %entry
1848 ; AVX512F-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1849 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1850 ; AVX512F-NEXT: retq
1852 ; AVX512BW-LABEL: shuf_zext_8i16_to_2i64_offset6:
1853 ; AVX512BW: # %bb.0: # %entry
1854 ; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero
1855 ; AVX512BW-NEXT: retq
1857 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8>
1858 %Z = bitcast <8 x i16> %B to <2 x i64>
; Zero-extends i16 elements 2 through 5 of %A into four i64 lanes, written as
; a shufflevector against zero (mask index 8) plus a bitcast. Here "offset2"
; is the index of the first i16 element taken from %A.
; SSE2 and SSSE3 are expected to unpack against zero; SSE4.1 and AVX1 use a
; pshufd + pmovzxwq pair per 128-bit half; AVX2 and AVX512 expect one vpshufd
; that gathers the four words followed by a single 256-bit vpmovzxwq.
1862 define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable readnone ssp {
1863 ; SSE2-LABEL: shuf_zext_8i16_to_4i64_offset2:
1864 ; SSE2: # %bb.0: # %entry
1865 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1866 ; SSE2-NEXT: pxor %xmm2, %xmm2
1867 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1868 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1869 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1870 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1873 ; SSSE3-LABEL: shuf_zext_8i16_to_4i64_offset2:
1874 ; SSSE3: # %bb.0: # %entry
1875 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1876 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1877 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1878 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1879 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1880 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1883 ; SSE41-LABEL: shuf_zext_8i16_to_4i64_offset2:
1884 ; SSE41: # %bb.0: # %entry
1885 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1886 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1887 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1888 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1889 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1892 ; AVX1-LABEL: shuf_zext_8i16_to_4i64_offset2:
1893 ; AVX1: # %bb.0: # %entry
1894 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1895 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1896 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1897 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1898 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1901 ; AVX2-LABEL: shuf_zext_8i16_to_4i64_offset2:
1902 ; AVX2: # %bb.0: # %entry
1903 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
1904 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1907 ; AVX512-LABEL: shuf_zext_8i16_to_4i64_offset2:
1908 ; AVX512: # %bb.0: # %entry
1909 ; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
1910 ; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1913 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8>
1914 %Z = bitcast <16 x i16> %B to <4 x i64>
; Zero-extension written as a shuffle: the mask interleaves elements 1, 2, 3
; and 4 of %A with index 8 (element 0 of the zeroinitializer operand), then the
; <8 x i16> result is bitcast to <4 x i32>. The expected output for the
; AVX2-FAST and AVX512BW runs is a single vpshufb; the other runs expect a byte
; shift followed by an unpack or pmovzxwd.
1918 define <4 x i32> @shuf_zext_8i16_to_4i32_offset1(<8 x i16> %A) nounwind uwtable readnone ssp {
1919 ; SSE2-LABEL: shuf_zext_8i16_to_4i32_offset1:
1920 ; SSE2: # %bb.0: # %entry
1921 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1922 ; SSE2-NEXT: pxor %xmm1, %xmm1
1923 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1926 ; SSSE3-LABEL: shuf_zext_8i16_to_4i32_offset1:
1927 ; SSSE3: # %bb.0: # %entry
1928 ; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1929 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1930 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1933 ; SSE41-LABEL: shuf_zext_8i16_to_4i32_offset1:
1934 ; SSE41: # %bb.0: # %entry
1935 ; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1936 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1939 ; AVX1-LABEL: shuf_zext_8i16_to_4i32_offset1:
1940 ; AVX1: # %bb.0: # %entry
1941 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1942 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1945 ; AVX2-SLOW-LABEL: shuf_zext_8i16_to_4i32_offset1:
1946 ; AVX2-SLOW: # %bb.0: # %entry
1947 ; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1948 ; AVX2-SLOW-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1949 ; AVX2-SLOW-NEXT: retq
1951 ; AVX2-FAST-LABEL: shuf_zext_8i16_to_4i32_offset1:
1952 ; AVX2-FAST: # %bb.0: # %entry
1953 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,xmm0[4,5],zero,zero,xmm0[6,7],zero,zero,xmm0[8,9],zero,zero
1954 ; AVX2-FAST-NEXT: retq
1956 ; AVX512F-LABEL: shuf_zext_8i16_to_4i32_offset1:
1957 ; AVX512F: # %bb.0: # %entry
1958 ; AVX512F-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1959 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1960 ; AVX512F-NEXT: retq
1962 ; AVX512BW-LABEL: shuf_zext_8i16_to_4i32_offset1:
1963 ; AVX512BW: # %bb.0: # %entry
1964 ; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,xmm0[4,5],zero,zero,xmm0[6,7],zero,zero,xmm0[8,9],zero,zero
1965 ; AVX512BW-NEXT: retq
1967 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8>
1968 %Z = bitcast <8 x i16> %B to <4 x i32>
; Zero-extension written as a shuffle with undef lanes: the even mask slots
; take elements 3, 4, 5, 6 and 7 of %A followed by three undef entries, and
; every odd slot is index 8 (element 0 of the zeroinitializer operand). The
; <16 x i16> result is bitcast to <8 x i32>.
1972 define <8 x i32> @shuf_zext_8i16_to_8i32_offset3(<8 x i16> %A) nounwind uwtable readnone ssp {
1973 ; SSE2-LABEL: shuf_zext_8i16_to_8i32_offset3:
1974 ; SSE2: # %bb.0: # %entry
1975 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1976 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1977 ; SSE2-NEXT: pxor %xmm2, %xmm2
1978 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1979 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1982 ; SSSE3-LABEL: shuf_zext_8i16_to_8i32_offset3:
1983 ; SSSE3: # %bb.0: # %entry
1984 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1985 ; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1986 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1987 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1988 ; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1991 ; SSE41-LABEL: shuf_zext_8i16_to_8i32_offset3:
1992 ; SSE41: # %bb.0: # %entry
1993 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1994 ; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1995 ; SSE41-NEXT: pxor %xmm2, %xmm2
1996 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1997 ; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2000 ; AVX1-LABEL: shuf_zext_8i16_to_8i32_offset3:
2001 ; AVX1: # %bb.0: # %entry
2002 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2003 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
2004 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2005 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2006 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2009 ; AVX2-LABEL: shuf_zext_8i16_to_8i32_offset3:
2010 ; AVX2: # %bb.0: # %entry
2011 ; AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
2012 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2015 ; AVX512-LABEL: shuf_zext_8i16_to_8i32_offset3:
2016 ; AVX512: # %bb.0: # %entry
2017 ; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
2018 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2021 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8, i32 undef, i32 8, i32 undef, i32 8, i32 undef, i32 8>
2022 %Z = bitcast <16 x i16> %B to <8 x i32>
; Zero-extension of the upper half of a 256-bit input written as a shuffle: the
; even mask slots take elements 8, 9, 10, 11, 12, undef, 14, undef of %A and
; every odd slot is index 16 (element 0 of the zeroinitializer operand). The
; <16 x i16> result is bitcast to <8 x i32>.
2026 define <8 x i32> @shuf_zext_16i16_to_8i32_offset8(<16 x i16> %A) nounwind uwtable readnone ssp {
2027 ; SSE2-LABEL: shuf_zext_16i16_to_8i32_offset8:
2028 ; SSE2: # %bb.0: # %entry
2029 ; SSE2-NEXT: pxor %xmm2, %xmm2
2030 ; SSE2-NEXT: movdqa %xmm1, %xmm0
2031 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2032 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2035 ; SSSE3-LABEL: shuf_zext_16i16_to_8i32_offset8:
2036 ; SSSE3: # %bb.0: # %entry
2037 ; SSSE3-NEXT: pxor %xmm2, %xmm2
2038 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
2039 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2040 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2043 ; SSE41-LABEL: shuf_zext_16i16_to_8i32_offset8:
2044 ; SSE41: # %bb.0: # %entry
2045 ; SSE41-NEXT: pxor %xmm2, %xmm2
2046 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
2047 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2050 ; AVX1-LABEL: shuf_zext_16i16_to_8i32_offset8:
2051 ; AVX1: # %bb.0: # %entry
2052 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2053 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
2054 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2055 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2056 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2059 ; AVX2-LABEL: shuf_zext_16i16_to_8i32_offset8:
2060 ; AVX2: # %bb.0: # %entry
2061 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
2062 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2065 ; AVX512-LABEL: shuf_zext_16i16_to_8i32_offset8:
2066 ; AVX512: # %bb.0: # %entry
2067 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
2068 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2071 %B = shufflevector <16 x i16> %A, <16 x i16> zeroinitializer, <16 x i32> <i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 undef, i32 16, i32 14, i32 16, i32 undef, i32 16>
2072 %Z = bitcast <16 x i16> %B to <8 x i32>
; Zero-extension written as a shuffle: the mask interleaves elements 2 and 3 of
; %A with index 4 (element 0 of the zeroinitializer operand), then the
; <4 x i32> result is bitcast to <2 x i64>. All SSE runs share one expected
; sequence and all AVX runs share another (a high unpack against a zeroed
; register).
2076 define <2 x i64> @shuf_zext_4i32_to_2i64_offset2(<4 x i32> %A) nounwind uwtable readnone ssp {
2077 ; SSE-LABEL: shuf_zext_4i32_to_2i64_offset2:
2078 ; SSE: # %bb.0: # %entry
2079 ; SSE-NEXT: xorps %xmm1, %xmm1
2080 ; SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2083 ; AVX-LABEL: shuf_zext_4i32_to_2i64_offset2:
2084 ; AVX: # %bb.0: # %entry
2085 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
2086 ; AVX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2089 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 3, i32 4>
2090 %Z = bitcast <4 x i32> %B to <2 x i64>
; Zero-extension written as a shuffle with undef lanes: the even mask slots are
; undef, 2, 3, undef (indices into %A) and every odd slot is index 4 (element 0
; of the zeroinitializer operand). The <8 x i32> result is bitcast to
; <4 x i64>.
2094 define <4 x i64> @shuf_zext_4i32_to_4i64_offset1(<4 x i32> %A) nounwind uwtable readnone ssp {
2095 ; SSE2-LABEL: shuf_zext_4i32_to_4i64_offset1:
2096 ; SSE2: # %bb.0: # %entry
2097 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2098 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
2099 ; SSE2-NEXT: pand %xmm1, %xmm0
2100 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2103 ; SSSE3-LABEL: shuf_zext_4i32_to_4i64_offset1:
2104 ; SSSE3: # %bb.0: # %entry
2105 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
2106 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
2107 ; SSSE3-NEXT: pand %xmm1, %xmm0
2108 ; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2111 ; SSE41-LABEL: shuf_zext_4i32_to_4i64_offset1:
2112 ; SSE41: # %bb.0: # %entry
2113 ; SSE41-NEXT: movdqa %xmm0, %xmm1
2114 ; SSE41-NEXT: pxor %xmm0, %xmm0
2115 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
2116 ; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2119 ; AVX1-LABEL: shuf_zext_4i32_to_4i64_offset1:
2120 ; AVX1: # %bb.0: # %entry
2121 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
2122 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
2123 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2124 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2127 ; AVX2-LABEL: shuf_zext_4i32_to_4i64_offset1:
2128 ; AVX2: # %bb.0: # %entry
2129 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
2130 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2133 ; AVX512-LABEL: shuf_zext_4i32_to_4i64_offset1:
2134 ; AVX512: # %bb.0: # %entry
2135 ; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
2136 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2139 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 undef, i32 4, i32 2, i32 4, i32 3, i32 4, i32 undef, i32 4>
2140 %Z = bitcast <8 x i32> %B to <4 x i64>
; Plain zext of <32 x i8> to <32 x i32>, a 1024-bit result. In the expected
; output for the SSE-level runs the result is written with eight 16-byte stores
; at offsets 0 through 112 from %rdi, and %rdi is copied to %rax. The AVX1 and
; AVX2 runs expect the result in ymm0-ymm3 and the AVX512 runs in zmm0-zmm1.
2144 define <32 x i32> @zext_32i8_to_32i32(<32 x i8> %x) {
2145 ; SSE2-LABEL: zext_32i8_to_32i32:
2147 ; SSE2-NEXT: movq %rdi, %rax
2148 ; SSE2-NEXT: pxor %xmm2, %xmm2
2149 ; SSE2-NEXT: movdqa %xmm0, %xmm3
2150 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2151 ; SSE2-NEXT: movdqa %xmm3, %xmm8
2152 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm2[0],xmm8[1],xmm2[1],xmm8[2],xmm2[2],xmm8[3],xmm2[3]
2153 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2154 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
2155 ; SSE2-NEXT: movdqa %xmm0, %xmm5
2156 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
2157 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2158 ; SSE2-NEXT: movdqa %xmm1, %xmm6
2159 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2160 ; SSE2-NEXT: movdqa %xmm6, %xmm7
2161 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm2[0],xmm7[1],xmm2[1],xmm7[2],xmm2[2],xmm7[3],xmm2[3]
2162 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2163 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
2164 ; SSE2-NEXT: movdqa %xmm1, %xmm4
2165 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2166 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2167 ; SSE2-NEXT: movdqa %xmm1, 112(%rdi)
2168 ; SSE2-NEXT: movdqa %xmm4, 96(%rdi)
2169 ; SSE2-NEXT: movdqa %xmm6, 80(%rdi)
2170 ; SSE2-NEXT: movdqa %xmm7, 64(%rdi)
2171 ; SSE2-NEXT: movdqa %xmm0, 48(%rdi)
2172 ; SSE2-NEXT: movdqa %xmm5, 32(%rdi)
2173 ; SSE2-NEXT: movdqa %xmm3, 16(%rdi)
2174 ; SSE2-NEXT: movdqa %xmm8, (%rdi)
2177 ; SSSE3-LABEL: zext_32i8_to_32i32:
2179 ; SSSE3-NEXT: movq %rdi, %rax
2180 ; SSSE3-NEXT: pxor %xmm2, %xmm2
2181 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
2182 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2183 ; SSSE3-NEXT: movdqa %xmm3, %xmm8
2184 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm2[0],xmm8[1],xmm2[1],xmm8[2],xmm2[2],xmm8[3],xmm2[3]
2185 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2186 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
2187 ; SSSE3-NEXT: movdqa %xmm0, %xmm5
2188 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
2189 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2190 ; SSSE3-NEXT: movdqa %xmm1, %xmm6
2191 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2192 ; SSSE3-NEXT: movdqa %xmm6, %xmm7
2193 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm2[0],xmm7[1],xmm2[1],xmm7[2],xmm2[2],xmm7[3],xmm2[3]
2194 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2195 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
2196 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
2197 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2198 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2199 ; SSSE3-NEXT: movdqa %xmm1, 112(%rdi)
2200 ; SSSE3-NEXT: movdqa %xmm4, 96(%rdi)
2201 ; SSSE3-NEXT: movdqa %xmm6, 80(%rdi)
2202 ; SSSE3-NEXT: movdqa %xmm7, 64(%rdi)
2203 ; SSSE3-NEXT: movdqa %xmm0, 48(%rdi)
2204 ; SSSE3-NEXT: movdqa %xmm5, 32(%rdi)
2205 ; SSSE3-NEXT: movdqa %xmm3, 16(%rdi)
2206 ; SSSE3-NEXT: movdqa %xmm8, (%rdi)
2209 ; SSE41-LABEL: zext_32i8_to_32i32:
2211 ; SSE41-NEXT: movq %rdi, %rax
2212 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2213 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,1,1]
2214 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
2215 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3]
2216 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
2217 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
2218 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2219 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm5 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2220 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm1[1,1,1,1]
2221 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero
2222 ; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm1[2,3,2,3]
2223 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
2224 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
2225 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2226 ; SSE41-NEXT: movdqa %xmm1, 112(%rdi)
2227 ; SSE41-NEXT: movdqa %xmm7, 96(%rdi)
2228 ; SSE41-NEXT: movdqa %xmm6, 80(%rdi)
2229 ; SSE41-NEXT: movdqa %xmm5, 64(%rdi)
2230 ; SSE41-NEXT: movdqa %xmm0, 48(%rdi)
2231 ; SSE41-NEXT: movdqa %xmm4, 32(%rdi)
2232 ; SSE41-NEXT: movdqa %xmm3, 16(%rdi)
2233 ; SSE41-NEXT: movdqa %xmm2, (%rdi)
2236 ; AVX1-LABEL: zext_32i8_to_32i32:
2238 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2239 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
2240 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
2241 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm4
2242 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2243 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
2244 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm3[1,1,1,1]
2245 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
2246 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
2247 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
2248 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2249 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
2250 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2251 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
2252 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm3[2,3,2,3]
2253 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2254 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
2255 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
2256 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm3
2257 ; AVX1-NEXT: vmovaps %ymm4, %ymm0
2260 ; AVX2-LABEL: zext_32i8_to_32i32:
2262 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2263 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
2264 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero
2265 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2266 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2267 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm3[2,3,2,3]
2268 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2269 ; AVX2-NEXT: vmovdqa %ymm4, %ymm0
2272 ; AVX512-LABEL: zext_32i8_to_32i32:
2274 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
2275 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
2276 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
2277 ; AVX512-NEXT: vmovdqa64 %zmm2, %zmm0
2279 %res = zext <32 x i8>%x to <32 x i32>
; Loads a <2 x i8> from %addr with alignment 1, zero-extends it to <2 x i32>
; and adds the extended value to itself. In every run the expected output
; begins with one 16-bit scalar load (movzwl) that is moved into a vector
; register before the extension.
2283 define <2 x i32> @zext_2i8_to_2i32(<2 x i8>* %addr) {
2284 ; SSE2-LABEL: zext_2i8_to_2i32:
2286 ; SSE2-NEXT: movzwl (%rdi), %eax
2287 ; SSE2-NEXT: movd %eax, %xmm0
2288 ; SSE2-NEXT: pxor %xmm1, %xmm1
2289 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2290 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2291 ; SSE2-NEXT: paddd %xmm0, %xmm0
2294 ; SSSE3-LABEL: zext_2i8_to_2i32:
2296 ; SSSE3-NEXT: movzwl (%rdi), %eax
2297 ; SSSE3-NEXT: movd %eax, %xmm0
2298 ; SSSE3-NEXT: pxor %xmm1, %xmm1
2299 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2300 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2301 ; SSSE3-NEXT: paddd %xmm0, %xmm0
2304 ; SSE41-LABEL: zext_2i8_to_2i32:
2306 ; SSE41-NEXT: movzwl (%rdi), %eax
2307 ; SSE41-NEXT: movd %eax, %xmm0
2308 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2309 ; SSE41-NEXT: paddd %xmm0, %xmm0
2312 ; AVX-LABEL: zext_2i8_to_2i32:
2314 ; AVX-NEXT: movzwl (%rdi), %eax
2315 ; AVX-NEXT: vmovd %eax, %xmm0
2316 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2317 ; AVX-NEXT: vpaddd %xmm0, %xmm0, %xmm0
2319 %x = load <2 x i8>, <2 x i8>* %addr, align 1
2320 %y = zext <2 x i8> %x to <2 x i32>
2321 %z = add <2 x i32>%y, %y
; Loads a vector of four non-power-of-two-width integers (<4 x i17>) from %ptr
; and zero-extends it to <4 x i32>. The expected output uses scalar loads from
; (%rdi) and 8(%rdi), shifts by 17, 34 and 51 (plus a left shift by 13 on the
; second load), and a final vector AND. In the AVX2 and AVX512 runs the AND
; mask is a broadcast of 131071 (2^17 - 1); the other runs use a constant-pool
; operand.
2325 define <4 x i32> @zext_4i17_to_4i32(<4 x i17>* %ptr) {
2326 ; SSE2-LABEL: zext_4i17_to_4i32:
2328 ; SSE2-NEXT: movq (%rdi), %rax
2329 ; SSE2-NEXT: movd %eax, %xmm0
2330 ; SSE2-NEXT: movq %rax, %rcx
2331 ; SSE2-NEXT: shrq $17, %rcx
2332 ; SSE2-NEXT: movd %ecx, %xmm1
2333 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2334 ; SSE2-NEXT: movl 8(%rdi), %ecx
2335 ; SSE2-NEXT: shll $13, %ecx
2336 ; SSE2-NEXT: movq %rax, %rdx
2337 ; SSE2-NEXT: shrq $51, %rdx
2338 ; SSE2-NEXT: orl %ecx, %edx
2339 ; SSE2-NEXT: movd %edx, %xmm1
2340 ; SSE2-NEXT: shrq $34, %rax
2341 ; SSE2-NEXT: movd %eax, %xmm2
2342 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2343 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2344 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2347 ; SSSE3-LABEL: zext_4i17_to_4i32:
2349 ; SSSE3-NEXT: movq (%rdi), %rax
2350 ; SSSE3-NEXT: movd %eax, %xmm0
2351 ; SSSE3-NEXT: movq %rax, %rcx
2352 ; SSSE3-NEXT: shrq $17, %rcx
2353 ; SSSE3-NEXT: movd %ecx, %xmm1
2354 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2355 ; SSSE3-NEXT: movl 8(%rdi), %ecx
2356 ; SSSE3-NEXT: shll $13, %ecx
2357 ; SSSE3-NEXT: movq %rax, %rdx
2358 ; SSSE3-NEXT: shrq $51, %rdx
2359 ; SSSE3-NEXT: orl %ecx, %edx
2360 ; SSSE3-NEXT: movd %edx, %xmm1
2361 ; SSSE3-NEXT: shrq $34, %rax
2362 ; SSSE3-NEXT: movd %eax, %xmm2
2363 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2364 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2365 ; SSSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2368 ; SSE41-LABEL: zext_4i17_to_4i32:
2370 ; SSE41-NEXT: movl 8(%rdi), %eax
2371 ; SSE41-NEXT: shll $13, %eax
2372 ; SSE41-NEXT: movq (%rdi), %rcx
2373 ; SSE41-NEXT: movq %rcx, %rdx
2374 ; SSE41-NEXT: shrq $51, %rdx
2375 ; SSE41-NEXT: orl %eax, %edx
2376 ; SSE41-NEXT: movq %rcx, %rax
2377 ; SSE41-NEXT: shrq $17, %rax
2378 ; SSE41-NEXT: movd %ecx, %xmm0
2379 ; SSE41-NEXT: pinsrd $1, %eax, %xmm0
2380 ; SSE41-NEXT: shrq $34, %rcx
2381 ; SSE41-NEXT: pinsrd $2, %ecx, %xmm0
2382 ; SSE41-NEXT: pinsrd $3, %edx, %xmm0
2383 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2386 ; AVX1-LABEL: zext_4i17_to_4i32:
2388 ; AVX1-NEXT: movl 8(%rdi), %eax
2389 ; AVX1-NEXT: shll $13, %eax
2390 ; AVX1-NEXT: movq (%rdi), %rcx
2391 ; AVX1-NEXT: movq %rcx, %rdx
2392 ; AVX1-NEXT: shrq $51, %rdx
2393 ; AVX1-NEXT: orl %eax, %edx
2394 ; AVX1-NEXT: movq %rcx, %rax
2395 ; AVX1-NEXT: shrq $17, %rax
2396 ; AVX1-NEXT: vmovd %ecx, %xmm0
2397 ; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
2398 ; AVX1-NEXT: shrq $34, %rcx
2399 ; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
2400 ; AVX1-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
2401 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2404 ; AVX2-LABEL: zext_4i17_to_4i32:
2406 ; AVX2-NEXT: movl 8(%rdi), %eax
2407 ; AVX2-NEXT: shll $13, %eax
2408 ; AVX2-NEXT: movq (%rdi), %rcx
2409 ; AVX2-NEXT: movq %rcx, %rdx
2410 ; AVX2-NEXT: shrq $51, %rdx
2411 ; AVX2-NEXT: orl %eax, %edx
2412 ; AVX2-NEXT: movq %rcx, %rax
2413 ; AVX2-NEXT: shrq $17, %rax
2414 ; AVX2-NEXT: vmovd %ecx, %xmm0
2415 ; AVX2-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
2416 ; AVX2-NEXT: shrq $34, %rcx
2417 ; AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
2418 ; AVX2-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
2419 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [131071,131071,131071,131071]
2420 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
2423 ; AVX512-LABEL: zext_4i17_to_4i32:
2425 ; AVX512-NEXT: movl 8(%rdi), %eax
2426 ; AVX512-NEXT: shll $13, %eax
2427 ; AVX512-NEXT: movq (%rdi), %rcx
2428 ; AVX512-NEXT: movq %rcx, %rdx
2429 ; AVX512-NEXT: shrq $51, %rdx
2430 ; AVX512-NEXT: orl %eax, %edx
2431 ; AVX512-NEXT: movq %rcx, %rax
2432 ; AVX512-NEXT: shrq $17, %rax
2433 ; AVX512-NEXT: vmovd %ecx, %xmm0
2434 ; AVX512-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
2435 ; AVX512-NEXT: shrq $34, %rcx
2436 ; AVX512-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
2437 ; AVX512-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
2438 ; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [131071,131071,131071,131071]
2439 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
2441 %a = load <4 x i17>, <4 x i17>* %ptr
2442 %b = zext <4 x i17> %a to <4 x i32>
; Zero-extension from an illegal narrow element type: truncates the i32
; argument to i6, splats it across <8 x i6>, adds the constant vector
; <0, 1, 2, 3, 4, 5, 6, 7> and zero-extends the sum to <8 x i64>. The SSE runs
; expect an AND with a [63,63] vector; the AVX runs expect an AND with a
; constant-pool operand instead.
2446 define <8 x i64> @zext_8i6_to_8i64(i32 %x) nounwind uwtable readnone ssp {
2447 ; SSE2-LABEL: zext_8i6_to_8i64:
2448 ; SSE2: # %bb.0: # %entry
2449 ; SSE2-NEXT: movd %edi, %xmm0
2450 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2451 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
2452 ; SSE2-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
2453 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,0,0]
2454 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
2455 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [63,63]
2456 ; SSE2-NEXT: pand %xmm4, %xmm0
2457 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
2458 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,5]
2459 ; SSE2-NEXT: pand %xmm4, %xmm1
2460 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,2,2,2]
2461 ; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,5,5]
2462 ; SSE2-NEXT: pand %xmm4, %xmm2
2463 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
2464 ; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,5,5]
2465 ; SSE2-NEXT: pand %xmm4, %xmm3
2468 ; SSSE3-LABEL: zext_8i6_to_8i64:
2469 ; SSSE3: # %bb.0: # %entry
2470 ; SSSE3-NEXT: movd %edi, %xmm0
2471 ; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2472 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
2473 ; SSSE3-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
2474 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,0,0]
2475 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
2476 ; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [63,63]
2477 ; SSSE3-NEXT: pand %xmm4, %xmm0
2478 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
2479 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,5]
2480 ; SSSE3-NEXT: pand %xmm4, %xmm1
2481 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,2,2,2]
2482 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,5,5]
2483 ; SSSE3-NEXT: pand %xmm4, %xmm2
2484 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
2485 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,5,5]
2486 ; SSSE3-NEXT: pand %xmm4, %xmm3
2489 ; SSE41-LABEL: zext_8i6_to_8i64:
2490 ; SSE41: # %bb.0: # %entry
2491 ; SSE41-NEXT: movd %edi, %xmm0
2492 ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2493 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
2494 ; SSE41-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
2495 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
2496 ; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [63,63]
2497 ; SSE41-NEXT: pand %xmm4, %xmm0
2498 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
2499 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
2500 ; SSE41-NEXT: pand %xmm4, %xmm1
2501 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
2502 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
2503 ; SSE41-NEXT: pand %xmm4, %xmm2
2504 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
2505 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
2506 ; SSE41-NEXT: pand %xmm4, %xmm3
2509 ; AVX1-LABEL: zext_8i6_to_8i64:
2510 ; AVX1: # %bb.0: # %entry
2511 ; AVX1-NEXT: vmovd %edi, %xmm0
2512 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2513 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2514 ; AVX1-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2515 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
2516 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
2517 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
2518 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
2519 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2520 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
2521 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
2522 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
2523 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
2524 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
2527 ; AVX2-LABEL: zext_8i6_to_8i64:
2528 ; AVX2: # %bb.0: # %entry
2529 ; AVX2-NEXT: vmovd %edi, %xmm0
2530 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
2531 ; AVX2-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2532 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
2533 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2534 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
2535 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2538 ; AVX512-LABEL: zext_8i6_to_8i64:
2539 ; AVX512: # %bb.0: # %entry
2540 ; AVX512-NEXT: vmovd %edi, %xmm0
2541 ; AVX512-NEXT: vpbroadcastw %xmm0, %xmm0
2542 ; AVX512-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2543 ; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2544 ; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
2547 %a = trunc i32 %x to i6
2548 %b = insertelement <8 x i6> undef, i6 %a, i32 0
2549 %c = shufflevector <8 x i6> %b, <8 x i6> undef, <8 x i32> zeroinitializer
2550 %d = add <8 x i6> %c, <i6 0, i6 1, i6 2, i6 3, i6 4, i6 5, i6 6, i6 7>
2551 %e = zext <8 x i6> %d to <8 x i64>
; Splat-then-zext test: element 0 of %x is broadcast to all four i32 lanes and
; the splat is zero-extended to <4 x i64>. The per-target assertion lines in
; the body are autogenerated (see the NOTE on the first line of this file), so
; regenerate them with the script instead of editing them by hand.
2555 define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) {
2556 ; SSE2-LABEL: splatshuf_zext_v4i64:
2558 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2559 ; SSE2-NEXT: pxor %xmm1, %xmm1
2560 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2561 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2564 ; SSSE3-LABEL: splatshuf_zext_v4i64:
2566 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2567 ; SSSE3-NEXT: pxor %xmm1, %xmm1
2568 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2569 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
2572 ; SSE41-LABEL: splatshuf_zext_v4i64:
2574 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
2575 ; SSE41-NEXT: pxor %xmm2, %xmm2
2576 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
2577 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2580 ; AVX1-LABEL: splatshuf_zext_v4i64:
2582 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2583 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
2584 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2587 ; AVX2-LABEL: splatshuf_zext_v4i64:
2589 ; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
2590 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2593 ; AVX512-LABEL: splatshuf_zext_v4i64:
2595 ; AVX512-NEXT: vpbroadcastd %xmm0, %xmm0
2596 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; An all-zero shuffle mask selects element 0 of %x for every result lane.
2598 %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer
; Widen each i32 lane to i64, filling the upper bits with zeros.
2599 %ext = zext <4 x i32> %shuf to <4 x i64>
; Repeated-half shuffle followed by zext to <8 x i32>. The low and high halves
; of the shuffle mask are both (0, undef, 3, 7), so the undef lane sits at the
; same position in each half. The per-target assertion lines in the body are
; autogenerated (see the NOTE on the first line of this file).
2603 define <8 x i32> @splatshuf_zext_v8i32_matching_undefs(<8 x i16> %x) {
2604 ; SSE2-LABEL: splatshuf_zext_v8i32_matching_undefs:
2606 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
2607 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7]
2608 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2609 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2612 ; SSSE3-LABEL: splatshuf_zext_v8i32_matching_undefs:
2614 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[u,u],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
2615 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
2618 ; SSE41-LABEL: splatshuf_zext_v8i32_matching_undefs:
2620 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[6,7],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
2621 ; SSE41-NEXT: movdqa %xmm0, %xmm1
2624 ; AVX1-LABEL: splatshuf_zext_v8i32_matching_undefs:
2626 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[6,7],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
2627 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2630 ; AVX2-LABEL: splatshuf_zext_v8i32_matching_undefs:
2632 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,6,7,14,15,0,1,6,7,6,7,14,15]
2633 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2636 ; AVX512-LABEL: splatshuf_zext_v8i32_matching_undefs:
2638 ; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,6,7,14,15,0,1,6,7,6,7,14,15]
2639 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; Mask lanes 0-3 and 4-7 are identical, with undef at lanes 1 and 5.
2641 %shuf = shufflevector <8 x i16> %x, <8 x i16> undef, <8 x i32> <i32 0, i32 undef, i32 3, i32 7, i32 0, i32 undef, i32 3, i32 7>
; Widen each i16 lane to i32, filling the upper bits with zeros.
2642 %ext = zext <8 x i16> %shuf to <8 x i32>
; Shuffle followed by zext to <8 x i32> where the two halves of the shuffle
; mask are NOT identical: the low half is (0, 1, 3, 7) and the high half is
; (0, undef, 3, 7), so lane 1 is defined while the corresponding lane 5 is
; undef. The per-target assertion lines in the body are autogenerated (see the
; NOTE on the first line of this file).
2646 define <8 x i32> @splatshuf_zext_v8i32_unmatched_undef(<8 x i16> %x) {
2647 ; SSE2-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2649 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
2650 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,7]
2651 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2652 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,3,2,4,5,6,7]
2653 ; SSE2-NEXT: pxor %xmm1, %xmm1
2654 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2655 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2658 ; SSSE3-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2660 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[2,3],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
2661 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
2664 ; SSE41-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2666 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[2,3],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
2667 ; SSE41-NEXT: movdqa %xmm0, %xmm1
2670 ; AVX1-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2672 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15]
2673 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
2674 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2675 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2676 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2679 ; AVX2-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2681 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15]
2682 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2685 ; AVX512-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2687 ; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15]
2688 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; Lane 1 selects element 1 of %x, whereas lane 5 is undef; all other lanes of
; the two mask halves agree.
2690 %shuf = shufflevector <8 x i16> %x, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 3, i32 7, i32 0, i32 undef, i32 3, i32 7>
; Widen each i16 lane to i32, filling the upper bits with zeros.
2691 %ext = zext <8 x i16> %shuf to <8 x i32>
; Splat-then-zext test: element 14 of %x is broadcast to all sixteen i8 lanes
; and the splat is zero-extended to <16 x i16>. The per-target assertion lines
; in the body are autogenerated (see the NOTE on the first line of this file),
; so regenerate them with the script instead of editing them by hand.
2695 define <16 x i16> @splatshuf_zext_v16i16(<16 x i8> %x) {
2696 ; SSE2-LABEL: splatshuf_zext_v16i16:
2698 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
2699 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,6]
2700 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
2701 ; SSE2-NEXT: pxor %xmm1, %xmm1
2702 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2703 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2706 ; SSSE3-LABEL: splatshuf_zext_v16i16:
2708 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero
2709 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
2712 ; SSE41-LABEL: splatshuf_zext_v16i16:
2714 ; SSE41-NEXT: movdqa %xmm0, %xmm1
2715 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14]
2716 ; SSE41-NEXT: pxor %xmm2, %xmm2
2717 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
2718 ; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
2721 ; AVX1-LABEL: splatshuf_zext_v16i16:
2723 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero
2724 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2727 ; AVX2-LABEL: splatshuf_zext_v16i16:
2729 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14]
2730 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2733 ; AVX512-LABEL: splatshuf_zext_v16i16:
2735 ; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14]
2736 ; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; Every mask index is 14, so each result lane is a copy of element 14 of %x.
2738 %shuf = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14>
; Widen each i8 lane to i16, filling the upper bits with zeros.
2739 %ext = zext <16 x i8> %shuf to <16 x i16>