1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX1
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX2
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL --check-prefix=AVX512DQVL
11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL --check-prefix=AVX512BWVL
13 ; Just one 32-bit run to make sure we do reasonable things for i64 shifts.
14 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X32-SSE --check-prefix=X32-SSE2
; Variable left shift of <2 x i32> - each lane of %a is shifted left by the
; matching lane of %b (the single shl at the end of this block).
; Lowering expected by the checks below, per RUN configuration
;  - SSE2, SSE4.1, AVX1 and the 32-bit SSE run move the shift amount up with
;    pslld $23, add a constant-pool value, convert with cvttps2dq and then
;    multiply %a by the result (pmuludq pairs plus shuffles on SSE2 and on the
;    32-bit run, pmulld on SSE4.1, vpmulld on AVX1).
;    NOTE(review) the added constant is hidden behind a regex in these checks;
;    presumably it is the 1.0f bit pattern so that the product is %a * 2^%b
;    - confirm against the generated constant pool.
;  - AVX2, XOP+AVX2 and all AVX512 runs expect vpsllvd on xmm registers.
;  - XOP+AVX1 expects vpshld.
20 define <2 x i32> @var_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
21 ; SSE2-LABEL: var_shift_v2i32:
23 ; SSE2-NEXT: pslld $23, %xmm1
24 ; SSE2-NEXT: paddd {{.*}}(%rip), %xmm1
25 ; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
26 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
27 ; SSE2-NEXT: pmuludq %xmm1, %xmm0
28 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
29 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
30 ; SSE2-NEXT: pmuludq %xmm2, %xmm1
31 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
32 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
35 ; SSE41-LABEL: var_shift_v2i32:
37 ; SSE41-NEXT: pslld $23, %xmm1
38 ; SSE41-NEXT: paddd {{.*}}(%rip), %xmm1
39 ; SSE41-NEXT: cvttps2dq %xmm1, %xmm1
40 ; SSE41-NEXT: pmulld %xmm1, %xmm0
43 ; AVX1-LABEL: var_shift_v2i32:
45 ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
46 ; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm1, %xmm1
47 ; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
48 ; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0
51 ; AVX2-LABEL: var_shift_v2i32:
53 ; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
56 ; XOPAVX1-LABEL: var_shift_v2i32:
58 ; XOPAVX1-NEXT: vpshld %xmm1, %xmm0, %xmm0
61 ; XOPAVX2-LABEL: var_shift_v2i32:
63 ; XOPAVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
66 ; AVX512-LABEL: var_shift_v2i32:
68 ; AVX512-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
71 ; AVX512VL-LABEL: var_shift_v2i32:
73 ; AVX512VL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
76 ; X32-SSE-LABEL: var_shift_v2i32:
78 ; X32-SSE-NEXT: pslld $23, %xmm1
79 ; X32-SSE-NEXT: paddd {{\.LCPI.*}}, %xmm1
80 ; X32-SSE-NEXT: cvttps2dq %xmm1, %xmm1
81 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
82 ; X32-SSE-NEXT: pmuludq %xmm1, %xmm0
83 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
84 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
85 ; X32-SSE-NEXT: pmuludq %xmm2, %xmm1
86 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
87 ; X32-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
89 %shift = shl <2 x i32> %a, %b
; Variable left shift of <4 x i16> - each lane of %a is shifted left by the
; matching lane of %b (the single shl at the end of this block).
; Lowering expected by the checks below, per RUN configuration
;  - SSE2, SSE4.1, AVX1 and the 32-bit SSE run widen the shift amounts to
;    32-bit lanes, apply pslld $23, add a splat of 1065353216 (0x3F800000, the
;    bit pattern of 1.0f), convert with cvttps2dq, narrow the results back to
;    16-bit lanes (word/dword shuffles on SSE2 and the 32-bit run, packusdw on
;    SSE4.1 and AVX1) and multiply %a by them with pmullw.
;  - AVX2, AVX512DQ and AVX512DQ+VL zero-extend both operands to 32-bit lanes
;    in ymm registers, shift with vpsllvd and narrow again (vpshufb + vpermq
;    on AVX2, vpmovdw on the AVX512DQ runs).
;  - XOP expects vpshlw.
;  - AVX512BW expects vpsllvw on zmm registers; AVX512BW+VL expects vpsllvw
;    directly on xmm registers.
93 define <4 x i16> @var_shift_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
94 ; SSE2-LABEL: var_shift_v4i16:
96 ; SSE2-NEXT: movdqa %xmm1, %xmm2
97 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
98 ; SSE2-NEXT: pslld $23, %xmm2
99 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
100 ; SSE2-NEXT: paddd %xmm3, %xmm2
101 ; SSE2-NEXT: cvttps2dq %xmm2, %xmm2
102 ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
103 ; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
104 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
105 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
106 ; SSE2-NEXT: pslld $23, %xmm1
107 ; SSE2-NEXT: paddd %xmm3, %xmm1
108 ; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
109 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
110 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
111 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
112 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
113 ; SSE2-NEXT: pmullw %xmm1, %xmm0
116 ; SSE41-LABEL: var_shift_v4i16:
118 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
119 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
120 ; SSE41-NEXT: pslld $23, %xmm1
121 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
122 ; SSE41-NEXT: paddd %xmm3, %xmm1
123 ; SSE41-NEXT: cvttps2dq %xmm1, %xmm1
124 ; SSE41-NEXT: pslld $23, %xmm2
125 ; SSE41-NEXT: paddd %xmm3, %xmm2
126 ; SSE41-NEXT: cvttps2dq %xmm2, %xmm2
127 ; SSE41-NEXT: packusdw %xmm1, %xmm2
128 ; SSE41-NEXT: pmullw %xmm2, %xmm0
131 ; AVX1-LABEL: var_shift_v4i16:
133 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
134 ; AVX1-NEXT: vpslld $23, %xmm2, %xmm2
135 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
136 ; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
137 ; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2
138 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
139 ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
140 ; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1
141 ; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
142 ; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
143 ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
146 ; AVX2-LABEL: var_shift_v4i16:
148 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
149 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
150 ; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
151 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
152 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
153 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
154 ; AVX2-NEXT: vzeroupper
157 ; XOP-LABEL: var_shift_v4i16:
159 ; XOP-NEXT: vpshlw %xmm1, %xmm0, %xmm0
162 ; AVX512DQ-LABEL: var_shift_v4i16:
164 ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
165 ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
166 ; AVX512DQ-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
167 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
168 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
169 ; AVX512DQ-NEXT: vzeroupper
170 ; AVX512DQ-NEXT: retq
172 ; AVX512BW-LABEL: var_shift_v4i16:
174 ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
175 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
176 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
177 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
178 ; AVX512BW-NEXT: vzeroupper
179 ; AVX512BW-NEXT: retq
181 ; AVX512DQVL-LABEL: var_shift_v4i16:
182 ; AVX512DQVL: # %bb.0:
183 ; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
184 ; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
185 ; AVX512DQVL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
186 ; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0
187 ; AVX512DQVL-NEXT: vzeroupper
188 ; AVX512DQVL-NEXT: retq
190 ; AVX512BWVL-LABEL: var_shift_v4i16:
191 ; AVX512BWVL: # %bb.0:
192 ; AVX512BWVL-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
193 ; AVX512BWVL-NEXT: retq
195 ; X32-SSE-LABEL: var_shift_v4i16:
197 ; X32-SSE-NEXT: movdqa %xmm1, %xmm2
198 ; X32-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
199 ; X32-SSE-NEXT: pslld $23, %xmm2
200 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
201 ; X32-SSE-NEXT: paddd %xmm3, %xmm2
202 ; X32-SSE-NEXT: cvttps2dq %xmm2, %xmm2
203 ; X32-SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
204 ; X32-SSE-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
205 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
206 ; X32-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
207 ; X32-SSE-NEXT: pslld $23, %xmm1
208 ; X32-SSE-NEXT: paddd %xmm3, %xmm1
209 ; X32-SSE-NEXT: cvttps2dq %xmm1, %xmm1
210 ; X32-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
211 ; X32-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
212 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
213 ; X32-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
214 ; X32-SSE-NEXT: pmullw %xmm1, %xmm0
216 %shift = shl <4 x i16> %a, %b
; Variable left shift of <2 x i16> - each lane of %a is shifted left by the
; matching lane of %b (the single shl at the end of this block).
; Lowering expected by the checks below, per RUN configuration
;  - SSE2, SSE4.1, AVX1 and the 32-bit SSE run widen the shift amounts to
;    32-bit lanes, apply pslld $23, add a splat of 1065353216 (0x3F800000, the
;    bit pattern of 1.0f), convert with cvttps2dq, narrow the results back to
;    16-bit lanes (word/dword shuffles on SSE2 and the 32-bit run, packusdw on
;    SSE4.1 and AVX1) and multiply %a by them with pmullw.
;  - AVX2, AVX512DQ and AVX512DQ+VL zero-extend both operands to 32-bit lanes
;    in ymm registers, shift with vpsllvd and narrow again (vpshufb + vpermq
;    on AVX2, vpmovdw on the AVX512DQ runs).
;  - XOP expects vpshlw.
;  - AVX512BW expects vpsllvw on zmm registers; AVX512BW+VL expects vpsllvw
;    directly on xmm registers.
220 define <2 x i16> @var_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
221 ; SSE2-LABEL: var_shift_v2i16:
223 ; SSE2-NEXT: movdqa %xmm1, %xmm2
224 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
225 ; SSE2-NEXT: pslld $23, %xmm2
226 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
227 ; SSE2-NEXT: paddd %xmm3, %xmm2
228 ; SSE2-NEXT: cvttps2dq %xmm2, %xmm2
229 ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
230 ; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
231 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
232 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
233 ; SSE2-NEXT: pslld $23, %xmm1
234 ; SSE2-NEXT: paddd %xmm3, %xmm1
235 ; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
236 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
237 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
238 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
239 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
240 ; SSE2-NEXT: pmullw %xmm1, %xmm0
243 ; SSE41-LABEL: var_shift_v2i16:
245 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
246 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
247 ; SSE41-NEXT: pslld $23, %xmm1
248 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
249 ; SSE41-NEXT: paddd %xmm3, %xmm1
250 ; SSE41-NEXT: cvttps2dq %xmm1, %xmm1
251 ; SSE41-NEXT: pslld $23, %xmm2
252 ; SSE41-NEXT: paddd %xmm3, %xmm2
253 ; SSE41-NEXT: cvttps2dq %xmm2, %xmm2
254 ; SSE41-NEXT: packusdw %xmm1, %xmm2
255 ; SSE41-NEXT: pmullw %xmm2, %xmm0
258 ; AVX1-LABEL: var_shift_v2i16:
260 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
261 ; AVX1-NEXT: vpslld $23, %xmm2, %xmm2
262 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
263 ; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
264 ; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2
265 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
266 ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
267 ; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1
268 ; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
269 ; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
270 ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
273 ; AVX2-LABEL: var_shift_v2i16:
275 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
276 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
277 ; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
278 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
279 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
280 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
281 ; AVX2-NEXT: vzeroupper
284 ; XOP-LABEL: var_shift_v2i16:
286 ; XOP-NEXT: vpshlw %xmm1, %xmm0, %xmm0
289 ; AVX512DQ-LABEL: var_shift_v2i16:
291 ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
292 ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
293 ; AVX512DQ-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
294 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
295 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
296 ; AVX512DQ-NEXT: vzeroupper
297 ; AVX512DQ-NEXT: retq
299 ; AVX512BW-LABEL: var_shift_v2i16:
301 ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
302 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
303 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
304 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
305 ; AVX512BW-NEXT: vzeroupper
306 ; AVX512BW-NEXT: retq
308 ; AVX512DQVL-LABEL: var_shift_v2i16:
309 ; AVX512DQVL: # %bb.0:
310 ; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
311 ; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
312 ; AVX512DQVL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
313 ; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0
314 ; AVX512DQVL-NEXT: vzeroupper
315 ; AVX512DQVL-NEXT: retq
317 ; AVX512BWVL-LABEL: var_shift_v2i16:
318 ; AVX512BWVL: # %bb.0:
319 ; AVX512BWVL-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
320 ; AVX512BWVL-NEXT: retq
322 ; X32-SSE-LABEL: var_shift_v2i16:
324 ; X32-SSE-NEXT: movdqa %xmm1, %xmm2
325 ; X32-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
326 ; X32-SSE-NEXT: pslld $23, %xmm2
327 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
328 ; X32-SSE-NEXT: paddd %xmm3, %xmm2
329 ; X32-SSE-NEXT: cvttps2dq %xmm2, %xmm2
330 ; X32-SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
331 ; X32-SSE-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
332 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
333 ; X32-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
334 ; X32-SSE-NEXT: pslld $23, %xmm1
335 ; X32-SSE-NEXT: paddd %xmm3, %xmm1
336 ; X32-SSE-NEXT: cvttps2dq %xmm1, %xmm1
337 ; X32-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
338 ; X32-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
339 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
340 ; X32-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
341 ; X32-SSE-NEXT: pmullw %xmm1, %xmm0
343 %shift = shl <2 x i16> %a, %b
; Variable left shift of <8 x i8> - each lane of %a is shifted left by the
; matching lane of %b (the single shl at the end of this block).
; Lowering expected by the checks below, per RUN configuration
;  - SSE2 and the 32-bit SSE run shift the amounts left by 5 (psllw $5) and
;    then double them with paddb between steps, so a different amount bit sits
;    in each byte's sign position per step; pcmpgtb against zero turns that
;    sign bit into a byte mask which picks (pand/pandn/por) between the
;    current value and the value shifted by 4, then by 2, then by 1 (paddb).
;  - SSE4.1 and the AVX runs use the same three steps with pblendvb /
;    vpblendvb doing the per-byte select.
;    NOTE(review) the pand with a constant-pool operand after each psllw
;    presumably clears bits that crossed a byte boundary in the 16-bit shift;
;    the constant is hidden behind a regex here - confirm.
;  - XOP expects vpshlb.
;  - AVX512DQ (with or without VL) zero-extends both operands to 32-bit lanes
;    in zmm registers, shifts with vpsllvd and truncates with vpmovdb.
;  - AVX512BW (with or without VL) zero-extends both operands to 16-bit
;    lanes, shifts with vpsllvw and truncates with vpmovwb.
347 define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
348 ; SSE2-LABEL: var_shift_v8i8:
350 ; SSE2-NEXT: psllw $5, %xmm1
351 ; SSE2-NEXT: pxor %xmm2, %xmm2
352 ; SSE2-NEXT: pxor %xmm3, %xmm3
353 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm3
354 ; SSE2-NEXT: movdqa %xmm3, %xmm4
355 ; SSE2-NEXT: pandn %xmm0, %xmm4
356 ; SSE2-NEXT: psllw $4, %xmm0
357 ; SSE2-NEXT: pand %xmm3, %xmm0
358 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
359 ; SSE2-NEXT: por %xmm4, %xmm0
360 ; SSE2-NEXT: paddb %xmm1, %xmm1
361 ; SSE2-NEXT: pxor %xmm3, %xmm3
362 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm3
363 ; SSE2-NEXT: movdqa %xmm3, %xmm4
364 ; SSE2-NEXT: pandn %xmm0, %xmm4
365 ; SSE2-NEXT: psllw $2, %xmm0
366 ; SSE2-NEXT: pand %xmm3, %xmm0
367 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
368 ; SSE2-NEXT: por %xmm4, %xmm0
369 ; SSE2-NEXT: paddb %xmm1, %xmm1
370 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
371 ; SSE2-NEXT: movdqa %xmm2, %xmm1
372 ; SSE2-NEXT: pandn %xmm0, %xmm1
373 ; SSE2-NEXT: paddb %xmm0, %xmm0
374 ; SSE2-NEXT: pand %xmm2, %xmm0
375 ; SSE2-NEXT: por %xmm1, %xmm0
378 ; SSE41-LABEL: var_shift_v8i8:
380 ; SSE41-NEXT: movdqa %xmm0, %xmm2
381 ; SSE41-NEXT: psllw $5, %xmm1
382 ; SSE41-NEXT: movdqa %xmm0, %xmm3
383 ; SSE41-NEXT: psllw $4, %xmm3
384 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
385 ; SSE41-NEXT: movdqa %xmm1, %xmm0
386 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
387 ; SSE41-NEXT: movdqa %xmm2, %xmm3
388 ; SSE41-NEXT: psllw $2, %xmm3
389 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
390 ; SSE41-NEXT: paddb %xmm1, %xmm1
391 ; SSE41-NEXT: movdqa %xmm1, %xmm0
392 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
393 ; SSE41-NEXT: movdqa %xmm2, %xmm3
394 ; SSE41-NEXT: paddb %xmm2, %xmm3
395 ; SSE41-NEXT: paddb %xmm1, %xmm1
396 ; SSE41-NEXT: movdqa %xmm1, %xmm0
397 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
398 ; SSE41-NEXT: movdqa %xmm2, %xmm0
401 ; AVX-LABEL: var_shift_v8i8:
403 ; AVX-NEXT: vpsllw $5, %xmm1, %xmm1
404 ; AVX-NEXT: vpsllw $4, %xmm0, %xmm2
405 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
406 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
407 ; AVX-NEXT: vpsllw $2, %xmm0, %xmm2
408 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
409 ; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
410 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
411 ; AVX-NEXT: vpaddb %xmm0, %xmm0, %xmm2
412 ; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
413 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
416 ; XOP-LABEL: var_shift_v8i8:
418 ; XOP-NEXT: vpshlb %xmm1, %xmm0, %xmm0
421 ; AVX512DQ-LABEL: var_shift_v8i8:
423 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
424 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
425 ; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
426 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
427 ; AVX512DQ-NEXT: vzeroupper
428 ; AVX512DQ-NEXT: retq
430 ; AVX512BW-LABEL: var_shift_v8i8:
432 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
433 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
434 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
435 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
436 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
437 ; AVX512BW-NEXT: vzeroupper
438 ; AVX512BW-NEXT: retq
440 ; AVX512DQVL-LABEL: var_shift_v8i8:
441 ; AVX512DQVL: # %bb.0:
442 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
443 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
444 ; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
445 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
446 ; AVX512DQVL-NEXT: vzeroupper
447 ; AVX512DQVL-NEXT: retq
449 ; AVX512BWVL-LABEL: var_shift_v8i8:
450 ; AVX512BWVL: # %bb.0:
451 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
452 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
453 ; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
454 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
455 ; AVX512BWVL-NEXT: vzeroupper
456 ; AVX512BWVL-NEXT: retq
458 ; X32-SSE-LABEL: var_shift_v8i8:
460 ; X32-SSE-NEXT: psllw $5, %xmm1
461 ; X32-SSE-NEXT: pxor %xmm2, %xmm2
462 ; X32-SSE-NEXT: pxor %xmm3, %xmm3
463 ; X32-SSE-NEXT: pcmpgtb %xmm1, %xmm3
464 ; X32-SSE-NEXT: movdqa %xmm3, %xmm4
465 ; X32-SSE-NEXT: pandn %xmm0, %xmm4
466 ; X32-SSE-NEXT: psllw $4, %xmm0
467 ; X32-SSE-NEXT: pand %xmm3, %xmm0
468 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
469 ; X32-SSE-NEXT: por %xmm4, %xmm0
470 ; X32-SSE-NEXT: paddb %xmm1, %xmm1
471 ; X32-SSE-NEXT: pxor %xmm3, %xmm3
472 ; X32-SSE-NEXT: pcmpgtb %xmm1, %xmm3
473 ; X32-SSE-NEXT: movdqa %xmm3, %xmm4
474 ; X32-SSE-NEXT: pandn %xmm0, %xmm4
475 ; X32-SSE-NEXT: psllw $2, %xmm0
476 ; X32-SSE-NEXT: pand %xmm3, %xmm0
477 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
478 ; X32-SSE-NEXT: por %xmm4, %xmm0
479 ; X32-SSE-NEXT: paddb %xmm1, %xmm1
480 ; X32-SSE-NEXT: pcmpgtb %xmm1, %xmm2
481 ; X32-SSE-NEXT: movdqa %xmm2, %xmm1
482 ; X32-SSE-NEXT: pandn %xmm0, %xmm1
483 ; X32-SSE-NEXT: paddb %xmm0, %xmm0
484 ; X32-SSE-NEXT: pand %xmm2, %xmm0
485 ; X32-SSE-NEXT: por %xmm1, %xmm0
487 %shift = shl <8 x i8> %a, %b
; Variable left shift of <4 x i8> - each lane of %a is shifted left by the
; matching lane of %b (the single shl at the end of this block).
; Lowering expected by the checks below, per RUN configuration
;  - SSE2 and the 32-bit SSE run shift the amounts left by 5 (psllw $5) and
;    then double them with paddb between steps, so a different amount bit sits
;    in each byte's sign position per step; pcmpgtb against zero turns that
;    sign bit into a byte mask which picks (pand/pandn/por) between the
;    current value and the value shifted by 4, then by 2, then by 1 (paddb).
;  - SSE4.1 and the AVX runs use the same three steps with pblendvb /
;    vpblendvb doing the per-byte select.
;    NOTE(review) the pand with a constant-pool operand after each psllw
;    presumably clears bits that crossed a byte boundary in the 16-bit shift;
;    the constant is hidden behind a regex here - confirm.
;  - XOP expects vpshlb.
;  - AVX512DQ (with or without VL) zero-extends both operands to 32-bit lanes
;    in zmm registers, shifts with vpsllvd and truncates with vpmovdb.
;  - AVX512BW (with or without VL) zero-extends both operands to 16-bit
;    lanes, shifts with vpsllvw and truncates with vpmovwb.
491 define <4 x i8> @var_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
492 ; SSE2-LABEL: var_shift_v4i8:
494 ; SSE2-NEXT: psllw $5, %xmm1
495 ; SSE2-NEXT: pxor %xmm2, %xmm2
496 ; SSE2-NEXT: pxor %xmm3, %xmm3
497 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm3
498 ; SSE2-NEXT: movdqa %xmm3, %xmm4
499 ; SSE2-NEXT: pandn %xmm0, %xmm4
500 ; SSE2-NEXT: psllw $4, %xmm0
501 ; SSE2-NEXT: pand %xmm3, %xmm0
502 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
503 ; SSE2-NEXT: por %xmm4, %xmm0
504 ; SSE2-NEXT: paddb %xmm1, %xmm1
505 ; SSE2-NEXT: pxor %xmm3, %xmm3
506 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm3
507 ; SSE2-NEXT: movdqa %xmm3, %xmm4
508 ; SSE2-NEXT: pandn %xmm0, %xmm4
509 ; SSE2-NEXT: psllw $2, %xmm0
510 ; SSE2-NEXT: pand %xmm3, %xmm0
511 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
512 ; SSE2-NEXT: por %xmm4, %xmm0
513 ; SSE2-NEXT: paddb %xmm1, %xmm1
514 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
515 ; SSE2-NEXT: movdqa %xmm2, %xmm1
516 ; SSE2-NEXT: pandn %xmm0, %xmm1
517 ; SSE2-NEXT: paddb %xmm0, %xmm0
518 ; SSE2-NEXT: pand %xmm2, %xmm0
519 ; SSE2-NEXT: por %xmm1, %xmm0
522 ; SSE41-LABEL: var_shift_v4i8:
524 ; SSE41-NEXT: movdqa %xmm0, %xmm2
525 ; SSE41-NEXT: psllw $5, %xmm1
526 ; SSE41-NEXT: movdqa %xmm0, %xmm3
527 ; SSE41-NEXT: psllw $4, %xmm3
528 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
529 ; SSE41-NEXT: movdqa %xmm1, %xmm0
530 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
531 ; SSE41-NEXT: movdqa %xmm2, %xmm3
532 ; SSE41-NEXT: psllw $2, %xmm3
533 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
534 ; SSE41-NEXT: paddb %xmm1, %xmm1
535 ; SSE41-NEXT: movdqa %xmm1, %xmm0
536 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
537 ; SSE41-NEXT: movdqa %xmm2, %xmm3
538 ; SSE41-NEXT: paddb %xmm2, %xmm3
539 ; SSE41-NEXT: paddb %xmm1, %xmm1
540 ; SSE41-NEXT: movdqa %xmm1, %xmm0
541 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
542 ; SSE41-NEXT: movdqa %xmm2, %xmm0
545 ; AVX-LABEL: var_shift_v4i8:
547 ; AVX-NEXT: vpsllw $5, %xmm1, %xmm1
548 ; AVX-NEXT: vpsllw $4, %xmm0, %xmm2
549 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
550 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
551 ; AVX-NEXT: vpsllw $2, %xmm0, %xmm2
552 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
553 ; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
554 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
555 ; AVX-NEXT: vpaddb %xmm0, %xmm0, %xmm2
556 ; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
557 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
560 ; XOP-LABEL: var_shift_v4i8:
562 ; XOP-NEXT: vpshlb %xmm1, %xmm0, %xmm0
565 ; AVX512DQ-LABEL: var_shift_v4i8:
567 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
568 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
569 ; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
570 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
571 ; AVX512DQ-NEXT: vzeroupper
572 ; AVX512DQ-NEXT: retq
574 ; AVX512BW-LABEL: var_shift_v4i8:
576 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
577 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
578 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
579 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
580 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
581 ; AVX512BW-NEXT: vzeroupper
582 ; AVX512BW-NEXT: retq
584 ; AVX512DQVL-LABEL: var_shift_v4i8:
585 ; AVX512DQVL: # %bb.0:
586 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
587 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
588 ; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
589 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
590 ; AVX512DQVL-NEXT: vzeroupper
591 ; AVX512DQVL-NEXT: retq
593 ; AVX512BWVL-LABEL: var_shift_v4i8:
594 ; AVX512BWVL: # %bb.0:
595 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
596 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
597 ; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
598 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
599 ; AVX512BWVL-NEXT: vzeroupper
600 ; AVX512BWVL-NEXT: retq
602 ; X32-SSE-LABEL: var_shift_v4i8:
604 ; X32-SSE-NEXT: psllw $5, %xmm1
605 ; X32-SSE-NEXT: pxor %xmm2, %xmm2
606 ; X32-SSE-NEXT: pxor %xmm3, %xmm3
607 ; X32-SSE-NEXT: pcmpgtb %xmm1, %xmm3
608 ; X32-SSE-NEXT: movdqa %xmm3, %xmm4
609 ; X32-SSE-NEXT: pandn %xmm0, %xmm4
610 ; X32-SSE-NEXT: psllw $4, %xmm0
611 ; X32-SSE-NEXT: pand %xmm3, %xmm0
612 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
613 ; X32-SSE-NEXT: por %xmm4, %xmm0
614 ; X32-SSE-NEXT: paddb %xmm1, %xmm1
615 ; X32-SSE-NEXT: pxor %xmm3, %xmm3
616 ; X32-SSE-NEXT: pcmpgtb %xmm1, %xmm3
617 ; X32-SSE-NEXT: movdqa %xmm3, %xmm4
618 ; X32-SSE-NEXT: pandn %xmm0, %xmm4
619 ; X32-SSE-NEXT: psllw $2, %xmm0
620 ; X32-SSE-NEXT: pand %xmm3, %xmm0
621 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
622 ; X32-SSE-NEXT: por %xmm4, %xmm0
623 ; X32-SSE-NEXT: paddb %xmm1, %xmm1
624 ; X32-SSE-NEXT: pcmpgtb %xmm1, %xmm2
625 ; X32-SSE-NEXT: movdqa %xmm2, %xmm1
626 ; X32-SSE-NEXT: pandn %xmm0, %xmm1
627 ; X32-SSE-NEXT: paddb %xmm0, %xmm0
628 ; X32-SSE-NEXT: pand %xmm2, %xmm0
629 ; X32-SSE-NEXT: por %xmm1, %xmm0
631 %shift = shl <4 x i8> %a, %b
635 define <2 x i8> @var_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
636 ; SSE2-LABEL: var_shift_v2i8:
638 ; SSE2-NEXT: psllw $5, %xmm1
639 ; SSE2-NEXT: pxor %xmm2, %xmm2
640 ; SSE2-NEXT: pxor %xmm3, %xmm3
641 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm3
642 ; SSE2-NEXT: movdqa %xmm3, %xmm4
643 ; SSE2-NEXT: pandn %xmm0, %xmm4
644 ; SSE2-NEXT: psllw $4, %xmm0
645 ; SSE2-NEXT: pand %xmm3, %xmm0
646 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
647 ; SSE2-NEXT: por %xmm4, %xmm0
648 ; SSE2-NEXT: paddb %xmm1, %xmm1
649 ; SSE2-NEXT: pxor %xmm3, %xmm3
650 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm3
651 ; SSE2-NEXT: movdqa %xmm3, %xmm4
652 ; SSE2-NEXT: pandn %xmm0, %xmm4
653 ; SSE2-NEXT: psllw $2, %xmm0
654 ; SSE2-NEXT: pand %xmm3, %xmm0
655 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
656 ; SSE2-NEXT: por %xmm4, %xmm0
657 ; SSE2-NEXT: paddb %xmm1, %xmm1
658 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
659 ; SSE2-NEXT: movdqa %xmm2, %xmm1
660 ; SSE2-NEXT: pandn %xmm0, %xmm1
661 ; SSE2-NEXT: paddb %xmm0, %xmm0
662 ; SSE2-NEXT: pand %xmm2, %xmm0
663 ; SSE2-NEXT: por %xmm1, %xmm0
666 ; SSE41-LABEL: var_shift_v2i8:
668 ; SSE41-NEXT: movdqa %xmm0, %xmm2
669 ; SSE41-NEXT: psllw $5, %xmm1
670 ; SSE41-NEXT: movdqa %xmm0, %xmm3
671 ; SSE41-NEXT: psllw $4, %xmm3
672 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
673 ; SSE41-NEXT: movdqa %xmm1, %xmm0
674 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
675 ; SSE41-NEXT: movdqa %xmm2, %xmm3
676 ; SSE41-NEXT: psllw $2, %xmm3
677 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
678 ; SSE41-NEXT: paddb %xmm1, %xmm1
679 ; SSE41-NEXT: movdqa %xmm1, %xmm0
680 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
681 ; SSE41-NEXT: movdqa %xmm2, %xmm3
682 ; SSE41-NEXT: paddb %xmm2, %xmm3
683 ; SSE41-NEXT: paddb %xmm1, %xmm1
684 ; SSE41-NEXT: movdqa %xmm1, %xmm0
685 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
686 ; SSE41-NEXT: movdqa %xmm2, %xmm0
689 ; AVX-LABEL: var_shift_v2i8:
691 ; AVX-NEXT: vpsllw $5, %xmm1, %xmm1
692 ; AVX-NEXT: vpsllw $4, %xmm0, %xmm2
693 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
694 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
695 ; AVX-NEXT: vpsllw $2, %xmm0, %xmm2
696 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
697 ; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
698 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
699 ; AVX-NEXT: vpaddb %xmm0, %xmm0, %xmm2
700 ; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
701 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
704 ; XOP-LABEL: var_shift_v2i8:
706 ; XOP-NEXT: vpshlb %xmm1, %xmm0, %xmm0
709 ; AVX512DQ-LABEL: var_shift_v2i8:
711 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
712 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
713 ; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
714 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
715 ; AVX512DQ-NEXT: vzeroupper
716 ; AVX512DQ-NEXT: retq
718 ; AVX512BW-LABEL: var_shift_v2i8:
720 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
721 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
722 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
723 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
724 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
725 ; AVX512BW-NEXT: vzeroupper
726 ; AVX512BW-NEXT: retq
728 ; AVX512DQVL-LABEL: var_shift_v2i8:
729 ; AVX512DQVL: # %bb.0:
730 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
731 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
732 ; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
733 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
734 ; AVX512DQVL-NEXT: vzeroupper
735 ; AVX512DQVL-NEXT: retq
737 ; AVX512BWVL-LABEL: var_shift_v2i8:
738 ; AVX512BWVL: # %bb.0:
739 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
740 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
741 ; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
742 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
743 ; AVX512BWVL-NEXT: vzeroupper
744 ; AVX512BWVL-NEXT: retq
746 ; X32-SSE-LABEL: var_shift_v2i8:
748 ; X32-SSE-NEXT: psllw $5, %xmm1
749 ; X32-SSE-NEXT: pxor %xmm2, %xmm2
750 ; X32-SSE-NEXT: pxor %xmm3, %xmm3
751 ; X32-SSE-NEXT: pcmpgtb %xmm1, %xmm3
752 ; X32-SSE-NEXT: movdqa %xmm3, %xmm4
753 ; X32-SSE-NEXT: pandn %xmm0, %xmm4
754 ; X32-SSE-NEXT: psllw $4, %xmm0
755 ; X32-SSE-NEXT: pand %xmm3, %xmm0
756 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
757 ; X32-SSE-NEXT: por %xmm4, %xmm0
758 ; X32-SSE-NEXT: paddb %xmm1, %xmm1
759 ; X32-SSE-NEXT: pxor %xmm3, %xmm3
760 ; X32-SSE-NEXT: pcmpgtb %xmm1, %xmm3
761 ; X32-SSE-NEXT: movdqa %xmm3, %xmm4
762 ; X32-SSE-NEXT: pandn %xmm0, %xmm4
763 ; X32-SSE-NEXT: psllw $2, %xmm0
764 ; X32-SSE-NEXT: pand %xmm3, %xmm0
765 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
766 ; X32-SSE-NEXT: por %xmm4, %xmm0
767 ; X32-SSE-NEXT: paddb %xmm1, %xmm1
768 ; X32-SSE-NEXT: pcmpgtb %xmm1, %xmm2
769 ; X32-SSE-NEXT: movdqa %xmm2, %xmm1
770 ; X32-SSE-NEXT: pandn %xmm0, %xmm1
771 ; X32-SSE-NEXT: paddb %xmm0, %xmm0
772 ; X32-SSE-NEXT: pand %xmm2, %xmm0
773 ; X32-SSE-NEXT: por %xmm1, %xmm0
775 %shift = shl <2 x i8> %a, %b
780 ; Uniform Variable Shifts
; Uniform shl on <2 x i32> - element 0 of %b is splatted to both lanes by the
; all-zero shuffle mask, then each lane of %a is shifted left by that amount.
; Every prefix below expects a single pslld/vpslld taking its count from an xmm
; register, after element 0 of the count vector has been zero-extended (movss
; into a zeroed register on the SSE2 runs, pmovzxdq on the others).
783 define <2 x i32> @splatvar_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
784 ; SSE2-LABEL: splatvar_shift_v2i32:
786 ; SSE2-NEXT: xorps %xmm2, %xmm2
787 ; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
788 ; SSE2-NEXT: pslld %xmm2, %xmm0
791 ; SSE41-LABEL: splatvar_shift_v2i32:
793 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
794 ; SSE41-NEXT: pslld %xmm1, %xmm0
797 ; AVX-LABEL: splatvar_shift_v2i32:
799 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
800 ; AVX-NEXT: vpslld %xmm1, %xmm0, %xmm0
803 ; XOP-LABEL: splatvar_shift_v2i32:
805 ; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
806 ; XOP-NEXT: vpslld %xmm1, %xmm0, %xmm0
809 ; AVX512-LABEL: splatvar_shift_v2i32:
811 ; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
812 ; AVX512-NEXT: vpslld %xmm1, %xmm0, %xmm0
815 ; AVX512VL-LABEL: splatvar_shift_v2i32:
817 ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
818 ; AVX512VL-NEXT: vpslld %xmm1, %xmm0, %xmm0
819 ; AVX512VL-NEXT: retq
821 ; X32-SSE-LABEL: splatvar_shift_v2i32:
823 ; X32-SSE-NEXT: xorps %xmm2, %xmm2
824 ; X32-SSE-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
825 ; X32-SSE-NEXT: pslld %xmm2, %xmm0
827 %splat = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> zeroinitializer
828 %shift = shl <2 x i32> %a, %splat
; Uniform shl on <4 x i16> - element 0 of %b is splatted to every lane by the
; all-zero shuffle mask, then each lane of %a is shifted left by that amount.
; Every prefix below expects a single psllw/vpsllw whose count register holds
; only the zero-extended 16-bit amount (pslldq + psrldq on the SSE2 runs,
; pmovzxwq on the others).
832 define <4 x i16> @splatvar_shift_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
833 ; SSE2-LABEL: splatvar_shift_v4i16:
835 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
836 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
837 ; SSE2-NEXT: psllw %xmm1, %xmm0
840 ; SSE41-LABEL: splatvar_shift_v4i16:
842 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
843 ; SSE41-NEXT: psllw %xmm1, %xmm0
846 ; AVX-LABEL: splatvar_shift_v4i16:
848 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
849 ; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0
852 ; XOP-LABEL: splatvar_shift_v4i16:
854 ; XOP-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
855 ; XOP-NEXT: vpsllw %xmm1, %xmm0, %xmm0
858 ; AVX512-LABEL: splatvar_shift_v4i16:
860 ; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
861 ; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0
864 ; AVX512VL-LABEL: splatvar_shift_v4i16:
866 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
867 ; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
868 ; AVX512VL-NEXT: retq
870 ; X32-SSE-LABEL: splatvar_shift_v4i16:
872 ; X32-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
873 ; X32-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
874 ; X32-SSE-NEXT: psllw %xmm1, %xmm0
876 %splat = shufflevector <4 x i16> %b, <4 x i16> undef, <4 x i32> zeroinitializer
877 %shift = shl <4 x i16> %a, %splat
; Uniform shl on <2 x i16> - element 0 of %b is splatted to both lanes by the
; all-zero shuffle mask, then each lane of %a is shifted left by that amount.
; The expected assembly is the same shape as for the <4 x i16> variant, one
; psllw/vpsllw fed by a count register that holds only the zero-extended
; 16-bit amount (pslldq + psrldq on the SSE2 runs, pmovzxwq on the others).
881 define <2 x i16> @splatvar_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
882 ; SSE2-LABEL: splatvar_shift_v2i16:
884 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
885 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
886 ; SSE2-NEXT: psllw %xmm1, %xmm0
889 ; SSE41-LABEL: splatvar_shift_v2i16:
891 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
892 ; SSE41-NEXT: psllw %xmm1, %xmm0
895 ; AVX-LABEL: splatvar_shift_v2i16:
897 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
898 ; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0
901 ; XOP-LABEL: splatvar_shift_v2i16:
903 ; XOP-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
904 ; XOP-NEXT: vpsllw %xmm1, %xmm0, %xmm0
907 ; AVX512-LABEL: splatvar_shift_v2i16:
909 ; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
910 ; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0
913 ; AVX512VL-LABEL: splatvar_shift_v2i16:
915 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
916 ; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
917 ; AVX512VL-NEXT: retq
919 ; X32-SSE-LABEL: splatvar_shift_v2i16:
921 ; X32-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
922 ; X32-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
923 ; X32-SSE-NEXT: psllw %xmm1, %xmm0
925 %splat = shufflevector <2 x i16> %b, <2 x i16> undef, <2 x i32> zeroinitializer
926 %shift = shl <2 x i16> %a, %splat
; Uniform shl on <8 x i8> - byte 0 of %b is splatted to every lane by the
; all-zero shuffle mask and used as the shift amount for each lane of %a.
; Expected lowerings recorded by the checks below
;  - The SSE2, SSE4.1, AVX1, AVX2 and 32-bit SSE2 runs shift as 16-bit words
;    (psllw), then AND the result with a byte-broadcast copy of an all-ones
;    vector shifted by the same amount (presumably to drop the bits that
;    crossed from one byte into its neighbour).
;  - The XOP runs splat the amount byte and use vpshlb directly.
;  - The AVX512 runs broadcast the amount byte, zero-extend both operands (to
;    dwords with vpmovzxbd or to words with vpmovzxbw), apply a per-element
;    variable shift (vpsllvd / vpsllvw) and truncate back (vpmovdb / vpmovwb).
930 define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
931 ; SSE2-LABEL: splatvar_shift_v8i8:
933 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
934 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
935 ; SSE2-NEXT: psllw %xmm1, %xmm0
936 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
937 ; SSE2-NEXT: psllw %xmm1, %xmm2
938 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
939 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,2,3,4,5,6,7]
940 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
941 ; SSE2-NEXT: pand %xmm1, %xmm0
944 ; SSE41-LABEL: splatvar_shift_v8i8:
946 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
947 ; SSE41-NEXT: psllw %xmm1, %xmm0
948 ; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
949 ; SSE41-NEXT: psllw %xmm1, %xmm2
950 ; SSE41-NEXT: pxor %xmm1, %xmm1
951 ; SSE41-NEXT: pshufb %xmm1, %xmm2
952 ; SSE41-NEXT: pand %xmm2, %xmm0
955 ; AVX1-LABEL: splatvar_shift_v8i8:
957 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
958 ; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
959 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
960 ; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm1
961 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
962 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
963 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
966 ; AVX2-LABEL: splatvar_shift_v8i8:
968 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
969 ; AVX2-NEXT: vpsllw %xmm1, %xmm0, %xmm0
970 ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
971 ; AVX2-NEXT: vpsllw %xmm1, %xmm2, %xmm1
972 ; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
973 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
976 ; XOPAVX1-LABEL: splatvar_shift_v8i8:
978 ; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
979 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
980 ; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0
983 ; XOPAVX2-LABEL: splatvar_shift_v8i8:
985 ; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
986 ; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
989 ; AVX512DQ-LABEL: splatvar_shift_v8i8:
991 ; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
992 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
993 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
994 ; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
995 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
996 ; AVX512DQ-NEXT: vzeroupper
997 ; AVX512DQ-NEXT: retq
999 ; AVX512BW-LABEL: splatvar_shift_v8i8:
1000 ; AVX512BW: # %bb.0:
1001 ; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
1002 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1003 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
1004 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
1005 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
1006 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1007 ; AVX512BW-NEXT: vzeroupper
1008 ; AVX512BW-NEXT: retq
1010 ; AVX512DQVL-LABEL: splatvar_shift_v8i8:
1011 ; AVX512DQVL: # %bb.0:
1012 ; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
1013 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1014 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
1015 ; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
1016 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
1017 ; AVX512DQVL-NEXT: vzeroupper
1018 ; AVX512DQVL-NEXT: retq
1020 ; AVX512BWVL-LABEL: splatvar_shift_v8i8:
1021 ; AVX512BWVL: # %bb.0:
1022 ; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
1023 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1024 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
1025 ; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
1026 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
1027 ; AVX512BWVL-NEXT: vzeroupper
1028 ; AVX512BWVL-NEXT: retq
1030 ; X32-SSE-LABEL: splatvar_shift_v8i8:
1032 ; X32-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
1033 ; X32-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1034 ; X32-SSE-NEXT: psllw %xmm1, %xmm0
1035 ; X32-SSE-NEXT: pcmpeqd %xmm2, %xmm2
1036 ; X32-SSE-NEXT: psllw %xmm1, %xmm2
1037 ; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1038 ; X32-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,2,3,4,5,6,7]
1039 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
1040 ; X32-SSE-NEXT: pand %xmm1, %xmm0
1041 ; X32-SSE-NEXT: retl
1042 %splat = shufflevector <8 x i8> %b, <8 x i8> undef, <8 x i32> zeroinitializer
1043 %shift = shl <8 x i8> %a, %splat
; Uniform shl on <4 x i8> - byte 0 of %b is splatted to every lane by the
; all-zero shuffle mask and used as the shift amount for each lane of %a.
; Expected lowerings recorded by the checks below
;  - The SSE2, SSE4.1, AVX1, AVX2 and 32-bit SSE2 runs shift as 16-bit words
;    (psllw), then AND the result with a byte-broadcast copy of an all-ones
;    vector shifted by the same amount (presumably to drop the bits that
;    crossed from one byte into its neighbour).
;  - XOPAVX1 duplicates the amount byte with vpunpcklbw + vpshuflw and XOPAVX2
;    with vpbroadcastb, both then use vpshlb.
;  - The AVX512 runs broadcast the amount byte, zero-extend both operands (to
;    dwords with vpmovzxbd or to words with vpmovzxbw), apply a per-element
;    variable shift (vpsllvd / vpsllvw) and truncate back (vpmovdb / vpmovwb).
1047 define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
1048 ; SSE2-LABEL: splatvar_shift_v4i8:
1050 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
1051 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1052 ; SSE2-NEXT: psllw %xmm1, %xmm0
1053 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
1054 ; SSE2-NEXT: psllw %xmm1, %xmm2
1055 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1056 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,2,3,4,5,6,7]
1057 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
1058 ; SSE2-NEXT: pand %xmm1, %xmm0
1061 ; SSE41-LABEL: splatvar_shift_v4i8:
1063 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1064 ; SSE41-NEXT: psllw %xmm1, %xmm0
1065 ; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
1066 ; SSE41-NEXT: psllw %xmm1, %xmm2
1067 ; SSE41-NEXT: pxor %xmm1, %xmm1
1068 ; SSE41-NEXT: pshufb %xmm1, %xmm2
1069 ; SSE41-NEXT: pand %xmm2, %xmm0
1072 ; AVX1-LABEL: splatvar_shift_v4i8:
1074 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1075 ; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
1076 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
1077 ; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm1
1078 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1079 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1080 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1083 ; AVX2-LABEL: splatvar_shift_v4i8:
1085 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1086 ; AVX2-NEXT: vpsllw %xmm1, %xmm0, %xmm0
1087 ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
1088 ; AVX2-NEXT: vpsllw %xmm1, %xmm2, %xmm1
1089 ; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
1090 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1093 ; XOPAVX1-LABEL: splatvar_shift_v4i8:
1095 ; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1096 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7]
1097 ; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0
1098 ; XOPAVX1-NEXT: retq
1100 ; XOPAVX2-LABEL: splatvar_shift_v4i8:
1102 ; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
1103 ; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
1104 ; XOPAVX2-NEXT: retq
1106 ; AVX512DQ-LABEL: splatvar_shift_v4i8:
1107 ; AVX512DQ: # %bb.0:
1108 ; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
1109 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1110 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
1111 ; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
1112 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1113 ; AVX512DQ-NEXT: vzeroupper
1114 ; AVX512DQ-NEXT: retq
1116 ; AVX512BW-LABEL: splatvar_shift_v4i8:
1117 ; AVX512BW: # %bb.0:
1118 ; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
1119 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1120 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
1121 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
1122 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
1123 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1124 ; AVX512BW-NEXT: vzeroupper
1125 ; AVX512BW-NEXT: retq
1127 ; AVX512DQVL-LABEL: splatvar_shift_v4i8:
1128 ; AVX512DQVL: # %bb.0:
1129 ; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
1130 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1131 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
1132 ; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
1133 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
1134 ; AVX512DQVL-NEXT: vzeroupper
1135 ; AVX512DQVL-NEXT: retq
1137 ; AVX512BWVL-LABEL: splatvar_shift_v4i8:
1138 ; AVX512BWVL: # %bb.0:
1139 ; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
1140 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1141 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
1142 ; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
1143 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
1144 ; AVX512BWVL-NEXT: vzeroupper
1145 ; AVX512BWVL-NEXT: retq
1147 ; X32-SSE-LABEL: splatvar_shift_v4i8:
1149 ; X32-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
1150 ; X32-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1151 ; X32-SSE-NEXT: psllw %xmm1, %xmm0
1152 ; X32-SSE-NEXT: pcmpeqd %xmm2, %xmm2
1153 ; X32-SSE-NEXT: psllw %xmm1, %xmm2
1154 ; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1155 ; X32-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,2,3,4,5,6,7]
1156 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
1157 ; X32-SSE-NEXT: pand %xmm1, %xmm0
1158 ; X32-SSE-NEXT: retl
1159 %splat = shufflevector <4 x i8> %b, <4 x i8> undef, <4 x i32> zeroinitializer
1160 %shift = shl <4 x i8> %a, %splat
; Uniform shl on <2 x i8> - byte 0 of %b is splatted to both lanes by the
; all-zero shuffle mask and used as the shift amount for each lane of %a.
; Expected lowerings recorded by the checks below
;  - The SSE2, SSE4.1, AVX1, AVX2 and 32-bit SSE2 runs shift as 16-bit words
;    (psllw), then AND the result with a byte-broadcast copy of an all-ones
;    vector shifted by the same amount (presumably to drop the bits that
;    crossed from one byte into its neighbour).
;  - The XOP runs copy byte 0 of the amount into byte 1 with insertq and then
;    use vpshlb.
;  - The AVX512 runs broadcast the amount byte, zero-extend both operands (to
;    dwords with vpmovzxbd or to words with vpmovzxbw), apply a per-element
;    variable shift (vpsllvd / vpsllvw) and truncate back (vpmovdb / vpmovwb).
1164 define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
1165 ; SSE2-LABEL: splatvar_shift_v2i8:
1167 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
1168 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1169 ; SSE2-NEXT: psllw %xmm1, %xmm0
1170 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
1171 ; SSE2-NEXT: psllw %xmm1, %xmm2
1172 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1173 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,2,3,4,5,6,7]
1174 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
1175 ; SSE2-NEXT: pand %xmm1, %xmm0
1178 ; SSE41-LABEL: splatvar_shift_v2i8:
1180 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1181 ; SSE41-NEXT: psllw %xmm1, %xmm0
1182 ; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
1183 ; SSE41-NEXT: psllw %xmm1, %xmm2
1184 ; SSE41-NEXT: pxor %xmm1, %xmm1
1185 ; SSE41-NEXT: pshufb %xmm1, %xmm2
1186 ; SSE41-NEXT: pand %xmm2, %xmm0
1189 ; AVX1-LABEL: splatvar_shift_v2i8:
1191 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1192 ; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
1193 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
1194 ; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm1
1195 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1196 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1197 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1200 ; AVX2-LABEL: splatvar_shift_v2i8:
1202 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1203 ; AVX2-NEXT: vpsllw %xmm1, %xmm0, %xmm0
1204 ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
1205 ; AVX2-NEXT: vpsllw %xmm1, %xmm2, %xmm1
1206 ; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
1207 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1210 ; XOP-LABEL: splatvar_shift_v2i8:
1212 ; XOP-NEXT: insertq {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7,u,u,u,u,u,u,u,u]
1213 ; XOP-NEXT: vpshlb %xmm1, %xmm0, %xmm0
1216 ; AVX512DQ-LABEL: splatvar_shift_v2i8:
1217 ; AVX512DQ: # %bb.0:
1218 ; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
1219 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1220 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
1221 ; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
1222 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1223 ; AVX512DQ-NEXT: vzeroupper
1224 ; AVX512DQ-NEXT: retq
1226 ; AVX512BW-LABEL: splatvar_shift_v2i8:
1227 ; AVX512BW: # %bb.0:
1228 ; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
1229 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1230 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
1231 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
1232 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
1233 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1234 ; AVX512BW-NEXT: vzeroupper
1235 ; AVX512BW-NEXT: retq
1237 ; AVX512DQVL-LABEL: splatvar_shift_v2i8:
1238 ; AVX512DQVL: # %bb.0:
1239 ; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
1240 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1241 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
1242 ; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
1243 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
1244 ; AVX512DQVL-NEXT: vzeroupper
1245 ; AVX512DQVL-NEXT: retq
1247 ; AVX512BWVL-LABEL: splatvar_shift_v2i8:
1248 ; AVX512BWVL: # %bb.0:
1249 ; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
1250 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1251 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
1252 ; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
1253 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
1254 ; AVX512BWVL-NEXT: vzeroupper
1255 ; AVX512BWVL-NEXT: retq
1257 ; X32-SSE-LABEL: splatvar_shift_v2i8:
1259 ; X32-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
1260 ; X32-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1261 ; X32-SSE-NEXT: psllw %xmm1, %xmm0
1262 ; X32-SSE-NEXT: pcmpeqd %xmm2, %xmm2
1263 ; X32-SSE-NEXT: psllw %xmm1, %xmm2
1264 ; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1265 ; X32-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,2,3,4,5,6,7]
1266 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
1267 ; X32-SSE-NEXT: pand %xmm1, %xmm0
1268 ; X32-SSE-NEXT: retl
1269 %splat = shufflevector <2 x i8> %b, <2 x i8> undef, <2 x i32> zeroinitializer
1270 %shift = shl <2 x i8> %a, %splat
; Shift <2 x i32> %a left by the constant vector <4, 5>, i.e. a different
; compile-time amount in each lane.
; Expected lowerings recorded by the checks below
;  - SSE2 and the 32-bit SSE2 run issue two immediate shifts (pslld $4 and
;    pslld $5) and recombine the lanes with pshufd + punpckldq.
;  - SSE4.1 and AVX1 issue the same two immediate shifts and merge them with
;    pblendw / vpblendw.
;  - AVX2, XOPAVX2 and the AVX512 runs use one vpsllvd with a RIP-relative
;    memory operand, while XOPAVX1 uses vpshld with one.
1278 define <2 x i32> @constant_shift_v2i32(<2 x i32> %a) nounwind {
1279 ; SSE2-LABEL: constant_shift_v2i32:
1281 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1282 ; SSE2-NEXT: pslld $4, %xmm1
1283 ; SSE2-NEXT: pslld $5, %xmm0
1284 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1285 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1286 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1289 ; SSE41-LABEL: constant_shift_v2i32:
1291 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1292 ; SSE41-NEXT: pslld $5, %xmm1
1293 ; SSE41-NEXT: pslld $4, %xmm0
1294 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
1297 ; AVX1-LABEL: constant_shift_v2i32:
1299 ; AVX1-NEXT: vpslld $5, %xmm0, %xmm1
1300 ; AVX1-NEXT: vpslld $4, %xmm0, %xmm0
1301 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
1304 ; AVX2-LABEL: constant_shift_v2i32:
1306 ; AVX2-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
1309 ; XOPAVX1-LABEL: constant_shift_v2i32:
1311 ; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm0
1312 ; XOPAVX1-NEXT: retq
1314 ; XOPAVX2-LABEL: constant_shift_v2i32:
1316 ; XOPAVX2-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
1317 ; XOPAVX2-NEXT: retq
1319 ; AVX512-LABEL: constant_shift_v2i32:
1321 ; AVX512-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
1324 ; AVX512VL-LABEL: constant_shift_v2i32:
1325 ; AVX512VL: # %bb.0:
1326 ; AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
1327 ; AVX512VL-NEXT: retq
1329 ; X32-SSE-LABEL: constant_shift_v2i32:
1331 ; X32-SSE-NEXT: movdqa %xmm0, %xmm1
1332 ; X32-SSE-NEXT: pslld $4, %xmm1
1333 ; X32-SSE-NEXT: pslld $5, %xmm0
1334 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1335 ; X32-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1336 ; X32-SSE-NEXT: movdqa %xmm1, %xmm0
1337 ; X32-SSE-NEXT: retl
1338 %shift = shl <2 x i32> %a, <i32 4, i32 5>
1339 ret <2 x i32> %shift
; Shift left of a <4 x i16> vector by non-uniform constant amounts
; (0, 1, 2 and 3 for lanes 0 through 3). Several configurations are expected
; to emit a pmullw with a constant-pool operand instead of a shift
; (presumably a multiply by powers of two - the constant itself is hidden
; behind a regex in the checks).
1342 define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind {
1343 ; SSE-LABEL: constant_shift_v4i16:
1345 ; SSE-NEXT: pmullw {{.*}}(%rip), %xmm0
1348 ; AVX-LABEL: constant_shift_v4i16:
1350 ; AVX-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
1353 ; XOP-LABEL: constant_shift_v4i16:
1355 ; XOP-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm0
1358 ; AVX512DQ-LABEL: constant_shift_v4i16:
1359 ; AVX512DQ: # %bb.0:
1360 ; AVX512DQ-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
1361 ; AVX512DQ-NEXT: retq
1363 ; AVX512BW-LABEL: constant_shift_v4i16:
1364 ; AVX512BW: # %bb.0:
1365 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1366 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = <0,1,2,3,u,u,u,u>
1367 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
1368 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1369 ; AVX512BW-NEXT: vzeroupper
1370 ; AVX512BW-NEXT: retq
1372 ; AVX512DQVL-LABEL: constant_shift_v4i16:
1373 ; AVX512DQVL: # %bb.0:
1374 ; AVX512DQVL-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
1375 ; AVX512DQVL-NEXT: retq
1377 ; AVX512BWVL-LABEL: constant_shift_v4i16:
1378 ; AVX512BWVL: # %bb.0:
1379 ; AVX512BWVL-NEXT: vpsllvw {{.*}}(%rip), %xmm0, %xmm0
1380 ; AVX512BWVL-NEXT: retq
1382 ; X32-SSE-LABEL: constant_shift_v4i16:
1384 ; X32-SSE-NEXT: pmullw {{\.LCPI.*}}, %xmm0
1385 ; X32-SSE-NEXT: retl
1386 %shift = shl <4 x i16> %a, <i16 0, i16 1, i16 2, i16 3>
1387 ret <4 x i16> %shift
; Shift left of a <2 x i16> vector by non-uniform constant amounts
; (lane 0 by 2, lane 1 by 3). Depending on the configuration the expected
; code is a pmullw with a constant-pool operand, two immediate psllw shifts
; merged with a blend, or a single variable word shift (vpshlw / vpsllvw).
1390 define <2 x i16> @constant_shift_v2i16(<2 x i16> %a) nounwind {
1391 ; SSE2-LABEL: constant_shift_v2i16:
1393 ; SSE2-NEXT: pmullw {{.*}}(%rip), %xmm0
1396 ; SSE41-LABEL: constant_shift_v2i16:
1398 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1399 ; SSE41-NEXT: psllw $3, %xmm1
1400 ; SSE41-NEXT: psllw $2, %xmm0
1401 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
1404 ; AVX-LABEL: constant_shift_v2i16:
1406 ; AVX-NEXT: vpsllw $3, %xmm0, %xmm1
1407 ; AVX-NEXT: vpsllw $2, %xmm0, %xmm0
1408 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
1411 ; XOP-LABEL: constant_shift_v2i16:
1413 ; XOP-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm0
1416 ; AVX512DQ-LABEL: constant_shift_v2i16:
1417 ; AVX512DQ: # %bb.0:
1418 ; AVX512DQ-NEXT: vpsllw $3, %xmm0, %xmm1
1419 ; AVX512DQ-NEXT: vpsllw $2, %xmm0, %xmm0
1420 ; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
1421 ; AVX512DQ-NEXT: retq
1423 ; AVX512BW-LABEL: constant_shift_v2i16:
1424 ; AVX512BW: # %bb.0:
1425 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1426 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = <2,3,u,u,u,u,u,u>
1427 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
1428 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1429 ; AVX512BW-NEXT: vzeroupper
1430 ; AVX512BW-NEXT: retq
1432 ; AVX512DQVL-LABEL: constant_shift_v2i16:
1433 ; AVX512DQVL: # %bb.0:
1434 ; AVX512DQVL-NEXT: vpsllw $3, %xmm0, %xmm1
1435 ; AVX512DQVL-NEXT: vpsllw $2, %xmm0, %xmm0
1436 ; AVX512DQVL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
1437 ; AVX512DQVL-NEXT: retq
1439 ; AVX512BWVL-LABEL: constant_shift_v2i16:
1440 ; AVX512BWVL: # %bb.0:
1441 ; AVX512BWVL-NEXT: vpsllvw {{.*}}(%rip), %xmm0, %xmm0
1442 ; AVX512BWVL-NEXT: retq
1444 ; X32-SSE-LABEL: constant_shift_v2i16:
1446 ; X32-SSE-NEXT: pmullw {{\.LCPI.*}}, %xmm0
1447 ; X32-SSE-NEXT: retl
1448 %shift = shl <2 x i16> %a, <i16 2, i16 3>
1449 ret <2 x i16> %shift
; Shift left of a <8 x i8> vector by non-uniform constant amounts
; (0 through 7, one per lane). Apart from the XOP runs, which expect a single
; vpshlb, the expected sequences widen the bytes to wider lanes, shift or
; multiply there, and narrow back (packuswb / vpmovdb / vpmovwb).
1452 define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
1453 ; SSE2-LABEL: constant_shift_v8i8:
1455 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1456 ; SSE2-NEXT: pmullw {{.*}}(%rip), %xmm0
1457 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
1458 ; SSE2-NEXT: pxor %xmm1, %xmm1
1459 ; SSE2-NEXT: packuswb %xmm1, %xmm0
1462 ; SSE41-LABEL: constant_shift_v8i8:
1464 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1465 ; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm0
1466 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm0
1467 ; SSE41-NEXT: pxor %xmm1, %xmm1
1468 ; SSE41-NEXT: packuswb %xmm1, %xmm0
1471 ; AVX1-LABEL: constant_shift_v8i8:
1473 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1474 ; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
1475 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
1476 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1477 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
1480 ; AVX2-LABEL: constant_shift_v8i8:
1482 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1483 ; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
1484 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
1485 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1486 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
1487 ; AVX2-NEXT: vzeroupper
1490 ; XOP-LABEL: constant_shift_v8i8:
1492 ; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
1495 ; AVX512DQ-LABEL: constant_shift_v8i8:
1496 ; AVX512DQ: # %bb.0:
1497 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1498 ; AVX512DQ-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
1499 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1500 ; AVX512DQ-NEXT: vzeroupper
1501 ; AVX512DQ-NEXT: retq
1503 ; AVX512BW-LABEL: constant_shift_v8i8:
1504 ; AVX512BW: # %bb.0:
1505 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,0,0,0,0,0,0,0,0]
1506 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1507 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
1508 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
1509 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1510 ; AVX512BW-NEXT: vzeroupper
1511 ; AVX512BW-NEXT: retq
1513 ; AVX512DQVL-LABEL: constant_shift_v8i8:
1514 ; AVX512DQVL: # %bb.0:
1515 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1516 ; AVX512DQVL-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
1517 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
1518 ; AVX512DQVL-NEXT: vzeroupper
1519 ; AVX512DQVL-NEXT: retq
1521 ; AVX512BWVL-LABEL: constant_shift_v8i8:
1522 ; AVX512BWVL: # %bb.0:
1523 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1524 ; AVX512BWVL-NEXT: vpsllvw {{.*}}(%rip), %ymm0, %ymm0
1525 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
1526 ; AVX512BWVL-NEXT: vzeroupper
1527 ; AVX512BWVL-NEXT: retq
1529 ; X32-SSE-LABEL: constant_shift_v8i8:
1531 ; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1532 ; X32-SSE-NEXT: pmullw {{\.LCPI.*}}, %xmm0
1533 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
1534 ; X32-SSE-NEXT: pxor %xmm1, %xmm1
1535 ; X32-SSE-NEXT: packuswb %xmm1, %xmm0
1536 ; X32-SSE-NEXT: retl
1537 %shift = shl <8 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
; Shift left of a <4 x i8> vector by non-uniform constant amounts
; (0, 1, 2 and 3 for lanes 0 through 3). The expected instruction sequences
; mirror the wider i8 case - widen, shift or multiply, then narrow - with the
; XOP runs expecting a single vpshlb.
1541 define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
1542 ; SSE2-LABEL: constant_shift_v4i8:
1544 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1545 ; SSE2-NEXT: pmullw {{.*}}(%rip), %xmm0
1546 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
1547 ; SSE2-NEXT: pxor %xmm1, %xmm1
1548 ; SSE2-NEXT: packuswb %xmm1, %xmm0
1551 ; SSE41-LABEL: constant_shift_v4i8:
1553 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1554 ; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm0
1555 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm0
1556 ; SSE41-NEXT: pxor %xmm1, %xmm1
1557 ; SSE41-NEXT: packuswb %xmm1, %xmm0
1560 ; AVX1-LABEL: constant_shift_v4i8:
1562 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1563 ; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
1564 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
1565 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1566 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
1569 ; AVX2-LABEL: constant_shift_v4i8:
1571 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1572 ; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
1573 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
1574 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1575 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
1576 ; AVX2-NEXT: vzeroupper
1579 ; XOP-LABEL: constant_shift_v4i8:
1581 ; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
1584 ; AVX512DQ-LABEL: constant_shift_v4i8:
1585 ; AVX512DQ: # %bb.0:
1586 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1587 ; AVX512DQ-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
1588 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1589 ; AVX512DQ-NEXT: vzeroupper
1590 ; AVX512DQ-NEXT: retq
1592 ; AVX512BW-LABEL: constant_shift_v4i8:
1593 ; AVX512BW: # %bb.0:
1594 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,0,0,0,0,0,0,0,0,0,0,0,0]
1595 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1596 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
1597 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
1598 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1599 ; AVX512BW-NEXT: vzeroupper
1600 ; AVX512BW-NEXT: retq
1602 ; AVX512DQVL-LABEL: constant_shift_v4i8:
1603 ; AVX512DQVL: # %bb.0:
1604 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1605 ; AVX512DQVL-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
1606 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
1607 ; AVX512DQVL-NEXT: vzeroupper
1608 ; AVX512DQVL-NEXT: retq
1610 ; AVX512BWVL-LABEL: constant_shift_v4i8:
1611 ; AVX512BWVL: # %bb.0:
1612 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1613 ; AVX512BWVL-NEXT: vpsllvw {{.*}}(%rip), %ymm0, %ymm0
1614 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
1615 ; AVX512BWVL-NEXT: vzeroupper
1616 ; AVX512BWVL-NEXT: retq
1618 ; X32-SSE-LABEL: constant_shift_v4i8:
1620 ; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1621 ; X32-SSE-NEXT: pmullw {{\.LCPI.*}}, %xmm0
1622 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
1623 ; X32-SSE-NEXT: pxor %xmm1, %xmm1
1624 ; X32-SSE-NEXT: packuswb %xmm1, %xmm0
1625 ; X32-SSE-NEXT: retl
1626 %shift = shl <4 x i8> %a, <i8 0, i8 1, i8 2, i8 3>
; Shift left of a <2 x i8> vector by non-uniform constant amounts
; (lane 0 by 2, lane 1 by 3). The expected instruction sequences follow the
; same widen / shift-or-multiply / narrow shape as the other non-uniform i8
; tests, with the XOP runs expecting a single vpshlb.
1630 define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
1631 ; SSE2-LABEL: constant_shift_v2i8:
1633 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1634 ; SSE2-NEXT: pmullw {{.*}}(%rip), %xmm0
1635 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
1636 ; SSE2-NEXT: pxor %xmm1, %xmm1
1637 ; SSE2-NEXT: packuswb %xmm1, %xmm0
1640 ; SSE41-LABEL: constant_shift_v2i8:
1642 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1643 ; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm0
1644 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm0
1645 ; SSE41-NEXT: pxor %xmm1, %xmm1
1646 ; SSE41-NEXT: packuswb %xmm1, %xmm0
1649 ; AVX1-LABEL: constant_shift_v2i8:
1651 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1652 ; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
1653 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
1654 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1655 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
1658 ; AVX2-LABEL: constant_shift_v2i8:
1660 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1661 ; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
1662 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
1663 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1664 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
1665 ; AVX2-NEXT: vzeroupper
1668 ; XOP-LABEL: constant_shift_v2i8:
1670 ; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
1673 ; AVX512DQ-LABEL: constant_shift_v2i8:
1674 ; AVX512DQ: # %bb.0:
1675 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1676 ; AVX512DQ-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
1677 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1678 ; AVX512DQ-NEXT: vzeroupper
1679 ; AVX512DQ-NEXT: retq
1681 ; AVX512BW-LABEL: constant_shift_v2i8:
1682 ; AVX512BW: # %bb.0:
1683 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
1684 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1685 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
1686 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
1687 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1688 ; AVX512BW-NEXT: vzeroupper
1689 ; AVX512BW-NEXT: retq
1691 ; AVX512DQVL-LABEL: constant_shift_v2i8:
1692 ; AVX512DQVL: # %bb.0:
1693 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1694 ; AVX512DQVL-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
1695 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
1696 ; AVX512DQVL-NEXT: vzeroupper
1697 ; AVX512DQVL-NEXT: retq
1699 ; AVX512BWVL-LABEL: constant_shift_v2i8:
1700 ; AVX512BWVL: # %bb.0:
1701 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1702 ; AVX512BWVL-NEXT: vpsllvw {{.*}}(%rip), %ymm0, %ymm0
1703 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
1704 ; AVX512BWVL-NEXT: vzeroupper
1705 ; AVX512BWVL-NEXT: retq
1707 ; X32-SSE-LABEL: constant_shift_v2i8:
1709 ; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1710 ; X32-SSE-NEXT: pmullw {{\.LCPI.*}}, %xmm0
1711 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
1712 ; X32-SSE-NEXT: pxor %xmm1, %xmm1
1713 ; X32-SSE-NEXT: packuswb %xmm1, %xmm0
1714 ; X32-SSE-NEXT: retl
1715 %shift = shl <2 x i8> %a, <i8 2, i8 3>
1720 ; Uniform Constant Shifts
; Shift left of a <2 x i32> vector by the same constant (5) in every lane.
; Every checked configuration is expected to emit a single immediate
; doubleword shift (pslld / vpslld with $5).
1723 define <2 x i32> @splatconstant_shift_v2i32(<2 x i32> %a) nounwind {
1724 ; SSE-LABEL: splatconstant_shift_v2i32:
1726 ; SSE-NEXT: pslld $5, %xmm0
1729 ; AVX-LABEL: splatconstant_shift_v2i32:
1731 ; AVX-NEXT: vpslld $5, %xmm0, %xmm0
1734 ; XOP-LABEL: splatconstant_shift_v2i32:
1736 ; XOP-NEXT: vpslld $5, %xmm0, %xmm0
1739 ; AVX512-LABEL: splatconstant_shift_v2i32:
1741 ; AVX512-NEXT: vpslld $5, %xmm0, %xmm0
1744 ; AVX512VL-LABEL: splatconstant_shift_v2i32:
1745 ; AVX512VL: # %bb.0:
1746 ; AVX512VL-NEXT: vpslld $5, %xmm0, %xmm0
1747 ; AVX512VL-NEXT: retq
1749 ; X32-SSE-LABEL: splatconstant_shift_v2i32:
1751 ; X32-SSE-NEXT: pslld $5, %xmm0
1752 ; X32-SSE-NEXT: retl
1753 %shift = shl <2 x i32> %a, <i32 5, i32 5>
1754 ret <2 x i32> %shift
; Shift left of a <4 x i16> vector by the same constant (3) in every lane.
; Every checked configuration is expected to emit a single immediate word
; shift (psllw / vpsllw with $3).
1757 define <4 x i16> @splatconstant_shift_v4i16(<4 x i16> %a) nounwind {
1758 ; SSE-LABEL: splatconstant_shift_v4i16:
1760 ; SSE-NEXT: psllw $3, %xmm0
1763 ; AVX-LABEL: splatconstant_shift_v4i16:
1765 ; AVX-NEXT: vpsllw $3, %xmm0, %xmm0
1768 ; XOP-LABEL: splatconstant_shift_v4i16:
1770 ; XOP-NEXT: vpsllw $3, %xmm0, %xmm0
1773 ; AVX512-LABEL: splatconstant_shift_v4i16:
1775 ; AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
1778 ; AVX512VL-LABEL: splatconstant_shift_v4i16:
1779 ; AVX512VL: # %bb.0:
1780 ; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
1781 ; AVX512VL-NEXT: retq
1783 ; X32-SSE-LABEL: splatconstant_shift_v4i16:
1785 ; X32-SSE-NEXT: psllw $3, %xmm0
1786 ; X32-SSE-NEXT: retl
1787 %shift = shl <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
1788 ret <4 x i16> %shift
; Shift left of a <2 x i16> vector by the same constant (3) in both lanes.
; Every checked configuration is expected to emit a single immediate word
; shift (psllw / vpsllw with $3).
1791 define <2 x i16> @splatconstant_shift_v2i16(<2 x i16> %a) nounwind {
1792 ; SSE-LABEL: splatconstant_shift_v2i16:
1794 ; SSE-NEXT: psllw $3, %xmm0
1797 ; AVX-LABEL: splatconstant_shift_v2i16:
1799 ; AVX-NEXT: vpsllw $3, %xmm0, %xmm0
1802 ; XOP-LABEL: splatconstant_shift_v2i16:
1804 ; XOP-NEXT: vpsllw $3, %xmm0, %xmm0
1807 ; AVX512-LABEL: splatconstant_shift_v2i16:
1809 ; AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
1812 ; AVX512VL-LABEL: splatconstant_shift_v2i16:
1813 ; AVX512VL: # %bb.0:
1814 ; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
1815 ; AVX512VL-NEXT: retq
1817 ; X32-SSE-LABEL: splatconstant_shift_v2i16:
1819 ; X32-SSE-NEXT: psllw $3, %xmm0
1820 ; X32-SSE-NEXT: retl
1821 %shift = shl <2 x i16> %a, <i16 3, i16 3>
1822 ret <2 x i16> %shift
; Shift left of a <8 x i8> vector by the same constant (3) in every lane.
; Outside the XOP runs (which expect vpshlb) the expected code is a 16-bit
; psllw by 3 followed by a pand with a constant-pool operand - presumably a
; mask clearing bits that crossed a byte boundary; the constant itself is
; hidden behind a regex in the checks.
1825 define <8 x i8> @splatconstant_shift_v8i8(<8 x i8> %a) nounwind {
1826 ; SSE-LABEL: splatconstant_shift_v8i8:
1828 ; SSE-NEXT: psllw $3, %xmm0
1829 ; SSE-NEXT: pand {{.*}}(%rip), %xmm0
1832 ; AVX-LABEL: splatconstant_shift_v8i8:
1834 ; AVX-NEXT: vpsllw $3, %xmm0, %xmm0
1835 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
1838 ; XOP-LABEL: splatconstant_shift_v8i8:
1840 ; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
1843 ; AVX512-LABEL: splatconstant_shift_v8i8:
1845 ; AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
1846 ; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
1849 ; AVX512VL-LABEL: splatconstant_shift_v8i8:
1850 ; AVX512VL: # %bb.0:
1851 ; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
1852 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
1853 ; AVX512VL-NEXT: retq
1855 ; X32-SSE-LABEL: splatconstant_shift_v8i8:
1857 ; X32-SSE-NEXT: psllw $3, %xmm0
1858 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
1859 ; X32-SSE-NEXT: retl
1860 %shift = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
; Shift left of a <4 x i8> vector by the same constant (3) in every lane.
; Outside the XOP runs (which expect vpshlb) the expected code is a 16-bit
; psllw by 3 followed by a pand with a constant-pool operand - presumably a
; mask clearing bits that crossed a byte boundary; the constant itself is
; hidden behind a regex in the checks.
1864 define <4 x i8> @splatconstant_shift_v4i8(<4 x i8> %a) nounwind {
1865 ; SSE-LABEL: splatconstant_shift_v4i8:
1867 ; SSE-NEXT: psllw $3, %xmm0
1868 ; SSE-NEXT: pand {{.*}}(%rip), %xmm0
1871 ; AVX-LABEL: splatconstant_shift_v4i8:
1873 ; AVX-NEXT: vpsllw $3, %xmm0, %xmm0
1874 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
1877 ; XOP-LABEL: splatconstant_shift_v4i8:
1879 ; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
1882 ; AVX512-LABEL: splatconstant_shift_v4i8:
1884 ; AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
1885 ; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
1888 ; AVX512VL-LABEL: splatconstant_shift_v4i8:
1889 ; AVX512VL: # %bb.0:
1890 ; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
1891 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
1892 ; AVX512VL-NEXT: retq
1894 ; X32-SSE-LABEL: splatconstant_shift_v4i8:
1896 ; X32-SSE-NEXT: psllw $3, %xmm0
1897 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
1898 ; X32-SSE-NEXT: retl
1899 %shift = shl <4 x i8> %a, <i8 3, i8 3, i8 3, i8 3>
; Shift left of a <2 x i8> vector by the same constant (3) in both lanes.
; Outside the XOP runs (which expect vpshlb) the expected code is a 16-bit
; psllw by 3 followed by a pand with a constant-pool operand - presumably a
; mask clearing bits that crossed a byte boundary; the constant itself is
; hidden behind a regex in the checks.
1903 define <2 x i8> @splatconstant_shift_v2i8(<2 x i8> %a) nounwind {
1904 ; SSE-LABEL: splatconstant_shift_v2i8:
1906 ; SSE-NEXT: psllw $3, %xmm0
1907 ; SSE-NEXT: pand {{.*}}(%rip), %xmm0
1910 ; AVX-LABEL: splatconstant_shift_v2i8:
1912 ; AVX-NEXT: vpsllw $3, %xmm0, %xmm0
1913 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
1916 ; XOP-LABEL: splatconstant_shift_v2i8:
1918 ; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
1921 ; AVX512-LABEL: splatconstant_shift_v2i8:
1923 ; AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
1924 ; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
1927 ; AVX512VL-LABEL: splatconstant_shift_v2i8:
1928 ; AVX512VL: # %bb.0:
1929 ; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
1930 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
1931 ; AVX512VL-NEXT: retq
1933 ; X32-SSE-LABEL: splatconstant_shift_v2i8:
1935 ; X32-SSE-NEXT: psllw $3, %xmm0
1936 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
1937 ; X32-SSE-NEXT: retl
1938 %shift = shl <2 x i8> %a, <i8 3, i8 3>