1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX1
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX2
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL --check-prefix=AVX512DQVL
11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL --check-prefix=AVX512BWVL
13 ; Just one 32-bit run to make sure we do reasonable things for i64 shifts.
14 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X32-SSE --check-prefix=X32-SSE2
; Variable per-element shift of <2 x i64>: each lane of %a is shifted left by
; the matching lane of %b (a plain `shl` of the two arguments).
; Lowerings visible in the expectations below: runs with a per-lane 64-bit
; shift use one instruction (vpsllvq or vpshlq); the other runs shift the whole
; vector twice with psllq and merge the two results (movsd or pblendw).
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them rather than editing them by hand.
20 define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
21 ; SSE2-LABEL: var_shift_v2i64:
23 ; SSE2-NEXT: movdqa %xmm0, %xmm2
24 ; SSE2-NEXT: psllq %xmm1, %xmm2
25 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
26 ; SSE2-NEXT: psllq %xmm1, %xmm0
27 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
30 ; SSE41-LABEL: var_shift_v2i64:
32 ; SSE41-NEXT: movdqa %xmm0, %xmm2
33 ; SSE41-NEXT: psllq %xmm1, %xmm2
34 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
35 ; SSE41-NEXT: psllq %xmm1, %xmm0
36 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
39 ; AVX1-LABEL: var_shift_v2i64:
41 ; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm2
42 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
43 ; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
44 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
47 ; AVX2-LABEL: var_shift_v2i64:
49 ; AVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
52 ; XOPAVX1-LABEL: var_shift_v2i64:
54 ; XOPAVX1-NEXT: vpshlq %xmm1, %xmm0, %xmm0
57 ; XOPAVX2-LABEL: var_shift_v2i64:
59 ; XOPAVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
62 ; AVX512-LABEL: var_shift_v2i64:
64 ; AVX512-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
67 ; AVX512VL-LABEL: var_shift_v2i64:
69 ; AVX512VL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
72 ; X32-SSE-LABEL: var_shift_v2i64:
74 ; X32-SSE-NEXT: movdqa %xmm0, %xmm2
75 ; X32-SSE-NEXT: psllq %xmm1, %xmm2
76 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
77 ; X32-SSE-NEXT: psllq %xmm1, %xmm0
78 ; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
80 %shift = shl <2 x i64> %a, %b
; Variable per-element shift of <4 x i32>: each lane of %a is shifted left by
; the matching lane of %b.
; Lowerings visible in the expectations below: a single vpsllvd or vpshld where
; the run has one; otherwise the shift becomes a multiply, where `pslld $23`,
; an add of a constant-pool value and cvttps2dq produce the multiplier, which
; is applied with pmulld (or with a pmuludq/pshufd/punpckldq sequence).
; NOTE(review): the constant added by paddd is not visible in these checks; in
; var_shift_v8i16 the same pattern adds 1065353216 (0x3F800000), so it is
; presumably the same value here - confirm against the llc output.
84 define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
85 ; SSE2-LABEL: var_shift_v4i32:
87 ; SSE2-NEXT: pslld $23, %xmm1
88 ; SSE2-NEXT: paddd {{.*}}(%rip), %xmm1
89 ; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
90 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
91 ; SSE2-NEXT: pmuludq %xmm1, %xmm0
92 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
93 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
94 ; SSE2-NEXT: pmuludq %xmm2, %xmm1
95 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
96 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
99 ; SSE41-LABEL: var_shift_v4i32:
101 ; SSE41-NEXT: pslld $23, %xmm1
102 ; SSE41-NEXT: paddd {{.*}}(%rip), %xmm1
103 ; SSE41-NEXT: cvttps2dq %xmm1, %xmm1
104 ; SSE41-NEXT: pmulld %xmm1, %xmm0
107 ; AVX1-LABEL: var_shift_v4i32:
109 ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
110 ; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm1, %xmm1
111 ; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
112 ; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0
115 ; AVX2-LABEL: var_shift_v4i32:
117 ; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
120 ; XOPAVX1-LABEL: var_shift_v4i32:
122 ; XOPAVX1-NEXT: vpshld %xmm1, %xmm0, %xmm0
125 ; XOPAVX2-LABEL: var_shift_v4i32:
127 ; XOPAVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
130 ; AVX512-LABEL: var_shift_v4i32:
132 ; AVX512-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
135 ; AVX512VL-LABEL: var_shift_v4i32:
137 ; AVX512VL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
138 ; AVX512VL-NEXT: retq
140 ; X32-SSE-LABEL: var_shift_v4i32:
142 ; X32-SSE-NEXT: pslld $23, %xmm1
143 ; X32-SSE-NEXT: paddd {{\.LCPI.*}}, %xmm1
144 ; X32-SSE-NEXT: cvttps2dq %xmm1, %xmm1
145 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
146 ; X32-SSE-NEXT: pmuludq %xmm1, %xmm0
147 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
148 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
149 ; X32-SSE-NEXT: pmuludq %xmm2, %xmm1
150 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
151 ; X32-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
153 %shift = shl <4 x i32> %a, %b
; Variable per-element shift of <8 x i16>: each lane of %a is shifted left by
; the matching lane of %b.
; Lowerings visible in the expectations below
;  - vpshlw or vpsllvw directly where the run has one (the avx512bw run
;    without avx512vl performs vpsllvw on zmm registers);
;  - widen both operands to 32-bit lanes with vpmovzxwd, shift with vpsllvd,
;    then narrow again (vpshufb+vpermq, or vpmovdw);
;  - otherwise build a per-lane multiplier (pslld $23, add 1065353216,
;    cvttps2dq, repack to 16-bit lanes) and apply it with pmullw.
157 define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
158 ; SSE2-LABEL: var_shift_v8i16:
160 ; SSE2-NEXT: movdqa %xmm1, %xmm2
161 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
162 ; SSE2-NEXT: pslld $23, %xmm2
163 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
164 ; SSE2-NEXT: paddd %xmm3, %xmm2
165 ; SSE2-NEXT: cvttps2dq %xmm2, %xmm2
166 ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
167 ; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
168 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
169 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
170 ; SSE2-NEXT: pslld $23, %xmm1
171 ; SSE2-NEXT: paddd %xmm3, %xmm1
172 ; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
173 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
174 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
175 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
176 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
177 ; SSE2-NEXT: pmullw %xmm1, %xmm0
180 ; SSE41-LABEL: var_shift_v8i16:
182 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
183 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
184 ; SSE41-NEXT: pslld $23, %xmm1
185 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
186 ; SSE41-NEXT: paddd %xmm3, %xmm1
187 ; SSE41-NEXT: cvttps2dq %xmm1, %xmm1
188 ; SSE41-NEXT: pslld $23, %xmm2
189 ; SSE41-NEXT: paddd %xmm3, %xmm2
190 ; SSE41-NEXT: cvttps2dq %xmm2, %xmm2
191 ; SSE41-NEXT: packusdw %xmm1, %xmm2
192 ; SSE41-NEXT: pmullw %xmm2, %xmm0
195 ; AVX1-LABEL: var_shift_v8i16:
197 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
198 ; AVX1-NEXT: vpslld $23, %xmm2, %xmm2
199 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
200 ; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
201 ; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2
202 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
203 ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
204 ; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1
205 ; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
206 ; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
207 ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
210 ; AVX2-LABEL: var_shift_v8i16:
212 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
213 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
214 ; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
215 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
216 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
217 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
218 ; AVX2-NEXT: vzeroupper
221 ; XOP-LABEL: var_shift_v8i16:
223 ; XOP-NEXT: vpshlw %xmm1, %xmm0, %xmm0
226 ; AVX512DQ-LABEL: var_shift_v8i16:
228 ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
229 ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
230 ; AVX512DQ-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
231 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
232 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
233 ; AVX512DQ-NEXT: vzeroupper
234 ; AVX512DQ-NEXT: retq
236 ; AVX512BW-LABEL: var_shift_v8i16:
238 ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
239 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
240 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
241 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
242 ; AVX512BW-NEXT: vzeroupper
243 ; AVX512BW-NEXT: retq
245 ; AVX512DQVL-LABEL: var_shift_v8i16:
246 ; AVX512DQVL: # %bb.0:
247 ; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
248 ; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
249 ; AVX512DQVL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
250 ; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0
251 ; AVX512DQVL-NEXT: vzeroupper
252 ; AVX512DQVL-NEXT: retq
254 ; AVX512BWVL-LABEL: var_shift_v8i16:
255 ; AVX512BWVL: # %bb.0:
256 ; AVX512BWVL-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
257 ; AVX512BWVL-NEXT: retq
259 ; X32-SSE-LABEL: var_shift_v8i16:
261 ; X32-SSE-NEXT: movdqa %xmm1, %xmm2
262 ; X32-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
263 ; X32-SSE-NEXT: pslld $23, %xmm2
264 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
265 ; X32-SSE-NEXT: paddd %xmm3, %xmm2
266 ; X32-SSE-NEXT: cvttps2dq %xmm2, %xmm2
267 ; X32-SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
268 ; X32-SSE-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
269 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
270 ; X32-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
271 ; X32-SSE-NEXT: pslld $23, %xmm1
272 ; X32-SSE-NEXT: paddd %xmm3, %xmm1
273 ; X32-SSE-NEXT: cvttps2dq %xmm1, %xmm1
274 ; X32-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
275 ; X32-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
276 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
277 ; X32-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
278 ; X32-SSE-NEXT: pmullw %xmm1, %xmm0
280 %shift = shl <8 x i16> %a, %b
; Variable per-element shift of <16 x i8>: each lane of %a is shifted left by
; the matching lane of %b.
; Lowerings visible in the expectations below
;  - vpshlb directly in the xop runs;
;  - the avx512 runs zero-extend to wider lanes (vpmovzxbd or vpmovzxbw), shift
;    with vpsllvd / vpsllvw and truncate back with vpmovdb / vpmovwb;
;  - the remaining runs apply shifts by 4, 2 and 1 in turn (psllw $4,
;    psllw $2, paddb), each one selected per byte with pcmpgtb masks or
;    pblendvb driven by %xmm1 after `psllw $5` and successive paddb doublings.
; NOTE(review): presumably each select step tests one bit of the shift amount
; through the byte's sign bit - confirm against the X86 lowering code.
284 define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
285 ; SSE2-LABEL: var_shift_v16i8:
287 ; SSE2-NEXT: psllw $5, %xmm1
288 ; SSE2-NEXT: pxor %xmm2, %xmm2
289 ; SSE2-NEXT: pxor %xmm3, %xmm3
290 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm3
291 ; SSE2-NEXT: movdqa %xmm3, %xmm4
292 ; SSE2-NEXT: pandn %xmm0, %xmm4
293 ; SSE2-NEXT: psllw $4, %xmm0
294 ; SSE2-NEXT: pand %xmm3, %xmm0
295 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
296 ; SSE2-NEXT: por %xmm4, %xmm0
297 ; SSE2-NEXT: paddb %xmm1, %xmm1
298 ; SSE2-NEXT: pxor %xmm3, %xmm3
299 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm3
300 ; SSE2-NEXT: movdqa %xmm3, %xmm4
301 ; SSE2-NEXT: pandn %xmm0, %xmm4
302 ; SSE2-NEXT: psllw $2, %xmm0
303 ; SSE2-NEXT: pand %xmm3, %xmm0
304 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
305 ; SSE2-NEXT: por %xmm4, %xmm0
306 ; SSE2-NEXT: paddb %xmm1, %xmm1
307 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
308 ; SSE2-NEXT: movdqa %xmm2, %xmm1
309 ; SSE2-NEXT: pandn %xmm0, %xmm1
310 ; SSE2-NEXT: paddb %xmm0, %xmm0
311 ; SSE2-NEXT: pand %xmm2, %xmm0
312 ; SSE2-NEXT: por %xmm1, %xmm0
315 ; SSE41-LABEL: var_shift_v16i8:
317 ; SSE41-NEXT: movdqa %xmm0, %xmm2
318 ; SSE41-NEXT: psllw $5, %xmm1
319 ; SSE41-NEXT: movdqa %xmm0, %xmm3
320 ; SSE41-NEXT: psllw $4, %xmm3
321 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
322 ; SSE41-NEXT: movdqa %xmm1, %xmm0
323 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
324 ; SSE41-NEXT: movdqa %xmm2, %xmm3
325 ; SSE41-NEXT: psllw $2, %xmm3
326 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
327 ; SSE41-NEXT: paddb %xmm1, %xmm1
328 ; SSE41-NEXT: movdqa %xmm1, %xmm0
329 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
330 ; SSE41-NEXT: movdqa %xmm2, %xmm3
331 ; SSE41-NEXT: paddb %xmm2, %xmm3
332 ; SSE41-NEXT: paddb %xmm1, %xmm1
333 ; SSE41-NEXT: movdqa %xmm1, %xmm0
334 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
335 ; SSE41-NEXT: movdqa %xmm2, %xmm0
338 ; AVX-LABEL: var_shift_v16i8:
340 ; AVX-NEXT: vpsllw $5, %xmm1, %xmm1
341 ; AVX-NEXT: vpsllw $4, %xmm0, %xmm2
342 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
343 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
344 ; AVX-NEXT: vpsllw $2, %xmm0, %xmm2
345 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
346 ; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
347 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
348 ; AVX-NEXT: vpaddb %xmm0, %xmm0, %xmm2
349 ; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
350 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
353 ; XOP-LABEL: var_shift_v16i8:
355 ; XOP-NEXT: vpshlb %xmm1, %xmm0, %xmm0
358 ; AVX512DQ-LABEL: var_shift_v16i8:
360 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
361 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
362 ; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
363 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
364 ; AVX512DQ-NEXT: vzeroupper
365 ; AVX512DQ-NEXT: retq
367 ; AVX512BW-LABEL: var_shift_v16i8:
369 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
370 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
371 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
372 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
373 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
374 ; AVX512BW-NEXT: vzeroupper
375 ; AVX512BW-NEXT: retq
377 ; AVX512DQVL-LABEL: var_shift_v16i8:
378 ; AVX512DQVL: # %bb.0:
379 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
380 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
381 ; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
382 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
383 ; AVX512DQVL-NEXT: vzeroupper
384 ; AVX512DQVL-NEXT: retq
386 ; AVX512BWVL-LABEL: var_shift_v16i8:
387 ; AVX512BWVL: # %bb.0:
388 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
389 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
390 ; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
391 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
392 ; AVX512BWVL-NEXT: vzeroupper
393 ; AVX512BWVL-NEXT: retq
395 ; X32-SSE-LABEL: var_shift_v16i8:
397 ; X32-SSE-NEXT: psllw $5, %xmm1
398 ; X32-SSE-NEXT: pxor %xmm2, %xmm2
399 ; X32-SSE-NEXT: pxor %xmm3, %xmm3
400 ; X32-SSE-NEXT: pcmpgtb %xmm1, %xmm3
401 ; X32-SSE-NEXT: movdqa %xmm3, %xmm4
402 ; X32-SSE-NEXT: pandn %xmm0, %xmm4
403 ; X32-SSE-NEXT: psllw $4, %xmm0
404 ; X32-SSE-NEXT: pand %xmm3, %xmm0
405 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
406 ; X32-SSE-NEXT: por %xmm4, %xmm0
407 ; X32-SSE-NEXT: paddb %xmm1, %xmm1
408 ; X32-SSE-NEXT: pxor %xmm3, %xmm3
409 ; X32-SSE-NEXT: pcmpgtb %xmm1, %xmm3
410 ; X32-SSE-NEXT: movdqa %xmm3, %xmm4
411 ; X32-SSE-NEXT: pandn %xmm0, %xmm4
412 ; X32-SSE-NEXT: psllw $2, %xmm0
413 ; X32-SSE-NEXT: pand %xmm3, %xmm0
414 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
415 ; X32-SSE-NEXT: por %xmm4, %xmm0
416 ; X32-SSE-NEXT: paddb %xmm1, %xmm1
417 ; X32-SSE-NEXT: pcmpgtb %xmm1, %xmm2
418 ; X32-SSE-NEXT: movdqa %xmm2, %xmm1
419 ; X32-SSE-NEXT: pandn %xmm0, %xmm1
420 ; X32-SSE-NEXT: paddb %xmm0, %xmm0
421 ; X32-SSE-NEXT: pand %xmm2, %xmm0
422 ; X32-SSE-NEXT: por %xmm1, %xmm0
424 %shift = shl <16 x i8> %a, %b
429 ; Uniform Variable Shifts
; Uniform variable shift of <2 x i64>: lane 0 of %b is splatted to both lanes
; (shufflevector with a zeroinitializer mask) and the splat is used as the
; shift amount for every lane of %a.
; In every run the only shift checked is a single psllq / vpsllq taking its
; count from %xmm1; no separate splat instruction appears in the checks.
432 define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
433 ; SSE-LABEL: splatvar_shift_v2i64:
435 ; SSE-NEXT: psllq %xmm1, %xmm0
438 ; AVX-LABEL: splatvar_shift_v2i64:
440 ; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0
443 ; XOP-LABEL: splatvar_shift_v2i64:
445 ; XOP-NEXT: vpsllq %xmm1, %xmm0, %xmm0
448 ; AVX512-LABEL: splatvar_shift_v2i64:
450 ; AVX512-NEXT: vpsllq %xmm1, %xmm0, %xmm0
453 ; AVX512VL-LABEL: splatvar_shift_v2i64:
455 ; AVX512VL-NEXT: vpsllq %xmm1, %xmm0, %xmm0
456 ; AVX512VL-NEXT: retq
458 ; X32-SSE-LABEL: splatvar_shift_v2i64:
460 ; X32-SSE-NEXT: psllq %xmm1, %xmm0
462 %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
463 %shift = shl <2 x i64> %a, %splat
; Uniform variable shift of <4 x i32>: lane 0 of %b is splatted to all lanes
; (shufflevector with a zeroinitializer mask) and the splat is used as the
; shift amount for every lane of %a.
; The expectations use the register-count form of pslld / vpslld. Before the
; shift, the count register is rebuilt so that element 0 of %b is followed by
; zeros (xorps+movss, or pmovzxdq).
467 define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
468 ; SSE2-LABEL: splatvar_shift_v4i32:
470 ; SSE2-NEXT: xorps %xmm2, %xmm2
471 ; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
472 ; SSE2-NEXT: pslld %xmm2, %xmm0
475 ; SSE41-LABEL: splatvar_shift_v4i32:
477 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
478 ; SSE41-NEXT: pslld %xmm1, %xmm0
481 ; AVX-LABEL: splatvar_shift_v4i32:
483 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
484 ; AVX-NEXT: vpslld %xmm1, %xmm0, %xmm0
487 ; XOP-LABEL: splatvar_shift_v4i32:
489 ; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
490 ; XOP-NEXT: vpslld %xmm1, %xmm0, %xmm0
493 ; AVX512-LABEL: splatvar_shift_v4i32:
495 ; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
496 ; AVX512-NEXT: vpslld %xmm1, %xmm0, %xmm0
499 ; AVX512VL-LABEL: splatvar_shift_v4i32:
501 ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
502 ; AVX512VL-NEXT: vpslld %xmm1, %xmm0, %xmm0
503 ; AVX512VL-NEXT: retq
505 ; X32-SSE-LABEL: splatvar_shift_v4i32:
507 ; X32-SSE-NEXT: xorps %xmm2, %xmm2
508 ; X32-SSE-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
509 ; X32-SSE-NEXT: pslld %xmm2, %xmm0
511 %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
512 %shift = shl <4 x i32> %a, %splat
; Uniform variable shift of <8 x i16>: lane 0 of %b is splatted to all lanes
; (shufflevector with a zeroinitializer mask) and the splat is used as the
; shift amount for every lane of %a.
; The expectations use the register-count form of psllw / vpsllw. Before the
; shift, element 0 of %b is isolated in the low part of the count register
; with the rest zeroed (pslldq+psrldq byte shifts, or pmovzxwq).
516 define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
517 ; SSE2-LABEL: splatvar_shift_v8i16:
519 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
520 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
521 ; SSE2-NEXT: psllw %xmm1, %xmm0
524 ; SSE41-LABEL: splatvar_shift_v8i16:
526 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
527 ; SSE41-NEXT: psllw %xmm1, %xmm0
530 ; AVX-LABEL: splatvar_shift_v8i16:
532 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
533 ; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0
536 ; XOP-LABEL: splatvar_shift_v8i16:
538 ; XOP-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
539 ; XOP-NEXT: vpsllw %xmm1, %xmm0, %xmm0
542 ; AVX512-LABEL: splatvar_shift_v8i16:
544 ; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
545 ; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0
548 ; AVX512VL-LABEL: splatvar_shift_v8i16:
550 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
551 ; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
552 ; AVX512VL-NEXT: retq
554 ; X32-SSE-LABEL: splatvar_shift_v8i16:
556 ; X32-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
557 ; X32-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
558 ; X32-SSE-NEXT: psllw %xmm1, %xmm0
560 %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
561 %shift = shl <8 x i16> %a, %splat
; Uniform variable shift of <16 x i8>: lane 0 of %b is splatted to all lanes
; (shufflevector with a zeroinitializer mask) and the splat is used as the
; shift amount for every lane of %a.
; Lowerings visible in the expectations below
;  - the xop runs splat the count byte (vpshufb with a zero mask, or
;    vpbroadcastb) and use vpshlb;
;  - the avx512 runs broadcast the count, zero-extend both operands
;    (vpmovzxbd / vpmovzxbw), shift with vpsllvd / vpsllvw and truncate with
;    vpmovdb / vpmovwb;
;  - the remaining runs shift 16-bit lanes with psllw / vpsllw, shift an
;    all-ones vector (pcmpeqd) by the same count, replicate its low byte to
;    every byte and AND that mask into the result.
; NOTE(review): presumably the mask clears bits that the 16-bit shift carried
; in from the neighbouring byte - confirm against the X86 lowering code.
565 define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
566 ; SSE2-LABEL: splatvar_shift_v16i8:
568 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
569 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
570 ; SSE2-NEXT: psllw %xmm1, %xmm0
571 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
572 ; SSE2-NEXT: psllw %xmm1, %xmm2
573 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
574 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,2,3,4,5,6,7]
575 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
576 ; SSE2-NEXT: pand %xmm1, %xmm0
579 ; SSE41-LABEL: splatvar_shift_v16i8:
581 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
582 ; SSE41-NEXT: psllw %xmm1, %xmm0
583 ; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
584 ; SSE41-NEXT: psllw %xmm1, %xmm2
585 ; SSE41-NEXT: pxor %xmm1, %xmm1
586 ; SSE41-NEXT: pshufb %xmm1, %xmm2
587 ; SSE41-NEXT: pand %xmm2, %xmm0
590 ; AVX1-LABEL: splatvar_shift_v16i8:
592 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
593 ; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
594 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
595 ; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm1
596 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
597 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
598 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
601 ; AVX2-LABEL: splatvar_shift_v16i8:
603 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
604 ; AVX2-NEXT: vpsllw %xmm1, %xmm0, %xmm0
605 ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
606 ; AVX2-NEXT: vpsllw %xmm1, %xmm2, %xmm1
607 ; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
608 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
611 ; XOPAVX1-LABEL: splatvar_shift_v16i8:
613 ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
614 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
615 ; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0
618 ; XOPAVX2-LABEL: splatvar_shift_v16i8:
620 ; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
621 ; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
624 ; AVX512DQ-LABEL: splatvar_shift_v16i8:
626 ; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
627 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
628 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
629 ; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
630 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
631 ; AVX512DQ-NEXT: vzeroupper
632 ; AVX512DQ-NEXT: retq
634 ; AVX512BW-LABEL: splatvar_shift_v16i8:
636 ; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
637 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
638 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
639 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
640 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
641 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
642 ; AVX512BW-NEXT: vzeroupper
643 ; AVX512BW-NEXT: retq
645 ; AVX512DQVL-LABEL: splatvar_shift_v16i8:
646 ; AVX512DQVL: # %bb.0:
647 ; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
648 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
649 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
650 ; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
651 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
652 ; AVX512DQVL-NEXT: vzeroupper
653 ; AVX512DQVL-NEXT: retq
655 ; AVX512BWVL-LABEL: splatvar_shift_v16i8:
656 ; AVX512BWVL: # %bb.0:
657 ; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
658 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
659 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
660 ; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
661 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
662 ; AVX512BWVL-NEXT: vzeroupper
663 ; AVX512BWVL-NEXT: retq
665 ; X32-SSE-LABEL: splatvar_shift_v16i8:
667 ; X32-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
668 ; X32-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
669 ; X32-SSE-NEXT: psllw %xmm1, %xmm0
670 ; X32-SSE-NEXT: pcmpeqd %xmm2, %xmm2
671 ; X32-SSE-NEXT: psllw %xmm1, %xmm2
672 ; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
673 ; X32-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,2,3,4,5,6,7]
674 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
675 ; X32-SSE-NEXT: pand %xmm1, %xmm0
677 %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
678 %shift = shl <16 x i8> %a, %splat
; Constant per-element shift of <2 x i64>: lane 0 of %a is shifted left by 1
; and lane 1 by 7 (the constant vector operand of the shl).
; Lowerings visible in the expectations below: runs with a per-lane 64-bit
; shift use one vpsllvq / vpshlq with a constant-pool operand; the other runs
; issue two immediate shifts (psllq $1 and psllq $7) and merge the results
; with movsd or pblendw.
686 define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
687 ; SSE2-LABEL: constant_shift_v2i64:
689 ; SSE2-NEXT: movdqa %xmm0, %xmm1
690 ; SSE2-NEXT: psllq $1, %xmm1
691 ; SSE2-NEXT: psllq $7, %xmm0
692 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
695 ; SSE41-LABEL: constant_shift_v2i64:
697 ; SSE41-NEXT: movdqa %xmm0, %xmm1
698 ; SSE41-NEXT: psllq $7, %xmm1
699 ; SSE41-NEXT: psllq $1, %xmm0
700 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
703 ; AVX1-LABEL: constant_shift_v2i64:
705 ; AVX1-NEXT: vpsllq $7, %xmm0, %xmm1
706 ; AVX1-NEXT: vpsllq $1, %xmm0, %xmm0
707 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
710 ; AVX2-LABEL: constant_shift_v2i64:
712 ; AVX2-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0
715 ; XOPAVX1-LABEL: constant_shift_v2i64:
717 ; XOPAVX1-NEXT: vpshlq {{.*}}(%rip), %xmm0, %xmm0
720 ; XOPAVX2-LABEL: constant_shift_v2i64:
722 ; XOPAVX2-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0
725 ; AVX512-LABEL: constant_shift_v2i64:
727 ; AVX512-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0
730 ; AVX512VL-LABEL: constant_shift_v2i64:
732 ; AVX512VL-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0
733 ; AVX512VL-NEXT: retq
735 ; X32-SSE-LABEL: constant_shift_v2i64:
737 ; X32-SSE-NEXT: movdqa %xmm0, %xmm1
738 ; X32-SSE-NEXT: psllq $1, %xmm1
739 ; X32-SSE-NEXT: psllq $7, %xmm0
740 ; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
742 %shift = shl <2 x i64> %a, <i64 1, i64 7>
; Constant per-element shift of <4 x i32>: the lanes of %a are shifted left by
; 4, 5, 6 and 7 respectively.
; Lowerings visible in the expectations below: a single vpsllvd / vpshld with a
; constant-pool operand where the run has one; otherwise the shift is done as
; a multiply, either pmulld by a constant-pool vector or a
; pmuludq/pshufd/punpckldq sequence using [16,32,64,128] (2^4 .. 2^7).
746 define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) nounwind {
747 ; SSE2-LABEL: constant_shift_v4i32:
749 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [16,32,64,128]
750 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
751 ; SSE2-NEXT: pmuludq %xmm1, %xmm0
752 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
753 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
754 ; SSE2-NEXT: pmuludq %xmm2, %xmm1
755 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
756 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
759 ; SSE41-LABEL: constant_shift_v4i32:
761 ; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
764 ; AVX1-LABEL: constant_shift_v4i32:
766 ; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
769 ; AVX2-LABEL: constant_shift_v4i32:
771 ; AVX2-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
774 ; XOPAVX1-LABEL: constant_shift_v4i32:
776 ; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm0
779 ; XOPAVX2-LABEL: constant_shift_v4i32:
781 ; XOPAVX2-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
784 ; AVX512-LABEL: constant_shift_v4i32:
786 ; AVX512-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
789 ; AVX512VL-LABEL: constant_shift_v4i32:
791 ; AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
792 ; AVX512VL-NEXT: retq
794 ; X32-SSE-LABEL: constant_shift_v4i32:
796 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm1 = [16,32,64,128]
797 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
798 ; X32-SSE-NEXT: pmuludq %xmm1, %xmm0
799 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
800 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
801 ; X32-SSE-NEXT: pmuludq %xmm2, %xmm1
802 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
803 ; X32-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
805 %shift = shl <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
; Non-uniform constant shift: the eight i16 lanes of %a are shifted left by
; 0, 1, 2, ..., 7 respectively. The expectations fall into three shapes: a
; 16-bit multiply by a constant-pool operand (pmullw / vpmullw), the XOP
; per-lane shift (vpshlw), or vpsllvw. In the AVX512BW run without VL the
; vpsllvw is expected on zmm registers, with the shift amounts [0..7]
; materialised in xmm1 and kill annotations around the widened operand.
809 define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
810 ; SSE-LABEL: constant_shift_v8i16:
812 ; SSE-NEXT: pmullw {{.*}}(%rip), %xmm0
815 ; AVX-LABEL: constant_shift_v8i16:
817 ; AVX-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
820 ; XOP-LABEL: constant_shift_v8i16:
822 ; XOP-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm0
825 ; AVX512DQ-LABEL: constant_shift_v8i16:
827 ; AVX512DQ-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
828 ; AVX512DQ-NEXT: retq
830 ; AVX512BW-LABEL: constant_shift_v8i16:
832 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
833 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
834 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
835 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
836 ; AVX512BW-NEXT: vzeroupper
837 ; AVX512BW-NEXT: retq
839 ; AVX512DQVL-LABEL: constant_shift_v8i16:
840 ; AVX512DQVL: # %bb.0:
841 ; AVX512DQVL-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
842 ; AVX512DQVL-NEXT: retq
844 ; AVX512BWVL-LABEL: constant_shift_v8i16:
845 ; AVX512BWVL: # %bb.0:
846 ; AVX512BWVL-NEXT: vpsllvw {{.*}}(%rip), %xmm0, %xmm0
847 ; AVX512BWVL-NEXT: retq
849 ; X32-SSE-LABEL: constant_shift_v8i16:
851 ; X32-SSE-NEXT: pmullw {{\.LCPI.*}}, %xmm0
853 %shift = shl <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
; Non-uniform constant shift on bytes: the sixteen i8 lanes of %a are shifted
; left by amounts that ramp up 0..7 and then back down 7..0. Only the XOP
; expectation is a single byte-granular shift (vpshlb). Every other
; configuration is expected to widen the bytes to 16-bit or 32-bit lanes,
; multiply or shift there, and then narrow back: by masking with 255 and
; packing with packuswb, or with a truncating move (vpmovwb / vpmovdb) in
; the AVX-512 runs.
857 define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
858 ; SSE2-LABEL: constant_shift_v16i8:
860 ; SSE2-NEXT: movdqa %xmm0, %xmm1
861 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
862 ; SSE2-NEXT: pmullw {{.*}}(%rip), %xmm1
863 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
864 ; SSE2-NEXT: pand %xmm2, %xmm1
865 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
866 ; SSE2-NEXT: pmullw {{.*}}(%rip), %xmm0
867 ; SSE2-NEXT: pand %xmm2, %xmm0
868 ; SSE2-NEXT: packuswb %xmm1, %xmm0
871 ; SSE41-LABEL: constant_shift_v16i8:
873 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
874 ; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
875 ; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm0
876 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
877 ; SSE41-NEXT: pand %xmm2, %xmm0
878 ; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm1
879 ; SSE41-NEXT: pand %xmm2, %xmm1
880 ; SSE41-NEXT: packuswb %xmm0, %xmm1
881 ; SSE41-NEXT: movdqa %xmm1, %xmm0
884 ; AVX1-LABEL: constant_shift_v16i8:
886 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
887 ; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm1
888 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
889 ; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
890 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
891 ; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
892 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
893 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
896 ; AVX2-LABEL: constant_shift_v16i8:
898 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
899 ; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
900 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
901 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
902 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
903 ; AVX2-NEXT: vzeroupper
906 ; XOP-LABEL: constant_shift_v16i8:
908 ; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
911 ; AVX512DQ-LABEL: constant_shift_v16i8:
913 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
914 ; AVX512DQ-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
915 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
916 ; AVX512DQ-NEXT: vzeroupper
917 ; AVX512DQ-NEXT: retq
919 ; AVX512BW-LABEL: constant_shift_v16i8:
921 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
922 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
923 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
924 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
925 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
926 ; AVX512BW-NEXT: vzeroupper
927 ; AVX512BW-NEXT: retq
929 ; AVX512DQVL-LABEL: constant_shift_v16i8:
930 ; AVX512DQVL: # %bb.0:
931 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
932 ; AVX512DQVL-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
933 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
934 ; AVX512DQVL-NEXT: vzeroupper
935 ; AVX512DQVL-NEXT: retq
937 ; AVX512BWVL-LABEL: constant_shift_v16i8:
938 ; AVX512BWVL: # %bb.0:
939 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
940 ; AVX512BWVL-NEXT: vpsllvw {{.*}}(%rip), %ymm0, %ymm0
941 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
942 ; AVX512BWVL-NEXT: vzeroupper
943 ; AVX512BWVL-NEXT: retq
945 ; X32-SSE-LABEL: constant_shift_v16i8:
947 ; X32-SSE-NEXT: movdqa %xmm0, %xmm1
948 ; X32-SSE-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
949 ; X32-SSE-NEXT: pmullw {{\.LCPI.*}}, %xmm1
950 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
951 ; X32-SSE-NEXT: pand %xmm2, %xmm1
952 ; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
953 ; X32-SSE-NEXT: pmullw {{\.LCPI.*}}, %xmm0
954 ; X32-SSE-NEXT: pand %xmm2, %xmm0
955 ; X32-SSE-NEXT: packuswb %xmm1, %xmm0
957 %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
962 ; Uniform Constant Shifts
; Uniform (splat) constant shift: both i64 lanes of %a are shifted left by the
; same amount, 7. Every configuration, including the 32-bit run, is expected
; to emit a single immediate-form shift (psllq / vpsllq $7).
965 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) nounwind {
966 ; SSE-LABEL: splatconstant_shift_v2i64:
968 ; SSE-NEXT: psllq $7, %xmm0
971 ; AVX-LABEL: splatconstant_shift_v2i64:
973 ; AVX-NEXT: vpsllq $7, %xmm0, %xmm0
976 ; XOP-LABEL: splatconstant_shift_v2i64:
978 ; XOP-NEXT: vpsllq $7, %xmm0, %xmm0
981 ; AVX512-LABEL: splatconstant_shift_v2i64:
983 ; AVX512-NEXT: vpsllq $7, %xmm0, %xmm0
986 ; AVX512VL-LABEL: splatconstant_shift_v2i64:
988 ; AVX512VL-NEXT: vpsllq $7, %xmm0, %xmm0
989 ; AVX512VL-NEXT: retq
991 ; X32-SSE-LABEL: splatconstant_shift_v2i64:
993 ; X32-SSE-NEXT: psllq $7, %xmm0
995 %shift = shl <2 x i64> %a, <i64 7, i64 7>
; Uniform (splat) constant shift: all four i32 lanes of %a are shifted left by
; the same amount, 5. Every configuration is expected to emit a single
; immediate-form shift (pslld / vpslld $5).
999 define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) nounwind {
1000 ; SSE-LABEL: splatconstant_shift_v4i32:
1002 ; SSE-NEXT: pslld $5, %xmm0
1005 ; AVX-LABEL: splatconstant_shift_v4i32:
1007 ; AVX-NEXT: vpslld $5, %xmm0, %xmm0
1010 ; XOP-LABEL: splatconstant_shift_v4i32:
1012 ; XOP-NEXT: vpslld $5, %xmm0, %xmm0
1015 ; AVX512-LABEL: splatconstant_shift_v4i32:
1017 ; AVX512-NEXT: vpslld $5, %xmm0, %xmm0
1020 ; AVX512VL-LABEL: splatconstant_shift_v4i32:
1021 ; AVX512VL: # %bb.0:
1022 ; AVX512VL-NEXT: vpslld $5, %xmm0, %xmm0
1023 ; AVX512VL-NEXT: retq
1025 ; X32-SSE-LABEL: splatconstant_shift_v4i32:
1027 ; X32-SSE-NEXT: pslld $5, %xmm0
1028 ; X32-SSE-NEXT: retl
1029 %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
1030 ret <4 x i32> %shift
; Uniform (splat) constant shift: all eight i16 lanes of %a are shifted left
; by the same amount, 3. Every configuration is expected to emit a single
; immediate-form shift (psllw / vpsllw $3).
1033 define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) nounwind {
1034 ; SSE-LABEL: splatconstant_shift_v8i16:
1036 ; SSE-NEXT: psllw $3, %xmm0
1039 ; AVX-LABEL: splatconstant_shift_v8i16:
1041 ; AVX-NEXT: vpsllw $3, %xmm0, %xmm0
1044 ; XOP-LABEL: splatconstant_shift_v8i16:
1046 ; XOP-NEXT: vpsllw $3, %xmm0, %xmm0
1049 ; AVX512-LABEL: splatconstant_shift_v8i16:
1051 ; AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
1054 ; AVX512VL-LABEL: splatconstant_shift_v8i16:
1055 ; AVX512VL: # %bb.0:
1056 ; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
1057 ; AVX512VL-NEXT: retq
1059 ; X32-SSE-LABEL: splatconstant_shift_v8i16:
1061 ; X32-SSE-NEXT: psllw $3, %xmm0
1062 ; X32-SSE-NEXT: retl
1063 %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
1064 ret <8 x i16> %shift
; Uniform (splat) constant shift on bytes: all sixteen i8 lanes of %a are
; shifted left by the same amount, 3. The XOP expectation is a single
; byte-granular vpshlb with a constant-pool operand. Every other configuration
; is expected to shift at 16-bit granularity (psllw / vpsllw $3) and then AND
; with a constant-pool mask. The mask value is not shown in the checks;
; presumably it clears the bits that crossed over from the neighbouring byte.
1067 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind {
1068 ; SSE-LABEL: splatconstant_shift_v16i8:
1070 ; SSE-NEXT: psllw $3, %xmm0
1071 ; SSE-NEXT: pand {{.*}}(%rip), %xmm0
1074 ; AVX-LABEL: splatconstant_shift_v16i8:
1076 ; AVX-NEXT: vpsllw $3, %xmm0, %xmm0
1077 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
1080 ; XOP-LABEL: splatconstant_shift_v16i8:
1082 ; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
1085 ; AVX512-LABEL: splatconstant_shift_v16i8:
1087 ; AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
1088 ; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
1091 ; AVX512VL-LABEL: splatconstant_shift_v16i8:
1092 ; AVX512VL: # %bb.0:
1093 ; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
1094 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
1095 ; AVX512VL-NEXT: retq
1097 ; X32-SSE-LABEL: splatconstant_shift_v16i8:
1099 ; X32-SSE-NEXT: psllw $3, %xmm0
1100 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
1101 ; X32-SSE-NEXT: retl
1102 %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
1103 ret <16 x i8> %shift