1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOP,XOPAVX1
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOP,XOPAVX2
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512DQVL
11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512BWVL
13 ; Just one 32-bit run to make sure we do reasonable things for i64 shifts.
14 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE
; Variable (per-element) left shift of a <2 x i32> vector; the operation under
; test is `shl <2 x i32> %a, %b`.
; Expected lowering as recorded by the checks below. With SSE2, SSE4.1, AVX1
; and the 32-bit SSE2 run, the shift amounts go through pslld $23, a paddd of
; a constant-pool value and cvttps2dq, and the result is then multiplied with
; the value operand (pmuludq pairs plus shuffles, or a single pmulld).
; With AVX2, XOP+AVX2 and the AVX-512 runs a single vpsllvd is expected, and
; with XOP+AVX1 a single vpshld.
20 define <2 x i32> @var_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
21 ; SSE2-LABEL: var_shift_v2i32:
23 ; SSE2-NEXT: pslld $23, %xmm1
24 ; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25 ; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
26 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
27 ; SSE2-NEXT: pmuludq %xmm1, %xmm0
28 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
29 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
30 ; SSE2-NEXT: pmuludq %xmm2, %xmm1
31 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
32 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
35 ; SSE41-LABEL: var_shift_v2i32:
37 ; SSE41-NEXT: pslld $23, %xmm1
38 ; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
39 ; SSE41-NEXT: cvttps2dq %xmm1, %xmm1
40 ; SSE41-NEXT: pmulld %xmm1, %xmm0
43 ; AVX1-LABEL: var_shift_v2i32:
45 ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
46 ; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
47 ; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
48 ; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0
51 ; AVX2-LABEL: var_shift_v2i32:
53 ; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
56 ; XOPAVX1-LABEL: var_shift_v2i32:
58 ; XOPAVX1-NEXT: vpshld %xmm1, %xmm0, %xmm0
61 ; XOPAVX2-LABEL: var_shift_v2i32:
63 ; XOPAVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
66 ; AVX512-LABEL: var_shift_v2i32:
68 ; AVX512-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
71 ; AVX512VL-LABEL: var_shift_v2i32:
73 ; AVX512VL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
76 ; X86-SSE-LABEL: var_shift_v2i32:
78 ; X86-SSE-NEXT: pslld $23, %xmm1
79 ; X86-SSE-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
80 ; X86-SSE-NEXT: cvttps2dq %xmm1, %xmm1
81 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
82 ; X86-SSE-NEXT: pmuludq %xmm1, %xmm0
83 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
84 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
85 ; X86-SSE-NEXT: pmuludq %xmm2, %xmm1
86 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
87 ; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
89 %shift = shl <2 x i32> %a, %b
; Variable (per-element) left shift of a <4 x i16> vector; the operation under
; test is `shl <4 x i16> %a, %b`.
; Expected lowering as recorded by the checks below. The SSE2, SSE4.1, AVX1
; and 32-bit SSE2 runs spread the shift amounts into dword lanes, apply
; pslld $23, add 1065353216 (0x3F800000) to every lane, convert with
; cvttps2dq, narrow back to words and finish with pmullw against the value
; operand. The AVX2 and AVX512DQ runs zero-extend both operands to dwords,
; use vpsllvd and narrow the result. XOP expects a single vpshlw, and the
; AVX512BW runs expect vpsllvw (on zmm registers without VL, on xmm with VL).
93 define <4 x i16> @var_shift_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
94 ; SSE2-LABEL: var_shift_v4i16:
96 ; SSE2-NEXT: movdqa %xmm1, %xmm2
97 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
98 ; SSE2-NEXT: pslld $23, %xmm2
99 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
100 ; SSE2-NEXT: paddd %xmm3, %xmm2
101 ; SSE2-NEXT: cvttps2dq %xmm2, %xmm2
102 ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
103 ; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
104 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
105 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
106 ; SSE2-NEXT: pslld $23, %xmm1
107 ; SSE2-NEXT: paddd %xmm3, %xmm1
108 ; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
109 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
110 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
111 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
112 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
113 ; SSE2-NEXT: pmullw %xmm1, %xmm0
116 ; SSE41-LABEL: var_shift_v4i16:
118 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
119 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
120 ; SSE41-NEXT: pslld $23, %xmm1
121 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
122 ; SSE41-NEXT: paddd %xmm3, %xmm1
123 ; SSE41-NEXT: cvttps2dq %xmm1, %xmm1
124 ; SSE41-NEXT: pslld $23, %xmm2
125 ; SSE41-NEXT: paddd %xmm3, %xmm2
126 ; SSE41-NEXT: cvttps2dq %xmm2, %xmm2
127 ; SSE41-NEXT: packusdw %xmm1, %xmm2
128 ; SSE41-NEXT: pmullw %xmm2, %xmm0
131 ; AVX1-LABEL: var_shift_v4i16:
133 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4,4,5,5,6,6,7,7]
134 ; AVX1-NEXT: vpslld $23, %xmm2, %xmm2
135 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
136 ; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
137 ; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2
138 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
139 ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
140 ; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1
141 ; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
142 ; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
143 ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
146 ; AVX2-LABEL: var_shift_v4i16:
148 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
149 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
150 ; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
151 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
152 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
153 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
154 ; AVX2-NEXT: vzeroupper
157 ; XOP-LABEL: var_shift_v4i16:
159 ; XOP-NEXT: vpshlw %xmm1, %xmm0, %xmm0
162 ; AVX512DQ-LABEL: var_shift_v4i16:
164 ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
165 ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
166 ; AVX512DQ-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
167 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
168 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
169 ; AVX512DQ-NEXT: vzeroupper
170 ; AVX512DQ-NEXT: retq
172 ; AVX512BW-LABEL: var_shift_v4i16:
174 ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
175 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
176 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
177 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
178 ; AVX512BW-NEXT: vzeroupper
179 ; AVX512BW-NEXT: retq
181 ; AVX512DQVL-LABEL: var_shift_v4i16:
182 ; AVX512DQVL: # %bb.0:
183 ; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
184 ; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
185 ; AVX512DQVL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
186 ; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0
187 ; AVX512DQVL-NEXT: vzeroupper
188 ; AVX512DQVL-NEXT: retq
190 ; AVX512BWVL-LABEL: var_shift_v4i16:
191 ; AVX512BWVL: # %bb.0:
192 ; AVX512BWVL-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
193 ; AVX512BWVL-NEXT: retq
195 ; X86-SSE-LABEL: var_shift_v4i16:
197 ; X86-SSE-NEXT: movdqa %xmm1, %xmm2
198 ; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
199 ; X86-SSE-NEXT: pslld $23, %xmm2
200 ; X86-SSE-NEXT: movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
201 ; X86-SSE-NEXT: paddd %xmm3, %xmm2
202 ; X86-SSE-NEXT: cvttps2dq %xmm2, %xmm2
203 ; X86-SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
204 ; X86-SSE-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
205 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
206 ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
207 ; X86-SSE-NEXT: pslld $23, %xmm1
208 ; X86-SSE-NEXT: paddd %xmm3, %xmm1
209 ; X86-SSE-NEXT: cvttps2dq %xmm1, %xmm1
210 ; X86-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
211 ; X86-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
212 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
213 ; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
214 ; X86-SSE-NEXT: pmullw %xmm1, %xmm0
216 %shift = shl <4 x i16> %a, %b
; Variable (per-element) left shift of a <2 x i16> vector; the operation under
; test is `shl <2 x i16> %a, %b`.
; The instruction sequences recorded by the checks below match, run for run,
; the ones recorded for var_shift_v4i16 in this file. The SSE2, SSE4.1, AVX1
; and 32-bit SSE2 runs use the pslld $23 / add 1065353216 / cvttps2dq /
; pmullw sequence, the AVX2 and AVX512DQ runs zero-extend to dwords and use
; vpsllvd, XOP expects vpshlw, and the AVX512BW runs expect vpsllvw.
220 define <2 x i16> @var_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
221 ; SSE2-LABEL: var_shift_v2i16:
223 ; SSE2-NEXT: movdqa %xmm1, %xmm2
224 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
225 ; SSE2-NEXT: pslld $23, %xmm2
226 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
227 ; SSE2-NEXT: paddd %xmm3, %xmm2
228 ; SSE2-NEXT: cvttps2dq %xmm2, %xmm2
229 ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
230 ; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
231 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
232 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
233 ; SSE2-NEXT: pslld $23, %xmm1
234 ; SSE2-NEXT: paddd %xmm3, %xmm1
235 ; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
236 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
237 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
238 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
239 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
240 ; SSE2-NEXT: pmullw %xmm1, %xmm0
243 ; SSE41-LABEL: var_shift_v2i16:
245 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
246 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
247 ; SSE41-NEXT: pslld $23, %xmm1
248 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
249 ; SSE41-NEXT: paddd %xmm3, %xmm1
250 ; SSE41-NEXT: cvttps2dq %xmm1, %xmm1
251 ; SSE41-NEXT: pslld $23, %xmm2
252 ; SSE41-NEXT: paddd %xmm3, %xmm2
253 ; SSE41-NEXT: cvttps2dq %xmm2, %xmm2
254 ; SSE41-NEXT: packusdw %xmm1, %xmm2
255 ; SSE41-NEXT: pmullw %xmm2, %xmm0
258 ; AVX1-LABEL: var_shift_v2i16:
260 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4,4,5,5,6,6,7,7]
261 ; AVX1-NEXT: vpslld $23, %xmm2, %xmm2
262 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
263 ; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
264 ; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2
265 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
266 ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
267 ; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1
268 ; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
269 ; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
270 ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
273 ; AVX2-LABEL: var_shift_v2i16:
275 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
276 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
277 ; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
278 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
279 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
280 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
281 ; AVX2-NEXT: vzeroupper
284 ; XOP-LABEL: var_shift_v2i16:
286 ; XOP-NEXT: vpshlw %xmm1, %xmm0, %xmm0
289 ; AVX512DQ-LABEL: var_shift_v2i16:
291 ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
292 ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
293 ; AVX512DQ-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
294 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
295 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
296 ; AVX512DQ-NEXT: vzeroupper
297 ; AVX512DQ-NEXT: retq
299 ; AVX512BW-LABEL: var_shift_v2i16:
301 ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
302 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
303 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
304 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
305 ; AVX512BW-NEXT: vzeroupper
306 ; AVX512BW-NEXT: retq
308 ; AVX512DQVL-LABEL: var_shift_v2i16:
309 ; AVX512DQVL: # %bb.0:
310 ; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
311 ; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
312 ; AVX512DQVL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
313 ; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0
314 ; AVX512DQVL-NEXT: vzeroupper
315 ; AVX512DQVL-NEXT: retq
317 ; AVX512BWVL-LABEL: var_shift_v2i16:
318 ; AVX512BWVL: # %bb.0:
319 ; AVX512BWVL-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
320 ; AVX512BWVL-NEXT: retq
322 ; X86-SSE-LABEL: var_shift_v2i16:
324 ; X86-SSE-NEXT: movdqa %xmm1, %xmm2
325 ; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
326 ; X86-SSE-NEXT: pslld $23, %xmm2
327 ; X86-SSE-NEXT: movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
328 ; X86-SSE-NEXT: paddd %xmm3, %xmm2
329 ; X86-SSE-NEXT: cvttps2dq %xmm2, %xmm2
330 ; X86-SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
331 ; X86-SSE-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
332 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
333 ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
334 ; X86-SSE-NEXT: pslld $23, %xmm1
335 ; X86-SSE-NEXT: paddd %xmm3, %xmm1
336 ; X86-SSE-NEXT: cvttps2dq %xmm1, %xmm1
337 ; X86-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
338 ; X86-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
339 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
340 ; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
341 ; X86-SSE-NEXT: pmullw %xmm1, %xmm0
343 %shift = shl <2 x i16> %a, %b
; Variable (per-element) left shift of an <8 x i8> vector; the operation under
; test is `shl <8 x i8> %a, %b`.
; Expected lowering as recorded by the checks below. The SSE2 and 32-bit SSE2
; runs apply psllw $5 to the shift amounts and then perform three conditional
; steps (psllw $4, psllw $2, and a paddb of the value with itself), each one
; selected through a pcmpgtb-derived mask with pand/pandn/por and followed by
; doubling the amounts with paddb. The SSE4.1 and AVX runs do the same three
; steps using (v)pblendvb selects. XOP expects a single vpshlb. The AVX512DQ
; runs zero-extend to dwords, use vpsllvd and truncate with vpmovdb; the
; AVX512BW runs zero-extend to words, use vpsllvw and truncate with vpmovwb.
347 define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
348 ; SSE2-LABEL: var_shift_v8i8:
350 ; SSE2-NEXT: psllw $5, %xmm1
351 ; SSE2-NEXT: pxor %xmm2, %xmm2
352 ; SSE2-NEXT: pxor %xmm3, %xmm3
353 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm3
354 ; SSE2-NEXT: movdqa %xmm3, %xmm4
355 ; SSE2-NEXT: pandn %xmm0, %xmm4
356 ; SSE2-NEXT: psllw $4, %xmm0
357 ; SSE2-NEXT: pand %xmm3, %xmm0
358 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
359 ; SSE2-NEXT: por %xmm4, %xmm0
360 ; SSE2-NEXT: paddb %xmm1, %xmm1
361 ; SSE2-NEXT: pxor %xmm3, %xmm3
362 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm3
363 ; SSE2-NEXT: movdqa %xmm3, %xmm4
364 ; SSE2-NEXT: pandn %xmm0, %xmm4
365 ; SSE2-NEXT: psllw $2, %xmm0
366 ; SSE2-NEXT: pand %xmm3, %xmm0
367 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
368 ; SSE2-NEXT: por %xmm4, %xmm0
369 ; SSE2-NEXT: paddb %xmm1, %xmm1
370 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
371 ; SSE2-NEXT: movdqa %xmm2, %xmm1
372 ; SSE2-NEXT: pandn %xmm0, %xmm1
373 ; SSE2-NEXT: paddb %xmm0, %xmm0
374 ; SSE2-NEXT: pand %xmm2, %xmm0
375 ; SSE2-NEXT: por %xmm1, %xmm0
378 ; SSE41-LABEL: var_shift_v8i8:
380 ; SSE41-NEXT: movdqa %xmm0, %xmm2
381 ; SSE41-NEXT: psllw $5, %xmm1
382 ; SSE41-NEXT: movdqa %xmm0, %xmm3
383 ; SSE41-NEXT: psllw $4, %xmm3
384 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
385 ; SSE41-NEXT: movdqa %xmm1, %xmm0
386 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
387 ; SSE41-NEXT: movdqa %xmm2, %xmm3
388 ; SSE41-NEXT: psllw $2, %xmm3
389 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
390 ; SSE41-NEXT: paddb %xmm1, %xmm1
391 ; SSE41-NEXT: movdqa %xmm1, %xmm0
392 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
393 ; SSE41-NEXT: movdqa %xmm2, %xmm3
394 ; SSE41-NEXT: paddb %xmm2, %xmm3
395 ; SSE41-NEXT: paddb %xmm1, %xmm1
396 ; SSE41-NEXT: movdqa %xmm1, %xmm0
397 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
398 ; SSE41-NEXT: movdqa %xmm2, %xmm0
401 ; AVX-LABEL: var_shift_v8i8:
403 ; AVX-NEXT: vpsllw $5, %xmm1, %xmm1
404 ; AVX-NEXT: vpsllw $4, %xmm0, %xmm2
405 ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
406 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
407 ; AVX-NEXT: vpsllw $2, %xmm0, %xmm2
408 ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
409 ; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
410 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
411 ; AVX-NEXT: vpaddb %xmm0, %xmm0, %xmm2
412 ; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
413 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
416 ; XOP-LABEL: var_shift_v8i8:
418 ; XOP-NEXT: vpshlb %xmm1, %xmm0, %xmm0
421 ; AVX512DQ-LABEL: var_shift_v8i8:
423 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
424 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
425 ; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
426 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
427 ; AVX512DQ-NEXT: vzeroupper
428 ; AVX512DQ-NEXT: retq
430 ; AVX512BW-LABEL: var_shift_v8i8:
432 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
433 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
434 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
435 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
436 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
437 ; AVX512BW-NEXT: vzeroupper
438 ; AVX512BW-NEXT: retq
440 ; AVX512DQVL-LABEL: var_shift_v8i8:
441 ; AVX512DQVL: # %bb.0:
442 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
443 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
444 ; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
445 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
446 ; AVX512DQVL-NEXT: vzeroupper
447 ; AVX512DQVL-NEXT: retq
449 ; AVX512BWVL-LABEL: var_shift_v8i8:
450 ; AVX512BWVL: # %bb.0:
451 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
452 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
453 ; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
454 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
455 ; AVX512BWVL-NEXT: vzeroupper
456 ; AVX512BWVL-NEXT: retq
458 ; X86-SSE-LABEL: var_shift_v8i8:
460 ; X86-SSE-NEXT: psllw $5, %xmm1
461 ; X86-SSE-NEXT: pxor %xmm2, %xmm2
462 ; X86-SSE-NEXT: pxor %xmm3, %xmm3
463 ; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm3
464 ; X86-SSE-NEXT: movdqa %xmm3, %xmm4
465 ; X86-SSE-NEXT: pandn %xmm0, %xmm4
466 ; X86-SSE-NEXT: psllw $4, %xmm0
467 ; X86-SSE-NEXT: pand %xmm3, %xmm0
468 ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
469 ; X86-SSE-NEXT: por %xmm4, %xmm0
470 ; X86-SSE-NEXT: paddb %xmm1, %xmm1
471 ; X86-SSE-NEXT: pxor %xmm3, %xmm3
472 ; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm3
473 ; X86-SSE-NEXT: movdqa %xmm3, %xmm4
474 ; X86-SSE-NEXT: pandn %xmm0, %xmm4
475 ; X86-SSE-NEXT: psllw $2, %xmm0
476 ; X86-SSE-NEXT: pand %xmm3, %xmm0
477 ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
478 ; X86-SSE-NEXT: por %xmm4, %xmm0
479 ; X86-SSE-NEXT: paddb %xmm1, %xmm1
480 ; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm2
481 ; X86-SSE-NEXT: movdqa %xmm2, %xmm1
482 ; X86-SSE-NEXT: pandn %xmm0, %xmm1
483 ; X86-SSE-NEXT: paddb %xmm0, %xmm0
484 ; X86-SSE-NEXT: pand %xmm2, %xmm0
485 ; X86-SSE-NEXT: por %xmm1, %xmm0
487 %shift = shl <8 x i8> %a, %b
; Variable (per-element) left shift of a <4 x i8> vector; the operation under
; test is `shl <4 x i8> %a, %b`.
; The instruction sequences recorded by the checks below match, run for run,
; the ones recorded for var_shift_v8i8 in this file. The SSE2 and 32-bit SSE2
; runs use psllw $5 on the amounts followed by three mask-selected steps
; (psllw $4, psllw $2, paddb), the SSE4.1 and AVX runs use (v)pblendvb for the
; selects, XOP expects a single vpshlb, the AVX512DQ runs go through
; vpmovzxbd / vpsllvd / vpmovdb, and the AVX512BW runs go through
; vpmovzxbw / vpsllvw / vpmovwb.
491 define <4 x i8> @var_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
492 ; SSE2-LABEL: var_shift_v4i8:
494 ; SSE2-NEXT: psllw $5, %xmm1
495 ; SSE2-NEXT: pxor %xmm2, %xmm2
496 ; SSE2-NEXT: pxor %xmm3, %xmm3
497 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm3
498 ; SSE2-NEXT: movdqa %xmm3, %xmm4
499 ; SSE2-NEXT: pandn %xmm0, %xmm4
500 ; SSE2-NEXT: psllw $4, %xmm0
501 ; SSE2-NEXT: pand %xmm3, %xmm0
502 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
503 ; SSE2-NEXT: por %xmm4, %xmm0
504 ; SSE2-NEXT: paddb %xmm1, %xmm1
505 ; SSE2-NEXT: pxor %xmm3, %xmm3
506 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm3
507 ; SSE2-NEXT: movdqa %xmm3, %xmm4
508 ; SSE2-NEXT: pandn %xmm0, %xmm4
509 ; SSE2-NEXT: psllw $2, %xmm0
510 ; SSE2-NEXT: pand %xmm3, %xmm0
511 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
512 ; SSE2-NEXT: por %xmm4, %xmm0
513 ; SSE2-NEXT: paddb %xmm1, %xmm1
514 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
515 ; SSE2-NEXT: movdqa %xmm2, %xmm1
516 ; SSE2-NEXT: pandn %xmm0, %xmm1
517 ; SSE2-NEXT: paddb %xmm0, %xmm0
518 ; SSE2-NEXT: pand %xmm2, %xmm0
519 ; SSE2-NEXT: por %xmm1, %xmm0
522 ; SSE41-LABEL: var_shift_v4i8:
524 ; SSE41-NEXT: movdqa %xmm0, %xmm2
525 ; SSE41-NEXT: psllw $5, %xmm1
526 ; SSE41-NEXT: movdqa %xmm0, %xmm3
527 ; SSE41-NEXT: psllw $4, %xmm3
528 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
529 ; SSE41-NEXT: movdqa %xmm1, %xmm0
530 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
531 ; SSE41-NEXT: movdqa %xmm2, %xmm3
532 ; SSE41-NEXT: psllw $2, %xmm3
533 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
534 ; SSE41-NEXT: paddb %xmm1, %xmm1
535 ; SSE41-NEXT: movdqa %xmm1, %xmm0
536 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
537 ; SSE41-NEXT: movdqa %xmm2, %xmm3
538 ; SSE41-NEXT: paddb %xmm2, %xmm3
539 ; SSE41-NEXT: paddb %xmm1, %xmm1
540 ; SSE41-NEXT: movdqa %xmm1, %xmm0
541 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
542 ; SSE41-NEXT: movdqa %xmm2, %xmm0
545 ; AVX-LABEL: var_shift_v4i8:
547 ; AVX-NEXT: vpsllw $5, %xmm1, %xmm1
548 ; AVX-NEXT: vpsllw $4, %xmm0, %xmm2
549 ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
550 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
551 ; AVX-NEXT: vpsllw $2, %xmm0, %xmm2
552 ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
553 ; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
554 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
555 ; AVX-NEXT: vpaddb %xmm0, %xmm0, %xmm2
556 ; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
557 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
560 ; XOP-LABEL: var_shift_v4i8:
562 ; XOP-NEXT: vpshlb %xmm1, %xmm0, %xmm0
565 ; AVX512DQ-LABEL: var_shift_v4i8:
567 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
568 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
569 ; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
570 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
571 ; AVX512DQ-NEXT: vzeroupper
572 ; AVX512DQ-NEXT: retq
574 ; AVX512BW-LABEL: var_shift_v4i8:
576 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
577 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
578 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
579 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
580 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
581 ; AVX512BW-NEXT: vzeroupper
582 ; AVX512BW-NEXT: retq
584 ; AVX512DQVL-LABEL: var_shift_v4i8:
585 ; AVX512DQVL: # %bb.0:
586 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
587 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
588 ; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
589 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
590 ; AVX512DQVL-NEXT: vzeroupper
591 ; AVX512DQVL-NEXT: retq
593 ; AVX512BWVL-LABEL: var_shift_v4i8:
594 ; AVX512BWVL: # %bb.0:
595 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
596 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
597 ; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
598 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
599 ; AVX512BWVL-NEXT: vzeroupper
600 ; AVX512BWVL-NEXT: retq
602 ; X86-SSE-LABEL: var_shift_v4i8:
604 ; X86-SSE-NEXT: psllw $5, %xmm1
605 ; X86-SSE-NEXT: pxor %xmm2, %xmm2
606 ; X86-SSE-NEXT: pxor %xmm3, %xmm3
607 ; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm3
608 ; X86-SSE-NEXT: movdqa %xmm3, %xmm4
609 ; X86-SSE-NEXT: pandn %xmm0, %xmm4
610 ; X86-SSE-NEXT: psllw $4, %xmm0
611 ; X86-SSE-NEXT: pand %xmm3, %xmm0
612 ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
613 ; X86-SSE-NEXT: por %xmm4, %xmm0
614 ; X86-SSE-NEXT: paddb %xmm1, %xmm1
615 ; X86-SSE-NEXT: pxor %xmm3, %xmm3
616 ; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm3
617 ; X86-SSE-NEXT: movdqa %xmm3, %xmm4
618 ; X86-SSE-NEXT: pandn %xmm0, %xmm4
619 ; X86-SSE-NEXT: psllw $2, %xmm0
620 ; X86-SSE-NEXT: pand %xmm3, %xmm0
621 ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
622 ; X86-SSE-NEXT: por %xmm4, %xmm0
623 ; X86-SSE-NEXT: paddb %xmm1, %xmm1
624 ; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm2
625 ; X86-SSE-NEXT: movdqa %xmm2, %xmm1
626 ; X86-SSE-NEXT: pandn %xmm0, %xmm1
627 ; X86-SSE-NEXT: paddb %xmm0, %xmm0
628 ; X86-SSE-NEXT: pand %xmm2, %xmm0
629 ; X86-SSE-NEXT: por %xmm1, %xmm0
631 %shift = shl <4 x i8> %a, %b
635 define <2 x i8> @var_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
636 ; SSE2-LABEL: var_shift_v2i8:
638 ; SSE2-NEXT: psllw $5, %xmm1
639 ; SSE2-NEXT: pxor %xmm2, %xmm2
640 ; SSE2-NEXT: pxor %xmm3, %xmm3
641 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm3
642 ; SSE2-NEXT: movdqa %xmm3, %xmm4
643 ; SSE2-NEXT: pandn %xmm0, %xmm4
644 ; SSE2-NEXT: psllw $4, %xmm0
645 ; SSE2-NEXT: pand %xmm3, %xmm0
646 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
647 ; SSE2-NEXT: por %xmm4, %xmm0
648 ; SSE2-NEXT: paddb %xmm1, %xmm1
649 ; SSE2-NEXT: pxor %xmm3, %xmm3
650 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm3
651 ; SSE2-NEXT: movdqa %xmm3, %xmm4
652 ; SSE2-NEXT: pandn %xmm0, %xmm4
653 ; SSE2-NEXT: psllw $2, %xmm0
654 ; SSE2-NEXT: pand %xmm3, %xmm0
655 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
656 ; SSE2-NEXT: por %xmm4, %xmm0
657 ; SSE2-NEXT: paddb %xmm1, %xmm1
658 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
659 ; SSE2-NEXT: movdqa %xmm2, %xmm1
660 ; SSE2-NEXT: pandn %xmm0, %xmm1
661 ; SSE2-NEXT: paddb %xmm0, %xmm0
662 ; SSE2-NEXT: pand %xmm2, %xmm0
663 ; SSE2-NEXT: por %xmm1, %xmm0
666 ; SSE41-LABEL: var_shift_v2i8:
668 ; SSE41-NEXT: movdqa %xmm0, %xmm2
669 ; SSE41-NEXT: psllw $5, %xmm1
670 ; SSE41-NEXT: movdqa %xmm0, %xmm3
671 ; SSE41-NEXT: psllw $4, %xmm3
672 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
673 ; SSE41-NEXT: movdqa %xmm1, %xmm0
674 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
675 ; SSE41-NEXT: movdqa %xmm2, %xmm3
676 ; SSE41-NEXT: psllw $2, %xmm3
677 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
678 ; SSE41-NEXT: paddb %xmm1, %xmm1
679 ; SSE41-NEXT: movdqa %xmm1, %xmm0
680 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
681 ; SSE41-NEXT: movdqa %xmm2, %xmm3
682 ; SSE41-NEXT: paddb %xmm2, %xmm3
683 ; SSE41-NEXT: paddb %xmm1, %xmm1
684 ; SSE41-NEXT: movdqa %xmm1, %xmm0
685 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
686 ; SSE41-NEXT: movdqa %xmm2, %xmm0
689 ; AVX-LABEL: var_shift_v2i8:
691 ; AVX-NEXT: vpsllw $5, %xmm1, %xmm1
692 ; AVX-NEXT: vpsllw $4, %xmm0, %xmm2
693 ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
694 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
695 ; AVX-NEXT: vpsllw $2, %xmm0, %xmm2
696 ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
697 ; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
698 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
699 ; AVX-NEXT: vpaddb %xmm0, %xmm0, %xmm2
700 ; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
701 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
704 ; XOP-LABEL: var_shift_v2i8:
706 ; XOP-NEXT: vpshlb %xmm1, %xmm0, %xmm0
709 ; AVX512DQ-LABEL: var_shift_v2i8:
711 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
712 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
713 ; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
714 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
715 ; AVX512DQ-NEXT: vzeroupper
716 ; AVX512DQ-NEXT: retq
718 ; AVX512BW-LABEL: var_shift_v2i8:
720 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
721 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
722 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
723 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
724 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
725 ; AVX512BW-NEXT: vzeroupper
726 ; AVX512BW-NEXT: retq
728 ; AVX512DQVL-LABEL: var_shift_v2i8:
729 ; AVX512DQVL: # %bb.0:
730 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
731 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
732 ; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
733 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
734 ; AVX512DQVL-NEXT: vzeroupper
735 ; AVX512DQVL-NEXT: retq
737 ; AVX512BWVL-LABEL: var_shift_v2i8:
738 ; AVX512BWVL: # %bb.0:
739 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
740 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
741 ; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
742 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
743 ; AVX512BWVL-NEXT: vzeroupper
744 ; AVX512BWVL-NEXT: retq
746 ; X86-SSE-LABEL: var_shift_v2i8:
748 ; X86-SSE-NEXT: psllw $5, %xmm1
749 ; X86-SSE-NEXT: pxor %xmm2, %xmm2
750 ; X86-SSE-NEXT: pxor %xmm3, %xmm3
751 ; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm3
752 ; X86-SSE-NEXT: movdqa %xmm3, %xmm4
753 ; X86-SSE-NEXT: pandn %xmm0, %xmm4
754 ; X86-SSE-NEXT: psllw $4, %xmm0
755 ; X86-SSE-NEXT: pand %xmm3, %xmm0
756 ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
757 ; X86-SSE-NEXT: por %xmm4, %xmm0
758 ; X86-SSE-NEXT: paddb %xmm1, %xmm1
759 ; X86-SSE-NEXT: pxor %xmm3, %xmm3
760 ; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm3
761 ; X86-SSE-NEXT: movdqa %xmm3, %xmm4
762 ; X86-SSE-NEXT: pandn %xmm0, %xmm4
763 ; X86-SSE-NEXT: psllw $2, %xmm0
764 ; X86-SSE-NEXT: pand %xmm3, %xmm0
765 ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
766 ; X86-SSE-NEXT: por %xmm4, %xmm0
767 ; X86-SSE-NEXT: paddb %xmm1, %xmm1
768 ; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm2
769 ; X86-SSE-NEXT: movdqa %xmm2, %xmm1
770 ; X86-SSE-NEXT: pandn %xmm0, %xmm1
771 ; X86-SSE-NEXT: paddb %xmm0, %xmm0
772 ; X86-SSE-NEXT: pand %xmm2, %xmm0
773 ; X86-SSE-NEXT: por %xmm1, %xmm0
775 %shift = shl <2 x i8> %a, %b
780 ; Uniform Variable Shifts
; Uniform (splat) variable shift of <2 x i32>. The shufflevector with an
; all-zero mask takes element 0 of %b for both lanes, and %a is then shifted
; left by that splat. The per-prefix assertions are autogenerated (see the note
; on the first line of this file), so regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
783 define <2 x i32> @splatvar_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
784 ; SSE2-LABEL: splatvar_shift_v2i32:
786 ; SSE2-NEXT: xorps %xmm2, %xmm2
787 ; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
788 ; SSE2-NEXT: pslld %xmm2, %xmm0
791 ; SSE41-LABEL: splatvar_shift_v2i32:
793 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
794 ; SSE41-NEXT: pslld %xmm1, %xmm0
797 ; AVX-LABEL: splatvar_shift_v2i32:
799 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
800 ; AVX-NEXT: vpslld %xmm1, %xmm0, %xmm0
803 ; XOP-LABEL: splatvar_shift_v2i32:
805 ; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
806 ; XOP-NEXT: vpslld %xmm1, %xmm0, %xmm0
809 ; AVX512-LABEL: splatvar_shift_v2i32:
811 ; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
812 ; AVX512-NEXT: vpslld %xmm1, %xmm0, %xmm0
815 ; AVX512VL-LABEL: splatvar_shift_v2i32:
817 ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
818 ; AVX512VL-NEXT: vpslld %xmm1, %xmm0, %xmm0
819 ; AVX512VL-NEXT: retq
821 ; X86-SSE-LABEL: splatvar_shift_v2i32:
823 ; X86-SSE-NEXT: xorps %xmm2, %xmm2
824 ; X86-SSE-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
825 ; X86-SSE-NEXT: pslld %xmm2, %xmm0
827 %splat = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> zeroinitializer
828 %shift = shl <2 x i32> %a, %splat
; Uniform (splat) variable shift of <4 x i16>. Element 0 of %b is broadcast to
; all four lanes by the zero-mask shufflevector and used as the left-shift
; amount for %a. Every expected sequence below ends in a single psllw/vpsllw
; whose count register is built from the low 16 bits of %xmm1.
; The assertions are autogenerated; regenerate with
; utils/update_llc_test_checks.py instead of hand-editing.
832 define <4 x i16> @splatvar_shift_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
833 ; SSE2-LABEL: splatvar_shift_v4i16:
835 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
836 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
837 ; SSE2-NEXT: psllw %xmm1, %xmm0
840 ; SSE41-LABEL: splatvar_shift_v4i16:
842 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
843 ; SSE41-NEXT: psllw %xmm1, %xmm0
846 ; AVX-LABEL: splatvar_shift_v4i16:
848 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
849 ; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0
852 ; XOP-LABEL: splatvar_shift_v4i16:
854 ; XOP-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
855 ; XOP-NEXT: vpsllw %xmm1, %xmm0, %xmm0
858 ; AVX512-LABEL: splatvar_shift_v4i16:
860 ; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
861 ; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0
864 ; AVX512VL-LABEL: splatvar_shift_v4i16:
866 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
867 ; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
868 ; AVX512VL-NEXT: retq
870 ; X86-SSE-LABEL: splatvar_shift_v4i16:
872 ; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
873 ; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
874 ; X86-SSE-NEXT: psllw %xmm1, %xmm0
876 %splat = shufflevector <4 x i16> %b, <4 x i16> undef, <4 x i32> zeroinitializer
877 %shift = shl <4 x i16> %a, %splat
; Uniform (splat) variable shift of <2 x i16>. Element 0 of %b is broadcast to
; both lanes by the zero-mask shufflevector and used as the left-shift amount
; for %a. The expected instruction sequences are the same as those listed for
; the <4 x i16> splat test in this file.
; The assertions are autogenerated; regenerate with
; utils/update_llc_test_checks.py instead of hand-editing.
881 define <2 x i16> @splatvar_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
882 ; SSE2-LABEL: splatvar_shift_v2i16:
884 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
885 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
886 ; SSE2-NEXT: psllw %xmm1, %xmm0
889 ; SSE41-LABEL: splatvar_shift_v2i16:
891 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
892 ; SSE41-NEXT: psllw %xmm1, %xmm0
895 ; AVX-LABEL: splatvar_shift_v2i16:
897 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
898 ; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0
901 ; XOP-LABEL: splatvar_shift_v2i16:
903 ; XOP-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
904 ; XOP-NEXT: vpsllw %xmm1, %xmm0, %xmm0
907 ; AVX512-LABEL: splatvar_shift_v2i16:
909 ; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
910 ; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0
913 ; AVX512VL-LABEL: splatvar_shift_v2i16:
915 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
916 ; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
917 ; AVX512VL-NEXT: retq
919 ; X86-SSE-LABEL: splatvar_shift_v2i16:
921 ; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
922 ; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
923 ; X86-SSE-NEXT: psllw %xmm1, %xmm0
925 %splat = shufflevector <2 x i16> %b, <2 x i16> undef, <2 x i32> zeroinitializer
926 %shift = shl <2 x i16> %a, %splat
; Uniform (splat) variable shift of <8 x i8>. Element 0 of %b is broadcast to
; all eight lanes by the zero-mask shufflevector and used as the left-shift
; amount for %a.
; Shape of the expected output, as recorded in the assertions below.
;  - SSE2 / SSE4.1 / AVX1 / AVX2 and 32-bit SSE2 runs shift 16-bit words with
;    psllw, shift an all-ones register (pcmpeqd of a register with itself) by
;    the same count, splat byte 0 of that result and pand it into the data.
;  - XOP runs splat the count byte and issue one vpshlb.
;  - AVX512 runs zero-extend the bytes to wider lanes, shift there, and
;    truncate back with vpmovdb / vpmovwb.
; The assertions are autogenerated; regenerate with
; utils/update_llc_test_checks.py instead of hand-editing.
930 define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
931 ; SSE2-LABEL: splatvar_shift_v8i8:
933 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
934 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
935 ; SSE2-NEXT: psllw %xmm1, %xmm0
936 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
937 ; SSE2-NEXT: psllw %xmm1, %xmm2
938 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
939 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
940 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
941 ; SSE2-NEXT: pand %xmm1, %xmm0
944 ; SSE41-LABEL: splatvar_shift_v8i8:
946 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
947 ; SSE41-NEXT: psllw %xmm1, %xmm0
948 ; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
949 ; SSE41-NEXT: psllw %xmm1, %xmm2
950 ; SSE41-NEXT: pxor %xmm1, %xmm1
951 ; SSE41-NEXT: pshufb %xmm1, %xmm2
952 ; SSE41-NEXT: pand %xmm2, %xmm0
955 ; AVX1-LABEL: splatvar_shift_v8i8:
957 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
958 ; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
959 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
960 ; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm1
961 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
962 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
963 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
966 ; AVX2-LABEL: splatvar_shift_v8i8:
968 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
969 ; AVX2-NEXT: vpsllw %xmm1, %xmm0, %xmm0
970 ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
971 ; AVX2-NEXT: vpsllw %xmm1, %xmm2, %xmm1
972 ; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
973 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
976 ; XOPAVX1-LABEL: splatvar_shift_v8i8:
978 ; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
979 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
980 ; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0
983 ; XOPAVX2-LABEL: splatvar_shift_v8i8:
985 ; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
986 ; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
989 ; AVX512DQ-LABEL: splatvar_shift_v8i8:
991 ; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
992 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
993 ; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0
994 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
995 ; AVX512DQ-NEXT: vzeroupper
996 ; AVX512DQ-NEXT: retq
998 ; AVX512BW-LABEL: splatvar_shift_v8i8:
1000 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1001 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1002 ; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
1003 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
1004 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1005 ; AVX512BW-NEXT: vzeroupper
1006 ; AVX512BW-NEXT: retq
1008 ; AVX512DQVL-LABEL: splatvar_shift_v8i8:
1009 ; AVX512DQVL: # %bb.0:
1010 ; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1011 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1012 ; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0
1013 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
1014 ; AVX512DQVL-NEXT: vzeroupper
1015 ; AVX512DQVL-NEXT: retq
1017 ; AVX512BWVL-LABEL: splatvar_shift_v8i8:
1018 ; AVX512BWVL: # %bb.0:
1019 ; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1020 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1021 ; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
1022 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
1023 ; AVX512BWVL-NEXT: vzeroupper
1024 ; AVX512BWVL-NEXT: retq
1026 ; X86-SSE-LABEL: splatvar_shift_v8i8:
1028 ; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
1029 ; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1030 ; X86-SSE-NEXT: psllw %xmm1, %xmm0
1031 ; X86-SSE-NEXT: pcmpeqd %xmm2, %xmm2
1032 ; X86-SSE-NEXT: psllw %xmm1, %xmm2
1033 ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1034 ; X86-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
1035 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
1036 ; X86-SSE-NEXT: pand %xmm1, %xmm0
1037 ; X86-SSE-NEXT: retl
1038 %splat = shufflevector <8 x i8> %b, <8 x i8> undef, <8 x i32> zeroinitializer
1039 %shift = shl <8 x i8> %a, %splat
; Uniform (splat) variable shift of <4 x i8>. Element 0 of %b is broadcast to
; all four lanes by the zero-mask shufflevector and used as the left-shift
; amount for %a.
; The non-XOP SSE/AVX expectations below shift 16-bit words with psllw and then
; pand with a byte splat of an all-ones register shifted by the same count.
; XOP runs use vpshlb, and the AVX512 runs widen, shift and truncate.
; The assertions are autogenerated; regenerate with
; utils/update_llc_test_checks.py instead of hand-editing.
1043 define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
1044 ; SSE2-LABEL: splatvar_shift_v4i8:
1046 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
1047 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1048 ; SSE2-NEXT: psllw %xmm1, %xmm0
1049 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
1050 ; SSE2-NEXT: psllw %xmm1, %xmm2
1051 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1052 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
1053 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
1054 ; SSE2-NEXT: pand %xmm1, %xmm0
1057 ; SSE41-LABEL: splatvar_shift_v4i8:
1059 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1060 ; SSE41-NEXT: psllw %xmm1, %xmm0
1061 ; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
1062 ; SSE41-NEXT: psllw %xmm1, %xmm2
1063 ; SSE41-NEXT: pxor %xmm1, %xmm1
1064 ; SSE41-NEXT: pshufb %xmm1, %xmm2
1065 ; SSE41-NEXT: pand %xmm2, %xmm0
1068 ; AVX1-LABEL: splatvar_shift_v4i8:
1070 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1071 ; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
1072 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
1073 ; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm1
1074 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1075 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1076 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1079 ; AVX2-LABEL: splatvar_shift_v4i8:
1081 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1082 ; AVX2-NEXT: vpsllw %xmm1, %xmm0, %xmm0
1083 ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
1084 ; AVX2-NEXT: vpsllw %xmm1, %xmm2, %xmm1
1085 ; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
1086 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1089 ; XOPAVX1-LABEL: splatvar_shift_v4i8:
1091 ; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1092 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1093 ; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0
1094 ; XOPAVX1-NEXT: retq
1096 ; XOPAVX2-LABEL: splatvar_shift_v4i8:
1098 ; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
1099 ; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
1100 ; XOPAVX2-NEXT: retq
1102 ; AVX512DQ-LABEL: splatvar_shift_v4i8:
1103 ; AVX512DQ: # %bb.0:
1104 ; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1105 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1106 ; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0
1107 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1108 ; AVX512DQ-NEXT: vzeroupper
1109 ; AVX512DQ-NEXT: retq
1111 ; AVX512BW-LABEL: splatvar_shift_v4i8:
1112 ; AVX512BW: # %bb.0:
1113 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1114 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1115 ; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
1116 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
1117 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1118 ; AVX512BW-NEXT: vzeroupper
1119 ; AVX512BW-NEXT: retq
1121 ; AVX512DQVL-LABEL: splatvar_shift_v4i8:
1122 ; AVX512DQVL: # %bb.0:
1123 ; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1124 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1125 ; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0
1126 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
1127 ; AVX512DQVL-NEXT: vzeroupper
1128 ; AVX512DQVL-NEXT: retq
1130 ; AVX512BWVL-LABEL: splatvar_shift_v4i8:
1131 ; AVX512BWVL: # %bb.0:
1132 ; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1133 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1134 ; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
1135 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
1136 ; AVX512BWVL-NEXT: vzeroupper
1137 ; AVX512BWVL-NEXT: retq
1139 ; X86-SSE-LABEL: splatvar_shift_v4i8:
1141 ; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
1142 ; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1143 ; X86-SSE-NEXT: psllw %xmm1, %xmm0
1144 ; X86-SSE-NEXT: pcmpeqd %xmm2, %xmm2
1145 ; X86-SSE-NEXT: psllw %xmm1, %xmm2
1146 ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1147 ; X86-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
1148 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
1149 ; X86-SSE-NEXT: pand %xmm1, %xmm0
1150 ; X86-SSE-NEXT: retl
1151 %splat = shufflevector <4 x i8> %b, <4 x i8> undef, <4 x i32> zeroinitializer
1152 %shift = shl <4 x i8> %a, %splat
; Uniform (splat) variable shift of <2 x i8>. Element 0 of %b is broadcast to
; both lanes by the zero-mask shufflevector and used as the left-shift amount
; for %a.
; Unlike the <8 x i8> and <4 x i8> splat tests, both XOP runs share one set of
; assertions here, with the count splat expected as a single insertq.
; The assertions are autogenerated; regenerate with
; utils/update_llc_test_checks.py instead of hand-editing.
1156 define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
1157 ; SSE2-LABEL: splatvar_shift_v2i8:
1159 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
1160 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1161 ; SSE2-NEXT: psllw %xmm1, %xmm0
1162 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
1163 ; SSE2-NEXT: psllw %xmm1, %xmm2
1164 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1165 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
1166 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
1167 ; SSE2-NEXT: pand %xmm1, %xmm0
1170 ; SSE41-LABEL: splatvar_shift_v2i8:
1172 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1173 ; SSE41-NEXT: psllw %xmm1, %xmm0
1174 ; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
1175 ; SSE41-NEXT: psllw %xmm1, %xmm2
1176 ; SSE41-NEXT: pxor %xmm1, %xmm1
1177 ; SSE41-NEXT: pshufb %xmm1, %xmm2
1178 ; SSE41-NEXT: pand %xmm2, %xmm0
1181 ; AVX1-LABEL: splatvar_shift_v2i8:
1183 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1184 ; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
1185 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
1186 ; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm1
1187 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1188 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1189 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1192 ; AVX2-LABEL: splatvar_shift_v2i8:
1194 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1195 ; AVX2-NEXT: vpsllw %xmm1, %xmm0, %xmm0
1196 ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
1197 ; AVX2-NEXT: vpsllw %xmm1, %xmm2, %xmm1
1198 ; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
1199 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1202 ; XOP-LABEL: splatvar_shift_v2i8:
1204 ; XOP-NEXT: insertq {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7,u,u,u,u,u,u,u,u]
1205 ; XOP-NEXT: vpshlb %xmm1, %xmm0, %xmm0
1208 ; AVX512DQ-LABEL: splatvar_shift_v2i8:
1209 ; AVX512DQ: # %bb.0:
1210 ; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1211 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1212 ; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0
1213 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1214 ; AVX512DQ-NEXT: vzeroupper
1215 ; AVX512DQ-NEXT: retq
1217 ; AVX512BW-LABEL: splatvar_shift_v2i8:
1218 ; AVX512BW: # %bb.0:
1219 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1220 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1221 ; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
1222 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
1223 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1224 ; AVX512BW-NEXT: vzeroupper
1225 ; AVX512BW-NEXT: retq
1227 ; AVX512DQVL-LABEL: splatvar_shift_v2i8:
1228 ; AVX512DQVL: # %bb.0:
1229 ; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1230 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1231 ; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0
1232 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
1233 ; AVX512DQVL-NEXT: vzeroupper
1234 ; AVX512DQVL-NEXT: retq
1236 ; AVX512BWVL-LABEL: splatvar_shift_v2i8:
1237 ; AVX512BWVL: # %bb.0:
1238 ; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1239 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1240 ; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
1241 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
1242 ; AVX512BWVL-NEXT: vzeroupper
1243 ; AVX512BWVL-NEXT: retq
1245 ; X86-SSE-LABEL: splatvar_shift_v2i8:
1247 ; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
1248 ; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1249 ; X86-SSE-NEXT: psllw %xmm1, %xmm0
1250 ; X86-SSE-NEXT: pcmpeqd %xmm2, %xmm2
1251 ; X86-SSE-NEXT: psllw %xmm1, %xmm2
1252 ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1253 ; X86-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
1254 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
1255 ; X86-SSE-NEXT: pand %xmm1, %xmm0
1256 ; X86-SSE-NEXT: retl
1257 %splat = shufflevector <2 x i8> %b, <2 x i8> undef, <2 x i32> zeroinitializer
1258 %shift = shl <2 x i8> %a, %splat
; Non-uniform constant shift of <2 x i32>. Lane 0 of %a is shifted left by 4
; and lane 1 by 5.
; Shape of the expected output, as recorded in the assertions below.
;  - SSE2 / SSE4.1 / AVX1 and the 32-bit run perform two immediate pslld shifts
;    (by 4 and by 5) and merge the wanted lanes with a shuffle or blend.
;  - AVX2, XOP and AVX512 runs use one per-lane shift (vpsllvd or vpshld) with
;    the shift amounts loaded from the constant pool.
; The assertions are autogenerated; regenerate with
; utils/update_llc_test_checks.py instead of hand-editing.
1266 define <2 x i32> @constant_shift_v2i32(<2 x i32> %a) nounwind {
1267 ; SSE2-LABEL: constant_shift_v2i32:
1269 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1270 ; SSE2-NEXT: pslld $4, %xmm1
1271 ; SSE2-NEXT: pslld $5, %xmm0
1272 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1273 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1274 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1277 ; SSE41-LABEL: constant_shift_v2i32:
1279 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1280 ; SSE41-NEXT: pslld $5, %xmm1
1281 ; SSE41-NEXT: pslld $4, %xmm0
1282 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
1285 ; AVX1-LABEL: constant_shift_v2i32:
1287 ; AVX1-NEXT: vpslld $5, %xmm0, %xmm1
1288 ; AVX1-NEXT: vpslld $4, %xmm0, %xmm0
1289 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
1292 ; AVX2-LABEL: constant_shift_v2i32:
1294 ; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1297 ; XOPAVX1-LABEL: constant_shift_v2i32:
1299 ; XOPAVX1-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1300 ; XOPAVX1-NEXT: retq
1302 ; XOPAVX2-LABEL: constant_shift_v2i32:
1304 ; XOPAVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1305 ; XOPAVX2-NEXT: retq
1307 ; AVX512-LABEL: constant_shift_v2i32:
1309 ; AVX512-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1312 ; AVX512VL-LABEL: constant_shift_v2i32:
1313 ; AVX512VL: # %bb.0:
1314 ; AVX512VL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1315 ; AVX512VL-NEXT: retq
1317 ; X86-SSE-LABEL: constant_shift_v2i32:
1319 ; X86-SSE-NEXT: movdqa %xmm0, %xmm1
1320 ; X86-SSE-NEXT: pslld $4, %xmm1
1321 ; X86-SSE-NEXT: pslld $5, %xmm0
1322 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1323 ; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1324 ; X86-SSE-NEXT: movdqa %xmm1, %xmm0
1325 ; X86-SSE-NEXT: retl
1326 %shift = shl <2 x i32> %a, <i32 4, i32 5>
1327 ret <2 x i32> %shift
; Non-uniform constant shift of <4 x i16>. Lanes 0..3 of %a are shifted left
; by 0, 1, 2 and 3 respectively.
; Shape of the expected output, as recorded in the assertions below.
;  - SSE, AVX, AVX512DQ(VL) and the 32-bit run expect a pmullw/vpmullw with a
;    constant-pool operand instead of a shift instruction (presumably the
;    per-lane powers of two; the pool contents are not checked here).
;  - XOP expects vpshlw, AVX512BWVL expects vpsllvw on xmm, and AVX512BW
;    expects vpsllvw on zmm with the amounts materialised as <0,1,2,3,u,u,u,u>.
; The assertions are autogenerated; regenerate with
; utils/update_llc_test_checks.py instead of hand-editing.
1330 define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind {
1331 ; SSE-LABEL: constant_shift_v4i16:
1333 ; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1336 ; AVX-LABEL: constant_shift_v4i16:
1338 ; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1341 ; XOP-LABEL: constant_shift_v4i16:
1343 ; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1346 ; AVX512DQ-LABEL: constant_shift_v4i16:
1347 ; AVX512DQ: # %bb.0:
1348 ; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1349 ; AVX512DQ-NEXT: retq
1351 ; AVX512BW-LABEL: constant_shift_v4i16:
1352 ; AVX512BW: # %bb.0:
1353 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1354 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = <0,1,2,3,u,u,u,u>
1355 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
1356 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1357 ; AVX512BW-NEXT: vzeroupper
1358 ; AVX512BW-NEXT: retq
1360 ; AVX512DQVL-LABEL: constant_shift_v4i16:
1361 ; AVX512DQVL: # %bb.0:
1362 ; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1363 ; AVX512DQVL-NEXT: retq
1365 ; AVX512BWVL-LABEL: constant_shift_v4i16:
1366 ; AVX512BWVL: # %bb.0:
1367 ; AVX512BWVL-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1368 ; AVX512BWVL-NEXT: retq
1370 ; X86-SSE-LABEL: constant_shift_v4i16:
1372 ; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1373 ; X86-SSE-NEXT: retl
1374 %shift = shl <4 x i16> %a, <i16 0, i16 1, i16 2, i16 3>
1375 ret <4 x i16> %shift
; Non-uniform constant shift of <2 x i16>. Lane 0 of %a is shifted left by 2
; and lane 1 by 3.
; Shape of the expected output, as recorded in the assertions below.
;  - SSE2 and the 32-bit run expect a single pmullw with a constant-pool
;    operand.
;  - SSE4.1, AVX and AVX512DQ(VL) expect two immediate psllw shifts (by 3 and
;    by 2) merged with pblendw.
;  - XOP expects vpshlw, AVX512BWVL expects vpsllvw on xmm, and AVX512BW
;    expects vpsllvw on zmm with the amounts materialised as <2,3,u,u,u,u,u,u>.
; The assertions are autogenerated; regenerate with
; utils/update_llc_test_checks.py instead of hand-editing.
1378 define <2 x i16> @constant_shift_v2i16(<2 x i16> %a) nounwind {
1379 ; SSE2-LABEL: constant_shift_v2i16:
1381 ; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1384 ; SSE41-LABEL: constant_shift_v2i16:
1386 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1387 ; SSE41-NEXT: psllw $3, %xmm1
1388 ; SSE41-NEXT: psllw $2, %xmm0
1389 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
1392 ; AVX-LABEL: constant_shift_v2i16:
1394 ; AVX-NEXT: vpsllw $3, %xmm0, %xmm1
1395 ; AVX-NEXT: vpsllw $2, %xmm0, %xmm0
1396 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
1399 ; XOP-LABEL: constant_shift_v2i16:
1401 ; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1404 ; AVX512DQ-LABEL: constant_shift_v2i16:
1405 ; AVX512DQ: # %bb.0:
1406 ; AVX512DQ-NEXT: vpsllw $3, %xmm0, %xmm1
1407 ; AVX512DQ-NEXT: vpsllw $2, %xmm0, %xmm0
1408 ; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
1409 ; AVX512DQ-NEXT: retq
1411 ; AVX512BW-LABEL: constant_shift_v2i16:
1412 ; AVX512BW: # %bb.0:
1413 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1414 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = <2,3,u,u,u,u,u,u>
1415 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
1416 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1417 ; AVX512BW-NEXT: vzeroupper
1418 ; AVX512BW-NEXT: retq
1420 ; AVX512DQVL-LABEL: constant_shift_v2i16:
1421 ; AVX512DQVL: # %bb.0:
1422 ; AVX512DQVL-NEXT: vpsllw $3, %xmm0, %xmm1
1423 ; AVX512DQVL-NEXT: vpsllw $2, %xmm0, %xmm0
1424 ; AVX512DQVL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
1425 ; AVX512DQVL-NEXT: retq
1427 ; AVX512BWVL-LABEL: constant_shift_v2i16:
1428 ; AVX512BWVL: # %bb.0:
1429 ; AVX512BWVL-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1430 ; AVX512BWVL-NEXT: retq
1432 ; X86-SSE-LABEL: constant_shift_v2i16:
1434 ; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1435 ; X86-SSE-NEXT: retl
1436 %shift = shl <2 x i16> %a, <i16 2, i16 3>
1437 ret <2 x i16> %shift
; Left shift of a <8 x i8> vector by the per-element constant amounts 0 through 7.
; In the assertions below, every non-XOP configuration widens the bytes before
; shifting (to words for a pmullw/vpsllvw, or to dwords for a vpsllvd) and
; narrows the result back to bytes afterwards; the XOP runs expect a single
; vpshlb with a constant-pool operand instead.
1440 define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
1441 ; SSE2-LABEL: constant_shift_v8i8:
1443 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1444 ; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1445 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1446 ; SSE2-NEXT: pxor %xmm1, %xmm1
1447 ; SSE2-NEXT: packuswb %xmm1, %xmm0
1450 ; SSE41-LABEL: constant_shift_v8i8:
1452 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1453 ; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1454 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1455 ; SSE41-NEXT: pxor %xmm1, %xmm1
1456 ; SSE41-NEXT: packuswb %xmm1, %xmm0
1459 ; AVX1-LABEL: constant_shift_v8i8:
1461 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1462 ; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1463 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1464 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1465 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
1468 ; AVX2-LABEL: constant_shift_v8i8:
1470 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1471 ; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1472 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1473 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1474 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
1475 ; AVX2-NEXT: vzeroupper
1478 ; XOP-LABEL: constant_shift_v8i8:
1480 ; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1483 ; AVX512DQ-LABEL: constant_shift_v8i8:
1484 ; AVX512DQ: # %bb.0:
1485 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1486 ; AVX512DQ-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
1487 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1488 ; AVX512DQ-NEXT: vzeroupper
1489 ; AVX512DQ-NEXT: retq
1491 ; AVX512BW-LABEL: constant_shift_v8i8:
1492 ; AVX512BW: # %bb.0:
1493 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,0,0,0,0,0,0,0,0]
1494 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1495 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
1496 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
1497 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1498 ; AVX512BW-NEXT: vzeroupper
1499 ; AVX512BW-NEXT: retq
1501 ; AVX512DQVL-LABEL: constant_shift_v8i8:
1502 ; AVX512DQVL: # %bb.0:
1503 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1504 ; AVX512DQVL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
1505 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
1506 ; AVX512DQVL-NEXT: vzeroupper
1507 ; AVX512DQVL-NEXT: retq
1509 ; AVX512BWVL-LABEL: constant_shift_v8i8:
1510 ; AVX512BWVL: # %bb.0:
1511 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1512 ; AVX512BWVL-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1513 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
1514 ; AVX512BWVL-NEXT: vzeroupper
1515 ; AVX512BWVL-NEXT: retq
1517 ; X86-SSE-LABEL: constant_shift_v8i8:
1519 ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1520 ; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1521 ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1522 ; X86-SSE-NEXT: pxor %xmm1, %xmm1
1523 ; X86-SSE-NEXT: packuswb %xmm1, %xmm0
1524 ; X86-SSE-NEXT: retl
1525 %shift = shl <8 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
; Left shift of a <4 x i8> vector by the per-element constant amounts <0, 1, 2, 3>.
; In the assertions below, every non-XOP configuration widens the bytes before
; shifting (to words for a pmullw/vpsllvw, or to dwords for a vpsllvd) and
; narrows the result back to bytes afterwards; the XOP runs expect a single
; vpshlb with a constant-pool operand instead.
1529 define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
1530 ; SSE2-LABEL: constant_shift_v4i8:
1532 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1533 ; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1534 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1535 ; SSE2-NEXT: pxor %xmm1, %xmm1
1536 ; SSE2-NEXT: packuswb %xmm1, %xmm0
1539 ; SSE41-LABEL: constant_shift_v4i8:
1541 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1542 ; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1543 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1544 ; SSE41-NEXT: pxor %xmm1, %xmm1
1545 ; SSE41-NEXT: packuswb %xmm1, %xmm0
1548 ; AVX1-LABEL: constant_shift_v4i8:
1550 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1551 ; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1552 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1553 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1554 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
1557 ; AVX2-LABEL: constant_shift_v4i8:
1559 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1560 ; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1561 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1562 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1563 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
1564 ; AVX2-NEXT: vzeroupper
1567 ; XOP-LABEL: constant_shift_v4i8:
1569 ; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1572 ; AVX512DQ-LABEL: constant_shift_v4i8:
1573 ; AVX512DQ: # %bb.0:
1574 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1575 ; AVX512DQ-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
1576 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1577 ; AVX512DQ-NEXT: vzeroupper
1578 ; AVX512DQ-NEXT: retq
1580 ; AVX512BW-LABEL: constant_shift_v4i8:
1581 ; AVX512BW: # %bb.0:
1582 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,0,0,0,0,0,0,0,0,0,0,0,0]
1583 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1584 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
1585 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
1586 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1587 ; AVX512BW-NEXT: vzeroupper
1588 ; AVX512BW-NEXT: retq
1590 ; AVX512DQVL-LABEL: constant_shift_v4i8:
1591 ; AVX512DQVL: # %bb.0:
1592 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1593 ; AVX512DQVL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
1594 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
1595 ; AVX512DQVL-NEXT: vzeroupper
1596 ; AVX512DQVL-NEXT: retq
1598 ; AVX512BWVL-LABEL: constant_shift_v4i8:
1599 ; AVX512BWVL: # %bb.0:
1600 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1601 ; AVX512BWVL-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1602 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
1603 ; AVX512BWVL-NEXT: vzeroupper
1604 ; AVX512BWVL-NEXT: retq
1606 ; X86-SSE-LABEL: constant_shift_v4i8:
1608 ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1609 ; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1610 ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1611 ; X86-SSE-NEXT: pxor %xmm1, %xmm1
1612 ; X86-SSE-NEXT: packuswb %xmm1, %xmm0
1613 ; X86-SSE-NEXT: retl
1614 %shift = shl <4 x i8> %a, <i8 0, i8 1, i8 2, i8 3>
; Left shift of a <2 x i8> vector by the per-element constant amounts <2, 3>.
; In the assertions below, every non-XOP configuration widens the bytes before
; shifting (to words for a pmullw/vpsllvw, or to dwords for a vpsllvd) and
; narrows the result back to bytes afterwards; the XOP runs expect a single
; vpshlb with a constant-pool operand instead.
1618 define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
1619 ; SSE2-LABEL: constant_shift_v2i8:
1621 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1622 ; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1623 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1624 ; SSE2-NEXT: pxor %xmm1, %xmm1
1625 ; SSE2-NEXT: packuswb %xmm1, %xmm0
1628 ; SSE41-LABEL: constant_shift_v2i8:
1630 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1631 ; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1632 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1633 ; SSE41-NEXT: pxor %xmm1, %xmm1
1634 ; SSE41-NEXT: packuswb %xmm1, %xmm0
1637 ; AVX1-LABEL: constant_shift_v2i8:
1639 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1640 ; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1641 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1642 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1643 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
1646 ; AVX2-LABEL: constant_shift_v2i8:
1648 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1649 ; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1650 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1651 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1652 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
1653 ; AVX2-NEXT: vzeroupper
1656 ; XOP-LABEL: constant_shift_v2i8:
1658 ; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1661 ; AVX512DQ-LABEL: constant_shift_v2i8:
1662 ; AVX512DQ: # %bb.0:
1663 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1664 ; AVX512DQ-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
1665 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1666 ; AVX512DQ-NEXT: vzeroupper
1667 ; AVX512DQ-NEXT: retq
1669 ; AVX512BW-LABEL: constant_shift_v2i8:
1670 ; AVX512BW: # %bb.0:
1671 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
1672 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1673 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
1674 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
1675 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1676 ; AVX512BW-NEXT: vzeroupper
1677 ; AVX512BW-NEXT: retq
1679 ; AVX512DQVL-LABEL: constant_shift_v2i8:
1680 ; AVX512DQVL: # %bb.0:
1681 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1682 ; AVX512DQVL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
1683 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
1684 ; AVX512DQVL-NEXT: vzeroupper
1685 ; AVX512DQVL-NEXT: retq
1687 ; AVX512BWVL-LABEL: constant_shift_v2i8:
1688 ; AVX512BWVL: # %bb.0:
1689 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1690 ; AVX512BWVL-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1691 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
1692 ; AVX512BWVL-NEXT: vzeroupper
1693 ; AVX512BWVL-NEXT: retq
1695 ; X86-SSE-LABEL: constant_shift_v2i8:
1697 ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1698 ; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1699 ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1700 ; X86-SSE-NEXT: pxor %xmm1, %xmm1
1701 ; X86-SSE-NEXT: packuswb %xmm1, %xmm0
1702 ; X86-SSE-NEXT: retl
1703 %shift = shl <2 x i8> %a, <i8 2, i8 3>
1708 ; Uniform Constant Shifts
; Left shift of a <2 x i32> vector by the same constant amount (5) in both lanes.
; Every configuration checked below expects a single immediate-form dword
; shift on xmm0 (pslld $5, or its VEX-encoded form vpslld $5).
1711 define <2 x i32> @splatconstant_shift_v2i32(<2 x i32> %a) nounwind {
1712 ; SSE-LABEL: splatconstant_shift_v2i32:
1714 ; SSE-NEXT: pslld $5, %xmm0
1717 ; AVX-LABEL: splatconstant_shift_v2i32:
1719 ; AVX-NEXT: vpslld $5, %xmm0, %xmm0
1722 ; XOP-LABEL: splatconstant_shift_v2i32:
1724 ; XOP-NEXT: vpslld $5, %xmm0, %xmm0
1727 ; AVX512-LABEL: splatconstant_shift_v2i32:
1729 ; AVX512-NEXT: vpslld $5, %xmm0, %xmm0
1732 ; AVX512VL-LABEL: splatconstant_shift_v2i32:
1733 ; AVX512VL: # %bb.0:
1734 ; AVX512VL-NEXT: vpslld $5, %xmm0, %xmm0
1735 ; AVX512VL-NEXT: retq
1737 ; X86-SSE-LABEL: splatconstant_shift_v2i32:
1739 ; X86-SSE-NEXT: pslld $5, %xmm0
1740 ; X86-SSE-NEXT: retl
1741 %shift = shl <2 x i32> %a, <i32 5, i32 5>
1742 ret <2 x i32> %shift
; Left shift of a <4 x i16> vector by the same constant amount (3) in every lane.
; Every configuration checked below expects a single immediate-form word
; shift on xmm0 (psllw $3, or its VEX-encoded form vpsllw $3).
1745 define <4 x i16> @splatconstant_shift_v4i16(<4 x i16> %a) nounwind {
1746 ; SSE-LABEL: splatconstant_shift_v4i16:
1748 ; SSE-NEXT: psllw $3, %xmm0
1751 ; AVX-LABEL: splatconstant_shift_v4i16:
1753 ; AVX-NEXT: vpsllw $3, %xmm0, %xmm0
1756 ; XOP-LABEL: splatconstant_shift_v4i16:
1758 ; XOP-NEXT: vpsllw $3, %xmm0, %xmm0
1761 ; AVX512-LABEL: splatconstant_shift_v4i16:
1763 ; AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
1766 ; AVX512VL-LABEL: splatconstant_shift_v4i16:
1767 ; AVX512VL: # %bb.0:
1768 ; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
1769 ; AVX512VL-NEXT: retq
1771 ; X86-SSE-LABEL: splatconstant_shift_v4i16:
1773 ; X86-SSE-NEXT: psllw $3, %xmm0
1774 ; X86-SSE-NEXT: retl
1775 %shift = shl <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
1776 ret <4 x i16> %shift
; Left shift of a <2 x i16> vector by the same constant amount (3) in both lanes.
; Every configuration checked below expects a single immediate-form word
; shift on xmm0 (psllw $3, or its VEX-encoded form vpsllw $3).
1779 define <2 x i16> @splatconstant_shift_v2i16(<2 x i16> %a) nounwind {
1780 ; SSE-LABEL: splatconstant_shift_v2i16:
1782 ; SSE-NEXT: psllw $3, %xmm0
1785 ; AVX-LABEL: splatconstant_shift_v2i16:
1787 ; AVX-NEXT: vpsllw $3, %xmm0, %xmm0
1790 ; XOP-LABEL: splatconstant_shift_v2i16:
1792 ; XOP-NEXT: vpsllw $3, %xmm0, %xmm0
1795 ; AVX512-LABEL: splatconstant_shift_v2i16:
1797 ; AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
1800 ; AVX512VL-LABEL: splatconstant_shift_v2i16:
1801 ; AVX512VL: # %bb.0:
1802 ; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
1803 ; AVX512VL-NEXT: retq
1805 ; X86-SSE-LABEL: splatconstant_shift_v2i16:
1807 ; X86-SSE-NEXT: psllw $3, %xmm0
1808 ; X86-SSE-NEXT: retl
1809 %shift = shl <2 x i16> %a, <i16 3, i16 3>
1810 ret <2 x i16> %shift
; Left shift of a <8 x i8> vector by the same constant amount (3) in every lane.
; Outside XOP, the assertions below expect the shift to be done at word
; granularity (psllw/vpsllw $3) followed by an AND with a constant-pool operand
; (presumably a mask clearing the bits that crossed over from the neighbouring
; byte -- the mask value itself is not visible in these checks). The XOP runs
; expect a single vpshlb with a constant-pool operand.
1813 define <8 x i8> @splatconstant_shift_v8i8(<8 x i8> %a) nounwind {
1814 ; SSE-LABEL: splatconstant_shift_v8i8:
1816 ; SSE-NEXT: psllw $3, %xmm0
1817 ; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1820 ; AVX-LABEL: splatconstant_shift_v8i8:
1822 ; AVX-NEXT: vpsllw $3, %xmm0, %xmm0
1823 ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1826 ; XOP-LABEL: splatconstant_shift_v8i8:
1828 ; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1831 ; AVX512-LABEL: splatconstant_shift_v8i8:
1833 ; AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
1834 ; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1837 ; AVX512VL-LABEL: splatconstant_shift_v8i8:
1838 ; AVX512VL: # %bb.0:
1839 ; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
1840 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1841 ; AVX512VL-NEXT: retq
1843 ; X86-SSE-LABEL: splatconstant_shift_v8i8:
1845 ; X86-SSE-NEXT: psllw $3, %xmm0
1846 ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1847 ; X86-SSE-NEXT: retl
1848 %shift = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
; Left shift of a <4 x i8> vector by the same constant amount (3) in every lane.
; Outside XOP, the assertions below expect the shift to be done at word
; granularity (psllw/vpsllw $3) followed by an AND with a constant-pool operand
; (presumably a mask clearing the bits that crossed over from the neighbouring
; byte -- the mask value itself is not visible in these checks). The XOP runs
; expect a single vpshlb with a constant-pool operand.
1852 define <4 x i8> @splatconstant_shift_v4i8(<4 x i8> %a) nounwind {
1853 ; SSE-LABEL: splatconstant_shift_v4i8:
1855 ; SSE-NEXT: psllw $3, %xmm0
1856 ; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1859 ; AVX-LABEL: splatconstant_shift_v4i8:
1861 ; AVX-NEXT: vpsllw $3, %xmm0, %xmm0
1862 ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1865 ; XOP-LABEL: splatconstant_shift_v4i8:
1867 ; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1870 ; AVX512-LABEL: splatconstant_shift_v4i8:
1872 ; AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
1873 ; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1876 ; AVX512VL-LABEL: splatconstant_shift_v4i8:
1877 ; AVX512VL: # %bb.0:
1878 ; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
1879 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1880 ; AVX512VL-NEXT: retq
1882 ; X86-SSE-LABEL: splatconstant_shift_v4i8:
1884 ; X86-SSE-NEXT: psllw $3, %xmm0
1885 ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1886 ; X86-SSE-NEXT: retl
1887 %shift = shl <4 x i8> %a, <i8 3, i8 3, i8 3, i8 3>
1891 define <2 x i8> @splatconstant_shift_v2i8(<2 x i8> %a) nounwind {
1892 ; SSE-LABEL: splatconstant_shift_v2i8:
1894 ; SSE-NEXT: psllw $3, %xmm0
1895 ; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1898 ; AVX-LABEL: splatconstant_shift_v2i8:
1900 ; AVX-NEXT: vpsllw $3, %xmm0, %xmm0
1901 ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1904 ; XOP-LABEL: splatconstant_shift_v2i8:
1906 ; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1909 ; AVX512-LABEL: splatconstant_shift_v2i8:
1911 ; AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
1912 ; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1915 ; AVX512VL-LABEL: splatconstant_shift_v2i8:
1916 ; AVX512VL: # %bb.0:
1917 ; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
1918 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1919 ; AVX512VL-NEXT: retq
1921 ; X86-SSE-LABEL: splatconstant_shift_v2i8:
1923 ; X86-SSE-NEXT: psllw $3, %xmm0
1924 ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1925 ; X86-SSE-NEXT: retl
1926 %shift = shl <2 x i8> %a, <i8 3, i8 3>