1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOP,XOPAVX1
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOP,XOPAVX2
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512DQVL
11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512BWVL
13 ; Just one 32-bit (i686) run to make sure we do reasonable things for these vector shifts on a 32-bit target.
14 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE
; Variable left shift of <2 x i32>: every lane of %a is shifted left by the
; matching lane of %b (the shl at the end of the body).
; The expectations below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
; What the recorded output shows, per configuration:
;  - The SSE2, SSE4.1, AVX1 and i686 runs move the shift amount up to bit 23,
;    add a constant-pool value, convert with cvttps2dq and then multiply
;    (pmuludq plus shuffles without SSE4.1, pmulld/vpmulld with it).
;    NOTE(review): the constant is presumably the bit pattern of 1.0f, so the
;    conversion yields 1 << amount; its value is hidden behind the LCPI regex
;    in this function - confirm.
;  - The AVX2, XOP+AVX2 and AVX512 runs use a single vpsllvd.
;  - The XOP+AVX1 run uses a single vpshld.
20 define <2 x i32> @var_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
21 ; SSE2-LABEL: var_shift_v2i32:
23 ; SSE2-NEXT: pslld $23, %xmm1
24 ; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25 ; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
26 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
27 ; SSE2-NEXT: pmuludq %xmm1, %xmm0
28 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
29 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
30 ; SSE2-NEXT: pmuludq %xmm2, %xmm1
31 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
32 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
35 ; SSE41-LABEL: var_shift_v2i32:
37 ; SSE41-NEXT: pslld $23, %xmm1
38 ; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
39 ; SSE41-NEXT: cvttps2dq %xmm1, %xmm1
40 ; SSE41-NEXT: pmulld %xmm1, %xmm0
43 ; AVX1-LABEL: var_shift_v2i32:
45 ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
46 ; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
47 ; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
48 ; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0
51 ; AVX2-LABEL: var_shift_v2i32:
53 ; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
56 ; XOPAVX1-LABEL: var_shift_v2i32:
58 ; XOPAVX1-NEXT: vpshld %xmm1, %xmm0, %xmm0
61 ; XOPAVX2-LABEL: var_shift_v2i32:
63 ; XOPAVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
66 ; AVX512-LABEL: var_shift_v2i32:
68 ; AVX512-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
71 ; AVX512VL-LABEL: var_shift_v2i32:
73 ; AVX512VL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
76 ; X86-SSE-LABEL: var_shift_v2i32:
78 ; X86-SSE-NEXT: pslld $23, %xmm1
79 ; X86-SSE-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
80 ; X86-SSE-NEXT: cvttps2dq %xmm1, %xmm1
81 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
82 ; X86-SSE-NEXT: pmuludq %xmm1, %xmm0
83 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
84 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
85 ; X86-SSE-NEXT: pmuludq %xmm2, %xmm1
86 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
87 ; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
89 %shift = shl <2 x i32> %a, %b
; Variable left shift of <4 x i16>: every lane of %a is shifted left by the
; matching lane of %b (the shl at the end of the body).
; The expectations below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
; What the recorded output shows, per configuration:
;  - The SSE2, SSE4.1, AVX1 and i686 runs widen the shift amounts to 32-bit
;    lanes, shift them up to bit 23, add 1065353216 (0x3F800000, the bit
;    pattern of 1.0f) and convert with cvttps2dq, then pack the results back
;    to 16-bit lanes and multiply with pmullw/vpmullw.
;  - The AVX2, AVX512DQ and AVX512DQ+VL runs zero-extend both operands to
;    32-bit lanes, use vpsllvd, and narrow the result again (vpshufb + vpermq
;    on AVX2, vpmovdw on the AVX512 runs).
;  - The XOP runs use a single vpshlw.
;  - The AVX512BW run uses vpsllvw on zmm registers; with VL added it uses
;    vpsllvw directly on xmm registers.
93 define <4 x i16> @var_shift_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
94 ; SSE2-LABEL: var_shift_v4i16:
96 ; SSE2-NEXT: movdqa %xmm1, %xmm2
97 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
98 ; SSE2-NEXT: pslld $23, %xmm2
99 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
100 ; SSE2-NEXT: paddd %xmm3, %xmm2
101 ; SSE2-NEXT: cvttps2dq %xmm2, %xmm2
102 ; SSE2-NEXT: pslld $16, %xmm2
103 ; SSE2-NEXT: psrad $16, %xmm2
104 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
105 ; SSE2-NEXT: pslld $23, %xmm1
106 ; SSE2-NEXT: paddd %xmm3, %xmm1
107 ; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
108 ; SSE2-NEXT: pslld $16, %xmm1
109 ; SSE2-NEXT: psrad $16, %xmm1
110 ; SSE2-NEXT: packssdw %xmm2, %xmm1
111 ; SSE2-NEXT: pmullw %xmm1, %xmm0
114 ; SSE41-LABEL: var_shift_v4i16:
116 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
117 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
118 ; SSE41-NEXT: pslld $23, %xmm1
119 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
120 ; SSE41-NEXT: paddd %xmm3, %xmm1
121 ; SSE41-NEXT: cvttps2dq %xmm1, %xmm1
122 ; SSE41-NEXT: pslld $23, %xmm2
123 ; SSE41-NEXT: paddd %xmm3, %xmm2
124 ; SSE41-NEXT: cvttps2dq %xmm2, %xmm2
125 ; SSE41-NEXT: packusdw %xmm1, %xmm2
126 ; SSE41-NEXT: pmullw %xmm2, %xmm0
129 ; AVX1-LABEL: var_shift_v4i16:
131 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4,4,5,5,6,6,7,7]
132 ; AVX1-NEXT: vpslld $23, %xmm2, %xmm2
133 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
134 ; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
135 ; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2
136 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
137 ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
138 ; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1
139 ; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
140 ; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
141 ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
144 ; AVX2-LABEL: var_shift_v4i16:
146 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
147 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
148 ; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
149 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
150 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
151 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
152 ; AVX2-NEXT: vzeroupper
155 ; XOP-LABEL: var_shift_v4i16:
157 ; XOP-NEXT: vpshlw %xmm1, %xmm0, %xmm0
160 ; AVX512DQ-LABEL: var_shift_v4i16:
162 ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
163 ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
164 ; AVX512DQ-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
165 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
166 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
167 ; AVX512DQ-NEXT: vzeroupper
168 ; AVX512DQ-NEXT: retq
170 ; AVX512BW-LABEL: var_shift_v4i16:
172 ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
173 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
174 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
175 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
176 ; AVX512BW-NEXT: vzeroupper
177 ; AVX512BW-NEXT: retq
179 ; AVX512DQVL-LABEL: var_shift_v4i16:
180 ; AVX512DQVL: # %bb.0:
181 ; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
182 ; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
183 ; AVX512DQVL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
184 ; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0
185 ; AVX512DQVL-NEXT: vzeroupper
186 ; AVX512DQVL-NEXT: retq
188 ; AVX512BWVL-LABEL: var_shift_v4i16:
189 ; AVX512BWVL: # %bb.0:
190 ; AVX512BWVL-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
191 ; AVX512BWVL-NEXT: retq
193 ; X86-SSE-LABEL: var_shift_v4i16:
195 ; X86-SSE-NEXT: movdqa %xmm1, %xmm2
196 ; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
197 ; X86-SSE-NEXT: pslld $23, %xmm2
198 ; X86-SSE-NEXT: movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
199 ; X86-SSE-NEXT: paddd %xmm3, %xmm2
200 ; X86-SSE-NEXT: cvttps2dq %xmm2, %xmm2
201 ; X86-SSE-NEXT: pslld $16, %xmm2
202 ; X86-SSE-NEXT: psrad $16, %xmm2
203 ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
204 ; X86-SSE-NEXT: pslld $23, %xmm1
205 ; X86-SSE-NEXT: paddd %xmm3, %xmm1
206 ; X86-SSE-NEXT: cvttps2dq %xmm1, %xmm1
207 ; X86-SSE-NEXT: pslld $16, %xmm1
208 ; X86-SSE-NEXT: psrad $16, %xmm1
209 ; X86-SSE-NEXT: packssdw %xmm2, %xmm1
210 ; X86-SSE-NEXT: pmullw %xmm1, %xmm0
212 %shift = shl <4 x i16> %a, %b
; Variable left shift of <2 x i16>: every lane of %a is shifted left by the
; matching lane of %b (the shl at the end of the body).
; The expectations below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
; What the recorded output shows, per configuration:
;  - The SSE2, SSE4.1, AVX1 and i686 runs widen the shift amounts to 32-bit
;    lanes, shift them up to bit 23, add 1065353216 (0x3F800000, the bit
;    pattern of 1.0f) and convert with cvttps2dq, then pack the results back
;    to 16-bit lanes and multiply with pmullw/vpmullw.
;  - The AVX2, AVX512DQ and AVX512DQ+VL runs zero-extend both operands to
;    32-bit lanes, use vpsllvd, and narrow the result again (vpshufb + vpermq
;    on AVX2, vpmovdw on the AVX512 runs).
;  - The XOP runs use a single vpshlw.
;  - The AVX512BW run uses vpsllvw on zmm registers; with VL added it uses
;    vpsllvw directly on xmm registers.
216 define <2 x i16> @var_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
217 ; SSE2-LABEL: var_shift_v2i16:
219 ; SSE2-NEXT: movdqa %xmm1, %xmm2
220 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
221 ; SSE2-NEXT: pslld $23, %xmm2
222 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
223 ; SSE2-NEXT: paddd %xmm3, %xmm2
224 ; SSE2-NEXT: cvttps2dq %xmm2, %xmm2
225 ; SSE2-NEXT: pslld $16, %xmm2
226 ; SSE2-NEXT: psrad $16, %xmm2
227 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
228 ; SSE2-NEXT: pslld $23, %xmm1
229 ; SSE2-NEXT: paddd %xmm3, %xmm1
230 ; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
231 ; SSE2-NEXT: pslld $16, %xmm1
232 ; SSE2-NEXT: psrad $16, %xmm1
233 ; SSE2-NEXT: packssdw %xmm2, %xmm1
234 ; SSE2-NEXT: pmullw %xmm1, %xmm0
237 ; SSE41-LABEL: var_shift_v2i16:
239 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
240 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
241 ; SSE41-NEXT: pslld $23, %xmm1
242 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
243 ; SSE41-NEXT: paddd %xmm3, %xmm1
244 ; SSE41-NEXT: cvttps2dq %xmm1, %xmm1
245 ; SSE41-NEXT: pslld $23, %xmm2
246 ; SSE41-NEXT: paddd %xmm3, %xmm2
247 ; SSE41-NEXT: cvttps2dq %xmm2, %xmm2
248 ; SSE41-NEXT: packusdw %xmm1, %xmm2
249 ; SSE41-NEXT: pmullw %xmm2, %xmm0
252 ; AVX1-LABEL: var_shift_v2i16:
254 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4,4,5,5,6,6,7,7]
255 ; AVX1-NEXT: vpslld $23, %xmm2, %xmm2
256 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
257 ; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
258 ; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2
259 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
260 ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
261 ; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1
262 ; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
263 ; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
264 ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
267 ; AVX2-LABEL: var_shift_v2i16:
269 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
270 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
271 ; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
272 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
273 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
274 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
275 ; AVX2-NEXT: vzeroupper
278 ; XOP-LABEL: var_shift_v2i16:
280 ; XOP-NEXT: vpshlw %xmm1, %xmm0, %xmm0
283 ; AVX512DQ-LABEL: var_shift_v2i16:
285 ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
286 ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
287 ; AVX512DQ-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
288 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
289 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
290 ; AVX512DQ-NEXT: vzeroupper
291 ; AVX512DQ-NEXT: retq
293 ; AVX512BW-LABEL: var_shift_v2i16:
295 ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
296 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
297 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
298 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
299 ; AVX512BW-NEXT: vzeroupper
300 ; AVX512BW-NEXT: retq
302 ; AVX512DQVL-LABEL: var_shift_v2i16:
303 ; AVX512DQVL: # %bb.0:
304 ; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
305 ; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
306 ; AVX512DQVL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
307 ; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0
308 ; AVX512DQVL-NEXT: vzeroupper
309 ; AVX512DQVL-NEXT: retq
311 ; AVX512BWVL-LABEL: var_shift_v2i16:
312 ; AVX512BWVL: # %bb.0:
313 ; AVX512BWVL-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
314 ; AVX512BWVL-NEXT: retq
316 ; X86-SSE-LABEL: var_shift_v2i16:
318 ; X86-SSE-NEXT: movdqa %xmm1, %xmm2
319 ; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
320 ; X86-SSE-NEXT: pslld $23, %xmm2
321 ; X86-SSE-NEXT: movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
322 ; X86-SSE-NEXT: paddd %xmm3, %xmm2
323 ; X86-SSE-NEXT: cvttps2dq %xmm2, %xmm2
324 ; X86-SSE-NEXT: pslld $16, %xmm2
325 ; X86-SSE-NEXT: psrad $16, %xmm2
326 ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
327 ; X86-SSE-NEXT: pslld $23, %xmm1
328 ; X86-SSE-NEXT: paddd %xmm3, %xmm1
329 ; X86-SSE-NEXT: cvttps2dq %xmm1, %xmm1
330 ; X86-SSE-NEXT: pslld $16, %xmm1
331 ; X86-SSE-NEXT: psrad $16, %xmm1
332 ; X86-SSE-NEXT: packssdw %xmm2, %xmm1
333 ; X86-SSE-NEXT: pmullw %xmm1, %xmm0
335 %shift = shl <2 x i16> %a, %b
; Variable left shift of <8 x i8>: every lane of %a is shifted left by the
; matching lane of %b (the shl at the end of the body).
; The expectations below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
; What the recorded output shows, per configuration:
;  - The SSE2 and i686 runs start with psllw $5 on the shift amounts and then
;    do three select rounds: a pcmpgtb against zero builds a per-byte mask,
;    and pandn/pand/por choose between the current value and the value
;    shifted by 4 (psllw $4 + constant-pool mask), by 2 (psllw $2 +
;    constant-pool mask) and by 1 (paddb of the value with itself). The
;    amount register is doubled with paddb between rounds.
;    NOTE(review): the psllw $5 presumably moves the low three amount bits to
;    the top of each byte so each round tests one bit via the sign - confirm.
;  - The SSE4.1 and AVX runs follow the same three rounds but select with
;    pblendvb/vpblendvb instead of the compare-and-mask sequence.
;  - The XOP runs use a single vpshlb.
;  - The AVX512DQ runs (with and without VL) zero-extend both operands to
;    32-bit lanes in zmm registers, use vpsllvd and narrow with vpmovdb.
;  - The AVX512BW runs (with and without VL) zero-extend both operands to
;    16-bit lanes, use vpsllvw and narrow with vpmovwb.
339 define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
340 ; SSE2-LABEL: var_shift_v8i8:
342 ; SSE2-NEXT: psllw $5, %xmm1
343 ; SSE2-NEXT: pxor %xmm2, %xmm2
344 ; SSE2-NEXT: pxor %xmm3, %xmm3
345 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm3
346 ; SSE2-NEXT: movdqa %xmm3, %xmm4
347 ; SSE2-NEXT: pandn %xmm0, %xmm4
348 ; SSE2-NEXT: psllw $4, %xmm0
349 ; SSE2-NEXT: pand %xmm3, %xmm0
350 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
351 ; SSE2-NEXT: por %xmm4, %xmm0
352 ; SSE2-NEXT: paddb %xmm1, %xmm1
353 ; SSE2-NEXT: pxor %xmm3, %xmm3
354 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm3
355 ; SSE2-NEXT: movdqa %xmm3, %xmm4
356 ; SSE2-NEXT: pandn %xmm0, %xmm4
357 ; SSE2-NEXT: psllw $2, %xmm0
358 ; SSE2-NEXT: pand %xmm3, %xmm0
359 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
360 ; SSE2-NEXT: por %xmm4, %xmm0
361 ; SSE2-NEXT: paddb %xmm1, %xmm1
362 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
363 ; SSE2-NEXT: movdqa %xmm2, %xmm1
364 ; SSE2-NEXT: pandn %xmm0, %xmm1
365 ; SSE2-NEXT: paddb %xmm0, %xmm0
366 ; SSE2-NEXT: pand %xmm2, %xmm0
367 ; SSE2-NEXT: por %xmm1, %xmm0
370 ; SSE41-LABEL: var_shift_v8i8:
372 ; SSE41-NEXT: movdqa %xmm0, %xmm2
373 ; SSE41-NEXT: psllw $5, %xmm1
374 ; SSE41-NEXT: movdqa %xmm0, %xmm3
375 ; SSE41-NEXT: psllw $4, %xmm3
376 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
377 ; SSE41-NEXT: movdqa %xmm1, %xmm0
378 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
379 ; SSE41-NEXT: movdqa %xmm2, %xmm3
380 ; SSE41-NEXT: psllw $2, %xmm3
381 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
382 ; SSE41-NEXT: paddb %xmm1, %xmm1
383 ; SSE41-NEXT: movdqa %xmm1, %xmm0
384 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
385 ; SSE41-NEXT: movdqa %xmm2, %xmm3
386 ; SSE41-NEXT: paddb %xmm2, %xmm3
387 ; SSE41-NEXT: paddb %xmm1, %xmm1
388 ; SSE41-NEXT: movdqa %xmm1, %xmm0
389 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
390 ; SSE41-NEXT: movdqa %xmm2, %xmm0
393 ; AVX-LABEL: var_shift_v8i8:
395 ; AVX-NEXT: vpsllw $5, %xmm1, %xmm1
396 ; AVX-NEXT: vpsllw $4, %xmm0, %xmm2
397 ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
398 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
399 ; AVX-NEXT: vpsllw $2, %xmm0, %xmm2
400 ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
401 ; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
402 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
403 ; AVX-NEXT: vpaddb %xmm0, %xmm0, %xmm2
404 ; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
405 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
408 ; XOP-LABEL: var_shift_v8i8:
410 ; XOP-NEXT: vpshlb %xmm1, %xmm0, %xmm0
413 ; AVX512DQ-LABEL: var_shift_v8i8:
415 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
416 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
417 ; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
418 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
419 ; AVX512DQ-NEXT: vzeroupper
420 ; AVX512DQ-NEXT: retq
422 ; AVX512BW-LABEL: var_shift_v8i8:
424 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
425 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
426 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
427 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
428 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
429 ; AVX512BW-NEXT: vzeroupper
430 ; AVX512BW-NEXT: retq
432 ; AVX512DQVL-LABEL: var_shift_v8i8:
433 ; AVX512DQVL: # %bb.0:
434 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
435 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
436 ; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
437 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
438 ; AVX512DQVL-NEXT: vzeroupper
439 ; AVX512DQVL-NEXT: retq
441 ; AVX512BWVL-LABEL: var_shift_v8i8:
442 ; AVX512BWVL: # %bb.0:
443 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
444 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
445 ; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
446 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
447 ; AVX512BWVL-NEXT: vzeroupper
448 ; AVX512BWVL-NEXT: retq
450 ; X86-SSE-LABEL: var_shift_v8i8:
452 ; X86-SSE-NEXT: psllw $5, %xmm1
453 ; X86-SSE-NEXT: pxor %xmm2, %xmm2
454 ; X86-SSE-NEXT: pxor %xmm3, %xmm3
455 ; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm3
456 ; X86-SSE-NEXT: movdqa %xmm3, %xmm4
457 ; X86-SSE-NEXT: pandn %xmm0, %xmm4
458 ; X86-SSE-NEXT: psllw $4, %xmm0
459 ; X86-SSE-NEXT: pand %xmm3, %xmm0
460 ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
461 ; X86-SSE-NEXT: por %xmm4, %xmm0
462 ; X86-SSE-NEXT: paddb %xmm1, %xmm1
463 ; X86-SSE-NEXT: pxor %xmm3, %xmm3
464 ; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm3
465 ; X86-SSE-NEXT: movdqa %xmm3, %xmm4
466 ; X86-SSE-NEXT: pandn %xmm0, %xmm4
467 ; X86-SSE-NEXT: psllw $2, %xmm0
468 ; X86-SSE-NEXT: pand %xmm3, %xmm0
469 ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
470 ; X86-SSE-NEXT: por %xmm4, %xmm0
471 ; X86-SSE-NEXT: paddb %xmm1, %xmm1
472 ; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm2
473 ; X86-SSE-NEXT: movdqa %xmm2, %xmm1
474 ; X86-SSE-NEXT: pandn %xmm0, %xmm1
475 ; X86-SSE-NEXT: paddb %xmm0, %xmm0
476 ; X86-SSE-NEXT: pand %xmm2, %xmm0
477 ; X86-SSE-NEXT: por %xmm1, %xmm0
479 %shift = shl <8 x i8> %a, %b
; Variable left shift of <4 x i8>: every lane of %a is shifted left by the
; matching lane of %b (the shl at the end of the body).
; The expectations below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
; What the recorded output shows, per configuration:
;  - The SSE2 and i686 runs start with psllw $5 on the shift amounts and then
;    do three select rounds: a pcmpgtb against zero builds a per-byte mask,
;    and pandn/pand/por choose between the current value and the value
;    shifted by 4 (psllw $4 + constant-pool mask), by 2 (psllw $2 +
;    constant-pool mask) and by 1 (paddb of the value with itself). The
;    amount register is doubled with paddb between rounds.
;    NOTE(review): the psllw $5 presumably moves the low three amount bits to
;    the top of each byte so each round tests one bit via the sign - confirm.
;  - The SSE4.1 and AVX runs follow the same three rounds but select with
;    pblendvb/vpblendvb instead of the compare-and-mask sequence.
;  - The XOP runs use a single vpshlb.
;  - The AVX512DQ runs (with and without VL) zero-extend both operands to
;    32-bit lanes in zmm registers, use vpsllvd and narrow with vpmovdb.
;  - The AVX512BW runs (with and without VL) zero-extend both operands to
;    16-bit lanes, use vpsllvw and narrow with vpmovwb.
483 define <4 x i8> @var_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
484 ; SSE2-LABEL: var_shift_v4i8:
486 ; SSE2-NEXT: psllw $5, %xmm1
487 ; SSE2-NEXT: pxor %xmm2, %xmm2
488 ; SSE2-NEXT: pxor %xmm3, %xmm3
489 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm3
490 ; SSE2-NEXT: movdqa %xmm3, %xmm4
491 ; SSE2-NEXT: pandn %xmm0, %xmm4
492 ; SSE2-NEXT: psllw $4, %xmm0
493 ; SSE2-NEXT: pand %xmm3, %xmm0
494 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
495 ; SSE2-NEXT: por %xmm4, %xmm0
496 ; SSE2-NEXT: paddb %xmm1, %xmm1
497 ; SSE2-NEXT: pxor %xmm3, %xmm3
498 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm3
499 ; SSE2-NEXT: movdqa %xmm3, %xmm4
500 ; SSE2-NEXT: pandn %xmm0, %xmm4
501 ; SSE2-NEXT: psllw $2, %xmm0
502 ; SSE2-NEXT: pand %xmm3, %xmm0
503 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
504 ; SSE2-NEXT: por %xmm4, %xmm0
505 ; SSE2-NEXT: paddb %xmm1, %xmm1
506 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
507 ; SSE2-NEXT: movdqa %xmm2, %xmm1
508 ; SSE2-NEXT: pandn %xmm0, %xmm1
509 ; SSE2-NEXT: paddb %xmm0, %xmm0
510 ; SSE2-NEXT: pand %xmm2, %xmm0
511 ; SSE2-NEXT: por %xmm1, %xmm0
514 ; SSE41-LABEL: var_shift_v4i8:
516 ; SSE41-NEXT: movdqa %xmm0, %xmm2
517 ; SSE41-NEXT: psllw $5, %xmm1
518 ; SSE41-NEXT: movdqa %xmm0, %xmm3
519 ; SSE41-NEXT: psllw $4, %xmm3
520 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
521 ; SSE41-NEXT: movdqa %xmm1, %xmm0
522 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
523 ; SSE41-NEXT: movdqa %xmm2, %xmm3
524 ; SSE41-NEXT: psllw $2, %xmm3
525 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
526 ; SSE41-NEXT: paddb %xmm1, %xmm1
527 ; SSE41-NEXT: movdqa %xmm1, %xmm0
528 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
529 ; SSE41-NEXT: movdqa %xmm2, %xmm3
530 ; SSE41-NEXT: paddb %xmm2, %xmm3
531 ; SSE41-NEXT: paddb %xmm1, %xmm1
532 ; SSE41-NEXT: movdqa %xmm1, %xmm0
533 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
534 ; SSE41-NEXT: movdqa %xmm2, %xmm0
537 ; AVX-LABEL: var_shift_v4i8:
539 ; AVX-NEXT: vpsllw $5, %xmm1, %xmm1
540 ; AVX-NEXT: vpsllw $4, %xmm0, %xmm2
541 ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
542 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
543 ; AVX-NEXT: vpsllw $2, %xmm0, %xmm2
544 ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
545 ; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
546 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
547 ; AVX-NEXT: vpaddb %xmm0, %xmm0, %xmm2
548 ; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
549 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
552 ; XOP-LABEL: var_shift_v4i8:
554 ; XOP-NEXT: vpshlb %xmm1, %xmm0, %xmm0
557 ; AVX512DQ-LABEL: var_shift_v4i8:
559 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
560 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
561 ; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
562 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
563 ; AVX512DQ-NEXT: vzeroupper
564 ; AVX512DQ-NEXT: retq
566 ; AVX512BW-LABEL: var_shift_v4i8:
568 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
569 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
570 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
571 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
572 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
573 ; AVX512BW-NEXT: vzeroupper
574 ; AVX512BW-NEXT: retq
576 ; AVX512DQVL-LABEL: var_shift_v4i8:
577 ; AVX512DQVL: # %bb.0:
578 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
579 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
580 ; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
581 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
582 ; AVX512DQVL-NEXT: vzeroupper
583 ; AVX512DQVL-NEXT: retq
585 ; AVX512BWVL-LABEL: var_shift_v4i8:
586 ; AVX512BWVL: # %bb.0:
587 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
588 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
589 ; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
590 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
591 ; AVX512BWVL-NEXT: vzeroupper
592 ; AVX512BWVL-NEXT: retq
594 ; X86-SSE-LABEL: var_shift_v4i8:
596 ; X86-SSE-NEXT: psllw $5, %xmm1
597 ; X86-SSE-NEXT: pxor %xmm2, %xmm2
598 ; X86-SSE-NEXT: pxor %xmm3, %xmm3
599 ; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm3
600 ; X86-SSE-NEXT: movdqa %xmm3, %xmm4
601 ; X86-SSE-NEXT: pandn %xmm0, %xmm4
602 ; X86-SSE-NEXT: psllw $4, %xmm0
603 ; X86-SSE-NEXT: pand %xmm3, %xmm0
604 ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
605 ; X86-SSE-NEXT: por %xmm4, %xmm0
606 ; X86-SSE-NEXT: paddb %xmm1, %xmm1
607 ; X86-SSE-NEXT: pxor %xmm3, %xmm3
608 ; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm3
609 ; X86-SSE-NEXT: movdqa %xmm3, %xmm4
610 ; X86-SSE-NEXT: pandn %xmm0, %xmm4
611 ; X86-SSE-NEXT: psllw $2, %xmm0
612 ; X86-SSE-NEXT: pand %xmm3, %xmm0
613 ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
614 ; X86-SSE-NEXT: por %xmm4, %xmm0
615 ; X86-SSE-NEXT: paddb %xmm1, %xmm1
616 ; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm2
617 ; X86-SSE-NEXT: movdqa %xmm2, %xmm1
618 ; X86-SSE-NEXT: pandn %xmm0, %xmm1
619 ; X86-SSE-NEXT: paddb %xmm0, %xmm0
620 ; X86-SSE-NEXT: pand %xmm2, %xmm0
621 ; X86-SSE-NEXT: por %xmm1, %xmm0
623 %shift = shl <4 x i8> %a, %b
627 define <2 x i8> @var_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
628 ; SSE2-LABEL: var_shift_v2i8:
630 ; SSE2-NEXT: psllw $5, %xmm1
631 ; SSE2-NEXT: pxor %xmm2, %xmm2
632 ; SSE2-NEXT: pxor %xmm3, %xmm3
633 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm3
634 ; SSE2-NEXT: movdqa %xmm3, %xmm4
635 ; SSE2-NEXT: pandn %xmm0, %xmm4
636 ; SSE2-NEXT: psllw $4, %xmm0
637 ; SSE2-NEXT: pand %xmm3, %xmm0
638 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
639 ; SSE2-NEXT: por %xmm4, %xmm0
640 ; SSE2-NEXT: paddb %xmm1, %xmm1
641 ; SSE2-NEXT: pxor %xmm3, %xmm3
642 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm3
643 ; SSE2-NEXT: movdqa %xmm3, %xmm4
644 ; SSE2-NEXT: pandn %xmm0, %xmm4
645 ; SSE2-NEXT: psllw $2, %xmm0
646 ; SSE2-NEXT: pand %xmm3, %xmm0
647 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
648 ; SSE2-NEXT: por %xmm4, %xmm0
649 ; SSE2-NEXT: paddb %xmm1, %xmm1
650 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
651 ; SSE2-NEXT: movdqa %xmm2, %xmm1
652 ; SSE2-NEXT: pandn %xmm0, %xmm1
653 ; SSE2-NEXT: paddb %xmm0, %xmm0
654 ; SSE2-NEXT: pand %xmm2, %xmm0
655 ; SSE2-NEXT: por %xmm1, %xmm0
658 ; SSE41-LABEL: var_shift_v2i8:
660 ; SSE41-NEXT: movdqa %xmm0, %xmm2
661 ; SSE41-NEXT: psllw $5, %xmm1
662 ; SSE41-NEXT: movdqa %xmm0, %xmm3
663 ; SSE41-NEXT: psllw $4, %xmm3
664 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
665 ; SSE41-NEXT: movdqa %xmm1, %xmm0
666 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
667 ; SSE41-NEXT: movdqa %xmm2, %xmm3
668 ; SSE41-NEXT: psllw $2, %xmm3
669 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
670 ; SSE41-NEXT: paddb %xmm1, %xmm1
671 ; SSE41-NEXT: movdqa %xmm1, %xmm0
672 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
673 ; SSE41-NEXT: movdqa %xmm2, %xmm3
674 ; SSE41-NEXT: paddb %xmm2, %xmm3
675 ; SSE41-NEXT: paddb %xmm1, %xmm1
676 ; SSE41-NEXT: movdqa %xmm1, %xmm0
677 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
678 ; SSE41-NEXT: movdqa %xmm2, %xmm0
681 ; AVX-LABEL: var_shift_v2i8:
683 ; AVX-NEXT: vpsllw $5, %xmm1, %xmm1
684 ; AVX-NEXT: vpsllw $4, %xmm0, %xmm2
685 ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
686 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
687 ; AVX-NEXT: vpsllw $2, %xmm0, %xmm2
688 ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
689 ; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
690 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
691 ; AVX-NEXT: vpaddb %xmm0, %xmm0, %xmm2
692 ; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
693 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
696 ; XOP-LABEL: var_shift_v2i8:
698 ; XOP-NEXT: vpshlb %xmm1, %xmm0, %xmm0
701 ; AVX512DQ-LABEL: var_shift_v2i8:
703 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
704 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
705 ; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
706 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
707 ; AVX512DQ-NEXT: vzeroupper
708 ; AVX512DQ-NEXT: retq
710 ; AVX512BW-LABEL: var_shift_v2i8:
712 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
713 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
714 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
715 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
716 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
717 ; AVX512BW-NEXT: vzeroupper
718 ; AVX512BW-NEXT: retq
720 ; AVX512DQVL-LABEL: var_shift_v2i8:
721 ; AVX512DQVL: # %bb.0:
722 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
723 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
724 ; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
725 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
726 ; AVX512DQVL-NEXT: vzeroupper
727 ; AVX512DQVL-NEXT: retq
729 ; AVX512BWVL-LABEL: var_shift_v2i8:
730 ; AVX512BWVL: # %bb.0:
731 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
732 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
733 ; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
734 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
735 ; AVX512BWVL-NEXT: vzeroupper
736 ; AVX512BWVL-NEXT: retq
738 ; X86-SSE-LABEL: var_shift_v2i8:
740 ; X86-SSE-NEXT: psllw $5, %xmm1
741 ; X86-SSE-NEXT: pxor %xmm2, %xmm2
742 ; X86-SSE-NEXT: pxor %xmm3, %xmm3
743 ; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm3
744 ; X86-SSE-NEXT: movdqa %xmm3, %xmm4
745 ; X86-SSE-NEXT: pandn %xmm0, %xmm4
746 ; X86-SSE-NEXT: psllw $4, %xmm0
747 ; X86-SSE-NEXT: pand %xmm3, %xmm0
748 ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
749 ; X86-SSE-NEXT: por %xmm4, %xmm0
750 ; X86-SSE-NEXT: paddb %xmm1, %xmm1
751 ; X86-SSE-NEXT: pxor %xmm3, %xmm3
752 ; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm3
753 ; X86-SSE-NEXT: movdqa %xmm3, %xmm4
754 ; X86-SSE-NEXT: pandn %xmm0, %xmm4
755 ; X86-SSE-NEXT: psllw $2, %xmm0
756 ; X86-SSE-NEXT: pand %xmm3, %xmm0
757 ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
758 ; X86-SSE-NEXT: por %xmm4, %xmm0
759 ; X86-SSE-NEXT: paddb %xmm1, %xmm1
760 ; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm2
761 ; X86-SSE-NEXT: movdqa %xmm2, %xmm1
762 ; X86-SSE-NEXT: pandn %xmm0, %xmm1
763 ; X86-SSE-NEXT: paddb %xmm0, %xmm0
764 ; X86-SSE-NEXT: pand %xmm2, %xmm0
765 ; X86-SSE-NEXT: por %xmm1, %xmm0
767 %shift = shl <2 x i8> %a, %b
772 ; Uniform Variable Shifts
; Left shift of <2 x i32> %a by a uniform variable amount. The all-zero
; shufflevector mask below copies element 0 of %b into both lanes, and that
; splat is used as the per-lane shift count of the shl.
; The per-configuration assertion lines inside the body are autogenerated (see
; the note on the first line of this file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
775 define <2 x i32> @splatvar_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
776 ; SSE2-LABEL: splatvar_shift_v2i32:
778 ; SSE2-NEXT: xorps %xmm2, %xmm2
779 ; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
780 ; SSE2-NEXT: pslld %xmm2, %xmm0
783 ; SSE41-LABEL: splatvar_shift_v2i32:
785 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
786 ; SSE41-NEXT: pslld %xmm1, %xmm0
789 ; AVX-LABEL: splatvar_shift_v2i32:
791 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
792 ; AVX-NEXT: vpslld %xmm1, %xmm0, %xmm0
795 ; XOP-LABEL: splatvar_shift_v2i32:
797 ; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
798 ; XOP-NEXT: vpslld %xmm1, %xmm0, %xmm0
801 ; AVX512-LABEL: splatvar_shift_v2i32:
803 ; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
804 ; AVX512-NEXT: vpslld %xmm1, %xmm0, %xmm0
807 ; AVX512VL-LABEL: splatvar_shift_v2i32:
809 ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
810 ; AVX512VL-NEXT: vpslld %xmm1, %xmm0, %xmm0
811 ; AVX512VL-NEXT: retq
813 ; X86-SSE-LABEL: splatvar_shift_v2i32:
815 ; X86-SSE-NEXT: xorps %xmm2, %xmm2
816 ; X86-SSE-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
817 ; X86-SSE-NEXT: pslld %xmm2, %xmm0
; Broadcast element 0 of %b to every lane, then shift %a by that splat.
819 %splat = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> zeroinitializer
820 %shift = shl <2 x i32> %a, %splat
; Left shift of <4 x i16> %a by a uniform variable amount. The all-zero
; shufflevector mask below copies element 0 of %b into all four lanes, and that
; splat is used as the per-lane shift count of the shl.
; The per-configuration assertion lines inside the body are autogenerated (see
; the note on the first line of this file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
824 define <4 x i16> @splatvar_shift_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
825 ; SSE2-LABEL: splatvar_shift_v4i16:
827 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
828 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
829 ; SSE2-NEXT: psllw %xmm1, %xmm0
832 ; SSE41-LABEL: splatvar_shift_v4i16:
834 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
835 ; SSE41-NEXT: psllw %xmm1, %xmm0
838 ; AVX-LABEL: splatvar_shift_v4i16:
840 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
841 ; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0
844 ; XOP-LABEL: splatvar_shift_v4i16:
846 ; XOP-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
847 ; XOP-NEXT: vpsllw %xmm1, %xmm0, %xmm0
850 ; AVX512-LABEL: splatvar_shift_v4i16:
852 ; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
853 ; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0
856 ; AVX512VL-LABEL: splatvar_shift_v4i16:
858 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
859 ; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
860 ; AVX512VL-NEXT: retq
862 ; X86-SSE-LABEL: splatvar_shift_v4i16:
864 ; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
865 ; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
866 ; X86-SSE-NEXT: psllw %xmm1, %xmm0
; Broadcast element 0 of %b to every lane, then shift %a by that splat.
868 %splat = shufflevector <4 x i16> %b, <4 x i16> undef, <4 x i32> zeroinitializer
869 %shift = shl <4 x i16> %a, %splat
; Left shift of <2 x i16> %a by a uniform variable amount. The all-zero
; shufflevector mask below copies element 0 of %b into both lanes, and that
; splat is used as the per-lane shift count of the shl.
; The per-configuration assertion lines inside the body are autogenerated (see
; the note on the first line of this file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
873 define <2 x i16> @splatvar_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
874 ; SSE2-LABEL: splatvar_shift_v2i16:
876 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
877 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
878 ; SSE2-NEXT: psllw %xmm1, %xmm0
881 ; SSE41-LABEL: splatvar_shift_v2i16:
883 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
884 ; SSE41-NEXT: psllw %xmm1, %xmm0
887 ; AVX-LABEL: splatvar_shift_v2i16:
889 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
890 ; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0
893 ; XOP-LABEL: splatvar_shift_v2i16:
895 ; XOP-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
896 ; XOP-NEXT: vpsllw %xmm1, %xmm0, %xmm0
899 ; AVX512-LABEL: splatvar_shift_v2i16:
901 ; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
902 ; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0
905 ; AVX512VL-LABEL: splatvar_shift_v2i16:
907 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
908 ; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
909 ; AVX512VL-NEXT: retq
911 ; X86-SSE-LABEL: splatvar_shift_v2i16:
913 ; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
914 ; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
915 ; X86-SSE-NEXT: psllw %xmm1, %xmm0
; Broadcast element 0 of %b to every lane, then shift %a by that splat.
917 %splat = shufflevector <2 x i16> %b, <2 x i16> undef, <2 x i32> zeroinitializer
918 %shift = shl <2 x i16> %a, %splat
; Left shift of <8 x i8> %a by a uniform variable amount. The all-zero
; shufflevector mask below copies element 0 of %b into all eight lanes, and
; that splat is used as the per-lane shift count of the shl.
; Several of the expected sequences below shift with a 16-bit psllw and then
; AND the result with a byte mask built by shifting an all-ones register by the
; same count.
; The per-configuration assertion lines inside the body are autogenerated (see
; the note on the first line of this file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
922 define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
923 ; SSE2-LABEL: splatvar_shift_v8i8:
925 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
926 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
927 ; SSE2-NEXT: psllw %xmm1, %xmm0
928 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
929 ; SSE2-NEXT: psllw %xmm1, %xmm2
930 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
931 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
932 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
933 ; SSE2-NEXT: pand %xmm1, %xmm0
936 ; SSE41-LABEL: splatvar_shift_v8i8:
938 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
939 ; SSE41-NEXT: psllw %xmm1, %xmm0
940 ; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
941 ; SSE41-NEXT: psllw %xmm1, %xmm2
942 ; SSE41-NEXT: pxor %xmm1, %xmm1
943 ; SSE41-NEXT: pshufb %xmm1, %xmm2
944 ; SSE41-NEXT: pand %xmm2, %xmm0
947 ; AVX1-LABEL: splatvar_shift_v8i8:
949 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
950 ; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
951 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
952 ; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm1
953 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
954 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
955 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
958 ; AVX2-LABEL: splatvar_shift_v8i8:
960 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
961 ; AVX2-NEXT: vpsllw %xmm1, %xmm0, %xmm0
962 ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
963 ; AVX2-NEXT: vpsllw %xmm1, %xmm2, %xmm1
964 ; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
965 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
968 ; XOPAVX1-LABEL: splatvar_shift_v8i8:
970 ; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
971 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
972 ; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0
975 ; XOPAVX2-LABEL: splatvar_shift_v8i8:
977 ; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
978 ; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
981 ; AVX512DQ-LABEL: splatvar_shift_v8i8:
983 ; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
984 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
985 ; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0
986 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
987 ; AVX512DQ-NEXT: vzeroupper
988 ; AVX512DQ-NEXT: retq
990 ; AVX512BW-LABEL: splatvar_shift_v8i8:
992 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
993 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
994 ; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
995 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
996 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
997 ; AVX512BW-NEXT: vzeroupper
998 ; AVX512BW-NEXT: retq
1000 ; AVX512DQVL-LABEL: splatvar_shift_v8i8:
1001 ; AVX512DQVL: # %bb.0:
1002 ; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1003 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1004 ; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0
1005 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
1006 ; AVX512DQVL-NEXT: vzeroupper
1007 ; AVX512DQVL-NEXT: retq
1009 ; AVX512BWVL-LABEL: splatvar_shift_v8i8:
1010 ; AVX512BWVL: # %bb.0:
1011 ; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1012 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1013 ; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
1014 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
1015 ; AVX512BWVL-NEXT: vzeroupper
1016 ; AVX512BWVL-NEXT: retq
1018 ; X86-SSE-LABEL: splatvar_shift_v8i8:
1020 ; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
1021 ; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1022 ; X86-SSE-NEXT: psllw %xmm1, %xmm0
1023 ; X86-SSE-NEXT: pcmpeqd %xmm2, %xmm2
1024 ; X86-SSE-NEXT: psllw %xmm1, %xmm2
1025 ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1026 ; X86-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
1027 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
1028 ; X86-SSE-NEXT: pand %xmm1, %xmm0
1029 ; X86-SSE-NEXT: retl
; Broadcast element 0 of %b to every lane, then shift %a by that splat.
1030 %splat = shufflevector <8 x i8> %b, <8 x i8> undef, <8 x i32> zeroinitializer
1031 %shift = shl <8 x i8> %a, %splat
; Left shift of <4 x i8> %a by a uniform variable amount. The all-zero
; shufflevector mask below copies element 0 of %b into all four lanes, and that
; splat is used as the per-lane shift count of the shl.
; Several of the expected sequences below shift with a 16-bit psllw and then
; AND the result with a byte mask built by shifting an all-ones register by the
; same count.
; The per-configuration assertion lines inside the body are autogenerated (see
; the note on the first line of this file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
1035 define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
1036 ; SSE2-LABEL: splatvar_shift_v4i8:
1038 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
1039 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1040 ; SSE2-NEXT: psllw %xmm1, %xmm0
1041 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
1042 ; SSE2-NEXT: psllw %xmm1, %xmm2
1043 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1044 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
1045 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
1046 ; SSE2-NEXT: pand %xmm1, %xmm0
1049 ; SSE41-LABEL: splatvar_shift_v4i8:
1051 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1052 ; SSE41-NEXT: psllw %xmm1, %xmm0
1053 ; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
1054 ; SSE41-NEXT: psllw %xmm1, %xmm2
1055 ; SSE41-NEXT: pxor %xmm1, %xmm1
1056 ; SSE41-NEXT: pshufb %xmm1, %xmm2
1057 ; SSE41-NEXT: pand %xmm2, %xmm0
1060 ; AVX1-LABEL: splatvar_shift_v4i8:
1062 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1063 ; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
1064 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
1065 ; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm1
1066 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1067 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1068 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1071 ; AVX2-LABEL: splatvar_shift_v4i8:
1073 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1074 ; AVX2-NEXT: vpsllw %xmm1, %xmm0, %xmm0
1075 ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
1076 ; AVX2-NEXT: vpsllw %xmm1, %xmm2, %xmm1
1077 ; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
1078 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1081 ; XOPAVX1-LABEL: splatvar_shift_v4i8:
1083 ; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1084 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1085 ; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0
1086 ; XOPAVX1-NEXT: retq
1088 ; XOPAVX2-LABEL: splatvar_shift_v4i8:
1090 ; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
1091 ; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
1092 ; XOPAVX2-NEXT: retq
1094 ; AVX512DQ-LABEL: splatvar_shift_v4i8:
1095 ; AVX512DQ: # %bb.0:
1096 ; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1097 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1098 ; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0
1099 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1100 ; AVX512DQ-NEXT: vzeroupper
1101 ; AVX512DQ-NEXT: retq
1103 ; AVX512BW-LABEL: splatvar_shift_v4i8:
1104 ; AVX512BW: # %bb.0:
1105 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1106 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1107 ; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
1108 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
1109 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1110 ; AVX512BW-NEXT: vzeroupper
1111 ; AVX512BW-NEXT: retq
1113 ; AVX512DQVL-LABEL: splatvar_shift_v4i8:
1114 ; AVX512DQVL: # %bb.0:
1115 ; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1116 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1117 ; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0
1118 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
1119 ; AVX512DQVL-NEXT: vzeroupper
1120 ; AVX512DQVL-NEXT: retq
1122 ; AVX512BWVL-LABEL: splatvar_shift_v4i8:
1123 ; AVX512BWVL: # %bb.0:
1124 ; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1125 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1126 ; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
1127 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
1128 ; AVX512BWVL-NEXT: vzeroupper
1129 ; AVX512BWVL-NEXT: retq
1131 ; X86-SSE-LABEL: splatvar_shift_v4i8:
1133 ; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
1134 ; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1135 ; X86-SSE-NEXT: psllw %xmm1, %xmm0
1136 ; X86-SSE-NEXT: pcmpeqd %xmm2, %xmm2
1137 ; X86-SSE-NEXT: psllw %xmm1, %xmm2
1138 ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1139 ; X86-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
1140 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
1141 ; X86-SSE-NEXT: pand %xmm1, %xmm0
1142 ; X86-SSE-NEXT: retl
; Broadcast element 0 of %b to every lane, then shift %a by that splat.
1143 %splat = shufflevector <4 x i8> %b, <4 x i8> undef, <4 x i32> zeroinitializer
1144 %shift = shl <4 x i8> %a, %splat
; Left shift of <2 x i8> %a by a uniform variable amount. The all-zero
; shufflevector mask below copies element 0 of %b into both lanes, and that
; splat is used as the per-lane shift count of the shl.
; Several of the expected sequences below shift with a 16-bit psllw and then
; AND the result with a byte mask built by shifting an all-ones register by the
; same count.
; The per-configuration assertion lines inside the body are autogenerated (see
; the note on the first line of this file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
1148 define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
1149 ; SSE2-LABEL: splatvar_shift_v2i8:
1151 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
1152 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1153 ; SSE2-NEXT: psllw %xmm1, %xmm0
1154 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
1155 ; SSE2-NEXT: psllw %xmm1, %xmm2
1156 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1157 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
1158 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
1159 ; SSE2-NEXT: pand %xmm1, %xmm0
1162 ; SSE41-LABEL: splatvar_shift_v2i8:
1164 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1165 ; SSE41-NEXT: psllw %xmm1, %xmm0
1166 ; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
1167 ; SSE41-NEXT: psllw %xmm1, %xmm2
1168 ; SSE41-NEXT: pxor %xmm1, %xmm1
1169 ; SSE41-NEXT: pshufb %xmm1, %xmm2
1170 ; SSE41-NEXT: pand %xmm2, %xmm0
1173 ; AVX1-LABEL: splatvar_shift_v2i8:
1175 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1176 ; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
1177 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
1178 ; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm1
1179 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1180 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1181 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1184 ; AVX2-LABEL: splatvar_shift_v2i8:
1186 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1187 ; AVX2-NEXT: vpsllw %xmm1, %xmm0, %xmm0
1188 ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
1189 ; AVX2-NEXT: vpsllw %xmm1, %xmm2, %xmm1
1190 ; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
1191 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1194 ; XOP-LABEL: splatvar_shift_v2i8:
1196 ; XOP-NEXT: insertq {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7,u,u,u,u,u,u,u,u]
1197 ; XOP-NEXT: vpshlb %xmm1, %xmm0, %xmm0
1200 ; AVX512DQ-LABEL: splatvar_shift_v2i8:
1201 ; AVX512DQ: # %bb.0:
1202 ; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1203 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1204 ; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0
1205 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1206 ; AVX512DQ-NEXT: vzeroupper
1207 ; AVX512DQ-NEXT: retq
1209 ; AVX512BW-LABEL: splatvar_shift_v2i8:
1210 ; AVX512BW: # %bb.0:
1211 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1212 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1213 ; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
1214 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
1215 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1216 ; AVX512BW-NEXT: vzeroupper
1217 ; AVX512BW-NEXT: retq
1219 ; AVX512DQVL-LABEL: splatvar_shift_v2i8:
1220 ; AVX512DQVL: # %bb.0:
1221 ; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1222 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1223 ; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0
1224 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
1225 ; AVX512DQVL-NEXT: vzeroupper
1226 ; AVX512DQVL-NEXT: retq
1228 ; AVX512BWVL-LABEL: splatvar_shift_v2i8:
1229 ; AVX512BWVL: # %bb.0:
1230 ; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1231 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1232 ; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
1233 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
1234 ; AVX512BWVL-NEXT: vzeroupper
1235 ; AVX512BWVL-NEXT: retq
1237 ; X86-SSE-LABEL: splatvar_shift_v2i8:
1239 ; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
1240 ; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1241 ; X86-SSE-NEXT: psllw %xmm1, %xmm0
1242 ; X86-SSE-NEXT: pcmpeqd %xmm2, %xmm2
1243 ; X86-SSE-NEXT: psllw %xmm1, %xmm2
1244 ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1245 ; X86-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
1246 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
1247 ; X86-SSE-NEXT: pand %xmm1, %xmm0
1248 ; X86-SSE-NEXT: retl
; Broadcast element 0 of %b to every lane, then shift %a by that splat.
1249 %splat = shufflevector <2 x i8> %b, <2 x i8> undef, <2 x i32> zeroinitializer
1250 %shift = shl <2 x i8> %a, %splat
; Left shift of <2 x i32> %a by a non-uniform constant vector: lane 0 is
; shifted by 4 and lane 1 by 5, and the shifted vector is returned.
; The expectations below show configurations without a per-lane variable shift
; doing two immediate shifts and blending, while the others use a single
; vpsllvd / vpshld against a constant-pool operand.
; The per-configuration assertion lines inside the body are autogenerated (see
; the note on the first line of this file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
1258 define <2 x i32> @constant_shift_v2i32(<2 x i32> %a) nounwind {
1259 ; SSE2-LABEL: constant_shift_v2i32:
1261 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1262 ; SSE2-NEXT: pslld $4, %xmm0
1263 ; SSE2-NEXT: pslld $5, %xmm1
1264 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1267 ; SSE41-LABEL: constant_shift_v2i32:
1269 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1270 ; SSE41-NEXT: pslld $5, %xmm1
1271 ; SSE41-NEXT: pslld $4, %xmm0
1272 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
1275 ; AVX1-LABEL: constant_shift_v2i32:
1277 ; AVX1-NEXT: vpslld $5, %xmm0, %xmm1
1278 ; AVX1-NEXT: vpslld $4, %xmm0, %xmm0
1279 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
1282 ; AVX2-LABEL: constant_shift_v2i32:
1284 ; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1287 ; XOPAVX1-LABEL: constant_shift_v2i32:
1289 ; XOPAVX1-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1290 ; XOPAVX1-NEXT: retq
1292 ; XOPAVX2-LABEL: constant_shift_v2i32:
1294 ; XOPAVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1295 ; XOPAVX2-NEXT: retq
1297 ; AVX512-LABEL: constant_shift_v2i32:
1299 ; AVX512-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1302 ; AVX512VL-LABEL: constant_shift_v2i32:
1303 ; AVX512VL: # %bb.0:
1304 ; AVX512VL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1305 ; AVX512VL-NEXT: retq
1307 ; X86-SSE-LABEL: constant_shift_v2i32:
1309 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1310 ; X86-SSE-NEXT: pslld $4, %xmm0
1311 ; X86-SSE-NEXT: pslld $5, %xmm1
1312 ; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1313 ; X86-SSE-NEXT: retl
1314 %shift = shl <2 x i32> %a, <i32 4, i32 5>
1315 ret <2 x i32> %shift
; Left shift of <4 x i16> %a by a non-uniform constant vector: lanes 0..3 are
; shifted by 0, 1, 2 and 3 respectively, and the shifted vector is returned.
; Most expectations below are a single pmullw / vpmullw against a
; constant-pool operand (presumably the matching powers of two; the constant
; itself is not visible here), while the XOP and AVX512BW configurations use
; vpshlw / vpsllvw.
; The per-configuration assertion lines inside the body are autogenerated (see
; the note on the first line of this file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
1318 define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind {
1319 ; SSE-LABEL: constant_shift_v4i16:
1321 ; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1324 ; AVX-LABEL: constant_shift_v4i16:
1326 ; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1329 ; XOP-LABEL: constant_shift_v4i16:
1331 ; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1334 ; AVX512DQ-LABEL: constant_shift_v4i16:
1335 ; AVX512DQ: # %bb.0:
1336 ; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1337 ; AVX512DQ-NEXT: retq
1339 ; AVX512BW-LABEL: constant_shift_v4i16:
1340 ; AVX512BW: # %bb.0:
1341 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1342 ; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm1 = [0,1,2,3,0,1,2,3]
1343 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
1344 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1345 ; AVX512BW-NEXT: vzeroupper
1346 ; AVX512BW-NEXT: retq
1348 ; AVX512DQVL-LABEL: constant_shift_v4i16:
1349 ; AVX512DQVL: # %bb.0:
1350 ; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1351 ; AVX512DQVL-NEXT: retq
1353 ; AVX512BWVL-LABEL: constant_shift_v4i16:
1354 ; AVX512BWVL: # %bb.0:
1355 ; AVX512BWVL-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1356 ; AVX512BWVL-NEXT: retq
1358 ; X86-SSE-LABEL: constant_shift_v4i16:
1360 ; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1361 ; X86-SSE-NEXT: retl
1362 %shift = shl <4 x i16> %a, <i16 0, i16 1, i16 2, i16 3>
1363 ret <4 x i16> %shift
; Left shift of <2 x i16> %a by a non-uniform constant vector: lane 0 is
; shifted by 2 and lane 1 by 3, and the shifted vector is returned.
; The expectations below differ by configuration: a pmullw against a
; constant-pool operand, two immediate psllw shifts merged with pblendw, or a
; single vpshlw / vpsllvw.
; The per-configuration assertion lines inside the body are autogenerated (see
; the note on the first line of this file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
1366 define <2 x i16> @constant_shift_v2i16(<2 x i16> %a) nounwind {
1367 ; SSE2-LABEL: constant_shift_v2i16:
1369 ; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1372 ; SSE41-LABEL: constant_shift_v2i16:
1374 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1375 ; SSE41-NEXT: psllw $3, %xmm1
1376 ; SSE41-NEXT: psllw $2, %xmm0
1377 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
1380 ; AVX-LABEL: constant_shift_v2i16:
1382 ; AVX-NEXT: vpsllw $3, %xmm0, %xmm1
1383 ; AVX-NEXT: vpsllw $2, %xmm0, %xmm0
1384 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
1387 ; XOP-LABEL: constant_shift_v2i16:
1389 ; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1392 ; AVX512DQ-LABEL: constant_shift_v2i16:
1393 ; AVX512DQ: # %bb.0:
1394 ; AVX512DQ-NEXT: vpsllw $3, %xmm0, %xmm1
1395 ; AVX512DQ-NEXT: vpsllw $2, %xmm0, %xmm0
1396 ; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
1397 ; AVX512DQ-NEXT: retq
1399 ; AVX512BW-LABEL: constant_shift_v2i16:
1400 ; AVX512BW: # %bb.0:
1401 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1402 ; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,3,2,3,2,3,2,3]
1403 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
1404 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1405 ; AVX512BW-NEXT: vzeroupper
1406 ; AVX512BW-NEXT: retq
1408 ; AVX512DQVL-LABEL: constant_shift_v2i16:
1409 ; AVX512DQVL: # %bb.0:
1410 ; AVX512DQVL-NEXT: vpsllw $3, %xmm0, %xmm1
1411 ; AVX512DQVL-NEXT: vpsllw $2, %xmm0, %xmm0
1412 ; AVX512DQVL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
1413 ; AVX512DQVL-NEXT: retq
1415 ; AVX512BWVL-LABEL: constant_shift_v2i16:
1416 ; AVX512BWVL: # %bb.0:
1417 ; AVX512BWVL-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1418 ; AVX512BWVL-NEXT: retq
1420 ; X86-SSE-LABEL: constant_shift_v2i16:
1422 ; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1423 ; X86-SSE-NEXT: retl
1424 %shift = shl <2 x i16> %a, <i16 2, i16 3>
1425 ret <2 x i16> %shift
; Non-uniform constant left shift of a <8 x i8> vector: lane i is shifted left
; by i (amounts 0 through 7). The check lines that follow are the expected
; assembly for each llc configuration listed in the file header. They are
; autogenerated by utils/update_llc_test_checks.py, so regenerate them with
; that script rather than editing them by hand.
1428 define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
1429 ; SSE2-LABEL: constant_shift_v8i8:
1431 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1432 ; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1433 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1434 ; SSE2-NEXT: pxor %xmm1, %xmm1
1435 ; SSE2-NEXT: packuswb %xmm1, %xmm0
1438 ; SSE41-LABEL: constant_shift_v8i8:
1440 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1441 ; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1442 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1443 ; SSE41-NEXT: pxor %xmm1, %xmm1
1444 ; SSE41-NEXT: packuswb %xmm1, %xmm0
1447 ; AVX1-LABEL: constant_shift_v8i8:
1449 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1450 ; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1451 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1452 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1453 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
1456 ; AVX2-LABEL: constant_shift_v8i8:
1458 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1459 ; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1460 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1461 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1462 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
1463 ; AVX2-NEXT: vzeroupper
1466 ; XOP-LABEL: constant_shift_v8i8:
1468 ; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1471 ; AVX512DQ-LABEL: constant_shift_v8i8:
1472 ; AVX512DQ: # %bb.0:
1473 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1474 ; AVX512DQ-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
1475 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1476 ; AVX512DQ-NEXT: vzeroupper
1477 ; AVX512DQ-NEXT: retq
1479 ; AVX512BW-LABEL: constant_shift_v8i8:
1480 ; AVX512BW: # %bb.0:
1481 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,0,0,0,0,0,0,0,0]
1482 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1483 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
1484 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
1485 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1486 ; AVX512BW-NEXT: vzeroupper
1487 ; AVX512BW-NEXT: retq
1489 ; AVX512DQVL-LABEL: constant_shift_v8i8:
1490 ; AVX512DQVL: # %bb.0:
1491 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1492 ; AVX512DQVL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
1493 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
1494 ; AVX512DQVL-NEXT: vzeroupper
1495 ; AVX512DQVL-NEXT: retq
1497 ; AVX512BWVL-LABEL: constant_shift_v8i8:
1498 ; AVX512BWVL: # %bb.0:
1499 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1500 ; AVX512BWVL-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1501 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
1502 ; AVX512BWVL-NEXT: vzeroupper
1503 ; AVX512BWVL-NEXT: retq
1505 ; X86-SSE-LABEL: constant_shift_v8i8:
1507 ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1508 ; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1509 ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1510 ; X86-SSE-NEXT: pxor %xmm1, %xmm1
1511 ; X86-SSE-NEXT: packuswb %xmm1, %xmm0
1512 ; X86-SSE-NEXT: retl
1513 %shift = shl <8 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
; Non-uniform constant left shift of a <4 x i8> vector: the four lanes are
; shifted left by 0, 1, 2 and 3 respectively. The check lines that follow are
; the expected assembly for each llc configuration listed in the file header.
; They are autogenerated by utils/update_llc_test_checks.py, so regenerate
; them with that script rather than editing them by hand.
1517 define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
1518 ; SSE2-LABEL: constant_shift_v4i8:
1520 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1521 ; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1522 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1523 ; SSE2-NEXT: pxor %xmm1, %xmm1
1524 ; SSE2-NEXT: packuswb %xmm1, %xmm0
1527 ; SSE41-LABEL: constant_shift_v4i8:
1529 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1530 ; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1531 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1532 ; SSE41-NEXT: pxor %xmm1, %xmm1
1533 ; SSE41-NEXT: packuswb %xmm1, %xmm0
1536 ; AVX1-LABEL: constant_shift_v4i8:
1538 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1539 ; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1540 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1541 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1542 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
1545 ; AVX2-LABEL: constant_shift_v4i8:
1547 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1548 ; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1549 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1550 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1551 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
1552 ; AVX2-NEXT: vzeroupper
1555 ; XOP-LABEL: constant_shift_v4i8:
1557 ; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1560 ; AVX512DQ-LABEL: constant_shift_v4i8:
1561 ; AVX512DQ: # %bb.0:
1562 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1563 ; AVX512DQ-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
1564 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1565 ; AVX512DQ-NEXT: vzeroupper
1566 ; AVX512DQ-NEXT: retq
1568 ; AVX512BW-LABEL: constant_shift_v4i8:
1569 ; AVX512BW: # %bb.0:
1570 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,0,0,0,0,0,0,0,0,0,0,0,0]
1571 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1572 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
1573 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
1574 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1575 ; AVX512BW-NEXT: vzeroupper
1576 ; AVX512BW-NEXT: retq
1578 ; AVX512DQVL-LABEL: constant_shift_v4i8:
1579 ; AVX512DQVL: # %bb.0:
1580 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1581 ; AVX512DQVL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
1582 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
1583 ; AVX512DQVL-NEXT: vzeroupper
1584 ; AVX512DQVL-NEXT: retq
1586 ; AVX512BWVL-LABEL: constant_shift_v4i8:
1587 ; AVX512BWVL: # %bb.0:
1588 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1589 ; AVX512BWVL-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1590 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
1591 ; AVX512BWVL-NEXT: vzeroupper
1592 ; AVX512BWVL-NEXT: retq
1594 ; X86-SSE-LABEL: constant_shift_v4i8:
1596 ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1597 ; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1598 ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1599 ; X86-SSE-NEXT: pxor %xmm1, %xmm1
1600 ; X86-SSE-NEXT: packuswb %xmm1, %xmm0
1601 ; X86-SSE-NEXT: retl
1602 %shift = shl <4 x i8> %a, <i8 0, i8 1, i8 2, i8 3>
; Non-uniform constant left shift of a <2 x i8> vector: lane 0 is shifted left
; by 2 and lane 1 by 3. The check lines that follow are the expected assembly
; for each llc configuration listed in the file header. They are autogenerated
; by utils/update_llc_test_checks.py, so regenerate them with that script
; rather than editing them by hand.
1606 define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
1607 ; SSE2-LABEL: constant_shift_v2i8:
1609 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1610 ; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1611 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1612 ; SSE2-NEXT: pxor %xmm1, %xmm1
1613 ; SSE2-NEXT: packuswb %xmm1, %xmm0
1616 ; SSE41-LABEL: constant_shift_v2i8:
1618 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1619 ; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1620 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1621 ; SSE41-NEXT: pxor %xmm1, %xmm1
1622 ; SSE41-NEXT: packuswb %xmm1, %xmm0
1625 ; AVX1-LABEL: constant_shift_v2i8:
1627 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1628 ; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1629 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1630 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1631 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
1634 ; AVX2-LABEL: constant_shift_v2i8:
1636 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1637 ; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1638 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1639 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1640 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
1641 ; AVX2-NEXT: vzeroupper
1644 ; XOP-LABEL: constant_shift_v2i8:
1646 ; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1649 ; AVX512DQ-LABEL: constant_shift_v2i8:
1650 ; AVX512DQ: # %bb.0:
1651 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1652 ; AVX512DQ-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
1653 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1654 ; AVX512DQ-NEXT: vzeroupper
1655 ; AVX512DQ-NEXT: retq
1657 ; AVX512BW-LABEL: constant_shift_v2i8:
1658 ; AVX512BW: # %bb.0:
1659 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
1660 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1661 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
1662 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
1663 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1664 ; AVX512BW-NEXT: vzeroupper
1665 ; AVX512BW-NEXT: retq
1667 ; AVX512DQVL-LABEL: constant_shift_v2i8:
1668 ; AVX512DQVL: # %bb.0:
1669 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1670 ; AVX512DQVL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
1671 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
1672 ; AVX512DQVL-NEXT: vzeroupper
1673 ; AVX512DQVL-NEXT: retq
1675 ; AVX512BWVL-LABEL: constant_shift_v2i8:
1676 ; AVX512BWVL: # %bb.0:
1677 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1678 ; AVX512BWVL-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1679 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
1680 ; AVX512BWVL-NEXT: vzeroupper
1681 ; AVX512BWVL-NEXT: retq
1683 ; X86-SSE-LABEL: constant_shift_v2i8:
1685 ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1686 ; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1687 ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1688 ; X86-SSE-NEXT: pxor %xmm1, %xmm1
1689 ; X86-SSE-NEXT: packuswb %xmm1, %xmm0
1690 ; X86-SSE-NEXT: retl
1691 %shift = shl <2 x i8> %a, <i8 2, i8 3>
1696 ; Uniform Constant Shifts
; Uniform (splat) constant left shift of a <2 x i32> vector: both lanes are
; shifted left by 5 and the result is returned. The visible check lines expect
; an immediate-form pslld/vpslld $5 on every configuration. They are
; autogenerated by utils/update_llc_test_checks.py, so regenerate them with
; that script rather than editing them by hand.
1699 define <2 x i32> @splatconstant_shift_v2i32(<2 x i32> %a) nounwind {
1700 ; SSE-LABEL: splatconstant_shift_v2i32:
1702 ; SSE-NEXT: pslld $5, %xmm0
1705 ; AVX-LABEL: splatconstant_shift_v2i32:
1707 ; AVX-NEXT: vpslld $5, %xmm0, %xmm0
1710 ; XOP-LABEL: splatconstant_shift_v2i32:
1712 ; XOP-NEXT: vpslld $5, %xmm0, %xmm0
1715 ; AVX512-LABEL: splatconstant_shift_v2i32:
1717 ; AVX512-NEXT: vpslld $5, %xmm0, %xmm0
1720 ; AVX512VL-LABEL: splatconstant_shift_v2i32:
1721 ; AVX512VL: # %bb.0:
1722 ; AVX512VL-NEXT: vpslld $5, %xmm0, %xmm0
1723 ; AVX512VL-NEXT: retq
1725 ; X86-SSE-LABEL: splatconstant_shift_v2i32:
1727 ; X86-SSE-NEXT: pslld $5, %xmm0
1728 ; X86-SSE-NEXT: retl
1729 %shift = shl <2 x i32> %a, <i32 5, i32 5>
1730 ret <2 x i32> %shift
; Uniform (splat) constant left shift of a <4 x i16> vector: all four lanes
; are shifted left by 3 and the result is returned. The visible check lines
; expect an immediate-form psllw/vpsllw $3 on every configuration. They are
; autogenerated by utils/update_llc_test_checks.py, so regenerate them with
; that script rather than editing them by hand.
1733 define <4 x i16> @splatconstant_shift_v4i16(<4 x i16> %a) nounwind {
1734 ; SSE-LABEL: splatconstant_shift_v4i16:
1736 ; SSE-NEXT: psllw $3, %xmm0
1739 ; AVX-LABEL: splatconstant_shift_v4i16:
1741 ; AVX-NEXT: vpsllw $3, %xmm0, %xmm0
1744 ; XOP-LABEL: splatconstant_shift_v4i16:
1746 ; XOP-NEXT: vpsllw $3, %xmm0, %xmm0
1749 ; AVX512-LABEL: splatconstant_shift_v4i16:
1751 ; AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
1754 ; AVX512VL-LABEL: splatconstant_shift_v4i16:
1755 ; AVX512VL: # %bb.0:
1756 ; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
1757 ; AVX512VL-NEXT: retq
1759 ; X86-SSE-LABEL: splatconstant_shift_v4i16:
1761 ; X86-SSE-NEXT: psllw $3, %xmm0
1762 ; X86-SSE-NEXT: retl
1763 %shift = shl <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
1764 ret <4 x i16> %shift
; Uniform (splat) constant left shift of a <2 x i16> vector: both lanes are
; shifted left by 3 and the result is returned. The visible check lines expect
; an immediate-form psllw/vpsllw $3 on every configuration. They are
; autogenerated by utils/update_llc_test_checks.py, so regenerate them with
; that script rather than editing them by hand.
1767 define <2 x i16> @splatconstant_shift_v2i16(<2 x i16> %a) nounwind {
1768 ; SSE-LABEL: splatconstant_shift_v2i16:
1770 ; SSE-NEXT: psllw $3, %xmm0
1773 ; AVX-LABEL: splatconstant_shift_v2i16:
1775 ; AVX-NEXT: vpsllw $3, %xmm0, %xmm0
1778 ; XOP-LABEL: splatconstant_shift_v2i16:
1780 ; XOP-NEXT: vpsllw $3, %xmm0, %xmm0
1783 ; AVX512-LABEL: splatconstant_shift_v2i16:
1785 ; AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
1788 ; AVX512VL-LABEL: splatconstant_shift_v2i16:
1789 ; AVX512VL: # %bb.0:
1790 ; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
1791 ; AVX512VL-NEXT: retq
1793 ; X86-SSE-LABEL: splatconstant_shift_v2i16:
1795 ; X86-SSE-NEXT: psllw $3, %xmm0
1796 ; X86-SSE-NEXT: retl
1797 %shift = shl <2 x i16> %a, <i16 3, i16 3>
1798 ret <2 x i16> %shift
; Uniform (splat) constant left shift of a <8 x i8> vector: all eight lanes
; are shifted left by 3. In the visible check lines most configurations expect
; a 16-bit psllw/vpsllw $3 followed by an and with a constant-pool operand,
; while the XOP configurations expect a single vpshlb with a constant-pool
; operand. The check lines are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them with that script rather
; than editing them by hand.
1801 define <8 x i8> @splatconstant_shift_v8i8(<8 x i8> %a) nounwind {
1802 ; SSE-LABEL: splatconstant_shift_v8i8:
1804 ; SSE-NEXT: psllw $3, %xmm0
1805 ; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1808 ; AVX-LABEL: splatconstant_shift_v8i8:
1810 ; AVX-NEXT: vpsllw $3, %xmm0, %xmm0
1811 ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1814 ; XOP-LABEL: splatconstant_shift_v8i8:
1816 ; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1819 ; AVX512-LABEL: splatconstant_shift_v8i8:
1821 ; AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
1822 ; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1825 ; AVX512VL-LABEL: splatconstant_shift_v8i8:
1826 ; AVX512VL: # %bb.0:
1827 ; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
1828 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
1829 ; AVX512VL-NEXT: retq
1831 ; X86-SSE-LABEL: splatconstant_shift_v8i8:
1833 ; X86-SSE-NEXT: psllw $3, %xmm0
1834 ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1835 ; X86-SSE-NEXT: retl
1836 %shift = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
; Uniform (splat) constant left shift of a <4 x i8> vector: all four lanes are
; shifted left by 3. In the visible check lines most configurations expect a
; 16-bit psllw/vpsllw $3 followed by an and with a constant-pool operand,
; while the XOP configurations expect a single vpshlb with a constant-pool
; operand. The check lines are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them with that script rather
; than editing them by hand.
1840 define <4 x i8> @splatconstant_shift_v4i8(<4 x i8> %a) nounwind {
1841 ; SSE-LABEL: splatconstant_shift_v4i8:
1843 ; SSE-NEXT: psllw $3, %xmm0
1844 ; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1847 ; AVX-LABEL: splatconstant_shift_v4i8:
1849 ; AVX-NEXT: vpsllw $3, %xmm0, %xmm0
1850 ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1853 ; XOP-LABEL: splatconstant_shift_v4i8:
1855 ; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1858 ; AVX512-LABEL: splatconstant_shift_v4i8:
1860 ; AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
1861 ; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1864 ; AVX512VL-LABEL: splatconstant_shift_v4i8:
1865 ; AVX512VL: # %bb.0:
1866 ; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
1867 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
1868 ; AVX512VL-NEXT: retq
1870 ; X86-SSE-LABEL: splatconstant_shift_v4i8:
1872 ; X86-SSE-NEXT: psllw $3, %xmm0
1873 ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1874 ; X86-SSE-NEXT: retl
1875 %shift = shl <4 x i8> %a, <i8 3, i8 3, i8 3, i8 3>
; Uniform (splat) constant left shift of a <2 x i8> vector: both lanes are
; shifted left by 3. In the visible check lines most configurations expect a
; 16-bit psllw/vpsllw $3 followed by an and with a constant-pool operand,
; while the XOP configurations expect a single vpshlb with a constant-pool
; operand. The check lines are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them with that script rather
; than editing them by hand.
1879 define <2 x i8> @splatconstant_shift_v2i8(<2 x i8> %a) nounwind {
1880 ; SSE-LABEL: splatconstant_shift_v2i8:
1882 ; SSE-NEXT: psllw $3, %xmm0
1883 ; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1886 ; AVX-LABEL: splatconstant_shift_v2i8:
1888 ; AVX-NEXT: vpsllw $3, %xmm0, %xmm0
1889 ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1892 ; XOP-LABEL: splatconstant_shift_v2i8:
1894 ; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1897 ; AVX512-LABEL: splatconstant_shift_v2i8:
1899 ; AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
1900 ; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1903 ; AVX512VL-LABEL: splatconstant_shift_v2i8:
1904 ; AVX512VL: # %bb.0:
1905 ; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
1906 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
1907 ; AVX512VL-NEXT: retq
1909 ; X86-SSE-LABEL: splatconstant_shift_v2i8:
1911 ; X86-SSE-NEXT: psllw $3, %xmm0
1912 ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1913 ; X86-SSE-NEXT: retl
1914 %shift = shl <2 x i8> %a, <i8 3, i8 3>