1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW

;
; Variable Shifts
;

define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: var_shift_v8i64:
; ALL: # %bb.0:
; ALL-NEXT: vpsllvq %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
  %shift = shl <8 x i64> %a, %b
  ret <8 x i64> %shift
}

18 define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: var_shift_v16i32:
; ALL: # %bb.0:
; ALL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
  %shift = shl <16 x i32> %a, %b
  ret <16 x i32> %shift
}

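; 512-bit variable word shifts (vpsllvw) require AVX512BW. Without it, the
; expected lowering below splits the vector into ymm halves, zero-extends the
; i16 elements to i32, shifts with vpsllvd and truncates back with vpmovdw.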
27 define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v32i16:
; AVX512DQ: # %bb.0:
30 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
31 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm3
32 ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero
33 ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
34 ; AVX512DQ-NEXT: vpsllvd %zmm3, %zmm2, %zmm2
35 ; AVX512DQ-NEXT: vpmovdw %zmm2, %ymm2
36 ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
37 ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
38 ; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
39 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: var_shift_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
  %shift = shl <32 x i16> %a, %b
  ret <32 x i16> %shift
}

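; There is no variable byte shift on either subtarget. The expected AVX512DQ
; lowering works on ymm halves, moving the shift amounts into the sign bits
; with vpsllw $5 and applying shift-by-4/2/1 steps through vpblendvb selects;
; AVX512BW performs the same selection on zmm via vpmovb2m masks and masked
; vmovdqu8/vpaddb.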
51 define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v64i8:
; AVX512DQ: # %bb.0:
54 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2
55 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm3
56 ; AVX512DQ-NEXT: vpsllw $4, %ymm3, %ymm4
57 ; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
58 ; AVX512DQ-NEXT: vpand %ymm5, %ymm4, %ymm4
59 ; AVX512DQ-NEXT: vpsllw $5, %ymm2, %ymm2
60 ; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
61 ; AVX512DQ-NEXT: vpsllw $2, %ymm3, %ymm4
62 ; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
63 ; AVX512DQ-NEXT: vpand %ymm6, %ymm4, %ymm4
64 ; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm2
65 ; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
66 ; AVX512DQ-NEXT: vpaddb %ymm3, %ymm3, %ymm4
67 ; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm2
68 ; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
69 ; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm3
70 ; AVX512DQ-NEXT: vpand %ymm5, %ymm3, %ymm3
71 ; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1
72 ; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
73 ; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm3
74 ; AVX512DQ-NEXT: vpand %ymm6, %ymm3, %ymm3
75 ; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
76 ; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
77 ; AVX512DQ-NEXT: vpaddb %ymm0, %ymm0, %ymm3
78 ; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
79 ; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: var_shift_v64i8:
; AVX512BW: # %bb.0:
85 ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
86 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
87 ; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1
88 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
89 ; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
90 ; AVX512BW-NEXT: vpsllw $2, %zmm0, %zmm2
91 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
92 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
93 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
94 ; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
95 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
96 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
; AVX512BW-NEXT: retq
  %shift = shl <64 x i8> %a, %b
  ret <64 x i8> %shift
}

;
; Uniform Variable Shifts
;

107 define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL: # %bb.0:
; ALL-NEXT: vpsllq %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
  %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %shift = shl <8 x i64> %a, %splat
  ret <8 x i64> %shift
}

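; Uniform (splatted) shift amounts use the scalar-count shift forms that take
; the count in an xmm register; for v16i32 the low dword of the count is
; zero-extended to 64 bits with vpmovzxdq before the vpslld.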
117 define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL: # %bb.0:
; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; ALL-NEXT: vpslld %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
  %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
  %shift = shl <16 x i32> %a, %splat
  ret <16 x i32> %shift
}

128 define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ: # %bb.0:
131 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
132 ; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
133 ; AVX512DQ-NEXT: vpsllw %xmm1, %ymm2, %ymm2
134 ; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
135 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW: # %bb.0:
140 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
141 ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
142 ; AVX512BW-NEXT: retq
143 %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
144 %shift = shl <32 x i16> %a, %splat
  ret <32 x i16> %shift
}

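; Bytes are shifted at word granularity: vpsllw by the splatted count, then a
; vpand with a mask that clears the bits shifted in from the neighbouring
; byte. The mask is produced by shifting an all-ones register (vpcmpeqd) by
; the same count and broadcasting its low byte with vpbroadcastb.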
148 define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v64i8:
; AVX512DQ: # %bb.0:
151 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
152 ; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
153 ; AVX512DQ-NEXT: vpsllw %xmm1, %ymm2, %ymm2
154 ; AVX512DQ-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
155 ; AVX512DQ-NEXT: vpsllw %xmm1, %xmm3, %xmm3
156 ; AVX512DQ-NEXT: vpbroadcastb %xmm3, %ymm3
157 ; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
158 ; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
159 ; AVX512DQ-NEXT: vpand %ymm3, %ymm0, %ymm0
160 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v64i8:
; AVX512BW: # %bb.0:
165 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
166 ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
167 ; AVX512BW-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
168 ; AVX512BW-NEXT: vpsllw %xmm1, %xmm2, %xmm1
169 ; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1
170 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
171 ; AVX512BW-NEXT: retq
172 %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
  %shift = shl <64 x i8> %a, %splat
  ret <64 x i8> %shift
}

;
; Constant Shifts
;

181 define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: constant_shift_v8i64:
; ALL: # %bb.0:
; ALL-NEXT: vpsllvq {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT: retq
  %shift = shl <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
  ret <8 x i64> %shift
}

190 define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: constant_shift_v16i32:
; ALL: # %bb.0:
; ALL-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT: retq
  %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
  ret <16 x i32> %shift
}

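; A left shift by a constant is a multiply by a power of two, so without
; AVX512BW the per-element v32i16 shift is expected to lower to vpmullw with
; a constant vector of powers of two on each ymm half; AVX512BW can use a
; single vpsllvw with the amounts loaded from memory.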
199 define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v32i16:
; AVX512DQ: # %bb.0:
202 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1
203 ; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
204 ; AVX512DQ-NEXT: vpmullw %ymm2, %ymm1, %ymm1
205 ; AVX512DQ-NEXT: vpmullw %ymm2, %ymm0, %ymm0
206 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: constant_shift_v32i16:
; AVX512BW: # %bb.0:
211 ; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
212 ; AVX512BW-NEXT: retq
213 %shift = shl <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  ret <32 x i16> %shift
}

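; The constant v64i8 case follows the variable-shift strategy, but the
; vpsllw $5 of the amounts is pre-folded into the constant control vectors
; ([8192,24640,...]) that drive the vpblendvb selects (AVX512DQ) and the
; vpmovb2m masks (AVX512BW).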
217 define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v64i8:
; AVX512DQ: # %bb.0:
220 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1
221 ; AVX512DQ-NEXT: vpsllw $4, %ymm1, %ymm2
222 ; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
223 ; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
224 ; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm4 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
225 ; AVX512DQ-NEXT: # ymm4 = mem[0,1,0,1]
226 ; AVX512DQ-NEXT: vpblendvb %ymm4, %ymm2, %ymm1, %ymm1
227 ; AVX512DQ-NEXT: vpsllw $2, %ymm1, %ymm2
228 ; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm5 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
229 ; AVX512DQ-NEXT: vpand %ymm5, %ymm2, %ymm2
230 ; AVX512DQ-NEXT: vpaddb %ymm4, %ymm4, %ymm6
231 ; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
232 ; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm2
233 ; AVX512DQ-NEXT: vpaddb %ymm6, %ymm6, %ymm7
234 ; AVX512DQ-NEXT: vpblendvb %ymm7, %ymm2, %ymm1, %ymm1
235 ; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm2
236 ; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
237 ; AVX512DQ-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
238 ; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm2
239 ; AVX512DQ-NEXT: vpand %ymm5, %ymm2, %ymm2
240 ; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm2, %ymm0, %ymm0
241 ; AVX512DQ-NEXT: vpaddb %ymm0, %ymm0, %ymm2
242 ; AVX512DQ-NEXT: vpblendvb %ymm7, %ymm2, %ymm0, %ymm0
243 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: constant_shift_v64i8:
; AVX512BW: # %bb.0:
248 ; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
249 ; AVX512BW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
250 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
251 ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
252 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
253 ; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
254 ; AVX512BW-NEXT: vpsllw $2, %zmm0, %zmm2
255 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
256 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
257 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
258 ; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
259 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
260 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
261 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
262 ; AVX512BW-NEXT: retq
  %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <64 x i8> %shift
}

;
; Uniform Constant Shifts
;

271 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v8i64:
; ALL: # %bb.0:
; ALL-NEXT: vpsllq $7, %zmm0, %zmm0
; ALL-NEXT: retq
  %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
  ret <8 x i64> %shift
}

280 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v16i32:
; ALL: # %bb.0:
; ALL-NEXT: vpslld $5, %zmm0, %zmm0
; ALL-NEXT: retq
  %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i32> %shift
}

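; A single zmm vpsllw with an immediate needs AVX512BW; the AVX512DQ lowering
; shifts each ymm half by the immediate and recombines with vinserti64x4.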
289 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v32i16:
; AVX512DQ: # %bb.0:
292 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1
293 ; AVX512DQ-NEXT: vpsllw $3, %ymm1, %ymm1
294 ; AVX512DQ-NEXT: vpsllw $3, %ymm0, %ymm0
295 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_shift_v32i16:
; AVX512BW: # %bb.0:
300 ; AVX512BW-NEXT: vpsllw $3, %zmm0, %zmm0
301 ; AVX512BW-NEXT: retq
302 %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <32 x i16> %shift
}

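; Uniform immediate byte shifts are also done at word granularity: vpsllw $3
; followed by vpand with 0xf8 to clear the bits carried in from the
; neighbouring byte.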
306 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v64i8:
; AVX512DQ: # %bb.0:
309 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1
310 ; AVX512DQ-NEXT: vpsllw $3, %ymm1, %ymm1
311 ; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
312 ; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1
313 ; AVX512DQ-NEXT: vpsllw $3, %ymm0, %ymm0
314 ; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
315 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_shift_v64i8:
; AVX512BW: # %bb.0:
320 ; AVX512BW-NEXT: vpsllw $3, %zmm0, %zmm0
321 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
322 ; AVX512BW-NEXT: retq
  %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <64 x i8> %shift
}