; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=ALL,AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=ALL,AVX512BW
;
; Variable Shifts
;

define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: var_shift_v8i64:
; ALL: # %bb.0:
; ALL-NEXT: vpsllvq %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
  %shift = shl <8 x i64> %a, %b
  ret <8 x i64> %shift
}

define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: var_shift_v16i32:
; ALL: # %bb.0:
; ALL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
  %shift = shl <16 x i32> %a, %b
  ret <16 x i32> %shift
}

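; AVX512DQ has no vpsllvw, so the expected lowering below zero-extends each
; 256-bit half of the v32i16 operands to 32-bit lanes, shifts with vpsllvd and
; truncates back with vpmovdw; with AVX512BW a single vpsllvw covers the vector.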
define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v32i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT: vpsllvd %zmm2, %zmm3, %zmm2
; AVX512DQ-NEXT: vpmovdw %zmm2, %ymm2
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: var_shift_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
  %shift = shl <32 x i16> %a, %b
  ret <32 x i16> %shift
}

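; There is no byte-granularity variable shift, so the expected code below builds
; shl <64 x i8> from vpsllw by 4, 2 and 1: vpsllw $5 moves each amount bit into
; the sign bit of its byte, and vpblendvb (AVX512DQ, per 256-bit half) or
; vpmovb2m plus masked moves (AVX512BW, full 512 bits) applies each partial
; shift only where that bit is set; the vpand masks drop bits shifted across
; byte boundaries.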
define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v64i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT: vpsllw $4, %ymm2, %ymm3
; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512DQ-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm5
; AVX512DQ-NEXT: vpsllw $5, %ymm5, %ymm5
; AVX512DQ-NEXT: vpblendvb %ymm5, %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsllw $2, %ymm2, %ymm3
; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512DQ-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512DQ-NEXT: vpaddb %ymm5, %ymm5, %ymm5
; AVX512DQ-NEXT: vpblendvb %ymm5, %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm3
; AVX512DQ-NEXT: vpaddb %ymm5, %ymm5, %ymm5
; AVX512DQ-NEXT: vpblendvb %ymm5, %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm3
; AVX512DQ-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm3
; AVX512DQ-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb %ymm0, %ymm0, %ymm3
; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: var_shift_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT: vpsllw $2, %zmm0, %zmm2
; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
; AVX512BW-NEXT: retq
  %shift = shl <64 x i8> %a, %b
  ret <64 x i8> %shift
}

;
; Uniform Variable Shifts
;

define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL: # %bb.0:
; ALL-NEXT: vpsllq %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
  %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %shift = shl <8 x i64> %a, %splat
  ret <8 x i64> %shift
}

define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL: # %bb.0:
; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; ALL-NEXT: vpslld %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
  %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
  %shift = shl <16 x i32> %a, %splat
  ret <16 x i32> %shift
}

define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
  %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
  %shift = shl <32 x i16> %a, %splat
  ret <32 x i16> %shift
}

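; For a uniform byte shift the amount is applied with a 16-bit vpsllw; the bits
; that spill in from the lower byte of each word are then expected to be cleared
; with a mask built by shifting an all-ones vector by the same amount and
; broadcasting its low byte.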
define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v64i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT: vpsllw %xmm1, %xmm2, %xmm1
; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
; AVX512DQ-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: vpsllw %xmm1, %xmm2, %xmm1
; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
  %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
  %shift = shl <64 x i8> %a, %splat
  ret <64 x i8> %shift
}

;
; Uniform Variable Modulo Shifts
;

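; These variants mask the shift amount with the element bit width minus one
; before splatting; the expected code keeps the vpand of the amount but the
; shift lowering itself matches the plain splat-variable tests above.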
define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v8i64:
; ALL: # %bb.0:
; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; ALL-NEXT: vpsllq %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
  %mod = and <8 x i64> %b, <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
  %splat = shufflevector <8 x i64> %mod, <8 x i64> undef, <8 x i32> zeroinitializer
  %shift = shl <8 x i64> %a, %splat
  ret <8 x i64> %shift
}

define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v16i32:
; ALL: # %bb.0:
; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; ALL-NEXT: vpslld %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
  %mod = and <16 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %splat = shufflevector <16 x i32> %mod, <16 x i32> undef, <16 x i32> zeroinitializer
  %shift = shl <16 x i32> %a, %splat
  ret <16 x i32> %shift
}

define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_modulo_shift_v32i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_modulo_shift_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
  %mod = and <32 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %splat = shufflevector <32 x i16> %mod, <32 x i16> undef, <32 x i32> zeroinitializer
  %shift = shl <32 x i16> %a, %splat
  ret <32 x i16> %shift
}

define <64 x i8> @splatvar_modulo_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_modulo_shift_v64i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT: vpsllw %xmm1, %xmm2, %xmm1
; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
; AVX512DQ-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_modulo_shift_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: vpsllw %xmm1, %xmm2, %xmm1
; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
  %mod = and <64 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %splat = shufflevector <64 x i8> %mod, <64 x i8> undef, <64 x i32> zeroinitializer
  %shift = shl <64 x i8> %a, %splat
  ret <64 x i8> %shift
}

;
; Constant Shifts
;

define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: constant_shift_v8i64:
; ALL: # %bb.0:
; ALL-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; ALL-NEXT: retq
  %shift = shl <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
  ret <8 x i64> %shift
}

define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: constant_shift_v16i32:
; ALL: # %bb.0:
; ALL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; ALL-NEXT: retq
  %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
  ret <16 x i32> %shift
}

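; A constant per-lane word shift can be expressed as a multiply by powers of
; two: the AVX512DQ path below uses vpmullw on each 256-bit half, while
; AVX512BW is expected to use vpsllvw with the constant vector directly.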
define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v32i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX512DQ-NEXT: vpmullw %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpmullw %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: constant_shift_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: retq
  %shift = shl <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  ret <32 x i16> %shift
}

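; Constant byte shifts are built from vpmaddubsw: one multiply handles the even
; bytes and another the odd bytes (moved into the high byte of each word with
; vpsllw $8), and vpternlogd then merges the two interleaved results under a
; byte mask.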
define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v64i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [1,0,4,0,16,0,64,0,128,0,32,0,8,0,2,0,1,0,4,0,16,0,64,0,128,0,32,0,8,0,2,0]
; AVX512DQ-NEXT: # ymm2 = mem[0,1,0,1]
; AVX512DQ-NEXT: vpmaddubsw %ymm2, %ymm1, %ymm3
; AVX512DQ-NEXT: vpmaddubsw %ymm2, %ymm0, %ymm2
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1,0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1]
; AVX512DQ-NEXT: # ymm3 = mem[0,1,0,1]
; AVX512DQ-NEXT: vpmaddubsw %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsllw $8, %ymm0, %ymm0
; AVX512DQ-NEXT: vpmaddubsw %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT: vpsllw $8, %ymm1, %ymm1
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: constant_shift_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1 # [1,0,4,0,16,0,64,0,128,0,32,0,8,0,2,0,1,0,4,0,16,0,64,0,128,0,32,0,8,0,2,0,1,0,4,0,16,0,64,0,128,0,32,0,8,0,2,0,1,0,4,0,16,0,64,0,128,0,32,0,8,0,2,0]
; AVX512BW-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1,0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1,0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1,0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1]
; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm0
; AVX512BW-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
; AVX512BW-NEXT: retq
  %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <64 x i8> %shift
}

;
; Uniform Constant Shifts
;

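; Splat immediate shifts should map straight onto the immediate forms vpsllq,
; vpslld and vpsllw; the byte case reuses vpsllw $3 plus an AND with a
; broadcast mask to clear the bits carried across byte boundaries.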
define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v8i64:
; ALL: # %bb.0:
; ALL-NEXT: vpsllq $7, %zmm0, %zmm0
; ALL-NEXT: retq
  %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
  ret <8 x i64> %shift
}

define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v16i32:
; ALL: # %bb.0:
; ALL-NEXT: vpslld $5, %zmm0, %zmm0
; ALL-NEXT: retq
  %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i32> %shift
}

define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v32i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpsllw $3, %ymm0, %ymm1
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT: vpsllw $3, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_shift_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
  %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <32 x i16> %shift
}

define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v64i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpsllw $3, %ymm0, %ymm1
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT: vpsllw $3, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_shift_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $3, %zmm0, %zmm0
; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
  %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <64 x i8> %shift
}