; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW

;
; Variable Shifts
;

define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: var_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllvq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <8 x i64> %a, %b
  ret <8 x i64> %shift
}

define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: var_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <16 x i32> %a, %b
  ret <16 x i32> %shift
}

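; Without AVX512BW there is no 512-bit variable word shift, so each 256-bit
; half is zero-extended to v16i32, shifted with vpsllvd, and truncated back
; with vpmovdw; AVX512BW uses vpsllvw on the full vector.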
define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT:    vpsllvd %zmm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512DQ-NEXT:    vpsllvd %zmm2, %zmm1, %zmm1
; AVX512DQ-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <32 x i16> %a, %b
  ret <32 x i16> %shift
}

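; x86 has no byte-granularity shift, so the amount is moved into the byte sign
; bits with vpsllw $5 and shifted-by-4/2/1 copies are selected one amount bit
; at a time: vpblendvb on AVX512DQ, vpmovb2m/vmovdqu8 mask blends on AVX512BW.
; The vpand masks clear the bits that crossed in from the adjacent byte.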
define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsllw $4, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $2, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512DQ-NEXT:    vpand %ymm6, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm0, %ymm4
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $4, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsllw $5, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsllw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm6, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllw $4, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpsllw $5, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsllw $2, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vpaddb %zmm0, %zmm0, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
  %shift = shl <64 x i8> %a, %b
  ret <64 x i8> %shift
}

;
; Uniform Variable Shifts
;

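; Splat shifts use the shift forms that take the amount as a scalar in the low
; qword of an XMM register, so only the low element of the splat is needed.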
define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllq %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %shift = shl <8 x i64> %a, %splat
  ret <8 x i64> %shift
}

define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; ALL-NEXT:    vpslld %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
  %shift = shl <16 x i32> %a, %splat
  ret <16 x i32> %shift
}

define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512DQ-NEXT:    vpsllw %xmm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw %xmm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT:    vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
  %shift = shl <32 x i16> %a, %splat
  ret <32 x i16> %shift
}

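; A uniform byte shift is performed as a word shift; shifting an all-ones
; vector by the same amount yields the mask that clears the bits pulled in
; across each byte boundary.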
define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT:    vpsllw %xmm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX512DQ-NEXT:    vpsllw %xmm2, %xmm3, %xmm3
; AVX512DQ-NEXT:    vpbroadcastb %xmm3, %ymm3
; AVX512DQ-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw %xmm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpand %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT:    vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT:    vpsllw %xmm1, %xmm2, %xmm1
; AVX512BW-NEXT:    vpbroadcastb %xmm1, %zmm1
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
  %shift = shl <64 x i8> %a, %splat
  ret <64 x i8> %shift
}

;
; Constant Shifts
;

define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: constant_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllvq {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
  ret <8 x i64> %shift
}

define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: constant_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllvd {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
  ret <16 x i32> %shift
}

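; Without AVX512BW, a constant word shift becomes a multiply by a vector of
; power-of-two constants (vpmullw).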
define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX512DQ-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpmullw %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  ret <32 x i16> %shift
}

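; Constant byte shifts reuse the variable-shift blend sequence; the per-byte
; amounts are pre-shifted into the sign bits and folded into the broadcast
; constant.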
define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsllw $4, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vbroadcasti128 {{.*#+}} ymm4 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512DQ-NEXT:    # ymm4 = mem[0,1,0,1]
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $2, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm4, %ymm4, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm6, %ymm6, %ymm7
; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $4, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsllw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vbroadcasti32x4 {{.*#+}} zmm1 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512BW-NEXT:    # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vpsllw $4, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsllw $2, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vpaddb %zmm0, %zmm0, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
  %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <64 x i8> %shift
}

;
; Uniform Constant Shifts
;

define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllq $7, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
  ret <8 x i64> %shift
}

define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpslld $5, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i32> %shift
}

define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsllw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $3, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <32 x i16> %shift
}

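; A uniform constant byte shift is a word shift plus an AND with 248
; (0b11111000) to clear the bits shifted in from the neighboring byte.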
define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsllw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <64 x i8> %shift
}