; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW

;
; Variable Shifts
;

define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: var_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllvq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <8 x i64> %a, %b
  ret <8 x i64> %shift
}

define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: var_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <16 x i32> %a, %b
  ret <16 x i32> %shift
}
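
; With AVX512BW the v32i16 shift maps directly to vpsllvw. Without it, each
; ymm half is zero-extended to v16i32, shifted with vpsllvd, and truncated
; back with vpmovdw.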

define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT:    vpsllvd %zmm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512DQ-NEXT:    vpsllvd %zmm2, %zmm1, %zmm1
; AVX512DQ-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <32 x i16> %a, %b
  ret <32 x i16> %shift
}
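
; No variable per-byte shift instruction exists, so v64i8 is lowered
; bit-serially: the shift amount is moved into each byte's MSB with
; vpsllw $5, then each conditional step (shift by 4, 2, 1) is selected per
; byte with vpblendvb (AVX512DQ) or with a vpmovb2m mask and a masked
; move/add (AVX512BW).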

define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsllw $4, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $2, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512DQ-NEXT:    vpand %ymm6, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm0, %ymm4
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $4, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsllw $5, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsllw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm6, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllw $4, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpsllw $5, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsllw $2, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vpaddb %zmm0, %zmm0, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
  %shift = shl <64 x i8> %a, %b
  ret <64 x i8> %shift
}

;
; Uniform Variable Shifts
;

define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllq %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %shift = shl <8 x i64> %a, %splat
  ret <8 x i64> %shift
}

define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; ALL-NEXT:    vpslld %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
  %shift = shl <16 x i32> %a, %splat
  ret <16 x i32> %shift
}
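
; Uniform shifts use the legacy shift-by-xmm forms, which shift every element
; by the scalar count held in the low 64 bits of the xmm register, so only the
; splat's bottom element needs to be zero-extended.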

define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512DQ-NEXT:    vpsllw %xmm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw %xmm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT:    vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
  %shift = shl <32 x i16> %a, %splat
  ret <32 x i16> %shift
}
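
; A uniform byte shift is done as a word shift by the splatted amount, then
; an AND with a mask that clears the bits shifted in from the neighboring
; byte; the mask is produced by shifting a vector of all-ones bytes by the
; same amount.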

define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT:    vpsllw %xmm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpcmpeqd %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpsllw %xmm2, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpbroadcastb %xmm3, %ymm3
; AVX512DQ-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw %xmm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpand %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT:    vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2
; AVX512BW-NEXT:    vpsllw %xmm1, %zmm2, %zmm1
; AVX512BW-NEXT:    vpbroadcastb %xmm1, %zmm1
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
  %shift = shl <64 x i8> %a, %splat
  ret <64 x i8> %shift
}

;
; Constant Shifts
;

define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: constant_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllvq {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
  ret <8 x i64> %shift
}

define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: constant_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllvd {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
  ret <16 x i32> %shift
}
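
; Without vpsllvw, a constant v32i16 shift becomes a vpmullw by the matching
; powers of two.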

define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX512DQ-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpmullw %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  ret <32 x i16> %shift
}
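
; The constant per-byte shifts reuse the bit-serial blend sequence, with the
; vpsllw $5 result folded into the constant pool: 8192 is 0x2000, i.e. the
; byte pair (0<<5, 1<<5), 24640 is 0x6040 for (2<<5, 3<<5), and so on.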

define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsllw $4, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $2, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm4, %ymm4, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm6, %ymm6, %ymm7
; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $4, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsllw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vpsllw $4, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsllw $2, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vpaddb %zmm0, %zmm0, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
  %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <64 x i8> %shift
}

;
; Uniform Constant Shifts
;

define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllq $7, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
  ret <8 x i64> %shift
}

define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpslld $5, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i32> %shift
}

define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsllw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $3, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <32 x i16> %shift
}
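
; A uniform-constant byte shift is a word shift plus an AND that clears the
; bits carried in from the adjacent byte (the low 3 bits here, hence the
; 0xF8 = 248 mask).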

define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsllw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <64 x i8> %shift
}