; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VLBW
declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)
declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
declare <32 x i16> @llvm.fshl.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)
declare <64 x i8> @llvm.fshl.v64i8(<64 x i8>, <64 x i8>, <64 x i8>)
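
; Every test below passes the same value for both data operands of llvm.fshl,
; so the funnel shift reduces to a rotate left:
;   fshl(x, x, c) == rotl(x, c) == (x << c) | (x >> (width - c)), c taken mod width
; e.g. for one i16 element, rotl(0x1234, 4) == 0x2341.
; AVX512F has native variable rotates only for i32/i64 elements (vprolvd,
; vprolvq); the i16/i8 cases are expanded into shift-and-or sequences below.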

;
; Variable Shifts
;

define <8 x i64> @var_funnnel_v8i64(<8 x i64> %x, <8 x i64> %amt) nounwind {
; AVX512-LABEL: var_funnnel_v8i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vprolvq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
  %res = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x, <8 x i64> %x, <8 x i64> %amt)
  ret <8 x i64> %res
}

define <16 x i32> @var_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounwind {
; AVX512-LABEL: var_funnnel_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vprolvd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
  %res = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x, <16 x i32> %x, <16 x i32> %amt)
  ret <16 x i32> %res
}
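
; There is no variable 16-bit rotate. Without AVX512BW each 256-bit half is
; zero-extended from words to dwords (vpmovzxwd), shifted both ways with
; vpsllvd/vpsrlvd, OR'd, and truncated back with vpmovdw. With AVX512BW the
; native word-variable shifts vpsllvw/vpsrlvw work on the full zmm register.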
define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
; AVX512F-LABEL: var_funnnel_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpsllvd %zmm5, %zmm0, %zmm5
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512F-NEXT: vpsubw %ymm2, %ymm6, %ymm2
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512F-NEXT: vpsrlvd %zmm2, %zmm0, %zmm0
; AVX512F-NEXT: vpord %zmm0, %zmm5, %zmm0
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: vpand %ymm4, %ymm3, %ymm2
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT: vpsllvd %zmm3, %zmm1, %zmm3
; AVX512F-NEXT: vpsubw %ymm2, %ymm6, %ymm2
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512F-NEXT: vpsrlvd %zmm2, %zmm1, %zmm1
; AVX512F-NEXT: vpord %zmm1, %zmm3, %zmm1
; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: var_funnnel_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT: vpsllvd %zmm5, %zmm0, %zmm5
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: vpsubw %ymm2, %ymm6, %ymm2
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512VL-NEXT: vpsrlvd %zmm2, %zmm0, %zmm0
; AVX512VL-NEXT: vpord %zmm0, %zmm5, %zmm0
; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
; AVX512VL-NEXT: vpand %ymm4, %ymm3, %ymm2
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-NEXT: vpsllvd %zmm3, %zmm1, %zmm3
; AVX512VL-NEXT: vpsubw %ymm2, %ymm6, %ymm2
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512VL-NEXT: vpsrlvd %zmm2, %zmm1, %zmm1
; AVX512VL-NEXT: vpord %zmm1, %zmm3, %zmm1
; AVX512VL-NEXT: vpmovdw %zmm1, %ymm1
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: var_funnnel_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm3
; AVX512BW-NEXT: vpsllvw %zmm3, %zmm0, %zmm3
; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512BW-NEXT: vpsubw %zmm1, %zmm4, %zmm1
; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm3
; AVX512VLBW-NEXT: vpsllvw %zmm3, %zmm0, %zmm3
; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm4, %zmm1
; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512VLBW-NEXT: retq
  %res = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x, <32 x i16> %x, <32 x i16> %amt)
  ret <32 x i16> %res
}
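
; There are no variable byte shifts at all, so the byte rotate is built from
; immediate shift steps of 4, 2 and 1. vpsllw $5 moves the active amount bit
; into each byte's MSB, where vpblendvb (or vpmovb2m plus masked moves on
; AVX512BW) selects between the shifted and unshifted bytes at each step.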
define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind {
; AVX512F-LABEL: var_funnnel_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm6
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm7 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %ymm7, %ymm6, %ymm6
; AVX512F-NEXT: vpor %ymm4, %ymm6, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512F-NEXT: vpand %ymm6, %ymm2, %ymm2
; AVX512F-NEXT: vpsllw $5, %ymm2, %ymm2
; AVX512F-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm8 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
; AVX512F-NEXT: vpand %ymm8, %ymm4, %ymm4
; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm9
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm10 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512F-NEXT: vpand %ymm10, %ymm9, %ymm9
; AVX512F-NEXT: vpor %ymm4, %ymm9, %ymm4
; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX512F-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm9 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512F-NEXT: vpand %ymm9, %ymm4, %ymm4
; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm11
; AVX512F-NEXT: vpor %ymm4, %ymm11, %ymm4
; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX512F-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm2
; AVX512F-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm4
; AVX512F-NEXT: vpand %ymm7, %ymm4, %ymm4
; AVX512F-NEXT: vpor %ymm2, %ymm4, %ymm2
; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512F-NEXT: vpsllw $5, %ymm3, %ymm3
; AVX512F-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlw $6, %ymm1, %ymm2
; AVX512F-NEXT: vpand %ymm8, %ymm2, %ymm2
; AVX512F-NEXT: vpsllw $2, %ymm1, %ymm4
; AVX512F-NEXT: vpand %ymm10, %ymm4, %ymm4
; AVX512F-NEXT: vpor %ymm2, %ymm4, %ymm2
; AVX512F-NEXT: vpaddb %ymm3, %ymm3, %ymm3
; AVX512F-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlw $7, %ymm1, %ymm2
; AVX512F-NEXT: vpand %ymm9, %ymm2, %ymm2
; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm4
; AVX512F-NEXT: vpor %ymm2, %ymm4, %ymm2
; AVX512F-NEXT: vpaddb %ymm3, %ymm3, %ymm3
; AVX512F-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: var_funnnel_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm6
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpand %ymm7, %ymm6, %ymm6
; AVX512VL-NEXT: vpor %ymm4, %ymm6, %ymm4
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VL-NEXT: vpand %ymm6, %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $5, %ymm2, %ymm2
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm4
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm8 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
; AVX512VL-NEXT: vpand %ymm8, %ymm4, %ymm4
; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm9
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm10 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512VL-NEXT: vpand %ymm10, %ymm9, %ymm9
; AVX512VL-NEXT: vpor %ymm4, %ymm9, %ymm4
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm4
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm9 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512VL-NEXT: vpand %ymm9, %ymm4, %ymm4
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm11
; AVX512VL-NEXT: vpor %ymm4, %ymm11, %ymm4
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm2
; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm4
; AVX512VL-NEXT: vpand %ymm7, %ymm4, %ymm4
; AVX512VL-NEXT: vpor %ymm2, %ymm4, %ymm2
; AVX512VL-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512VL-NEXT: vpsllw $5, %ymm3, %ymm3
; AVX512VL-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: vpsrlw $6, %ymm1, %ymm2
; AVX512VL-NEXT: vpand %ymm8, %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $2, %ymm1, %ymm4
; AVX512VL-NEXT: vpand %ymm10, %ymm4, %ymm4
; AVX512VL-NEXT: vpor %ymm2, %ymm4, %ymm2
; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm3
; AVX512VL-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: vpsrlw $7, %ymm1, %ymm2
; AVX512VL-NEXT: vpand %ymm9, %ymm2, %ymm2
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm4
; AVX512VL-NEXT: vpor %ymm2, %ymm4, %ymm2
; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm3
; AVX512VL-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: var_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: vpsubb %zmm1, %zmm2, %zmm2
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT: vpsllw $5, %zmm2, %zmm2
; AVX512BW-NEXT: vpaddb %zmm2, %zmm2, %zmm4
; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
; AVX512BW-NEXT: vpmovb2m %zmm2, %k2
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm2
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT: vpblendmb %zmm2, %zmm0, %zmm2 {%k2}
; AVX512BW-NEXT: vpsrlw $2, %zmm2, %zmm5
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
; AVX512BW-NEXT: vmovdqu8 %zmm5, %zmm2 {%k1}
; AVX512BW-NEXT: vpsrlw $1, %zmm2, %zmm5
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
; AVX512BW-NEXT: vpaddb %zmm4, %zmm4, %zmm4
; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm5, %zmm2 {%k1}
; AVX512BW-NEXT: vpandq %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm3
; AVX512BW-NEXT: vpmovb2m %zmm3, %k1
; AVX512BW-NEXT: vpmovb2m %zmm1, %k2
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k2}
; AVX512BW-NEXT: vpsllw $2, %zmm0, %zmm1
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT: vpaddb %zmm3, %zmm3, %zmm1
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
; AVX512BW-NEXT: vporq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpsubb %zmm1, %zmm2, %zmm2
; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm2
; AVX512VLBW-NEXT: vpsllw $5, %zmm2, %zmm2
; AVX512VLBW-NEXT: vpaddb %zmm2, %zmm2, %zmm4
; AVX512VLBW-NEXT: vpmovb2m %zmm4, %k1
; AVX512VLBW-NEXT: vpmovb2m %zmm2, %k2
; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm2
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512VLBW-NEXT: vpblendmb %zmm2, %zmm0, %zmm2 {%k2}
; AVX512VLBW-NEXT: vpsrlw $2, %zmm2, %zmm5
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
; AVX512VLBW-NEXT: vmovdqu8 %zmm5, %zmm2 {%k1}
; AVX512VLBW-NEXT: vpsrlw $1, %zmm2, %zmm5
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
; AVX512VLBW-NEXT: vpaddb %zmm4, %zmm4, %zmm4
; AVX512VLBW-NEXT: vpmovb2m %zmm4, %k1
; AVX512VLBW-NEXT: vmovdqu8 %zmm5, %zmm2 {%k1}
; AVX512VLBW-NEXT: vpandq %zmm3, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsllw $5, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpaddb %zmm1, %zmm1, %zmm3
; AVX512VLBW-NEXT: vpmovb2m %zmm3, %k1
; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k2
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512VLBW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k2}
; AVX512VLBW-NEXT: vpsllw $2, %zmm0, %zmm1
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512VLBW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1}
; AVX512VLBW-NEXT: vpaddb %zmm3, %zmm3, %zmm1
; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k1
; AVX512VLBW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
; AVX512VLBW-NEXT: vporq %zmm2, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
  %res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> %amt)
  ret <64 x i8> %res
}

;
; Uniform Variable Shifts
;
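
; A splatted amount is a single scalar count, so the word/byte cases can use
; the shift forms that take the count in an xmm register (vpsllw/vpsrlw),
; while i32/i64 broadcast the count and reuse vprolvd/vprolvq.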
define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %amt) nounwind {
; AVX512-LABEL: splatvar_funnnel_v8i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastq %xmm1, %zmm1
; AVX512-NEXT: vprolvq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
  %splat = shufflevector <8 x i64> %amt, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x, <8 x i64> %x, <8 x i64> %splat)
  ret <8 x i64> %res
}

define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounwind {
; AVX512-LABEL: splatvar_funnnel_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastd %xmm1, %zmm1
; AVX512-NEXT: vprolvd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
  %splat = shufflevector <16 x i32> %amt, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x, <16 x i32> %x, <16 x i32> %splat)
  ret <16 x i32> %res
}

define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastw %xmm2, %ymm2
; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16]
; AVX512F-NEXT: vpsubw %xmm2, %xmm5, %xmm2
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm0, %ymm4, %ymm0
; AVX512F-NEXT: vpsllw %xmm3, %ymm1, %ymm3
; AVX512F-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: vpor %ymm1, %ymm3, %ymm1
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastw %xmm2, %ymm2
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm4
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: vpsubw %xmm2, %xmm5, %xmm2
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm0, %ymm4, %ymm0
; AVX512VL-NEXT: vpsllw %xmm3, %ymm1, %ymm3
; AVX512VL-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
; AVX512VL-NEXT: vpor %ymm1, %ymm3, %ymm1
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm3, %zmm0, %zmm3
; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512BW-NEXT: vpsubw %xmm1, %xmm4, %xmm1
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %zmm1
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm3, %zmm0, %zmm3
; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm4, %xmm1
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512VLBW-NEXT: retq
  %splat = shufflevector <32 x i16> %amt, <32 x i16> undef, <32 x i32> zeroinitializer
  %res = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x, <32 x i16> %x, <32 x i16> %splat)
  ret <32 x i16> %res
}
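
; Byte elements have no scalar-count shift of their own: vpsllw/vpsrlw shift
; whole words, leaking bits across byte boundaries. Shifting an all-ones
; vector by the same count yields the in-lane bit mask, which is broadcast
; and ANDed in after each shift before the two halves are OR'd together.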
define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm4
; AVX512F-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5
; AVX512F-NEXT: vpsllw %xmm3, %ymm5, %ymm6
; AVX512F-NEXT: vpbroadcastb %xmm6, %ymm6
; AVX512F-NEXT: vpand %ymm6, %ymm4, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512F-NEXT: vpsubb %xmm2, %xmm7, %xmm2
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw %xmm2, %ymm5, %ymm5
; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
; AVX512F-NEXT: vpbroadcastb %xmm5, %ymm5
; AVX512F-NEXT: vpand %ymm5, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm0, %ymm4, %ymm0
; AVX512F-NEXT: vpsllw %xmm3, %ymm1, %ymm3
; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm5, %ymm1, %ymm1
; AVX512F-NEXT: vpor %ymm1, %ymm3, %ymm1
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm4
; AVX512VL-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5
; AVX512VL-NEXT: vpsllw %xmm3, %ymm5, %ymm6
; AVX512VL-NEXT: vpbroadcastb %xmm6, %ymm6
; AVX512VL-NEXT: vpand %ymm6, %ymm4, %ymm4
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512VL-NEXT: vpsubb %xmm2, %xmm7, %xmm2
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw %xmm2, %ymm5, %ymm5
; AVX512VL-NEXT: vpsrlw $8, %ymm5, %ymm5
; AVX512VL-NEXT: vpbroadcastb %xmm5, %ymm5
; AVX512VL-NEXT: vpand %ymm5, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm0, %ymm4, %ymm0
; AVX512VL-NEXT: vpsllw %xmm3, %ymm1, %ymm3
; AVX512VL-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512VL-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
; AVX512VL-NEXT: vpand %ymm5, %ymm1, %ymm1
; AVX512VL-NEXT: vpor %ymm1, %ymm3, %ymm1
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm3, %zmm0, %zmm4
; AVX512BW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5
; AVX512BW-NEXT: vpsllw %xmm3, %zmm5, %zmm3
; AVX512BW-NEXT: vpbroadcastb %xmm3, %zmm3
; AVX512BW-NEXT: vpandq %zmm3, %zmm4, %zmm3
; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512BW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrlw %xmm1, %zmm5, %zmm1
; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %zmm1
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm3, %zmm0, %zmm4
; AVX512VLBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5
; AVX512VLBW-NEXT: vpsllw %xmm3, %zmm5, %zmm3
; AVX512VLBW-NEXT: vpbroadcastb %xmm3, %zmm3
; AVX512VLBW-NEXT: vpandq %zmm3, %zmm4, %zmm3
; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm5, %zmm1
; AVX512VLBW-NEXT: vpsrlw $8, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %zmm1
; AVX512VLBW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512VLBW-NEXT: retq
  %splat = shufflevector <64 x i8> %amt, <64 x i8> undef, <64 x i32> zeroinitializer
  %res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> %splat)
  ret <64 x i8> %res
}

;
; Constant Shifts
;

define <8 x i64> @constant_funnnel_v8i64(<8 x i64> %x) nounwind {
; AVX512-LABEL: constant_funnnel_v8i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vprolvq {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: retq
  %res = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x, <8 x i64> %x, <8 x i64> <i64 4, i64 14, i64 50, i64 60, i64 4, i64 14, i64 50, i64 60>)
  ret <8 x i64> %res
}

define <16 x i32> @constant_funnnel_v16i32(<16 x i32> %x) nounwind {
; AVX512-LABEL: constant_funnnel_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vprolvd {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: retq
  %res = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x, <16 x i32> %x, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>)
  ret <16 x i32> %res
}
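
; With per-element constant amounts the i16 rotate becomes two multiplies:
; vpmullw by 2^c produces x << c, and vpmulhuw by 2^c produces x >> (16-c);
; the c == 0 lanes (the undef slots in the <u,2,4,...> constant) are patched
; back in with vpblendw/vpblendd before the OR.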
define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x) nounwind {
; AVX512F-LABEL: constant_funnnel_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = <u,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768>
; AVX512F-NEXT: vpmulhuw %ymm2, %ymm0, %ymm3
; AVX512F-NEXT: vpblendw {{.*#+}} ymm4 = ymm0[0],ymm3[1,2,3,4,5,6,7],ymm0[8],ymm3[9,10,11,12,13,14,15]
; AVX512F-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX512F-NEXT: vpmullw %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vpmulhuw %ymm2, %ymm1, %ymm2
; AVX512F-NEXT: vpblendw {{.*#+}} ymm3 = ymm1[0],ymm2[1,2,3,4,5,6,7],ymm1[8],ymm2[9,10,11,12,13,14,15]
; AVX512F-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; AVX512F-NEXT: vpmullw %ymm4, %ymm1, %ymm1
; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: constant_funnnel_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <u,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768>
; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm0, %ymm3
; AVX512VL-NEXT: vpblendw {{.*#+}} ymm4 = ymm0[0],ymm3[1,2,3,4,5,6,7],ymm0[8],ymm3[9,10,11,12,13,14,15]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX512VL-NEXT: vpmullw %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm1, %ymm2
; AVX512VL-NEXT: vpblendw {{.*#+}} ymm3 = ymm1[0],ymm2[1,2,3,4,5,6,7],ymm1[8],ymm2[9,10,11,12,13,14,15]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; AVX512VL-NEXT: vpmullw %ymm4, %ymm1, %ymm1
; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: constant_funnnel_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm1
; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm1
; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
  %res = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x, <32 x i16> %x, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
  ret <32 x i16> %res
}
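
; Constant byte rotates widen each half to words (unpack low/high), multiply
; by per-lane powers of two so the rotated-in bits land in the high byte,
; shift right by 8 and repack with vpackuswb; the left-shift contribution
; comes from the same vpblendvb ladder as the variable case.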
define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x) nounwind {
; AVX512F-LABEL: constant_funnnel_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512F-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm2
; AVX512F-NEXT: vpsllw $2, %ymm2, %ymm5
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512F-NEXT: vpand %ymm6, %ymm5, %ymm5
; AVX512F-NEXT: vpaddb %ymm4, %ymm4, %ymm7
; AVX512F-NEXT: vpblendvb %ymm7, %ymm5, %ymm2, %ymm2
; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm5
; AVX512F-NEXT: vpaddb %ymm7, %ymm7, %ymm8
; AVX512F-NEXT: vpblendvb %ymm8, %ymm5, %ymm2, %ymm2
; AVX512F-NEXT: vpxor %xmm5, %xmm5, %xmm5
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm9 = ymm0[8],ymm5[8],ymm0[9],ymm5[9],ymm0[10],ymm5[10],ymm0[11],ymm5[11],ymm0[12],ymm5[12],ymm0[13],ymm5[13],ymm0[14],ymm5[14],ymm0[15],ymm5[15],ymm0[24],ymm5[24],ymm0[25],ymm5[25],ymm0[26],ymm5[26],ymm0[27],ymm5[27],ymm0[28],ymm5[28],ymm0[29],ymm5[29],ymm0[30],ymm5[30],ymm0[31],ymm5[31]
; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm10 = [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2]
; AVX512F-NEXT: # ymm10 = mem[0,1,0,1]
; AVX512F-NEXT: vpmullw %ymm10, %ymm9, %ymm9
; AVX512F-NEXT: vpsrlw $8, %ymm9, %ymm9
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm5[0],ymm0[1],ymm5[1],ymm0[2],ymm5[2],ymm0[3],ymm5[3],ymm0[4],ymm5[4],ymm0[5],ymm5[5],ymm0[6],ymm5[6],ymm0[7],ymm5[7],ymm0[16],ymm5[16],ymm0[17],ymm5[17],ymm0[18],ymm5[18],ymm0[19],ymm5[19],ymm0[20],ymm5[20],ymm0[21],ymm5[21],ymm0[22],ymm5[22],ymm0[23],ymm5[23]
; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm11 = [256,2,4,8,16,32,64,128,256,2,4,8,16,32,64,128]
; AVX512F-NEXT: # ymm11 = mem[0,1,0,1]
; AVX512F-NEXT: vpmullw %ymm11, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512F-NEXT: vpackuswb %ymm9, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm2
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpblendvb %ymm4, %ymm2, %ymm1, %ymm2
; AVX512F-NEXT: vpsllw $2, %ymm2, %ymm3
; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512F-NEXT: vpblendvb %ymm7, %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm3
; AVX512F-NEXT: vpblendvb %ymm8, %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm5[8],ymm1[9],ymm5[9],ymm1[10],ymm5[10],ymm1[11],ymm5[11],ymm1[12],ymm5[12],ymm1[13],ymm5[13],ymm1[14],ymm5[14],ymm1[15],ymm5[15],ymm1[24],ymm5[24],ymm1[25],ymm5[25],ymm1[26],ymm5[26],ymm1[27],ymm5[27],ymm1[28],ymm5[28],ymm1[29],ymm5[29],ymm1[30],ymm5[30],ymm1[31],ymm5[31]
; AVX512F-NEXT: vpmullw %ymm10, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm5[0],ymm1[1],ymm5[1],ymm1[2],ymm5[2],ymm1[3],ymm5[3],ymm1[4],ymm5[4],ymm1[5],ymm5[5],ymm1[6],ymm5[6],ymm1[7],ymm5[7],ymm1[16],ymm5[16],ymm1[17],ymm5[17],ymm1[18],ymm5[18],ymm1[19],ymm5[19],ymm1[20],ymm5[20],ymm1[21],ymm5[21],ymm1[22],ymm5[22],ymm1[23],ymm5[23]
; AVX512F-NEXT: vpmullw %ymm11, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512F-NEXT: vpackuswb %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpor %ymm1, %ymm2, %ymm1
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: constant_funnnel_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512VL-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm2
; AVX512VL-NEXT: vpsllw $2, %ymm2, %ymm5
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512VL-NEXT: vpand %ymm6, %ymm5, %ymm5
; AVX512VL-NEXT: vpaddb %ymm4, %ymm4, %ymm7
; AVX512VL-NEXT: vpblendvb %ymm7, %ymm5, %ymm2, %ymm2
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm5
; AVX512VL-NEXT: vpaddb %ymm7, %ymm7, %ymm8
; AVX512VL-NEXT: vpblendvb %ymm8, %ymm5, %ymm2, %ymm2
; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512VL-NEXT: vpsrlw $8, %ymm5, %ymm5
; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm9 = [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2]
; AVX512VL-NEXT: # ymm9 = mem[0,1,0,1]
; AVX512VL-NEXT: vpmullw %ymm9, %ymm5, %ymm5
; AVX512VL-NEXT: vpsrlw $8, %ymm5, %ymm5
; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm10 = [256,2,4,8,16,32,64,128,256,2,4,8,16,32,64,128]
; AVX512VL-NEXT: # ymm10 = mem[0,1,0,1]
; AVX512VL-NEXT: vpmullw %ymm10, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512VL-NEXT: vpackuswb %ymm5, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm2
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpblendvb %ymm4, %ymm2, %ymm1, %ymm2
; AVX512VL-NEXT: vpsllw $2, %ymm2, %ymm3
; AVX512VL-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512VL-NEXT: vpblendvb %ymm7, %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm3
; AVX512VL-NEXT: vpblendvb %ymm8, %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512VL-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512VL-NEXT: vpmullw %ymm9, %ymm3, %ymm3
; AVX512VL-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512VL-NEXT: vpmullw %ymm10, %ymm1, %ymm1
; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512VL-NEXT: vpackuswb %ymm3, %ymm1, %ymm1
; AVX512VL-NEXT: vpor %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: constant_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT: vpblendmb %zmm2, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpsllw $2, %zmm2, %zmm3
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vpaddb %zmm2, %zmm2, %zmm2 {%k1}
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm1 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k1
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512VLBW-NEXT: vpblendmb %zmm2, %zmm0, %zmm2 {%k1}
; AVX512VLBW-NEXT: vpsllw $2, %zmm2, %zmm3
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
; AVX512VLBW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k1
; AVX512VLBW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
; AVX512VLBW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k1
; AVX512VLBW-NEXT: vpaddb %zmm2, %zmm2, %zmm2 {%k1}
; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} zmm1 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
; AVX512VLBW-NEXT: vpsrlw $8, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsrlw $8, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
; AVX512VLBW-NEXT: vpsrlw $8, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512VLBW-NEXT: vpsrlw $8, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512VLBW-NEXT: retq
  %res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1>)
  ret <64 x i8> %res
}

;
; Uniform Constant Shifts
;
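
; A uniform constant amount folds into immediates: vprolq $14 / vprold $4
; for the wide elements, and a single immediate shift pair plus OR for
; i16 (<<7 | >>9) and i8 (<<4 | >>4, with masks to keep bits in lane).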
define <8 x i64> @splatconstant_funnnel_v8i64(<8 x i64> %x) nounwind {
; AVX512-LABEL: splatconstant_funnnel_v8i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vprolq $14, %zmm0, %zmm0
; AVX512-NEXT: retq
  %res = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x, <8 x i64> %x, <8 x i64> <i64 14, i64 14, i64 14, i64 14, i64 14, i64 14, i64 14, i64 14>)
  ret <8 x i64> %res
}

define <16 x i32> @splatconstant_funnnel_v16i32(<16 x i32> %x) nounwind {
; AVX512-LABEL: splatconstant_funnnel_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vprold $4, %zmm0, %zmm0
; AVX512-NEXT: retq
  %res = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x, <16 x i32> %x, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>)
  ret <16 x i32> %res
}

define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x) nounwind {
; AVX512F-LABEL: splatconstant_funnnel_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsrlw $9, %ymm0, %ymm2
; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $9, %ymm1, %ymm2
; AVX512F-NEXT: vpsllw $7, %ymm1, %ymm1
; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlw $9, %ymm0, %ymm2
; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $9, %ymm1, %ymm2
; AVX512VL-NEXT: vpsllw $7, %ymm1, %ymm1
; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $9, %zmm0, %zmm1
; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsrlw $9, %zmm0, %zmm1
; AVX512VLBW-NEXT: vpsllw $7, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
  %res = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x, <32 x i16> %x, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  ret <32 x i16> %res
}

define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind {
; AVX512F-LABEL: splatconstant_funnnel_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm2
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm4, %ymm1, %ymm1
; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm2
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm1
; AVX512VL-NEXT: vpand %ymm4, %ymm1, %ymm1
; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm1
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
  %res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
  ret <64 x i8> %res
}