1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=AVX512VL
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512BW
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512VLBW
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2 | FileCheck %s --check-prefix=AVX512VBMI2
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefix=AVX512VLVBMI2
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=XOPAVX1
11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=XOPAVX2
; Funnel-shift-right intrinsic declarations for the 256-bit integer vector
; types exercised by the tests in this file (4 x i64, 8 x i32, 16 x i16 and
; 32 x i8). Operands are (high/first value, low/second value, shift amount).
13 declare <4 x i64> @llvm.fshr.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)
14 declare <8 x i32> @llvm.fshr.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)
15 declare <16 x i16> @llvm.fshr.v16i16(<16 x i16>, <16 x i16>, <16 x i16>)
16 declare <32 x i8> @llvm.fshr.v32i8(<32 x i8>, <32 x i8>, <32 x i8>)
; Variable (per-element) rotate right of a <4 x i64> vector. The test calls
; llvm.fshr with %x as both data operands, so the funnel shift degenerates
; into a rotate of %x by %amt.
; The autogenerated assertions below pin the expected lowering for every RUN
; configuration: a single vprorvq where AVX-512 rotates are usable (widened to
; zmm when VL is absent), vprotq on a negated amount for the XOP runs, and a
; shift-right / shift-left / or sequence for plain AVX and AVX2.
; Do not edit the assertion lines by hand; regenerate them with
; utils/update_llc_test_checks.py.
22 define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind {
23 ; AVX1-LABEL: var_funnnel_v4i64:
25 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
26 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm3
27 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
28 ; AVX1-NEXT: vpsrlq %xmm4, %xmm2, %xmm5
29 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
30 ; AVX1-NEXT: vpsrlq %xmm4, %xmm2, %xmm4
31 ; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7]
32 ; AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm5
33 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
34 ; AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm3
35 ; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm5[0,1,2,3],xmm3[4,5,6,7]
36 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
37 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
38 ; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
39 ; AVX1-NEXT: vpsubq %xmm4, %xmm5, %xmm4
40 ; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [63,63]
41 ; AVX1-NEXT: # xmm6 = mem[0,0]
42 ; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm4
43 ; AVX1-NEXT: vpsllq %xmm4, %xmm2, %xmm7
44 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
45 ; AVX1-NEXT: vpsllq %xmm4, %xmm2, %xmm2
46 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm7[0,1,2,3],xmm2[4,5,6,7]
47 ; AVX1-NEXT: vpsubq %xmm1, %xmm5, %xmm1
48 ; AVX1-NEXT: vpand %xmm6, %xmm1, %xmm1
49 ; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm4
50 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
51 ; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
52 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm4[0,1,2,3],xmm0[4,5,6,7]
53 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
54 ; AVX1-NEXT: vorps %ymm0, %ymm3, %ymm0
57 ; AVX2-LABEL: var_funnnel_v4i64:
59 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [63,63,63,63]
60 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm3
61 ; AVX2-NEXT: vpsrlvq %ymm3, %ymm0, %ymm3
62 ; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
63 ; AVX2-NEXT: vpsubq %ymm1, %ymm4, %ymm1
64 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
65 ; AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
66 ; AVX2-NEXT: vpor %ymm0, %ymm3, %ymm0
69 ; AVX512F-LABEL: var_funnnel_v4i64:
71 ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
72 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
73 ; AVX512F-NEXT: vprorvq %zmm1, %zmm0, %zmm0
74 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
77 ; AVX512VL-LABEL: var_funnnel_v4i64:
79 ; AVX512VL-NEXT: vprorvq %ymm1, %ymm0, %ymm0
82 ; AVX512BW-LABEL: var_funnnel_v4i64:
84 ; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
85 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
86 ; AVX512BW-NEXT: vprorvq %zmm1, %zmm0, %zmm0
87 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
90 ; AVX512VLBW-LABEL: var_funnnel_v4i64:
91 ; AVX512VLBW: # %bb.0:
92 ; AVX512VLBW-NEXT: vprorvq %ymm1, %ymm0, %ymm0
93 ; AVX512VLBW-NEXT: retq
95 ; AVX512VBMI2-LABEL: var_funnnel_v4i64:
96 ; AVX512VBMI2: # %bb.0:
97 ; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
98 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
99 ; AVX512VBMI2-NEXT: vprorvq %zmm1, %zmm0, %zmm0
100 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
101 ; AVX512VBMI2-NEXT: retq
103 ; AVX512VLVBMI2-LABEL: var_funnnel_v4i64:
104 ; AVX512VLVBMI2: # %bb.0:
105 ; AVX512VLVBMI2-NEXT: vprorvq %ymm1, %ymm0, %ymm0
106 ; AVX512VLVBMI2-NEXT: retq
108 ; XOPAVX1-LABEL: var_funnnel_v4i64:
110 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
111 ; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
112 ; XOPAVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2
113 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
114 ; XOPAVX1-NEXT: vprotq %xmm2, %xmm4, %xmm2
115 ; XOPAVX1-NEXT: vpsubq %xmm1, %xmm3, %xmm1
116 ; XOPAVX1-NEXT: vprotq %xmm1, %xmm0, %xmm0
117 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
120 ; XOPAVX2-LABEL: var_funnnel_v4i64:
122 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
123 ; XOPAVX2-NEXT: vpsubq %ymm1, %ymm2, %ymm1
124 ; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
125 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
126 ; XOPAVX2-NEXT: vprotq %xmm2, %xmm3, %xmm2
127 ; XOPAVX2-NEXT: vprotq %xmm1, %xmm0, %xmm0
128 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
130 %res = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x, <4 x i64> %x, <4 x i64> %amt)
; Variable (per-element) rotate right of a <8 x i32> vector, expressed as
; llvm.fshr with %x passed for both data operands and %amt as the amount.
; The autogenerated assertions below pin the expected lowering for every RUN
; configuration: vprorvd where AVX-512 rotates are usable, vprotd on a negated
; amount for the XOP runs, vpsrlvd / vpsllvd / vpor for AVX2, and a
; multiply-based expansion (vpmuludq plus shuffles and blends) for plain AVX.
; Do not edit the assertion lines by hand; regenerate them with
; utils/update_llc_test_checks.py.
134 define <8 x i32> @var_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind {
135 ; AVX1-LABEL: var_funnnel_v8i32:
137 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
138 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
139 ; AVX1-NEXT: vpsubd %xmm2, %xmm3, %xmm2
140 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [31,31,31,31]
141 ; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
142 ; AVX1-NEXT: vpslld $23, %xmm2, %xmm2
143 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm5 = [1065353216,1065353216,1065353216,1065353216]
144 ; AVX1-NEXT: vpaddd %xmm5, %xmm2, %xmm2
145 ; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2
146 ; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[1,1,3,3]
147 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7
148 ; AVX1-NEXT: vpshufd {{.*#+}} xmm8 = xmm7[1,1,3,3]
149 ; AVX1-NEXT: vpmuludq %xmm6, %xmm8, %xmm6
150 ; AVX1-NEXT: vpmuludq %xmm2, %xmm7, %xmm2
151 ; AVX1-NEXT: vpshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
152 ; AVX1-NEXT: vpblendw {{.*#+}} xmm7 = xmm7[0,1],xmm6[2,3],xmm7[4,5],xmm6[6,7]
153 ; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[0,0,2,2]
154 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3],xmm2[4,5],xmm6[6,7]
155 ; AVX1-NEXT: vpor %xmm7, %xmm2, %xmm2
156 ; AVX1-NEXT: vpsubd %xmm1, %xmm3, %xmm1
157 ; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
158 ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
159 ; AVX1-NEXT: vpaddd %xmm5, %xmm1, %xmm1
160 ; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
161 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
162 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
163 ; AVX1-NEXT: vpmuludq %xmm3, %xmm4, %xmm3
164 ; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
165 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
166 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
167 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,0,2,2]
168 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5],xmm3[6,7]
169 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
170 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
173 ; AVX2-LABEL: var_funnnel_v8i32:
175 ; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [31,31,31,31,31,31,31,31]
176 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
177 ; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm2
178 ; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [32,32,32,32,32,32,32,32]
179 ; AVX2-NEXT: vpsubd %ymm1, %ymm3, %ymm1
180 ; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
181 ; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
184 ; AVX512F-LABEL: var_funnnel_v8i32:
186 ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
187 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
188 ; AVX512F-NEXT: vprorvd %zmm1, %zmm0, %zmm0
189 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
192 ; AVX512VL-LABEL: var_funnnel_v8i32:
194 ; AVX512VL-NEXT: vprorvd %ymm1, %ymm0, %ymm0
195 ; AVX512VL-NEXT: retq
197 ; AVX512BW-LABEL: var_funnnel_v8i32:
199 ; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
200 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
201 ; AVX512BW-NEXT: vprorvd %zmm1, %zmm0, %zmm0
202 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
203 ; AVX512BW-NEXT: retq
205 ; AVX512VLBW-LABEL: var_funnnel_v8i32:
206 ; AVX512VLBW: # %bb.0:
207 ; AVX512VLBW-NEXT: vprorvd %ymm1, %ymm0, %ymm0
208 ; AVX512VLBW-NEXT: retq
210 ; AVX512VBMI2-LABEL: var_funnnel_v8i32:
211 ; AVX512VBMI2: # %bb.0:
212 ; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
213 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
214 ; AVX512VBMI2-NEXT: vprorvd %zmm1, %zmm0, %zmm0
215 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
216 ; AVX512VBMI2-NEXT: retq
218 ; AVX512VLVBMI2-LABEL: var_funnnel_v8i32:
219 ; AVX512VLVBMI2: # %bb.0:
220 ; AVX512VLVBMI2-NEXT: vprorvd %ymm1, %ymm0, %ymm0
221 ; AVX512VLVBMI2-NEXT: retq
223 ; XOPAVX1-LABEL: var_funnnel_v8i32:
225 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
226 ; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
227 ; XOPAVX1-NEXT: vpsubd %xmm2, %xmm3, %xmm2
228 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
229 ; XOPAVX1-NEXT: vprotd %xmm2, %xmm4, %xmm2
230 ; XOPAVX1-NEXT: vpsubd %xmm1, %xmm3, %xmm1
231 ; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0
232 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
235 ; XOPAVX2-LABEL: var_funnnel_v8i32:
237 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
238 ; XOPAVX2-NEXT: vpsubd %ymm1, %ymm2, %ymm1
239 ; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
240 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
241 ; XOPAVX2-NEXT: vprotd %xmm2, %xmm3, %xmm2
242 ; XOPAVX2-NEXT: vprotd %xmm1, %xmm0, %xmm0
243 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
245 %res = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x, <8 x i32> %x, <8 x i32> %amt)
; Variable (per-element) rotate right of a <16 x i16> vector, expressed as
; llvm.fshr with %x passed for both data operands and %amt as the amount.
; The autogenerated assertions below pin the expected lowering for every RUN
; configuration: vpshrdvw for the VBMI2 runs, vprotw on a negated amount for
; the XOP runs, vpsrlvw / vpsllvw / vpor when BW word shifts are available,
; an unpack-to-dword vpsrlvd sequence repacked with vpackusdw for AVX2 and
; the F-only / VL-only AVX-512 runs, and a vpmulhuw / vpmullw / vpor
; multiply-based expansion for plain AVX.
; Do not edit the assertion lines by hand; regenerate them with
; utils/update_llc_test_checks.py.
249 define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind {
250 ; AVX1-LABEL: var_funnnel_v16i16:
252 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
253 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
254 ; AVX1-NEXT: vpsubw %xmm2, %xmm3, %xmm2
255 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15]
256 ; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
257 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm2[4,4,5,5,6,6,7,7]
258 ; AVX1-NEXT: vpslld $23, %xmm5, %xmm5
259 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm6 = [1065353216,1065353216,1065353216,1065353216]
260 ; AVX1-NEXT: vpaddd %xmm6, %xmm5, %xmm5
261 ; AVX1-NEXT: vcvttps2dq %xmm5, %xmm5
262 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
263 ; AVX1-NEXT: vpslld $23, %xmm2, %xmm2
264 ; AVX1-NEXT: vpaddd %xmm6, %xmm2, %xmm2
265 ; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2
266 ; AVX1-NEXT: vpackusdw %xmm5, %xmm2, %xmm2
267 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
268 ; AVX1-NEXT: vpmulhuw %xmm2, %xmm5, %xmm7
269 ; AVX1-NEXT: vpmullw %xmm2, %xmm5, %xmm2
270 ; AVX1-NEXT: vpor %xmm7, %xmm2, %xmm2
271 ; AVX1-NEXT: vpsubw %xmm1, %xmm3, %xmm1
272 ; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
273 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm1[4,4,5,5,6,6,7,7]
274 ; AVX1-NEXT: vpslld $23, %xmm3, %xmm3
275 ; AVX1-NEXT: vpaddd %xmm6, %xmm3, %xmm3
276 ; AVX1-NEXT: vcvttps2dq %xmm3, %xmm3
277 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
278 ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
279 ; AVX1-NEXT: vpaddd %xmm6, %xmm1, %xmm1
280 ; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
281 ; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
282 ; AVX1-NEXT: vpmulhuw %xmm1, %xmm0, %xmm3
283 ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
284 ; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
285 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
288 ; AVX2-LABEL: var_funnnel_v16i16:
290 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
291 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
292 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
293 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
294 ; AVX2-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
295 ; AVX2-NEXT: vpblendw {{.*#+}} ymm3 = ymm3[0],ymm2[1],ymm3[2],ymm2[3],ymm3[4],ymm2[5],ymm3[6],ymm2[7],ymm3[8],ymm2[9],ymm3[10],ymm2[11],ymm3[12],ymm2[13],ymm3[14],ymm2[15]
296 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
297 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
298 ; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
299 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
300 ; AVX2-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
303 ; AVX512F-LABEL: var_funnnel_v16i16:
305 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
306 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
307 ; AVX512F-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
308 ; AVX512F-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
309 ; AVX512F-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
310 ; AVX512F-NEXT: vpblendw {{.*#+}} ymm3 = ymm3[0],ymm2[1],ymm3[2],ymm2[3],ymm3[4],ymm2[5],ymm3[6],ymm2[7],ymm3[8],ymm2[9],ymm3[10],ymm2[11],ymm3[12],ymm2[13],ymm3[14],ymm2[15]
311 ; AVX512F-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
312 ; AVX512F-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
313 ; AVX512F-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
314 ; AVX512F-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
315 ; AVX512F-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
318 ; AVX512VL-LABEL: var_funnnel_v16i16:
320 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
321 ; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
322 ; AVX512VL-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
323 ; AVX512VL-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
324 ; AVX512VL-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
325 ; AVX512VL-NEXT: vpblendw {{.*#+}} ymm3 = ymm3[0],ymm2[1],ymm3[2],ymm2[3],ymm3[4],ymm2[5],ymm3[6],ymm2[7],ymm3[8],ymm2[9],ymm3[10],ymm2[11],ymm3[12],ymm2[13],ymm3[14],ymm2[15]
326 ; AVX512VL-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
327 ; AVX512VL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
328 ; AVX512VL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
329 ; AVX512VL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
330 ; AVX512VL-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
331 ; AVX512VL-NEXT: retq
333 ; AVX512BW-LABEL: var_funnnel_v16i16:
335 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
336 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
337 ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2
338 ; AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
339 ; AVX512BW-NEXT: vpsubw %ymm1, %ymm3, %ymm1
340 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
341 ; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0
342 ; AVX512BW-NEXT: retq
344 ; AVX512VLBW-LABEL: var_funnnel_v16i16:
345 ; AVX512VLBW: # %bb.0:
346 ; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
347 ; AVX512VLBW-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2
348 ; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
349 ; AVX512VLBW-NEXT: vpsubw %ymm1, %ymm3, %ymm1
350 ; AVX512VLBW-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
351 ; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0
352 ; AVX512VLBW-NEXT: retq
354 ; AVX512VBMI2-LABEL: var_funnnel_v16i16:
355 ; AVX512VBMI2: # %bb.0:
356 ; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
357 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
358 ; AVX512VBMI2-NEXT: vpshrdvw %zmm1, %zmm0, %zmm0
359 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
360 ; AVX512VBMI2-NEXT: retq
362 ; AVX512VLVBMI2-LABEL: var_funnnel_v16i16:
363 ; AVX512VLVBMI2: # %bb.0:
364 ; AVX512VLVBMI2-NEXT: vpshrdvw %ymm1, %ymm0, %ymm0
365 ; AVX512VLVBMI2-NEXT: retq
367 ; XOPAVX1-LABEL: var_funnnel_v16i16:
369 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
370 ; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
371 ; XOPAVX1-NEXT: vpsubw %xmm2, %xmm3, %xmm2
372 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
373 ; XOPAVX1-NEXT: vprotw %xmm2, %xmm4, %xmm2
374 ; XOPAVX1-NEXT: vpsubw %xmm1, %xmm3, %xmm1
375 ; XOPAVX1-NEXT: vprotw %xmm1, %xmm0, %xmm0
376 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
379 ; XOPAVX2-LABEL: var_funnnel_v16i16:
381 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
382 ; XOPAVX2-NEXT: vpsubw %ymm1, %ymm2, %ymm1
383 ; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
384 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
385 ; XOPAVX2-NEXT: vprotw %xmm2, %xmm3, %xmm2
386 ; XOPAVX2-NEXT: vprotw %xmm1, %xmm0, %xmm0
387 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
389 %res = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x, <16 x i16> %x, <16 x i16> %amt)
; Variable (per-element) rotate right of a <32 x i8> vector, expressed as
; llvm.fshr with %x passed for both data operands and %amt as the amount.
; The autogenerated assertions below pin the expected lowering for every RUN
; configuration: vprotb on a negated amount for the XOP runs; three staged
; fixed-amount rotates, each selected per byte with vpblendvb, for AVX, AVX2
; and the F-only / VL-only AVX-512 runs; and, when BW word shifts are
; available, bytes unpacked to words, shifted with vpsrlvw and narrowed again
; with vpackuswb (or vpermi2b on the VBMI runs).
; Do not edit the assertion lines by hand; regenerate them with
; utils/update_llc_test_checks.py.
393 define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
394 ; AVX1-LABEL: var_funnnel_v32i8:
396 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
397 ; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm3
398 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
399 ; AVX1-NEXT: vpandn %xmm3, %xmm4, %xmm3
400 ; AVX1-NEXT: vpsllw $4, %xmm2, %xmm5
401 ; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm5
402 ; AVX1-NEXT: vpor %xmm3, %xmm5, %xmm3
403 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
404 ; AVX1-NEXT: vpxor %xmm6, %xmm6, %xmm6
405 ; AVX1-NEXT: vpsubb %xmm5, %xmm6, %xmm5
406 ; AVX1-NEXT: vpsllw $5, %xmm5, %xmm5
407 ; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
408 ; AVX1-NEXT: vpsrlw $6, %xmm2, %xmm3
409 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm7 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
410 ; AVX1-NEXT: vpandn %xmm3, %xmm7, %xmm3
411 ; AVX1-NEXT: vpsllw $2, %xmm2, %xmm8
412 ; AVX1-NEXT: vpand %xmm7, %xmm8, %xmm8
413 ; AVX1-NEXT: vpor %xmm3, %xmm8, %xmm3
414 ; AVX1-NEXT: vpaddb %xmm5, %xmm5, %xmm5
415 ; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
416 ; AVX1-NEXT: vpsrlw $7, %xmm2, %xmm3
417 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm8 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
418 ; AVX1-NEXT: vpand %xmm3, %xmm8, %xmm3
419 ; AVX1-NEXT: vpaddb %xmm2, %xmm2, %xmm9
420 ; AVX1-NEXT: vpor %xmm3, %xmm9, %xmm3
421 ; AVX1-NEXT: vpaddb %xmm5, %xmm5, %xmm5
422 ; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
423 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm3
424 ; AVX1-NEXT: vpandn %xmm3, %xmm4, %xmm3
425 ; AVX1-NEXT: vpsllw $4, %xmm0, %xmm5
426 ; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm4
427 ; AVX1-NEXT: vpor %xmm3, %xmm4, %xmm3
428 ; AVX1-NEXT: vpsubb %xmm1, %xmm6, %xmm1
429 ; AVX1-NEXT: vpsllw $5, %xmm1, %xmm1
430 ; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
431 ; AVX1-NEXT: vpsrlw $6, %xmm0, %xmm3
432 ; AVX1-NEXT: vpandn %xmm3, %xmm7, %xmm3
433 ; AVX1-NEXT: vpsllw $2, %xmm0, %xmm4
434 ; AVX1-NEXT: vpand %xmm7, %xmm4, %xmm4
435 ; AVX1-NEXT: vpor %xmm3, %xmm4, %xmm3
436 ; AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm1
437 ; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
438 ; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm3
439 ; AVX1-NEXT: vpand %xmm3, %xmm8, %xmm3
440 ; AVX1-NEXT: vpaddb %xmm0, %xmm0, %xmm4
441 ; AVX1-NEXT: vpor %xmm3, %xmm4, %xmm3
442 ; AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm1
443 ; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
444 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
447 ; AVX2-LABEL: var_funnnel_v32i8:
449 ; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm2
450 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
451 ; AVX2-NEXT: vpsllw $4, %ymm0, %ymm3
452 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
453 ; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2
454 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
455 ; AVX2-NEXT: vpsubb %ymm1, %ymm3, %ymm1
456 ; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
457 ; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
458 ; AVX2-NEXT: vpsrlw $6, %ymm0, %ymm2
459 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
460 ; AVX2-NEXT: vpsllw $2, %ymm0, %ymm3
461 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
462 ; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2
463 ; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
464 ; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
465 ; AVX2-NEXT: vpsrlw $7, %ymm0, %ymm2
466 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
467 ; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm3
468 ; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2
469 ; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
470 ; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
473 ; AVX512F-LABEL: var_funnnel_v32i8:
475 ; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2
476 ; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm3
477 ; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm3
478 ; AVX512F-NEXT: vpsllw $5, %ymm1, %ymm1
479 ; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
480 ; AVX512F-NEXT: vpsrlw $2, %ymm0, %ymm2
481 ; AVX512F-NEXT: vpsllw $6, %ymm0, %ymm3
482 ; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm3
483 ; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm1
484 ; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
485 ; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm2
486 ; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm3
487 ; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm3
488 ; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm1
489 ; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
492 ; AVX512VL-LABEL: var_funnnel_v32i8:
494 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2
495 ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm3
496 ; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3
497 ; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
498 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
499 ; AVX512VL-NEXT: vpsrlw $2, %ymm0, %ymm2
500 ; AVX512VL-NEXT: vpsllw $6, %ymm0, %ymm3
501 ; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3
502 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
503 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
504 ; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm2
505 ; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm3
506 ; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3
507 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
508 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
509 ; AVX512VL-NEXT: retq
511 ; AVX512BW-LABEL: var_funnnel_v32i8:
513 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
514 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
515 ; AVX512BW-NEXT: vpxor %xmm3, %xmm3, %xmm3
516 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm3[8],ymm1[9],ymm3[9],ymm1[10],ymm3[10],ymm1[11],ymm3[11],ymm1[12],ymm3[12],ymm1[13],ymm3[13],ymm1[14],ymm3[14],ymm1[15],ymm3[15],ymm1[24],ymm3[24],ymm1[25],ymm3[25],ymm1[26],ymm3[26],ymm1[27],ymm3[27],ymm1[28],ymm3[28],ymm1[29],ymm3[29],ymm1[30],ymm3[30],ymm1[31],ymm3[31]
517 ; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm2, %zmm2
518 ; AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
519 ; AVX512BW-NEXT: vpand %ymm4, %ymm2, %ymm2
520 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
521 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[4],ymm3[4],ymm1[5],ymm3[5],ymm1[6],ymm3[6],ymm1[7],ymm3[7],ymm1[16],ymm3[16],ymm1[17],ymm3[17],ymm1[18],ymm3[18],ymm1[19],ymm3[19],ymm1[20],ymm3[20],ymm1[21],ymm3[21],ymm1[22],ymm3[22],ymm1[23],ymm3[23]
522 ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
523 ; AVX512BW-NEXT: vpand %ymm4, %ymm0, %ymm0
524 ; AVX512BW-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
525 ; AVX512BW-NEXT: retq
527 ; AVX512VLBW-LABEL: var_funnnel_v32i8:
528 ; AVX512VLBW: # %bb.0:
529 ; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
530 ; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
531 ; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
532 ; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
533 ; AVX512VLBW-NEXT: vpsrlvw %ymm3, %ymm4, %ymm3
534 ; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
535 ; AVX512VLBW-NEXT: vpand %ymm4, %ymm3, %ymm3
536 ; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23]
537 ; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
538 ; AVX512VLBW-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
539 ; AVX512VLBW-NEXT: vpand %ymm4, %ymm0, %ymm0
540 ; AVX512VLBW-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
541 ; AVX512VLBW-NEXT: retq
543 ; AVX512VBMI2-LABEL: var_funnnel_v32i8:
544 ; AVX512VBMI2: # %bb.0:
545 ; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
546 ; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
547 ; AVX512VBMI2-NEXT: vpxor %xmm3, %xmm3, %xmm3
548 ; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm3[8],ymm1[9],ymm3[9],ymm1[10],ymm3[10],ymm1[11],ymm3[11],ymm1[12],ymm3[12],ymm1[13],ymm3[13],ymm1[14],ymm3[14],ymm1[15],ymm3[15],ymm1[24],ymm3[24],ymm1[25],ymm3[25],ymm1[26],ymm3[26],ymm1[27],ymm3[27],ymm1[28],ymm3[28],ymm1[29],ymm3[29],ymm1[30],ymm3[30],ymm1[31],ymm3[31]
549 ; AVX512VBMI2-NEXT: vpsrlvw %zmm4, %zmm2, %zmm2
550 ; AVX512VBMI2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
551 ; AVX512VBMI2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[4],ymm3[4],ymm1[5],ymm3[5],ymm1[6],ymm3[6],ymm1[7],ymm3[7],ymm1[16],ymm3[16],ymm1[17],ymm3[17],ymm1[18],ymm3[18],ymm1[19],ymm3[19],ymm1[20],ymm3[20],ymm1[21],ymm3[21],ymm1[22],ymm3[22],ymm1[23],ymm3[23]
552 ; AVX512VBMI2-NEXT: vpsrlvw %zmm1, %zmm0, %zmm1
553 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,64,66,68,70,72,74,76,78,16,18,20,22,24,26,28,30,80,82,84,86,88,90,92,94]
554 ; AVX512VBMI2-NEXT: vpermi2b %zmm2, %zmm1, %zmm0
555 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
556 ; AVX512VBMI2-NEXT: retq
558 ; AVX512VLVBMI2-LABEL: var_funnnel_v32i8:
559 ; AVX512VLVBMI2: # %bb.0:
560 ; AVX512VLVBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
561 ; AVX512VLVBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
562 ; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
563 ; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
564 ; AVX512VLVBMI2-NEXT: vpsrlvw %ymm3, %ymm4, %ymm3
565 ; AVX512VLVBMI2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23]
566 ; AVX512VLVBMI2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
567 ; AVX512VLVBMI2-NEXT: vpsrlvw %ymm1, %ymm0, %ymm1
568 ; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62]
569 ; AVX512VLVBMI2-NEXT: vpermi2b %ymm3, %ymm1, %ymm0
570 ; AVX512VLVBMI2-NEXT: retq
572 ; XOPAVX1-LABEL: var_funnnel_v32i8:
574 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
575 ; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
576 ; XOPAVX1-NEXT: vpsubb %xmm2, %xmm3, %xmm2
577 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
578 ; XOPAVX1-NEXT: vprotb %xmm2, %xmm4, %xmm2
579 ; XOPAVX1-NEXT: vpsubb %xmm1, %xmm3, %xmm1
580 ; XOPAVX1-NEXT: vprotb %xmm1, %xmm0, %xmm0
581 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
584 ; XOPAVX2-LABEL: var_funnnel_v32i8:
586 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
587 ; XOPAVX2-NEXT: vpsubb %ymm1, %ymm2, %ymm1
588 ; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
589 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
590 ; XOPAVX2-NEXT: vprotb %xmm2, %xmm3, %xmm2
591 ; XOPAVX2-NEXT: vprotb %xmm1, %xmm0, %xmm0
592 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
594 %res = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %x, <32 x i8> %x, <32 x i8> %amt)
599 ; Uniform Variable Shifts
; splatvar_funnnel_v4i64: <4 x i64> rotate with a uniform (splatted) variable amount.
; %splat broadcasts element 0 of %amt to all four lanes (zeroinitializer shuffle
; mask). llvm.fshr is called with %x as both data operands, so the funnel shift
; right acts as a per-lane rotate right of %x by %splat.
; The per-prefix check lines are autogenerated by utils/update_llc_test_checks.py
; (see the NOTE on the first line of the file); regenerate them, do not hand-edit.
602 define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind {
603 ; AVX1-LABEL: splatvar_funnnel_v4i64:
605 ; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [63,63]
606 ; AVX1-NEXT: # xmm2 = mem[0,0]
607 ; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
608 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
609 ; AVX1-NEXT: vpsrlq %xmm3, %xmm4, %xmm5
610 ; AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm3
611 ; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm3, %ymm3
612 ; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
613 ; AVX1-NEXT: vpsubq %xmm1, %xmm5, %xmm1
614 ; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
615 ; AVX1-NEXT: vpsllq %xmm1, %xmm4, %xmm2
616 ; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
617 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
618 ; AVX1-NEXT: vorps %ymm0, %ymm3, %ymm0
621 ; AVX2-LABEL: splatvar_funnnel_v4i64:
623 ; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
624 ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
625 ; AVX2-NEXT: vpsrlq %xmm3, %ymm0, %ymm3
626 ; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
627 ; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1
628 ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
629 ; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
630 ; AVX2-NEXT: vpor %ymm0, %ymm3, %ymm0
633 ; AVX512F-LABEL: splatvar_funnnel_v4i64:
635 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
636 ; AVX512F-NEXT: vpbroadcastq %xmm1, %ymm1
637 ; AVX512F-NEXT: vprorvq %zmm1, %zmm0, %zmm0
638 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
641 ; AVX512VL-LABEL: splatvar_funnnel_v4i64:
643 ; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
644 ; AVX512VL-NEXT: vprorvq %ymm1, %ymm0, %ymm0
645 ; AVX512VL-NEXT: retq
647 ; AVX512BW-LABEL: splatvar_funnnel_v4i64:
649 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
650 ; AVX512BW-NEXT: vpbroadcastq %xmm1, %ymm1
651 ; AVX512BW-NEXT: vprorvq %zmm1, %zmm0, %zmm0
652 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
653 ; AVX512BW-NEXT: retq
655 ; AVX512VLBW-LABEL: splatvar_funnnel_v4i64:
656 ; AVX512VLBW: # %bb.0:
657 ; AVX512VLBW-NEXT: vpbroadcastq %xmm1, %ymm1
658 ; AVX512VLBW-NEXT: vprorvq %ymm1, %ymm0, %ymm0
659 ; AVX512VLBW-NEXT: retq
661 ; AVX512VBMI2-LABEL: splatvar_funnnel_v4i64:
662 ; AVX512VBMI2: # %bb.0:
663 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
664 ; AVX512VBMI2-NEXT: vpbroadcastq %xmm1, %ymm1
665 ; AVX512VBMI2-NEXT: vprorvq %zmm1, %zmm0, %zmm0
666 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
667 ; AVX512VBMI2-NEXT: retq
669 ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v4i64:
670 ; AVX512VLVBMI2: # %bb.0:
671 ; AVX512VLVBMI2-NEXT: vpbroadcastq %xmm1, %ymm1
672 ; AVX512VLVBMI2-NEXT: vprorvq %ymm1, %ymm0, %ymm0
673 ; AVX512VLVBMI2-NEXT: retq
675 ; XOPAVX1-LABEL: splatvar_funnnel_v4i64:
677 ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
678 ; XOPAVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm1
679 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
680 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
681 ; XOPAVX1-NEXT: vprotq %xmm1, %xmm2, %xmm2
682 ; XOPAVX1-NEXT: vprotq %xmm1, %xmm0, %xmm0
683 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
686 ; XOPAVX2-LABEL: splatvar_funnnel_v4i64:
688 ; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1
689 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
690 ; XOPAVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm1
691 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
692 ; XOPAVX2-NEXT: vprotq %xmm1, %xmm2, %xmm2
693 ; XOPAVX2-NEXT: vprotq %xmm1, %xmm0, %xmm0
694 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
696 %splat = shufflevector <4 x i64> %amt, <4 x i64> undef, <4 x i32> zeroinitializer
697 %res = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x, <4 x i64> %x, <4 x i64> %splat)
; splatvar_funnnel_v8i32: <8 x i32> rotate with a uniform (splatted) variable amount.
; %splat broadcasts element 0 of %amt to all eight lanes (zeroinitializer shuffle
; mask). llvm.fshr is called with %x as both data operands, so the funnel shift
; right acts as a per-lane rotate right of %x by %splat.
; The per-prefix check lines are autogenerated by utils/update_llc_test_checks.py
; (see the NOTE on the first line of the file); regenerate them, do not hand-edit.
701 define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind {
702 ; AVX1-LABEL: splatvar_funnnel_v8i32:
704 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
705 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
706 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,2,3,3]
707 ; AVX1-NEXT: vpsrlq %xmm1, %xmm3, %xmm3
708 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[2,2,3,3]
709 ; AVX1-NEXT: vpsrlq %xmm1, %xmm4, %xmm4
710 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
711 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
712 ; AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
713 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
714 ; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
715 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
716 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm3[0,2],ymm0[4,6],ymm3[4,6]
719 ; AVX2-LABEL: splatvar_funnnel_v8i32:
721 ; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm0[2,2,3,3,6,6,7,7]
722 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
723 ; AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
724 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
725 ; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
726 ; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
729 ; AVX512F-LABEL: splatvar_funnnel_v8i32:
731 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
732 ; AVX512F-NEXT: vpbroadcastd %xmm1, %ymm1
733 ; AVX512F-NEXT: vprorvd %zmm1, %zmm0, %zmm0
734 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
737 ; AVX512VL-LABEL: splatvar_funnnel_v8i32:
739 ; AVX512VL-NEXT: vpbroadcastd %xmm1, %ymm1
740 ; AVX512VL-NEXT: vprorvd %ymm1, %ymm0, %ymm0
741 ; AVX512VL-NEXT: retq
743 ; AVX512BW-LABEL: splatvar_funnnel_v8i32:
745 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
746 ; AVX512BW-NEXT: vpbroadcastd %xmm1, %ymm1
747 ; AVX512BW-NEXT: vprorvd %zmm1, %zmm0, %zmm0
748 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
749 ; AVX512BW-NEXT: retq
751 ; AVX512VLBW-LABEL: splatvar_funnnel_v8i32:
752 ; AVX512VLBW: # %bb.0:
753 ; AVX512VLBW-NEXT: vpbroadcastd %xmm1, %ymm1
754 ; AVX512VLBW-NEXT: vprorvd %ymm1, %ymm0, %ymm0
755 ; AVX512VLBW-NEXT: retq
757 ; AVX512VBMI2-LABEL: splatvar_funnnel_v8i32:
758 ; AVX512VBMI2: # %bb.0:
759 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
760 ; AVX512VBMI2-NEXT: vpbroadcastd %xmm1, %ymm1
761 ; AVX512VBMI2-NEXT: vprorvd %zmm1, %zmm0, %zmm0
762 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
763 ; AVX512VBMI2-NEXT: retq
765 ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v8i32:
766 ; AVX512VLVBMI2: # %bb.0:
767 ; AVX512VLVBMI2-NEXT: vpbroadcastd %xmm1, %ymm1
768 ; AVX512VLVBMI2-NEXT: vprorvd %ymm1, %ymm0, %ymm0
769 ; AVX512VLVBMI2-NEXT: retq
771 ; XOPAVX1-LABEL: splatvar_funnnel_v8i32:
773 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
774 ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
775 ; XOPAVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
776 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
777 ; XOPAVX1-NEXT: vprotd %xmm1, %xmm2, %xmm2
778 ; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0
779 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
782 ; XOPAVX2-LABEL: splatvar_funnnel_v8i32:
784 ; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
785 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
786 ; XOPAVX2-NEXT: vpsubd %xmm1, %xmm2, %xmm1
787 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
788 ; XOPAVX2-NEXT: vprotd %xmm1, %xmm2, %xmm2
789 ; XOPAVX2-NEXT: vprotd %xmm1, %xmm0, %xmm0
790 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
792 %splat = shufflevector <8 x i32> %amt, <8 x i32> undef, <8 x i32> zeroinitializer
793 %res = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x, <8 x i32> %x, <8 x i32> %splat)
; splatvar_funnnel_v16i16: <16 x i16> rotate with a uniform (splatted) variable amount.
; %splat broadcasts element 0 of %amt to all sixteen lanes (zeroinitializer
; shuffle mask). llvm.fshr is called with %x as both data operands, so the funnel
; shift right acts as a per-lane rotate right of %x by %splat.
; The per-prefix check lines are autogenerated by utils/update_llc_test_checks.py
; (see the NOTE on the first line of the file); regenerate them, do not hand-edit.
797 define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind {
798 ; AVX1-LABEL: splatvar_funnnel_v16i16:
800 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
801 ; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
802 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
803 ; AVX1-NEXT: vpsrlw %xmm3, %xmm4, %xmm5
804 ; AVX1-NEXT: vpandn %xmm2, %xmm1, %xmm1
805 ; AVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm2
806 ; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
807 ; AVX1-NEXT: vpor %xmm5, %xmm2, %xmm2
808 ; AVX1-NEXT: vpsrlw %xmm3, %xmm0, %xmm3
809 ; AVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0
810 ; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
811 ; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
812 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
815 ; AVX2-LABEL: splatvar_funnnel_v16i16:
817 ; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
818 ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
819 ; AVX2-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
820 ; AVX2-NEXT: vpandn %xmm2, %xmm1, %xmm1
821 ; AVX2-NEXT: vpaddw %ymm0, %ymm0, %ymm0
822 ; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
823 ; AVX2-NEXT: vpor %ymm3, %ymm0, %ymm0
826 ; AVX512F-LABEL: splatvar_funnnel_v16i16:
828 ; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
829 ; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3
830 ; AVX512F-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
831 ; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm1
832 ; AVX512F-NEXT: vpaddw %ymm0, %ymm0, %ymm0
833 ; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0
834 ; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0
837 ; AVX512VL-LABEL: splatvar_funnnel_v16i16:
839 ; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
840 ; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3
841 ; AVX512VL-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
842 ; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm1
843 ; AVX512VL-NEXT: vpaddw %ymm0, %ymm0, %ymm0
844 ; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
845 ; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0
846 ; AVX512VL-NEXT: retq
848 ; AVX512BW-LABEL: splatvar_funnnel_v16i16:
850 ; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
851 ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
852 ; AVX512BW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
853 ; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1
854 ; AVX512BW-NEXT: vpaddw %ymm0, %ymm0, %ymm0
855 ; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
856 ; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0
857 ; AVX512BW-NEXT: retq
859 ; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
860 ; AVX512VLBW: # %bb.0:
861 ; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
862 ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
863 ; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
864 ; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1
865 ; AVX512VLBW-NEXT: vpaddw %ymm0, %ymm0, %ymm0
866 ; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
867 ; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0
868 ; AVX512VLBW-NEXT: retq
870 ; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16:
871 ; AVX512VBMI2: # %bb.0:
872 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
873 ; AVX512VBMI2-NEXT: vpbroadcastw %xmm1, %ymm1
874 ; AVX512VBMI2-NEXT: vpshrdvw %zmm1, %zmm0, %zmm0
875 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
876 ; AVX512VBMI2-NEXT: retq
878 ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i16:
879 ; AVX512VLVBMI2: # %bb.0:
880 ; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm1, %ymm1
881 ; AVX512VLVBMI2-NEXT: vpshrdvw %ymm1, %ymm0, %ymm0
882 ; AVX512VLVBMI2-NEXT: retq
884 ; XOPAVX1-LABEL: splatvar_funnnel_v16i16:
886 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
887 ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
888 ; XOPAVX1-NEXT: vpsubw %xmm1, %xmm2, %xmm1
889 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
890 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
891 ; XOPAVX1-NEXT: vprotw %xmm1, %xmm2, %xmm2
892 ; XOPAVX1-NEXT: vprotw %xmm1, %xmm0, %xmm0
893 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
896 ; XOPAVX2-LABEL: splatvar_funnnel_v16i16:
898 ; XOPAVX2-NEXT: vpbroadcastw %xmm1, %xmm1
899 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
900 ; XOPAVX2-NEXT: vpsubw %xmm1, %xmm2, %xmm1
901 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
902 ; XOPAVX2-NEXT: vprotw %xmm1, %xmm2, %xmm2
903 ; XOPAVX2-NEXT: vprotw %xmm1, %xmm0, %xmm0
904 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
906 %splat = shufflevector <16 x i16> %amt, <16 x i16> undef, <16 x i32> zeroinitializer
907 %res = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x, <16 x i16> %x, <16 x i16> %splat)
; splatvar_funnnel_v32i8: <32 x i8> rotate with a uniform (splatted) variable amount.
; %splat broadcasts element 0 of %amt to all thirty-two lanes (zeroinitializer
; shuffle mask). llvm.fshr is called with %x as both data operands, so the funnel
; shift right acts as a per-lane rotate right of %x by %splat.
; The per-prefix check lines are autogenerated by utils/update_llc_test_checks.py
; (see the NOTE on the first line of the file); regenerate them, do not hand-edit.
911 define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
912 ; AVX1-LABEL: splatvar_funnnel_v32i8:
914 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
915 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
916 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
917 ; AVX1-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
918 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
919 ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
920 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
921 ; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
922 ; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
923 ; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
924 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
925 ; AVX1-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
926 ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
927 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
928 ; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
929 ; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
930 ; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
931 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
934 ; AVX2-LABEL: splatvar_funnnel_v32i8:
936 ; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
937 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
938 ; AVX2-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
939 ; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
940 ; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
941 ; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
942 ; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
943 ; AVX2-NEXT: vpand %ymm3, %ymm0, %ymm0
944 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
947 ; AVX512F-LABEL: splatvar_funnnel_v32i8:
949 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
950 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
951 ; AVX512F-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
952 ; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
953 ; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
954 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
955 ; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
956 ; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
957 ; AVX512F-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
960 ; AVX512VL-LABEL: splatvar_funnnel_v32i8:
962 ; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
963 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
964 ; AVX512VL-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
965 ; AVX512VL-NEXT: vpbroadcastd {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
966 ; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
967 ; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
968 ; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
969 ; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
970 ; AVX512VL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
971 ; AVX512VL-NEXT: retq
973 ; AVX512BW-LABEL: splatvar_funnnel_v32i8:
975 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
976 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
977 ; AVX512BW-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
978 ; AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
979 ; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm2
980 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
981 ; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
982 ; AVX512BW-NEXT: vpand %ymm3, %ymm0, %ymm0
983 ; AVX512BW-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
984 ; AVX512BW-NEXT: retq
986 ; AVX512VLBW-LABEL: splatvar_funnnel_v32i8:
987 ; AVX512VLBW: # %bb.0:
988 ; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
989 ; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
990 ; AVX512VLBW-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
991 ; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
992 ; AVX512VLBW-NEXT: vpand %ymm3, %ymm2, %ymm2
993 ; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
994 ; AVX512VLBW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
995 ; AVX512VLBW-NEXT: vpand %ymm3, %ymm0, %ymm0
996 ; AVX512VLBW-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
997 ; AVX512VLBW-NEXT: retq
999 ; AVX512VBMI2-LABEL: splatvar_funnnel_v32i8:
1000 ; AVX512VBMI2: # %bb.0:
1001 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14,64,66,68,70,72,74,76,78,16,18,20,22,24,26,28,30,80,82,84,86,88,90,92,94]
1002 ; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
1003 ; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1004 ; AVX512VBMI2-NEXT: vpsrlw %xmm1, %ymm3, %ymm3
1005 ; AVX512VBMI2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
1006 ; AVX512VBMI2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
1007 ; AVX512VBMI2-NEXT: vpermt2b %zmm3, %zmm2, %zmm0
1008 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1009 ; AVX512VBMI2-NEXT: retq
1011 ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i8:
1012 ; AVX512VLVBMI2: # %bb.0:
1013 ; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
1014 ; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1015 ; AVX512VLVBMI2-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
1016 ; AVX512VLVBMI2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
1017 ; AVX512VLVBMI2-NEXT: vpsrlw %xmm1, %ymm0, %ymm1
1018 ; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62]
1019 ; AVX512VLVBMI2-NEXT: vpermi2b %ymm2, %ymm1, %ymm0
1020 ; AVX512VLVBMI2-NEXT: retq
1022 ; XOPAVX1-LABEL: splatvar_funnnel_v32i8:
1024 ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1025 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1026 ; XOPAVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1
1027 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1028 ; XOPAVX1-NEXT: vprotb %xmm1, %xmm2, %xmm2
1029 ; XOPAVX1-NEXT: vprotb %xmm1, %xmm0, %xmm0
1030 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1031 ; XOPAVX1-NEXT: retq
1033 ; XOPAVX2-LABEL: splatvar_funnnel_v32i8:
1035 ; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
1036 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1037 ; XOPAVX2-NEXT: vpsubb %xmm1, %xmm2, %xmm1
1038 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
1039 ; XOPAVX2-NEXT: vprotb %xmm1, %xmm2, %xmm2
1040 ; XOPAVX2-NEXT: vprotb %xmm1, %xmm0, %xmm0
1041 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1042 ; XOPAVX2-NEXT: retq
1043 %splat = shufflevector <32 x i8> %amt, <32 x i8> undef, <32 x i32> zeroinitializer
1044 %res = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %x, <32 x i8> %x, <32 x i8> %splat)
; constant_funnnel_v4i64: <4 x i64> rotate with a constant, non-uniform amount.
; llvm.fshr is called with %x as both data operands and the per-lane constant
; amounts <4, 14, 50, 60>, so the funnel shift right acts as a per-lane rotate
; right of %x by those amounts.
; The per-prefix check lines are autogenerated by utils/update_llc_test_checks.py
; (see the NOTE on the first line of the file); regenerate them, do not hand-edit.
1052 define <4 x i64> @constant_funnnel_v4i64(<4 x i64> %x) nounwind {
1053 ; AVX1-LABEL: constant_funnnel_v4i64:
1055 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1056 ; AVX1-NEXT: vpsllq $4, %xmm1, %xmm2
1057 ; AVX1-NEXT: vpsllq $14, %xmm1, %xmm3
1058 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
1059 ; AVX1-NEXT: vpsllq $50, %xmm0, %xmm3
1060 ; AVX1-NEXT: vpsllq $60, %xmm0, %xmm4
1061 ; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
1062 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1063 ; AVX1-NEXT: vpsrlq $60, %xmm1, %xmm3
1064 ; AVX1-NEXT: vpsrlq $50, %xmm1, %xmm1
1065 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
1066 ; AVX1-NEXT: vpsrlq $14, %xmm0, %xmm3
1067 ; AVX1-NEXT: vpsrlq $4, %xmm0, %xmm0
1068 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
1069 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1070 ; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
1073 ; AVX2-LABEL: constant_funnnel_v4i64:
1075 ; AVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
1076 ; AVX2-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1077 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1080 ; AVX512F-LABEL: constant_funnnel_v4i64:
1082 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1083 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [4,14,50,60]
1084 ; AVX512F-NEXT: vprorvq %zmm1, %zmm0, %zmm0
1085 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1086 ; AVX512F-NEXT: retq
1088 ; AVX512VL-LABEL: constant_funnnel_v4i64:
1089 ; AVX512VL: # %bb.0:
1090 ; AVX512VL-NEXT: vprorvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1091 ; AVX512VL-NEXT: retq
1093 ; AVX512BW-LABEL: constant_funnnel_v4i64:
1094 ; AVX512BW: # %bb.0:
1095 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1096 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [4,14,50,60]
1097 ; AVX512BW-NEXT: vprorvq %zmm1, %zmm0, %zmm0
1098 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1099 ; AVX512BW-NEXT: retq
1101 ; AVX512VLBW-LABEL: constant_funnnel_v4i64:
1102 ; AVX512VLBW: # %bb.0:
1103 ; AVX512VLBW-NEXT: vprorvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1104 ; AVX512VLBW-NEXT: retq
1106 ; AVX512VBMI2-LABEL: constant_funnnel_v4i64:
1107 ; AVX512VBMI2: # %bb.0:
1108 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1109 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [4,14,50,60]
1110 ; AVX512VBMI2-NEXT: vprorvq %zmm1, %zmm0, %zmm0
1111 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1112 ; AVX512VBMI2-NEXT: retq
1114 ; AVX512VLVBMI2-LABEL: constant_funnnel_v4i64:
1115 ; AVX512VLVBMI2: # %bb.0:
1116 ; AVX512VLVBMI2-NEXT: vprorvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1117 ; AVX512VLVBMI2-NEXT: retq
1119 ; XOPAVX1-LABEL: constant_funnnel_v4i64:
1121 ; XOPAVX1-NEXT: vprotq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1122 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1123 ; XOPAVX1-NEXT: vprotq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1124 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1125 ; XOPAVX1-NEXT: retq
1127 ; XOPAVX2-LABEL: constant_funnnel_v4i64:
1129 ; XOPAVX2-NEXT: vprotq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1130 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1131 ; XOPAVX2-NEXT: vprotq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1132 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1133 ; XOPAVX2-NEXT: retq
1134 %res = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x, <4 x i64> %x, <4 x i64> <i64 4, i64 14, i64 50, i64 60>)
; constant_funnnel_v8i32: <8 x i32> rotate with a constant, non-uniform amount.
; llvm.fshr is called with %x as both data operands and the per-lane constant
; amounts <4, 5, 6, 7, 8, 9, 10, 11>, so the funnel shift right acts as a
; per-lane rotate right of %x by those amounts.
; The per-prefix check lines are autogenerated by utils/update_llc_test_checks.py
; (see the NOTE on the first line of the file); regenerate them, do not hand-edit.
1138 define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x) nounwind {
1139 ; AVX1-LABEL: constant_funnnel_v8i32:
1141 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1142 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1143 ; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1144 ; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1145 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
1146 ; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
1147 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
1148 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
1149 ; AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1
1150 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1151 ; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1152 ; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1153 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1154 ; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
1155 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
1156 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
1157 ; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
1158 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1161 ; AVX2-LABEL: constant_funnnel_v8i32:
1163 ; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
1164 ; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1165 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1168 ; AVX512F-LABEL: constant_funnnel_v8i32:
1170 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1171 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [4,5,6,7,8,9,10,11]
1172 ; AVX512F-NEXT: vprorvd %zmm1, %zmm0, %zmm0
1173 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1174 ; AVX512F-NEXT: retq
1176 ; AVX512VL-LABEL: constant_funnnel_v8i32:
1177 ; AVX512VL: # %bb.0:
1178 ; AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1179 ; AVX512VL-NEXT: retq
1181 ; AVX512BW-LABEL: constant_funnnel_v8i32:
1182 ; AVX512BW: # %bb.0:
1183 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1184 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [4,5,6,7,8,9,10,11]
1185 ; AVX512BW-NEXT: vprorvd %zmm1, %zmm0, %zmm0
1186 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1187 ; AVX512BW-NEXT: retq
1189 ; AVX512VLBW-LABEL: constant_funnnel_v8i32:
1190 ; AVX512VLBW: # %bb.0:
1191 ; AVX512VLBW-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1192 ; AVX512VLBW-NEXT: retq
1194 ; AVX512VBMI2-LABEL: constant_funnnel_v8i32:
1195 ; AVX512VBMI2: # %bb.0:
1196 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1197 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [4,5,6,7,8,9,10,11]
1198 ; AVX512VBMI2-NEXT: vprorvd %zmm1, %zmm0, %zmm0
1199 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1200 ; AVX512VBMI2-NEXT: retq
1202 ; AVX512VLVBMI2-LABEL: constant_funnnel_v8i32:
1203 ; AVX512VLVBMI2: # %bb.0:
1204 ; AVX512VLVBMI2-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1205 ; AVX512VLVBMI2-NEXT: retq
1207 ; XOPAVX1-LABEL: constant_funnnel_v8i32:
1209 ; XOPAVX1-NEXT: vprotd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1210 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1211 ; XOPAVX1-NEXT: vprotd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1212 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1213 ; XOPAVX1-NEXT: retq
1215 ; XOPAVX2-LABEL: constant_funnnel_v8i32:
1217 ; XOPAVX2-NEXT: vprotd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1218 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1219 ; XOPAVX2-NEXT: vprotd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1220 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1221 ; XOPAVX2-NEXT: retq
1222 %res = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x, <8 x i32> %x, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>)
; constant_funnnel_v16i16: <16 x i16> rotate with a constant, non-uniform amount.
; llvm.fshr is called with %x as both data operands and the per-lane constant
; amounts <0, 1, 2, ..., 15>, so the funnel shift right acts as a per-lane rotate
; right of %x by those amounts (lane 0 uses amount 0).
; The per-prefix check lines are autogenerated by utils/update_llc_test_checks.py
; (see the NOTE on the first line of the file); regenerate them, do not hand-edit.
1226 define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x) nounwind {
1227 ; AVX1-LABEL: constant_funnnel_v16i16:
1229 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1230 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [256,128,64,32,16,8,4,2]
1231 ; AVX1-NEXT: vpmulhuw %xmm2, %xmm1, %xmm3
1232 ; AVX1-NEXT: vpmullw %xmm2, %xmm1, %xmm1
1233 ; AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1
1234 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,32768,16384,8192,4096,2048,1024,512]
1235 ; AVX1-NEXT: vpmulhuw %xmm2, %xmm0, %xmm3
1236 ; AVX1-NEXT: vpmullw %xmm2, %xmm0, %xmm0
1237 ; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
1238 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1241 ; AVX2-LABEL: constant_funnnel_v16i16:
1243 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2]
1244 ; AVX2-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2
1245 ; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
1246 ; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
1249 ; AVX512F-LABEL: constant_funnnel_v16i16:
1251 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [1,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2]
1252 ; AVX512F-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2
1253 ; AVX512F-NEXT: vpmullw %ymm1, %ymm0, %ymm0
1254 ; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0
1255 ; AVX512F-NEXT: retq
1257 ; AVX512VL-LABEL: constant_funnnel_v16i16:
1258 ; AVX512VL: # %bb.0:
1259 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2]
1260 ; AVX512VL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2
1261 ; AVX512VL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
1262 ; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0
1263 ; AVX512VL-NEXT: retq
1265 ; AVX512BW-LABEL: constant_funnnel_v16i16:
1266 ; AVX512BW: # %bb.0:
1267 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1268 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [16,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
1269 ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm1
1270 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1]
1271 ; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
1272 ; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
1273 ; AVX512BW-NEXT: retq
1275 ; AVX512VLBW-LABEL: constant_funnnel_v16i16:
1276 ; AVX512VLBW: # %bb.0:
1277 ; AVX512VLBW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
1278 ; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1279 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
1280 ; AVX512VLBW-NEXT: retq
1282 ; AVX512VBMI2-LABEL: constant_funnnel_v16i16:
1283 ; AVX512VBMI2: # %bb.0:
1284 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1285 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
1286 ; AVX512VBMI2-NEXT: vpshrdvw %zmm1, %zmm0, %zmm0
1287 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1288 ; AVX512VBMI2-NEXT: retq
1290 ; AVX512VLVBMI2-LABEL: constant_funnnel_v16i16:
1291 ; AVX512VLVBMI2: # %bb.0:
1292 ; AVX512VLVBMI2-NEXT: vpshrdvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1293 ; AVX512VLVBMI2-NEXT: retq
1295 ; XOPAVX1-LABEL: constant_funnnel_v16i16:
1297 ; XOPAVX1-NEXT: vprotw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1298 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1299 ; XOPAVX1-NEXT: vprotw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1300 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1301 ; XOPAVX1-NEXT: retq
1303 ; XOPAVX2-LABEL: constant_funnnel_v16i16:
1305 ; XOPAVX2-NEXT: vprotw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1306 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1307 ; XOPAVX2-NEXT: vprotw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1308 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1309 ; XOPAVX2-NEXT: retq
1310 %res = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x, <16 x i16> %x, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
1314 define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x) nounwind {
1315 ; AVX1-LABEL: constant_funnnel_v32i8:
1317 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1318 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1319 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128]
1320 ; AVX1-NEXT: vpmullw %xmm3, %xmm2, %xmm2
1321 ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
1322 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1323 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [1,128,64,32,16,8,4,2]
1324 ; AVX1-NEXT: vpmullw %xmm4, %xmm1, %xmm1
1325 ; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
1326 ; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
1327 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1328 ; AVX1-NEXT: vpmullw %xmm3, %xmm2, %xmm2
1329 ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
1330 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1331 ; AVX1-NEXT: vpmullw %xmm4, %xmm0, %xmm0
1332 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
1333 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
1334 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1337 ; AVX2-LABEL: constant_funnnel_v32i8:
1339 ; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
1340 ; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
1341 ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
1342 ; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
1343 ; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1344 ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
1345 ; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
1348 ; AVX512F-LABEL: constant_funnnel_v32i8:
1350 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
1351 ; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
1352 ; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1
1353 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
1354 ; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1355 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0
1356 ; AVX512F-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
1357 ; AVX512F-NEXT: retq
1359 ; AVX512VL-LABEL: constant_funnnel_v32i8:
1360 ; AVX512VL: # %bb.0:
1361 ; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
1362 ; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
1363 ; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
1364 ; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
1365 ; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1366 ; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
1367 ; AVX512VL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
1368 ; AVX512VL-NEXT: retq
1370 ; AVX512BW-LABEL: constant_funnnel_v32i8:
1371 ; AVX512BW: # %bb.0:
1372 ; AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,1,0,2,0,3,0,4,0,5,0,6,0,7,0,0,0,1,0,2,0,3,0,4,0,5,0,6,0,7,0]
1373 ; AVX512BW-NEXT: # ymm1 = mem[0,1,0,1]
1374 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
1375 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm2, %zmm1
1376 ; AVX512BW-NEXT: vpsrlw $8, %ymm1, %ymm1
1377 ; AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,0,7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0,7,0,6,0,5,0,4,0,3,0,2,0,1,0]
1378 ; AVX512BW-NEXT: # ymm2 = mem[0,1,0,1]
1379 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
1380 ; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
1381 ; AVX512BW-NEXT: vpsrlw $8, %ymm0, %ymm0
1382 ; AVX512BW-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
1383 ; AVX512BW-NEXT: retq
1385 ; AVX512VLBW-LABEL: constant_funnnel_v32i8:
1386 ; AVX512VLBW: # %bb.0:
1387 ; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
1388 ; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
1389 ; AVX512VLBW-NEXT: vpsrlw $8, %ymm1, %ymm1
1390 ; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
1391 ; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1392 ; AVX512VLBW-NEXT: vpsrlw $8, %ymm0, %ymm0
1393 ; AVX512VLBW-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
1394 ; AVX512VLBW-NEXT: retq
1396 ; AVX512VBMI2-LABEL: constant_funnnel_v32i8:
1397 ; AVX512VBMI2: # %bb.0:
1398 ; AVX512VBMI2-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,1,0,2,0,3,0,4,0,5,0,6,0,7,0,0,0,1,0,2,0,3,0,4,0,5,0,6,0,7,0]
1399 ; AVX512VBMI2-NEXT: # ymm1 = mem[0,1,0,1]
1400 ; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
1401 ; AVX512VBMI2-NEXT: vpsllvw %zmm1, %zmm2, %zmm1
1402 ; AVX512VBMI2-NEXT: vpsrlw $8, %ymm1, %ymm1
1403 ; AVX512VBMI2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,0,7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0,7,0,6,0,5,0,4,0,3,0,2,0,1,0]
1404 ; AVX512VBMI2-NEXT: # ymm2 = mem[0,1,0,1]
1405 ; AVX512VBMI2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
1406 ; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
1407 ; AVX512VBMI2-NEXT: vpsrlw $8, %ymm0, %ymm0
1408 ; AVX512VBMI2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
1409 ; AVX512VBMI2-NEXT: retq
1411 ; AVX512VLVBMI2-LABEL: constant_funnnel_v32i8:
1412 ; AVX512VLVBMI2: # %bb.0:
1413 ; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
1414 ; AVX512VLVBMI2-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
1415 ; AVX512VLVBMI2-NEXT: vpsrlw $8, %ymm1, %ymm1
1416 ; AVX512VLVBMI2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
1417 ; AVX512VLVBMI2-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1418 ; AVX512VLVBMI2-NEXT: vpsrlw $8, %ymm0, %ymm0
1419 ; AVX512VLVBMI2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
1420 ; AVX512VLVBMI2-NEXT: retq
1422 ; XOPAVX1-LABEL: constant_funnnel_v32i8:
1424 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1425 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7]
1426 ; XOPAVX1-NEXT: vprotb %xmm2, %xmm1, %xmm1
1427 ; XOPAVX1-NEXT: vprotb %xmm2, %xmm0, %xmm0
1428 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1429 ; XOPAVX1-NEXT: retq
1431 ; XOPAVX2-LABEL: constant_funnnel_v32i8:
1433 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1434 ; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7]
1435 ; XOPAVX2-NEXT: vprotb %xmm2, %xmm1, %xmm1
1436 ; XOPAVX2-NEXT: vprotb %xmm2, %xmm0, %xmm0
1437 ; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1438 ; XOPAVX2-NEXT: retq
1439 %res = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %x, <32 x i8> %x, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1>)
1444 ; Uniform Constant Shifts
; Funnel-shift-right of %x with itself (both data operands of llvm.fshr are %x)
; by the same constant amount, 14, in every i64 lane.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing them by hand.
; Expected lowering as recorded by the checks: a shift-left-by-50 / shift-right-by-14
; pair combined with an OR on the plain AVX runs, a single vprorq $14 on the
; AVX512 runs, and vprotq $50 (that is 64 - 14) per 128-bit half on the XOP runs.
1447 define <4 x i64> @splatconstant_funnnel_v4i64(<4 x i64> %x) nounwind {
1448 ; AVX1-LABEL: splatconstant_funnnel_v4i64:
1450 ; AVX1-NEXT: vpsllq $50, %xmm0, %xmm1
1451 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1452 ; AVX1-NEXT: vpsllq $50, %xmm2, %xmm3
1453 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
1454 ; AVX1-NEXT: vpsrlq $14, %xmm0, %xmm0
1455 ; AVX1-NEXT: vpsrlq $14, %xmm2, %xmm2
1456 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1457 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1460 ; AVX2-LABEL: splatconstant_funnnel_v4i64:
1462 ; AVX2-NEXT: vpsllq $50, %ymm0, %ymm1
1463 ; AVX2-NEXT: vpsrlq $14, %ymm0, %ymm0
1464 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1467 ; AVX512F-LABEL: splatconstant_funnnel_v4i64:
1469 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1470 ; AVX512F-NEXT: vprorq $14, %zmm0, %zmm0
1471 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1472 ; AVX512F-NEXT: retq
1474 ; AVX512VL-LABEL: splatconstant_funnnel_v4i64:
1475 ; AVX512VL: # %bb.0:
1476 ; AVX512VL-NEXT: vprorq $14, %ymm0, %ymm0
1477 ; AVX512VL-NEXT: retq
1479 ; AVX512BW-LABEL: splatconstant_funnnel_v4i64:
1480 ; AVX512BW: # %bb.0:
1481 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1482 ; AVX512BW-NEXT: vprorq $14, %zmm0, %zmm0
1483 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1484 ; AVX512BW-NEXT: retq
1486 ; AVX512VLBW-LABEL: splatconstant_funnnel_v4i64:
1487 ; AVX512VLBW: # %bb.0:
1488 ; AVX512VLBW-NEXT: vprorq $14, %ymm0, %ymm0
1489 ; AVX512VLBW-NEXT: retq
1491 ; AVX512VBMI2-LABEL: splatconstant_funnnel_v4i64:
1492 ; AVX512VBMI2: # %bb.0:
1493 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1494 ; AVX512VBMI2-NEXT: vprorq $14, %zmm0, %zmm0
1495 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1496 ; AVX512VBMI2-NEXT: retq
1498 ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v4i64:
1499 ; AVX512VLVBMI2: # %bb.0:
1500 ; AVX512VLVBMI2-NEXT: vprorq $14, %ymm0, %ymm0
1501 ; AVX512VLVBMI2-NEXT: retq
1503 ; XOPAVX1-LABEL: splatconstant_funnnel_v4i64:
1505 ; XOPAVX1-NEXT: vprotq $50, %xmm0, %xmm1
1506 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1507 ; XOPAVX1-NEXT: vprotq $50, %xmm0, %xmm0
1508 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1509 ; XOPAVX1-NEXT: retq
1511 ; XOPAVX2-LABEL: splatconstant_funnnel_v4i64:
1513 ; XOPAVX2-NEXT: vprotq $50, %xmm0, %xmm1
1514 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1515 ; XOPAVX2-NEXT: vprotq $50, %xmm0, %xmm0
1516 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1517 ; XOPAVX2-NEXT: retq
1518 %res = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x, <4 x i64> %x, <4 x i64> <i64 14, i64 14, i64 14, i64 14>)
; Funnel-shift-right of %x with itself (both data operands of llvm.fshr are %x)
; by the same constant amount, 4, in every i32 lane.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing them by hand.
; Expected lowering as recorded by the checks: shift-right-by-4 / shift-left-by-28
; combined with vpor on the plain AVX runs, a single vprord $4 on the AVX512
; runs, and vprotd $28 (that is 32 - 4) per 128-bit half on the XOP runs.
1522 define <8 x i32> @splatconstant_funnnel_v8i32(<8 x i32> %x) nounwind {
1523 ; AVX1-LABEL: splatconstant_funnnel_v8i32:
1525 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1526 ; AVX1-NEXT: vpsrld $4, %xmm1, %xmm2
1527 ; AVX1-NEXT: vpslld $28, %xmm1, %xmm1
1528 ; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1
1529 ; AVX1-NEXT: vpsrld $4, %xmm0, %xmm2
1530 ; AVX1-NEXT: vpslld $28, %xmm0, %xmm0
1531 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
1532 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1535 ; AVX2-LABEL: splatconstant_funnnel_v8i32:
1537 ; AVX2-NEXT: vpsrld $4, %ymm0, %ymm1
1538 ; AVX2-NEXT: vpslld $28, %ymm0, %ymm0
1539 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1542 ; AVX512F-LABEL: splatconstant_funnnel_v8i32:
1544 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1545 ; AVX512F-NEXT: vprord $4, %zmm0, %zmm0
1546 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1547 ; AVX512F-NEXT: retq
1549 ; AVX512VL-LABEL: splatconstant_funnnel_v8i32:
1550 ; AVX512VL: # %bb.0:
1551 ; AVX512VL-NEXT: vprord $4, %ymm0, %ymm0
1552 ; AVX512VL-NEXT: retq
1554 ; AVX512BW-LABEL: splatconstant_funnnel_v8i32:
1555 ; AVX512BW: # %bb.0:
1556 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1557 ; AVX512BW-NEXT: vprord $4, %zmm0, %zmm0
1558 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1559 ; AVX512BW-NEXT: retq
1561 ; AVX512VLBW-LABEL: splatconstant_funnnel_v8i32:
1562 ; AVX512VLBW: # %bb.0:
1563 ; AVX512VLBW-NEXT: vprord $4, %ymm0, %ymm0
1564 ; AVX512VLBW-NEXT: retq
1566 ; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i32:
1567 ; AVX512VBMI2: # %bb.0:
1568 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1569 ; AVX512VBMI2-NEXT: vprord $4, %zmm0, %zmm0
1570 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1571 ; AVX512VBMI2-NEXT: retq
1573 ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v8i32:
1574 ; AVX512VLVBMI2: # %bb.0:
1575 ; AVX512VLVBMI2-NEXT: vprord $4, %ymm0, %ymm0
1576 ; AVX512VLVBMI2-NEXT: retq
1578 ; XOPAVX1-LABEL: splatconstant_funnnel_v8i32:
1580 ; XOPAVX1-NEXT: vprotd $28, %xmm0, %xmm1
1581 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1582 ; XOPAVX1-NEXT: vprotd $28, %xmm0, %xmm0
1583 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1584 ; XOPAVX1-NEXT: retq
1586 ; XOPAVX2-LABEL: splatconstant_funnnel_v8i32:
1588 ; XOPAVX2-NEXT: vprotd $28, %xmm0, %xmm1
1589 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1590 ; XOPAVX2-NEXT: vprotd $28, %xmm0, %xmm0
1591 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1592 ; XOPAVX2-NEXT: retq
1593 %res = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x, <8 x i32> %x, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>)
; Funnel-shift-right of %x with itself (both data operands of llvm.fshr are %x)
; by the same constant amount, 7, in every i16 lane.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing them by hand.
; Expected lowering as recorded by the checks: shift-right-by-7 / shift-left-by-9
; combined with vpor on the AVX and non-VBMI2 AVX512 runs, a single vpshrdw $7
; with both sources equal on the VBMI2 runs, and vprotw $9 (that is 16 - 7) per
; 128-bit half on the XOP runs.
1597 define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x) nounwind {
1598 ; AVX1-LABEL: splatconstant_funnnel_v16i16:
1600 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1601 ; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm2
1602 ; AVX1-NEXT: vpsllw $9, %xmm1, %xmm1
1603 ; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1
1604 ; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm2
1605 ; AVX1-NEXT: vpsllw $9, %xmm0, %xmm0
1606 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
1607 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1610 ; AVX2-LABEL: splatconstant_funnnel_v16i16:
1612 ; AVX2-NEXT: vpsrlw $7, %ymm0, %ymm1
1613 ; AVX2-NEXT: vpsllw $9, %ymm0, %ymm0
1614 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1617 ; AVX512F-LABEL: splatconstant_funnnel_v16i16:
1619 ; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm1
1620 ; AVX512F-NEXT: vpsllw $9, %ymm0, %ymm0
1621 ; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
1622 ; AVX512F-NEXT: retq
1624 ; AVX512VL-LABEL: splatconstant_funnnel_v16i16:
1625 ; AVX512VL: # %bb.0:
1626 ; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm1
1627 ; AVX512VL-NEXT: vpsllw $9, %ymm0, %ymm0
1628 ; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
1629 ; AVX512VL-NEXT: retq
1631 ; AVX512BW-LABEL: splatconstant_funnnel_v16i16:
1632 ; AVX512BW: # %bb.0:
1633 ; AVX512BW-NEXT: vpsrlw $7, %ymm0, %ymm1
1634 ; AVX512BW-NEXT: vpsllw $9, %ymm0, %ymm0
1635 ; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
1636 ; AVX512BW-NEXT: retq
1638 ; AVX512VLBW-LABEL: splatconstant_funnnel_v16i16:
1639 ; AVX512VLBW: # %bb.0:
1640 ; AVX512VLBW-NEXT: vpsrlw $7, %ymm0, %ymm1
1641 ; AVX512VLBW-NEXT: vpsllw $9, %ymm0, %ymm0
1642 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
1643 ; AVX512VLBW-NEXT: retq
1645 ; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i16:
1646 ; AVX512VBMI2: # %bb.0:
1647 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1648 ; AVX512VBMI2-NEXT: vpshrdw $7, %zmm0, %zmm0, %zmm0
1649 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1650 ; AVX512VBMI2-NEXT: retq
1652 ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i16:
1653 ; AVX512VLVBMI2: # %bb.0:
1654 ; AVX512VLVBMI2-NEXT: vpshrdw $7, %ymm0, %ymm0, %ymm0
1655 ; AVX512VLVBMI2-NEXT: retq
1657 ; XOPAVX1-LABEL: splatconstant_funnnel_v16i16:
1659 ; XOPAVX1-NEXT: vprotw $9, %xmm0, %xmm1
1660 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1661 ; XOPAVX1-NEXT: vprotw $9, %xmm0, %xmm0
1662 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1663 ; XOPAVX1-NEXT: retq
1665 ; XOPAVX2-LABEL: splatconstant_funnnel_v16i16:
1667 ; XOPAVX2-NEXT: vprotw $9, %xmm0, %xmm1
1668 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1669 ; XOPAVX2-NEXT: vprotw $9, %xmm0, %xmm0
1670 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1671 ; XOPAVX2-NEXT: retq
1672 %res = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x, <16 x i16> %x, <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
1676 define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
1677 ; AVX1-LABEL: splatconstant_funnnel_v32i8:
1679 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1680 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm2
1681 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
1682 ; AVX1-NEXT: vpandn %xmm2, %xmm3, %xmm2
1683 ; AVX1-NEXT: vpsllw $4, %xmm1, %xmm1
1684 ; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
1685 ; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1
1686 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm2
1687 ; AVX1-NEXT: vpandn %xmm2, %xmm3, %xmm2
1688 ; AVX1-NEXT: vpsllw $4, %xmm0, %xmm0
1689 ; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
1690 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
1691 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1694 ; AVX2-LABEL: splatconstant_funnnel_v32i8:
1696 ; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm1
1697 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
1698 ; AVX2-NEXT: vpsllw $4, %ymm0, %ymm0
1699 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1700 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1703 ; AVX512F-LABEL: splatconstant_funnnel_v32i8:
1705 ; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1
1706 ; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
1707 ; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
1708 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1709 ; AVX512F-NEXT: retq
1711 ; AVX512VL-LABEL: splatconstant_funnnel_v32i8:
1712 ; AVX512VL: # %bb.0:
1713 ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
1714 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
1715 ; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
1716 ; AVX512VL-NEXT: retq
1718 ; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
1719 ; AVX512BW: # %bb.0:
1720 ; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm1
1721 ; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm0
1722 ; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
1723 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1724 ; AVX512BW-NEXT: retq
1726 ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8:
1727 ; AVX512VLBW: # %bb.0:
1728 ; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm1
1729 ; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm0
1730 ; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
1731 ; AVX512VLBW-NEXT: retq
1733 ; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8:
1734 ; AVX512VBMI2: # %bb.0:
1735 ; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm1
1736 ; AVX512VBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0
1737 ; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
1738 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1739 ; AVX512VBMI2-NEXT: retq
1741 ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8:
1742 ; AVX512VLVBMI2: # %bb.0:
1743 ; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm1
1744 ; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0
1745 ; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
1746 ; AVX512VLVBMI2-NEXT: retq
1748 ; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
1750 ; XOPAVX1-NEXT: vprotb $4, %xmm0, %xmm1
1751 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1752 ; XOPAVX1-NEXT: vprotb $4, %xmm0, %xmm0
1753 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1754 ; XOPAVX1-NEXT: retq
1756 ; XOPAVX2-LABEL: splatconstant_funnnel_v32i8:
1758 ; XOPAVX2-NEXT: vprotb $4, %xmm0, %xmm1
1759 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1760 ; XOPAVX2-NEXT: vprotb $4, %xmm0, %xmm0
1761 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1762 ; XOPAVX2-NEXT: retq
1763 %res = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %x, <32 x i8> %x, <32 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)