1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=AVX512VL
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512BW
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512VLBW
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2 | FileCheck %s --check-prefix=AVX512VBMI2
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefix=AVX512VLVBMI2
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=XOPAVX1
11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=XOPAVX2
; Funnel-shift-right intrinsics, one per element width (i64, i32, i16, i8) of the
; 256-bit vector types called by the functions in this file.
13 declare <4 x i64> @llvm.fshr.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)
14 declare <8 x i32> @llvm.fshr.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)
15 declare <16 x i16> @llvm.fshr.v16i16(<16 x i16>, <16 x i16>, <16 x i16>)
16 declare <32 x i8> @llvm.fshr.v32i8(<32 x i8>, <32 x i8>, <32 x i8>)
; Per-element variable rotate right of the four i64 lanes of %x: the body calls
; llvm.fshr.v4i64 with %x as both data operands and %amt as the per-lane amount.
; The assertion lines that follow are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
22 define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind {
23 ; AVX1-LABEL: var_funnnel_v4i64:
25 ; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [63,63,63,63]
26 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm3
27 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
28 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
29 ; AVX1-NEXT: vpsrlq %xmm4, %xmm5, %xmm6
30 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
31 ; AVX1-NEXT: vpsrlq %xmm4, %xmm5, %xmm4
32 ; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm6[0,1,2,3],xmm4[4,5,6,7]
33 ; AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm6
34 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
35 ; AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm3
36 ; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm6[0,1,2,3],xmm3[4,5,6,7]
37 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
38 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
39 ; AVX1-NEXT: vpxor %xmm6, %xmm6, %xmm6
40 ; AVX1-NEXT: vpsubq %xmm4, %xmm6, %xmm4
41 ; AVX1-NEXT: vpand %xmm2, %xmm4, %xmm4
42 ; AVX1-NEXT: vpsllq %xmm4, %xmm5, %xmm7
43 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
44 ; AVX1-NEXT: vpsllq %xmm4, %xmm5, %xmm4
45 ; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm7[0,1,2,3],xmm4[4,5,6,7]
46 ; AVX1-NEXT: vpsubq %xmm1, %xmm6, %xmm1
47 ; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
48 ; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm2
49 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
50 ; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
51 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
52 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
53 ; AVX1-NEXT: vorps %ymm0, %ymm3, %ymm0
56 ; AVX2-LABEL: var_funnnel_v4i64:
58 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [63,63,63,63]
59 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm3
60 ; AVX2-NEXT: vpsrlvq %ymm3, %ymm0, %ymm3
61 ; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
62 ; AVX2-NEXT: vpsubq %ymm1, %ymm4, %ymm1
63 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
64 ; AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
65 ; AVX2-NEXT: vpor %ymm0, %ymm3, %ymm0
68 ; AVX512F-LABEL: var_funnnel_v4i64:
70 ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
71 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
72 ; AVX512F-NEXT: vprorvq %zmm1, %zmm0, %zmm0
73 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
76 ; AVX512VL-LABEL: var_funnnel_v4i64:
78 ; AVX512VL-NEXT: vprorvq %ymm1, %ymm0, %ymm0
81 ; AVX512BW-LABEL: var_funnnel_v4i64:
83 ; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
84 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
85 ; AVX512BW-NEXT: vprorvq %zmm1, %zmm0, %zmm0
86 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
89 ; AVX512VLBW-LABEL: var_funnnel_v4i64:
90 ; AVX512VLBW: # %bb.0:
91 ; AVX512VLBW-NEXT: vprorvq %ymm1, %ymm0, %ymm0
92 ; AVX512VLBW-NEXT: retq
94 ; AVX512VBMI2-LABEL: var_funnnel_v4i64:
95 ; AVX512VBMI2: # %bb.0:
96 ; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
97 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
98 ; AVX512VBMI2-NEXT: vprorvq %zmm1, %zmm0, %zmm0
99 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
100 ; AVX512VBMI2-NEXT: retq
102 ; AVX512VLVBMI2-LABEL: var_funnnel_v4i64:
103 ; AVX512VLVBMI2: # %bb.0:
104 ; AVX512VLVBMI2-NEXT: vprorvq %ymm1, %ymm0, %ymm0
105 ; AVX512VLVBMI2-NEXT: retq
107 ; XOPAVX1-LABEL: var_funnnel_v4i64:
109 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
110 ; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
111 ; XOPAVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2
112 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
113 ; XOPAVX1-NEXT: vprotq %xmm2, %xmm4, %xmm2
114 ; XOPAVX1-NEXT: vpsubq %xmm1, %xmm3, %xmm1
115 ; XOPAVX1-NEXT: vprotq %xmm1, %xmm0, %xmm0
116 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
119 ; XOPAVX2-LABEL: var_funnnel_v4i64:
121 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
122 ; XOPAVX2-NEXT: vpsubq %ymm1, %ymm2, %ymm1
123 ; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
124 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
125 ; XOPAVX2-NEXT: vprotq %xmm2, %xmm3, %xmm2
126 ; XOPAVX2-NEXT: vprotq %xmm1, %xmm0, %xmm0
127 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
129 %res = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x, <4 x i64> %x, <4 x i64> %amt)
; Per-element variable rotate right of the eight i32 lanes of %x: the body calls
; llvm.fshr.v8i32 with %x as both data operands and %amt as the per-lane amount.
; The assertion lines that follow are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
133 define <8 x i32> @var_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind {
134 ; AVX1-LABEL: var_funnnel_v8i32:
136 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
137 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
138 ; AVX1-NEXT: vpsubd %xmm2, %xmm3, %xmm2
139 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [31,31,31,31]
140 ; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
141 ; AVX1-NEXT: vpslld $23, %xmm2, %xmm2
142 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm5 = [1065353216,1065353216,1065353216,1065353216]
143 ; AVX1-NEXT: vpaddd %xmm5, %xmm2, %xmm2
144 ; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2
145 ; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[1,1,3,3]
146 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7
147 ; AVX1-NEXT: vpshufd {{.*#+}} xmm8 = xmm7[1,1,3,3]
148 ; AVX1-NEXT: vpmuludq %xmm6, %xmm8, %xmm6
149 ; AVX1-NEXT: vpmuludq %xmm2, %xmm7, %xmm2
150 ; AVX1-NEXT: vpshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
151 ; AVX1-NEXT: vpblendw {{.*#+}} xmm7 = xmm7[0,1],xmm6[2,3],xmm7[4,5],xmm6[6,7]
152 ; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[0,0,2,2]
153 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3],xmm2[4,5],xmm6[6,7]
154 ; AVX1-NEXT: vpor %xmm7, %xmm2, %xmm2
155 ; AVX1-NEXT: vpsubd %xmm1, %xmm3, %xmm1
156 ; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
157 ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
158 ; AVX1-NEXT: vpaddd %xmm5, %xmm1, %xmm1
159 ; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
160 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
161 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
162 ; AVX1-NEXT: vpmuludq %xmm3, %xmm4, %xmm3
163 ; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
164 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
165 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
166 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,0,2,2]
167 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5],xmm3[6,7]
168 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
169 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
172 ; AVX2-LABEL: var_funnnel_v8i32:
174 ; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [31,31,31,31,31,31,31,31]
175 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
176 ; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm2
177 ; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [32,32,32,32,32,32,32,32]
178 ; AVX2-NEXT: vpsubd %ymm1, %ymm3, %ymm1
179 ; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
180 ; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
183 ; AVX512F-LABEL: var_funnnel_v8i32:
185 ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
186 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
187 ; AVX512F-NEXT: vprorvd %zmm1, %zmm0, %zmm0
188 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
191 ; AVX512VL-LABEL: var_funnnel_v8i32:
193 ; AVX512VL-NEXT: vprorvd %ymm1, %ymm0, %ymm0
194 ; AVX512VL-NEXT: retq
196 ; AVX512BW-LABEL: var_funnnel_v8i32:
198 ; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
199 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
200 ; AVX512BW-NEXT: vprorvd %zmm1, %zmm0, %zmm0
201 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
202 ; AVX512BW-NEXT: retq
204 ; AVX512VLBW-LABEL: var_funnnel_v8i32:
205 ; AVX512VLBW: # %bb.0:
206 ; AVX512VLBW-NEXT: vprorvd %ymm1, %ymm0, %ymm0
207 ; AVX512VLBW-NEXT: retq
209 ; AVX512VBMI2-LABEL: var_funnnel_v8i32:
210 ; AVX512VBMI2: # %bb.0:
211 ; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
212 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
213 ; AVX512VBMI2-NEXT: vprorvd %zmm1, %zmm0, %zmm0
214 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
215 ; AVX512VBMI2-NEXT: retq
217 ; AVX512VLVBMI2-LABEL: var_funnnel_v8i32:
218 ; AVX512VLVBMI2: # %bb.0:
219 ; AVX512VLVBMI2-NEXT: vprorvd %ymm1, %ymm0, %ymm0
220 ; AVX512VLVBMI2-NEXT: retq
222 ; XOPAVX1-LABEL: var_funnnel_v8i32:
224 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
225 ; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
226 ; XOPAVX1-NEXT: vpsubd %xmm2, %xmm3, %xmm2
227 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
228 ; XOPAVX1-NEXT: vprotd %xmm2, %xmm4, %xmm2
229 ; XOPAVX1-NEXT: vpsubd %xmm1, %xmm3, %xmm1
230 ; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0
231 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
234 ; XOPAVX2-LABEL: var_funnnel_v8i32:
236 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
237 ; XOPAVX2-NEXT: vpsubd %ymm1, %ymm2, %ymm1
238 ; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
239 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
240 ; XOPAVX2-NEXT: vprotd %xmm2, %xmm3, %xmm2
241 ; XOPAVX2-NEXT: vprotd %xmm1, %xmm0, %xmm0
242 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
244 %res = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x, <8 x i32> %x, <8 x i32> %amt)
; Per-element variable rotate right of the sixteen i16 lanes of %x: the body calls
; llvm.fshr.v16i16 with %x as both data operands and %amt as the per-lane amount.
; The assertion lines that follow are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
248 define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind {
249 ; AVX1-LABEL: var_funnnel_v16i16:
251 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
252 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
253 ; AVX1-NEXT: vpsubw %xmm2, %xmm3, %xmm2
254 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15]
255 ; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
256 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm2[4,4,5,5,6,6,7,7]
257 ; AVX1-NEXT: vpslld $23, %xmm5, %xmm5
258 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm6 = [1065353216,1065353216,1065353216,1065353216]
259 ; AVX1-NEXT: vpaddd %xmm6, %xmm5, %xmm5
260 ; AVX1-NEXT: vcvttps2dq %xmm5, %xmm5
261 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
262 ; AVX1-NEXT: vpslld $23, %xmm2, %xmm2
263 ; AVX1-NEXT: vpaddd %xmm6, %xmm2, %xmm2
264 ; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2
265 ; AVX1-NEXT: vpackusdw %xmm5, %xmm2, %xmm2
266 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
267 ; AVX1-NEXT: vpmulhuw %xmm2, %xmm5, %xmm7
268 ; AVX1-NEXT: vpmullw %xmm2, %xmm5, %xmm2
269 ; AVX1-NEXT: vpor %xmm7, %xmm2, %xmm2
270 ; AVX1-NEXT: vpsubw %xmm1, %xmm3, %xmm1
271 ; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
272 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm1[4,4,5,5,6,6,7,7]
273 ; AVX1-NEXT: vpslld $23, %xmm3, %xmm3
274 ; AVX1-NEXT: vpaddd %xmm6, %xmm3, %xmm3
275 ; AVX1-NEXT: vcvttps2dq %xmm3, %xmm3
276 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
277 ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
278 ; AVX1-NEXT: vpaddd %xmm6, %xmm1, %xmm1
279 ; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
280 ; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
281 ; AVX1-NEXT: vpmulhuw %xmm1, %xmm0, %xmm3
282 ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
283 ; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
284 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
287 ; AVX2-LABEL: var_funnnel_v16i16:
289 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
290 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
291 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
292 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
293 ; AVX2-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
294 ; AVX2-NEXT: vpblendw {{.*#+}} ymm3 = ymm3[0],ymm2[1],ymm3[2],ymm2[3],ymm3[4],ymm2[5],ymm3[6],ymm2[7],ymm3[8],ymm2[9],ymm3[10],ymm2[11],ymm3[12],ymm2[13],ymm3[14],ymm2[15]
295 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
296 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
297 ; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
298 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
299 ; AVX2-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
302 ; AVX512F-LABEL: var_funnnel_v16i16:
304 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
305 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
306 ; AVX512F-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
307 ; AVX512F-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
308 ; AVX512F-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
309 ; AVX512F-NEXT: vpblendw {{.*#+}} ymm3 = ymm3[0],ymm2[1],ymm3[2],ymm2[3],ymm3[4],ymm2[5],ymm3[6],ymm2[7],ymm3[8],ymm2[9],ymm3[10],ymm2[11],ymm3[12],ymm2[13],ymm3[14],ymm2[15]
310 ; AVX512F-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
311 ; AVX512F-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
312 ; AVX512F-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
313 ; AVX512F-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
314 ; AVX512F-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
317 ; AVX512VL-LABEL: var_funnnel_v16i16:
319 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
320 ; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
321 ; AVX512VL-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
322 ; AVX512VL-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
323 ; AVX512VL-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
324 ; AVX512VL-NEXT: vpblendw {{.*#+}} ymm3 = ymm3[0],ymm2[1],ymm3[2],ymm2[3],ymm3[4],ymm2[5],ymm3[6],ymm2[7],ymm3[8],ymm2[9],ymm3[10],ymm2[11],ymm3[12],ymm2[13],ymm3[14],ymm2[15]
325 ; AVX512VL-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
326 ; AVX512VL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
327 ; AVX512VL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
328 ; AVX512VL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
329 ; AVX512VL-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
330 ; AVX512VL-NEXT: retq
332 ; AVX512BW-LABEL: var_funnnel_v16i16:
334 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
335 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
336 ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2
337 ; AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
338 ; AVX512BW-NEXT: vpsubw %ymm1, %ymm3, %ymm1
339 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
340 ; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0
341 ; AVX512BW-NEXT: retq
343 ; AVX512VLBW-LABEL: var_funnnel_v16i16:
344 ; AVX512VLBW: # %bb.0:
345 ; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
346 ; AVX512VLBW-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2
347 ; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
348 ; AVX512VLBW-NEXT: vpsubw %ymm1, %ymm3, %ymm1
349 ; AVX512VLBW-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
350 ; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0
351 ; AVX512VLBW-NEXT: retq
353 ; AVX512VBMI2-LABEL: var_funnnel_v16i16:
354 ; AVX512VBMI2: # %bb.0:
355 ; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
356 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
357 ; AVX512VBMI2-NEXT: vpshrdvw %zmm1, %zmm0, %zmm0
358 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
359 ; AVX512VBMI2-NEXT: retq
361 ; AVX512VLVBMI2-LABEL: var_funnnel_v16i16:
362 ; AVX512VLVBMI2: # %bb.0:
363 ; AVX512VLVBMI2-NEXT: vpshrdvw %ymm1, %ymm0, %ymm0
364 ; AVX512VLVBMI2-NEXT: retq
366 ; XOPAVX1-LABEL: var_funnnel_v16i16:
368 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
369 ; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
370 ; XOPAVX1-NEXT: vpsubw %xmm2, %xmm3, %xmm2
371 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
372 ; XOPAVX1-NEXT: vprotw %xmm2, %xmm4, %xmm2
373 ; XOPAVX1-NEXT: vpsubw %xmm1, %xmm3, %xmm1
374 ; XOPAVX1-NEXT: vprotw %xmm1, %xmm0, %xmm0
375 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
378 ; XOPAVX2-LABEL: var_funnnel_v16i16:
380 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
381 ; XOPAVX2-NEXT: vpsubw %ymm1, %ymm2, %ymm1
382 ; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
383 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
384 ; XOPAVX2-NEXT: vprotw %xmm2, %xmm3, %xmm2
385 ; XOPAVX2-NEXT: vprotw %xmm1, %xmm0, %xmm0
386 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
388 %res = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x, <16 x i16> %x, <16 x i16> %amt)
; Per-element variable rotate right of the thirty-two i8 lanes of %x: the body calls
; llvm.fshr.v32i8 with %x as both data operands and %amt as the per-lane amount.
; The assertion lines that follow are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
392 define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
393 ; AVX1-LABEL: var_funnnel_v32i8:
395 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
396 ; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm3
397 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
398 ; AVX1-NEXT: vpandn %xmm3, %xmm4, %xmm3
399 ; AVX1-NEXT: vpsllw $4, %xmm2, %xmm5
400 ; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm5
401 ; AVX1-NEXT: vpor %xmm3, %xmm5, %xmm3
402 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
403 ; AVX1-NEXT: vpxor %xmm6, %xmm6, %xmm6
404 ; AVX1-NEXT: vpsubb %xmm5, %xmm6, %xmm5
405 ; AVX1-NEXT: vpsllw $5, %xmm5, %xmm5
406 ; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
407 ; AVX1-NEXT: vpsrlw $6, %xmm2, %xmm3
408 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm7 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
409 ; AVX1-NEXT: vpandn %xmm3, %xmm7, %xmm3
410 ; AVX1-NEXT: vpsllw $2, %xmm2, %xmm8
411 ; AVX1-NEXT: vpand %xmm7, %xmm8, %xmm8
412 ; AVX1-NEXT: vpor %xmm3, %xmm8, %xmm3
413 ; AVX1-NEXT: vpaddb %xmm5, %xmm5, %xmm5
414 ; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
415 ; AVX1-NEXT: vpsrlw $7, %xmm2, %xmm3
416 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm8 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
417 ; AVX1-NEXT: vpand %xmm3, %xmm8, %xmm3
418 ; AVX1-NEXT: vpaddb %xmm2, %xmm2, %xmm9
419 ; AVX1-NEXT: vpor %xmm3, %xmm9, %xmm3
420 ; AVX1-NEXT: vpaddb %xmm5, %xmm5, %xmm5
421 ; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
422 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm3
423 ; AVX1-NEXT: vpandn %xmm3, %xmm4, %xmm3
424 ; AVX1-NEXT: vpsllw $4, %xmm0, %xmm5
425 ; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm4
426 ; AVX1-NEXT: vpor %xmm3, %xmm4, %xmm3
427 ; AVX1-NEXT: vpsubb %xmm1, %xmm6, %xmm1
428 ; AVX1-NEXT: vpsllw $5, %xmm1, %xmm1
429 ; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
430 ; AVX1-NEXT: vpsrlw $6, %xmm0, %xmm3
431 ; AVX1-NEXT: vpandn %xmm3, %xmm7, %xmm3
432 ; AVX1-NEXT: vpsllw $2, %xmm0, %xmm4
433 ; AVX1-NEXT: vpand %xmm7, %xmm4, %xmm4
434 ; AVX1-NEXT: vpor %xmm3, %xmm4, %xmm3
435 ; AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm1
436 ; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
437 ; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm3
438 ; AVX1-NEXT: vpand %xmm3, %xmm8, %xmm3
439 ; AVX1-NEXT: vpaddb %xmm0, %xmm0, %xmm4
440 ; AVX1-NEXT: vpor %xmm3, %xmm4, %xmm3
441 ; AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm1
442 ; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
443 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
446 ; AVX2-LABEL: var_funnnel_v32i8:
448 ; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm2
449 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
450 ; AVX2-NEXT: vpsllw $4, %ymm0, %ymm3
451 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
452 ; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2
453 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
454 ; AVX2-NEXT: vpsubb %ymm1, %ymm3, %ymm1
455 ; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
456 ; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
457 ; AVX2-NEXT: vpsrlw $6, %ymm0, %ymm2
458 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
459 ; AVX2-NEXT: vpsllw $2, %ymm0, %ymm3
460 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
461 ; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2
462 ; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
463 ; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
464 ; AVX2-NEXT: vpsrlw $7, %ymm0, %ymm2
465 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
466 ; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm3
467 ; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2
468 ; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
469 ; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
472 ; AVX512F-LABEL: var_funnnel_v32i8:
474 ; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2
475 ; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm3
476 ; AVX512F-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm3
477 ; AVX512F-NEXT: vpsllw $5, %ymm1, %ymm1
478 ; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
479 ; AVX512F-NEXT: vpsrlw $2, %ymm0, %ymm2
480 ; AVX512F-NEXT: vpsllw $6, %ymm0, %ymm3
481 ; AVX512F-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm3
482 ; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm1
483 ; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
484 ; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm2
485 ; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm3
486 ; AVX512F-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm3
487 ; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm1
488 ; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
491 ; AVX512VL-LABEL: var_funnnel_v32i8:
493 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2
494 ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm3
495 ; AVX512VL-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm3
496 ; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
497 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
498 ; AVX512VL-NEXT: vpsrlw $2, %ymm0, %ymm2
499 ; AVX512VL-NEXT: vpsllw $6, %ymm0, %ymm3
500 ; AVX512VL-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm3
501 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
502 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
503 ; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm2
504 ; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm3
505 ; AVX512VL-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm3
506 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
507 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
508 ; AVX512VL-NEXT: retq
510 ; AVX512BW-LABEL: var_funnnel_v32i8:
512 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
513 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
514 ; AVX512BW-NEXT: vpxor %xmm3, %xmm3, %xmm3
515 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm3[8],ymm1[9],ymm3[9],ymm1[10],ymm3[10],ymm1[11],ymm3[11],ymm1[12],ymm3[12],ymm1[13],ymm3[13],ymm1[14],ymm3[14],ymm1[15],ymm3[15],ymm1[24],ymm3[24],ymm1[25],ymm3[25],ymm1[26],ymm3[26],ymm1[27],ymm3[27],ymm1[28],ymm3[28],ymm1[29],ymm3[29],ymm1[30],ymm3[30],ymm1[31],ymm3[31]
516 ; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm2, %zmm2
517 ; AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
518 ; AVX512BW-NEXT: vpand %ymm4, %ymm2, %ymm2
519 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
520 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[4],ymm3[4],ymm1[5],ymm3[5],ymm1[6],ymm3[6],ymm1[7],ymm3[7],ymm1[16],ymm3[16],ymm1[17],ymm3[17],ymm1[18],ymm3[18],ymm1[19],ymm3[19],ymm1[20],ymm3[20],ymm1[21],ymm3[21],ymm1[22],ymm3[22],ymm1[23],ymm3[23]
521 ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
522 ; AVX512BW-NEXT: vpand %ymm4, %ymm0, %ymm0
523 ; AVX512BW-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
524 ; AVX512BW-NEXT: retq
526 ; AVX512VLBW-LABEL: var_funnnel_v32i8:
527 ; AVX512VLBW: # %bb.0:
528 ; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
529 ; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
530 ; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
531 ; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
532 ; AVX512VLBW-NEXT: vpsrlvw %ymm3, %ymm4, %ymm3
533 ; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
534 ; AVX512VLBW-NEXT: vpand %ymm4, %ymm3, %ymm3
535 ; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23]
536 ; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
537 ; AVX512VLBW-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
538 ; AVX512VLBW-NEXT: vpand %ymm4, %ymm0, %ymm0
539 ; AVX512VLBW-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
540 ; AVX512VLBW-NEXT: retq
542 ; AVX512VBMI2-LABEL: var_funnnel_v32i8:
543 ; AVX512VBMI2: # %bb.0:
544 ; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
545 ; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
546 ; AVX512VBMI2-NEXT: vpxor %xmm3, %xmm3, %xmm3
547 ; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm3[8],ymm1[9],ymm3[9],ymm1[10],ymm3[10],ymm1[11],ymm3[11],ymm1[12],ymm3[12],ymm1[13],ymm3[13],ymm1[14],ymm3[14],ymm1[15],ymm3[15],ymm1[24],ymm3[24],ymm1[25],ymm3[25],ymm1[26],ymm3[26],ymm1[27],ymm3[27],ymm1[28],ymm3[28],ymm1[29],ymm3[29],ymm1[30],ymm3[30],ymm1[31],ymm3[31]
548 ; AVX512VBMI2-NEXT: vpsrlvw %zmm4, %zmm2, %zmm2
549 ; AVX512VBMI2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
550 ; AVX512VBMI2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[4],ymm3[4],ymm1[5],ymm3[5],ymm1[6],ymm3[6],ymm1[7],ymm3[7],ymm1[16],ymm3[16],ymm1[17],ymm3[17],ymm1[18],ymm3[18],ymm1[19],ymm3[19],ymm1[20],ymm3[20],ymm1[21],ymm3[21],ymm1[22],ymm3[22],ymm1[23],ymm3[23]
551 ; AVX512VBMI2-NEXT: vpsrlvw %zmm1, %zmm0, %zmm1
552 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,64,66,68,70,72,74,76,78,16,18,20,22,24,26,28,30,80,82,84,86,88,90,92,94]
553 ; AVX512VBMI2-NEXT: vpermi2b %zmm2, %zmm1, %zmm0
554 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
555 ; AVX512VBMI2-NEXT: retq
557 ; AVX512VLVBMI2-LABEL: var_funnnel_v32i8:
558 ; AVX512VLVBMI2: # %bb.0:
559 ; AVX512VLVBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
560 ; AVX512VLVBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
561 ; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
562 ; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
563 ; AVX512VLVBMI2-NEXT: vpsrlvw %ymm3, %ymm4, %ymm3
564 ; AVX512VLVBMI2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23]
565 ; AVX512VLVBMI2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
566 ; AVX512VLVBMI2-NEXT: vpsrlvw %ymm1, %ymm0, %ymm1
567 ; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62]
568 ; AVX512VLVBMI2-NEXT: vpermi2b %ymm3, %ymm1, %ymm0
569 ; AVX512VLVBMI2-NEXT: retq
571 ; XOPAVX1-LABEL: var_funnnel_v32i8:
573 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
574 ; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
575 ; XOPAVX1-NEXT: vpsubb %xmm2, %xmm3, %xmm2
576 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
577 ; XOPAVX1-NEXT: vprotb %xmm2, %xmm4, %xmm2
578 ; XOPAVX1-NEXT: vpsubb %xmm1, %xmm3, %xmm1
579 ; XOPAVX1-NEXT: vprotb %xmm1, %xmm0, %xmm0
580 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
583 ; XOPAVX2-LABEL: var_funnnel_v32i8:
585 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
586 ; XOPAVX2-NEXT: vpsubb %ymm1, %ymm2, %ymm1
587 ; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
588 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
589 ; XOPAVX2-NEXT: vprotb %xmm2, %xmm3, %xmm2
590 ; XOPAVX2-NEXT: vprotb %xmm1, %xmm0, %xmm0
591 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
593 %res = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %x, <32 x i8> %x, <32 x i8> %amt)
598 ; Uniform Variable Shifts (one variable amount, splatted from element 0 of %amt, is applied to every lane)
; Rotate-right of each i64 lane of %x by a uniform (splatted) amount.
; The rotate is expressed as llvm.fshr with %x passed as both data operands;
; the amount is element 0 of %amt broadcast to every lane by the
; zeroinitializer shuffle mask.
; The per-target assertions below are autogenerated (see the note on the first
; line of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
601 define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind {
602 ; AVX1-LABEL: splatvar_funnnel_v4i64:
604 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm2 = [63,63]
605 ; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
606 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
607 ; AVX1-NEXT: vpsrlq %xmm3, %xmm4, %xmm5
608 ; AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm3
609 ; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm3, %ymm3
610 ; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
611 ; AVX1-NEXT: vpsubq %xmm1, %xmm5, %xmm1
612 ; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
613 ; AVX1-NEXT: vpsllq %xmm1, %xmm4, %xmm2
614 ; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
615 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
616 ; AVX1-NEXT: vorps %ymm0, %ymm3, %ymm0
619 ; AVX2-LABEL: splatvar_funnnel_v4i64:
621 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm2 = [63,63]
622 ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
623 ; AVX2-NEXT: vpsrlq %xmm3, %ymm0, %ymm3
624 ; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
625 ; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1
626 ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
627 ; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
628 ; AVX2-NEXT: vpor %ymm0, %ymm3, %ymm0
631 ; AVX512F-LABEL: splatvar_funnnel_v4i64:
633 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
634 ; AVX512F-NEXT: vpbroadcastq %xmm1, %ymm1
635 ; AVX512F-NEXT: vprorvq %zmm1, %zmm0, %zmm0
636 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
639 ; AVX512VL-LABEL: splatvar_funnnel_v4i64:
641 ; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
642 ; AVX512VL-NEXT: vprorvq %ymm1, %ymm0, %ymm0
643 ; AVX512VL-NEXT: retq
645 ; AVX512BW-LABEL: splatvar_funnnel_v4i64:
647 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
648 ; AVX512BW-NEXT: vpbroadcastq %xmm1, %ymm1
649 ; AVX512BW-NEXT: vprorvq %zmm1, %zmm0, %zmm0
650 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
651 ; AVX512BW-NEXT: retq
653 ; AVX512VLBW-LABEL: splatvar_funnnel_v4i64:
654 ; AVX512VLBW: # %bb.0:
655 ; AVX512VLBW-NEXT: vpbroadcastq %xmm1, %ymm1
656 ; AVX512VLBW-NEXT: vprorvq %ymm1, %ymm0, %ymm0
657 ; AVX512VLBW-NEXT: retq
659 ; AVX512VBMI2-LABEL: splatvar_funnnel_v4i64:
660 ; AVX512VBMI2: # %bb.0:
661 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
662 ; AVX512VBMI2-NEXT: vpbroadcastq %xmm1, %ymm1
663 ; AVX512VBMI2-NEXT: vprorvq %zmm1, %zmm0, %zmm0
664 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
665 ; AVX512VBMI2-NEXT: retq
667 ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v4i64:
668 ; AVX512VLVBMI2: # %bb.0:
669 ; AVX512VLVBMI2-NEXT: vpbroadcastq %xmm1, %ymm1
670 ; AVX512VLVBMI2-NEXT: vprorvq %ymm1, %ymm0, %ymm0
671 ; AVX512VLVBMI2-NEXT: retq
673 ; XOPAVX1-LABEL: splatvar_funnnel_v4i64:
675 ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
676 ; XOPAVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm1
677 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
678 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
679 ; XOPAVX1-NEXT: vprotq %xmm1, %xmm2, %xmm2
680 ; XOPAVX1-NEXT: vprotq %xmm1, %xmm0, %xmm0
681 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
684 ; XOPAVX2-LABEL: splatvar_funnnel_v4i64:
686 ; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1
687 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
688 ; XOPAVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm1
689 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
690 ; XOPAVX2-NEXT: vprotq %xmm1, %xmm2, %xmm2
691 ; XOPAVX2-NEXT: vprotq %xmm1, %xmm0, %xmm0
692 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
694 %splat = shufflevector <4 x i64> %amt, <4 x i64> undef, <4 x i32> zeroinitializer
695 %res = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x, <4 x i64> %x, <4 x i64> %splat)
; Rotate-right of each i32 lane of %x by a uniform (splatted) amount.
; The rotate is expressed as llvm.fshr with %x passed as both data operands;
; the amount is element 0 of %amt broadcast to all eight lanes by the
; zeroinitializer shuffle mask.
; The per-target assertions below are autogenerated (see the note on the first
; line of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
699 define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind {
700 ; AVX1-LABEL: splatvar_funnnel_v8i32:
702 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
703 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
704 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,2,3,3]
705 ; AVX1-NEXT: vpsrlq %xmm1, %xmm3, %xmm3
706 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[2,2,3,3]
707 ; AVX1-NEXT: vpsrlq %xmm1, %xmm4, %xmm4
708 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
709 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
710 ; AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
711 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
712 ; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
713 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
714 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm3[0,2],ymm0[4,6],ymm3[4,6]
717 ; AVX2-LABEL: splatvar_funnnel_v8i32:
719 ; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm0[2,2,3,3,6,6,7,7]
720 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
721 ; AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
722 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
723 ; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
724 ; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
727 ; AVX512F-LABEL: splatvar_funnnel_v8i32:
729 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
730 ; AVX512F-NEXT: vpbroadcastd %xmm1, %ymm1
731 ; AVX512F-NEXT: vprorvd %zmm1, %zmm0, %zmm0
732 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
735 ; AVX512VL-LABEL: splatvar_funnnel_v8i32:
737 ; AVX512VL-NEXT: vpbroadcastd %xmm1, %ymm1
738 ; AVX512VL-NEXT: vprorvd %ymm1, %ymm0, %ymm0
739 ; AVX512VL-NEXT: retq
741 ; AVX512BW-LABEL: splatvar_funnnel_v8i32:
743 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
744 ; AVX512BW-NEXT: vpbroadcastd %xmm1, %ymm1
745 ; AVX512BW-NEXT: vprorvd %zmm1, %zmm0, %zmm0
746 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
747 ; AVX512BW-NEXT: retq
749 ; AVX512VLBW-LABEL: splatvar_funnnel_v8i32:
750 ; AVX512VLBW: # %bb.0:
751 ; AVX512VLBW-NEXT: vpbroadcastd %xmm1, %ymm1
752 ; AVX512VLBW-NEXT: vprorvd %ymm1, %ymm0, %ymm0
753 ; AVX512VLBW-NEXT: retq
755 ; AVX512VBMI2-LABEL: splatvar_funnnel_v8i32:
756 ; AVX512VBMI2: # %bb.0:
757 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
758 ; AVX512VBMI2-NEXT: vpbroadcastd %xmm1, %ymm1
759 ; AVX512VBMI2-NEXT: vprorvd %zmm1, %zmm0, %zmm0
760 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
761 ; AVX512VBMI2-NEXT: retq
763 ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v8i32:
764 ; AVX512VLVBMI2: # %bb.0:
765 ; AVX512VLVBMI2-NEXT: vpbroadcastd %xmm1, %ymm1
766 ; AVX512VLVBMI2-NEXT: vprorvd %ymm1, %ymm0, %ymm0
767 ; AVX512VLVBMI2-NEXT: retq
769 ; XOPAVX1-LABEL: splatvar_funnnel_v8i32:
771 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
772 ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
773 ; XOPAVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
774 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
775 ; XOPAVX1-NEXT: vprotd %xmm1, %xmm2, %xmm2
776 ; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0
777 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
780 ; XOPAVX2-LABEL: splatvar_funnnel_v8i32:
782 ; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
783 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
784 ; XOPAVX2-NEXT: vpsubd %xmm1, %xmm2, %xmm1
785 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
786 ; XOPAVX2-NEXT: vprotd %xmm1, %xmm2, %xmm2
787 ; XOPAVX2-NEXT: vprotd %xmm1, %xmm0, %xmm0
788 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
790 %splat = shufflevector <8 x i32> %amt, <8 x i32> undef, <8 x i32> zeroinitializer
791 %res = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x, <8 x i32> %x, <8 x i32> %splat)
; Rotate-right of each i16 lane of %x by a uniform (splatted) amount.
; The rotate is expressed as llvm.fshr with %x passed as both data operands;
; the amount is element 0 of %amt broadcast to all sixteen lanes by the
; zeroinitializer shuffle mask.
; The per-target assertions below are autogenerated (see the note on the first
; line of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
795 define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind {
796 ; AVX1-LABEL: splatvar_funnnel_v16i16:
798 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm2 = [15,0]
799 ; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
800 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
801 ; AVX1-NEXT: vpsrlw %xmm3, %xmm4, %xmm5
802 ; AVX1-NEXT: vpandn %xmm2, %xmm1, %xmm1
803 ; AVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm2
804 ; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
805 ; AVX1-NEXT: vpor %xmm5, %xmm2, %xmm2
806 ; AVX1-NEXT: vpsrlw %xmm3, %xmm0, %xmm3
807 ; AVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0
808 ; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
809 ; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
810 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
813 ; AVX2-LABEL: splatvar_funnnel_v16i16:
815 ; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
816 ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
817 ; AVX2-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
818 ; AVX2-NEXT: vpandn %xmm2, %xmm1, %xmm1
819 ; AVX2-NEXT: vpaddw %ymm0, %ymm0, %ymm0
820 ; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
821 ; AVX2-NEXT: vpor %ymm3, %ymm0, %ymm0
824 ; AVX512F-LABEL: splatvar_funnnel_v16i16:
826 ; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
827 ; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3
828 ; AVX512F-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
829 ; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm1
830 ; AVX512F-NEXT: vpaddw %ymm0, %ymm0, %ymm0
831 ; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0
832 ; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0
835 ; AVX512VL-LABEL: splatvar_funnnel_v16i16:
837 ; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
838 ; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3
839 ; AVX512VL-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
840 ; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm1
841 ; AVX512VL-NEXT: vpaddw %ymm0, %ymm0, %ymm0
842 ; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
843 ; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0
844 ; AVX512VL-NEXT: retq
846 ; AVX512BW-LABEL: splatvar_funnnel_v16i16:
848 ; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
849 ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
850 ; AVX512BW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
851 ; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1
852 ; AVX512BW-NEXT: vpaddw %ymm0, %ymm0, %ymm0
853 ; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
854 ; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0
855 ; AVX512BW-NEXT: retq
857 ; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
858 ; AVX512VLBW: # %bb.0:
859 ; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
860 ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
861 ; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
862 ; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1
863 ; AVX512VLBW-NEXT: vpaddw %ymm0, %ymm0, %ymm0
864 ; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
865 ; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0
866 ; AVX512VLBW-NEXT: retq
868 ; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16:
869 ; AVX512VBMI2: # %bb.0:
870 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
871 ; AVX512VBMI2-NEXT: vpbroadcastw %xmm1, %ymm1
872 ; AVX512VBMI2-NEXT: vpshrdvw %zmm1, %zmm0, %zmm0
873 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
874 ; AVX512VBMI2-NEXT: retq
876 ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i16:
877 ; AVX512VLVBMI2: # %bb.0:
878 ; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm1, %ymm1
879 ; AVX512VLVBMI2-NEXT: vpshrdvw %ymm1, %ymm0, %ymm0
880 ; AVX512VLVBMI2-NEXT: retq
882 ; XOPAVX1-LABEL: splatvar_funnnel_v16i16:
884 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
885 ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
886 ; XOPAVX1-NEXT: vpsubw %xmm1, %xmm2, %xmm1
887 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
888 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
889 ; XOPAVX1-NEXT: vprotw %xmm1, %xmm2, %xmm2
890 ; XOPAVX1-NEXT: vprotw %xmm1, %xmm0, %xmm0
891 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
894 ; XOPAVX2-LABEL: splatvar_funnnel_v16i16:
896 ; XOPAVX2-NEXT: vpbroadcastw %xmm1, %xmm1
897 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
898 ; XOPAVX2-NEXT: vpsubw %xmm1, %xmm2, %xmm1
899 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
900 ; XOPAVX2-NEXT: vprotw %xmm1, %xmm2, %xmm2
901 ; XOPAVX2-NEXT: vprotw %xmm1, %xmm0, %xmm0
902 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
904 %splat = shufflevector <16 x i16> %amt, <16 x i16> undef, <16 x i32> zeroinitializer
905 %res = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x, <16 x i16> %x, <16 x i16> %splat)
; Rotate-right of each i8 lane of %x by a uniform (splatted) amount.
; The rotate is expressed as llvm.fshr with %x passed as both data operands;
; the amount is element 0 of %amt broadcast to all thirty-two lanes by the
; zeroinitializer shuffle mask.
; The per-target assertions below are autogenerated (see the note on the first
; line of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
909 define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
910 ; AVX1-LABEL: splatvar_funnnel_v32i8:
912 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
913 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
914 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
915 ; AVX1-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
916 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
917 ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
918 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
919 ; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
920 ; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
921 ; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
922 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
923 ; AVX1-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
924 ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
925 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
926 ; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
927 ; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
928 ; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
929 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
932 ; AVX2-LABEL: splatvar_funnnel_v32i8:
934 ; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
935 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
936 ; AVX2-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
937 ; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
938 ; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
939 ; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
940 ; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
941 ; AVX2-NEXT: vpand %ymm3, %ymm0, %ymm0
942 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
945 ; AVX512F-LABEL: splatvar_funnnel_v32i8:
947 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
948 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
949 ; AVX512F-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
950 ; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
951 ; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
952 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
953 ; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
954 ; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
955 ; AVX512F-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
958 ; AVX512VL-LABEL: splatvar_funnnel_v32i8:
960 ; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
961 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
962 ; AVX512VL-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
963 ; AVX512VL-NEXT: vpbroadcastd {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
964 ; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
965 ; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
966 ; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
967 ; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
968 ; AVX512VL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
969 ; AVX512VL-NEXT: retq
971 ; AVX512BW-LABEL: splatvar_funnnel_v32i8:
973 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
974 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
975 ; AVX512BW-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
976 ; AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
977 ; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm2
978 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
979 ; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
980 ; AVX512BW-NEXT: vpand %ymm3, %ymm0, %ymm0
981 ; AVX512BW-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
982 ; AVX512BW-NEXT: retq
984 ; AVX512VLBW-LABEL: splatvar_funnnel_v32i8:
985 ; AVX512VLBW: # %bb.0:
986 ; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
987 ; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
988 ; AVX512VLBW-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
989 ; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
990 ; AVX512VLBW-NEXT: vpand %ymm3, %ymm2, %ymm2
991 ; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
992 ; AVX512VLBW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
993 ; AVX512VLBW-NEXT: vpand %ymm3, %ymm0, %ymm0
994 ; AVX512VLBW-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
995 ; AVX512VLBW-NEXT: retq
997 ; AVX512VBMI2-LABEL: splatvar_funnnel_v32i8:
998 ; AVX512VBMI2: # %bb.0:
999 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14,64,66,68,70,72,74,76,78,16,18,20,22,24,26,28,30,80,82,84,86,88,90,92,94]
1000 ; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
1001 ; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1002 ; AVX512VBMI2-NEXT: vpsrlw %xmm1, %ymm3, %ymm3
1003 ; AVX512VBMI2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
1004 ; AVX512VBMI2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
1005 ; AVX512VBMI2-NEXT: vpermt2b %zmm3, %zmm2, %zmm0
1006 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1007 ; AVX512VBMI2-NEXT: retq
1009 ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i8:
1010 ; AVX512VLVBMI2: # %bb.0:
1011 ; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
1012 ; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1013 ; AVX512VLVBMI2-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
1014 ; AVX512VLVBMI2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
1015 ; AVX512VLVBMI2-NEXT: vpsrlw %xmm1, %ymm0, %ymm1
1016 ; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62]
1017 ; AVX512VLVBMI2-NEXT: vpermi2b %ymm2, %ymm1, %ymm0
1018 ; AVX512VLVBMI2-NEXT: retq
1020 ; XOPAVX1-LABEL: splatvar_funnnel_v32i8:
1022 ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1023 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1024 ; XOPAVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1
1025 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1026 ; XOPAVX1-NEXT: vprotb %xmm1, %xmm2, %xmm2
1027 ; XOPAVX1-NEXT: vprotb %xmm1, %xmm0, %xmm0
1028 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1029 ; XOPAVX1-NEXT: retq
1031 ; XOPAVX2-LABEL: splatvar_funnnel_v32i8:
1033 ; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
1034 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1035 ; XOPAVX2-NEXT: vpsubb %xmm1, %xmm2, %xmm1
1036 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
1037 ; XOPAVX2-NEXT: vprotb %xmm1, %xmm2, %xmm2
1038 ; XOPAVX2-NEXT: vprotb %xmm1, %xmm0, %xmm0
1039 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1040 ; XOPAVX2-NEXT: retq
1041 %splat = shufflevector <32 x i8> %amt, <32 x i8> undef, <32 x i32> zeroinitializer
1042 %res = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %x, <32 x i8> %x, <32 x i8> %splat)
; Rotate-right of each i64 lane of %x by a per-lane constant amount.
; The rotate is expressed as llvm.fshr with %x passed as both data operands;
; the amounts are the literal vector <4, 14, 50, 60>.
; The per-target assertions below are autogenerated (see the note on the first
; line of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
1050 define <4 x i64> @constant_funnnel_v4i64(<4 x i64> %x) nounwind {
1051 ; AVX1-LABEL: constant_funnnel_v4i64:
1053 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1054 ; AVX1-NEXT: vpsllq $4, %xmm1, %xmm2
1055 ; AVX1-NEXT: vpsllq $14, %xmm1, %xmm3
1056 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
1057 ; AVX1-NEXT: vpsllq $50, %xmm0, %xmm3
1058 ; AVX1-NEXT: vpsllq $60, %xmm0, %xmm4
1059 ; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
1060 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1061 ; AVX1-NEXT: vpsrlq $60, %xmm1, %xmm3
1062 ; AVX1-NEXT: vpsrlq $50, %xmm1, %xmm1
1063 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
1064 ; AVX1-NEXT: vpsrlq $14, %xmm0, %xmm3
1065 ; AVX1-NEXT: vpsrlq $4, %xmm0, %xmm0
1066 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
1067 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1068 ; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
1071 ; AVX2-LABEL: constant_funnnel_v4i64:
1073 ; AVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
1074 ; AVX2-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1075 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1078 ; AVX512F-LABEL: constant_funnnel_v4i64:
1080 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1081 ; AVX512F-NEXT: vpmovsxbq {{.*#+}} ymm1 = [4,14,50,60]
1082 ; AVX512F-NEXT: vprorvq %zmm1, %zmm0, %zmm0
1083 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1084 ; AVX512F-NEXT: retq
1086 ; AVX512VL-LABEL: constant_funnnel_v4i64:
1087 ; AVX512VL: # %bb.0:
1088 ; AVX512VL-NEXT: vprorvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1089 ; AVX512VL-NEXT: retq
1091 ; AVX512BW-LABEL: constant_funnnel_v4i64:
1092 ; AVX512BW: # %bb.0:
1093 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1094 ; AVX512BW-NEXT: vpmovsxbq {{.*#+}} ymm1 = [4,14,50,60]
1095 ; AVX512BW-NEXT: vprorvq %zmm1, %zmm0, %zmm0
1096 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1097 ; AVX512BW-NEXT: retq
1099 ; AVX512VLBW-LABEL: constant_funnnel_v4i64:
1100 ; AVX512VLBW: # %bb.0:
1101 ; AVX512VLBW-NEXT: vprorvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1102 ; AVX512VLBW-NEXT: retq
1104 ; AVX512VBMI2-LABEL: constant_funnnel_v4i64:
1105 ; AVX512VBMI2: # %bb.0:
1106 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1107 ; AVX512VBMI2-NEXT: vpmovsxbq {{.*#+}} ymm1 = [4,14,50,60]
1108 ; AVX512VBMI2-NEXT: vprorvq %zmm1, %zmm0, %zmm0
1109 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1110 ; AVX512VBMI2-NEXT: retq
1112 ; AVX512VLVBMI2-LABEL: constant_funnnel_v4i64:
1113 ; AVX512VLVBMI2: # %bb.0:
1114 ; AVX512VLVBMI2-NEXT: vprorvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1115 ; AVX512VLVBMI2-NEXT: retq
1117 ; XOPAVX1-LABEL: constant_funnnel_v4i64:
1119 ; XOPAVX1-NEXT: vprotq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1120 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1121 ; XOPAVX1-NEXT: vprotq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1122 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1123 ; XOPAVX1-NEXT: retq
1125 ; XOPAVX2-LABEL: constant_funnnel_v4i64:
1127 ; XOPAVX2-NEXT: vprotq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1128 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1129 ; XOPAVX2-NEXT: vprotq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1130 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1131 ; XOPAVX2-NEXT: retq
1132 %res = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x, <4 x i64> %x, <4 x i64> <i64 4, i64 14, i64 50, i64 60>)
; Rotate-right of each i32 lane of %x by a per-lane constant amount.
; The rotate is expressed as llvm.fshr with %x passed as both data operands;
; the amounts are the literal vector <4, 5, 6, 7, 8, 9, 10, 11>.
; The per-target assertions below are autogenerated (see the note on the first
; line of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
1136 define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x) nounwind {
1137 ; AVX1-LABEL: constant_funnnel_v8i32:
1139 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1140 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1141 ; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1142 ; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1143 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
1144 ; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
1145 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
1146 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
1147 ; AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1
1148 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1149 ; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1150 ; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1151 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1152 ; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
1153 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
1154 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
1155 ; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
1156 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1159 ; AVX2-LABEL: constant_funnnel_v8i32:
1161 ; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
1162 ; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1163 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1166 ; AVX512F-LABEL: constant_funnnel_v8i32:
1168 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1169 ; AVX512F-NEXT: vpmovsxbd {{.*#+}} ymm1 = [4,5,6,7,8,9,10,11]
1170 ; AVX512F-NEXT: vprorvd %zmm1, %zmm0, %zmm0
1171 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1172 ; AVX512F-NEXT: retq
1174 ; AVX512VL-LABEL: constant_funnnel_v8i32:
1175 ; AVX512VL: # %bb.0:
1176 ; AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1177 ; AVX512VL-NEXT: retq
1179 ; AVX512BW-LABEL: constant_funnnel_v8i32:
1180 ; AVX512BW: # %bb.0:
1181 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1182 ; AVX512BW-NEXT: vpmovsxbd {{.*#+}} ymm1 = [4,5,6,7,8,9,10,11]
1183 ; AVX512BW-NEXT: vprorvd %zmm1, %zmm0, %zmm0
1184 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1185 ; AVX512BW-NEXT: retq
1187 ; AVX512VLBW-LABEL: constant_funnnel_v8i32:
1188 ; AVX512VLBW: # %bb.0:
1189 ; AVX512VLBW-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1190 ; AVX512VLBW-NEXT: retq
1192 ; AVX512VBMI2-LABEL: constant_funnnel_v8i32:
1193 ; AVX512VBMI2: # %bb.0:
1194 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1195 ; AVX512VBMI2-NEXT: vpmovsxbd {{.*#+}} ymm1 = [4,5,6,7,8,9,10,11]
1196 ; AVX512VBMI2-NEXT: vprorvd %zmm1, %zmm0, %zmm0
1197 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1198 ; AVX512VBMI2-NEXT: retq
1200 ; AVX512VLVBMI2-LABEL: constant_funnnel_v8i32:
1201 ; AVX512VLVBMI2: # %bb.0:
1202 ; AVX512VLVBMI2-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1203 ; AVX512VLVBMI2-NEXT: retq
1205 ; XOPAVX1-LABEL: constant_funnnel_v8i32:
1207 ; XOPAVX1-NEXT: vprotd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1208 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1209 ; XOPAVX1-NEXT: vprotd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1210 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1211 ; XOPAVX1-NEXT: retq
1213 ; XOPAVX2-LABEL: constant_funnnel_v8i32:
1215 ; XOPAVX2-NEXT: vprotd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1216 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1217 ; XOPAVX2-NEXT: vprotd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1218 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1219 ; XOPAVX2-NEXT: retq
1220 %res = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x, <8 x i32> %x, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>)
; Rotate-right of each i16 lane of %x by a per-lane constant amount.
; The rotate is expressed as llvm.fshr with %x passed as both data operands;
; the amounts are the literal vector <0, 1, 2, ..., 15>, so lane 0 is rotated
; by zero.
; The per-target assertions below are autogenerated (see the note on the first
; line of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
1224 define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x) nounwind {
1225 ; AVX1-LABEL: constant_funnnel_v16i16:
1227 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1228 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [256,128,64,32,16,8,4,2]
1229 ; AVX1-NEXT: vpmulhuw %xmm2, %xmm1, %xmm3
1230 ; AVX1-NEXT: vpmullw %xmm2, %xmm1, %xmm1
1231 ; AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1
1232 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,32768,16384,8192,4096,2048,1024,512]
1233 ; AVX1-NEXT: vpmulhuw %xmm2, %xmm0, %xmm3
1234 ; AVX1-NEXT: vpmullw %xmm2, %xmm0, %xmm0
1235 ; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
1236 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1239 ; AVX2-LABEL: constant_funnnel_v16i16:
1241 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2]
1242 ; AVX2-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2
1243 ; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
1244 ; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
1247 ; AVX512F-LABEL: constant_funnnel_v16i16:
1249 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [1,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2]
1250 ; AVX512F-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2
1251 ; AVX512F-NEXT: vpmullw %ymm1, %ymm0, %ymm0
1252 ; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0
1253 ; AVX512F-NEXT: retq
1255 ; AVX512VL-LABEL: constant_funnnel_v16i16:
1256 ; AVX512VL: # %bb.0:
1257 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2]
1258 ; AVX512VL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2
1259 ; AVX512VL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
1260 ; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0
1261 ; AVX512VL-NEXT: retq
1263 ; AVX512BW-LABEL: constant_funnnel_v16i16:
1264 ; AVX512BW: # %bb.0:
1265 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1266 ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm1 = [16,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
1267 ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm1
1268 ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm2 = [0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1]
1269 ; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
1270 ; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
1271 ; AVX512BW-NEXT: retq
1273 ; AVX512VLBW-LABEL: constant_funnnel_v16i16:
1274 ; AVX512VLBW: # %bb.0:
1275 ; AVX512VLBW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
1276 ; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1277 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
1278 ; AVX512VLBW-NEXT: retq
1280 ; AVX512VBMI2-LABEL: constant_funnnel_v16i16:
1281 ; AVX512VBMI2: # %bb.0:
1282 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1283 ; AVX512VBMI2-NEXT: vpmovsxbw {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
1284 ; AVX512VBMI2-NEXT: vpshrdvw %zmm1, %zmm0, %zmm0
1285 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1286 ; AVX512VBMI2-NEXT: retq
1288 ; AVX512VLVBMI2-LABEL: constant_funnnel_v16i16:
1289 ; AVX512VLVBMI2: # %bb.0:
1290 ; AVX512VLVBMI2-NEXT: vpshrdvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1291 ; AVX512VLVBMI2-NEXT: retq
1293 ; XOPAVX1-LABEL: constant_funnnel_v16i16:
1295 ; XOPAVX1-NEXT: vprotw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1296 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1297 ; XOPAVX1-NEXT: vprotw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1298 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1299 ; XOPAVX1-NEXT: retq
1301 ; XOPAVX2-LABEL: constant_funnnel_v16i16:
1303 ; XOPAVX2-NEXT: vprotw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1304 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1305 ; XOPAVX2-NEXT: vprotw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1306 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1307 ; XOPAVX2-NEXT: retq
1308 %res = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x, <16 x i16> %x, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
1312 define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x) nounwind {
1313 ; AVX1-LABEL: constant_funnnel_v32i8:
1315 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1316 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1317 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128]
1318 ; AVX1-NEXT: vpmullw %xmm3, %xmm2, %xmm2
1319 ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
1320 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1321 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = [1,128,64,32,16,8,4,2]
1322 ; AVX1-NEXT: vpmullw %xmm4, %xmm1, %xmm1
1323 ; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
1324 ; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
1325 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1326 ; AVX1-NEXT: vpmullw %xmm3, %xmm2, %xmm2
1327 ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
1328 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1329 ; AVX1-NEXT: vpmullw %xmm4, %xmm0, %xmm0
1330 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
1331 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
1332 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1335 ; AVX2-LABEL: constant_funnnel_v32i8:
1337 ; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
1338 ; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
1339 ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
1340 ; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
1341 ; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2]
1342 ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
1343 ; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
1346 ; AVX512F-LABEL: constant_funnnel_v32i8:
1348 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
1349 ; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
1350 ; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1
1351 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
1352 ; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2]
1353 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0
1354 ; AVX512F-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
1355 ; AVX512F-NEXT: retq
1357 ; AVX512VL-LABEL: constant_funnnel_v32i8:
1358 ; AVX512VL: # %bb.0:
1359 ; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
1360 ; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
1361 ; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
1362 ; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
1363 ; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2]
1364 ; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
1365 ; AVX512VL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
1366 ; AVX512VL-NEXT: retq
1368 ; AVX512BW-LABEL: constant_funnnel_v32i8:
1369 ; AVX512BW: # %bb.0:
1370 ; AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,1,0,2,0,3,0,4,0,5,0,6,0,7,0,0,0,1,0,2,0,3,0,4,0,5,0,6,0,7,0]
1371 ; AVX512BW-NEXT: # ymm1 = mem[0,1,0,1]
1372 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
1373 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm2, %zmm1
1374 ; AVX512BW-NEXT: vpsrlw $8, %ymm1, %ymm1
1375 ; AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,0,7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0,7,0,6,0,5,0,4,0,3,0,2,0,1,0]
1376 ; AVX512BW-NEXT: # ymm2 = mem[0,1,0,1]
1377 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
1378 ; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
1379 ; AVX512BW-NEXT: vpsrlw $8, %ymm0, %ymm0
1380 ; AVX512BW-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
1381 ; AVX512BW-NEXT: retq
1383 ; AVX512VLBW-LABEL: constant_funnnel_v32i8:
1384 ; AVX512VLBW: # %bb.0:
1385 ; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
1386 ; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
1387 ; AVX512VLBW-NEXT: vpsrlw $8, %ymm1, %ymm1
1388 ; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
1389 ; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1390 ; AVX512VLBW-NEXT: vpsrlw $8, %ymm0, %ymm0
1391 ; AVX512VLBW-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
1392 ; AVX512VLBW-NEXT: retq
1394 ; AVX512VBMI2-LABEL: constant_funnnel_v32i8:
1395 ; AVX512VBMI2: # %bb.0:
1396 ; AVX512VBMI2-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,1,0,2,0,3,0,4,0,5,0,6,0,7,0,0,0,1,0,2,0,3,0,4,0,5,0,6,0,7,0]
1397 ; AVX512VBMI2-NEXT: # ymm1 = mem[0,1,0,1]
1398 ; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
1399 ; AVX512VBMI2-NEXT: vpsllvw %zmm1, %zmm2, %zmm1
1400 ; AVX512VBMI2-NEXT: vpsrlw $8, %ymm1, %ymm1
1401 ; AVX512VBMI2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,0,7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0,7,0,6,0,5,0,4,0,3,0,2,0,1,0]
1402 ; AVX512VBMI2-NEXT: # ymm2 = mem[0,1,0,1]
1403 ; AVX512VBMI2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
1404 ; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
1405 ; AVX512VBMI2-NEXT: vpsrlw $8, %ymm0, %ymm0
1406 ; AVX512VBMI2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
1407 ; AVX512VBMI2-NEXT: retq
1409 ; AVX512VLVBMI2-LABEL: constant_funnnel_v32i8:
1410 ; AVX512VLVBMI2: # %bb.0:
1411 ; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
1412 ; AVX512VLVBMI2-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
1413 ; AVX512VLVBMI2-NEXT: vpsrlw $8, %ymm1, %ymm1
1414 ; AVX512VLVBMI2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
1415 ; AVX512VLVBMI2-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1416 ; AVX512VLVBMI2-NEXT: vpsrlw $8, %ymm0, %ymm0
1417 ; AVX512VLVBMI2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
1418 ; AVX512VLVBMI2-NEXT: retq
1420 ; XOPAVX1-LABEL: constant_funnnel_v32i8:
1422 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1423 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7]
1424 ; XOPAVX1-NEXT: vprotb %xmm2, %xmm1, %xmm1
1425 ; XOPAVX1-NEXT: vprotb %xmm2, %xmm0, %xmm0
1426 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1427 ; XOPAVX1-NEXT: retq
1429 ; XOPAVX2-LABEL: constant_funnnel_v32i8:
1431 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1432 ; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7]
1433 ; XOPAVX2-NEXT: vprotb %xmm2, %xmm1, %xmm1
1434 ; XOPAVX2-NEXT: vprotb %xmm2, %xmm0, %xmm0
1435 ; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1436 ; XOPAVX2-NEXT: retq
1437 %res = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %x, <32 x i8> %x, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1>)
1442 ; Uniform Constant Shifts
; Splat-constant rotate of <4 x i64>. Both data operands of the llvm.fshr call
; below are %x and every lane of the shift amount is 14, so (per the LangRef
; definition of fshr) each 64-bit lane is rotated right by 14.
; Expected lowering recorded by the autogenerated check lines in this function
;  - AVX1 runs use vpsllq $50 and vpsrlq $14 on each 128-bit half, then vorps
;  - AVX2 runs use the same shift pair on the full ymm, combined with vpor
;  - AVX512 runs without VL use vprorq $14 on zmm0, with VL on ymm0
;  - XOP runs use vprotq $50 on each xmm half (50 = 64 - 14)
; Regenerate the assertions with utils/update_llc_test_checks.py instead of
; editing them by hand.
1445 define <4 x i64> @splatconstant_funnnel_v4i64(<4 x i64> %x) nounwind {
1446 ; AVX1-LABEL: splatconstant_funnnel_v4i64:
1448 ; AVX1-NEXT: vpsllq $50, %xmm0, %xmm1
1449 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1450 ; AVX1-NEXT: vpsllq $50, %xmm2, %xmm3
1451 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
1452 ; AVX1-NEXT: vpsrlq $14, %xmm0, %xmm0
1453 ; AVX1-NEXT: vpsrlq $14, %xmm2, %xmm2
1454 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1455 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1458 ; AVX2-LABEL: splatconstant_funnnel_v4i64:
1460 ; AVX2-NEXT: vpsllq $50, %ymm0, %ymm1
1461 ; AVX2-NEXT: vpsrlq $14, %ymm0, %ymm0
1462 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1465 ; AVX512F-LABEL: splatconstant_funnnel_v4i64:
1467 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1468 ; AVX512F-NEXT: vprorq $14, %zmm0, %zmm0
1469 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1470 ; AVX512F-NEXT: retq
1472 ; AVX512VL-LABEL: splatconstant_funnnel_v4i64:
1473 ; AVX512VL: # %bb.0:
1474 ; AVX512VL-NEXT: vprorq $14, %ymm0, %ymm0
1475 ; AVX512VL-NEXT: retq
1477 ; AVX512BW-LABEL: splatconstant_funnnel_v4i64:
1478 ; AVX512BW: # %bb.0:
1479 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1480 ; AVX512BW-NEXT: vprorq $14, %zmm0, %zmm0
1481 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1482 ; AVX512BW-NEXT: retq
1484 ; AVX512VLBW-LABEL: splatconstant_funnnel_v4i64:
1485 ; AVX512VLBW: # %bb.0:
1486 ; AVX512VLBW-NEXT: vprorq $14, %ymm0, %ymm0
1487 ; AVX512VLBW-NEXT: retq
1489 ; AVX512VBMI2-LABEL: splatconstant_funnnel_v4i64:
1490 ; AVX512VBMI2: # %bb.0:
1491 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1492 ; AVX512VBMI2-NEXT: vprorq $14, %zmm0, %zmm0
1493 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1494 ; AVX512VBMI2-NEXT: retq
1496 ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v4i64:
1497 ; AVX512VLVBMI2: # %bb.0:
1498 ; AVX512VLVBMI2-NEXT: vprorq $14, %ymm0, %ymm0
1499 ; AVX512VLVBMI2-NEXT: retq
1501 ; XOPAVX1-LABEL: splatconstant_funnnel_v4i64:
1503 ; XOPAVX1-NEXT: vprotq $50, %xmm0, %xmm1
1504 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1505 ; XOPAVX1-NEXT: vprotq $50, %xmm0, %xmm0
1506 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1507 ; XOPAVX1-NEXT: retq
1509 ; XOPAVX2-LABEL: splatconstant_funnnel_v4i64:
1511 ; XOPAVX2-NEXT: vprotq $50, %xmm0, %xmm1
1512 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1513 ; XOPAVX2-NEXT: vprotq $50, %xmm0, %xmm0
1514 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1515 ; XOPAVX2-NEXT: retq
1516 %res = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x, <4 x i64> %x, <4 x i64> <i64 14, i64 14, i64 14, i64 14>)
; Splat-constant rotate of <8 x i32>. Both data operands of the llvm.fshr call
; below are %x and every lane of the shift amount is 4, so (per the LangRef
; definition of fshr) each 32-bit lane is rotated right by 4.
; Expected lowering recorded by the autogenerated check lines in this function
;  - AVX1 runs use vpsrld $4 and vpslld $28 plus vpor on each 128-bit half
;  - AVX2 runs use the same shift pair and vpor on the full ymm
;  - AVX512 runs without VL use vprord $4 on zmm0, with VL on ymm0
;  - XOP runs use vprotd $28 on each xmm half (28 = 32 - 4)
; Regenerate the assertions with utils/update_llc_test_checks.py instead of
; editing them by hand.
1520 define <8 x i32> @splatconstant_funnnel_v8i32(<8 x i32> %x) nounwind {
1521 ; AVX1-LABEL: splatconstant_funnnel_v8i32:
1523 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1524 ; AVX1-NEXT: vpsrld $4, %xmm1, %xmm2
1525 ; AVX1-NEXT: vpslld $28, %xmm1, %xmm1
1526 ; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1
1527 ; AVX1-NEXT: vpsrld $4, %xmm0, %xmm2
1528 ; AVX1-NEXT: vpslld $28, %xmm0, %xmm0
1529 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
1530 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1533 ; AVX2-LABEL: splatconstant_funnnel_v8i32:
1535 ; AVX2-NEXT: vpsrld $4, %ymm0, %ymm1
1536 ; AVX2-NEXT: vpslld $28, %ymm0, %ymm0
1537 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1540 ; AVX512F-LABEL: splatconstant_funnnel_v8i32:
1542 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1543 ; AVX512F-NEXT: vprord $4, %zmm0, %zmm0
1544 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1545 ; AVX512F-NEXT: retq
1547 ; AVX512VL-LABEL: splatconstant_funnnel_v8i32:
1548 ; AVX512VL: # %bb.0:
1549 ; AVX512VL-NEXT: vprord $4, %ymm0, %ymm0
1550 ; AVX512VL-NEXT: retq
1552 ; AVX512BW-LABEL: splatconstant_funnnel_v8i32:
1553 ; AVX512BW: # %bb.0:
1554 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1555 ; AVX512BW-NEXT: vprord $4, %zmm0, %zmm0
1556 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1557 ; AVX512BW-NEXT: retq
1559 ; AVX512VLBW-LABEL: splatconstant_funnnel_v8i32:
1560 ; AVX512VLBW: # %bb.0:
1561 ; AVX512VLBW-NEXT: vprord $4, %ymm0, %ymm0
1562 ; AVX512VLBW-NEXT: retq
1564 ; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i32:
1565 ; AVX512VBMI2: # %bb.0:
1566 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1567 ; AVX512VBMI2-NEXT: vprord $4, %zmm0, %zmm0
1568 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1569 ; AVX512VBMI2-NEXT: retq
1571 ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v8i32:
1572 ; AVX512VLVBMI2: # %bb.0:
1573 ; AVX512VLVBMI2-NEXT: vprord $4, %ymm0, %ymm0
1574 ; AVX512VLVBMI2-NEXT: retq
1576 ; XOPAVX1-LABEL: splatconstant_funnnel_v8i32:
1578 ; XOPAVX1-NEXT: vprotd $28, %xmm0, %xmm1
1579 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1580 ; XOPAVX1-NEXT: vprotd $28, %xmm0, %xmm0
1581 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1582 ; XOPAVX1-NEXT: retq
1584 ; XOPAVX2-LABEL: splatconstant_funnnel_v8i32:
1586 ; XOPAVX2-NEXT: vprotd $28, %xmm0, %xmm1
1587 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1588 ; XOPAVX2-NEXT: vprotd $28, %xmm0, %xmm0
1589 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1590 ; XOPAVX2-NEXT: retq
1591 %res = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x, <8 x i32> %x, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>)
; Splat-constant rotate of <16 x i16>. Both data operands of the llvm.fshr call
; below are %x and every lane of the shift amount is 7, so (per the LangRef
; definition of fshr) each 16-bit lane is rotated right by 7.
; Expected lowering recorded by the autogenerated check lines in this function
;  - AVX1 runs use vpsrlw $7 and vpsllw $9 plus vpor on each 128-bit half
;  - AVX2 and the AVX512 F, VL, BW and VLBW runs use the same shift pair and
;    vpor on the full ymm (9 = 16 - 7)
;  - the two VBMI2 runs use vpshrdw $7 with the same register as both sources,
;    on zmm0 without VL and on ymm0 with VL
;  - XOP runs use vprotw $9 on each xmm half
; Regenerate the assertions with utils/update_llc_test_checks.py instead of
; editing them by hand.
1595 define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x) nounwind {
1596 ; AVX1-LABEL: splatconstant_funnnel_v16i16:
1598 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1599 ; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm2
1600 ; AVX1-NEXT: vpsllw $9, %xmm1, %xmm1
1601 ; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1
1602 ; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm2
1603 ; AVX1-NEXT: vpsllw $9, %xmm0, %xmm0
1604 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
1605 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1608 ; AVX2-LABEL: splatconstant_funnnel_v16i16:
1610 ; AVX2-NEXT: vpsrlw $7, %ymm0, %ymm1
1611 ; AVX2-NEXT: vpsllw $9, %ymm0, %ymm0
1612 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1615 ; AVX512F-LABEL: splatconstant_funnnel_v16i16:
1617 ; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm1
1618 ; AVX512F-NEXT: vpsllw $9, %ymm0, %ymm0
1619 ; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
1620 ; AVX512F-NEXT: retq
1622 ; AVX512VL-LABEL: splatconstant_funnnel_v16i16:
1623 ; AVX512VL: # %bb.0:
1624 ; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm1
1625 ; AVX512VL-NEXT: vpsllw $9, %ymm0, %ymm0
1626 ; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
1627 ; AVX512VL-NEXT: retq
1629 ; AVX512BW-LABEL: splatconstant_funnnel_v16i16:
1630 ; AVX512BW: # %bb.0:
1631 ; AVX512BW-NEXT: vpsrlw $7, %ymm0, %ymm1
1632 ; AVX512BW-NEXT: vpsllw $9, %ymm0, %ymm0
1633 ; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
1634 ; AVX512BW-NEXT: retq
1636 ; AVX512VLBW-LABEL: splatconstant_funnnel_v16i16:
1637 ; AVX512VLBW: # %bb.0:
1638 ; AVX512VLBW-NEXT: vpsrlw $7, %ymm0, %ymm1
1639 ; AVX512VLBW-NEXT: vpsllw $9, %ymm0, %ymm0
1640 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
1641 ; AVX512VLBW-NEXT: retq
1643 ; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i16:
1644 ; AVX512VBMI2: # %bb.0:
1645 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1646 ; AVX512VBMI2-NEXT: vpshrdw $7, %zmm0, %zmm0, %zmm0
1647 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1648 ; AVX512VBMI2-NEXT: retq
1650 ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i16:
1651 ; AVX512VLVBMI2: # %bb.0:
1652 ; AVX512VLVBMI2-NEXT: vpshrdw $7, %ymm0, %ymm0, %ymm0
1653 ; AVX512VLVBMI2-NEXT: retq
1655 ; XOPAVX1-LABEL: splatconstant_funnnel_v16i16:
1657 ; XOPAVX1-NEXT: vprotw $9, %xmm0, %xmm1
1658 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1659 ; XOPAVX1-NEXT: vprotw $9, %xmm0, %xmm0
1660 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1661 ; XOPAVX1-NEXT: retq
1663 ; XOPAVX2-LABEL: splatconstant_funnnel_v16i16:
1665 ; XOPAVX2-NEXT: vprotw $9, %xmm0, %xmm1
1666 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1667 ; XOPAVX2-NEXT: vprotw $9, %xmm0, %xmm0
1668 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1669 ; XOPAVX2-NEXT: retq
1670 %res = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x, <16 x i16> %x, <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
; Splat-constant rotate of <32 x i8>. Both data operands of the llvm.fshr call
; below are %x and every lane of the shift amount is 4, so (per the LangRef
; definition of fshr) each 8-bit lane is rotated right by 4. Since 8 - 4 = 4
; the left and right shift immediates in the expected code are both 4.
; Expected lowering recorded by the autogenerated check lines in this function
;  - AVX1 runs use vpsrlw $4 and vpsllw $4 per 128-bit half, selecting bits with
;    a splat 240 mask through vpandn and vpand before vpor
;  - AVX2 runs use the same word shifts on ymm with two constant-pool vpand
;    masks, then vpor
;  - all AVX512 runs use vpsllw $4, vpsrlw $4 and one vpternlogd $216 with a
;    broadcast memory operand, {1to16} on zmm without VL and {1to8} on ymm
;    with VL
;  - XOP runs use vprotb $4 on each xmm half
; Regenerate the assertions with utils/update_llc_test_checks.py instead of
; editing them by hand.
1674 define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
1675 ; AVX1-LABEL: splatconstant_funnnel_v32i8:
1677 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1678 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm2
1679 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
1680 ; AVX1-NEXT: vpandn %xmm2, %xmm3, %xmm2
1681 ; AVX1-NEXT: vpsllw $4, %xmm1, %xmm1
1682 ; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
1683 ; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1
1684 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm2
1685 ; AVX1-NEXT: vpandn %xmm2, %xmm3, %xmm2
1686 ; AVX1-NEXT: vpsllw $4, %xmm0, %xmm0
1687 ; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
1688 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
1689 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1692 ; AVX2-LABEL: splatconstant_funnnel_v32i8:
1694 ; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm1
1695 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
1696 ; AVX2-NEXT: vpsllw $4, %ymm0, %ymm0
1697 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1698 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1701 ; AVX512F-LABEL: splatconstant_funnnel_v32i8:
1703 ; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1
1704 ; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
1705 ; AVX512F-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
1706 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1707 ; AVX512F-NEXT: retq
1709 ; AVX512VL-LABEL: splatconstant_funnnel_v32i8:
1710 ; AVX512VL: # %bb.0:
1711 ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
1712 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
1713 ; AVX512VL-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm0
1714 ; AVX512VL-NEXT: retq
1716 ; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
1717 ; AVX512BW: # %bb.0:
1718 ; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm1
1719 ; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm0
1720 ; AVX512BW-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
1721 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1722 ; AVX512BW-NEXT: retq
1724 ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8:
1725 ; AVX512VLBW: # %bb.0:
1726 ; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm1
1727 ; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm0
1728 ; AVX512VLBW-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm0
1729 ; AVX512VLBW-NEXT: retq
1731 ; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8:
1732 ; AVX512VBMI2: # %bb.0:
1733 ; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm1
1734 ; AVX512VBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0
1735 ; AVX512VBMI2-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
1736 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1737 ; AVX512VBMI2-NEXT: retq
1739 ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8:
1740 ; AVX512VLVBMI2: # %bb.0:
1741 ; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm1
1742 ; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0
1743 ; AVX512VLVBMI2-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm0
1744 ; AVX512VLVBMI2-NEXT: retq
1746 ; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
1748 ; XOPAVX1-NEXT: vprotb $4, %xmm0, %xmm1
1749 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1750 ; XOPAVX1-NEXT: vprotb $4, %xmm0, %xmm0
1751 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1752 ; XOPAVX1-NEXT: retq
1754 ; XOPAVX2-LABEL: splatconstant_funnnel_v32i8:
1756 ; XOPAVX2-NEXT: vprotb $4, %xmm0, %xmm1
1757 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1758 ; XOPAVX2-NEXT: vprotb $4, %xmm0, %xmm0
1759 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1760 ; XOPAVX2-NEXT: retq
1761 %res = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %x, <32 x i8> %x, <32 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)