1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=AVX512VL
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512BW
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512VLBW
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefix=AVX512VBMI2
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefix=AVX512VLVBMI2
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=XOPAVX1
11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=XOPAVX2
; Funnel-shift-right intrinsics called by the tests in this file, one overload
; per 256-bit integer vector type (<4 x i64>, <8 x i32>, <16 x i16>, <32 x i8>).
13 declare <4 x i64> @llvm.fshr.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)
14 declare <8 x i32> @llvm.fshr.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)
15 declare <16 x i16> @llvm.fshr.v16i16(<16 x i16>, <16 x i16>, <16 x i16>)
16 declare <32 x i8> @llvm.fshr.v32i8(<32 x i8>, <32 x i8>, <32 x i8>)
; Variable funnel shift right on <4 x i64> where both data operands are %x and
; %amt supplies a separate amount for every element. With identical data
; operands the LangRef defines fshr as a rotate right, which is why the AVX-512
; runs below expect a single vprorvq and the XOP runs expect vprotq on a
; negated amount.
; The assertions that follow are autogenerated (see the NOTE at the top of the
; file); regenerate them with the update script instead of editing by hand.
22 define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind {
23 ; AVX1-LABEL: var_funnnel_v4i64:
25 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
26 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm3
27 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
28 ; AVX1-NEXT: vpsrlq %xmm4, %xmm2, %xmm5
29 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
30 ; AVX1-NEXT: vpsrlq %xmm4, %xmm2, %xmm4
31 ; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7]
32 ; AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm5
33 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
34 ; AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm3
35 ; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm5[0,1,2,3],xmm3[4,5,6,7]
36 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
37 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
38 ; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
39 ; AVX1-NEXT: vpsubq %xmm4, %xmm5, %xmm4
40 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [63,63]
41 ; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm4
42 ; AVX1-NEXT: vpsllq %xmm4, %xmm2, %xmm7
43 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
44 ; AVX1-NEXT: vpsllq %xmm4, %xmm2, %xmm2
45 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm7[0,1,2,3],xmm2[4,5,6,7]
46 ; AVX1-NEXT: vpsubq %xmm1, %xmm5, %xmm1
47 ; AVX1-NEXT: vpand %xmm6, %xmm1, %xmm1
48 ; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm4
49 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
50 ; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
51 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm4[0,1,2,3],xmm0[4,5,6,7]
52 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
53 ; AVX1-NEXT: vorps %ymm0, %ymm3, %ymm0
56 ; AVX2-LABEL: var_funnnel_v4i64:
58 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [63,63,63,63]
59 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm3
60 ; AVX2-NEXT: vpsrlvq %ymm3, %ymm0, %ymm3
61 ; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
62 ; AVX2-NEXT: vpsubq %ymm1, %ymm4, %ymm1
63 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
64 ; AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
65 ; AVX2-NEXT: vpor %ymm0, %ymm3, %ymm0
68 ; AVX512F-LABEL: var_funnnel_v4i64:
70 ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
71 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
72 ; AVX512F-NEXT: vprorvq %zmm1, %zmm0, %zmm0
73 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
76 ; AVX512VL-LABEL: var_funnnel_v4i64:
78 ; AVX512VL-NEXT: vprorvq %ymm1, %ymm0, %ymm0
81 ; AVX512BW-LABEL: var_funnnel_v4i64:
83 ; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
84 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
85 ; AVX512BW-NEXT: vprorvq %zmm1, %zmm0, %zmm0
86 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
89 ; AVX512VLBW-LABEL: var_funnnel_v4i64:
90 ; AVX512VLBW: # %bb.0:
91 ; AVX512VLBW-NEXT: vprorvq %ymm1, %ymm0, %ymm0
92 ; AVX512VLBW-NEXT: retq
94 ; AVX512VBMI2-LABEL: var_funnnel_v4i64:
95 ; AVX512VBMI2: # %bb.0:
96 ; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
97 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
98 ; AVX512VBMI2-NEXT: vprorvq %zmm1, %zmm0, %zmm0
99 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
100 ; AVX512VBMI2-NEXT: retq
102 ; AVX512VLVBMI2-LABEL: var_funnnel_v4i64:
103 ; AVX512VLVBMI2: # %bb.0:
104 ; AVX512VLVBMI2-NEXT: vprorvq %ymm1, %ymm0, %ymm0
105 ; AVX512VLVBMI2-NEXT: retq
107 ; XOPAVX1-LABEL: var_funnnel_v4i64:
109 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
110 ; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
111 ; XOPAVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2
112 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
113 ; XOPAVX1-NEXT: vprotq %xmm2, %xmm4, %xmm2
114 ; XOPAVX1-NEXT: vpsubq %xmm1, %xmm3, %xmm1
115 ; XOPAVX1-NEXT: vprotq %xmm1, %xmm0, %xmm0
116 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
119 ; XOPAVX2-LABEL: var_funnnel_v4i64:
121 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
122 ; XOPAVX2-NEXT: vpsubq %ymm1, %ymm2, %ymm1
123 ; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
124 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
125 ; XOPAVX2-NEXT: vprotq %xmm2, %xmm3, %xmm2
126 ; XOPAVX2-NEXT: vprotq %xmm1, %xmm0, %xmm0
127 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
129 %res = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x, <4 x i64> %x, <4 x i64> %amt)
; Variable funnel shift right on <8 x i32> where both data operands are %x and
; %amt supplies a separate amount for every element. With identical data
; operands the LangRef defines fshr as a rotate right, which is why the AVX-512
; runs below expect a single vprorvd and the XOP runs expect vprotd on a
; negated amount.
; The assertions that follow are autogenerated (see the NOTE at the top of the
; file); regenerate them with the update script instead of editing by hand.
133 define <8 x i32> @var_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind {
134 ; AVX1-LABEL: var_funnnel_v8i32:
136 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
137 ; AVX1-NEXT: vpxor %xmm8, %xmm8, %xmm8
138 ; AVX1-NEXT: vpsubd %xmm2, %xmm8, %xmm2
139 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [31,31,31,31]
140 ; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
141 ; AVX1-NEXT: vpslld $23, %xmm2, %xmm2
142 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [1065353216,1065353216,1065353216,1065353216]
143 ; AVX1-NEXT: vpaddd %xmm5, %xmm2, %xmm2
144 ; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2
145 ; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[1,1,3,3]
146 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7
147 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm7[1,1,3,3]
148 ; AVX1-NEXT: vpmuludq %xmm6, %xmm3, %xmm3
149 ; AVX1-NEXT: vpmuludq %xmm2, %xmm7, %xmm2
150 ; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[1,1,3,3]
151 ; AVX1-NEXT: vpblendw {{.*#+}} xmm6 = xmm6[0,1],xmm3[2,3],xmm6[4,5],xmm3[6,7]
152 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,0,2,2]
153 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
154 ; AVX1-NEXT: vpor %xmm6, %xmm2, %xmm2
155 ; AVX1-NEXT: vpsubd %xmm1, %xmm8, %xmm1
156 ; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
157 ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
158 ; AVX1-NEXT: vpaddd %xmm5, %xmm1, %xmm1
159 ; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
160 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
161 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
162 ; AVX1-NEXT: vpmuludq %xmm3, %xmm4, %xmm3
163 ; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
164 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
165 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
166 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,0,2,2]
167 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5],xmm3[6,7]
168 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
169 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
172 ; AVX2-LABEL: var_funnnel_v8i32:
174 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
175 ; AVX2-NEXT: vpsubd %ymm1, %ymm2, %ymm1
176 ; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [31,31,31,31,31,31,31,31]
177 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
178 ; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm2
179 ; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [32,32,32,32,32,32,32,32]
180 ; AVX2-NEXT: vpsubd %ymm1, %ymm3, %ymm1
181 ; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
182 ; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
185 ; AVX512F-LABEL: var_funnnel_v8i32:
187 ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
188 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
189 ; AVX512F-NEXT: vprorvd %zmm1, %zmm0, %zmm0
190 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
193 ; AVX512VL-LABEL: var_funnnel_v8i32:
195 ; AVX512VL-NEXT: vprorvd %ymm1, %ymm0, %ymm0
196 ; AVX512VL-NEXT: retq
198 ; AVX512BW-LABEL: var_funnnel_v8i32:
200 ; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
201 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
202 ; AVX512BW-NEXT: vprorvd %zmm1, %zmm0, %zmm0
203 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
204 ; AVX512BW-NEXT: retq
206 ; AVX512VLBW-LABEL: var_funnnel_v8i32:
207 ; AVX512VLBW: # %bb.0:
208 ; AVX512VLBW-NEXT: vprorvd %ymm1, %ymm0, %ymm0
209 ; AVX512VLBW-NEXT: retq
211 ; AVX512VBMI2-LABEL: var_funnnel_v8i32:
212 ; AVX512VBMI2: # %bb.0:
213 ; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
214 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
215 ; AVX512VBMI2-NEXT: vprorvd %zmm1, %zmm0, %zmm0
216 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
217 ; AVX512VBMI2-NEXT: retq
219 ; AVX512VLVBMI2-LABEL: var_funnnel_v8i32:
220 ; AVX512VLVBMI2: # %bb.0:
221 ; AVX512VLVBMI2-NEXT: vprorvd %ymm1, %ymm0, %ymm0
222 ; AVX512VLVBMI2-NEXT: retq
224 ; XOPAVX1-LABEL: var_funnnel_v8i32:
226 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
227 ; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
228 ; XOPAVX1-NEXT: vpsubd %xmm2, %xmm3, %xmm2
229 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
230 ; XOPAVX1-NEXT: vprotd %xmm2, %xmm4, %xmm2
231 ; XOPAVX1-NEXT: vpsubd %xmm1, %xmm3, %xmm1
232 ; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0
233 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
236 ; XOPAVX2-LABEL: var_funnnel_v8i32:
238 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
239 ; XOPAVX2-NEXT: vpsubd %ymm1, %ymm2, %ymm1
240 ; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
241 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
242 ; XOPAVX2-NEXT: vprotd %xmm2, %xmm3, %xmm2
243 ; XOPAVX2-NEXT: vprotd %xmm1, %xmm0, %xmm0
244 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
246 %res = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x, <8 x i32> %x, <8 x i32> %amt)
; Variable funnel shift right on <16 x i16> where both data operands are %x
; and %amt supplies a separate amount for every element (a per-element rotate
; right per the LangRef definition of fshr with identical data operands).
; Of the configurations checked below, only the VBMI2 runs expect a single
; funnel-shift instruction (vpshrdvw) and only the XOP runs expect a rotate
; (vprotw on a negated amount); the remaining runs expect a shift-or-multiply
; expansion.
; The assertions that follow are autogenerated (see the NOTE at the top of the
; file); regenerate them with the update script instead of editing by hand.
250 define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind {
251 ; AVX1-LABEL: var_funnnel_v16i16:
253 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
254 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
255 ; AVX1-NEXT: vpsubw %xmm2, %xmm3, %xmm2
256 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15]
257 ; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
258 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm2[4,4,5,5,6,6,7,7]
259 ; AVX1-NEXT: vpslld $23, %xmm5, %xmm5
260 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [1065353216,1065353216,1065353216,1065353216]
261 ; AVX1-NEXT: vpaddd %xmm6, %xmm5, %xmm5
262 ; AVX1-NEXT: vcvttps2dq %xmm5, %xmm5
263 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
264 ; AVX1-NEXT: vpslld $23, %xmm2, %xmm2
265 ; AVX1-NEXT: vpaddd %xmm6, %xmm2, %xmm2
266 ; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2
267 ; AVX1-NEXT: vpackusdw %xmm5, %xmm2, %xmm2
268 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
269 ; AVX1-NEXT: vpmulhuw %xmm2, %xmm5, %xmm7
270 ; AVX1-NEXT: vpmullw %xmm2, %xmm5, %xmm2
271 ; AVX1-NEXT: vpor %xmm7, %xmm2, %xmm2
272 ; AVX1-NEXT: vpsubw %xmm1, %xmm3, %xmm1
273 ; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
274 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm1[4,4,5,5,6,6,7,7]
275 ; AVX1-NEXT: vpslld $23, %xmm3, %xmm3
276 ; AVX1-NEXT: vpaddd %xmm6, %xmm3, %xmm3
277 ; AVX1-NEXT: vcvttps2dq %xmm3, %xmm3
278 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
279 ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
280 ; AVX1-NEXT: vpaddd %xmm6, %xmm1, %xmm1
281 ; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
282 ; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
283 ; AVX1-NEXT: vpmulhuw %xmm1, %xmm0, %xmm3
284 ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
285 ; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
286 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
289 ; AVX2-LABEL: var_funnnel_v16i16:
291 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
292 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
293 ; AVX2-NEXT: vpsubw %ymm1, %ymm2, %ymm1
294 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
295 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
296 ; AVX2-NEXT: vpsllvd %ymm4, %ymm3, %ymm4
297 ; AVX2-NEXT: vpsrld $16, %ymm4, %ymm4
298 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
299 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm5 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
300 ; AVX2-NEXT: vpsllvd %ymm5, %ymm0, %ymm5
301 ; AVX2-NEXT: vpsrld $16, %ymm5, %ymm5
302 ; AVX2-NEXT: vpackusdw %ymm4, %ymm5, %ymm4
303 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
304 ; AVX2-NEXT: vpsubw %ymm1, %ymm5, %ymm1
305 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
306 ; AVX2-NEXT: vpsrlvd %ymm5, %ymm3, %ymm3
307 ; AVX2-NEXT: vpsrld $16, %ymm3, %ymm3
308 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
309 ; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
310 ; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
311 ; AVX2-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
312 ; AVX2-NEXT: vpor %ymm0, %ymm4, %ymm0
315 ; AVX512F-LABEL: var_funnnel_v16i16:
317 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
318 ; AVX512F-NEXT: vpsubw %ymm1, %ymm2, %ymm1
319 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
320 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
321 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
322 ; AVX512F-NEXT: vpsllvd %zmm2, %zmm0, %zmm2
323 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
324 ; AVX512F-NEXT: vpsubw %ymm1, %ymm3, %ymm1
325 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
326 ; AVX512F-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
327 ; AVX512F-NEXT: vpord %zmm0, %zmm2, %zmm0
328 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
331 ; AVX512VL-LABEL: var_funnnel_v16i16:
333 ; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
334 ; AVX512VL-NEXT: vpsubw %ymm1, %ymm2, %ymm1
335 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
336 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
337 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
338 ; AVX512VL-NEXT: vpsllvd %zmm2, %zmm0, %zmm2
339 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
340 ; AVX512VL-NEXT: vpsubw %ymm1, %ymm3, %ymm1
341 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
342 ; AVX512VL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
343 ; AVX512VL-NEXT: vpord %zmm0, %zmm2, %zmm0
344 ; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
345 ; AVX512VL-NEXT: retq
347 ; AVX512BW-LABEL: var_funnnel_v16i16:
349 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
350 ; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
351 ; AVX512BW-NEXT: vpsubw %ymm1, %ymm2, %ymm1
352 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
353 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2
354 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
355 ; AVX512BW-NEXT: vpsubw %ymm1, %ymm3, %ymm1
356 ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
357 ; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0
358 ; AVX512BW-NEXT: retq
360 ; AVX512VLBW-LABEL: var_funnnel_v16i16:
361 ; AVX512VLBW: # %bb.0:
362 ; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
363 ; AVX512VLBW-NEXT: vpsubw %ymm1, %ymm2, %ymm1
364 ; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
365 ; AVX512VLBW-NEXT: vpsllvw %ymm1, %ymm0, %ymm2
366 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
367 ; AVX512VLBW-NEXT: vpsubw %ymm1, %ymm3, %ymm1
368 ; AVX512VLBW-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
369 ; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0
370 ; AVX512VLBW-NEXT: retq
372 ; AVX512VBMI2-LABEL: var_funnnel_v16i16:
373 ; AVX512VBMI2: # %bb.0:
374 ; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
375 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
376 ; AVX512VBMI2-NEXT: vpshrdvw %zmm1, %zmm0, %zmm0
377 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
378 ; AVX512VBMI2-NEXT: retq
380 ; AVX512VLVBMI2-LABEL: var_funnnel_v16i16:
381 ; AVX512VLVBMI2: # %bb.0:
382 ; AVX512VLVBMI2-NEXT: vpshrdvw %ymm1, %ymm0, %ymm0
383 ; AVX512VLVBMI2-NEXT: retq
385 ; XOPAVX1-LABEL: var_funnnel_v16i16:
387 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
388 ; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
389 ; XOPAVX1-NEXT: vpsubw %xmm2, %xmm3, %xmm2
390 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
391 ; XOPAVX1-NEXT: vprotw %xmm2, %xmm4, %xmm2
392 ; XOPAVX1-NEXT: vpsubw %xmm1, %xmm3, %xmm1
393 ; XOPAVX1-NEXT: vprotw %xmm1, %xmm0, %xmm0
394 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
397 ; XOPAVX2-LABEL: var_funnnel_v16i16:
399 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
400 ; XOPAVX2-NEXT: vpsubw %ymm1, %ymm2, %ymm1
401 ; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
402 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
403 ; XOPAVX2-NEXT: vprotw %xmm2, %xmm3, %xmm2
404 ; XOPAVX2-NEXT: vprotw %xmm1, %xmm0, %xmm0
405 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
407 %res = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x, <16 x i16> %x, <16 x i16> %amt)
; Variable funnel shift right on <32 x i8> where both data operands are %x and
; %amt supplies a separate amount for every element (a per-element rotate
; right per the LangRef definition of fshr with identical data operands).
; Of the configurations checked below, only the XOP runs expect a rotate
; instruction (vprotb on a negated amount). The AVX1, AVX2, AVX512F and
; AVX512VL runs expect a staged rotate-by-4/2/1 selected with vpblendvb, and
; the BW and VBMI2 runs expect the bytes to be widened to words, shifted both
; ways, OR-ed and truncated back.
; The assertions that follow are autogenerated (see the NOTE at the top of the
; file); regenerate them with the update script instead of editing by hand.
411 define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
412 ; AVX1-LABEL: var_funnnel_v32i8:
414 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
415 ; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm3
416 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
417 ; AVX1-NEXT: vpandn %xmm3, %xmm4, %xmm3
418 ; AVX1-NEXT: vpsllw $4, %xmm2, %xmm5
419 ; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm5
420 ; AVX1-NEXT: vpor %xmm3, %xmm5, %xmm3
421 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
422 ; AVX1-NEXT: vpxor %xmm8, %xmm8, %xmm8
423 ; AVX1-NEXT: vpsubb %xmm5, %xmm8, %xmm5
424 ; AVX1-NEXT: vpsllw $5, %xmm5, %xmm5
425 ; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
426 ; AVX1-NEXT: vpsrlw $6, %xmm2, %xmm3
427 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
428 ; AVX1-NEXT: vpandn %xmm3, %xmm7, %xmm3
429 ; AVX1-NEXT: vpsllw $2, %xmm2, %xmm6
430 ; AVX1-NEXT: vpand %xmm7, %xmm6, %xmm6
431 ; AVX1-NEXT: vpor %xmm3, %xmm6, %xmm3
432 ; AVX1-NEXT: vpaddb %xmm5, %xmm5, %xmm5
433 ; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
434 ; AVX1-NEXT: vpsrlw $7, %xmm2, %xmm3
435 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm9 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
436 ; AVX1-NEXT: vpand %xmm3, %xmm9, %xmm3
437 ; AVX1-NEXT: vpaddb %xmm2, %xmm2, %xmm6
438 ; AVX1-NEXT: vpor %xmm3, %xmm6, %xmm3
439 ; AVX1-NEXT: vpaddb %xmm5, %xmm5, %xmm5
440 ; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
441 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm3
442 ; AVX1-NEXT: vpandn %xmm3, %xmm4, %xmm3
443 ; AVX1-NEXT: vpsllw $4, %xmm0, %xmm5
444 ; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm4
445 ; AVX1-NEXT: vpor %xmm3, %xmm4, %xmm3
446 ; AVX1-NEXT: vpsubb %xmm1, %xmm8, %xmm1
447 ; AVX1-NEXT: vpsllw $5, %xmm1, %xmm1
448 ; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
449 ; AVX1-NEXT: vpsrlw $6, %xmm0, %xmm3
450 ; AVX1-NEXT: vpandn %xmm3, %xmm7, %xmm3
451 ; AVX1-NEXT: vpsllw $2, %xmm0, %xmm4
452 ; AVX1-NEXT: vpand %xmm7, %xmm4, %xmm4
453 ; AVX1-NEXT: vpor %xmm3, %xmm4, %xmm3
454 ; AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm1
455 ; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
456 ; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm3
457 ; AVX1-NEXT: vpand %xmm3, %xmm9, %xmm3
458 ; AVX1-NEXT: vpaddb %xmm0, %xmm0, %xmm4
459 ; AVX1-NEXT: vpor %xmm3, %xmm4, %xmm3
460 ; AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm1
461 ; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
462 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
465 ; AVX2-LABEL: var_funnnel_v32i8:
467 ; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm2
468 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
469 ; AVX2-NEXT: vpsllw $4, %ymm0, %ymm3
470 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
471 ; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2
472 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
473 ; AVX2-NEXT: vpsubb %ymm1, %ymm3, %ymm1
474 ; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
475 ; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
476 ; AVX2-NEXT: vpsrlw $6, %ymm0, %ymm2
477 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
478 ; AVX2-NEXT: vpsllw $2, %ymm0, %ymm3
479 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
480 ; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2
481 ; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
482 ; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
483 ; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm2
484 ; AVX2-NEXT: vpsrlw $7, %ymm0, %ymm3
485 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
486 ; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
487 ; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
488 ; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
491 ; AVX512F-LABEL: var_funnnel_v32i8:
493 ; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2
494 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
495 ; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm3
496 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
497 ; AVX512F-NEXT: vpor %ymm2, %ymm3, %ymm2
498 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
499 ; AVX512F-NEXT: vpsubb %ymm1, %ymm3, %ymm1
500 ; AVX512F-NEXT: vpsllw $5, %ymm1, %ymm1
501 ; AVX512F-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
502 ; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm2
503 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
504 ; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm3
505 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
506 ; AVX512F-NEXT: vpor %ymm2, %ymm3, %ymm2
507 ; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm1
508 ; AVX512F-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
509 ; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm2
510 ; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm3
511 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
512 ; AVX512F-NEXT: vpor %ymm3, %ymm2, %ymm2
513 ; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm1
514 ; AVX512F-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
517 ; AVX512VL-LABEL: var_funnnel_v32i8:
519 ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
520 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
521 ; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3
522 ; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
523 ; AVX512VL-NEXT: vpsubb %ymm1, %ymm2, %ymm1
524 ; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
525 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
526 ; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2
527 ; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3
528 ; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3
529 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
530 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
531 ; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm2
532 ; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm3
533 ; AVX512VL-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3
534 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
535 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
536 ; AVX512VL-NEXT: retq
538 ; AVX512BW-LABEL: var_funnnel_v32i8:
540 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
541 ; AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm3
542 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero,ymm3[16],zero,ymm3[17],zero,ymm3[18],zero,ymm3[19],zero,ymm3[20],zero,ymm3[21],zero,ymm3[22],zero,ymm3[23],zero,ymm3[24],zero,ymm3[25],zero,ymm3[26],zero,ymm3[27],zero,ymm3[28],zero,ymm3[29],zero,ymm3[30],zero,ymm3[31],zero
543 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
544 ; AVX512BW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3
545 ; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
546 ; AVX512BW-NEXT: vpsubb %ymm1, %ymm4, %ymm1
547 ; AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm1
548 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
549 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
550 ; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0
551 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
552 ; AVX512BW-NEXT: retq
554 ; AVX512VLBW-LABEL: var_funnnel_v32i8:
555 ; AVX512VLBW: # %bb.0:
556 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
557 ; AVX512VLBW-NEXT: vpand %ymm2, %ymm1, %ymm3
558 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero,ymm3[16],zero,ymm3[17],zero,ymm3[18],zero,ymm3[19],zero,ymm3[20],zero,ymm3[21],zero,ymm3[22],zero,ymm3[23],zero,ymm3[24],zero,ymm3[25],zero,ymm3[26],zero,ymm3[27],zero,ymm3[28],zero,ymm3[29],zero,ymm3[30],zero,ymm3[31],zero
559 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
560 ; AVX512VLBW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3
561 ; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
562 ; AVX512VLBW-NEXT: vpsubb %ymm1, %ymm4, %ymm1
563 ; AVX512VLBW-NEXT: vpand %ymm2, %ymm1, %ymm1
564 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
565 ; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
566 ; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0
567 ; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0
568 ; AVX512VLBW-NEXT: retq
570 ; AVX512VBMI2-LABEL: var_funnnel_v32i8:
571 ; AVX512VBMI2: # %bb.0:
572 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
573 ; AVX512VBMI2-NEXT: vpand %ymm2, %ymm1, %ymm3
574 ; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero,ymm3[16],zero,ymm3[17],zero,ymm3[18],zero,ymm3[19],zero,ymm3[20],zero,ymm3[21],zero,ymm3[22],zero,ymm3[23],zero,ymm3[24],zero,ymm3[25],zero,ymm3[26],zero,ymm3[27],zero,ymm3[28],zero,ymm3[29],zero,ymm3[30],zero,ymm3[31],zero
575 ; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
576 ; AVX512VBMI2-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3
577 ; AVX512VBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
578 ; AVX512VBMI2-NEXT: vpsubb %ymm1, %ymm4, %ymm1
579 ; AVX512VBMI2-NEXT: vpand %ymm2, %ymm1, %ymm1
580 ; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
581 ; AVX512VBMI2-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
582 ; AVX512VBMI2-NEXT: vporq %zmm0, %zmm3, %zmm0
583 ; AVX512VBMI2-NEXT: vpmovwb %zmm0, %ymm0
584 ; AVX512VBMI2-NEXT: retq
586 ; AVX512VLVBMI2-LABEL: var_funnnel_v32i8:
587 ; AVX512VLVBMI2: # %bb.0:
588 ; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
589 ; AVX512VLVBMI2-NEXT: vpand %ymm2, %ymm1, %ymm3
590 ; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero,ymm3[16],zero,ymm3[17],zero,ymm3[18],zero,ymm3[19],zero,ymm3[20],zero,ymm3[21],zero,ymm3[22],zero,ymm3[23],zero,ymm3[24],zero,ymm3[25],zero,ymm3[26],zero,ymm3[27],zero,ymm3[28],zero,ymm3[29],zero,ymm3[30],zero,ymm3[31],zero
591 ; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
592 ; AVX512VLVBMI2-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3
593 ; AVX512VLVBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
594 ; AVX512VLVBMI2-NEXT: vpsubb %ymm1, %ymm4, %ymm1
595 ; AVX512VLVBMI2-NEXT: vpand %ymm2, %ymm1, %ymm1
596 ; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
597 ; AVX512VLVBMI2-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
598 ; AVX512VLVBMI2-NEXT: vporq %zmm0, %zmm3, %zmm0
599 ; AVX512VLVBMI2-NEXT: vpmovwb %zmm0, %ymm0
600 ; AVX512VLVBMI2-NEXT: retq
602 ; XOPAVX1-LABEL: var_funnnel_v32i8:
604 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
605 ; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
606 ; XOPAVX1-NEXT: vpsubb %xmm2, %xmm3, %xmm2
607 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
608 ; XOPAVX1-NEXT: vprotb %xmm2, %xmm4, %xmm2
609 ; XOPAVX1-NEXT: vpsubb %xmm1, %xmm3, %xmm1
610 ; XOPAVX1-NEXT: vprotb %xmm1, %xmm0, %xmm0
611 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
614 ; XOPAVX2-LABEL: var_funnnel_v32i8:
616 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
617 ; XOPAVX2-NEXT: vpsubb %ymm1, %ymm2, %ymm1
618 ; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
619 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
620 ; XOPAVX2-NEXT: vprotb %xmm2, %xmm3, %xmm2
621 ; XOPAVX2-NEXT: vprotb %xmm1, %xmm0, %xmm0
622 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
624 %res = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %x, <32 x i8> %x, <32 x i8> %amt)
629 ; Uniform Variable Shifts
; Rotate right of every i64 lane of %x by one uniform, run-time amount.
; Both data operands of llvm.fshr are %x, which the LLVM LangRef defines as a
; rotate right. The amount is lane 0 of %amt, broadcast to all four lanes by
; the shufflevector with an all-zero mask.
; The assertion lines in this function are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
632 define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind {
633 ; AVX1-LABEL: splatvar_funnnel_v4i64:
635 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
636 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
637 ; AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2
638 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
639 ; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
640 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
641 ; AVX1-NEXT: vpsllq %xmm2, %xmm4, %xmm5
642 ; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,2,3]
643 ; AVX1-NEXT: vpsllq %xmm6, %xmm4, %xmm7
644 ; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm7[4,5,6,7]
645 ; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm2
646 ; AVX1-NEXT: vpsllq %xmm6, %xmm0, %xmm6
647 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
648 ; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm2, %ymm2
649 ; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
650 ; AVX1-NEXT: vpsrlq %xmm1, %xmm4, %xmm3
651 ; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
652 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
653 ; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
656 ; AVX2-LABEL: splatvar_funnnel_v4i64:
658 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63]
659 ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
660 ; AVX2-NEXT: vpsrlq %xmm3, %ymm0, %ymm3
661 ; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
662 ; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1
663 ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
664 ; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
665 ; AVX2-NEXT: vpor %ymm0, %ymm3, %ymm0
668 ; AVX512F-LABEL: splatvar_funnnel_v4i64:
670 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
671 ; AVX512F-NEXT: vpbroadcastq %xmm1, %ymm1
672 ; AVX512F-NEXT: vprorvq %zmm1, %zmm0, %zmm0
673 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
676 ; AVX512VL-LABEL: splatvar_funnnel_v4i64:
678 ; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
679 ; AVX512VL-NEXT: vprorvq %ymm1, %ymm0, %ymm0
680 ; AVX512VL-NEXT: retq
682 ; AVX512BW-LABEL: splatvar_funnnel_v4i64:
684 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
685 ; AVX512BW-NEXT: vpbroadcastq %xmm1, %ymm1
686 ; AVX512BW-NEXT: vprorvq %zmm1, %zmm0, %zmm0
687 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
688 ; AVX512BW-NEXT: retq
690 ; AVX512VLBW-LABEL: splatvar_funnnel_v4i64:
691 ; AVX512VLBW: # %bb.0:
692 ; AVX512VLBW-NEXT: vpbroadcastq %xmm1, %ymm1
693 ; AVX512VLBW-NEXT: vprorvq %ymm1, %ymm0, %ymm0
694 ; AVX512VLBW-NEXT: retq
696 ; AVX512VBMI2-LABEL: splatvar_funnnel_v4i64:
697 ; AVX512VBMI2: # %bb.0:
698 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
699 ; AVX512VBMI2-NEXT: vpbroadcastq %xmm1, %ymm1
700 ; AVX512VBMI2-NEXT: vprorvq %zmm1, %zmm0, %zmm0
701 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
702 ; AVX512VBMI2-NEXT: retq
704 ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v4i64:
705 ; AVX512VLVBMI2: # %bb.0:
706 ; AVX512VLVBMI2-NEXT: vpbroadcastq %xmm1, %ymm1
707 ; AVX512VLVBMI2-NEXT: vprorvq %ymm1, %ymm0, %ymm0
708 ; AVX512VLVBMI2-NEXT: retq
710 ; XOPAVX1-LABEL: splatvar_funnnel_v4i64:
712 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
713 ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
714 ; XOPAVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm1
715 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
716 ; XOPAVX1-NEXT: vprotq %xmm1, %xmm2, %xmm2
717 ; XOPAVX1-NEXT: vprotq %xmm1, %xmm0, %xmm0
718 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
721 ; XOPAVX2-LABEL: splatvar_funnnel_v4i64:
723 ; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1
724 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
725 ; XOPAVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm1
726 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
727 ; XOPAVX2-NEXT: vprotq %xmm1, %xmm2, %xmm2
728 ; XOPAVX2-NEXT: vprotq %xmm1, %xmm0, %xmm0
729 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
731 %splat = shufflevector <4 x i64> %amt, <4 x i64> undef, <4 x i32> zeroinitializer
732 %res = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x, <4 x i64> %x, <4 x i64> %splat)
; Rotate right of every i32 lane of %x by one uniform, run-time amount.
; Both data operands of llvm.fshr are %x, which the LLVM LangRef defines as a
; rotate right. The amount is lane 0 of %amt, broadcast to all eight lanes by
; the shufflevector with an all-zero mask.
; The assertion lines in this function are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
736 define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind {
737 ; AVX1-LABEL: splatvar_funnnel_v8i32:
739 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
740 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
741 ; AVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
742 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
743 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
744 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
745 ; AVX1-NEXT: vpslld %xmm2, %xmm3, %xmm4
746 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [32,32,32,32]
747 ; AVX1-NEXT: vpsubd %xmm1, %xmm5, %xmm1
748 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
749 ; AVX1-NEXT: vpsrld %xmm1, %xmm3, %xmm3
750 ; AVX1-NEXT: vpor %xmm3, %xmm4, %xmm3
751 ; AVX1-NEXT: vpslld %xmm2, %xmm0, %xmm2
752 ; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
753 ; AVX1-NEXT: vpor %xmm0, %xmm2, %xmm0
754 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
757 ; AVX2-LABEL: splatvar_funnnel_v8i32:
759 ; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
760 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
761 ; AVX2-NEXT: vpsubd %xmm1, %xmm2, %xmm1
762 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
763 ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
764 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
765 ; AVX2-NEXT: vpslld %xmm2, %ymm0, %ymm2
766 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
767 ; AVX2-NEXT: vpsubd %xmm1, %xmm3, %xmm1
768 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
769 ; AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
770 ; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
773 ; AVX512F-LABEL: splatvar_funnnel_v8i32:
775 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
776 ; AVX512F-NEXT: vpbroadcastd %xmm1, %ymm1
777 ; AVX512F-NEXT: vprorvd %zmm1, %zmm0, %zmm0
778 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
781 ; AVX512VL-LABEL: splatvar_funnnel_v8i32:
783 ; AVX512VL-NEXT: vpbroadcastd %xmm1, %ymm1
784 ; AVX512VL-NEXT: vprorvd %ymm1, %ymm0, %ymm0
785 ; AVX512VL-NEXT: retq
787 ; AVX512BW-LABEL: splatvar_funnnel_v8i32:
789 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
790 ; AVX512BW-NEXT: vpbroadcastd %xmm1, %ymm1
791 ; AVX512BW-NEXT: vprorvd %zmm1, %zmm0, %zmm0
792 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
793 ; AVX512BW-NEXT: retq
795 ; AVX512VLBW-LABEL: splatvar_funnnel_v8i32:
796 ; AVX512VLBW: # %bb.0:
797 ; AVX512VLBW-NEXT: vpbroadcastd %xmm1, %ymm1
798 ; AVX512VLBW-NEXT: vprorvd %ymm1, %ymm0, %ymm0
799 ; AVX512VLBW-NEXT: retq
801 ; AVX512VBMI2-LABEL: splatvar_funnnel_v8i32:
802 ; AVX512VBMI2: # %bb.0:
803 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
804 ; AVX512VBMI2-NEXT: vpbroadcastd %xmm1, %ymm1
805 ; AVX512VBMI2-NEXT: vprorvd %zmm1, %zmm0, %zmm0
806 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
807 ; AVX512VBMI2-NEXT: retq
809 ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v8i32:
810 ; AVX512VLVBMI2: # %bb.0:
811 ; AVX512VLVBMI2-NEXT: vpbroadcastd %xmm1, %ymm1
812 ; AVX512VLVBMI2-NEXT: vprorvd %ymm1, %ymm0, %ymm0
813 ; AVX512VLVBMI2-NEXT: retq
815 ; XOPAVX1-LABEL: splatvar_funnnel_v8i32:
817 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
818 ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
819 ; XOPAVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
820 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
821 ; XOPAVX1-NEXT: vprotd %xmm1, %xmm2, %xmm2
822 ; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0
823 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
826 ; XOPAVX2-LABEL: splatvar_funnnel_v8i32:
828 ; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
829 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
830 ; XOPAVX2-NEXT: vpsubd %xmm1, %xmm2, %xmm1
831 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
832 ; XOPAVX2-NEXT: vprotd %xmm1, %xmm2, %xmm2
833 ; XOPAVX2-NEXT: vprotd %xmm1, %xmm0, %xmm0
834 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
836 %splat = shufflevector <8 x i32> %amt, <8 x i32> undef, <8 x i32> zeroinitializer
837 %res = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x, <8 x i32> %x, <8 x i32> %splat)
; Rotate right of every i16 lane of %x by one uniform, run-time amount.
; Both data operands of llvm.fshr are %x, which the LLVM LangRef defines as a
; rotate right. The amount is lane 0 of %amt, broadcast to all sixteen lanes by
; the shufflevector with an all-zero mask.
; The assertion lines in this function are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
841 define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind {
842 ; AVX1-LABEL: splatvar_funnnel_v16i16:
844 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
845 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
846 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
847 ; AVX1-NEXT: vpsubw %xmm1, %xmm2, %xmm1
848 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
849 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
850 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
851 ; AVX1-NEXT: vpsllw %xmm2, %xmm3, %xmm4
852 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16]
853 ; AVX1-NEXT: vpsubw %xmm1, %xmm5, %xmm1
854 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
855 ; AVX1-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
856 ; AVX1-NEXT: vpor %xmm3, %xmm4, %xmm3
857 ; AVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm2
858 ; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
859 ; AVX1-NEXT: vpor %xmm0, %xmm2, %xmm0
860 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
863 ; AVX2-LABEL: splatvar_funnnel_v16i16:
865 ; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
866 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
867 ; AVX2-NEXT: vpsubw %xmm1, %xmm2, %xmm1
868 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
869 ; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
870 ; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm2
871 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
872 ; AVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1
873 ; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
874 ; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
875 ; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
878 ; AVX512F-LABEL: splatvar_funnnel_v16i16:
880 ; AVX512F-NEXT: vpbroadcastw %xmm1, %xmm1
881 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
882 ; AVX512F-NEXT: vpsubw %xmm1, %xmm2, %xmm1
883 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
884 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
885 ; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2
886 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
887 ; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
888 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
889 ; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
890 ; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
893 ; AVX512VL-LABEL: splatvar_funnnel_v16i16:
895 ; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm1
896 ; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
897 ; AVX512VL-NEXT: vpsubw %xmm1, %xmm2, %xmm1
898 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
899 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
900 ; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2
901 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
902 ; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
903 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
904 ; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
905 ; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0
906 ; AVX512VL-NEXT: retq
908 ; AVX512BW-LABEL: splatvar_funnnel_v16i16:
910 ; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
911 ; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
912 ; AVX512BW-NEXT: vpsubw %xmm1, %xmm2, %xmm1
913 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
914 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
915 ; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm2
916 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
917 ; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
918 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
919 ; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
920 ; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0
921 ; AVX512BW-NEXT: retq
923 ; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
924 ; AVX512VLBW: # %bb.0:
925 ; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %xmm1
926 ; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
927 ; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm2, %xmm1
928 ; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
929 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
930 ; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm2
931 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
932 ; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
933 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
934 ; AVX512VLBW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
935 ; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0
936 ; AVX512VLBW-NEXT: retq
938 ; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16:
939 ; AVX512VBMI2: # %bb.0:
940 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
941 ; AVX512VBMI2-NEXT: vpbroadcastw %xmm1, %ymm1
942 ; AVX512VBMI2-NEXT: vpshrdvw %zmm1, %zmm0, %zmm0
943 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
944 ; AVX512VBMI2-NEXT: retq
946 ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i16:
947 ; AVX512VLVBMI2: # %bb.0:
948 ; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm1, %ymm1
949 ; AVX512VLVBMI2-NEXT: vpshrdvw %ymm1, %ymm0, %ymm0
950 ; AVX512VLVBMI2-NEXT: retq
952 ; XOPAVX1-LABEL: splatvar_funnnel_v16i16:
954 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
955 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
956 ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
957 ; XOPAVX1-NEXT: vpsubw %xmm1, %xmm2, %xmm1
958 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
959 ; XOPAVX1-NEXT: vprotw %xmm1, %xmm2, %xmm2
960 ; XOPAVX1-NEXT: vprotw %xmm1, %xmm0, %xmm0
961 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
964 ; XOPAVX2-LABEL: splatvar_funnnel_v16i16:
966 ; XOPAVX2-NEXT: vpbroadcastw %xmm1, %xmm1
967 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
968 ; XOPAVX2-NEXT: vpsubw %xmm1, %xmm2, %xmm1
969 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
970 ; XOPAVX2-NEXT: vprotw %xmm1, %xmm2, %xmm2
971 ; XOPAVX2-NEXT: vprotw %xmm1, %xmm0, %xmm0
972 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
974 %splat = shufflevector <16 x i16> %amt, <16 x i16> undef, <16 x i32> zeroinitializer
975 %res = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x, <16 x i16> %x, <16 x i16> %splat)
; Rotate right of every i8 lane of %x by one uniform, run-time amount.
; Both data operands of llvm.fshr are %x, which the LLVM LangRef defines as a
; rotate right. The amount is lane 0 of %amt, broadcast to all thirty-two lanes
; by the shufflevector with an all-zero mask.
; The assertion lines in this function are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
979 define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
980 ; AVX1-LABEL: splatvar_funnnel_v32i8:
982 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
983 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
984 ; AVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1
985 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
986 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
987 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
988 ; AVX1-NEXT: vpsllw %xmm3, %xmm4, %xmm5
989 ; AVX1-NEXT: vpcmpeqd %xmm6, %xmm6, %xmm6
990 ; AVX1-NEXT: vpsllw %xmm3, %xmm6, %xmm7
991 ; AVX1-NEXT: vpshufb %xmm2, %xmm7, %xmm2
992 ; AVX1-NEXT: vpand %xmm2, %xmm5, %xmm5
993 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
994 ; AVX1-NEXT: vpsubb %xmm1, %xmm7, %xmm1
995 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
996 ; AVX1-NEXT: vpsrlw %xmm1, %xmm4, %xmm4
997 ; AVX1-NEXT: vpsrlw %xmm1, %xmm6, %xmm6
998 ; AVX1-NEXT: vpshufb {{.*#+}} xmm6 = xmm6[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
999 ; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm4
1000 ; AVX1-NEXT: vpor %xmm4, %xmm5, %xmm4
1001 ; AVX1-NEXT: vpsllw %xmm3, %xmm0, %xmm3
1002 ; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
1003 ; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
1004 ; AVX1-NEXT: vpand %xmm6, %xmm0, %xmm0
1005 ; AVX1-NEXT: vpor %xmm0, %xmm2, %xmm0
1006 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
1009 ; AVX2-LABEL: splatvar_funnnel_v32i8:
1011 ; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
1012 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1013 ; AVX2-NEXT: vpsubb %xmm1, %xmm2, %xmm1
1014 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1015 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1016 ; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm3
1017 ; AVX2-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
1018 ; AVX2-NEXT: vpsllw %xmm2, %xmm4, %xmm2
1019 ; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
1020 ; AVX2-NEXT: vpand %ymm2, %ymm3, %ymm2
1021 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
1022 ; AVX2-NEXT: vpsubb %xmm1, %xmm3, %xmm1
1023 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1024 ; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
1025 ; AVX2-NEXT: vpsrlw %xmm1, %xmm4, %xmm1
1026 ; AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1
1027 ; AVX2-NEXT: vpbroadcastb %xmm1, %ymm1
1028 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
1029 ; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
1032 ; AVX512F-LABEL: splatvar_funnnel_v32i8:
1034 ; AVX512F-NEXT: vpbroadcastb %xmm1, %xmm1
1035 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
1036 ; AVX512F-NEXT: vpsubb %xmm1, %xmm2, %xmm1
1037 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1038 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1039 ; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm3
1040 ; AVX512F-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
1041 ; AVX512F-NEXT: vpsllw %xmm2, %xmm4, %xmm2
1042 ; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2
1043 ; AVX512F-NEXT: vpand %ymm2, %ymm3, %ymm2
1044 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
1045 ; AVX512F-NEXT: vpsubb %xmm1, %xmm3, %xmm1
1046 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1047 ; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
1048 ; AVX512F-NEXT: vpsrlw %xmm1, %xmm4, %xmm1
1049 ; AVX512F-NEXT: vpsrlw $8, %xmm1, %xmm1
1050 ; AVX512F-NEXT: vpbroadcastb %xmm1, %ymm1
1051 ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
1052 ; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
1053 ; AVX512F-NEXT: retq
1055 ; AVX512VL-LABEL: splatvar_funnnel_v32i8:
1056 ; AVX512VL: # %bb.0:
1057 ; AVX512VL-NEXT: vpbroadcastb %xmm1, %xmm1
1058 ; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
1059 ; AVX512VL-NEXT: vpsubb %xmm1, %xmm2, %xmm1
1060 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1061 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1062 ; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm3
1063 ; AVX512VL-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
1064 ; AVX512VL-NEXT: vpsllw %xmm2, %xmm4, %xmm2
1065 ; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2
1066 ; AVX512VL-NEXT: vpand %ymm2, %ymm3, %ymm2
1067 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
1068 ; AVX512VL-NEXT: vpsubb %xmm1, %xmm3, %xmm1
1069 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1070 ; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm3
1071 ; AVX512VL-NEXT: vpsrlw %xmm1, %xmm4, %xmm0
1072 ; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0
1073 ; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0
1074 ; AVX512VL-NEXT: vpternlogq $236, %ymm3, %ymm2, %ymm0
1075 ; AVX512VL-NEXT: retq
1077 ; AVX512BW-LABEL: splatvar_funnnel_v32i8:
1078 ; AVX512BW: # %bb.0:
1079 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1080 ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
1081 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
1082 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
1083 ; AVX512BW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
1084 ; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
1085 ; AVX512BW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
1086 ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
1087 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1088 ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
1089 ; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0
1090 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
1091 ; AVX512BW-NEXT: retq
1093 ; AVX512VLBW-LABEL: splatvar_funnnel_v32i8:
1094 ; AVX512VLBW: # %bb.0:
1095 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1096 ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
1097 ; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
1098 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
1099 ; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
1100 ; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
1101 ; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
1102 ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
1103 ; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1104 ; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
1105 ; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0
1106 ; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0
1107 ; AVX512VLBW-NEXT: retq
1109 ; AVX512VBMI2-LABEL: splatvar_funnnel_v32i8:
1110 ; AVX512VBMI2: # %bb.0:
1111 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1112 ; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
1113 ; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
1114 ; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
1115 ; AVX512VBMI2-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
1116 ; AVX512VBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
1117 ; AVX512VBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1
1118 ; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
1119 ; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1120 ; AVX512VBMI2-NEXT: vpsllw %xmm1, %zmm0, %zmm0
1121 ; AVX512VBMI2-NEXT: vporq %zmm0, %zmm3, %zmm0
1122 ; AVX512VBMI2-NEXT: vpmovwb %zmm0, %ymm0
1123 ; AVX512VBMI2-NEXT: retq
1125 ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i8:
1126 ; AVX512VLVBMI2: # %bb.0:
1127 ; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1128 ; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
1129 ; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
1130 ; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
1131 ; AVX512VLVBMI2-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
1132 ; AVX512VLVBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
1133 ; AVX512VLVBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1
1134 ; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
1135 ; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1136 ; AVX512VLVBMI2-NEXT: vpsllw %xmm1, %zmm0, %zmm0
1137 ; AVX512VLVBMI2-NEXT: vporq %zmm0, %zmm3, %zmm0
1138 ; AVX512VLVBMI2-NEXT: vpmovwb %zmm0, %ymm0
1139 ; AVX512VLVBMI2-NEXT: retq
1141 ; XOPAVX1-LABEL: splatvar_funnnel_v32i8:
1143 ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1144 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1145 ; XOPAVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1
1146 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1147 ; XOPAVX1-NEXT: vprotb %xmm1, %xmm2, %xmm2
1148 ; XOPAVX1-NEXT: vprotb %xmm1, %xmm0, %xmm0
1149 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1150 ; XOPAVX1-NEXT: retq
1152 ; XOPAVX2-LABEL: splatvar_funnnel_v32i8:
1154 ; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
1155 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1156 ; XOPAVX2-NEXT: vpsubb %xmm1, %xmm2, %xmm1
1157 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
1158 ; XOPAVX2-NEXT: vprotb %xmm1, %xmm2, %xmm2
1159 ; XOPAVX2-NEXT: vprotb %xmm1, %xmm0, %xmm0
1160 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1161 ; XOPAVX2-NEXT: retq
1162 %splat = shufflevector <32 x i8> %amt, <32 x i8> undef, <32 x i32> zeroinitializer
1163 %res = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %x, <32 x i8> %x, <32 x i8> %splat)
; Rotate right of each i64 lane of %x by a per-lane compile-time constant.
; Both data operands of llvm.fshr are %x, which the LLVM LangRef defines as a
; rotate right. The amounts are the literal vector <4, 14, 50, 60>, so each
; lane rotates by a different constant.
; The assertion lines in this function are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
1171 define <4 x i64> @constant_funnnel_v4i64(<4 x i64> %x) nounwind {
1172 ; AVX1-LABEL: constant_funnnel_v4i64:
1174 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1175 ; AVX1-NEXT: vpsllq $4, %xmm1, %xmm2
1176 ; AVX1-NEXT: vpsllq $14, %xmm1, %xmm3
1177 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
1178 ; AVX1-NEXT: vpsllq $50, %xmm0, %xmm3
1179 ; AVX1-NEXT: vpsllq $60, %xmm0, %xmm4
1180 ; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
1181 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1182 ; AVX1-NEXT: vpsrlq $60, %xmm1, %xmm3
1183 ; AVX1-NEXT: vpsrlq $50, %xmm1, %xmm1
1184 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
1185 ; AVX1-NEXT: vpsrlq $14, %xmm0, %xmm3
1186 ; AVX1-NEXT: vpsrlq $4, %xmm0, %xmm0
1187 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
1188 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1189 ; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
1192 ; AVX2-LABEL: constant_funnnel_v4i64:
1194 ; AVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
1195 ; AVX2-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1196 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1199 ; AVX512F-LABEL: constant_funnnel_v4i64:
1201 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1202 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [4,14,50,60]
1203 ; AVX512F-NEXT: vprorvq %zmm1, %zmm0, %zmm0
1204 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1205 ; AVX512F-NEXT: retq
1207 ; AVX512VL-LABEL: constant_funnnel_v4i64:
1208 ; AVX512VL: # %bb.0:
1209 ; AVX512VL-NEXT: vprorvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1210 ; AVX512VL-NEXT: retq
1212 ; AVX512BW-LABEL: constant_funnnel_v4i64:
1213 ; AVX512BW: # %bb.0:
1214 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1215 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [4,14,50,60]
1216 ; AVX512BW-NEXT: vprorvq %zmm1, %zmm0, %zmm0
1217 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1218 ; AVX512BW-NEXT: retq
1220 ; AVX512VLBW-LABEL: constant_funnnel_v4i64:
1221 ; AVX512VLBW: # %bb.0:
1222 ; AVX512VLBW-NEXT: vprorvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1223 ; AVX512VLBW-NEXT: retq
1225 ; AVX512VBMI2-LABEL: constant_funnnel_v4i64:
1226 ; AVX512VBMI2: # %bb.0:
1227 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1228 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [4,14,50,60]
1229 ; AVX512VBMI2-NEXT: vprorvq %zmm1, %zmm0, %zmm0
1230 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1231 ; AVX512VBMI2-NEXT: retq
1233 ; AVX512VLVBMI2-LABEL: constant_funnnel_v4i64:
1234 ; AVX512VLVBMI2: # %bb.0:
1235 ; AVX512VLVBMI2-NEXT: vprorvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1236 ; AVX512VLVBMI2-NEXT: retq
1238 ; XOPAVX1-LABEL: constant_funnnel_v4i64:
1240 ; XOPAVX1-NEXT: vprotq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1241 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1242 ; XOPAVX1-NEXT: vprotq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1243 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1244 ; XOPAVX1-NEXT: retq
1246 ; XOPAVX2-LABEL: constant_funnnel_v4i64:
1248 ; XOPAVX2-NEXT: vprotq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1249 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1250 ; XOPAVX2-NEXT: vprotq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1251 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1252 ; XOPAVX2-NEXT: retq
1253 %res = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x, <4 x i64> %x, <4 x i64> <i64 4, i64 14, i64 50, i64 60>)
; Rotate right of an <8 x i32> vector by per-element constant amounts 4..11.
; Both data operands of llvm.fshr are %x, so the funnel shift acts as a rotate.
; The assertions below are autogenerated (see the file header); regenerate them
; with utils/update_llc_test_checks.py instead of editing them by hand.
1257 define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x) nounwind {
1258 ; AVX1-LABEL: constant_funnnel_v8i32:
1260 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [16777216,8388608,4194304,2097152]
1261 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1262 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1263 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
1264 ; AVX1-NEXT: vpmuludq %xmm2, %xmm4, %xmm2
1265 ; AVX1-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
1266 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
1267 ; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
1268 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
1269 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
1270 ; AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1
1271 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [268435456,134217728,67108864,33554432]
1272 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
1273 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
1274 ; AVX1-NEXT: vpmuludq %xmm3, %xmm4, %xmm3
1275 ; AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm0
1276 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1277 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
1278 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,0,2,2]
1279 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5],xmm3[6,7]
1280 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
1281 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1284 ; AVX2-LABEL: constant_funnnel_v8i32:
1286 ; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
1287 ; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1288 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1291 ; AVX512F-LABEL: constant_funnnel_v8i32:
1293 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1294 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [4,5,6,7,8,9,10,11]
1295 ; AVX512F-NEXT: vprorvd %zmm1, %zmm0, %zmm0
1296 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1297 ; AVX512F-NEXT: retq
1299 ; AVX512VL-LABEL: constant_funnnel_v8i32:
1300 ; AVX512VL: # %bb.0:
1301 ; AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1302 ; AVX512VL-NEXT: retq
1304 ; AVX512BW-LABEL: constant_funnnel_v8i32:
1305 ; AVX512BW: # %bb.0:
1306 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1307 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [4,5,6,7,8,9,10,11]
1308 ; AVX512BW-NEXT: vprorvd %zmm1, %zmm0, %zmm0
1309 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1310 ; AVX512BW-NEXT: retq
1312 ; AVX512VLBW-LABEL: constant_funnnel_v8i32:
1313 ; AVX512VLBW: # %bb.0:
1314 ; AVX512VLBW-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1315 ; AVX512VLBW-NEXT: retq
1317 ; AVX512VBMI2-LABEL: constant_funnnel_v8i32:
1318 ; AVX512VBMI2: # %bb.0:
1319 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1320 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [4,5,6,7,8,9,10,11]
1321 ; AVX512VBMI2-NEXT: vprorvd %zmm1, %zmm0, %zmm0
1322 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1323 ; AVX512VBMI2-NEXT: retq
1325 ; AVX512VLVBMI2-LABEL: constant_funnnel_v8i32:
1326 ; AVX512VLVBMI2: # %bb.0:
1327 ; AVX512VLVBMI2-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1328 ; AVX512VLVBMI2-NEXT: retq
1330 ; XOPAVX1-LABEL: constant_funnnel_v8i32:
1332 ; XOPAVX1-NEXT: vprotd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1333 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1334 ; XOPAVX1-NEXT: vprotd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1335 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1336 ; XOPAVX1-NEXT: retq
1338 ; XOPAVX2-LABEL: constant_funnnel_v8i32:
1340 ; XOPAVX2-NEXT: vprotd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1341 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1342 ; XOPAVX2-NEXT: vprotd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1343 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1344 ; XOPAVX2-NEXT: retq
1345 %res = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x, <8 x i32> %x, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>)
; Rotate right of a <16 x i16> vector by per-element constant amounts 0..15.
; Both data operands of llvm.fshr are %x, so the funnel shift acts as a rotate.
; The assertions below are autogenerated (see the file header); regenerate them
; with utils/update_llc_test_checks.py instead of editing them by hand.
1349 define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x) nounwind {
1350 ; AVX1-LABEL: constant_funnnel_v16i16:
1352 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1353 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [256,128,64,32,16,8,4,2]
1354 ; AVX1-NEXT: vpmulhuw %xmm2, %xmm1, %xmm3
1355 ; AVX1-NEXT: vpmullw %xmm2, %xmm1, %xmm1
1356 ; AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1
1357 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,32768,16384,8192,4096,2048,1024,512]
1358 ; AVX1-NEXT: vpmulhuw %xmm2, %xmm0, %xmm3
1359 ; AVX1-NEXT: vpmullw %xmm2, %xmm0, %xmm0
1360 ; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
1361 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1364 ; AVX2-LABEL: constant_funnnel_v16i16:
1366 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2]
1367 ; AVX2-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2
1368 ; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
1369 ; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
1372 ; AVX512F-LABEL: constant_funnnel_v16i16:
1374 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [1,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2]
1375 ; AVX512F-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2
1376 ; AVX512F-NEXT: vpmullw %ymm1, %ymm0, %ymm0
1377 ; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0
1378 ; AVX512F-NEXT: retq
1380 ; AVX512VL-LABEL: constant_funnnel_v16i16:
1381 ; AVX512VL: # %bb.0:
1382 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2]
1383 ; AVX512VL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2
1384 ; AVX512VL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
1385 ; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0
1386 ; AVX512VL-NEXT: retq
1388 ; AVX512BW-LABEL: constant_funnnel_v16i16:
1389 ; AVX512BW: # %bb.0:
1390 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1391 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [16,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
1392 ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm1
1393 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1]
1394 ; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
1395 ; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
1396 ; AVX512BW-NEXT: retq
1398 ; AVX512VLBW-LABEL: constant_funnnel_v16i16:
1399 ; AVX512VLBW: # %bb.0:
1400 ; AVX512VLBW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
1401 ; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1402 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
1403 ; AVX512VLBW-NEXT: retq
1405 ; AVX512VBMI2-LABEL: constant_funnnel_v16i16:
1406 ; AVX512VBMI2: # %bb.0:
1407 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1408 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
1409 ; AVX512VBMI2-NEXT: vpshrdvw %zmm1, %zmm0, %zmm0
1410 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1411 ; AVX512VBMI2-NEXT: retq
1413 ; AVX512VLVBMI2-LABEL: constant_funnnel_v16i16:
1414 ; AVX512VLVBMI2: # %bb.0:
1415 ; AVX512VLVBMI2-NEXT: vpshrdvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1416 ; AVX512VLVBMI2-NEXT: retq
1418 ; XOPAVX1-LABEL: constant_funnnel_v16i16:
1420 ; XOPAVX1-NEXT: vprotw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1421 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1422 ; XOPAVX1-NEXT: vprotw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1423 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1424 ; XOPAVX1-NEXT: retq
1426 ; XOPAVX2-LABEL: constant_funnnel_v16i16:
1428 ; XOPAVX2-NEXT: vprotw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1429 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1430 ; XOPAVX2-NEXT: vprotw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1431 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1432 ; XOPAVX2-NEXT: retq
1433 %res = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x, <16 x i16> %x, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
; Rotate right of a <32 x i8> vector by per-element constant amounts. The
; amount pattern 0,1,...,7,8,7,...,1 is repeated for each group of 16 bytes.
; Both data operands of llvm.fshr are %x, so the funnel shift acts as a rotate.
; The assertions below are autogenerated (see the file header); regenerate them
; with utils/update_llc_test_checks.py instead of editing them by hand.
1437 define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x) nounwind {
1438 ; AVX1-LABEL: constant_funnnel_v32i8:
1440 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1441 ; AVX1-NEXT: vpxor %xmm8, %xmm8, %xmm8
1442 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm8[8],xmm1[9],xmm8[9],xmm1[10],xmm8[10],xmm1[11],xmm8[11],xmm1[12],xmm8[12],xmm1[13],xmm8[13],xmm1[14],xmm8[14],xmm1[15],xmm8[15]
1443 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm9 = [256,2,4,8,16,32,64,128]
1444 ; AVX1-NEXT: vpmullw %xmm3, %xmm9, %xmm3
1445 ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
1446 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
1447 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [256,128,64,32,16,8,4,2]
1448 ; AVX1-NEXT: vpmullw %xmm6, %xmm5, %xmm7
1449 ; AVX1-NEXT: vpsrlw $8, %xmm7, %xmm7
1450 ; AVX1-NEXT: vpackuswb %xmm3, %xmm7, %xmm3
1451 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1452 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [1,2,4,8,16,32,64,128]
1453 ; AVX1-NEXT: vpmullw %xmm7, %xmm1, %xmm1
1454 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
1455 ; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
1456 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [1,128,64,32,16,8,4,2]
1457 ; AVX1-NEXT: vpmullw %xmm4, %xmm5, %xmm5
1458 ; AVX1-NEXT: vpand %xmm2, %xmm5, %xmm5
1459 ; AVX1-NEXT: vpackuswb %xmm1, %xmm5, %xmm1
1460 ; AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1
1461 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8],xmm8[8],xmm0[9],xmm8[9],xmm0[10],xmm8[10],xmm0[11],xmm8[11],xmm0[12],xmm8[12],xmm0[13],xmm8[13],xmm0[14],xmm8[14],xmm0[15],xmm8[15]
1462 ; AVX1-NEXT: vpmullw %xmm3, %xmm9, %xmm3
1463 ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
1464 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1465 ; AVX1-NEXT: vpmullw %xmm6, %xmm5, %xmm6
1466 ; AVX1-NEXT: vpsrlw $8, %xmm6, %xmm6
1467 ; AVX1-NEXT: vpackuswb %xmm3, %xmm6, %xmm3
1468 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1469 ; AVX1-NEXT: vpmullw %xmm7, %xmm0, %xmm0
1470 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
1471 ; AVX1-NEXT: vpmullw %xmm4, %xmm5, %xmm4
1472 ; AVX1-NEXT: vpand %xmm2, %xmm4, %xmm2
1473 ; AVX1-NEXT: vpackuswb %xmm0, %xmm2, %xmm0
1474 ; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
1475 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1478 ; AVX2-LABEL: constant_funnnel_v32i8:
1480 ; AVX2-NEXT: vpsllw $4, %ymm0, %ymm1
1481 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
1482 ; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536]
1483 ; AVX2-NEXT: # ymm2 = mem[0,1,0,1]
1484 ; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm1
1485 ; AVX2-NEXT: vpsllw $2, %ymm1, %ymm3
1486 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
1487 ; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2
1488 ; AVX2-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
1489 ; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm3
1490 ; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2
1491 ; AVX2-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
1492 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1493 ; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31]
1494 ; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
1495 ; AVX2-NEXT: vpsrlw $8, %ymm3, %ymm3
1496 ; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23]
1497 ; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1498 ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
1499 ; AVX2-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
1500 ; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
1503 ; AVX512F-LABEL: constant_funnnel_v32i8:
1505 ; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1
1506 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
1507 ; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536]
1508 ; AVX512F-NEXT: # ymm2 = mem[0,1,0,1]
1509 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm1
1510 ; AVX512F-NEXT: vpsllw $2, %ymm1, %ymm3
1511 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
1512 ; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm2
1513 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
1514 ; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm3
1515 ; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm2
1516 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
1517 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
1518 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31]
1519 ; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
1520 ; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
1521 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23]
1522 ; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1523 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0
1524 ; AVX512F-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
1525 ; AVX512F-NEXT: vpor %ymm0, %ymm1, %ymm0
1526 ; AVX512F-NEXT: retq
1528 ; AVX512VL-LABEL: constant_funnnel_v32i8:
1529 ; AVX512VL: # %bb.0:
1530 ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
1531 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
1532 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536]
1533 ; AVX512VL-NEXT: # ymm2 = mem[0,1,0,1]
1534 ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm1
1535 ; AVX512VL-NEXT: vpsllw $2, %ymm1, %ymm3
1536 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
1537 ; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
1538 ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
1539 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm3
1540 ; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
1541 ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
1542 ; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
1543 ; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31]
1544 ; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
1545 ; AVX512VL-NEXT: vpsrlw $8, %ymm3, %ymm3
1546 ; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23]
1547 ; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1548 ; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
1549 ; AVX512VL-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
1550 ; AVX512VL-NEXT: vpor %ymm0, %ymm1, %ymm0
1551 ; AVX512VL-NEXT: retq
1553 ; AVX512BW-LABEL: constant_funnnel_v32i8:
1554 ; AVX512BW: # %bb.0:
1555 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
1556 ; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
1557 ; AVX512BW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
1558 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
1559 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
1560 ; AVX512BW-NEXT: retq
1562 ; AVX512VLBW-LABEL: constant_funnnel_v32i8:
1563 ; AVX512VLBW: # %bb.0:
1564 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
1565 ; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
1566 ; AVX512VLBW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
1567 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
1568 ; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0
1569 ; AVX512VLBW-NEXT: retq
1571 ; AVX512VBMI2-LABEL: constant_funnnel_v32i8:
1572 ; AVX512VBMI2: # %bb.0:
1573 ; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
1574 ; AVX512VBMI2-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
1575 ; AVX512VBMI2-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
1576 ; AVX512VBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
1577 ; AVX512VBMI2-NEXT: vpmovwb %zmm0, %ymm0
1578 ; AVX512VBMI2-NEXT: retq
1580 ; AVX512VLVBMI2-LABEL: constant_funnnel_v32i8:
1581 ; AVX512VLVBMI2: # %bb.0:
1582 ; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
1583 ; AVX512VLVBMI2-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
1584 ; AVX512VLVBMI2-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
1585 ; AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
1586 ; AVX512VLVBMI2-NEXT: vpmovwb %zmm0, %ymm0
1587 ; AVX512VLVBMI2-NEXT: retq
1589 ; XOPAVX1-LABEL: constant_funnnel_v32i8:
1591 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1592 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7]
1593 ; XOPAVX1-NEXT: vprotb %xmm2, %xmm1, %xmm1
1594 ; XOPAVX1-NEXT: vprotb %xmm2, %xmm0, %xmm0
1595 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1596 ; XOPAVX1-NEXT: retq
1598 ; XOPAVX2-LABEL: constant_funnnel_v32i8:
1600 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1601 ; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7]
1602 ; XOPAVX2-NEXT: vprotb %xmm2, %xmm1, %xmm1
1603 ; XOPAVX2-NEXT: vprotb %xmm2, %xmm0, %xmm0
1604 ; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1605 ; XOPAVX2-NEXT: retq
1606 %res = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %x, <32 x i8> %x, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1>)
1611 ; Uniform Constant Shifts
; Rotate right of a <4 x i64> vector by the same constant amount (14) in every
; element. Both data operands of llvm.fshr are %x, so the funnel shift acts as
; a rotate.
; The assertions below are autogenerated (see the file header); regenerate them
; with utils/update_llc_test_checks.py instead of editing them by hand.
1614 define <4 x i64> @splatconstant_funnnel_v4i64(<4 x i64> %x) nounwind {
1615 ; AVX1-LABEL: splatconstant_funnnel_v4i64:
1617 ; AVX1-NEXT: vpsllq $50, %xmm0, %xmm1
1618 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1619 ; AVX1-NEXT: vpsllq $50, %xmm2, %xmm3
1620 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
1621 ; AVX1-NEXT: vpsrlq $14, %xmm0, %xmm0
1622 ; AVX1-NEXT: vpsrlq $14, %xmm2, %xmm2
1623 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1624 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1627 ; AVX2-LABEL: splatconstant_funnnel_v4i64:
1629 ; AVX2-NEXT: vpsllq $50, %ymm0, %ymm1
1630 ; AVX2-NEXT: vpsrlq $14, %ymm0, %ymm0
1631 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1634 ; AVX512F-LABEL: splatconstant_funnnel_v4i64:
1636 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1637 ; AVX512F-NEXT: vprorq $14, %zmm0, %zmm0
1638 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1639 ; AVX512F-NEXT: retq
1641 ; AVX512VL-LABEL: splatconstant_funnnel_v4i64:
1642 ; AVX512VL: # %bb.0:
1643 ; AVX512VL-NEXT: vprorq $14, %ymm0, %ymm0
1644 ; AVX512VL-NEXT: retq
1646 ; AVX512BW-LABEL: splatconstant_funnnel_v4i64:
1647 ; AVX512BW: # %bb.0:
1648 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1649 ; AVX512BW-NEXT: vprorq $14, %zmm0, %zmm0
1650 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1651 ; AVX512BW-NEXT: retq
1653 ; AVX512VLBW-LABEL: splatconstant_funnnel_v4i64:
1654 ; AVX512VLBW: # %bb.0:
1655 ; AVX512VLBW-NEXT: vprorq $14, %ymm0, %ymm0
1656 ; AVX512VLBW-NEXT: retq
1658 ; AVX512VBMI2-LABEL: splatconstant_funnnel_v4i64:
1659 ; AVX512VBMI2: # %bb.0:
1660 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1661 ; AVX512VBMI2-NEXT: vprorq $14, %zmm0, %zmm0
1662 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1663 ; AVX512VBMI2-NEXT: retq
1665 ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v4i64:
1666 ; AVX512VLVBMI2: # %bb.0:
1667 ; AVX512VLVBMI2-NEXT: vprorq $14, %ymm0, %ymm0
1668 ; AVX512VLVBMI2-NEXT: retq
1670 ; XOPAVX1-LABEL: splatconstant_funnnel_v4i64:
1672 ; XOPAVX1-NEXT: vprotq $50, %xmm0, %xmm1
1673 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1674 ; XOPAVX1-NEXT: vprotq $50, %xmm0, %xmm0
1675 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1676 ; XOPAVX1-NEXT: retq
1678 ; XOPAVX2-LABEL: splatconstant_funnnel_v4i64:
1680 ; XOPAVX2-NEXT: vprotq $50, %xmm0, %xmm1
1681 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1682 ; XOPAVX2-NEXT: vprotq $50, %xmm0, %xmm0
1683 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1684 ; XOPAVX2-NEXT: retq
1685 %res = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x, <4 x i64> %x, <4 x i64> <i64 14, i64 14, i64 14, i64 14>)
; Rotate right of an <8 x i32> vector by the same constant amount (4) in every
; element. Both data operands of llvm.fshr are %x, so the funnel shift acts as
; a rotate.
; The assertions below are autogenerated (see the file header); regenerate them
; with utils/update_llc_test_checks.py instead of editing them by hand.
1689 define <8 x i32> @splatconstant_funnnel_v8i32(<8 x i32> %x) nounwind {
1690 ; AVX1-LABEL: splatconstant_funnnel_v8i32:
1692 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1693 ; AVX1-NEXT: vpsrld $4, %xmm1, %xmm2
1694 ; AVX1-NEXT: vpslld $28, %xmm1, %xmm1
1695 ; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1
1696 ; AVX1-NEXT: vpsrld $4, %xmm0, %xmm2
1697 ; AVX1-NEXT: vpslld $28, %xmm0, %xmm0
1698 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
1699 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1702 ; AVX2-LABEL: splatconstant_funnnel_v8i32:
1704 ; AVX2-NEXT: vpsrld $4, %ymm0, %ymm1
1705 ; AVX2-NEXT: vpslld $28, %ymm0, %ymm0
1706 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1709 ; AVX512F-LABEL: splatconstant_funnnel_v8i32:
1711 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1712 ; AVX512F-NEXT: vprord $4, %zmm0, %zmm0
1713 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1714 ; AVX512F-NEXT: retq
1716 ; AVX512VL-LABEL: splatconstant_funnnel_v8i32:
1717 ; AVX512VL: # %bb.0:
1718 ; AVX512VL-NEXT: vprord $4, %ymm0, %ymm0
1719 ; AVX512VL-NEXT: retq
1721 ; AVX512BW-LABEL: splatconstant_funnnel_v8i32:
1722 ; AVX512BW: # %bb.0:
1723 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1724 ; AVX512BW-NEXT: vprord $4, %zmm0, %zmm0
1725 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1726 ; AVX512BW-NEXT: retq
1728 ; AVX512VLBW-LABEL: splatconstant_funnnel_v8i32:
1729 ; AVX512VLBW: # %bb.0:
1730 ; AVX512VLBW-NEXT: vprord $4, %ymm0, %ymm0
1731 ; AVX512VLBW-NEXT: retq
1733 ; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i32:
1734 ; AVX512VBMI2: # %bb.0:
1735 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1736 ; AVX512VBMI2-NEXT: vprord $4, %zmm0, %zmm0
1737 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1738 ; AVX512VBMI2-NEXT: retq
1740 ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v8i32:
1741 ; AVX512VLVBMI2: # %bb.0:
1742 ; AVX512VLVBMI2-NEXT: vprord $4, %ymm0, %ymm0
1743 ; AVX512VLVBMI2-NEXT: retq
1745 ; XOPAVX1-LABEL: splatconstant_funnnel_v8i32:
1747 ; XOPAVX1-NEXT: vprotd $28, %xmm0, %xmm1
1748 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1749 ; XOPAVX1-NEXT: vprotd $28, %xmm0, %xmm0
1750 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1751 ; XOPAVX1-NEXT: retq
1753 ; XOPAVX2-LABEL: splatconstant_funnnel_v8i32:
1755 ; XOPAVX2-NEXT: vprotd $28, %xmm0, %xmm1
1756 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1757 ; XOPAVX2-NEXT: vprotd $28, %xmm0, %xmm0
1758 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1759 ; XOPAVX2-NEXT: retq
1760 %res = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x, <8 x i32> %x, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>)
; Rotate right of a <16 x i16> vector by the same constant amount (7) in every
; element. Both data operands of llvm.fshr are %x, so the funnel shift acts as
; a rotate.
; The assertions below are autogenerated (see the file header); regenerate them
; with utils/update_llc_test_checks.py instead of editing them by hand.
1764 define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x) nounwind {
1765 ; AVX1-LABEL: splatconstant_funnnel_v16i16:
1767 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1768 ; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm2
1769 ; AVX1-NEXT: vpsllw $9, %xmm1, %xmm1
1770 ; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1
1771 ; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm2
1772 ; AVX1-NEXT: vpsllw $9, %xmm0, %xmm0
1773 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
1774 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1777 ; AVX2-LABEL: splatconstant_funnnel_v16i16:
1779 ; AVX2-NEXT: vpsrlw $7, %ymm0, %ymm1
1780 ; AVX2-NEXT: vpsllw $9, %ymm0, %ymm0
1781 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1784 ; AVX512F-LABEL: splatconstant_funnnel_v16i16:
1786 ; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm1
1787 ; AVX512F-NEXT: vpsllw $9, %ymm0, %ymm0
1788 ; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
1789 ; AVX512F-NEXT: retq
1791 ; AVX512VL-LABEL: splatconstant_funnnel_v16i16:
1792 ; AVX512VL: # %bb.0:
1793 ; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm1
1794 ; AVX512VL-NEXT: vpsllw $9, %ymm0, %ymm0
1795 ; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
1796 ; AVX512VL-NEXT: retq
1798 ; AVX512BW-LABEL: splatconstant_funnnel_v16i16:
1799 ; AVX512BW: # %bb.0:
1800 ; AVX512BW-NEXT: vpsrlw $7, %ymm0, %ymm1
1801 ; AVX512BW-NEXT: vpsllw $9, %ymm0, %ymm0
1802 ; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
1803 ; AVX512BW-NEXT: retq
1805 ; AVX512VLBW-LABEL: splatconstant_funnnel_v16i16:
1806 ; AVX512VLBW: # %bb.0:
1807 ; AVX512VLBW-NEXT: vpsrlw $7, %ymm0, %ymm1
1808 ; AVX512VLBW-NEXT: vpsllw $9, %ymm0, %ymm0
1809 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
1810 ; AVX512VLBW-NEXT: retq
1812 ; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i16:
1813 ; AVX512VBMI2: # %bb.0:
1814 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1815 ; AVX512VBMI2-NEXT: vpshrdw $7, %zmm0, %zmm0, %zmm0
1816 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1817 ; AVX512VBMI2-NEXT: retq
1819 ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i16:
1820 ; AVX512VLVBMI2: # %bb.0:
1821 ; AVX512VLVBMI2-NEXT: vpshrdw $7, %ymm0, %ymm0, %ymm0
1822 ; AVX512VLVBMI2-NEXT: retq
1824 ; XOPAVX1-LABEL: splatconstant_funnnel_v16i16:
1826 ; XOPAVX1-NEXT: vprotw $9, %xmm0, %xmm1
1827 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1828 ; XOPAVX1-NEXT: vprotw $9, %xmm0, %xmm0
1829 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1830 ; XOPAVX1-NEXT: retq
1832 ; XOPAVX2-LABEL: splatconstant_funnnel_v16i16:
1834 ; XOPAVX2-NEXT: vprotw $9, %xmm0, %xmm1
1835 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1836 ; XOPAVX2-NEXT: vprotw $9, %xmm0, %xmm0
1837 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1838 ; XOPAVX2-NEXT: retq
1839 %res = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x, <16 x i16> %x, <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
; Rotate right of a <32 x i8> vector by the same constant amount (4) in every
; element. Both data operands of llvm.fshr are %x, so the funnel shift acts as
; a rotate.
; The assertions below are autogenerated (see the file header); regenerate them
; with utils/update_llc_test_checks.py instead of editing them by hand.
1843 define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
1844 ; AVX1-LABEL: splatconstant_funnnel_v32i8:
1846 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1847 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm2
1848 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
1849 ; AVX1-NEXT: vpandn %xmm2, %xmm3, %xmm2
1850 ; AVX1-NEXT: vpsllw $4, %xmm1, %xmm1
1851 ; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
1852 ; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1
1853 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm2
1854 ; AVX1-NEXT: vpandn %xmm2, %xmm3, %xmm2
1855 ; AVX1-NEXT: vpsllw $4, %xmm0, %xmm0
1856 ; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
1857 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
1858 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1861 ; AVX2-LABEL: splatconstant_funnnel_v32i8:
1863 ; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm1
1864 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
1865 ; AVX2-NEXT: vpsllw $4, %ymm0, %ymm0
1866 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1867 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1870 ; AVX512F-LABEL: splatconstant_funnnel_v32i8:
1872 ; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm1
1873 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
1874 ; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
1875 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1876 ; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
1877 ; AVX512F-NEXT: retq
1879 ; AVX512VL-LABEL: splatconstant_funnnel_v32i8:
1880 ; AVX512VL: # %bb.0:
1881 ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
1882 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
1883 ; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
1884 ; AVX512VL-NEXT: retq
1886 ; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
1887 ; AVX512BW: # %bb.0:
1888 ; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm1
1889 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
1890 ; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm0
1891 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1892 ; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
1893 ; AVX512BW-NEXT: retq
1895 ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8:
1896 ; AVX512VLBW: # %bb.0:
1897 ; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm1
1898 ; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm0
1899 ; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
1900 ; AVX512VLBW-NEXT: retq
1902 ; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8:
1903 ; AVX512VBMI2: # %bb.0:
1904 ; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm1
1905 ; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
1906 ; AVX512VBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0
1907 ; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1908 ; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
1909 ; AVX512VBMI2-NEXT: retq
1911 ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8:
1912 ; AVX512VLVBMI2: # %bb.0:
1913 ; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm0, %ymm1
1914 ; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm0
1915 ; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
1916 ; AVX512VLVBMI2-NEXT: retq
1918 ; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
1920 ; XOPAVX1-NEXT: vprotb $4, %xmm0, %xmm1
1921 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1922 ; XOPAVX1-NEXT: vprotb $4, %xmm0, %xmm0
1923 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1924 ; XOPAVX1-NEXT: retq
1926 ; XOPAVX2-LABEL: splatconstant_funnnel_v32i8:
1928 ; XOPAVX2-NEXT: vprotb $4, %xmm0, %xmm1
1929 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1930 ; XOPAVX2-NEXT: vprotb $4, %xmm0, %xmm0
1931 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1932 ; XOPAVX2-NEXT: retq
1933 %res = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %x, <32 x i8> %x, <32 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)