; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VLBW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOP,XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOP,XOPAVX2

declare <4 x i64> @llvm.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)
declare <8 x i32> @llvm.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)
declare <16 x i16> @llvm.fshl.v16i16(<16 x i16>, <16 x i16>, <16 x i16>)
declare <32 x i8> @llvm.fshl.v32i8(<32 x i8>, <32 x i8>, <32 x i8>)
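;
; Variable Shifts
;
; Note: @llvm.fshl with matching value operands (fshl(x, x, amt)) is a
; rotate-left by amt, so these tests exercise variable per-element rotate
; lowering: AVX512 selects vprolvq/vprolvd for the i64/i32 cases, XOP selects
; vprot* for every element size, and plain AVX/AVX2 expand to shift/or
; sequences.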
define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind {
; AVX1-LABEL: var_funnnel_v4i64:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vpsllq %xmm4, %xmm2, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
; AVX1-NEXT: vpsllq %xmm4, %xmm2, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7]
; AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm5[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
; AVX1-NEXT: vpsubq %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [63,63]
; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm4
; AVX1-NEXT: vpsrlq %xmm4, %xmm2, %xmm7
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
; AVX1-NEXT: vpsrlq %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm7[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vpsubq %xmm1, %xmm5, %xmm1
; AVX1-NEXT: vpand %xmm6, %xmm1, %xmm1
; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm4
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm4[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm0, %ymm3, %ymm0
; AVX2-LABEL: var_funnnel_v4i64:
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [63,63,63,63]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm3
; AVX2-NEXT: vpsllvq %ymm3, %ymm0, %ymm3
; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT: vpsubq %ymm1, %ymm4, %ymm1
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm0, %ymm3, %ymm0
; AVX512F-LABEL: var_funnnel_v4i64:
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vprolvq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VL-LABEL: var_funnnel_v4i64:
; AVX512VL-NEXT: vprolvq %ymm1, %ymm0, %ymm0
; AVX512BW-LABEL: var_funnnel_v4i64:
; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vprolvq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VLBW-LABEL: var_funnnel_v4i64:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vprolvq %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
; XOPAVX1-LABEL: var_funnnel_v4i64:
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT: vprotq %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vprotq %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-LABEL: var_funnnel_v4i64:
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
; XOPAVX2-NEXT: vprotq %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT: vprotq %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
  %res = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x, <4 x i64> %x, <4 x i64> %amt)
define <8 x i32> @var_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind {
; AVX1-LABEL: var_funnnel_v8i32:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [31,31,31,31]
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpslld $23, %xmm2, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [1065353216,1065353216,1065353216,1065353216]
; AVX1-NEXT: vpaddd %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[1,1,3,3]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
; AVX1-NEXT: vpshufd {{.*#+}} xmm7 = xmm6[1,1,3,3]
; AVX1-NEXT: vpmuludq %xmm5, %xmm7, %xmm5
; AVX1-NEXT: vpmuludq %xmm2, %xmm6, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[1,1,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm6 = xmm6[0,1],xmm5[2,3],xmm6[4,5],xmm5[6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[0,0,2,2]
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm5[2,3],xmm2[4,5],xmm5[6,7]
; AVX1-NEXT: vpor %xmm6, %xmm2, %xmm2
; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
; AVX1-NEXT: vpaddd %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; AVX1-NEXT: vpmuludq %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,0,2,2]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5],xmm3[6,7]
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX2-LABEL: var_funnnel_v8i32:
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [31,31,31,31,31,31,31,31]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [32,32,32,32,32,32,32,32]
; AVX2-NEXT: vpsubd %ymm1, %ymm3, %ymm1
; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512F-LABEL: var_funnnel_v8i32:
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vprolvd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VL-LABEL: var_funnnel_v8i32:
; AVX512VL-NEXT: vprolvd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
; AVX512BW-LABEL: var_funnnel_v8i32:
; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vprolvd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT: retq
; AVX512VLBW-LABEL: var_funnnel_v8i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vprolvd %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
; XOPAVX1-LABEL: var_funnnel_v8i32:
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT: vprotd %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-LABEL: var_funnnel_v8i32:
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
; XOPAVX2-NEXT: vprotd %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT: vprotd %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
  %res = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x, <8 x i32> %x, <8 x i32> %amt)
define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind {
; AVX1-LABEL: var_funnnel_v16i16:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; AVX1-NEXT: vpslld $23, %xmm4, %xmm4
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [1065353216,1065353216,1065353216,1065353216]
; AVX1-NEXT: vpaddd %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vcvttps2dq %xmm4, %xmm4
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX1-NEXT: vpslld $23, %xmm2, %xmm2
; AVX1-NEXT: vpaddd %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2
; AVX1-NEXT: vpackusdw %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpmulhuw %xmm2, %xmm4, %xmm6
; AVX1-NEXT: vpmullw %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpor %xmm6, %xmm2, %xmm2
; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX1-NEXT: vpslld $23, %xmm3, %xmm3
; AVX1-NEXT: vpaddd %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vcvttps2dq %xmm3, %xmm3
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
; AVX1-NEXT: vpaddd %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpmulhuw %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX2-LABEL: var_funnnel_v16i16:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; AVX2-NEXT: vpsllvd %ymm4, %ymm3, %ymm4
; AVX2-NEXT: vpsrld $16, %ymm4, %ymm4
; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm5 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; AVX2-NEXT: vpsllvd %ymm5, %ymm0, %ymm5
; AVX2-NEXT: vpsrld $16, %ymm5, %ymm5
; AVX2-NEXT: vpackusdw %ymm4, %ymm5, %ymm4
; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: vpsubw %ymm1, %ymm5, %ymm1
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; AVX2-NEXT: vpsrlvd %ymm5, %ymm3, %ymm3
; AVX2-NEXT: vpsrld $16, %ymm3, %ymm3
; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm0, %ymm4, %ymm0
; AVX512F-LABEL: var_funnnel_v16i16:
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT: vpsllvd %zmm2, %zmm0, %zmm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512F-NEXT: vpsubw %ymm1, %ymm3, %ymm1
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpord %zmm0, %zmm2, %zmm0
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512VL-LABEL: var_funnnel_v16i16:
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-NEXT: vpsllvd %zmm2, %zmm0, %zmm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: vpsubw %ymm1, %ymm3, %ymm1
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: vpord %zmm0, %zmm2, %zmm0
; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
; AVX512VL-NEXT: retq
; AVX512BW-LABEL: var_funnnel_v16i16:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %ymm1, %ymm3, %ymm1
; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512BW-NEXT: retq
; AVX512VLBW-LABEL: var_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512VLBW-NEXT: vpsllvw %ymm1, %ymm0, %ymm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %ymm1, %ymm3, %ymm1
; AVX512VLBW-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512VLBW-NEXT: retq
; XOPAVX1-LABEL: var_funnnel_v16i16:
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT: vprotw %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vprotw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-LABEL: var_funnnel_v16i16:
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
; XOPAVX2-NEXT: vprotw %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT: vprotw %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
  %res = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x, <16 x i16> %x, <16 x i16> %amt)
define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
; AVX1-LABEL: var_funnnel_v32i8:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX1-NEXT: vpandn %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsllw $4, %xmm2, %xmm5
; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm5
; AVX1-NEXT: vpor %xmm3, %xmm5, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT: vpsllw $5, %xmm5, %xmm5
; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $6, %xmm2, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX1-NEXT: vpandn %xmm3, %xmm6, %xmm3
; AVX1-NEXT: vpsllw $2, %xmm2, %xmm7
; AVX1-NEXT: vpand %xmm6, %xmm7, %xmm7
; AVX1-NEXT: vpor %xmm3, %xmm7, %xmm3
; AVX1-NEXT: vpaddb %xmm5, %xmm5, %xmm5
; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $7, %xmm2, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT: vpand %xmm8, %xmm3, %xmm3
; AVX1-NEXT: vpaddb %xmm2, %xmm2, %xmm7
; AVX1-NEXT: vpor %xmm3, %xmm7, %xmm3
; AVX1-NEXT: vpaddb %xmm5, %xmm5, %xmm5
; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm3
; AVX1-NEXT: vpandn %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsllw $4, %xmm0, %xmm5
; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vpor %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsllw $5, %xmm1, %xmm1
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $6, %xmm0, %xmm3
; AVX1-NEXT: vpandn %xmm3, %xmm6, %xmm3
; AVX1-NEXT: vpsllw $2, %xmm0, %xmm4
; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm4
; AVX1-NEXT: vpor %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm3
; AVX1-NEXT: vpand %xmm8, %xmm3, %xmm3
; AVX1-NEXT: vpaddb %xmm0, %xmm0, %xmm4
; AVX1-NEXT: vpor %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX2-LABEL: var_funnnel_v32i8:
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm2
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX2-NEXT: vpsllw $4, %ymm0, %ymm3
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $6, %ymm0, %ymm2
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX2-NEXT: vpsllw $2, %ymm0, %ymm3
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm2
; AVX2-NEXT: vpsrlw $7, %ymm0, %ymm3
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512F-LABEL: var_funnnel_v32i8:
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm3
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
; AVX512F-NEXT: vpor %ymm2, %ymm3, %ymm2
; AVX512F-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512F-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm2
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm3
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
; AVX512F-NEXT: vpor %ymm2, %ymm3, %ymm2
; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512F-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm2
; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm3
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
; AVX512F-NEXT: vpor %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512F-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512VL-LABEL: var_funnnel_v32i8:
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm3
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
; AVX512VL-NEXT: vpor %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm2
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm3
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
; AVX512VL-NEXT: vpor %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2
; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm3
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
; AVX512VL-NEXT: vpor %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: retq
; AVX512BW-LABEL: var_funnnel_v32i8:
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm3
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero,ymm3[16],zero,ymm3[17],zero,ymm3[18],zero,ymm3[19],zero,ymm3[20],zero,ymm3[21],zero,ymm3[22],zero,ymm3[23],zero,ymm3[24],zero,ymm3[25],zero,ymm3[26],zero,ymm3[27],zero,ymm3[28],zero,ymm3[29],zero,ymm3[30],zero,ymm3[31],zero
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BW-NEXT: vpsllvw %zmm3, %zmm0, %zmm3
; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512BW-NEXT: vpsubb %ymm1, %ymm4, %ymm1
; AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
; AVX512VLBW-LABEL: var_funnnel_v32i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VLBW-NEXT: vpand %ymm2, %ymm1, %ymm3
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero,ymm3[16],zero,ymm3[17],zero,ymm3[18],zero,ymm3[19],zero,ymm3[20],zero,ymm3[21],zero,ymm3[22],zero,ymm3[23],zero,ymm3[24],zero,ymm3[25],zero,ymm3[26],zero,ymm3[27],zero,ymm3[28],zero,ymm3[29],zero,ymm3[30],zero,ymm3[31],zero
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512VLBW-NEXT: vpsllvw %zmm3, %zmm0, %zmm3
; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBW-NEXT: vpsubb %ymm1, %ymm4, %ymm1
; AVX512VLBW-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512VLBW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512VLBW-NEXT: retq
; XOPAVX1-LABEL: var_funnnel_v32i8:
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT: vprotb %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vprotb %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-LABEL: var_funnnel_v32i8:
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
; XOPAVX2-NEXT: vprotb %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT: vprotb %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
  %res = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %x, <32 x i8> %x, <32 x i8> %amt)
; Uniform Variable Shifts
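;
; The splatvar tests broadcast a single rotate amount to every lane with a
; zeroinitializer shufflevector, so the AVX/AVX2 lowerings can use shifts
; that take one scalar count in an xmm register (vpsllq/vpsrld etc.) instead
; of fully per-element variable shifts.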
define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind {
; AVX1-LABEL: splatvar_funnnel_v4i64:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpsrlq %xmm2, %xmm4, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,0,1]
; AVX1-NEXT: vpsrlq %xmm6, %xmm4, %xmm7
; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm7[4,5,6,7]
; AVX1-NEXT: vpsrlq %xmm2, %xmm0, %xmm2
; AVX1-NEXT: vpsrlq %xmm6, %xmm0, %xmm6
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm2, %ymm2
; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpsllq %xmm1, %xmm4, %xmm3
; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
; AVX2-LABEL: splatvar_funnnel_v4i64:
; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX2-NEXT: vpsllq %xmm3, %ymm0, %ymm3
; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm0, %ymm3, %ymm0
; AVX512F-LABEL: splatvar_funnnel_v4i64:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpbroadcastq %xmm1, %ymm1
; AVX512F-NEXT: vprolvq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VL-LABEL: splatvar_funnnel_v4i64:
; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
; AVX512VL-NEXT: vprolvq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
; AVX512BW-LABEL: splatvar_funnnel_v4i64:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpbroadcastq %xmm1, %ymm1
; AVX512BW-NEXT: vprolvq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT: retq
; AVX512VLBW-LABEL: splatvar_funnnel_v4i64:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpbroadcastq %xmm1, %ymm1
; AVX512VLBW-NEXT: vprolvq %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
; XOPAVX1-LABEL: splatvar_funnnel_v4i64:
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vprotq %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vprotq %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-LABEL: splatvar_funnnel_v4i64:
; XOPAVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; XOPAVX2-NEXT: vprotq %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vprotq %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
  %splat = shufflevector <4 x i64> %amt, <4 x i64> undef, <4 x i32> zeroinitializer
  %res = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x, <4 x i64> %x, <4 x i64> %splat)
define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind {
; AVX1-LABEL: splatvar_funnnel_v8i32:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vpslld %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [32,32,32,32]
; AVX1-NEXT: vpsubd %xmm1, %xmm5, %xmm1
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpslld %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX2-LABEL: splatvar_funnnel_v8i32:
; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
; AVX2-NEXT: vpslld %xmm2, %ymm0, %ymm2
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
; AVX2-NEXT: vpsubd %xmm1, %xmm3, %xmm1
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512F-LABEL: splatvar_funnnel_v8i32:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpbroadcastd %xmm1, %ymm1
; AVX512F-NEXT: vprolvd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VL-LABEL: splatvar_funnnel_v8i32:
; AVX512VL-NEXT: vpbroadcastd %xmm1, %ymm1
; AVX512VL-NEXT: vprolvd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
; AVX512BW-LABEL: splatvar_funnnel_v8i32:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpbroadcastd %xmm1, %ymm1
; AVX512BW-NEXT: vprolvd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT: retq
; AVX512VLBW-LABEL: splatvar_funnnel_v8i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpbroadcastd %xmm1, %ymm1
; AVX512VLBW-NEXT: vprolvd %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
; XOPAVX1-LABEL: splatvar_funnnel_v8i32:
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vprotd %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-LABEL: splatvar_funnnel_v8i32:
; XOPAVX2-NEXT: vpbroadcastd %xmm1, %ymm1
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; XOPAVX2-NEXT: vprotd %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vprotd %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
  %splat = shufflevector <8 x i32> %amt, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x, <8 x i32> %x, <8 x i32> %splat)
define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind {
; AVX1-LABEL: splatvar_funnnel_v16i16:
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vpsllw %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16]
; AVX1-NEXT: vpsubw %xmm1, %xmm5, %xmm1
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpsllw %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX2-LABEL: splatvar_funnnel_v16i16:
; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512-LABEL: splatvar_funnnel_v16i16:
; AVX512-NEXT: vpbroadcastw %xmm1, %xmm1
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512-NEXT: vpor %ymm0, %ymm2, %ymm0
; XOPAVX1-LABEL: splatvar_funnnel_v16i16:
; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7]
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vprotw %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vprotw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-LABEL: splatvar_funnnel_v16i16:
; XOPAVX2-NEXT: vpbroadcastw %xmm1, %ymm1
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; XOPAVX2-NEXT: vprotw %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vprotw %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
  %splat = shufflevector <16 x i16> %amt, <16 x i16> undef, <16 x i32> zeroinitializer
  %res = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x, <16 x i16> %x, <16 x i16> %splat)
define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
; AVX1-LABEL: splatvar_funnnel_v32i8:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpsllw %xmm3, %xmm4, %xmm5
; AVX1-NEXT: vpcmpeqd %xmm6, %xmm6, %xmm6
; AVX1-NEXT: vpsllw %xmm3, %xmm6, %xmm7
; AVX1-NEXT: vpshufb %xmm2, %xmm7, %xmm2
; AVX1-NEXT: vpand %xmm2, %xmm5, %xmm5
; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX1-NEXT: vpsubb %xmm1, %xmm7, %xmm1
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpsrlw %xmm1, %xmm4, %xmm4
; AVX1-NEXT: vpsrlw %xmm1, %xmm6, %xmm6
; AVX1-NEXT: vpshufb {{.*#+}} xmm6 = xmm6[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm4
; AVX1-NEXT: vpor %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vpsllw %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm6, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
; AVX2-LABEL: splatvar_funnnel_v32i8:
; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm3
; AVX2-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX2-NEXT: vpsllw %xmm2, %xmm4, %xmm2
; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX2-NEXT: vpand %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX2-NEXT: vpsubb %xmm1, %xmm3, %xmm1
; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw %xmm1, %xmm4, %xmm1
; AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX2-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512F-LABEL: splatvar_funnnel_v32i8:
; AVX512F-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm3
; AVX512F-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX512F-NEXT: vpsllw %xmm2, %xmm4, %xmm2
; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX512F-NEXT: vpand %ymm2, %ymm3, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512F-NEXT: vpsubb %xmm1, %xmm3, %xmm1
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw %xmm1, %xmm4, %xmm1
; AVX512F-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX512F-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512VL-LABEL: splatvar_funnnel_v32i8:
; AVX512VL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm3
; AVX512VL-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT: vpsllw %xmm2, %xmm4, %xmm2
; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX512VL-NEXT: vpand %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512VL-NEXT: vpsubb %xmm1, %xmm3, %xmm1
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw %xmm1, %xmm4, %xmm1
; AVX512VL-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX512VL-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512VL-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512VL-NEXT: retq
; AVX512BW-LABEL: splatvar_funnnel_v32i8:
; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm3
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero,ymm3[16],zero,ymm3[17],zero,ymm3[18],zero,ymm3[19],zero,ymm3[20],zero,ymm3[21],zero,ymm3[22],zero,ymm3[23],zero,ymm3[24],zero,ymm3[25],zero,ymm3[26],zero,ymm3[27],zero,ymm3[28],zero,ymm3[29],zero,ymm3[30],zero,ymm3[31],zero
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BW-NEXT: vpsllvw %zmm3, %zmm0, %zmm3
; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512BW-NEXT: vpsubb %ymm1, %ymm4, %ymm1
; AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
; AVX512VLBW-LABEL: splatvar_funnnel_v32i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VLBW-NEXT: vpand %ymm2, %ymm1, %ymm3
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero,ymm3[16],zero,ymm3[17],zero,ymm3[18],zero,ymm3[19],zero,ymm3[20],zero,ymm3[21],zero,ymm3[22],zero,ymm3[23],zero,ymm3[24],zero,ymm3[25],zero,ymm3[26],zero,ymm3[27],zero,ymm3[28],zero,ymm3[29],zero,ymm3[30],zero,ymm3[31],zero
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512VLBW-NEXT: vpsllvw %zmm3, %zmm0, %zmm3
; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBW-NEXT: vpsubb %ymm1, %ymm4, %ymm1
; AVX512VLBW-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512VLBW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512VLBW-NEXT: retq
; XOPAVX1-LABEL: splatvar_funnnel_v32i8:
; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vprotb %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vprotb %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-LABEL: splatvar_funnnel_v32i8:
; XOPAVX2-NEXT: vpbroadcastb %xmm1, %ymm1
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; XOPAVX2-NEXT: vprotb %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vprotb %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
  %splat = shufflevector <32 x i8> %amt, <32 x i8> undef, <32 x i32> zeroinitializer
  %res = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %x, <32 x i8> %x, <32 x i8> %splat)
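;
; Constant Shifts
;
; The rotate amounts below are build-vector constants, so the expansions can
; use immediate shifts (e.g. vpsllq $4) or fold the amounts into memory
; operands for the variable-shift, vprolv and vprot forms.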
define <4 x i64> @constant_funnnel_v4i64(<4 x i64> %x) nounwind {
; AVX1-LABEL: constant_funnnel_v4i64:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpsrlq $4, %xmm1, %xmm2
; AVX1-NEXT: vpsrlq $14, %xmm1, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vpsrlq $50, %xmm0, %xmm3
; AVX1-NEXT: vpsrlq $60, %xmm0, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vpsllq $60, %xmm1, %xmm3
; AVX1-NEXT: vpsllq $50, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vpsllq $14, %xmm0, %xmm3
; AVX1-NEXT: vpsllq $4, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
; AVX2-LABEL: constant_funnnel_v4i64:
; AVX2-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm1
; AVX2-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512F-LABEL: constant_funnnel_v4i64:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [4,14,50,60]
; AVX512F-NEXT: vprolvq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VL-LABEL: constant_funnnel_v4i64:
; AVX512VL-NEXT: vprolvq {{.*}}(%rip), %ymm0, %ymm0
; AVX512VL-NEXT: retq
; AVX512BW-LABEL: constant_funnnel_v4i64:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [4,14,50,60]
; AVX512BW-NEXT: vprolvq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT: retq
; AVX512VLBW-LABEL: constant_funnnel_v4i64:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vprolvq {{.*}}(%rip), %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
; XOPAVX1-LABEL: constant_funnnel_v4i64:
; XOPAVX1-NEXT: vprotq {{.*}}(%rip), %xmm0, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT: vprotq {{.*}}(%rip), %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX2-LABEL: constant_funnnel_v4i64:
; XOPAVX2-NEXT: vprotq {{.*}}(%rip), %xmm0, %xmm1
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; XOPAVX2-NEXT: vprotq {{.*}}(%rip), %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
  %res = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x, <4 x i64> %x, <4 x i64> <i64 4, i64 14, i64 50, i64 60>)
define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x) nounwind {
; AVX1-LABEL: constant_funnnel_v8i32:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [256,512,1024,2048]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; AVX1-NEXT: vpmuludq %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [16,32,64,128]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; AVX1-NEXT: vpmuludq %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,0,2,2]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5],xmm3[6,7]
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-LABEL: constant_funnnel_v8i32:
; AVX2-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm1
; AVX2-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512F-LABEL: constant_funnnel_v8i32:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [4,5,6,7,8,9,10,11]
; AVX512F-NEXT: vprolvd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: retq
; AVX512VL-LABEL: constant_funnnel_v8i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vprolvd {{.*}}(%rip), %ymm0, %ymm0
; AVX512VL-NEXT: retq
; AVX512BW-LABEL: constant_funnnel_v8i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [4,5,6,7,8,9,10,11]
; AVX512BW-NEXT: vprolvd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT: retq
; AVX512VLBW-LABEL: constant_funnnel_v8i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vprolvd {{.*}}(%rip), %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
; XOPAVX1-LABEL: constant_funnnel_v8i32:
; XOPAVX1-NEXT: vprotd {{.*}}(%rip), %xmm0, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT: vprotd {{.*}}(%rip), %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT: retq
; XOPAVX2-LABEL: constant_funnnel_v8i32:
; XOPAVX2-NEXT: vprotd {{.*}}(%rip), %xmm0, %xmm1
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; XOPAVX2-NEXT: vprotd {{.*}}(%rip), %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX2-NEXT: retq
  %res = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x, <8 x i32> %x, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>)
define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x) nounwind {
; AVX1-LABEL: constant_funnnel_v16i16:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT: vpmulhuw %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vpmullw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT: vpmulhuw %xmm2, %xmm0, %xmm3
; AVX1-NEXT: vpmullw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-LABEL: constant_funnnel_v16i16:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
; AVX512F-LABEL: constant_funnnel_v16i16:
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX512F-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2
; AVX512F-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: retq
; AVX512VL-LABEL: constant_funnnel_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX512VL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: retq
; AVX512BW-LABEL: constant_funnnel_v16i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1]
; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm1
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
; AVX512VLBW-LABEL: constant_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsrlvw {{.*}}(%rip), %ymm0, %ymm1
; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
; XOPAVX1-LABEL: constant_funnnel_v16i16:
; XOPAVX1-NEXT: vprotw {{.*}}(%rip), %xmm0, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT: vprotw {{.*}}(%rip), %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT: retq
; XOPAVX2-LABEL: constant_funnnel_v16i16:
; XOPAVX2-NEXT: vprotw {{.*}}(%rip), %xmm0, %xmm1
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; XOPAVX2-NEXT: vprotw {{.*}}(%rip), %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX2-NEXT: retq
  %res = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x, <16 x i16> %x, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
1141 define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x) nounwind {
1142 ; AVX1-LABEL: constant_funnnel_v32i8:
1144 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1145 ; AVX1-NEXT: vpxor %xmm8, %xmm8, %xmm8
1146 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm8[8],xmm1[9],xmm8[9],xmm1[10],xmm8[10],xmm1[11],xmm8[11],xmm1[12],xmm8[12],xmm1[13],xmm8[13],xmm1[14],xmm8[14],xmm1[15],xmm8[15]
1147 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm9 = [256,128,64,32,16,8,4,2]
1148 ; AVX1-NEXT: vpmullw %xmm9, %xmm3, %xmm3
1149 ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
1150 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
1151 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [256,2,4,8,16,32,64,128]
1152 ; AVX1-NEXT: vpmullw %xmm6, %xmm5, %xmm7
1153 ; AVX1-NEXT: vpsrlw $8, %xmm7, %xmm7
1154 ; AVX1-NEXT: vpackuswb %xmm3, %xmm7, %xmm3
1155 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
1156 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [1,128,64,32,16,8,4,2]
1157 ; AVX1-NEXT: vpmullw %xmm7, %xmm1, %xmm1
1158 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
1159 ; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
1160 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
1161 ; AVX1-NEXT: vpmullw %xmm4, %xmm5, %xmm5
1162 ; AVX1-NEXT: vpand %xmm2, %xmm5, %xmm5
1163 ; AVX1-NEXT: vpackuswb %xmm1, %xmm5, %xmm1
1164 ; AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1
1165 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8],xmm8[8],xmm0[9],xmm8[9],xmm0[10],xmm8[10],xmm0[11],xmm8[11],xmm0[12],xmm8[12],xmm0[13],xmm8[13],xmm0[14],xmm8[14],xmm0[15],xmm8[15]
1166 ; AVX1-NEXT: vpmullw %xmm9, %xmm3, %xmm3
1167 ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
1168 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1169 ; AVX1-NEXT: vpmullw %xmm6, %xmm5, %xmm6
1170 ; AVX1-NEXT: vpsrlw $8, %xmm6, %xmm6
1171 ; AVX1-NEXT: vpackuswb %xmm3, %xmm6, %xmm3
1172 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1173 ; AVX1-NEXT: vpmullw %xmm7, %xmm0, %xmm0
1174 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
1175 ; AVX1-NEXT: vpmullw %xmm4, %xmm5, %xmm4
1176 ; AVX1-NEXT: vpand %xmm2, %xmm4, %xmm2
1177 ; AVX1-NEXT: vpackuswb %xmm0, %xmm2, %xmm0
1178 ; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
1179 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1182 ; AVX2-LABEL: constant_funnnel_v32i8:
1184 ; AVX2-NEXT: vpsllw $4, %ymm0, %ymm1
1185 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
1186 ; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
1187 ; AVX2-NEXT: # ymm2 = mem[0,1,0,1]
1188 ; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm1
1189 ; AVX2-NEXT: vpsllw $2, %ymm1, %ymm3
1190 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
1191 ; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2
1192 ; AVX2-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
1193 ; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm3
1194 ; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2
1195 ; AVX2-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
1196 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1197 ; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31]
1198 ; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm3, %ymm3
1199 ; AVX2-NEXT: vpsrlw $8, %ymm3, %ymm3
1200 ; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23]
1201 ; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
1202 ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
1203 ; AVX2-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
1204 ; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
1205 ; AVX2-NEXT: retq
1207 ; AVX512F-LABEL: constant_funnnel_v32i8:
1208 ; AVX512F: # %bb.0:
1209 ; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1
1210 ; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
1211 ; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
1212 ; AVX512F-NEXT: # ymm2 = mem[0,1,0,1]
1213 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm1
1214 ; AVX512F-NEXT: vpsllw $2, %ymm1, %ymm3
1215 ; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
1216 ; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm2
1217 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
1218 ; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm3
1219 ; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm2
1220 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
1221 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
1222 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31]
1223 ; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm3, %ymm3
1224 ; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
1225 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23]
1226 ; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
1227 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0
1228 ; AVX512F-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
1229 ; AVX512F-NEXT: vpor %ymm0, %ymm1, %ymm0
1230 ; AVX512F-NEXT: retq
1232 ; AVX512VL-LABEL: constant_funnnel_v32i8:
1233 ; AVX512VL: # %bb.0:
1234 ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
1235 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
1236 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
1237 ; AVX512VL-NEXT: # ymm2 = mem[0,1,0,1]
1238 ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm1
1239 ; AVX512VL-NEXT: vpsllw $2, %ymm1, %ymm3
1240 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
1241 ; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
1242 ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
1243 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm3
1244 ; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
1245 ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
1246 ; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
1247 ; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
1248 ; AVX512VL-NEXT: vpmullw {{.*}}(%rip), %ymm2, %ymm2
1249 ; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
1250 ; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
1251 ; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
1252 ; AVX512VL-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
1253 ; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
1254 ; AVX512VL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
1255 ; AVX512VL-NEXT: vpor %ymm0, %ymm1, %ymm0
1256 ; AVX512VL-NEXT: retq
1258 ; AVX512BW-LABEL: constant_funnnel_v32i8:
1259 ; AVX512BW: # %bb.0:
1260 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
1261 ; AVX512BW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm1
1262 ; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
1263 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
1264 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
1265 ; AVX512BW-NEXT: retq
1267 ; AVX512VLBW-LABEL: constant_funnnel_v32i8:
1268 ; AVX512VLBW: # %bb.0:
1269 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
1270 ; AVX512VLBW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm1
1271 ; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
1272 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
1273 ; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0
1274 ; AVX512VLBW-NEXT: retq
1276 ; XOPAVX1-LABEL: constant_funnnel_v32i8:
1277 ; XOPAVX1: # %bb.0:
1278 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1279 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1]
1280 ; XOPAVX1-NEXT: vprotb %xmm2, %xmm1, %xmm1
1281 ; XOPAVX1-NEXT: vprotb %xmm2, %xmm0, %xmm0
1282 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1283 ; XOPAVX1-NEXT: retq
1285 ; XOPAVX2-LABEL: constant_funnnel_v32i8:
1286 ; XOPAVX2: # %bb.0:
1287 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1288 ; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1]
1289 ; XOPAVX2-NEXT: vprotb %xmm2, %xmm1, %xmm1
1290 ; XOPAVX2-NEXT: vprotb %xmm2, %xmm0, %xmm0
1291 ; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1292 ; XOPAVX2-NEXT: retq
1293 %res = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %x, <32 x i8> %x, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1>)
1294 ret <32 x i8> %res
1295 }
1298 ; Uniform Constant Shifts
1301 define <4 x i64> @splatconstant_funnnel_v4i64(<4 x i64> %x) nounwind {
1302 ; AVX1-LABEL: splatconstant_funnnel_v4i64:
1303 ; AVX1: # %bb.0:
1304 ; AVX1-NEXT: vpsrlq $50, %xmm0, %xmm1
1305 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1306 ; AVX1-NEXT: vpsrlq $50, %xmm2, %xmm3
1307 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
1308 ; AVX1-NEXT: vpsllq $14, %xmm0, %xmm0
1309 ; AVX1-NEXT: vpsllq $14, %xmm2, %xmm2
1310 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1311 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1312 ; AVX1-NEXT: retq
1314 ; AVX2-LABEL: splatconstant_funnnel_v4i64:
1315 ; AVX2: # %bb.0:
1316 ; AVX2-NEXT: vpsrlq $50, %ymm0, %ymm1
1317 ; AVX2-NEXT: vpsllq $14, %ymm0, %ymm0
1318 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1319 ; AVX2-NEXT: retq
1321 ; AVX512F-LABEL: splatconstant_funnnel_v4i64:
1322 ; AVX512F: # %bb.0:
1323 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1324 ; AVX512F-NEXT: vprolq $14, %zmm0, %zmm0
1325 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1326 ; AVX512F-NEXT: retq
1328 ; AVX512VL-LABEL: splatconstant_funnnel_v4i64:
1329 ; AVX512VL: # %bb.0:
1330 ; AVX512VL-NEXT: vprolq $14, %ymm0, %ymm0
1331 ; AVX512VL-NEXT: retq
1333 ; AVX512BW-LABEL: splatconstant_funnnel_v4i64:
1334 ; AVX512BW: # %bb.0:
1335 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1336 ; AVX512BW-NEXT: vprolq $14, %zmm0, %zmm0
1337 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1338 ; AVX512BW-NEXT: retq
1340 ; AVX512VLBW-LABEL: splatconstant_funnnel_v4i64:
1341 ; AVX512VLBW: # %bb.0:
1342 ; AVX512VLBW-NEXT: vprolq $14, %ymm0, %ymm0
1343 ; AVX512VLBW-NEXT: retq
1345 ; XOPAVX1-LABEL: splatconstant_funnnel_v4i64:
1346 ; XOPAVX1: # %bb.0:
1347 ; XOPAVX1-NEXT: vprotq $14, %xmm0, %xmm1
1348 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1349 ; XOPAVX1-NEXT: vprotq $14, %xmm0, %xmm0
1350 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1351 ; XOPAVX1-NEXT: retq
1353 ; XOPAVX2-LABEL: splatconstant_funnnel_v4i64:
1354 ; XOPAVX2: # %bb.0:
1355 ; XOPAVX2-NEXT: vprotq $14, %xmm0, %xmm1
1356 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1357 ; XOPAVX2-NEXT: vprotq $14, %xmm0, %xmm0
1358 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1359 ; XOPAVX2-NEXT: retq
1360 %res = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x, <4 x i64> %x, <4 x i64> <i64 14, i64 14, i64 14, i64 14>)
1361 ret <4 x i64> %res
1362 }
1364 define <8 x i32> @splatconstant_funnnel_v8i32(<8 x i32> %x) nounwind {
1365 ; AVX1-LABEL: splatconstant_funnnel_v8i32:
1366 ; AVX1: # %bb.0:
1367 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1368 ; AVX1-NEXT: vpsrld $28, %xmm1, %xmm2
1369 ; AVX1-NEXT: vpslld $4, %xmm1, %xmm1
1370 ; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1
1371 ; AVX1-NEXT: vpsrld $28, %xmm0, %xmm2
1372 ; AVX1-NEXT: vpslld $4, %xmm0, %xmm0
1373 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
1374 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1375 ; AVX1-NEXT: retq
1377 ; AVX2-LABEL: splatconstant_funnnel_v8i32:
1378 ; AVX2: # %bb.0:
1379 ; AVX2-NEXT: vpsrld $28, %ymm0, %ymm1
1380 ; AVX2-NEXT: vpslld $4, %ymm0, %ymm0
1381 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1382 ; AVX2-NEXT: retq
1384 ; AVX512F-LABEL: splatconstant_funnnel_v8i32:
1385 ; AVX512F: # %bb.0:
1386 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1387 ; AVX512F-NEXT: vprold $4, %zmm0, %zmm0
1388 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1389 ; AVX512F-NEXT: retq
1391 ; AVX512VL-LABEL: splatconstant_funnnel_v8i32:
1392 ; AVX512VL: # %bb.0:
1393 ; AVX512VL-NEXT: vprold $4, %ymm0, %ymm0
1394 ; AVX512VL-NEXT: retq
1396 ; AVX512BW-LABEL: splatconstant_funnnel_v8i32:
1397 ; AVX512BW: # %bb.0:
1398 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1399 ; AVX512BW-NEXT: vprold $4, %zmm0, %zmm0
1400 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1401 ; AVX512BW-NEXT: retq
1403 ; AVX512VLBW-LABEL: splatconstant_funnnel_v8i32:
1404 ; AVX512VLBW: # %bb.0:
1405 ; AVX512VLBW-NEXT: vprold $4, %ymm0, %ymm0
1406 ; AVX512VLBW-NEXT: retq
1408 ; XOPAVX1-LABEL: splatconstant_funnnel_v8i32:
1409 ; XOPAVX1: # %bb.0:
1410 ; XOPAVX1-NEXT: vprotd $4, %xmm0, %xmm1
1411 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1412 ; XOPAVX1-NEXT: vprotd $4, %xmm0, %xmm0
1413 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1414 ; XOPAVX1-NEXT: retq
1416 ; XOPAVX2-LABEL: splatconstant_funnnel_v8i32:
1417 ; XOPAVX2: # %bb.0:
1418 ; XOPAVX2-NEXT: vprotd $4, %xmm0, %xmm1
1419 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1420 ; XOPAVX2-NEXT: vprotd $4, %xmm0, %xmm0
1421 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1422 ; XOPAVX2-NEXT: retq
1423 %res = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x, <8 x i32> %x, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>)
1424 ret <8 x i32> %res
1425 }
1427 define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x) nounwind {
1428 ; AVX1-LABEL: splatconstant_funnnel_v16i16:
1429 ; AVX1: # %bb.0:
1430 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1431 ; AVX1-NEXT: vpsrlw $9, %xmm1, %xmm2
1432 ; AVX1-NEXT: vpsllw $7, %xmm1, %xmm1
1433 ; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1
1434 ; AVX1-NEXT: vpsrlw $9, %xmm0, %xmm2
1435 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
1436 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
1437 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1438 ; AVX1-NEXT: retq
1440 ; AVX2-LABEL: splatconstant_funnnel_v16i16:
1441 ; AVX2: # %bb.0:
1442 ; AVX2-NEXT: vpsrlw $9, %ymm0, %ymm1
1443 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
1444 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1445 ; AVX2-NEXT: retq
1447 ; AVX512-LABEL: splatconstant_funnnel_v16i16:
1448 ; AVX512: # %bb.0:
1449 ; AVX512-NEXT: vpsrlw $9, %ymm0, %ymm1
1450 ; AVX512-NEXT: vpsllw $7, %ymm0, %ymm0
1451 ; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
1452 ; AVX512-NEXT: retq
1454 ; XOPAVX1-LABEL: splatconstant_funnnel_v16i16:
1455 ; XOPAVX1: # %bb.0:
1456 ; XOPAVX1-NEXT: vprotw $7, %xmm0, %xmm1
1457 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1458 ; XOPAVX1-NEXT: vprotw $7, %xmm0, %xmm0
1459 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1460 ; XOPAVX1-NEXT: retq
1462 ; XOPAVX2-LABEL: splatconstant_funnnel_v16i16:
1463 ; XOPAVX2: # %bb.0:
1464 ; XOPAVX2-NEXT: vprotw $7, %xmm0, %xmm1
1465 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1466 ; XOPAVX2-NEXT: vprotw $7, %xmm0, %xmm0
1467 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1468 ; XOPAVX2-NEXT: retq
1469 %res = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x, <16 x i16> %x, <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
1470 ret <16 x i16> %res
1471 }
1473 define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
1474 ; AVX1-LABEL: splatconstant_funnnel_v32i8:
1475 ; AVX1: # %bb.0:
1476 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1477 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm2
1478 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
1479 ; AVX1-NEXT: vpandn %xmm2, %xmm3, %xmm2
1480 ; AVX1-NEXT: vpsllw $4, %xmm1, %xmm1
1481 ; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
1482 ; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1
1483 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm2
1484 ; AVX1-NEXT: vpandn %xmm2, %xmm3, %xmm2
1485 ; AVX1-NEXT: vpsllw $4, %xmm0, %xmm0
1486 ; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
1487 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
1488 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1489 ; AVX1-NEXT: retq
1491 ; AVX2-LABEL: splatconstant_funnnel_v32i8:
1492 ; AVX2: # %bb.0:
1493 ; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm1
1494 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
1495 ; AVX2-NEXT: vpsllw $4, %ymm0, %ymm0
1496 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
1497 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1498 ; AVX2-NEXT: retq
1500 ; AVX512-LABEL: splatconstant_funnnel_v32i8:
1501 ; AVX512: # %bb.0:
1502 ; AVX512-NEXT: vpsrlw $4, %ymm0, %ymm1
1503 ; AVX512-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
1504 ; AVX512-NEXT: vpsllw $4, %ymm0, %ymm0
1505 ; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
1506 ; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
1507 ; AVX512-NEXT: retq
1509 ; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
1510 ; XOPAVX1: # %bb.0:
1511 ; XOPAVX1-NEXT: vprotb $4, %xmm0, %xmm1
1512 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1513 ; XOPAVX1-NEXT: vprotb $4, %xmm0, %xmm0
1514 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1515 ; XOPAVX1-NEXT: retq
1517 ; XOPAVX2-LABEL: splatconstant_funnnel_v32i8:
1518 ; XOPAVX2: # %bb.0:
1519 ; XOPAVX2-NEXT: vprotb $4, %xmm0, %xmm1
1520 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1521 ; XOPAVX2-NEXT: vprotb $4, %xmm0, %xmm0
1522 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1523 ; XOPAVX2-NEXT: retq
1524 %res = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %x, <32 x i8> %x, <32 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
1525 ret <32 x i8> %res
1526 }