; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=AVX512VBMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VLBW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=AVX512VLVBMI2

declare <8 x i64> @llvm.fshr.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)
declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
declare <32 x i16> @llvm.fshr.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)
declare <64 x i8> @llvm.fshr.v64i8(<64 x i8>, <64 x i8>, <64 x i8>)

;
; Variable Shifts
;
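
; fshr concatenates %x (high half) with %y (low half) and shifts the
; double-width value right by %amt modulo the element width BW, returning
; the low half. As a rough sketch (illustrative only, not one of the
; autogenerated assertions), the expansion checked below for targets
; without VBMI2 is:
;   %r = (%y lshr (%amt & (BW-1))) | ((%x shl 1) shl (~%amt & (BW-1)))
; The extra "shl 1" keeps both effective shift amounts below BW even when
; %amt is 0. With AVX512VBMI2 the whole operation is a single vpshrdvq.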
define <8 x i64> @var_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %amt) nounwind {
; AVX512F-LABEL: var_funnnel_v8i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
; AVX512F-NEXT: vpandq %zmm3, %zmm2, %zmm4
; AVX512F-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
; AVX512F-NEXT: vpandnq %zmm3, %zmm2, %zmm2
; AVX512F-NEXT: vpsllq $1, %zmm0, %zmm0
; AVX512F-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: var_funnnel_v8i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
; AVX512VL-NEXT: vpandq %zmm3, %zmm2, %zmm4
; AVX512VL-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
; AVX512VL-NEXT: vpandnq %zmm3, %zmm2, %zmm2
; AVX512VL-NEXT: vpsllq $1, %zmm0, %zmm0
; AVX512VL-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: var_funnnel_v8i64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm4
; AVX512BW-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
; AVX512BW-NEXT: vpandnq %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT: vpsllq $1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: var_funnnel_v8i64:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1
; AVX512VBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v8i64:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm4
; AVX512VLBW-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpandnq %zmm3, %zmm2, %zmm2
; AVX512VLBW-NEXT: vpsllq $1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v8i64:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1
; AVX512VLVBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512VLVBMI2-NEXT: retq
  %res = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %amt)
  ret <8 x i64> %res
}
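
; The i32 version follows the same sketch with a mask of 31: vpsrlvd on %y,
; vpsllvd on (%x shl 1) by the inverted amount, then vpord. VBMI2 targets
; collapse it to vpshrdvd.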
define <16 x i32> @var_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i32> %amt) nounwind {
; AVX512F-LABEL: var_funnnel_v16i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastd {{.*#+}} zmm3 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; AVX512F-NEXT: vpandd %zmm3, %zmm2, %zmm4
; AVX512F-NEXT: vpsrlvd %zmm4, %zmm1, %zmm1
; AVX512F-NEXT: vpandnd %zmm3, %zmm2, %zmm2
; AVX512F-NEXT: vpslld $1, %zmm0, %zmm0
; AVX512F-NEXT: vpsllvd %zmm2, %zmm0, %zmm0
; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: var_funnnel_v16i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastd {{.*#+}} zmm3 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; AVX512VL-NEXT: vpandd %zmm3, %zmm2, %zmm4
; AVX512VL-NEXT: vpsrlvd %zmm4, %zmm1, %zmm1
; AVX512VL-NEXT: vpandnd %zmm3, %zmm2, %zmm2
; AVX512VL-NEXT: vpslld $1, %zmm0, %zmm0
; AVX512VL-NEXT: vpsllvd %zmm2, %zmm0, %zmm0
; AVX512VL-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: var_funnnel_v16i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpbroadcastd {{.*#+}} zmm3 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; AVX512BW-NEXT: vpandd %zmm3, %zmm2, %zmm4
; AVX512BW-NEXT: vpsrlvd %zmm4, %zmm1, %zmm1
; AVX512BW-NEXT: vpandnd %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT: vpslld $1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsllvd %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: var_funnnel_v16i32:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1
; AVX512VBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v16i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpbroadcastd {{.*#+}} zmm3 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; AVX512VLBW-NEXT: vpandd %zmm3, %zmm2, %zmm4
; AVX512VLBW-NEXT: vpsrlvd %zmm4, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpandnd %zmm3, %zmm2, %zmm2
; AVX512VLBW-NEXT: vpslld $1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpsllvd %zmm2, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v16i32:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1
; AVX512VLVBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512VLVBMI2-NEXT: retq
  %res = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i32> %amt)
  ret <16 x i32> %res
}
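
; AVX512F/VL lack variable i16 shifts, so each 256-bit half is
; zero-extended to i32 (vpmovzxwd), shifted with vpsrlvd/vpsllvd, and
; truncated back (vpmovdw). AVX512BW can apply the same sketch directly
; with vpsrlvw/vpsllvw on the full 512-bit vector.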
define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %amt) nounwind {
; AVX512F-LABEL: var_funnnel_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpandq %zmm3, %zmm2, %zmm4
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm6 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT: vpsrlvd %zmm5, %zmm6, %zmm5
; AVX512F-NEXT: vpmovdw %zmm5, %ymm5
; AVX512F-NEXT: vextracti64x4 $1, %zmm4, %ymm4
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT: vpsrlvd %zmm4, %zmm1, %zmm1
; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm5, %zmm1
; AVX512F-NEXT: vpandnq %zmm3, %zmm2, %zmm2
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm4
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero
; AVX512F-NEXT: vpsllvd %zmm3, %zmm4, %zmm3
; AVX512F-NEXT: vpmovdw %zmm3, %ymm3
; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm2
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpsllvd %zmm2, %zmm0, %zmm0
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: var_funnnel_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpandq %zmm3, %zmm2, %zmm4
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm6 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-NEXT: vpsrlvd %zmm5, %zmm6, %zmm5
; AVX512VL-NEXT: vpmovdw %zmm5, %ymm5
; AVX512VL-NEXT: vextracti64x4 $1, %zmm4, %ymm4
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-NEXT: vpsrlvd %zmm4, %zmm1, %zmm1
; AVX512VL-NEXT: vpmovdw %zmm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm5, %zmm1
; AVX512VL-NEXT: vpandnq %zmm3, %zmm2, %zmm2
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm4
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero
; AVX512VL-NEXT: vpsllvd %zmm3, %zmm4, %zmm3
; AVX512VL-NEXT: vpmovdw %zmm3, %ymm3
; AVX512VL-NEXT: vextracti64x4 $1, %zmm2, %ymm2
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT: vpsllvd %zmm2, %zmm0, %zmm0
; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0
; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: var_funnnel_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm4
; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
; AVX512BW-NEXT: vpandnq %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT: vpsllw $1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: var_funnnel_v32i16:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1
; AVX512VBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm4
; AVX512VLBW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpandnq %zmm3, %zmm2, %zmm2
; AVX512VLBW-NEXT: vpsllw $1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v32i16:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1
; AVX512VLVBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512VLVBMI2-NEXT: retq
  %res = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %amt)
  ret <32 x i16> %res
}
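
; There is no variable i8 shift at all, so the byte case is a
; shift-and-select ladder: shift by 4, then 2, then 1, selecting per byte
; from the sign bits of the control vector (amount shl 5, doubled between
; steps). AVX512F/VL select with vpblendvb per 256-bit half; AVX512BW and
; later move the sign bits into k-registers (vpmovb2m) and use masked
; vmovdqu8/vpaddb instead.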
define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt) nounwind {
; AVX512F-LABEL: var_funnnel_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm3
; AVX512F-NEXT: vpsrlw $4, %ymm3, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm6 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512F-NEXT: vpandq %zmm6, %zmm2, %zmm7
; AVX512F-NEXT: vextracti64x4 $1, %zmm7, %ymm8
; AVX512F-NEXT: vpsllw $5, %ymm8, %ymm8
; AVX512F-NEXT: vpblendvb %ymm8, %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw $2, %ymm3, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm9 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512F-NEXT: vpand %ymm4, %ymm9, %ymm4
; AVX512F-NEXT: vpaddb %ymm8, %ymm8, %ymm8
; AVX512F-NEXT: vpblendvb %ymm8, %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw $1, %ymm3, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm10 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512F-NEXT: vpand %ymm4, %ymm10, %ymm4
; AVX512F-NEXT: vpaddb %ymm8, %ymm8, %ymm8
; AVX512F-NEXT: vpblendvb %ymm8, %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm4
; AVX512F-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX512F-NEXT: vpsllw $5, %ymm7, %ymm5
; AVX512F-NEXT: vpblendvb %ymm5, %ymm4, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlw $2, %ymm1, %ymm4
; AVX512F-NEXT: vpand %ymm4, %ymm9, %ymm4
; AVX512F-NEXT: vpaddb %ymm5, %ymm5, %ymm5
; AVX512F-NEXT: vpblendvb %ymm5, %ymm4, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm4
; AVX512F-NEXT: vpand %ymm4, %ymm10, %ymm4
; AVX512F-NEXT: vpaddb %ymm5, %ymm5, %ymm5
; AVX512F-NEXT: vpblendvb %ymm5, %ymm4, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT: vpaddb %ymm3, %ymm3, %ymm3
; AVX512F-NEXT: vpsllw $4, %ymm3, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX512F-NEXT: vpandnq %zmm6, %zmm2, %zmm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm6
; AVX512F-NEXT: vpsllw $5, %ymm6, %ymm6
; AVX512F-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vpsllw $2, %ymm3, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm7 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512F-NEXT: vpand %ymm7, %ymm4, %ymm4
; AVX512F-NEXT: vpaddb %ymm6, %ymm6, %ymm6
; AVX512F-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vpaddb %ymm3, %ymm3, %ymm4
; AVX512F-NEXT: vpaddb %ymm6, %ymm6, %ymm6
; AVX512F-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm0
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm4
; AVX512F-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX512F-NEXT: vpsllw $5, %ymm2, %ymm2
; AVX512F-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm4
; AVX512F-NEXT: vpand %ymm7, %ymm4, %ymm4
; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX512F-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm4
; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX512F-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: var_funnnel_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
; AVX512VL-NEXT: vpsrlw $4, %ymm3, %ymm4
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm6 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VL-NEXT: vpandq %zmm6, %zmm2, %zmm7
; AVX512VL-NEXT: vextracti64x4 $1, %zmm7, %ymm8
; AVX512VL-NEXT: vpsllw $5, %ymm8, %ymm8
; AVX512VL-NEXT: vpblendvb %ymm8, %ymm4, %ymm3, %ymm3
; AVX512VL-NEXT: vpsrlw $2, %ymm3, %ymm4
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm9 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512VL-NEXT: vpand %ymm4, %ymm9, %ymm4
; AVX512VL-NEXT: vpaddb %ymm8, %ymm8, %ymm8
; AVX512VL-NEXT: vpblendvb %ymm8, %ymm4, %ymm3, %ymm3
; AVX512VL-NEXT: vpsrlw $1, %ymm3, %ymm4
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm10 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512VL-NEXT: vpand %ymm4, %ymm10, %ymm4
; AVX512VL-NEXT: vpaddb %ymm8, %ymm8, %ymm8
; AVX512VL-NEXT: vpblendvb %ymm8, %ymm4, %ymm3, %ymm3
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm4
; AVX512VL-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX512VL-NEXT: vpsllw $5, %ymm7, %ymm5
; AVX512VL-NEXT: vpblendvb %ymm5, %ymm4, %ymm1, %ymm1
; AVX512VL-NEXT: vpsrlw $2, %ymm1, %ymm4
; AVX512VL-NEXT: vpand %ymm4, %ymm9, %ymm4
; AVX512VL-NEXT: vpaddb %ymm5, %ymm5, %ymm5
; AVX512VL-NEXT: vpblendvb %ymm5, %ymm4, %ymm1, %ymm1
; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm4
; AVX512VL-NEXT: vpand %ymm4, %ymm10, %ymm4
; AVX512VL-NEXT: vpaddb %ymm5, %ymm5, %ymm5
; AVX512VL-NEXT: vpblendvb %ymm5, %ymm4, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm3
; AVX512VL-NEXT: vpsllw $4, %ymm3, %ymm4
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX512VL-NEXT: vpandnq %zmm6, %zmm2, %zmm2
; AVX512VL-NEXT: vextracti64x4 $1, %zmm2, %ymm6
; AVX512VL-NEXT: vpsllw $5, %ymm6, %ymm6
; AVX512VL-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
; AVX512VL-NEXT: vpsllw $2, %ymm3, %ymm4
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512VL-NEXT: vpand %ymm7, %ymm4, %ymm4
; AVX512VL-NEXT: vpaddb %ymm6, %ymm6, %ymm6
; AVX512VL-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm4
; AVX512VL-NEXT: vpaddb %ymm6, %ymm6, %ymm6
; AVX512VL-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm4
; AVX512VL-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX512VL-NEXT: vpsllw $5, %ymm2, %ymm2
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm4
; AVX512VL-NEXT: vpand %ymm7, %ymm4, %ymm4
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm4
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: var_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm4
; AVX512BW-NEXT: vpsllw $5, %zmm4, %zmm4
; AVX512BW-NEXT: vpaddb %zmm4, %zmm4, %zmm5
; AVX512BW-NEXT: vpmovb2m %zmm5, %k1
; AVX512BW-NEXT: vpmovb2m %zmm4, %k2
; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm4
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm4
; AVX512BW-NEXT: vmovdqu8 %zmm4, %zmm1 {%k2}
; AVX512BW-NEXT: vpsrlw $2, %zmm1, %zmm4
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm4
; AVX512BW-NEXT: vmovdqu8 %zmm4, %zmm1 {%k1}
; AVX512BW-NEXT: vpsrlw $1, %zmm1, %zmm4
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm4
; AVX512BW-NEXT: vpaddb %zmm5, %zmm5, %zmm5
; AVX512BW-NEXT: vpmovb2m %zmm5, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm4, %zmm1 {%k1}
; AVX512BW-NEXT: vpandnq %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT: vpsllw $5, %zmm2, %zmm2
; AVX512BW-NEXT: vpaddb %zmm2, %zmm2, %zmm3
; AVX512BW-NEXT: vpmovb2m %zmm3, %k1
; AVX512BW-NEXT: vpmovb2m %zmm2, %k2
; AVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k2}
; AVX512BW-NEXT: vpsllw $2, %zmm0, %zmm2
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT: vpaddb %zmm3, %zmm3, %zmm2
; AVX512BW-NEXT: vpmovb2m %zmm2, %k1
; AVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: var_funnnel_v64i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VBMI2-NEXT: vpandq %zmm3, %zmm2, %zmm4
; AVX512VBMI2-NEXT: vpsllw $5, %zmm4, %zmm4
; AVX512VBMI2-NEXT: vpaddb %zmm4, %zmm4, %zmm5
; AVX512VBMI2-NEXT: vpmovb2m %zmm5, %k1
; AVX512VBMI2-NEXT: vpmovb2m %zmm4, %k2
; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm4
; AVX512VBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm4
; AVX512VBMI2-NEXT: vmovdqu8 %zmm4, %zmm1 {%k2}
; AVX512VBMI2-NEXT: vpsrlw $2, %zmm1, %zmm4
; AVX512VBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm4
; AVX512VBMI2-NEXT: vmovdqu8 %zmm4, %zmm1 {%k1}
; AVX512VBMI2-NEXT: vpsrlw $1, %zmm1, %zmm4
; AVX512VBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm4
; AVX512VBMI2-NEXT: vpaddb %zmm5, %zmm5, %zmm5
; AVX512VBMI2-NEXT: vpmovb2m %zmm5, %k1
; AVX512VBMI2-NEXT: vmovdqu8 %zmm4, %zmm1 {%k1}
; AVX512VBMI2-NEXT: vpandnq %zmm3, %zmm2, %zmm2
; AVX512VBMI2-NEXT: vpsllw $5, %zmm2, %zmm2
; AVX512VBMI2-NEXT: vpaddb %zmm2, %zmm2, %zmm3
; AVX512VBMI2-NEXT: vpmovb2m %zmm3, %k1
; AVX512VBMI2-NEXT: vpmovb2m %zmm2, %k2
; AVX512VBMI2-NEXT: vpaddb %zmm0, %zmm0, %zmm0
; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512VBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
; AVX512VBMI2-NEXT: vmovdqu8 %zmm2, %zmm0 {%k2}
; AVX512VBMI2-NEXT: vpsllw $2, %zmm0, %zmm2
; AVX512VBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
; AVX512VBMI2-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512VBMI2-NEXT: vpaddb %zmm3, %zmm3, %zmm2
; AVX512VBMI2-NEXT: vpmovb2m %zmm2, %k1
; AVX512VBMI2-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
; AVX512VBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm4
; AVX512VLBW-NEXT: vpsllw $5, %zmm4, %zmm4
; AVX512VLBW-NEXT: vpaddb %zmm4, %zmm4, %zmm5
; AVX512VLBW-NEXT: vpmovb2m %zmm5, %k1
; AVX512VLBW-NEXT: vpmovb2m %zmm4, %k2
; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm4
; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm4
; AVX512VLBW-NEXT: vmovdqu8 %zmm4, %zmm1 {%k2}
; AVX512VLBW-NEXT: vpsrlw $2, %zmm1, %zmm4
; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm4
; AVX512VLBW-NEXT: vmovdqu8 %zmm4, %zmm1 {%k1}
; AVX512VLBW-NEXT: vpsrlw $1, %zmm1, %zmm4
; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm4
; AVX512VLBW-NEXT: vpaddb %zmm5, %zmm5, %zmm5
; AVX512VLBW-NEXT: vpmovb2m %zmm5, %k1
; AVX512VLBW-NEXT: vmovdqu8 %zmm4, %zmm1 {%k1}
; AVX512VLBW-NEXT: vpandnq %zmm3, %zmm2, %zmm2
; AVX512VLBW-NEXT: vpsllw $5, %zmm2, %zmm2
; AVX512VLBW-NEXT: vpaddb %zmm2, %zmm2, %zmm3
; AVX512VLBW-NEXT: vpmovb2m %zmm3, %k1
; AVX512VLBW-NEXT: vpmovb2m %zmm2, %k2
; AVX512VLBW-NEXT: vpaddb %zmm0, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
; AVX512VLBW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k2}
; AVX512VLBW-NEXT: vpsllw $2, %zmm0, %zmm2
; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
; AVX512VLBW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512VLBW-NEXT: vpaddb %zmm3, %zmm3, %zmm2
; AVX512VLBW-NEXT: vpmovb2m %zmm2, %k1
; AVX512VLBW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v64i8:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VLVBMI2-NEXT: vpandq %zmm3, %zmm2, %zmm4
; AVX512VLVBMI2-NEXT: vpsllw $5, %zmm4, %zmm4
; AVX512VLVBMI2-NEXT: vpaddb %zmm4, %zmm4, %zmm5
; AVX512VLVBMI2-NEXT: vpmovb2m %zmm5, %k1
; AVX512VLVBMI2-NEXT: vpmovb2m %zmm4, %k2
; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm4
; AVX512VLVBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm4
; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm4, %zmm1 {%k2}
; AVX512VLVBMI2-NEXT: vpsrlw $2, %zmm1, %zmm4
; AVX512VLVBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm4
; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm4, %zmm1 {%k1}
; AVX512VLVBMI2-NEXT: vpsrlw $1, %zmm1, %zmm4
; AVX512VLVBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm4
; AVX512VLVBMI2-NEXT: vpaddb %zmm5, %zmm5, %zmm5
; AVX512VLVBMI2-NEXT: vpmovb2m %zmm5, %k1
; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm4, %zmm1 {%k1}
; AVX512VLVBMI2-NEXT: vpandnq %zmm3, %zmm2, %zmm2
; AVX512VLVBMI2-NEXT: vpsllw $5, %zmm2, %zmm2
; AVX512VLVBMI2-NEXT: vpaddb %zmm2, %zmm2, %zmm3
; AVX512VLVBMI2-NEXT: vpmovb2m %zmm3, %k1
; AVX512VLVBMI2-NEXT: vpmovb2m %zmm2, %k2
; AVX512VLVBMI2-NEXT: vpaddb %zmm0, %zmm0, %zmm0
; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512VLVBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm2, %zmm0 {%k2}
; AVX512VLVBMI2-NEXT: vpsllw $2, %zmm0, %zmm2
; AVX512VLVBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512VLVBMI2-NEXT: vpaddb %zmm3, %zmm3, %zmm2
; AVX512VLVBMI2-NEXT: vpmovb2m %zmm2, %k1
; AVX512VLVBMI2-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
; AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLVBMI2-NEXT: retq
  %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt)
  ret <64 x i8> %res
}

;
; Uniform Variable Shifts
;
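
; When every lane uses the same (splatted) amount, the shift-by-scalar
; instruction forms apply; they take the count from the low qword of an
; xmm register. A sketch of the two shifts below:
;   vpsrlq %xmm4, %zmm1, %zmm1   # %y lshr (%amt & 63)
;   vpsllq %xmm2, %zmm0, %zmm0   # (%x shl 1) shl (~%amt & 63)
; VBMI2 targets still broadcast the amount and reuse vpshrdvq.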
define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v8i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512F-NEXT: vpsllq $1, %zmm0, %zmm0
; AVX512F-NEXT: vpsllq %xmm2, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v8i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VL-NEXT: vpsllq $1, %zmm0, %zmm0
; AVX512VL-NEXT: vpsllq %xmm2, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v8i64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpsllq $1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsllq %xmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v8i64:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %zmm2
; AVX512VBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1
; AVX512VBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i64:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpsllq $1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpsllq %xmm2, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v8i64:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpbroadcastq %xmm2, %zmm2
; AVX512VLVBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1
; AVX512VLVBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512VLVBMI2-NEXT: retq
  %splat = shufflevector <8 x i64> %amt, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %splat)
  ret <8 x i64> %res
}
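
; Same for i32, except vpsrld/vpslld read their scalar count from the low
; 64 bits, hence the vpmovzxdq widening of the masked amount first.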
define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i32> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v16i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
; AVX512F-NEXT: vpsrld %xmm4, %zmm1, %zmm1
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
; AVX512F-NEXT: vpslld $1, %zmm0, %zmm0
; AVX512F-NEXT: vpslld %xmm2, %zmm0, %zmm0
; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v16i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
; AVX512VL-NEXT: vpsrld %xmm4, %zmm1, %zmm1
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
; AVX512VL-NEXT: vpslld $1, %zmm0, %zmm0
; AVX512VL-NEXT: vpslld %xmm2, %zmm0, %zmm0
; AVX512VL-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v16i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
; AVX512BW-NEXT: vpsrld %xmm4, %zmm1, %zmm1
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
; AVX512BW-NEXT: vpslld $1, %zmm0, %zmm0
; AVX512BW-NEXT: vpslld %xmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v16i32:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %zmm2
; AVX512VBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1
; AVX512VBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
; AVX512VLBW-NEXT: vpsrld %xmm4, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
; AVX512VLBW-NEXT: vpslld $1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpslld %xmm2, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i32:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpbroadcastd %xmm2, %zmm2
; AVX512VLVBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1
; AVX512VLVBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512VLVBMI2-NEXT: retq
  %splat = shufflevector <16 x i32> %amt, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i32> %splat)
  ret <16 x i32> %res
}
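
; For i16 the scalar count is widened with vpmovzxwq. Without AVX512BW
; there is no 512-bit vpsrlw/vpsllw, so AVX512F/VL shift each 256-bit half
; separately and reassemble with vinserti64x4.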
define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm5
; AVX512F-NEXT: vpsrlw %xmm4, %ymm5, %ymm5
; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT: vpsllw $1, %ymm3, %ymm3
; AVX512F-NEXT: vpsllw %xmm2, %ymm3, %ymm3
; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm5
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm5, %ymm5
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512VL-NEXT: vpsllw $1, %ymm3, %ymm3
; AVX512VL-NEXT: vpsllw %xmm2, %ymm3, %ymm3
; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw $1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v32i16:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %zmm2
; AVX512VBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1
; AVX512VBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw $1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i16:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm2, %zmm2
; AVX512VLVBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1
; AVX512VLVBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512VLVBMI2-NEXT: retq
  %splat = shufflevector <32 x i16> %amt, <32 x i16> undef, <32 x i32> zeroinitializer
  %res = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %splat)
  ret <32 x i16> %res
}
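
; Splat i8 shifts are performed as i16 shifts followed by a byte cleanup
; mask, obtained by shifting an all-ones vector (vpcmpeqd) by the same
; count and broadcasting the surviving byte. Reading the imm8 truth table
; (an interpretation, not an autogenerated check), vpternlogq $236 computes
; B | (A & C), i.e. it ors the masked left half with the masked right half
; in one instruction.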
define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm5
; AVX512F-NEXT: vpaddb %ymm5, %ymm5, %ymm5
; AVX512F-NEXT: vpsllw %xmm4, %ymm5, %ymm5
; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm0
; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0
; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512F-NEXT: vpsllw %xmm4, %xmm5, %xmm4
; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
; AVX512F-NEXT: vpandq %zmm4, %zmm0, %zmm4
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm0
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vpsrlw %xmm0, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw %xmm0, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512F-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
; AVX512F-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm5
; AVX512VL-NEXT: vpaddb %ymm5, %ymm5, %ymm5
; AVX512VL-NEXT: vpsllw %xmm4, %ymm5, %ymm5
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0
; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm4
; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
; AVX512VL-NEXT: vpandq %zmm4, %zmm0, %zmm4
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm0
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512VL-NEXT: vpsrlw %xmm0, %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlw %xmm0, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512VL-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512VL-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpsllw %xmm4, %zmm0, %zmm0
; AVX512BW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512BW-NEXT: vpsllw %xmm4, %xmm5, %xmm4
; AVX512BW-NEXT: vpbroadcastb %xmm4, %zmm4
; AVX512BW-NEXT: vpandq %zmm4, %zmm0, %zmm4
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm0
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm0, %zmm1, %zmm1
; AVX512BW-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512BW-NEXT: vpbroadcastb %xmm0, %zmm0
; AVX512BW-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v64i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VBMI2-NEXT: vpaddb %zmm0, %zmm0, %zmm0
; AVX512VBMI2-NEXT: vpsllw %xmm4, %zmm0, %zmm0
; AVX512VBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512VBMI2-NEXT: vpsllw %xmm4, %xmm5, %xmm4
; AVX512VBMI2-NEXT: vpbroadcastb %xmm4, %zmm4
; AVX512VBMI2-NEXT: vpandq %zmm4, %zmm0, %zmm4
; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm0
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VBMI2-NEXT: vpsrlw %xmm0, %zmm1, %zmm1
; AVX512VBMI2-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
; AVX512VBMI2-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512VBMI2-NEXT: vpbroadcastb %xmm0, %zmm0
; AVX512VBMI2-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpaddb %zmm0, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpsllw %xmm4, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512VLBW-NEXT: vpsllw %xmm4, %xmm5, %xmm4
; AVX512VLBW-NEXT: vpbroadcastb %xmm4, %zmm4
; AVX512VLBW-NEXT: vpandq %zmm4, %zmm0, %zmm4
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm0
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm0, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
; AVX512VLBW-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpbroadcastb %xmm0, %zmm0
; AVX512VLBW-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v64i8:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VLVBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLVBMI2-NEXT: vpaddb %zmm0, %zmm0, %zmm0
; AVX512VLVBMI2-NEXT: vpsllw %xmm4, %zmm0, %zmm0
; AVX512VLVBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512VLVBMI2-NEXT: vpsllw %xmm4, %xmm5, %xmm4
; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm4, %zmm4
; AVX512VLVBMI2-NEXT: vpandq %zmm4, %zmm0, %zmm4
; AVX512VLVBMI2-NEXT: vpand %xmm3, %xmm2, %xmm0
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLVBMI2-NEXT: vpsrlw %xmm0, %zmm1, %zmm1
; AVX512VLVBMI2-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
; AVX512VLVBMI2-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm0, %zmm0
; AVX512VLVBMI2-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
; AVX512VLVBMI2-NEXT: retq
  %splat = shufflevector <64 x i8> %amt, <64 x i8> undef, <64 x i32> zeroinitializer
  %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %splat)
  ret <64 x i8> %res
}

;
; Constant Shifts
;
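
; With constant amounts the masking folds away: both shifts become
; immediate-vector vpsrlvq/vpsllvq with constant-pool operands (hidden
; behind the LCPI regexes), and the "shl 1" pre-shift is folded into the
; left-shift constants, e.g. a right shift by 4 pairs with a left shift by
; 60 (sketch of the folded constants).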
define <8 x i64> @constant_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
; AVX512F-LABEL: constant_funnnel_v8i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
; AVX512F-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: constant_funnnel_v8i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
; AVX512VL-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: constant_funnnel_v8i64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: constant_funnnel_v8i64:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpshrdvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
; AVX512VBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v8i64:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: constant_funnnel_v8i64:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpshrdvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
; AVX512VLVBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512VLVBMI2-NEXT: retq
  %res = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> <i64 4, i64 14, i64 50, i64 60, i64 4, i64 14, i64 50, i64 60>)
  ret <8 x i64> %res
}
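
; The i32 constant case has the same shape: vpsrlvd by the amounts and
; vpsllvd by their 32-complements, then vpord; VBMI2 keeps the single
; vpshrdvd with a constant-pool amount.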
define <16 x i32> @constant_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
; AVX512F-LABEL: constant_funnnel_v16i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
; AVX512F-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: constant_funnnel_v16i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
; AVX512VL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: constant_funnnel_v16i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: constant_funnnel_v16i32:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpshrdvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
; AVX512VBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v16i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512VLBW-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: constant_funnnel_v16i32:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpshrdvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
; AVX512VLVBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512VLVBMI2-NEXT: retq
  %res = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>)
  ret <16 x i32> %res
}
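
; AVX512F/VL have no per-element i16 shift, so the constant right shifts
; are emulated with an unsigned multiply-high: x lshr c equals
; vpmulhuw(x, 2^(16-c)). The c == 0 lanes would need a 2^16 multiplier, so
; vpblendw/vpblendd patch those lanes back from the unshifted source. The
; left side is vpsllw $1 plus vpmullw by powers of two. AVX512BW simply
; uses vpsrlvw/vpsllvw.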
define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
; AVX512F-LABEL: constant_funnnel_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = <u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2>
; AVX512F-NEXT: vpmulhuw %ymm3, %ymm2, %ymm4
; AVX512F-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3,4,5,6,7]
; AVX512F-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm4[4,5,6,7]
; AVX512F-NEXT: vpmulhuw %ymm3, %ymm1, %ymm3
; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3,4,5,6,7]
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7]
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpsllw $1, %ymm2, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2,1]
; AVX512F-NEXT: vpmullw %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512F-NEXT: vpmullw %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: constant_funnnel_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = <u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2>
; AVX512VL-NEXT: vpmulhuw %ymm3, %ymm2, %ymm4
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3,4,5,6,7]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm4[4,5,6,7]
; AVX512VL-NEXT: vpmulhuw %ymm3, %ymm1, %ymm3
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3,4,5,6,7]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7]
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT: vpsllw $1, %ymm2, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2,1]
; AVX512VL-NEXT: vpmullw %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512VL-NEXT: vpmullw %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: constant_funnnel_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vpsllw $1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: constant_funnnel_v32i16:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpshrdvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
; AVX512VBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsllw $1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: constant_funnnel_v32i16:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpshrdvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
; AVX512VLVBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512VLVBMI2-NEXT: retq
  %res = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
  ret <32 x i16> %res
}
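
; The constant i8 case reuses the shift-and-select ladder for the left
; half, with the vpblendvb control precomputed (49376,32928,... appears to
; be the inverted amounts pre-shifted left by 5). The right half widens
; bytes to words (vpunpck{l,h}bw against zero), multiplies by reciprocal
; powers of two so the wanted bits land in the high byte, extracts them
; with vpsrlw $8, and repacks with vpackuswb.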
define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
; AVX512F-LABEL: constant_funnnel_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm3
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [49376,32928,16480,32,224,16416,32864,49312,49376,32928,16480,32,224,16416,32864,49312]
; AVX512F-NEXT: # ymm5 = mem[0,1,0,1]
; AVX512F-NEXT: vpblendvb %ymm5, %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsllw $2, %ymm2, %ymm3
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512F-NEXT: vpaddb %ymm5, %ymm5, %ymm7
; AVX512F-NEXT: vpblendvb %ymm7, %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm3
; AVX512F-NEXT: vpaddb %ymm7, %ymm7, %ymm8
; AVX512F-NEXT: vpblendvb %ymm8, %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm0
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm3
; AVX512F-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vpblendvb %ymm5, %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm3
; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512F-NEXT: vpblendvb %ymm7, %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm3
; AVX512F-NEXT: vpblendvb %ymm8, %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm2[8],ymm3[8],ymm2[9],ymm3[9],ymm2[10],ymm3[10],ymm2[11],ymm3[11],ymm2[12],ymm3[12],ymm2[13],ymm3[13],ymm2[14],ymm3[14],ymm2[15],ymm3[15],ymm2[24],ymm3[24],ymm2[25],ymm3[25],ymm2[26],ymm3[26],ymm2[27],ymm3[27],ymm2[28],ymm3[28],ymm2[29],ymm3[29],ymm2[30],ymm3[30],ymm2[31],ymm3[31]
; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [256,2,4,8,16,32,64,128,256,2,4,8,16,32,64,128]
; AVX512F-NEXT: # ymm5 = mem[0,1,0,1]
; AVX512F-NEXT: vpmullw %ymm5, %ymm4, %ymm4
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm3[0],ymm2[1],ymm3[1],ymm2[2],ymm3[2],ymm2[3],ymm3[3],ymm2[4],ymm3[4],ymm2[5],ymm3[5],ymm2[6],ymm3[6],ymm2[7],ymm3[7],ymm2[16],ymm3[16],ymm2[17],ymm3[17],ymm2[18],ymm3[18],ymm2[19],ymm3[19],ymm2[20],ymm3[20],ymm2[21],ymm3[21],ymm2[22],ymm3[22],ymm2[23],ymm3[23]
; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2]
; AVX512F-NEXT: # ymm6 = mem[0,1,0,1]
; AVX512F-NEXT: vpmullw %ymm6, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512F-NEXT: vpackuswb %ymm4, %ymm2, %ymm2
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm3[8],ymm1[9],ymm3[9],ymm1[10],ymm3[10],ymm1[11],ymm3[11],ymm1[12],ymm3[12],ymm1[13],ymm3[13],ymm1[14],ymm3[14],ymm1[15],ymm3[15],ymm1[24],ymm3[24],ymm1[25],ymm3[25],ymm1[26],ymm3[26],ymm1[27],ymm3[27],ymm1[28],ymm3[28],ymm1[29],ymm3[29],ymm1[30],ymm3[30],ymm1[31],ymm3[31]
; AVX512F-NEXT: vpmullw %ymm5, %ymm4, %ymm4
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[4],ymm3[4],ymm1[5],ymm3[5],ymm1[6],ymm3[6],ymm1[7],ymm3[7],ymm1[16],ymm3[16],ymm1[17],ymm3[17],ymm1[18],ymm3[18],ymm1[19],ymm3[19],ymm1[20],ymm3[20],ymm1[21],ymm3[21],ymm1[22],ymm3[22],ymm1[23],ymm3[23]
; AVX512F-NEXT: vpmullw %ymm6, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512F-NEXT: vpackuswb %ymm4, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: constant_funnnel_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm3
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [49376,32928,16480,32,224,16416,32864,49312,49376,32928,16480,32,224,16416,32864,49312]
; AVX512VL-NEXT: # ymm5 = mem[0,1,0,1]
; AVX512VL-NEXT: vpblendvb %ymm5, %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $2, %ymm2, %ymm3
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512VL-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512VL-NEXT: vpaddb %ymm5, %ymm5, %ymm7
; AVX512VL-NEXT: vpblendvb %ymm7, %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm3
; AVX512VL-NEXT: vpaddb %ymm7, %ymm7, %ymm8
; AVX512VL-NEXT: vpblendvb %ymm8, %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm3
; AVX512VL-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX512VL-NEXT: vpblendvb %ymm5, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm3
; AVX512VL-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512VL-NEXT: vpblendvb %ymm7, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm3
; AVX512VL-NEXT: vpblendvb %ymm8, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm2[8],ymm3[8],ymm2[9],ymm3[9],ymm2[10],ymm3[10],ymm2[11],ymm3[11],ymm2[12],ymm3[12],ymm2[13],ymm3[13],ymm2[14],ymm3[14],ymm2[15],ymm3[15],ymm2[24],ymm3[24],ymm2[25],ymm3[25],ymm2[26],ymm3[26],ymm2[27],ymm3[27],ymm2[28],ymm3[28],ymm2[29],ymm3[29],ymm2[30],ymm3[30],ymm2[31],ymm3[31]
; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [256,2,4,8,16,32,64,128,256,2,4,8,16,32,64,128]
; AVX512VL-NEXT: # ymm5 = mem[0,1,0,1]
; AVX512VL-NEXT: vpmullw %ymm5, %ymm4, %ymm4
; AVX512VL-NEXT: vpsrlw $8, %ymm4, %ymm4
; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm3[0],ymm2[1],ymm3[1],ymm2[2],ymm3[2],ymm2[3],ymm3[3],ymm2[4],ymm3[4],ymm2[5],ymm3[5],ymm2[6],ymm3[6],ymm2[7],ymm3[7],ymm2[16],ymm3[16],ymm2[17],ymm3[17],ymm2[18],ymm3[18],ymm2[19],ymm3[19],ymm2[20],ymm3[20],ymm2[21],ymm3[21],ymm2[22],ymm3[22],ymm2[23],ymm3[23]
; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2]
; AVX512VL-NEXT: # ymm6 = mem[0,1,0,1]
; AVX512VL-NEXT: vpmullw %ymm6, %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512VL-NEXT: vpackuswb %ymm4, %ymm2, %ymm2
; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm3[8],ymm1[9],ymm3[9],ymm1[10],ymm3[10],ymm1[11],ymm3[11],ymm1[12],ymm3[12],ymm1[13],ymm3[13],ymm1[14],ymm3[14],ymm1[15],ymm3[15],ymm1[24],ymm3[24],ymm1[25],ymm3[25],ymm1[26],ymm3[26],ymm1[27],ymm3[27],ymm1[28],ymm3[28],ymm1[29],ymm3[29],ymm1[30],ymm3[30],ymm1[31],ymm3[31]
; AVX512VL-NEXT: vpmullw %ymm5, %ymm4, %ymm4
; AVX512VL-NEXT: vpsrlw $8, %ymm4, %ymm4
; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[4],ymm3[4],ymm1[5],ymm3[5],ymm1[6],ymm3[6],ymm1[7],ymm3[7],ymm1[16],ymm3[16],ymm1[17],ymm3[17],ymm1[18],ymm3[18],ymm1[19],ymm3[19],ymm1[20],ymm3[20],ymm1[21],ymm3[21],ymm1[22],ymm3[22],ymm1[23],ymm3[23]
; AVX512VL-NEXT: vpmullw %ymm6, %ymm1, %ymm1
; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512VL-NEXT: vpackuswb %ymm4, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: constant_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [49376,32928,16480,32,224,16416,32864,49312,49376,32928,16480,32,224,16416,32864,49312,49376,32928,16480,32,224,16416,32864,49312,49376,32928,16480,32,224,16416,32864,49312]
; AVX512BW-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512BW-NEXT: vpmovb2m %zmm2, %k1
; AVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm3
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3
; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
; AVX512BW-NEXT: vpsllw $2, %zmm0, %zmm3
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3
; AVX512BW-NEXT: vpaddb %zmm2, %zmm2, %zmm2
; AVX512BW-NEXT: vpmovb2m %zmm2, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
; AVX512BW-NEXT: vpaddb %zmm2, %zmm2, %zmm2
; AVX512BW-NEXT: vpmovb2m %zmm2, %k1
; AVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3
; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[32],zmm2[32],zmm1[33],zmm2[33],zmm1[34],zmm2[34],zmm1[35],zmm2[35],zmm1[36],zmm2[36],zmm1[37],zmm2[37],zmm1[38],zmm2[38],zmm1[39],zmm2[39],zmm1[48],zmm2[48],zmm1[49],zmm2[49],zmm1[50],zmm2[50],zmm1[51],zmm2[51],zmm1[52],zmm2[52],zmm1[53],zmm2[53],zmm1[54],zmm2[54],zmm1[55],zmm2[55]
; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
; AVX512BW-NEXT: vpackuswb %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: constant_funnnel_v64i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [49376,32928,16480,32,224,16416,32864,49312,49376,32928,16480,32,224,16416,32864,49312,49376,32928,16480,32,224,16416,32864,49312,49376,32928,16480,32,224,16416,32864,49312]
; AVX512VBMI2-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512VBMI2-NEXT: vpmovb2m %zmm2, %k1
; AVX512VBMI2-NEXT: vpaddb %zmm0, %zmm0, %zmm0
; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm3
; AVX512VBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3
; AVX512VBMI2-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
; AVX512VBMI2-NEXT: vpsllw $2, %zmm0, %zmm3
; AVX512VBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3
; AVX512VBMI2-NEXT: vpaddb %zmm2, %zmm2, %zmm2
; AVX512VBMI2-NEXT: vpmovb2m %zmm2, %k1
; AVX512VBMI2-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
; AVX512VBMI2-NEXT: vpaddb %zmm2, %zmm2, %zmm2
; AVX512VBMI2-NEXT: vpmovb2m %zmm2, %k1
; AVX512VBMI2-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
; AVX512VBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
; AVX512VBMI2-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3
; AVX512VBMI2-NEXT: vpsrlw $8, %zmm3, %zmm3
; AVX512VBMI2-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[32],zmm2[32],zmm1[33],zmm2[33],zmm1[34],zmm2[34],zmm1[35],zmm2[35],zmm1[36],zmm2[36],zmm1[37],zmm2[37],zmm1[38],zmm2[38],zmm1[39],zmm2[39],zmm1[48],zmm2[48],zmm1[49],zmm2[49],zmm1[50],zmm2[50],zmm1[51],zmm2[51],zmm1[52],zmm2[52],zmm1[53],zmm2[53],zmm1[54],zmm2[54],zmm1[55],zmm2[55]
; AVX512VBMI2-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
; AVX512VBMI2-NEXT: vpsrlw $8, %zmm1, %zmm1
; AVX512VBMI2-NEXT: vpackuswb %zmm3, %zmm1, %zmm1
; AVX512VBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [49376,32928,16480,32,224,16416,32864,49312,49376,32928,16480,32,224,16416,32864,49312,49376,32928,16480,32,224,16416,32864,49312,49376,32928,16480,32,224,16416,32864,49312]
; AVX512VLBW-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512VLBW-NEXT: vpmovb2m %zmm2, %k1
; AVX512VLBW-NEXT: vpaddb %zmm0, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm3
; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3
; AVX512VLBW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
; AVX512VLBW-NEXT: vpsllw $2, %zmm0, %zmm3
; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3
; AVX512VLBW-NEXT: vpaddb %zmm2, %zmm2, %zmm2
; AVX512VLBW-NEXT: vpmovb2m %zmm2, %k1
; AVX512VLBW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
; AVX512VLBW-NEXT: vpaddb %zmm2, %zmm2, %zmm2
; AVX512VLBW-NEXT: vpmovb2m %zmm2, %k1
; AVX512VLBW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3
; AVX512VLBW-NEXT: vpsrlw $8, %zmm3, %zmm3
; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[32],zmm2[32],zmm1[33],zmm2[33],zmm1[34],zmm2[34],zmm1[35],zmm2[35],zmm1[36],zmm2[36],zmm1[37],zmm2[37],zmm1[38],zmm2[38],zmm1[39],zmm2[39],zmm1[48],zmm2[48],zmm1[49],zmm2[49],zmm1[50],zmm2[50],zmm1[51],zmm2[51],zmm1[52],zmm2[52],zmm1[53],zmm2[53],zmm1[54],zmm2[54],zmm1[55],zmm2[55]
; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsrlw $8, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpackuswb %zmm3, %zmm1, %zmm1
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: constant_funnnel_v64i8:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [49376,32928,16480,32,224,16416,32864,49312,49376,32928,16480,32,224,16416,32864,49312,49376,32928,16480,32,224,16416,32864,49312,49376,32928,16480,32,224,16416,32864,49312]
; AVX512VLVBMI2-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512VLVBMI2-NEXT: vpmovb2m %zmm2, %k1
; AVX512VLVBMI2-NEXT: vpaddb %zmm0, %zmm0, %zmm0
; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm3
; AVX512VLVBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3
; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
; AVX512VLVBMI2-NEXT: vpsllw $2, %zmm0, %zmm3
; AVX512VLVBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3
; AVX512VLVBMI2-NEXT: vpaddb %zmm2, %zmm2, %zmm2
; AVX512VLVBMI2-NEXT: vpmovb2m %zmm2, %k1
; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
; AVX512VLVBMI2-NEXT: vpaddb %zmm2, %zmm2, %zmm2
; AVX512VLVBMI2-NEXT: vpmovb2m %zmm2, %k1
; AVX512VLVBMI2-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
; AVX512VLVBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
; AVX512VLVBMI2-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3
; AVX512VLVBMI2-NEXT: vpsrlw $8, %zmm3, %zmm3
; AVX512VLVBMI2-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[32],zmm2[32],zmm1[33],zmm2[33],zmm1[34],zmm2[34],zmm1[35],zmm2[35],zmm1[36],zmm2[36],zmm1[37],zmm2[37],zmm1[38],zmm2[38],zmm1[39],zmm2[39],zmm1[48],zmm2[48],zmm1[49],zmm2[49],zmm1[50],zmm2[50],zmm1[51],zmm2[51],zmm1[52],zmm2[52],zmm1[53],zmm2[53],zmm1[54],zmm2[54],zmm1[55],zmm2[55]
; AVX512VLVBMI2-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
; AVX512VLVBMI2-NEXT: vpsrlw $8, %zmm1, %zmm1
; AVX512VLVBMI2-NEXT: vpackuswb %zmm3, %zmm1, %zmm1
; AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLVBMI2-NEXT: retq
%res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1>)
ret <64 x i8> %res
}

;
; Uniform Constant Shifts
;

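; With a splat constant amount, fshr folds to two immediate shifts plus an
; or: fshr(x, y, c) == (y >> c) | (x << (bits - c)). AVX512VBMI2 targets
; emit a single vpshrd{q,d,w} with the immediate instead.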
define <8 x i64> @splatconstant_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
; AVX512F-LABEL: splatconstant_funnnel_v8i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsrlq $14, %zmm1, %zmm1
; AVX512F-NEXT: vpsllq $50, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v8i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlq $14, %zmm1, %zmm1
; AVX512VL-NEXT: vpsllq $50, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v8i64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlq $14, %zmm1, %zmm1
; AVX512BW-NEXT: vpsllq $50, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i64:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpshrdq $14, %zmm0, %zmm1, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v8i64:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsrlq $14, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsllq $50, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v8i64:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpshrdq $14, %zmm0, %zmm1, %zmm0
; AVX512VLVBMI2-NEXT: retq
%res = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> <i64 14, i64 14, i64 14, i64 14, i64 14, i64 14, i64 14, i64 14>)
ret <8 x i64> %res
}

define <16 x i32> @splatconstant_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
; AVX512F-LABEL: splatconstant_funnnel_v16i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsrld $4, %zmm1, %zmm1
; AVX512F-NEXT: vpslld $28, %zmm0, %zmm0
; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v16i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrld $4, %zmm1, %zmm1
; AVX512VL-NEXT: vpslld $28, %zmm0, %zmm0
; AVX512VL-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v16i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrld $4, %zmm1, %zmm1
; AVX512BW-NEXT: vpslld $28, %zmm0, %zmm0
; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i32:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpshrdd $4, %zmm0, %zmm1, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v16i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsrld $4, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpslld $28, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i32:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpshrdd $4, %zmm0, %zmm1, %zmm0
; AVX512VLVBMI2-NEXT: retq
%res = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>)
ret <16 x i32> %res
}

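; AVX512F/AVX512VL have no 512-bit word-granularity shifts, so the v32i16
; case below is split into two ymm halves; AVX512BW keeps a single zmm
; shift pair and AVX512VBMI2 uses vpshrdw.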
define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
; AVX512F-LABEL: splatconstant_funnnel_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsrlw $7, %ymm1, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT: vpsrlw $7, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1
; AVX512F-NEXT: vpsllw $9, %ymm0, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpsllw $9, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlw $7, %ymm1, %ymm2
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512VL-NEXT: vpsrlw $7, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1
; AVX512VL-NEXT: vpsllw $9, %ymm0, %ymm2
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512VL-NEXT: vpsllw $9, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $7, %zmm1, %zmm1
; AVX512BW-NEXT: vpsllw $9, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i16:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpshrdw $7, %zmm0, %zmm1, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsrlw $7, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsllw $9, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i16:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpshrdw $7, %zmm0, %zmm1, %zmm0
; AVX512VLVBMI2-NEXT: retq
%res = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
ret <32 x i16> %res
}

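; There are no byte shifts, but with a uniform amount the v64i8 case can
; borrow the word shifts: shift words by 4 in each direction, then a single
; vpternlogq merges the two halves while masking out the bits that crossed
; byte boundaries.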
define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
; AVX512F-LABEL: splatconstant_funnnel_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm0
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm0
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm0
; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0
; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm0
; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0
; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
; AVX512VLVBMI2-NEXT: retq
%res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
ret <64 x i8> %res
}