1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=AVX512,AVX512VBMI2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VLBW
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VLVBMI2
; Funnel-shift-right intrinsics called by the tests in this file, one
; declaration per element width (i64, i32, i16, i8) of a 512-bit vector.
; Each takes three vector operands of the same type and returns that type.
9 declare <8 x i64> @llvm.fshr.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)
10 declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
11 declare <32 x i16> @llvm.fshr.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)
12 declare <64 x i8> @llvm.fshr.v64i8(<64 x i8>, <64 x i8>, <64 x i8>)
; Per-element variable funnel shift right of <8 x i64>: %x, %y and %amt are
; passed straight to llvm.fshr.v8i64, with no splat of the amount.
; Expected code in the checks below:
;  - the two +avx512vbmi2 configurations select a single vpshrdvq followed by
;    a register move of the result into zmm0;
;  - the other configurations expand the operation: the amount is masked with
;    63, vpsrlvq shifts zmm1 by it, vpsllvq shifts zmm0 by (64 - masked
;    amount), the two results are OR'ed, and a vptestnmq mask moves zmm1 back
;    into the result for lanes whose masked amount is zero.
18 define <8 x i64> @var_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %amt) nounwind {
19 ; AVX512F-LABEL: var_funnnel_v8i64:
21 ; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
22 ; AVX512F-NEXT: vpandq %zmm3, %zmm2, %zmm4
23 ; AVX512F-NEXT: vpsrlvq %zmm4, %zmm1, %zmm5
24 ; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm6 = [64,64,64,64,64,64,64,64]
25 ; AVX512F-NEXT: vpsubq %zmm4, %zmm6, %zmm4
26 ; AVX512F-NEXT: vpsllvq %zmm4, %zmm0, %zmm0
27 ; AVX512F-NEXT: vporq %zmm5, %zmm0, %zmm0
28 ; AVX512F-NEXT: vptestnmq %zmm3, %zmm2, %k1
29 ; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1}
32 ; AVX512VL-LABEL: var_funnnel_v8i64:
34 ; AVX512VL-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
35 ; AVX512VL-NEXT: vpandq %zmm3, %zmm2, %zmm4
36 ; AVX512VL-NEXT: vpsrlvq %zmm4, %zmm1, %zmm5
37 ; AVX512VL-NEXT: vpbroadcastq {{.*#+}} zmm6 = [64,64,64,64,64,64,64,64]
38 ; AVX512VL-NEXT: vpsubq %zmm4, %zmm6, %zmm4
39 ; AVX512VL-NEXT: vpsllvq %zmm4, %zmm0, %zmm0
40 ; AVX512VL-NEXT: vporq %zmm5, %zmm0, %zmm0
41 ; AVX512VL-NEXT: vptestnmq %zmm3, %zmm2, %k1
42 ; AVX512VL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1}
45 ; AVX512BW-LABEL: var_funnnel_v8i64:
47 ; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
48 ; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm4
49 ; AVX512BW-NEXT: vpsrlvq %zmm4, %zmm1, %zmm5
50 ; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm6 = [64,64,64,64,64,64,64,64]
51 ; AVX512BW-NEXT: vpsubq %zmm4, %zmm6, %zmm4
52 ; AVX512BW-NEXT: vpsllvq %zmm4, %zmm0, %zmm0
53 ; AVX512BW-NEXT: vporq %zmm5, %zmm0, %zmm0
54 ; AVX512BW-NEXT: vptestnmq %zmm3, %zmm2, %k1
55 ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1}
58 ; AVX512VBMI2-LABEL: var_funnnel_v8i64:
59 ; AVX512VBMI2: # %bb.0:
60 ; AVX512VBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1
61 ; AVX512VBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
62 ; AVX512VBMI2-NEXT: retq
64 ; AVX512VLBW-LABEL: var_funnnel_v8i64:
65 ; AVX512VLBW: # %bb.0:
66 ; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
67 ; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm4
68 ; AVX512VLBW-NEXT: vpsrlvq %zmm4, %zmm1, %zmm5
69 ; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} zmm6 = [64,64,64,64,64,64,64,64]
70 ; AVX512VLBW-NEXT: vpsubq %zmm4, %zmm6, %zmm4
71 ; AVX512VLBW-NEXT: vpsllvq %zmm4, %zmm0, %zmm0
72 ; AVX512VLBW-NEXT: vporq %zmm5, %zmm0, %zmm0
73 ; AVX512VLBW-NEXT: vptestnmq %zmm3, %zmm2, %k1
74 ; AVX512VLBW-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1}
75 ; AVX512VLBW-NEXT: retq
77 ; AVX512VLVBMI2-LABEL: var_funnnel_v8i64:
78 ; AVX512VLVBMI2: # %bb.0:
79 ; AVX512VLVBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1
80 ; AVX512VLVBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
81 ; AVX512VLVBMI2-NEXT: retq
82 %res = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %amt)
; Per-element variable funnel shift right of <16 x i32>: %x, %y and %amt are
; passed straight to llvm.fshr.v16i32, with no splat of the amount.
; Expected code in the checks below:
;  - the two +avx512vbmi2 configurations select a single vpshrdvd followed by
;    a register move of the result into zmm0;
;  - the other configurations expand the operation: the amount is masked with
;    31, vpsrlvd shifts zmm1 by it, vpsllvd shifts zmm0 by (32 - masked
;    amount), the two results are OR'ed, and a vptestnmd mask moves zmm1 back
;    into the result for lanes whose masked amount is zero.
86 define <16 x i32> @var_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i32> %amt) nounwind {
87 ; AVX512F-LABEL: var_funnnel_v16i32:
89 ; AVX512F-NEXT: vpbroadcastd {{.*#+}} zmm3 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
90 ; AVX512F-NEXT: vpandd %zmm3, %zmm2, %zmm4
91 ; AVX512F-NEXT: vpsrlvd %zmm4, %zmm1, %zmm5
92 ; AVX512F-NEXT: vpbroadcastd {{.*#+}} zmm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
93 ; AVX512F-NEXT: vpsubd %zmm4, %zmm6, %zmm4
94 ; AVX512F-NEXT: vpsllvd %zmm4, %zmm0, %zmm0
95 ; AVX512F-NEXT: vpord %zmm5, %zmm0, %zmm0
96 ; AVX512F-NEXT: vptestnmd %zmm3, %zmm2, %k1
97 ; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
100 ; AVX512VL-LABEL: var_funnnel_v16i32:
102 ; AVX512VL-NEXT: vpbroadcastd {{.*#+}} zmm3 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
103 ; AVX512VL-NEXT: vpandd %zmm3, %zmm2, %zmm4
104 ; AVX512VL-NEXT: vpsrlvd %zmm4, %zmm1, %zmm5
105 ; AVX512VL-NEXT: vpbroadcastd {{.*#+}} zmm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
106 ; AVX512VL-NEXT: vpsubd %zmm4, %zmm6, %zmm4
107 ; AVX512VL-NEXT: vpsllvd %zmm4, %zmm0, %zmm0
108 ; AVX512VL-NEXT: vpord %zmm5, %zmm0, %zmm0
109 ; AVX512VL-NEXT: vptestnmd %zmm3, %zmm2, %k1
110 ; AVX512VL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
111 ; AVX512VL-NEXT: retq
113 ; AVX512BW-LABEL: var_funnnel_v16i32:
115 ; AVX512BW-NEXT: vpbroadcastd {{.*#+}} zmm3 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
116 ; AVX512BW-NEXT: vpandd %zmm3, %zmm2, %zmm4
117 ; AVX512BW-NEXT: vpsrlvd %zmm4, %zmm1, %zmm5
118 ; AVX512BW-NEXT: vpbroadcastd {{.*#+}} zmm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
119 ; AVX512BW-NEXT: vpsubd %zmm4, %zmm6, %zmm4
120 ; AVX512BW-NEXT: vpsllvd %zmm4, %zmm0, %zmm0
121 ; AVX512BW-NEXT: vpord %zmm5, %zmm0, %zmm0
122 ; AVX512BW-NEXT: vptestnmd %zmm3, %zmm2, %k1
123 ; AVX512BW-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
124 ; AVX512BW-NEXT: retq
126 ; AVX512VBMI2-LABEL: var_funnnel_v16i32:
127 ; AVX512VBMI2: # %bb.0:
128 ; AVX512VBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1
129 ; AVX512VBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
130 ; AVX512VBMI2-NEXT: retq
132 ; AVX512VLBW-LABEL: var_funnnel_v16i32:
133 ; AVX512VLBW: # %bb.0:
134 ; AVX512VLBW-NEXT: vpbroadcastd {{.*#+}} zmm3 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
135 ; AVX512VLBW-NEXT: vpandd %zmm3, %zmm2, %zmm4
136 ; AVX512VLBW-NEXT: vpsrlvd %zmm4, %zmm1, %zmm5
137 ; AVX512VLBW-NEXT: vpbroadcastd {{.*#+}} zmm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
138 ; AVX512VLBW-NEXT: vpsubd %zmm4, %zmm6, %zmm4
139 ; AVX512VLBW-NEXT: vpsllvd %zmm4, %zmm0, %zmm0
140 ; AVX512VLBW-NEXT: vpord %zmm5, %zmm0, %zmm0
141 ; AVX512VLBW-NEXT: vptestnmd %zmm3, %zmm2, %k1
142 ; AVX512VLBW-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
143 ; AVX512VLBW-NEXT: retq
145 ; AVX512VLVBMI2-LABEL: var_funnnel_v16i32:
146 ; AVX512VLVBMI2: # %bb.0:
147 ; AVX512VLVBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1
148 ; AVX512VLVBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
149 ; AVX512VLVBMI2-NEXT: retq
150 %res = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i32> %amt)
; Per-element variable funnel shift right of <32 x i16>: %x, %y and %amt are
; passed straight to llvm.fshr.v32i16, with no splat of the amount.
; Expected code in the checks below:
;  - the two +avx512vbmi2 configurations select a single vpshrdvw followed by
;    a register move of the result into zmm0;
;  - the two +avx512bw configurations stay on zmm registers: the amount is
;    masked with 15, vpsrlvw/vpsllvw (the latter by 16 - masked amount) are
;    OR'ed, and a vptestnmw mask moves zmm1 back into the result for lanes
;    whose masked amount is zero;
;  - the +avx512f and +avx512f,+avx512vl configurations work on ymm-sized
;    pieces: words are zero-extended to dwords (vpmovzxwd), shifted with
;    vpsrlvd/vpsllvd, OR'ed, truncated back with vpmovdw, and the zero-amount
;    lanes are selected with vpcmpeqw + vpblendvb. The sequence appears twice,
;    once for each ymm pair.
154 define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %amt) nounwind {
155 ; AVX512F-LABEL: var_funnnel_v32i16:
157 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
158 ; AVX512F-NEXT: vpand %ymm6, %ymm4, %ymm4
159 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm7 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero
160 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm8 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
161 ; AVX512F-NEXT: vpsrlvd %zmm7, %zmm8, %zmm7
162 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm8 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
163 ; AVX512F-NEXT: vpsubw %ymm4, %ymm8, %ymm9
164 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm9 = ymm9[0],zero,ymm9[1],zero,ymm9[2],zero,ymm9[3],zero,ymm9[4],zero,ymm9[5],zero,ymm9[6],zero,ymm9[7],zero,ymm9[8],zero,ymm9[9],zero,ymm9[10],zero,ymm9[11],zero,ymm9[12],zero,ymm9[13],zero,ymm9[14],zero,ymm9[15],zero
165 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
166 ; AVX512F-NEXT: vpsllvd %zmm9, %zmm0, %zmm0
167 ; AVX512F-NEXT: vpord %zmm7, %zmm0, %zmm0
168 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
169 ; AVX512F-NEXT: vpxor %xmm7, %xmm7, %xmm7
170 ; AVX512F-NEXT: vpcmpeqw %ymm7, %ymm4, %ymm4
171 ; AVX512F-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
172 ; AVX512F-NEXT: vpand %ymm6, %ymm5, %ymm2
173 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
174 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero
175 ; AVX512F-NEXT: vpsrlvd %zmm4, %zmm5, %zmm4
176 ; AVX512F-NEXT: vpsubw %ymm2, %ymm8, %ymm5
177 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm5[0],zero,ymm5[1],zero,ymm5[2],zero,ymm5[3],zero,ymm5[4],zero,ymm5[5],zero,ymm5[6],zero,ymm5[7],zero,ymm5[8],zero,ymm5[9],zero,ymm5[10],zero,ymm5[11],zero,ymm5[12],zero,ymm5[13],zero,ymm5[14],zero,ymm5[15],zero
178 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
179 ; AVX512F-NEXT: vpsllvd %zmm5, %zmm1, %zmm1
180 ; AVX512F-NEXT: vpord %zmm4, %zmm1, %zmm1
181 ; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
182 ; AVX512F-NEXT: vpcmpeqw %ymm7, %ymm2, %ymm2
183 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
186 ; AVX512VL-LABEL: var_funnnel_v32i16:
188 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
189 ; AVX512VL-NEXT: vpand %ymm6, %ymm4, %ymm4
190 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm7 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero
191 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm8 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
192 ; AVX512VL-NEXT: vpsrlvd %zmm7, %zmm8, %zmm7
193 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm8 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
194 ; AVX512VL-NEXT: vpsubw %ymm4, %ymm8, %ymm9
195 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm9 = ymm9[0],zero,ymm9[1],zero,ymm9[2],zero,ymm9[3],zero,ymm9[4],zero,ymm9[5],zero,ymm9[6],zero,ymm9[7],zero,ymm9[8],zero,ymm9[9],zero,ymm9[10],zero,ymm9[11],zero,ymm9[12],zero,ymm9[13],zero,ymm9[14],zero,ymm9[15],zero
196 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
197 ; AVX512VL-NEXT: vpsllvd %zmm9, %zmm0, %zmm0
198 ; AVX512VL-NEXT: vpord %zmm7, %zmm0, %zmm0
199 ; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
200 ; AVX512VL-NEXT: vpxor %xmm7, %xmm7, %xmm7
201 ; AVX512VL-NEXT: vpcmpeqw %ymm7, %ymm4, %ymm4
202 ; AVX512VL-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
203 ; AVX512VL-NEXT: vpand %ymm6, %ymm5, %ymm2
204 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
205 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero
206 ; AVX512VL-NEXT: vpsrlvd %zmm4, %zmm5, %zmm4
207 ; AVX512VL-NEXT: vpsubw %ymm2, %ymm8, %ymm5
208 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm5[0],zero,ymm5[1],zero,ymm5[2],zero,ymm5[3],zero,ymm5[4],zero,ymm5[5],zero,ymm5[6],zero,ymm5[7],zero,ymm5[8],zero,ymm5[9],zero,ymm5[10],zero,ymm5[11],zero,ymm5[12],zero,ymm5[13],zero,ymm5[14],zero,ymm5[15],zero
209 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
210 ; AVX512VL-NEXT: vpsllvd %zmm5, %zmm1, %zmm1
211 ; AVX512VL-NEXT: vpord %zmm4, %zmm1, %zmm1
212 ; AVX512VL-NEXT: vpmovdw %zmm1, %ymm1
213 ; AVX512VL-NEXT: vpcmpeqw %ymm7, %ymm2, %ymm2
214 ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
215 ; AVX512VL-NEXT: retq
217 ; AVX512BW-LABEL: var_funnnel_v32i16:
219 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
220 ; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm4
221 ; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm5
222 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm6 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
223 ; AVX512BW-NEXT: vpsubw %zmm4, %zmm6, %zmm4
224 ; AVX512BW-NEXT: vpsllvw %zmm4, %zmm0, %zmm0
225 ; AVX512BW-NEXT: vporq %zmm5, %zmm0, %zmm0
226 ; AVX512BW-NEXT: vptestnmw %zmm3, %zmm2, %k1
227 ; AVX512BW-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1}
228 ; AVX512BW-NEXT: retq
230 ; AVX512VBMI2-LABEL: var_funnnel_v32i16:
231 ; AVX512VBMI2: # %bb.0:
232 ; AVX512VBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1
233 ; AVX512VBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
234 ; AVX512VBMI2-NEXT: retq
236 ; AVX512VLBW-LABEL: var_funnnel_v32i16:
237 ; AVX512VLBW: # %bb.0:
238 ; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
239 ; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm4
240 ; AVX512VLBW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm5
241 ; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm6 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
242 ; AVX512VLBW-NEXT: vpsubw %zmm4, %zmm6, %zmm4
243 ; AVX512VLBW-NEXT: vpsllvw %zmm4, %zmm0, %zmm0
244 ; AVX512VLBW-NEXT: vporq %zmm5, %zmm0, %zmm0
245 ; AVX512VLBW-NEXT: vptestnmw %zmm3, %zmm2, %k1
246 ; AVX512VLBW-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1}
247 ; AVX512VLBW-NEXT: retq
249 ; AVX512VLVBMI2-LABEL: var_funnnel_v32i16:
250 ; AVX512VLVBMI2: # %bb.0:
251 ; AVX512VLVBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1
252 ; AVX512VLVBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
253 ; AVX512VLVBMI2-NEXT: retq
254 %res = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %amt)
; Per-element variable funnel shift right of <64 x i8>: %x, %y and %amt are
; passed straight to llvm.fshr.v64i8, with no splat of the amount.
; None of the configurations checked below selects a single funnel-shift
; instruction for bytes; every one expands the operation, masking the amount
; with 7 and using (8 - masked amount) for the left-shift half:
;  - the +avx512f and +avx512f,+avx512vl configurations work on ymm-sized
;    pieces, building each variable shift from fixed steps of 4, 2 and 1 bits
;    chosen with vpblendvb, then handle zero-amount lanes with
;    vpcmpeqb + vpblendvb. The sequence appears twice, once per ymm pair.
;  - the remaining four configurations work on zmm registers, choosing the same
;    fixed steps through mask registers produced by vpmovb2m, and handle
;    zero-amount lanes with vptestnmb plus a masked vmovdqu8.
258 define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt) nounwind {
259 ; AVX512F-LABEL: var_funnnel_v64i8:
261 ; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm7
262 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
263 ; AVX512F-NEXT: vpand %ymm6, %ymm7, %ymm8
264 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm7 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
265 ; AVX512F-NEXT: vpand %ymm7, %ymm4, %ymm9
266 ; AVX512F-NEXT: vpsllw $5, %ymm9, %ymm10
267 ; AVX512F-NEXT: vpblendvb %ymm10, %ymm8, %ymm2, %ymm8
268 ; AVX512F-NEXT: vpsrlw $2, %ymm8, %ymm11
269 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
270 ; AVX512F-NEXT: vpand %ymm4, %ymm11, %ymm11
271 ; AVX512F-NEXT: vpaddb %ymm10, %ymm10, %ymm10
272 ; AVX512F-NEXT: vpblendvb %ymm10, %ymm11, %ymm8, %ymm11
273 ; AVX512F-NEXT: vpsrlw $1, %ymm11, %ymm12
274 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm8 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
275 ; AVX512F-NEXT: vpand %ymm8, %ymm12, %ymm12
276 ; AVX512F-NEXT: vpaddb %ymm10, %ymm10, %ymm10
277 ; AVX512F-NEXT: vpblendvb %ymm10, %ymm12, %ymm11, %ymm10
278 ; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm11
279 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm12 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
280 ; AVX512F-NEXT: vpand %ymm12, %ymm11, %ymm11
281 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm13 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
282 ; AVX512F-NEXT: vpsubb %ymm9, %ymm13, %ymm14
283 ; AVX512F-NEXT: vpsllw $5, %ymm14, %ymm14
284 ; AVX512F-NEXT: vpblendvb %ymm14, %ymm11, %ymm0, %ymm0
285 ; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm11
286 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm15 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
287 ; AVX512F-NEXT: vpand %ymm15, %ymm11, %ymm11
288 ; AVX512F-NEXT: vpaddb %ymm14, %ymm14, %ymm14
289 ; AVX512F-NEXT: vpblendvb %ymm14, %ymm11, %ymm0, %ymm0
290 ; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm11
291 ; AVX512F-NEXT: vpaddb %ymm14, %ymm14, %ymm14
292 ; AVX512F-NEXT: vpblendvb %ymm14, %ymm11, %ymm0, %ymm0
293 ; AVX512F-NEXT: vpor %ymm10, %ymm0, %ymm0
294 ; AVX512F-NEXT: vpxor %xmm10, %xmm10, %xmm10
295 ; AVX512F-NEXT: vpcmpeqb %ymm10, %ymm9, %ymm9
296 ; AVX512F-NEXT: vpblendvb %ymm9, %ymm2, %ymm0, %ymm0
297 ; AVX512F-NEXT: vpsrlw $4, %ymm3, %ymm2
298 ; AVX512F-NEXT: vpand %ymm6, %ymm2, %ymm2
299 ; AVX512F-NEXT: vpand %ymm7, %ymm5, %ymm5
300 ; AVX512F-NEXT: vpsllw $5, %ymm5, %ymm6
301 ; AVX512F-NEXT: vpblendvb %ymm6, %ymm2, %ymm3, %ymm2
302 ; AVX512F-NEXT: vpsrlw $2, %ymm2, %ymm7
303 ; AVX512F-NEXT: vpand %ymm4, %ymm7, %ymm4
304 ; AVX512F-NEXT: vpaddb %ymm6, %ymm6, %ymm6
305 ; AVX512F-NEXT: vpblendvb %ymm6, %ymm4, %ymm2, %ymm2
306 ; AVX512F-NEXT: vpsrlw $1, %ymm2, %ymm4
307 ; AVX512F-NEXT: vpand %ymm8, %ymm4, %ymm4
308 ; AVX512F-NEXT: vpaddb %ymm6, %ymm6, %ymm6
309 ; AVX512F-NEXT: vpblendvb %ymm6, %ymm4, %ymm2, %ymm2
310 ; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm4
311 ; AVX512F-NEXT: vpand %ymm12, %ymm4, %ymm4
312 ; AVX512F-NEXT: vpsubb %ymm5, %ymm13, %ymm6
313 ; AVX512F-NEXT: vpsllw $5, %ymm6, %ymm6
314 ; AVX512F-NEXT: vpblendvb %ymm6, %ymm4, %ymm1, %ymm1
315 ; AVX512F-NEXT: vpsllw $2, %ymm1, %ymm4
316 ; AVX512F-NEXT: vpand %ymm15, %ymm4, %ymm4
317 ; AVX512F-NEXT: vpaddb %ymm6, %ymm6, %ymm6
318 ; AVX512F-NEXT: vpblendvb %ymm6, %ymm4, %ymm1, %ymm1
319 ; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm4
320 ; AVX512F-NEXT: vpaddb %ymm6, %ymm6, %ymm6
321 ; AVX512F-NEXT: vpblendvb %ymm6, %ymm4, %ymm1, %ymm1
322 ; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1
323 ; AVX512F-NEXT: vpcmpeqb %ymm10, %ymm5, %ymm2
324 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
327 ; AVX512VL-LABEL: var_funnnel_v64i8:
329 ; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm6
330 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
331 ; AVX512VL-NEXT: vpand %ymm7, %ymm6, %ymm6
332 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm8 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
333 ; AVX512VL-NEXT: vpand %ymm8, %ymm4, %ymm4
334 ; AVX512VL-NEXT: vpsllw $5, %ymm4, %ymm9
335 ; AVX512VL-NEXT: vpblendvb %ymm9, %ymm6, %ymm2, %ymm6
336 ; AVX512VL-NEXT: vpsrlw $2, %ymm6, %ymm10
337 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm11 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
338 ; AVX512VL-NEXT: vpand %ymm11, %ymm10, %ymm10
339 ; AVX512VL-NEXT: vpaddb %ymm9, %ymm9, %ymm9
340 ; AVX512VL-NEXT: vpblendvb %ymm9, %ymm10, %ymm6, %ymm6
341 ; AVX512VL-NEXT: vpsrlw $1, %ymm6, %ymm10
342 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm12 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
343 ; AVX512VL-NEXT: vpand %ymm12, %ymm10, %ymm10
344 ; AVX512VL-NEXT: vpaddb %ymm9, %ymm9, %ymm9
345 ; AVX512VL-NEXT: vpblendvb %ymm9, %ymm10, %ymm6, %ymm6
346 ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm9
347 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm10 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
348 ; AVX512VL-NEXT: vpand %ymm10, %ymm9, %ymm9
349 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm13 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
350 ; AVX512VL-NEXT: vpsubb %ymm4, %ymm13, %ymm14
351 ; AVX512VL-NEXT: vpsllw $5, %ymm14, %ymm14
352 ; AVX512VL-NEXT: vpblendvb %ymm14, %ymm9, %ymm0, %ymm0
353 ; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm9
354 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm15 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
355 ; AVX512VL-NEXT: vpand %ymm15, %ymm9, %ymm9
356 ; AVX512VL-NEXT: vpaddb %ymm14, %ymm14, %ymm14
357 ; AVX512VL-NEXT: vpblendvb %ymm14, %ymm9, %ymm0, %ymm0
358 ; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm9
359 ; AVX512VL-NEXT: vpaddb %ymm14, %ymm14, %ymm14
360 ; AVX512VL-NEXT: vpblendvb %ymm14, %ymm9, %ymm0, %ymm0
361 ; AVX512VL-NEXT: vpor %ymm6, %ymm0, %ymm0
362 ; AVX512VL-NEXT: vpxor %xmm6, %xmm6, %xmm6
363 ; AVX512VL-NEXT: vpcmpeqb %ymm6, %ymm4, %ymm4
364 ; AVX512VL-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
365 ; AVX512VL-NEXT: vpsrlw $4, %ymm3, %ymm2
366 ; AVX512VL-NEXT: vpand %ymm7, %ymm2, %ymm2
367 ; AVX512VL-NEXT: vpand %ymm8, %ymm5, %ymm4
368 ; AVX512VL-NEXT: vpsllw $5, %ymm4, %ymm5
369 ; AVX512VL-NEXT: vpblendvb %ymm5, %ymm2, %ymm3, %ymm2
370 ; AVX512VL-NEXT: vpsrlw $2, %ymm2, %ymm7
371 ; AVX512VL-NEXT: vpand %ymm11, %ymm7, %ymm7
372 ; AVX512VL-NEXT: vpaddb %ymm5, %ymm5, %ymm5
373 ; AVX512VL-NEXT: vpblendvb %ymm5, %ymm7, %ymm2, %ymm2
374 ; AVX512VL-NEXT: vpsrlw $1, %ymm2, %ymm7
375 ; AVX512VL-NEXT: vpand %ymm12, %ymm7, %ymm7
376 ; AVX512VL-NEXT: vpaddb %ymm5, %ymm5, %ymm5
377 ; AVX512VL-NEXT: vpblendvb %ymm5, %ymm7, %ymm2, %ymm2
378 ; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm5
379 ; AVX512VL-NEXT: vpand %ymm10, %ymm5, %ymm5
380 ; AVX512VL-NEXT: vpsubb %ymm4, %ymm13, %ymm7
381 ; AVX512VL-NEXT: vpsllw $5, %ymm7, %ymm7
382 ; AVX512VL-NEXT: vpblendvb %ymm7, %ymm5, %ymm1, %ymm1
383 ; AVX512VL-NEXT: vpsllw $2, %ymm1, %ymm5
384 ; AVX512VL-NEXT: vpand %ymm15, %ymm5, %ymm5
385 ; AVX512VL-NEXT: vpaddb %ymm7, %ymm7, %ymm7
386 ; AVX512VL-NEXT: vpblendvb %ymm7, %ymm5, %ymm1, %ymm1
387 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm5
388 ; AVX512VL-NEXT: vpaddb %ymm7, %ymm7, %ymm7
389 ; AVX512VL-NEXT: vpblendvb %ymm7, %ymm5, %ymm1, %ymm1
390 ; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
391 ; AVX512VL-NEXT: vpcmpeqb %ymm6, %ymm4, %ymm2
392 ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
393 ; AVX512VL-NEXT: retq
395 ; AVX512BW-LABEL: var_funnnel_v64i8:
397 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
398 ; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm4
399 ; AVX512BW-NEXT: vpsllw $5, %zmm4, %zmm5
400 ; AVX512BW-NEXT: vpaddb %zmm5, %zmm5, %zmm6
401 ; AVX512BW-NEXT: vpmovb2m %zmm6, %k1
402 ; AVX512BW-NEXT: vpmovb2m %zmm5, %k2
403 ; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm5
404 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
405 ; AVX512BW-NEXT: vpblendmb %zmm5, %zmm1, %zmm5 {%k2}
406 ; AVX512BW-NEXT: vpsrlw $2, %zmm5, %zmm7
407 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm7, %zmm7
408 ; AVX512BW-NEXT: vmovdqu8 %zmm7, %zmm5 {%k1}
409 ; AVX512BW-NEXT: vpsrlw $1, %zmm5, %zmm7
410 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm7, %zmm7
411 ; AVX512BW-NEXT: vpaddb %zmm6, %zmm6, %zmm6
412 ; AVX512BW-NEXT: vpmovb2m %zmm6, %k1
413 ; AVX512BW-NEXT: vmovdqu8 %zmm7, %zmm5 {%k1}
414 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
415 ; AVX512BW-NEXT: vpsubb %zmm4, %zmm6, %zmm4
416 ; AVX512BW-NEXT: vpsllw $5, %zmm4, %zmm4
417 ; AVX512BW-NEXT: vpaddb %zmm4, %zmm4, %zmm6
418 ; AVX512BW-NEXT: vpmovb2m %zmm6, %k1
419 ; AVX512BW-NEXT: vpmovb2m %zmm4, %k2
420 ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm4
421 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm4, %zmm4
422 ; AVX512BW-NEXT: vmovdqu8 %zmm4, %zmm0 {%k2}
423 ; AVX512BW-NEXT: vpsllw $2, %zmm0, %zmm4
424 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm4, %zmm4
425 ; AVX512BW-NEXT: vmovdqu8 %zmm4, %zmm0 {%k1}
426 ; AVX512BW-NEXT: vpaddb %zmm6, %zmm6, %zmm4
427 ; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
428 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
429 ; AVX512BW-NEXT: vporq %zmm5, %zmm0, %zmm0
430 ; AVX512BW-NEXT: vptestnmb %zmm3, %zmm2, %k1
431 ; AVX512BW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1}
432 ; AVX512BW-NEXT: retq
434 ; AVX512VBMI2-LABEL: var_funnnel_v64i8:
435 ; AVX512VBMI2: # %bb.0:
436 ; AVX512VBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
437 ; AVX512VBMI2-NEXT: vpandq %zmm3, %zmm2, %zmm4
438 ; AVX512VBMI2-NEXT: vpsllw $5, %zmm4, %zmm5
439 ; AVX512VBMI2-NEXT: vpaddb %zmm5, %zmm5, %zmm6
440 ; AVX512VBMI2-NEXT: vpmovb2m %zmm6, %k1
441 ; AVX512VBMI2-NEXT: vpmovb2m %zmm5, %k2
442 ; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm5
443 ; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
444 ; AVX512VBMI2-NEXT: vpblendmb %zmm5, %zmm1, %zmm5 {%k2}
445 ; AVX512VBMI2-NEXT: vpsrlw $2, %zmm5, %zmm7
446 ; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm7, %zmm7
447 ; AVX512VBMI2-NEXT: vmovdqu8 %zmm7, %zmm5 {%k1}
448 ; AVX512VBMI2-NEXT: vpsrlw $1, %zmm5, %zmm7
449 ; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm7, %zmm7
450 ; AVX512VBMI2-NEXT: vpaddb %zmm6, %zmm6, %zmm6
451 ; AVX512VBMI2-NEXT: vpmovb2m %zmm6, %k1
452 ; AVX512VBMI2-NEXT: vmovdqu8 %zmm7, %zmm5 {%k1}
453 ; AVX512VBMI2-NEXT: vmovdqa64 {{.*#+}} zmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
454 ; AVX512VBMI2-NEXT: vpsubb %zmm4, %zmm6, %zmm4
455 ; AVX512VBMI2-NEXT: vpsllw $5, %zmm4, %zmm4
456 ; AVX512VBMI2-NEXT: vpaddb %zmm4, %zmm4, %zmm6
457 ; AVX512VBMI2-NEXT: vpmovb2m %zmm6, %k1
458 ; AVX512VBMI2-NEXT: vpmovb2m %zmm4, %k2
459 ; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm4
460 ; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm4, %zmm4
461 ; AVX512VBMI2-NEXT: vmovdqu8 %zmm4, %zmm0 {%k2}
462 ; AVX512VBMI2-NEXT: vpsllw $2, %zmm0, %zmm4
463 ; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm4, %zmm4
464 ; AVX512VBMI2-NEXT: vmovdqu8 %zmm4, %zmm0 {%k1}
465 ; AVX512VBMI2-NEXT: vpaddb %zmm6, %zmm6, %zmm4
466 ; AVX512VBMI2-NEXT: vpmovb2m %zmm4, %k1
467 ; AVX512VBMI2-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
468 ; AVX512VBMI2-NEXT: vporq %zmm5, %zmm0, %zmm0
469 ; AVX512VBMI2-NEXT: vptestnmb %zmm3, %zmm2, %k1
470 ; AVX512VBMI2-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1}
471 ; AVX512VBMI2-NEXT: retq
473 ; AVX512VLBW-LABEL: var_funnnel_v64i8:
474 ; AVX512VLBW: # %bb.0:
475 ; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
476 ; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm4
477 ; AVX512VLBW-NEXT: vpsllw $5, %zmm4, %zmm5
478 ; AVX512VLBW-NEXT: vpaddb %zmm5, %zmm5, %zmm6
479 ; AVX512VLBW-NEXT: vpmovb2m %zmm6, %k1
480 ; AVX512VLBW-NEXT: vpmovb2m %zmm5, %k2
481 ; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm5
482 ; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
483 ; AVX512VLBW-NEXT: vpblendmb %zmm5, %zmm1, %zmm5 {%k2}
484 ; AVX512VLBW-NEXT: vpsrlw $2, %zmm5, %zmm7
485 ; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm7, %zmm7
486 ; AVX512VLBW-NEXT: vmovdqu8 %zmm7, %zmm5 {%k1}
487 ; AVX512VLBW-NEXT: vpsrlw $1, %zmm5, %zmm7
488 ; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm7, %zmm7
489 ; AVX512VLBW-NEXT: vpaddb %zmm6, %zmm6, %zmm6
490 ; AVX512VLBW-NEXT: vpmovb2m %zmm6, %k1
491 ; AVX512VLBW-NEXT: vmovdqu8 %zmm7, %zmm5 {%k1}
492 ; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
493 ; AVX512VLBW-NEXT: vpsubb %zmm4, %zmm6, %zmm4
494 ; AVX512VLBW-NEXT: vpsllw $5, %zmm4, %zmm4
495 ; AVX512VLBW-NEXT: vpaddb %zmm4, %zmm4, %zmm6
496 ; AVX512VLBW-NEXT: vpmovb2m %zmm6, %k1
497 ; AVX512VLBW-NEXT: vpmovb2m %zmm4, %k2
498 ; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm4
499 ; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm4, %zmm4
500 ; AVX512VLBW-NEXT: vmovdqu8 %zmm4, %zmm0 {%k2}
501 ; AVX512VLBW-NEXT: vpsllw $2, %zmm0, %zmm4
502 ; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm4, %zmm4
503 ; AVX512VLBW-NEXT: vmovdqu8 %zmm4, %zmm0 {%k1}
504 ; AVX512VLBW-NEXT: vpaddb %zmm6, %zmm6, %zmm4
505 ; AVX512VLBW-NEXT: vpmovb2m %zmm4, %k1
506 ; AVX512VLBW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
507 ; AVX512VLBW-NEXT: vporq %zmm5, %zmm0, %zmm0
508 ; AVX512VLBW-NEXT: vptestnmb %zmm3, %zmm2, %k1
509 ; AVX512VLBW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1}
510 ; AVX512VLBW-NEXT: retq
512 ; AVX512VLVBMI2-LABEL: var_funnnel_v64i8:
513 ; AVX512VLVBMI2: # %bb.0:
514 ; AVX512VLVBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
515 ; AVX512VLVBMI2-NEXT: vpandq %zmm3, %zmm2, %zmm4
516 ; AVX512VLVBMI2-NEXT: vpsllw $5, %zmm4, %zmm5
517 ; AVX512VLVBMI2-NEXT: vpaddb %zmm5, %zmm5, %zmm6
518 ; AVX512VLVBMI2-NEXT: vpmovb2m %zmm6, %k1
519 ; AVX512VLVBMI2-NEXT: vpmovb2m %zmm5, %k2
520 ; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm5
521 ; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
522 ; AVX512VLVBMI2-NEXT: vpblendmb %zmm5, %zmm1, %zmm5 {%k2}
523 ; AVX512VLVBMI2-NEXT: vpsrlw $2, %zmm5, %zmm7
524 ; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm7, %zmm7
525 ; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm7, %zmm5 {%k1}
526 ; AVX512VLVBMI2-NEXT: vpsrlw $1, %zmm5, %zmm7
527 ; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm7, %zmm7
528 ; AVX512VLVBMI2-NEXT: vpaddb %zmm6, %zmm6, %zmm6
529 ; AVX512VLVBMI2-NEXT: vpmovb2m %zmm6, %k1
530 ; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm7, %zmm5 {%k1}
531 ; AVX512VLVBMI2-NEXT: vmovdqa64 {{.*#+}} zmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
532 ; AVX512VLVBMI2-NEXT: vpsubb %zmm4, %zmm6, %zmm4
533 ; AVX512VLVBMI2-NEXT: vpsllw $5, %zmm4, %zmm4
534 ; AVX512VLVBMI2-NEXT: vpaddb %zmm4, %zmm4, %zmm6
535 ; AVX512VLVBMI2-NEXT: vpmovb2m %zmm6, %k1
536 ; AVX512VLVBMI2-NEXT: vpmovb2m %zmm4, %k2
537 ; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm4
538 ; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm4, %zmm4
539 ; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm4, %zmm0 {%k2}
540 ; AVX512VLVBMI2-NEXT: vpsllw $2, %zmm0, %zmm4
541 ; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm4, %zmm4
542 ; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm4, %zmm0 {%k1}
543 ; AVX512VLVBMI2-NEXT: vpaddb %zmm6, %zmm6, %zmm4
544 ; AVX512VLVBMI2-NEXT: vpmovb2m %zmm4, %k1
545 ; AVX512VLVBMI2-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
546 ; AVX512VLVBMI2-NEXT: vporq %zmm5, %zmm0, %zmm0
547 ; AVX512VLVBMI2-NEXT: vptestnmb %zmm3, %zmm2, %k1
548 ; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1}
549 ; AVX512VLVBMI2-NEXT: retq
550 %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt)
555 ; Uniform Variable Shifts
; Funnel shift right of two <8 x i64> vectors by a uniform amount: the
; shufflevector with a zeroinitializer mask copies lane 0 of %amt into all eight
; lanes, and that splat is the third operand of llvm.fshr.v8i64(%x, %y, ...).
; In the expected output below, only the two VBMI2 configurations use a single
; vpshrdvq; the remaining configurations check a vpsrlq/vpsllq/vporq sequence
; followed by a vptestnmq-masked vmovdqa64 of %zmm1.
; The assertion lines are autogenerated (see the note on the first line of this
; file); regenerate them with the update script rather than editing by hand.
558 define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %amt) nounwind {
559 ; AVX512F-LABEL: splatvar_funnnel_v8i64:
561 ; AVX512F-NEXT: vpbroadcastq %xmm2, %zmm2
562 ; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
563 ; AVX512F-NEXT: vpandq %zmm3, %zmm2, %zmm4
564 ; AVX512F-NEXT: vpsrlq %xmm4, %zmm1, %zmm5
565 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm6 = [64,64]
566 ; AVX512F-NEXT: vpsubq %xmm4, %xmm6, %xmm4
567 ; AVX512F-NEXT: vpsllq %xmm4, %zmm0, %zmm0
568 ; AVX512F-NEXT: vporq %zmm5, %zmm0, %zmm0
569 ; AVX512F-NEXT: vptestnmq %zmm3, %zmm2, %k1
570 ; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1}
573 ; AVX512VL-LABEL: splatvar_funnnel_v8i64:
575 ; AVX512VL-NEXT: vpbroadcastq %xmm2, %zmm2
576 ; AVX512VL-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
577 ; AVX512VL-NEXT: vpandq %zmm3, %zmm2, %zmm4
578 ; AVX512VL-NEXT: vpsrlq %xmm4, %zmm1, %zmm5
579 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm6 = [64,64]
580 ; AVX512VL-NEXT: vpsubq %xmm4, %xmm6, %xmm4
581 ; AVX512VL-NEXT: vpsllq %xmm4, %zmm0, %zmm0
582 ; AVX512VL-NEXT: vporq %zmm5, %zmm0, %zmm0
583 ; AVX512VL-NEXT: vptestnmq %zmm3, %zmm2, %k1
584 ; AVX512VL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1}
585 ; AVX512VL-NEXT: retq
587 ; AVX512BW-LABEL: splatvar_funnnel_v8i64:
589 ; AVX512BW-NEXT: vpbroadcastq %xmm2, %zmm2
590 ; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
591 ; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm4
592 ; AVX512BW-NEXT: vpsrlq %xmm4, %zmm1, %zmm5
593 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm6 = [64,64]
594 ; AVX512BW-NEXT: vpsubq %xmm4, %xmm6, %xmm4
595 ; AVX512BW-NEXT: vpsllq %xmm4, %zmm0, %zmm0
596 ; AVX512BW-NEXT: vporq %zmm5, %zmm0, %zmm0
597 ; AVX512BW-NEXT: vptestnmq %zmm3, %zmm2, %k1
598 ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1}
599 ; AVX512BW-NEXT: retq
601 ; AVX512VBMI2-LABEL: splatvar_funnnel_v8i64:
602 ; AVX512VBMI2: # %bb.0:
603 ; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %zmm2
604 ; AVX512VBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1
605 ; AVX512VBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
606 ; AVX512VBMI2-NEXT: retq
608 ; AVX512VLBW-LABEL: splatvar_funnnel_v8i64:
609 ; AVX512VLBW: # %bb.0:
610 ; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %zmm2
611 ; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
612 ; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm4
613 ; AVX512VLBW-NEXT: vpsrlq %xmm4, %zmm1, %zmm5
614 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm6 = [64,64]
615 ; AVX512VLBW-NEXT: vpsubq %xmm4, %xmm6, %xmm4
616 ; AVX512VLBW-NEXT: vpsllq %xmm4, %zmm0, %zmm0
617 ; AVX512VLBW-NEXT: vporq %zmm5, %zmm0, %zmm0
618 ; AVX512VLBW-NEXT: vptestnmq %zmm3, %zmm2, %k1
619 ; AVX512VLBW-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1}
620 ; AVX512VLBW-NEXT: retq
622 ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v8i64:
623 ; AVX512VLVBMI2: # %bb.0:
624 ; AVX512VLVBMI2-NEXT: vpbroadcastq %xmm2, %zmm2
625 ; AVX512VLVBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1
626 ; AVX512VLVBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
627 ; AVX512VLVBMI2-NEXT: retq
628 %splat = shufflevector <8 x i64> %amt, <8 x i64> undef, <8 x i32> zeroinitializer
629 %res = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %splat)
; Funnel shift right of two <16 x i32> vectors by a uniform amount: lane 0 of
; %amt is splatted across all sixteen lanes (zeroinitializer shuffle mask) and
; passed as the shift operand of llvm.fshr.v16i32(%x, %y, ...).
; In the expected output below, the two VBMI2 configurations use a single
; vpshrdvd; the remaining configurations check vpsrld/vpslld with a
; vpmovzxdq-extended amount, vpord, and a vptestnmd-masked vmovdqa32 of %zmm1.
; The assertion lines are autogenerated (see the note on the first line of this
; file); regenerate them with the update script rather than editing by hand.
633 define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i32> %amt) nounwind {
634 ; AVX512F-LABEL: splatvar_funnnel_v16i32:
636 ; AVX512F-NEXT: vpbroadcastd %xmm2, %zmm2
637 ; AVX512F-NEXT: vpbroadcastd {{.*#+}} zmm3 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
638 ; AVX512F-NEXT: vpandd %zmm3, %zmm2, %zmm4
639 ; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm5 = xmm4[0],zero,xmm4[1],zero
640 ; AVX512F-NEXT: vpsrld %xmm5, %zmm1, %zmm5
641 ; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm6 = [32,32,32,32]
642 ; AVX512F-NEXT: vpsubd %xmm4, %xmm6, %xmm4
643 ; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
644 ; AVX512F-NEXT: vpslld %xmm4, %zmm0, %zmm0
645 ; AVX512F-NEXT: vpord %zmm5, %zmm0, %zmm0
646 ; AVX512F-NEXT: vptestnmd %zmm3, %zmm2, %k1
647 ; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
650 ; AVX512VL-LABEL: splatvar_funnnel_v16i32:
652 ; AVX512VL-NEXT: vpbroadcastd %xmm2, %zmm2
653 ; AVX512VL-NEXT: vpbroadcastd {{.*#+}} zmm3 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
654 ; AVX512VL-NEXT: vpandd %zmm3, %zmm2, %zmm4
655 ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm5 = xmm4[0],zero,xmm4[1],zero
656 ; AVX512VL-NEXT: vpsrld %xmm5, %zmm1, %zmm5
657 ; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm6 = [32,32,32,32]
658 ; AVX512VL-NEXT: vpsubd %xmm4, %xmm6, %xmm4
659 ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
660 ; AVX512VL-NEXT: vpslld %xmm4, %zmm0, %zmm0
661 ; AVX512VL-NEXT: vpord %zmm5, %zmm0, %zmm0
662 ; AVX512VL-NEXT: vptestnmd %zmm3, %zmm2, %k1
663 ; AVX512VL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
664 ; AVX512VL-NEXT: retq
666 ; AVX512BW-LABEL: splatvar_funnnel_v16i32:
668 ; AVX512BW-NEXT: vpbroadcastd %xmm2, %zmm2
669 ; AVX512BW-NEXT: vpbroadcastd {{.*#+}} zmm3 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
670 ; AVX512BW-NEXT: vpandd %zmm3, %zmm2, %zmm4
671 ; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm5 = xmm4[0],zero,xmm4[1],zero
672 ; AVX512BW-NEXT: vpsrld %xmm5, %zmm1, %zmm5
673 ; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm6 = [32,32,32,32]
674 ; AVX512BW-NEXT: vpsubd %xmm4, %xmm6, %xmm4
675 ; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
676 ; AVX512BW-NEXT: vpslld %xmm4, %zmm0, %zmm0
677 ; AVX512BW-NEXT: vpord %zmm5, %zmm0, %zmm0
678 ; AVX512BW-NEXT: vptestnmd %zmm3, %zmm2, %k1
679 ; AVX512BW-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
680 ; AVX512BW-NEXT: retq
682 ; AVX512VBMI2-LABEL: splatvar_funnnel_v16i32:
683 ; AVX512VBMI2: # %bb.0:
684 ; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %zmm2
685 ; AVX512VBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1
686 ; AVX512VBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
687 ; AVX512VBMI2-NEXT: retq
689 ; AVX512VLBW-LABEL: splatvar_funnnel_v16i32:
690 ; AVX512VLBW: # %bb.0:
691 ; AVX512VLBW-NEXT: vpbroadcastd %xmm2, %zmm2
692 ; AVX512VLBW-NEXT: vpbroadcastd {{.*#+}} zmm3 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
693 ; AVX512VLBW-NEXT: vpandd %zmm3, %zmm2, %zmm4
694 ; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm5 = xmm4[0],zero,xmm4[1],zero
695 ; AVX512VLBW-NEXT: vpsrld %xmm5, %zmm1, %zmm5
696 ; AVX512VLBW-NEXT: vpbroadcastd {{.*#+}} xmm6 = [32,32,32,32]
697 ; AVX512VLBW-NEXT: vpsubd %xmm4, %xmm6, %xmm4
698 ; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
699 ; AVX512VLBW-NEXT: vpslld %xmm4, %zmm0, %zmm0
700 ; AVX512VLBW-NEXT: vpord %zmm5, %zmm0, %zmm0
701 ; AVX512VLBW-NEXT: vptestnmd %zmm3, %zmm2, %k1
702 ; AVX512VLBW-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
703 ; AVX512VLBW-NEXT: retq
705 ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i32:
706 ; AVX512VLVBMI2: # %bb.0:
707 ; AVX512VLVBMI2-NEXT: vpbroadcastd %xmm2, %zmm2
708 ; AVX512VLVBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1
709 ; AVX512VLVBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
710 ; AVX512VLVBMI2-NEXT: retq
711 %splat = shufflevector <16 x i32> %amt, <16 x i32> undef, <16 x i32> zeroinitializer
712 %res = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i32> %splat)
; Funnel shift right of two <32 x i16> vectors by a uniform amount: lane 0 of
; %amt is splatted across all thirty-two lanes (zeroinitializer shuffle mask)
; and passed as the shift operand of llvm.fshr.v32i16(%x, %y, ...).
; In the expected output below, the plain F and VL configurations check two
; ymm-register vpsrlw/vpsllw/vpor/vpblendvb sequences; the BW and VLBW
; configurations check one zmm-register sequence ending in a vptestnmw-masked
; vmovdqu16; the two VBMI2 configurations use a single vpshrdvw.
; The assertion lines are autogenerated (see the note on the first line of this
; file); regenerate them with the update script rather than editing by hand.
716 define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %amt) nounwind {
717 ; AVX512F-LABEL: splatvar_funnnel_v32i16:
719 ; AVX512F-NEXT: vpbroadcastw %xmm4, %ymm4
720 ; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4
721 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
722 ; AVX512F-NEXT: vpsrlw %xmm5, %ymm2, %ymm6
723 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm7 = [16,16,16,16,16,16,16,16]
724 ; AVX512F-NEXT: vpsubw %xmm4, %xmm7, %xmm7
725 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero
726 ; AVX512F-NEXT: vpsllw %xmm7, %ymm0, %ymm0
727 ; AVX512F-NEXT: vpor %ymm6, %ymm0, %ymm0
728 ; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6
729 ; AVX512F-NEXT: vpcmpeqw %ymm6, %ymm4, %ymm4
730 ; AVX512F-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
731 ; AVX512F-NEXT: vpsrlw %xmm5, %ymm3, %ymm2
732 ; AVX512F-NEXT: vpsllw %xmm7, %ymm1, %ymm1
733 ; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1
734 ; AVX512F-NEXT: vpblendvb %ymm4, %ymm3, %ymm1, %ymm1
737 ; AVX512VL-LABEL: splatvar_funnnel_v32i16:
739 ; AVX512VL-NEXT: vpbroadcastw %xmm4, %ymm4
740 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4
741 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
742 ; AVX512VL-NEXT: vpsrlw %xmm5, %ymm2, %ymm6
743 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm7 = [16,16,16,16,16,16,16,16]
744 ; AVX512VL-NEXT: vpsubw %xmm4, %xmm7, %xmm7
745 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero
746 ; AVX512VL-NEXT: vpsllw %xmm7, %ymm0, %ymm0
747 ; AVX512VL-NEXT: vpor %ymm6, %ymm0, %ymm0
748 ; AVX512VL-NEXT: vpxor %xmm6, %xmm6, %xmm6
749 ; AVX512VL-NEXT: vpcmpeqw %ymm6, %ymm4, %ymm4
750 ; AVX512VL-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
751 ; AVX512VL-NEXT: vpsrlw %xmm5, %ymm3, %ymm2
752 ; AVX512VL-NEXT: vpsllw %xmm7, %ymm1, %ymm1
753 ; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
754 ; AVX512VL-NEXT: vpblendvb %ymm4, %ymm3, %ymm1, %ymm1
755 ; AVX512VL-NEXT: retq
757 ; AVX512BW-LABEL: splatvar_funnnel_v32i16:
759 ; AVX512BW-NEXT: vpbroadcastw %xmm2, %zmm2
760 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
761 ; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm4
762 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
763 ; AVX512BW-NEXT: vpsrlw %xmm5, %zmm1, %zmm5
764 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm6 = [16,16,16,16,16,16,16,16]
765 ; AVX512BW-NEXT: vpsubw %xmm4, %xmm6, %xmm4
766 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
767 ; AVX512BW-NEXT: vpsllw %xmm4, %zmm0, %zmm0
768 ; AVX512BW-NEXT: vporq %zmm5, %zmm0, %zmm0
769 ; AVX512BW-NEXT: vptestnmw %zmm3, %zmm2, %k1
770 ; AVX512BW-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1}
771 ; AVX512BW-NEXT: retq
773 ; AVX512VBMI2-LABEL: splatvar_funnnel_v32i16:
774 ; AVX512VBMI2: # %bb.0:
775 ; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %zmm2
776 ; AVX512VBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1
777 ; AVX512VBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
778 ; AVX512VBMI2-NEXT: retq
780 ; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
781 ; AVX512VLBW: # %bb.0:
782 ; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %zmm2
783 ; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
784 ; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm4
785 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
786 ; AVX512VLBW-NEXT: vpsrlw %xmm5, %zmm1, %zmm5
787 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm6 = [16,16,16,16,16,16,16,16]
788 ; AVX512VLBW-NEXT: vpsubw %xmm4, %xmm6, %xmm4
789 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
790 ; AVX512VLBW-NEXT: vpsllw %xmm4, %zmm0, %zmm0
791 ; AVX512VLBW-NEXT: vporq %zmm5, %zmm0, %zmm0
792 ; AVX512VLBW-NEXT: vptestnmw %zmm3, %zmm2, %k1
793 ; AVX512VLBW-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1}
794 ; AVX512VLBW-NEXT: retq
796 ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i16:
797 ; AVX512VLVBMI2: # %bb.0:
798 ; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm2, %zmm2
799 ; AVX512VLVBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1
800 ; AVX512VLVBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
801 ; AVX512VLVBMI2-NEXT: retq
802 %splat = shufflevector <32 x i16> %amt, <32 x i16> undef, <32 x i32> zeroinitializer
803 %res = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %splat)
; Funnel shift right of two <64 x i8> vectors by a uniform amount: lane 0 of
; %amt is splatted across all sixty-four lanes (zeroinitializer shuffle mask)
; and passed as the shift operand of llvm.fshr.v64i8(%x, %y, ...).
; None of the configurations below checks a single funnel-shift instruction for
; this element width. The plain F and VL configurations check ymm-register
; sequences (vpsrlw/vpsllw with broadcast byte masks, vpor, vpblendvb); the BW
; and VBMI2 configurations (with and without VL) check the zmm-register
; equivalent ending in a vptestnmb-masked vmovdqu8.
; The assertion lines are autogenerated (see the note on the first line of this
; file); regenerate them with the update script rather than editing by hand.
807 define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt) nounwind {
808 ; AVX512F-LABEL: splatvar_funnnel_v64i8:
810 ; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
811 ; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4
812 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
813 ; AVX512F-NEXT: vpsrlw %xmm5, %ymm2, %ymm6
814 ; AVX512F-NEXT: vpcmpeqd %ymm9, %ymm9, %ymm9
815 ; AVX512F-NEXT: vpsrlw %xmm5, %ymm9, %ymm8
816 ; AVX512F-NEXT: vpsrlw $8, %ymm8, %ymm8
817 ; AVX512F-NEXT: vpbroadcastb %xmm8, %ymm8
818 ; AVX512F-NEXT: vpand %ymm8, %ymm6, %ymm6
819 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
820 ; AVX512F-NEXT: vpsubb %xmm4, %xmm7, %xmm7
821 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,zero,zero,zero,zero,xmm7[1],zero,zero,zero,zero,zero,zero,zero
822 ; AVX512F-NEXT: vpsllw %xmm7, %ymm0, %ymm0
823 ; AVX512F-NEXT: vpsllw %xmm7, %ymm9, %ymm9
824 ; AVX512F-NEXT: vpbroadcastb %xmm9, %ymm9
825 ; AVX512F-NEXT: vpand %ymm9, %ymm0, %ymm0
826 ; AVX512F-NEXT: vpor %ymm6, %ymm0, %ymm0
827 ; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6
828 ; AVX512F-NEXT: vpcmpeqb %ymm6, %ymm4, %ymm4
829 ; AVX512F-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
830 ; AVX512F-NEXT: vpsrlw %xmm5, %ymm3, %ymm2
831 ; AVX512F-NEXT: vpand %ymm8, %ymm2, %ymm2
832 ; AVX512F-NEXT: vpsllw %xmm7, %ymm1, %ymm1
833 ; AVX512F-NEXT: vpand %ymm9, %ymm1, %ymm1
834 ; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1
835 ; AVX512F-NEXT: vpblendvb %ymm4, %ymm3, %ymm1, %ymm1
838 ; AVX512VL-LABEL: splatvar_funnnel_v64i8:
840 ; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
841 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4
842 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
843 ; AVX512VL-NEXT: vpsrlw %xmm5, %ymm2, %ymm6
844 ; AVX512VL-NEXT: vpcmpeqd %ymm9, %ymm9, %ymm9
845 ; AVX512VL-NEXT: vpsrlw %xmm5, %ymm9, %ymm8
846 ; AVX512VL-NEXT: vpsrlw $8, %ymm8, %ymm8
847 ; AVX512VL-NEXT: vpbroadcastb %xmm8, %ymm8
848 ; AVX512VL-NEXT: vpand %ymm8, %ymm6, %ymm6
849 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
850 ; AVX512VL-NEXT: vpsubb %xmm4, %xmm7, %xmm7
851 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,zero,zero,zero,zero,xmm7[1],zero,zero,zero,zero,zero,zero,zero
852 ; AVX512VL-NEXT: vpsllw %xmm7, %ymm0, %ymm0
853 ; AVX512VL-NEXT: vpsllw %xmm7, %ymm9, %ymm9
854 ; AVX512VL-NEXT: vpbroadcastb %xmm9, %ymm9
855 ; AVX512VL-NEXT: vpand %ymm9, %ymm0, %ymm0
856 ; AVX512VL-NEXT: vpor %ymm6, %ymm0, %ymm0
857 ; AVX512VL-NEXT: vpxor %xmm6, %xmm6, %xmm6
858 ; AVX512VL-NEXT: vpcmpeqb %ymm6, %ymm4, %ymm4
859 ; AVX512VL-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
860 ; AVX512VL-NEXT: vpsrlw %xmm5, %ymm3, %ymm2
861 ; AVX512VL-NEXT: vpand %ymm8, %ymm2, %ymm2
862 ; AVX512VL-NEXT: vpsllw %xmm7, %ymm1, %ymm1
863 ; AVX512VL-NEXT: vpand %ymm9, %ymm1, %ymm1
864 ; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
865 ; AVX512VL-NEXT: vpblendvb %ymm4, %ymm3, %ymm1, %ymm1
866 ; AVX512VL-NEXT: retq
868 ; AVX512BW-LABEL: splatvar_funnnel_v64i8:
870 ; AVX512BW-NEXT: vpbroadcastb %xmm2, %zmm2
871 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
872 ; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm4
873 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
874 ; AVX512BW-NEXT: vpsrlw %xmm5, %zmm1, %zmm6
875 ; AVX512BW-NEXT: vpternlogd $255, %zmm7, %zmm7, %zmm7
876 ; AVX512BW-NEXT: vpsrlw %xmm5, %zmm7, %zmm5
877 ; AVX512BW-NEXT: vpsrlw $8, %zmm5, %zmm5
878 ; AVX512BW-NEXT: vpbroadcastb %xmm5, %zmm5
879 ; AVX512BW-NEXT: vpandq %zmm5, %zmm6, %zmm5
880 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
881 ; AVX512BW-NEXT: vpsubb %xmm4, %xmm6, %xmm4
882 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
883 ; AVX512BW-NEXT: vpsllw %xmm4, %zmm0, %zmm0
884 ; AVX512BW-NEXT: vpsllw %xmm4, %zmm7, %zmm4
885 ; AVX512BW-NEXT: vpbroadcastb %xmm4, %zmm4
886 ; AVX512BW-NEXT: vpandq %zmm4, %zmm0, %zmm0
887 ; AVX512BW-NEXT: vporq %zmm5, %zmm0, %zmm0
888 ; AVX512BW-NEXT: vptestnmb %zmm3, %zmm2, %k1
889 ; AVX512BW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1}
890 ; AVX512BW-NEXT: retq
892 ; AVX512VBMI2-LABEL: splatvar_funnnel_v64i8:
893 ; AVX512VBMI2: # %bb.0:
894 ; AVX512VBMI2-NEXT: vpbroadcastb %xmm2, %zmm2
895 ; AVX512VBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
896 ; AVX512VBMI2-NEXT: vpandq %zmm3, %zmm2, %zmm4
897 ; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
898 ; AVX512VBMI2-NEXT: vpsrlw %xmm5, %zmm1, %zmm6
899 ; AVX512VBMI2-NEXT: vpternlogd $255, %zmm7, %zmm7, %zmm7
900 ; AVX512VBMI2-NEXT: vpsrlw %xmm5, %zmm7, %zmm5
901 ; AVX512VBMI2-NEXT: vpsrlw $8, %zmm5, %zmm5
902 ; AVX512VBMI2-NEXT: vpbroadcastb %xmm5, %zmm5
903 ; AVX512VBMI2-NEXT: vpandq %zmm5, %zmm6, %zmm5
904 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
905 ; AVX512VBMI2-NEXT: vpsubb %xmm4, %xmm6, %xmm4
906 ; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
907 ; AVX512VBMI2-NEXT: vpsllw %xmm4, %zmm0, %zmm0
908 ; AVX512VBMI2-NEXT: vpsllw %xmm4, %zmm7, %zmm4
909 ; AVX512VBMI2-NEXT: vpbroadcastb %xmm4, %zmm4
910 ; AVX512VBMI2-NEXT: vpandq %zmm4, %zmm0, %zmm0
911 ; AVX512VBMI2-NEXT: vporq %zmm5, %zmm0, %zmm0
912 ; AVX512VBMI2-NEXT: vptestnmb %zmm3, %zmm2, %k1
913 ; AVX512VBMI2-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1}
914 ; AVX512VBMI2-NEXT: retq
916 ; AVX512VLBW-LABEL: splatvar_funnnel_v64i8:
917 ; AVX512VLBW: # %bb.0:
918 ; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %zmm2
919 ; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
920 ; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm4
921 ; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
922 ; AVX512VLBW-NEXT: vpsrlw %xmm5, %zmm1, %zmm6
923 ; AVX512VLBW-NEXT: vpternlogd $255, %zmm7, %zmm7, %zmm7
924 ; AVX512VLBW-NEXT: vpsrlw %xmm5, %zmm7, %zmm5
925 ; AVX512VLBW-NEXT: vpsrlw $8, %zmm5, %zmm5
926 ; AVX512VLBW-NEXT: vpbroadcastb %xmm5, %zmm5
927 ; AVX512VLBW-NEXT: vpandq %zmm5, %zmm6, %zmm5
928 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
929 ; AVX512VLBW-NEXT: vpsubb %xmm4, %xmm6, %xmm4
930 ; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
931 ; AVX512VLBW-NEXT: vpsllw %xmm4, %zmm0, %zmm0
932 ; AVX512VLBW-NEXT: vpsllw %xmm4, %zmm7, %zmm4
933 ; AVX512VLBW-NEXT: vpbroadcastb %xmm4, %zmm4
934 ; AVX512VLBW-NEXT: vpandq %zmm4, %zmm0, %zmm0
935 ; AVX512VLBW-NEXT: vporq %zmm5, %zmm0, %zmm0
936 ; AVX512VLBW-NEXT: vptestnmb %zmm3, %zmm2, %k1
937 ; AVX512VLBW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1}
938 ; AVX512VLBW-NEXT: retq
940 ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v64i8:
941 ; AVX512VLVBMI2: # %bb.0:
942 ; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm2, %zmm2
943 ; AVX512VLVBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
944 ; AVX512VLVBMI2-NEXT: vpandq %zmm3, %zmm2, %zmm4
945 ; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
946 ; AVX512VLVBMI2-NEXT: vpsrlw %xmm5, %zmm1, %zmm6
947 ; AVX512VLVBMI2-NEXT: vpternlogd $255, %zmm7, %zmm7, %zmm7
948 ; AVX512VLVBMI2-NEXT: vpsrlw %xmm5, %zmm7, %zmm5
949 ; AVX512VLVBMI2-NEXT: vpsrlw $8, %zmm5, %zmm5
950 ; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm5, %zmm5
951 ; AVX512VLVBMI2-NEXT: vpandq %zmm5, %zmm6, %zmm5
952 ; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
953 ; AVX512VLVBMI2-NEXT: vpsubb %xmm4, %xmm6, %xmm4
954 ; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
955 ; AVX512VLVBMI2-NEXT: vpsllw %xmm4, %zmm0, %zmm0
956 ; AVX512VLVBMI2-NEXT: vpsllw %xmm4, %zmm7, %zmm4
957 ; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm4, %zmm4
958 ; AVX512VLVBMI2-NEXT: vpandq %zmm4, %zmm0, %zmm0
959 ; AVX512VLVBMI2-NEXT: vporq %zmm5, %zmm0, %zmm0
960 ; AVX512VLVBMI2-NEXT: vptestnmb %zmm3, %zmm2, %k1
961 ; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1}
962 ; AVX512VLVBMI2-NEXT: retq
963 %splat = shufflevector <64 x i8> %amt, <64 x i8> undef, <64 x i32> zeroinitializer
964 %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %splat)
; Funnel shift right of two <8 x i64> vectors by a compile-time constant,
; per-lane amount: llvm.fshr.v8i64(%x, %y, <4, 14, 50, 60, 4, 14, 50, 60>).
; In the expected output below, the two VBMI2 configurations use a single
; vpshrdvq with a RIP-relative memory operand; the remaining configurations
; check vpsrlvq and vpsllvq with RIP-relative operands combined by vporq, with
; no masked move afterwards.
; The assertion lines are autogenerated (see the note on the first line of this
; file); regenerate them with the update script rather than editing by hand.
972 define <8 x i64> @constant_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
973 ; AVX512F-LABEL: constant_funnnel_v8i64:
975 ; AVX512F-NEXT: vpsrlvq {{.*}}(%rip), %zmm1, %zmm1
976 ; AVX512F-NEXT: vpsllvq {{.*}}(%rip), %zmm0, %zmm0
977 ; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
980 ; AVX512VL-LABEL: constant_funnnel_v8i64:
982 ; AVX512VL-NEXT: vpsrlvq {{.*}}(%rip), %zmm1, %zmm1
983 ; AVX512VL-NEXT: vpsllvq {{.*}}(%rip), %zmm0, %zmm0
984 ; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
985 ; AVX512VL-NEXT: retq
987 ; AVX512BW-LABEL: constant_funnnel_v8i64:
989 ; AVX512BW-NEXT: vpsrlvq {{.*}}(%rip), %zmm1, %zmm1
990 ; AVX512BW-NEXT: vpsllvq {{.*}}(%rip), %zmm0, %zmm0
991 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
992 ; AVX512BW-NEXT: retq
994 ; AVX512VBMI2-LABEL: constant_funnnel_v8i64:
995 ; AVX512VBMI2: # %bb.0:
996 ; AVX512VBMI2-NEXT: vpshrdvq {{.*}}(%rip), %zmm0, %zmm1
997 ; AVX512VBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
998 ; AVX512VBMI2-NEXT: retq
1000 ; AVX512VLBW-LABEL: constant_funnnel_v8i64:
1001 ; AVX512VLBW: # %bb.0:
1002 ; AVX512VLBW-NEXT: vpsrlvq {{.*}}(%rip), %zmm1, %zmm1
1003 ; AVX512VLBW-NEXT: vpsllvq {{.*}}(%rip), %zmm0, %zmm0
1004 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
1005 ; AVX512VLBW-NEXT: retq
1007 ; AVX512VLVBMI2-LABEL: constant_funnnel_v8i64:
1008 ; AVX512VLVBMI2: # %bb.0:
1009 ; AVX512VLVBMI2-NEXT: vpshrdvq {{.*}}(%rip), %zmm0, %zmm1
1010 ; AVX512VLVBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
1011 ; AVX512VLVBMI2-NEXT: retq
1012 %res = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> <i64 4, i64 14, i64 50, i64 60, i64 4, i64 14, i64 50, i64 60>)
; Funnel shift right of two <16 x i32> vectors by a compile-time constant,
; per-lane amount: llvm.fshr.v16i32(%x, %y, ...) with the amounts 4 through 11
; repeated twice across the sixteen lanes.
; In the expected output below, the two VBMI2 configurations use a single
; vpshrdvd with a RIP-relative memory operand; the remaining configurations
; check vpsrlvd and vpsllvd with RIP-relative operands combined by vpord, with
; no masked move afterwards.
; The assertion lines are autogenerated (see the note on the first line of this
; file); regenerate them with the update script rather than editing by hand.
1016 define <16 x i32> @constant_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
1017 ; AVX512F-LABEL: constant_funnnel_v16i32:
1019 ; AVX512F-NEXT: vpsrlvd {{.*}}(%rip), %zmm1, %zmm1
1020 ; AVX512F-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
1021 ; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0
1022 ; AVX512F-NEXT: retq
1024 ; AVX512VL-LABEL: constant_funnnel_v16i32:
1025 ; AVX512VL: # %bb.0:
1026 ; AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %zmm1, %zmm1
1027 ; AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
1028 ; AVX512VL-NEXT: vpord %zmm1, %zmm0, %zmm0
1029 ; AVX512VL-NEXT: retq
1031 ; AVX512BW-LABEL: constant_funnnel_v16i32:
1032 ; AVX512BW: # %bb.0:
1033 ; AVX512BW-NEXT: vpsrlvd {{.*}}(%rip), %zmm1, %zmm1
1034 ; AVX512BW-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
1035 ; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
1036 ; AVX512BW-NEXT: retq
1038 ; AVX512VBMI2-LABEL: constant_funnnel_v16i32:
1039 ; AVX512VBMI2: # %bb.0:
1040 ; AVX512VBMI2-NEXT: vpshrdvd {{.*}}(%rip), %zmm0, %zmm1
1041 ; AVX512VBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
1042 ; AVX512VBMI2-NEXT: retq
1044 ; AVX512VLBW-LABEL: constant_funnnel_v16i32:
1045 ; AVX512VLBW: # %bb.0:
1046 ; AVX512VLBW-NEXT: vpsrlvd {{.*}}(%rip), %zmm1, %zmm1
1047 ; AVX512VLBW-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
1048 ; AVX512VLBW-NEXT: vpord %zmm1, %zmm0, %zmm0
1049 ; AVX512VLBW-NEXT: retq
1051 ; AVX512VLVBMI2-LABEL: constant_funnnel_v16i32:
1052 ; AVX512VLVBMI2: # %bb.0:
1053 ; AVX512VLVBMI2-NEXT: vpshrdvd {{.*}}(%rip), %zmm0, %zmm1
1054 ; AVX512VLVBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
1055 ; AVX512VLVBMI2-NEXT: retq
1056 %res = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>)
; Funnel shift right of two <32 x i16> vectors by a compile-time constant,
; per-lane amount: llvm.fshr.v32i16(%x, %y, ...) with the amounts 0 through 15
; repeated twice across the thirty-two lanes.
; In the expected output below, the plain F and VL configurations check
; ymm-register vpmulhuw/vpmullw by a power-of-two table, vpor, and
; vpblendw/vpblendd fix-ups; the BW and VLBW configurations check
; vpsrlvw/vpsllvw/vporq followed by a vmovdqu16 of %zmm1 under mask 0x10001
; (bits 0 and 16 -- presumably the two lanes whose shift amount is 0; confirm);
; the two VBMI2 configurations use a single vpshrdvw.
; The assertion lines are autogenerated (see the note on the first line of this
; file); regenerate them with the update script rather than editing by hand.
1060 define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
1061 ; AVX512F-LABEL: constant_funnnel_v32i16:
1063 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = <u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2>
1064 ; AVX512F-NEXT: vpmulhuw %ymm4, %ymm2, %ymm5
1065 ; AVX512F-NEXT: vpblendw {{.*#+}} ymm6 = ymm2[0],ymm5[1,2,3,4,5,6,7],ymm2[8],ymm5[9,10,11,12,13,14,15]
1066 ; AVX512F-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
1067 ; AVX512F-NEXT: vpmullw %ymm4, %ymm0, %ymm0
1068 ; AVX512F-NEXT: vpor %ymm5, %ymm0, %ymm0
1069 ; AVX512F-NEXT: vpblendw {{.*#+}} ymm2 = ymm2[0],ymm0[1,2,3,4,5,6,7],ymm2[8],ymm0[9,10,11,12,13,14,15]
1070 ; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
1071 ; AVX512F-NEXT: vpmulhuw %ymm4, %ymm3, %ymm2
1072 ; AVX512F-NEXT: vpblendw {{.*#+}} ymm5 = ymm3[0],ymm2[1,2,3,4,5,6,7],ymm3[8],ymm2[9,10,11,12,13,14,15]
1073 ; AVX512F-NEXT: vpblendd {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7]
1074 ; AVX512F-NEXT: vpmullw %ymm4, %ymm1, %ymm1
1075 ; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1
1076 ; AVX512F-NEXT: vpblendw {{.*#+}} ymm2 = ymm3[0],ymm1[1,2,3,4,5,6,7],ymm3[8],ymm1[9,10,11,12,13,14,15]
1077 ; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
1078 ; AVX512F-NEXT: retq
1080 ; AVX512VL-LABEL: constant_funnnel_v32i16:
1081 ; AVX512VL: # %bb.0:
1082 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = <u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2>
1083 ; AVX512VL-NEXT: vpmulhuw %ymm4, %ymm2, %ymm5
1084 ; AVX512VL-NEXT: vpblendw {{.*#+}} ymm6 = ymm2[0],ymm5[1,2,3,4,5,6,7],ymm2[8],ymm5[9,10,11,12,13,14,15]
1085 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
1086 ; AVX512VL-NEXT: vpmullw %ymm4, %ymm0, %ymm0
1087 ; AVX512VL-NEXT: vpor %ymm5, %ymm0, %ymm0
1088 ; AVX512VL-NEXT: vpblendw {{.*#+}} ymm2 = ymm2[0],ymm0[1,2,3,4,5,6,7],ymm2[8],ymm0[9,10,11,12,13,14,15]
1089 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
1090 ; AVX512VL-NEXT: vpmulhuw %ymm4, %ymm3, %ymm2
1091 ; AVX512VL-NEXT: vpblendw {{.*#+}} ymm5 = ymm3[0],ymm2[1,2,3,4,5,6,7],ymm3[8],ymm2[9,10,11,12,13,14,15]
1092 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7]
1093 ; AVX512VL-NEXT: vpmullw %ymm4, %ymm1, %ymm1
1094 ; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
1095 ; AVX512VL-NEXT: vpblendw {{.*#+}} ymm2 = ymm3[0],ymm1[1,2,3,4,5,6,7],ymm3[8],ymm1[9,10,11,12,13,14,15]
1096 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
1097 ; AVX512VL-NEXT: retq
1099 ; AVX512BW-LABEL: constant_funnnel_v32i16:
1100 ; AVX512BW: # %bb.0:
1101 ; AVX512BW-NEXT: vpsrlvw {{.*}}(%rip), %zmm1, %zmm2
1102 ; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
1103 ; AVX512BW-NEXT: vporq %zmm2, %zmm0, %zmm0
1104 ; AVX512BW-NEXT: movl $65537, %eax # imm = 0x10001
1105 ; AVX512BW-NEXT: kmovd %eax, %k1
1106 ; AVX512BW-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1}
1107 ; AVX512BW-NEXT: retq
1109 ; AVX512VBMI2-LABEL: constant_funnnel_v32i16:
1110 ; AVX512VBMI2: # %bb.0:
1111 ; AVX512VBMI2-NEXT: vpshrdvw {{.*}}(%rip), %zmm0, %zmm1
1112 ; AVX512VBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
1113 ; AVX512VBMI2-NEXT: retq
1115 ; AVX512VLBW-LABEL: constant_funnnel_v32i16:
1116 ; AVX512VLBW: # %bb.0:
1117 ; AVX512VLBW-NEXT: vpsrlvw {{.*}}(%rip), %zmm1, %zmm2
1118 ; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
1119 ; AVX512VLBW-NEXT: vporq %zmm2, %zmm0, %zmm0
1120 ; AVX512VLBW-NEXT: movl $65537, %eax # imm = 0x10001
1121 ; AVX512VLBW-NEXT: kmovd %eax, %k1
1122 ; AVX512VLBW-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1}
1123 ; AVX512VLBW-NEXT: retq
1125 ; AVX512VLVBMI2-LABEL: constant_funnnel_v32i16:
1126 ; AVX512VLVBMI2: # %bb.0:
1127 ; AVX512VLVBMI2-NEXT: vpshrdvw {{.*}}(%rip), %zmm0, %zmm1
1128 ; AVX512VLVBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
1129 ; AVX512VLVBMI2-NEXT: retq
1130 %res = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
1134 define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
1135 ; AVX512F-LABEL: constant_funnnel_v64i8:
1137 ; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm4
1138 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
1139 ; AVX512F-NEXT: vpand %ymm5, %ymm4, %ymm4
1140 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536]
1141 ; AVX512F-NEXT: vpblendvb %ymm6, %ymm4, %ymm0, %ymm0
1142 ; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm4
1143 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm7 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
1144 ; AVX512F-NEXT: vpand %ymm7, %ymm4, %ymm4
1145 ; AVX512F-NEXT: vpaddb %ymm6, %ymm6, %ymm8
1146 ; AVX512F-NEXT: vpblendvb %ymm8, %ymm4, %ymm0, %ymm0
1147 ; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm4
1148 ; AVX512F-NEXT: vpaddb %ymm8, %ymm8, %ymm9
1149 ; AVX512F-NEXT: vpblendvb %ymm9, %ymm4, %ymm0, %ymm0
1150 ; AVX512F-NEXT: vpxor %xmm4, %xmm4, %xmm4
1151 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm10 = ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15],ymm2[24],ymm4[24],ymm2[25],ymm4[25],ymm2[26],ymm4[26],ymm2[27],ymm4[27],ymm2[28],ymm4[28],ymm2[29],ymm4[29],ymm2[30],ymm4[30],ymm2[31],ymm4[31]
1152 ; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm11 = [256,2,4,8,16,32,64,128,256,2,4,8,16,32,64,128]
1153 ; AVX512F-NEXT: # ymm11 = mem[0,1,0,1]
1154 ; AVX512F-NEXT: vpmullw %ymm11, %ymm10, %ymm10
1155 ; AVX512F-NEXT: vpsrlw $8, %ymm10, %ymm10
1156 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm12 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[16],ymm4[16],ymm2[17],ymm4[17],ymm2[18],ymm4[18],ymm2[19],ymm4[19],ymm2[20],ymm4[20],ymm2[21],ymm4[21],ymm2[22],ymm4[22],ymm2[23],ymm4[23]
1157 ; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm13 = [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2]
1158 ; AVX512F-NEXT: # ymm13 = mem[0,1,0,1]
1159 ; AVX512F-NEXT: vpmullw %ymm13, %ymm12, %ymm12
1160 ; AVX512F-NEXT: vpsrlw $8, %ymm12, %ymm12
1161 ; AVX512F-NEXT: vpackuswb %ymm10, %ymm12, %ymm10
1162 ; AVX512F-NEXT: vpor %ymm10, %ymm0, %ymm0
1163 ; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm10 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
1164 ; AVX512F-NEXT: vpblendvb %ymm10, %ymm0, %ymm2, %ymm0
1165 ; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm2
1166 ; AVX512F-NEXT: vpand %ymm5, %ymm2, %ymm2
1167 ; AVX512F-NEXT: vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
1168 ; AVX512F-NEXT: vpsllw $2, %ymm1, %ymm2
1169 ; AVX512F-NEXT: vpand %ymm7, %ymm2, %ymm2
1170 ; AVX512F-NEXT: vpblendvb %ymm8, %ymm2, %ymm1, %ymm1
1171 ; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm2
1172 ; AVX512F-NEXT: vpblendvb %ymm9, %ymm2, %ymm1, %ymm1
1173 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm3[8],ymm4[8],ymm3[9],ymm4[9],ymm3[10],ymm4[10],ymm3[11],ymm4[11],ymm3[12],ymm4[12],ymm3[13],ymm4[13],ymm3[14],ymm4[14],ymm3[15],ymm4[15],ymm3[24],ymm4[24],ymm3[25],ymm4[25],ymm3[26],ymm4[26],ymm3[27],ymm4[27],ymm3[28],ymm4[28],ymm3[29],ymm4[29],ymm3[30],ymm4[30],ymm3[31],ymm4[31]
1174 ; AVX512F-NEXT: vpmullw %ymm11, %ymm2, %ymm2
1175 ; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2
1176 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm3[0],ymm4[0],ymm3[1],ymm4[1],ymm3[2],ymm4[2],ymm3[3],ymm4[3],ymm3[4],ymm4[4],ymm3[5],ymm4[5],ymm3[6],ymm4[6],ymm3[7],ymm4[7],ymm3[16],ymm4[16],ymm3[17],ymm4[17],ymm3[18],ymm4[18],ymm3[19],ymm4[19],ymm3[20],ymm4[20],ymm3[21],ymm4[21],ymm3[22],ymm4[22],ymm3[23],ymm4[23]
1177 ; AVX512F-NEXT: vpmullw %ymm13, %ymm4, %ymm4
1178 ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
1179 ; AVX512F-NEXT: vpackuswb %ymm2, %ymm4, %ymm2
1180 ; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1
1181 ; AVX512F-NEXT: vpblendvb %ymm10, %ymm1, %ymm3, %ymm1
1182 ; AVX512F-NEXT: retq
1184 ; AVX512VL-LABEL: constant_funnnel_v64i8:
1185 ; AVX512VL: # %bb.0:
1186 ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm4
1187 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
1188 ; AVX512VL-NEXT: vpand %ymm5, %ymm4, %ymm4
1189 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536]
1190 ; AVX512VL-NEXT: vpblendvb %ymm6, %ymm4, %ymm0, %ymm0
1191 ; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm4
1192 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
1193 ; AVX512VL-NEXT: vpand %ymm7, %ymm4, %ymm4
1194 ; AVX512VL-NEXT: vpaddb %ymm6, %ymm6, %ymm8
1195 ; AVX512VL-NEXT: vpblendvb %ymm8, %ymm4, %ymm0, %ymm0
1196 ; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm4
1197 ; AVX512VL-NEXT: vpaddb %ymm8, %ymm8, %ymm9
1198 ; AVX512VL-NEXT: vpblendvb %ymm9, %ymm4, %ymm0, %ymm0
1199 ; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
1200 ; AVX512VL-NEXT: vpsrlw $8, %ymm4, %ymm4
1201 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm10 = [256,2,4,8,16,32,64,128,256,2,4,8,16,32,64,128]
1202 ; AVX512VL-NEXT: # ymm10 = mem[0,1,0,1]
1203 ; AVX512VL-NEXT: vpmullw %ymm10, %ymm4, %ymm4
1204 ; AVX512VL-NEXT: vpsrlw $8, %ymm4, %ymm4
1205 ; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm11 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
1206 ; AVX512VL-NEXT: vpsrlw $8, %ymm11, %ymm11
1207 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm12 = [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2]
1208 ; AVX512VL-NEXT: # ymm12 = mem[0,1,0,1]
1209 ; AVX512VL-NEXT: vpmullw %ymm12, %ymm11, %ymm11
1210 ; AVX512VL-NEXT: vpsrlw $8, %ymm11, %ymm11
1211 ; AVX512VL-NEXT: vpackuswb %ymm4, %ymm11, %ymm4
1212 ; AVX512VL-NEXT: vpor %ymm4, %ymm0, %ymm0
1213 ; AVX512VL-NEXT: vpbroadcastq {{.*#+}} ymm4 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
1214 ; AVX512VL-NEXT: vpblendvb %ymm4, %ymm0, %ymm2, %ymm0
1215 ; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm2
1216 ; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
1217 ; AVX512VL-NEXT: vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
1218 ; AVX512VL-NEXT: vpsllw $2, %ymm1, %ymm2
1219 ; AVX512VL-NEXT: vpand %ymm7, %ymm2, %ymm2
1220 ; AVX512VL-NEXT: vpblendvb %ymm8, %ymm2, %ymm1, %ymm1
1221 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm2
1222 ; AVX512VL-NEXT: vpblendvb %ymm9, %ymm2, %ymm1, %ymm1
1223 ; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
1224 ; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
1225 ; AVX512VL-NEXT: vpmullw %ymm10, %ymm2, %ymm2
1226 ; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
1227 ; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
1228 ; AVX512VL-NEXT: vpsrlw $8, %ymm5, %ymm5
1229 ; AVX512VL-NEXT: vpmullw %ymm12, %ymm5, %ymm5
1230 ; AVX512VL-NEXT: vpsrlw $8, %ymm5, %ymm5
1231 ; AVX512VL-NEXT: vpackuswb %ymm2, %ymm5, %ymm2
1232 ; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
1233 ; AVX512VL-NEXT: vpblendvb %ymm4, %ymm1, %ymm3, %ymm1
1234 ; AVX512VL-NEXT: retq
1236 ; AVX512BW-LABEL: constant_funnnel_v64i8:
1237 ; AVX512BW: # %bb.0:
1238 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536]
1239 ; AVX512BW-NEXT: vpmovb2m %zmm2, %k1
1240 ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm3
1241 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
1242 ; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
1243 ; AVX512BW-NEXT: vpsllw $2, %zmm0, %zmm3
1244 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
1245 ; AVX512BW-NEXT: vpaddb %zmm2, %zmm2, %zmm2
1246 ; AVX512BW-NEXT: vpmovb2m %zmm2, %k1
1247 ; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
1248 ; AVX512BW-NEXT: vpaddb %zmm2, %zmm2, %zmm2
1249 ; AVX512BW-NEXT: vpmovb2m %zmm2, %k1
1250 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
1251 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
1252 ; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
1253 ; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm2, %zmm2
1254 ; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
1255 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm3 = zmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
1256 ; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3
1257 ; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm3, %zmm3
1258 ; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3
1259 ; AVX512BW-NEXT: vpackuswb %zmm2, %zmm3, %zmm2
1260 ; AVX512BW-NEXT: vporq %zmm2, %zmm0, %zmm0
1261 ; AVX512BW-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
1262 ; AVX512BW-NEXT: kmovq %rax, %k1
1263 ; AVX512BW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1}
1264 ; AVX512BW-NEXT: retq
1266 ; AVX512VBMI2-LABEL: constant_funnnel_v64i8:
1267 ; AVX512VBMI2: # %bb.0:
1268 ; AVX512VBMI2-NEXT: vmovdqa64 {{.*#+}} zmm2 = [57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536]
1269 ; AVX512VBMI2-NEXT: vpmovb2m %zmm2, %k1
1270 ; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm3
1271 ; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
1272 ; AVX512VBMI2-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
1273 ; AVX512VBMI2-NEXT: vpsllw $2, %zmm0, %zmm3
1274 ; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
1275 ; AVX512VBMI2-NEXT: vpaddb %zmm2, %zmm2, %zmm2
1276 ; AVX512VBMI2-NEXT: vpmovb2m %zmm2, %k1
1277 ; AVX512VBMI2-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
1278 ; AVX512VBMI2-NEXT: vpaddb %zmm2, %zmm2, %zmm2
1279 ; AVX512VBMI2-NEXT: vpmovb2m %zmm2, %k1
1280 ; AVX512VBMI2-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
1281 ; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
1282 ; AVX512VBMI2-NEXT: vpsrlw $8, %zmm2, %zmm2
1283 ; AVX512VBMI2-NEXT: vpsllvw {{.*}}(%rip), %zmm2, %zmm2
1284 ; AVX512VBMI2-NEXT: vpsrlw $8, %zmm2, %zmm2
1285 ; AVX512VBMI2-NEXT: vpunpcklbw {{.*#+}} zmm3 = zmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
1286 ; AVX512VBMI2-NEXT: vpsrlw $8, %zmm3, %zmm3
1287 ; AVX512VBMI2-NEXT: vpsllvw {{.*}}(%rip), %zmm3, %zmm3
1288 ; AVX512VBMI2-NEXT: vpsrlw $8, %zmm3, %zmm3
1289 ; AVX512VBMI2-NEXT: vpackuswb %zmm2, %zmm3, %zmm2
1290 ; AVX512VBMI2-NEXT: vporq %zmm2, %zmm0, %zmm0
1291 ; AVX512VBMI2-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
1292 ; AVX512VBMI2-NEXT: kmovq %rax, %k1
1293 ; AVX512VBMI2-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1}
1294 ; AVX512VBMI2-NEXT: retq
1296 ; AVX512VLBW-LABEL: constant_funnnel_v64i8:
1297 ; AVX512VLBW: # %bb.0:
1298 ; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536]
1299 ; AVX512VLBW-NEXT: vpmovb2m %zmm2, %k1
1300 ; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm3
1301 ; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
1302 ; AVX512VLBW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
1303 ; AVX512VLBW-NEXT: vpsllw $2, %zmm0, %zmm3
1304 ; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
1305 ; AVX512VLBW-NEXT: vpaddb %zmm2, %zmm2, %zmm2
1306 ; AVX512VLBW-NEXT: vpmovb2m %zmm2, %k1
1307 ; AVX512VLBW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
1308 ; AVX512VLBW-NEXT: vpaddb %zmm2, %zmm2, %zmm2
1309 ; AVX512VLBW-NEXT: vpmovb2m %zmm2, %k1
1310 ; AVX512VLBW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
1311 ; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
1312 ; AVX512VLBW-NEXT: vpsrlw $8, %zmm2, %zmm2
1313 ; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %zmm2, %zmm2
1314 ; AVX512VLBW-NEXT: vpsrlw $8, %zmm2, %zmm2
1315 ; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} zmm3 = zmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
1316 ; AVX512VLBW-NEXT: vpsrlw $8, %zmm3, %zmm3
1317 ; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %zmm3, %zmm3
1318 ; AVX512VLBW-NEXT: vpsrlw $8, %zmm3, %zmm3
1319 ; AVX512VLBW-NEXT: vpackuswb %zmm2, %zmm3, %zmm2
1320 ; AVX512VLBW-NEXT: vporq %zmm2, %zmm0, %zmm0
1321 ; AVX512VLBW-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
1322 ; AVX512VLBW-NEXT: kmovq %rax, %k1
1323 ; AVX512VLBW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1}
1324 ; AVX512VLBW-NEXT: retq
1326 ; AVX512VLVBMI2-LABEL: constant_funnnel_v64i8:
1327 ; AVX512VLVBMI2: # %bb.0:
1328 ; AVX512VLVBMI2-NEXT: vmovdqa64 {{.*#+}} zmm2 = [57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536]
1329 ; AVX512VLVBMI2-NEXT: vpmovb2m %zmm2, %k1
1330 ; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm3
1331 ; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
1332 ; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
1333 ; AVX512VLVBMI2-NEXT: vpsllw $2, %zmm0, %zmm3
1334 ; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
1335 ; AVX512VLVBMI2-NEXT: vpaddb %zmm2, %zmm2, %zmm2
1336 ; AVX512VLVBMI2-NEXT: vpmovb2m %zmm2, %k1
1337 ; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
1338 ; AVX512VLVBMI2-NEXT: vpaddb %zmm2, %zmm2, %zmm2
1339 ; AVX512VLVBMI2-NEXT: vpmovb2m %zmm2, %k1
1340 ; AVX512VLVBMI2-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
1341 ; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
1342 ; AVX512VLVBMI2-NEXT: vpsrlw $8, %zmm2, %zmm2
1343 ; AVX512VLVBMI2-NEXT: vpsllvw {{.*}}(%rip), %zmm2, %zmm2
1344 ; AVX512VLVBMI2-NEXT: vpsrlw $8, %zmm2, %zmm2
1345 ; AVX512VLVBMI2-NEXT: vpunpcklbw {{.*#+}} zmm3 = zmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
1346 ; AVX512VLVBMI2-NEXT: vpsrlw $8, %zmm3, %zmm3
1347 ; AVX512VLVBMI2-NEXT: vpsllvw {{.*}}(%rip), %zmm3, %zmm3
1348 ; AVX512VLVBMI2-NEXT: vpsrlw $8, %zmm3, %zmm3
1349 ; AVX512VLVBMI2-NEXT: vpackuswb %zmm2, %zmm3, %zmm2
1350 ; AVX512VLVBMI2-NEXT: vporq %zmm2, %zmm0, %zmm0
1351 ; AVX512VLVBMI2-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
1352 ; AVX512VLVBMI2-NEXT: kmovq %rax, %k1
1353 ; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1}
1354 ; AVX512VLVBMI2-NEXT: retq
1355 %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1>)
1360 ; Uniform Constant Shifts
; Funnel-shift-right test on <8 x i64> with a uniform (splat) constant amount:
; every lane of the shift-amount operand passed to @llvm.fshr.v8i64 is 14.
; Expected codegen, as pinned by the assertions below:
;   * runs whose -mattr includes +avx512vbmi2 emit a single "vpshrdq $14";
;   * all other runs emit "vpsrlq $14" on zmm1, "vpsllq $50" on zmm0
;     (50 = 64 - 14) and a "vporq" that merges the two partial results.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing them by hand.
1363 define <8 x i64> @splatconstant_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
1364 ; AVX512F-LABEL: splatconstant_funnnel_v8i64:
1366 ; AVX512F-NEXT: vpsrlq $14, %zmm1, %zmm1
1367 ; AVX512F-NEXT: vpsllq $50, %zmm0, %zmm0
1368 ; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
1369 ; AVX512F-NEXT: retq
1371 ; AVX512VL-LABEL: splatconstant_funnnel_v8i64:
1372 ; AVX512VL: # %bb.0:
1373 ; AVX512VL-NEXT: vpsrlq $14, %zmm1, %zmm1
1374 ; AVX512VL-NEXT: vpsllq $50, %zmm0, %zmm0
1375 ; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
1376 ; AVX512VL-NEXT: retq
1378 ; AVX512BW-LABEL: splatconstant_funnnel_v8i64:
1379 ; AVX512BW: # %bb.0:
1380 ; AVX512BW-NEXT: vpsrlq $14, %zmm1, %zmm1
1381 ; AVX512BW-NEXT: vpsllq $50, %zmm0, %zmm0
1382 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
1383 ; AVX512BW-NEXT: retq
1385 ; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i64:
1386 ; AVX512VBMI2: # %bb.0:
1387 ; AVX512VBMI2-NEXT: vpshrdq $14, %zmm0, %zmm1, %zmm0
1388 ; AVX512VBMI2-NEXT: retq
1390 ; AVX512VLBW-LABEL: splatconstant_funnnel_v8i64:
1391 ; AVX512VLBW: # %bb.0:
1392 ; AVX512VLBW-NEXT: vpsrlq $14, %zmm1, %zmm1
1393 ; AVX512VLBW-NEXT: vpsllq $50, %zmm0, %zmm0
1394 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
1395 ; AVX512VLBW-NEXT: retq
1397 ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v8i64:
1398 ; AVX512VLVBMI2: # %bb.0:
1399 ; AVX512VLVBMI2-NEXT: vpshrdq $14, %zmm0, %zmm1, %zmm0
1400 ; AVX512VLVBMI2-NEXT: retq
; IR under test: fshr(%x, %y, splat(14)) on 64-bit lanes.
1401 %res = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> <i64 14, i64 14, i64 14, i64 14, i64 14, i64 14, i64 14, i64 14>)
; Funnel-shift-right test on <16 x i32> with a uniform (splat) constant amount:
; every lane of the shift-amount operand passed to @llvm.fshr.v16i32 is 4.
; Expected codegen, as pinned by the assertions below:
;   * runs whose -mattr includes +avx512vbmi2 emit a single "vpshrdd $4";
;   * all other runs emit "vpsrld $4" on zmm1, "vpslld $28" on zmm0
;     (28 = 32 - 4) and a "vpord" that merges the two partial results.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing them by hand.
1405 define <16 x i32> @splatconstant_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
1406 ; AVX512F-LABEL: splatconstant_funnnel_v16i32:
1408 ; AVX512F-NEXT: vpsrld $4, %zmm1, %zmm1
1409 ; AVX512F-NEXT: vpslld $28, %zmm0, %zmm0
1410 ; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0
1411 ; AVX512F-NEXT: retq
1413 ; AVX512VL-LABEL: splatconstant_funnnel_v16i32:
1414 ; AVX512VL: # %bb.0:
1415 ; AVX512VL-NEXT: vpsrld $4, %zmm1, %zmm1
1416 ; AVX512VL-NEXT: vpslld $28, %zmm0, %zmm0
1417 ; AVX512VL-NEXT: vpord %zmm1, %zmm0, %zmm0
1418 ; AVX512VL-NEXT: retq
1420 ; AVX512BW-LABEL: splatconstant_funnnel_v16i32:
1421 ; AVX512BW: # %bb.0:
1422 ; AVX512BW-NEXT: vpsrld $4, %zmm1, %zmm1
1423 ; AVX512BW-NEXT: vpslld $28, %zmm0, %zmm0
1424 ; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
1425 ; AVX512BW-NEXT: retq
1427 ; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i32:
1428 ; AVX512VBMI2: # %bb.0:
1429 ; AVX512VBMI2-NEXT: vpshrdd $4, %zmm0, %zmm1, %zmm0
1430 ; AVX512VBMI2-NEXT: retq
1432 ; AVX512VLBW-LABEL: splatconstant_funnnel_v16i32:
1433 ; AVX512VLBW: # %bb.0:
1434 ; AVX512VLBW-NEXT: vpsrld $4, %zmm1, %zmm1
1435 ; AVX512VLBW-NEXT: vpslld $28, %zmm0, %zmm0
1436 ; AVX512VLBW-NEXT: vpord %zmm1, %zmm0, %zmm0
1437 ; AVX512VLBW-NEXT: retq
1439 ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i32:
1440 ; AVX512VLVBMI2: # %bb.0:
1441 ; AVX512VLVBMI2-NEXT: vpshrdd $4, %zmm0, %zmm1, %zmm0
1442 ; AVX512VLVBMI2-NEXT: retq
; IR under test: fshr(%x, %y, splat(4)) on 32-bit lanes.
1443 %res = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>)
; Funnel-shift-right test on <32 x i16> with a uniform (splat) constant amount:
; every lane of the shift-amount operand passed to @llvm.fshr.v32i16 is 7.
; Expected codegen, as pinned by the assertions below:
;   * runs whose -mattr includes +avx512vbmi2 emit a single "vpshrdw $7";
;   * runs with +avx512bw (with or without +avx512vl) emit 512-bit
;     "vpsrlw $7" / "vpsllw $9" / "vporq" on zmm registers;
;   * the +avx512f-only and +avx512f,+avx512vl runs work on ymm registers
;     instead, repeating the "vpsrlw $7" / "vpsllw $9" / "vpor" sequence
;     twice (results land in ymm0 and ymm1).
; In the shift/or sequences 9 = 16 - 7.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing them by hand.
1447 define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
1448 ; AVX512F-LABEL: splatconstant_funnnel_v32i16:
1450 ; AVX512F-NEXT: vpsrlw $7, %ymm2, %ymm2
1451 ; AVX512F-NEXT: vpsllw $9, %ymm0, %ymm0
1452 ; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0
1453 ; AVX512F-NEXT: vpsrlw $7, %ymm3, %ymm2
1454 ; AVX512F-NEXT: vpsllw $9, %ymm1, %ymm1
1455 ; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1
1456 ; AVX512F-NEXT: retq
1458 ; AVX512VL-LABEL: splatconstant_funnnel_v32i16:
1459 ; AVX512VL: # %bb.0:
1460 ; AVX512VL-NEXT: vpsrlw $7, %ymm2, %ymm2
1461 ; AVX512VL-NEXT: vpsllw $9, %ymm0, %ymm0
1462 ; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0
1463 ; AVX512VL-NEXT: vpsrlw $7, %ymm3, %ymm2
1464 ; AVX512VL-NEXT: vpsllw $9, %ymm1, %ymm1
1465 ; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
1466 ; AVX512VL-NEXT: retq
1468 ; AVX512BW-LABEL: splatconstant_funnnel_v32i16:
1469 ; AVX512BW: # %bb.0:
1470 ; AVX512BW-NEXT: vpsrlw $7, %zmm1, %zmm1
1471 ; AVX512BW-NEXT: vpsllw $9, %zmm0, %zmm0
1472 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
1473 ; AVX512BW-NEXT: retq
1475 ; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i16:
1476 ; AVX512VBMI2: # %bb.0:
1477 ; AVX512VBMI2-NEXT: vpshrdw $7, %zmm0, %zmm1, %zmm0
1478 ; AVX512VBMI2-NEXT: retq
1480 ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i16:
1481 ; AVX512VLBW: # %bb.0:
1482 ; AVX512VLBW-NEXT: vpsrlw $7, %zmm1, %zmm1
1483 ; AVX512VLBW-NEXT: vpsllw $9, %zmm0, %zmm0
1484 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
1485 ; AVX512VLBW-NEXT: retq
1487 ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i16:
1488 ; AVX512VLVBMI2: # %bb.0:
1489 ; AVX512VLVBMI2-NEXT: vpshrdw $7, %zmm0, %zmm1, %zmm0
1490 ; AVX512VLVBMI2-NEXT: retq
; IR under test: fshr(%x, %y, splat(7)) on 16-bit lanes.
1491 %res = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
1495 define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
1496 ; AVX512F-LABEL: splatconstant_funnnel_v64i8:
1498 ; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
1499 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
1500 ; AVX512F-NEXT: vpandn %ymm2, %ymm4, %ymm2
1501 ; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
1502 ; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0
1503 ; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0
1504 ; AVX512F-NEXT: vpsrlw $4, %ymm3, %ymm2
1505 ; AVX512F-NEXT: vpandn %ymm2, %ymm4, %ymm2
1506 ; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm1
1507 ; AVX512F-NEXT: vpand %ymm4, %ymm1, %ymm1
1508 ; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1
1509 ; AVX512F-NEXT: retq
1511 ; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
1512 ; AVX512VL: # %bb.0:
1513 ; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
1514 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
1515 ; AVX512VL-NEXT: vpandn %ymm2, %ymm4, %ymm2
1516 ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
1517 ; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm0
1518 ; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0
1519 ; AVX512VL-NEXT: vpsrlw $4, %ymm3, %ymm2
1520 ; AVX512VL-NEXT: vpandn %ymm2, %ymm4, %ymm2
1521 ; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm1
1522 ; AVX512VL-NEXT: vpand %ymm4, %ymm1, %ymm1
1523 ; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
1524 ; AVX512VL-NEXT: retq
1526 ; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
1527 ; AVX512BW: # %bb.0:
1528 ; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm1
1529 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
1530 ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0
1531 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
1532 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
1533 ; AVX512BW-NEXT: retq
1535 ; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8:
1536 ; AVX512VBMI2: # %bb.0:
1537 ; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm1
1538 ; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
1539 ; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm0
1540 ; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
1541 ; AVX512VBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
1542 ; AVX512VBMI2-NEXT: retq
1544 ; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
1545 ; AVX512VLBW: # %bb.0:
1546 ; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm1
1547 ; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
1548 ; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0
1549 ; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
1550 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
1551 ; AVX512VLBW-NEXT: retq
1553 ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8:
1554 ; AVX512VLVBMI2: # %bb.0:
1555 ; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm1
1556 ; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
1557 ; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm0
1558 ; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
1559 ; AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
1560 ; AVX512VLVBMI2-NEXT: retq
1561 %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)