1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VLBW
; Funnel-shift-right intrinsic declarations for the 512-bit vector types
; called in this file (8 x i64, 16 x i32, 32 x i16 and 64 x i8).
7 declare <8 x i64> @llvm.fshr.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)
8 declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
9 declare <32 x i16> @llvm.fshr.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)
10 declare <64 x i8> @llvm.fshr.v64i8(<64 x i8>, <64 x i8>, <64 x i8>)
; Per-element variable rotate right of <8 x i64>: llvm.fshr is called with %x
; as both value operands and %amt as the per-lane amount. All four RUN lines
; share the common AVX512 check prefix for this function and expect vprorvq.
16 define <8 x i64> @var_funnnel_v8i64(<8 x i64> %x, <8 x i64> %amt) nounwind {
17 ; AVX512-LABEL: var_funnnel_v8i64:
19 ; AVX512-NEXT: vprorvq %zmm1, %zmm0, %zmm0
21 %res = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x, <8 x i64> %x, <8 x i64> %amt)
; Per-element variable rotate right of <16 x i32>: llvm.fshr is called with %x
; as both value operands and %amt as the per-lane amount. All four RUN lines
; share the common AVX512 check prefix for this function and expect vprorvd.
25 define <16 x i32> @var_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounwind {
26 ; AVX512-LABEL: var_funnnel_v16i32:
28 ; AVX512-NEXT: vprorvd %zmm1, %zmm0, %zmm0
30 %res = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x, <16 x i32> %x, <16 x i32> %amt)
; Per-element variable rotate right of <32 x i16> (llvm.fshr with %x as both
; value operands). The expected code contains no single rotate instruction:
;  * AVX512F and AVX512VL runs: for each 256-bit half, the amount is negated
;    and masked with 15, the data is zero-extended to 32-bit lanes (vpmovzxwd),
;    shifted left by that count and right by 16 minus that count
;    (vpsllvd / vpsrlvd), OR'd, and truncated back with vpmovdw.
;  * AVX512BW and AVX512VLBW runs: vpsrlvw by (amt & 15) is OR'd with vpsllvw
;    by (-amt & 15) on the full zmm register.
34 define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
35 ; AVX512F-LABEL: var_funnnel_v32i16:
37 ; AVX512F-NEXT: vpxor %xmm4, %xmm4, %xmm4
38 ; AVX512F-NEXT: vpsubw %ymm2, %ymm4, %ymm2
39 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
40 ; AVX512F-NEXT: vpand %ymm5, %ymm2, %ymm2
41 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm6 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
42 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
43 ; AVX512F-NEXT: vpsllvd %zmm6, %zmm0, %zmm6
44 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm7 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
45 ; AVX512F-NEXT: vpsubw %ymm2, %ymm7, %ymm2
46 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
47 ; AVX512F-NEXT: vpsrlvd %zmm2, %zmm0, %zmm0
48 ; AVX512F-NEXT: vpord %zmm0, %zmm6, %zmm0
49 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
50 ; AVX512F-NEXT: vpsubw %ymm3, %ymm4, %ymm2
51 ; AVX512F-NEXT: vpand %ymm5, %ymm2, %ymm2
52 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
53 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
54 ; AVX512F-NEXT: vpsllvd %zmm3, %zmm1, %zmm3
55 ; AVX512F-NEXT: vpsubw %ymm2, %ymm7, %ymm2
56 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
57 ; AVX512F-NEXT: vpsrlvd %zmm2, %zmm1, %zmm1
58 ; AVX512F-NEXT: vpord %zmm1, %zmm3, %zmm1
59 ; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
62 ; AVX512VL-LABEL: var_funnnel_v32i16:
64 ; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
65 ; AVX512VL-NEXT: vpsubw %ymm2, %ymm4, %ymm2
66 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
67 ; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
68 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm6 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
69 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
70 ; AVX512VL-NEXT: vpsllvd %zmm6, %zmm0, %zmm6
71 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
72 ; AVX512VL-NEXT: vpsubw %ymm2, %ymm7, %ymm2
73 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
74 ; AVX512VL-NEXT: vpsrlvd %zmm2, %zmm0, %zmm0
75 ; AVX512VL-NEXT: vpord %zmm0, %zmm6, %zmm0
76 ; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
77 ; AVX512VL-NEXT: vpsubw %ymm3, %ymm4, %ymm2
78 ; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
79 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
80 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
81 ; AVX512VL-NEXT: vpsllvd %zmm3, %zmm1, %zmm3
82 ; AVX512VL-NEXT: vpsubw %ymm2, %ymm7, %ymm2
83 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
84 ; AVX512VL-NEXT: vpsrlvd %zmm2, %zmm1, %zmm1
85 ; AVX512VL-NEXT: vpord %zmm1, %zmm3, %zmm1
86 ; AVX512VL-NEXT: vpmovdw %zmm1, %ymm1
89 ; AVX512BW-LABEL: var_funnnel_v32i16:
91 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
92 ; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm3
93 ; AVX512BW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3
94 ; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
95 ; AVX512BW-NEXT: vpsubw %zmm1, %zmm4, %zmm1
96 ; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm1
97 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
98 ; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
101 ; AVX512VLBW-LABEL: var_funnnel_v32i16:
102 ; AVX512VLBW: # %bb.0:
103 ; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
104 ; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm3
105 ; AVX512VLBW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3
106 ; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
107 ; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm4, %zmm1
108 ; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm1
109 ; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
110 ; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
111 ; AVX512VLBW-NEXT: retq
112 %res = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x, <32 x i16> %x, <32 x i16> %amt)
; Per-element variable rotate right of <64 x i8> (llvm.fshr with %x as both
; value operands).
;  * AVX512F and AVX512VL runs: for each 256-bit half, the amount is negated,
;    masked with 7 and shifted left by 5 so that its bits can drive vpblendvb;
;    byte rotates left by 4, 2 and 1 (built from word shifts plus masks) are
;    then selected in turn, with vpaddb doubling the selector between steps.
;  * AVX512BW and AVX512VLBW runs: a right shift by (amt & 7) and a left shift
;    by (-amt & 7) are each built from 4/2/1 steps predicated through mask
;    registers (vpmovb2m), then combined with vporq.
116 define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind {
117 ; AVX512F-LABEL: var_funnnel_v64i8:
119 ; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm4
120 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
121 ; AVX512F-NEXT: vpandn %ymm4, %ymm5, %ymm4
122 ; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm6
123 ; AVX512F-NEXT: vpand %ymm5, %ymm6, %ymm6
124 ; AVX512F-NEXT: vpor %ymm4, %ymm6, %ymm4
125 ; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6
126 ; AVX512F-NEXT: vpsubb %ymm2, %ymm6, %ymm2
127 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm7 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
128 ; AVX512F-NEXT: vpand %ymm7, %ymm2, %ymm2
129 ; AVX512F-NEXT: vpsllw $5, %ymm2, %ymm2
130 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
131 ; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm4
132 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm8 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
133 ; AVX512F-NEXT: vpandn %ymm4, %ymm8, %ymm4
134 ; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm9
135 ; AVX512F-NEXT: vpand %ymm8, %ymm9, %ymm9
136 ; AVX512F-NEXT: vpor %ymm4, %ymm9, %ymm4
137 ; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm2
138 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
139 ; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm4
140 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm9 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
141 ; AVX512F-NEXT: vpand %ymm9, %ymm4, %ymm4
142 ; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm10
143 ; AVX512F-NEXT: vpor %ymm4, %ymm10, %ymm4
144 ; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm2
145 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
146 ; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm2
147 ; AVX512F-NEXT: vpandn %ymm2, %ymm5, %ymm2
148 ; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm4
149 ; AVX512F-NEXT: vpand %ymm5, %ymm4, %ymm4
150 ; AVX512F-NEXT: vpor %ymm2, %ymm4, %ymm2
151 ; AVX512F-NEXT: vpsubb %ymm3, %ymm6, %ymm3
152 ; AVX512F-NEXT: vpand %ymm7, %ymm3, %ymm3
153 ; AVX512F-NEXT: vpsllw $5, %ymm3, %ymm3
154 ; AVX512F-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
155 ; AVX512F-NEXT: vpsrlw $6, %ymm1, %ymm2
156 ; AVX512F-NEXT: vpandn %ymm2, %ymm8, %ymm2
157 ; AVX512F-NEXT: vpsllw $2, %ymm1, %ymm4
158 ; AVX512F-NEXT: vpand %ymm8, %ymm4, %ymm4
159 ; AVX512F-NEXT: vpor %ymm2, %ymm4, %ymm2
160 ; AVX512F-NEXT: vpaddb %ymm3, %ymm3, %ymm3
161 ; AVX512F-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
162 ; AVX512F-NEXT: vpsrlw $7, %ymm1, %ymm2
163 ; AVX512F-NEXT: vpand %ymm9, %ymm2, %ymm2
164 ; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm4
165 ; AVX512F-NEXT: vpor %ymm2, %ymm4, %ymm2
166 ; AVX512F-NEXT: vpaddb %ymm3, %ymm3, %ymm3
167 ; AVX512F-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
170 ; AVX512VL-LABEL: var_funnnel_v64i8:
172 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm4
173 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
174 ; AVX512VL-NEXT: vpandn %ymm4, %ymm5, %ymm4
175 ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm6
176 ; AVX512VL-NEXT: vpand %ymm5, %ymm6, %ymm6
177 ; AVX512VL-NEXT: vpor %ymm4, %ymm6, %ymm4
178 ; AVX512VL-NEXT: vpxor %xmm6, %xmm6, %xmm6
179 ; AVX512VL-NEXT: vpsubb %ymm2, %ymm6, %ymm2
180 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
181 ; AVX512VL-NEXT: vpand %ymm7, %ymm2, %ymm2
182 ; AVX512VL-NEXT: vpsllw $5, %ymm2, %ymm2
183 ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
184 ; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm4
185 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm8 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
186 ; AVX512VL-NEXT: vpandn %ymm4, %ymm8, %ymm4
187 ; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm9
188 ; AVX512VL-NEXT: vpand %ymm8, %ymm9, %ymm9
189 ; AVX512VL-NEXT: vpor %ymm4, %ymm9, %ymm4
190 ; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
191 ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
192 ; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm4
193 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm9 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
194 ; AVX512VL-NEXT: vpand %ymm9, %ymm4, %ymm4
195 ; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm10
196 ; AVX512VL-NEXT: vpor %ymm4, %ymm10, %ymm4
197 ; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
198 ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
199 ; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm2
200 ; AVX512VL-NEXT: vpandn %ymm2, %ymm5, %ymm2
201 ; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm4
202 ; AVX512VL-NEXT: vpand %ymm5, %ymm4, %ymm4
203 ; AVX512VL-NEXT: vpor %ymm2, %ymm4, %ymm2
204 ; AVX512VL-NEXT: vpsubb %ymm3, %ymm6, %ymm3
205 ; AVX512VL-NEXT: vpand %ymm7, %ymm3, %ymm3
206 ; AVX512VL-NEXT: vpsllw $5, %ymm3, %ymm3
207 ; AVX512VL-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
208 ; AVX512VL-NEXT: vpsrlw $6, %ymm1, %ymm2
209 ; AVX512VL-NEXT: vpandn %ymm2, %ymm8, %ymm2
210 ; AVX512VL-NEXT: vpsllw $2, %ymm1, %ymm4
211 ; AVX512VL-NEXT: vpand %ymm8, %ymm4, %ymm4
212 ; AVX512VL-NEXT: vpor %ymm2, %ymm4, %ymm2
213 ; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm3
214 ; AVX512VL-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
215 ; AVX512VL-NEXT: vpsrlw $7, %ymm1, %ymm2
216 ; AVX512VL-NEXT: vpand %ymm9, %ymm2, %ymm2
217 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm4
218 ; AVX512VL-NEXT: vpor %ymm2, %ymm4, %ymm2
219 ; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm3
220 ; AVX512VL-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
221 ; AVX512VL-NEXT: retq
223 ; AVX512BW-LABEL: var_funnnel_v64i8:
225 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
226 ; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm3
227 ; AVX512BW-NEXT: vpsllw $5, %zmm3, %zmm3
228 ; AVX512BW-NEXT: vpaddb %zmm3, %zmm3, %zmm4
229 ; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
230 ; AVX512BW-NEXT: vpmovb2m %zmm3, %k2
231 ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm3
232 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
233 ; AVX512BW-NEXT: vpblendmb %zmm3, %zmm0, %zmm3 {%k2}
234 ; AVX512BW-NEXT: vpsrlw $2, %zmm3, %zmm5
235 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
236 ; AVX512BW-NEXT: vmovdqu8 %zmm5, %zmm3 {%k1}
237 ; AVX512BW-NEXT: vpsrlw $1, %zmm3, %zmm5
238 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
239 ; AVX512BW-NEXT: vpaddb %zmm4, %zmm4, %zmm4
240 ; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
241 ; AVX512BW-NEXT: vmovdqu8 %zmm5, %zmm3 {%k1}
242 ; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
243 ; AVX512BW-NEXT: vpsubb %zmm1, %zmm4, %zmm1
244 ; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm1
245 ; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1
246 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm2
247 ; AVX512BW-NEXT: vpmovb2m %zmm2, %k1
248 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k2
249 ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1
250 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
251 ; AVX512BW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k2}
252 ; AVX512BW-NEXT: vpsllw $2, %zmm0, %zmm1
253 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
254 ; AVX512BW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1}
255 ; AVX512BW-NEXT: vpaddb %zmm2, %zmm2, %zmm1
256 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
257 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
258 ; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
259 ; AVX512BW-NEXT: retq
261 ; AVX512VLBW-LABEL: var_funnnel_v64i8:
262 ; AVX512VLBW: # %bb.0:
263 ; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
264 ; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm3
265 ; AVX512VLBW-NEXT: vpsllw $5, %zmm3, %zmm3
266 ; AVX512VLBW-NEXT: vpaddb %zmm3, %zmm3, %zmm4
267 ; AVX512VLBW-NEXT: vpmovb2m %zmm4, %k1
268 ; AVX512VLBW-NEXT: vpmovb2m %zmm3, %k2
269 ; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm3
270 ; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
271 ; AVX512VLBW-NEXT: vpblendmb %zmm3, %zmm0, %zmm3 {%k2}
272 ; AVX512VLBW-NEXT: vpsrlw $2, %zmm3, %zmm5
273 ; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
274 ; AVX512VLBW-NEXT: vmovdqu8 %zmm5, %zmm3 {%k1}
275 ; AVX512VLBW-NEXT: vpsrlw $1, %zmm3, %zmm5
276 ; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
277 ; AVX512VLBW-NEXT: vpaddb %zmm4, %zmm4, %zmm4
278 ; AVX512VLBW-NEXT: vpmovb2m %zmm4, %k1
279 ; AVX512VLBW-NEXT: vmovdqu8 %zmm5, %zmm3 {%k1}
280 ; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
281 ; AVX512VLBW-NEXT: vpsubb %zmm1, %zmm4, %zmm1
282 ; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm1
283 ; AVX512VLBW-NEXT: vpsllw $5, %zmm1, %zmm1
284 ; AVX512VLBW-NEXT: vpaddb %zmm1, %zmm1, %zmm2
285 ; AVX512VLBW-NEXT: vpmovb2m %zmm2, %k1
286 ; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k2
287 ; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1
288 ; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
289 ; AVX512VLBW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k2}
290 ; AVX512VLBW-NEXT: vpsllw $2, %zmm0, %zmm1
291 ; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
292 ; AVX512VLBW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1}
293 ; AVX512VLBW-NEXT: vpaddb %zmm2, %zmm2, %zmm1
294 ; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k1
295 ; AVX512VLBW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
296 ; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
297 ; AVX512VLBW-NEXT: retq
298 %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> %amt)
303 ; Uniform Variable Shifts
; Rotate right of <8 x i64> by a uniform amount: element 0 of %amt is splatted
; with shufflevector (all-zero mask) and passed to llvm.fshr, with %x as both
; value operands. The expected code broadcasts the amount (vpbroadcastq) and
; then uses vprorvq.
306 define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %amt) nounwind {
307 ; AVX512-LABEL: splatvar_funnnel_v8i64:
309 ; AVX512-NEXT: vpbroadcastq %xmm1, %zmm1
310 ; AVX512-NEXT: vprorvq %zmm1, %zmm0, %zmm0
312 %splat = shufflevector <8 x i64> %amt, <8 x i64> undef, <8 x i32> zeroinitializer
313 %res = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x, <8 x i64> %x, <8 x i64> %splat)
; Rotate right of <16 x i32> by a uniform amount: element 0 of %amt is splatted
; with shufflevector (all-zero mask) and passed to llvm.fshr, with %x as both
; value operands. The expected code broadcasts the amount (vpbroadcastd) and
; then uses vprorvd.
317 define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounwind {
318 ; AVX512-LABEL: splatvar_funnnel_v16i32:
320 ; AVX512-NEXT: vpbroadcastd %xmm1, %zmm1
321 ; AVX512-NEXT: vprorvd %zmm1, %zmm0, %zmm0
323 %splat = shufflevector <16 x i32> %amt, <16 x i32> undef, <16 x i32> zeroinitializer
324 %res = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x, <16 x i32> %x, <16 x i32> %splat)
; Rotate right of <32 x i16> by a uniform amount (element 0 of %amt splatted
; with shufflevector; llvm.fshr gets %x as both value operands).
;  * AVX512F and AVX512VL runs: the broadcast amount is negated and masked
;    with a RIP-relative constant (value not visible in these checks); each
;    256-bit half is then shifted left by that scalar count (vpsllw) and right
;    by 16 minus that count (vpsrlw), and the two results are OR'd.
;  * AVX512BW and AVX512VLBW runs: vpsrlw by (amt & 15) is OR'd with vpsllw by
;    (-amt & 15) on the full zmm register.
328 define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
329 ; AVX512F-LABEL: splatvar_funnnel_v32i16:
331 ; AVX512F-NEXT: vpbroadcastw %xmm2, %ymm2
332 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
333 ; AVX512F-NEXT: vpsubw %xmm2, %xmm3, %xmm2
334 ; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
335 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
336 ; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm4
337 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16]
338 ; AVX512F-NEXT: vpsubw %xmm2, %xmm5, %xmm2
339 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
340 ; AVX512F-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
341 ; AVX512F-NEXT: vpor %ymm0, %ymm4, %ymm0
342 ; AVX512F-NEXT: vpsllw %xmm3, %ymm1, %ymm3
343 ; AVX512F-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
344 ; AVX512F-NEXT: vpor %ymm1, %ymm3, %ymm1
347 ; AVX512VL-LABEL: splatvar_funnnel_v32i16:
349 ; AVX512VL-NEXT: vpbroadcastw %xmm2, %ymm2
350 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
351 ; AVX512VL-NEXT: vpsubw %xmm2, %xmm3, %xmm2
352 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
353 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
354 ; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm4
355 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16]
356 ; AVX512VL-NEXT: vpsubw %xmm2, %xmm5, %xmm2
357 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
358 ; AVX512VL-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
359 ; AVX512VL-NEXT: vpor %ymm0, %ymm4, %ymm0
360 ; AVX512VL-NEXT: vpsllw %xmm3, %ymm1, %ymm3
361 ; AVX512VL-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
362 ; AVX512VL-NEXT: vpor %ymm1, %ymm3, %ymm1
363 ; AVX512VL-NEXT: retq
365 ; AVX512BW-LABEL: splatvar_funnnel_v32i16:
367 ; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
368 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
369 ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
370 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
371 ; AVX512BW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
372 ; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
373 ; AVX512BW-NEXT: vpsubw %xmm1, %xmm4, %xmm1
374 ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
375 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
376 ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
377 ; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
378 ; AVX512BW-NEXT: retq
380 ; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
381 ; AVX512VLBW: # %bb.0:
382 ; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %zmm1
383 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
384 ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
385 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
386 ; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
387 ; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
388 ; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm4, %xmm1
389 ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
390 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
391 ; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
392 ; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
393 ; AVX512VLBW-NEXT: retq
394 %splat = shufflevector <32 x i16> %amt, <32 x i16> undef, <32 x i32> zeroinitializer
395 %res = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x, <32 x i16> %x, <32 x i16> %splat)
; Rotate right of <64 x i8> by a uniform amount (element 0 of %amt splatted
; with shufflevector; llvm.fshr gets %x as both value operands).
; In every configuration the expected code shifts 16-bit words by a scalar
; count and then ANDs with a byte mask; the mask is made by applying the same
; shift to an all-ones register and broadcasting one resulting byte.
;  * AVX512F and AVX512VL runs: left count is the negated amount masked with a
;    RIP-relative constant (value not visible in these checks); right count is
;    8 minus the left count; each 256-bit half is processed separately.
;  * AVX512BW and AVX512VLBW runs: right count is (amt & 7), left count is
;    (-amt & 7), on the full zmm register.
399 define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind {
400 ; AVX512F-LABEL: splatvar_funnnel_v64i8:
402 ; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2
403 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
404 ; AVX512F-NEXT: vpsubb %xmm2, %xmm3, %xmm2
405 ; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
406 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
407 ; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm4
408 ; AVX512F-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5
409 ; AVX512F-NEXT: vpsllw %xmm3, %ymm5, %ymm6
410 ; AVX512F-NEXT: vpbroadcastb %xmm6, %ymm6
411 ; AVX512F-NEXT: vpand %ymm6, %ymm4, %ymm4
412 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
413 ; AVX512F-NEXT: vpsubb %xmm2, %xmm7, %xmm2
414 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
415 ; AVX512F-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
416 ; AVX512F-NEXT: vpsrlw %xmm2, %ymm5, %ymm5
417 ; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
418 ; AVX512F-NEXT: vpbroadcastb %xmm5, %ymm5
419 ; AVX512F-NEXT: vpand %ymm5, %ymm0, %ymm0
420 ; AVX512F-NEXT: vpor %ymm0, %ymm4, %ymm0
421 ; AVX512F-NEXT: vpsllw %xmm3, %ymm1, %ymm3
422 ; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3
423 ; AVX512F-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
424 ; AVX512F-NEXT: vpand %ymm5, %ymm1, %ymm1
425 ; AVX512F-NEXT: vpor %ymm1, %ymm3, %ymm1
428 ; AVX512VL-LABEL: splatvar_funnnel_v64i8:
430 ; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2
431 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
432 ; AVX512VL-NEXT: vpsubb %xmm2, %xmm3, %xmm2
433 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
434 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
435 ; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm4
436 ; AVX512VL-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5
437 ; AVX512VL-NEXT: vpsllw %xmm3, %ymm5, %ymm6
438 ; AVX512VL-NEXT: vpbroadcastb %xmm6, %ymm6
439 ; AVX512VL-NEXT: vpand %ymm6, %ymm4, %ymm4
440 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
441 ; AVX512VL-NEXT: vpsubb %xmm2, %xmm7, %xmm2
442 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
443 ; AVX512VL-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
444 ; AVX512VL-NEXT: vpsrlw %xmm2, %ymm5, %ymm5
445 ; AVX512VL-NEXT: vpsrlw $8, %ymm5, %ymm5
446 ; AVX512VL-NEXT: vpbroadcastb %xmm5, %ymm5
447 ; AVX512VL-NEXT: vpand %ymm5, %ymm0, %ymm0
448 ; AVX512VL-NEXT: vpor %ymm0, %ymm4, %ymm0
449 ; AVX512VL-NEXT: vpsllw %xmm3, %ymm1, %ymm3
450 ; AVX512VL-NEXT: vpand %ymm6, %ymm3, %ymm3
451 ; AVX512VL-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
452 ; AVX512VL-NEXT: vpand %ymm5, %ymm1, %ymm1
453 ; AVX512VL-NEXT: vpor %ymm1, %ymm3, %ymm1
454 ; AVX512VL-NEXT: retq
456 ; AVX512BW-LABEL: splatvar_funnnel_v64i8:
458 ; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1
459 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
460 ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
461 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
462 ; AVX512BW-NEXT: vpsrlw %xmm3, %zmm0, %zmm4
463 ; AVX512BW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5
464 ; AVX512BW-NEXT: vpsrlw %xmm3, %zmm5, %zmm3
465 ; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3
466 ; AVX512BW-NEXT: vpbroadcastb %xmm3, %zmm3
467 ; AVX512BW-NEXT: vpandq %zmm3, %zmm4, %zmm3
468 ; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
469 ; AVX512BW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
470 ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
471 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
472 ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
473 ; AVX512BW-NEXT: vpsllw %xmm1, %zmm5, %zmm1
474 ; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1
475 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
476 ; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
477 ; AVX512BW-NEXT: retq
479 ; AVX512VLBW-LABEL: splatvar_funnnel_v64i8:
480 ; AVX512VLBW: # %bb.0:
481 ; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %zmm1
482 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
483 ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
484 ; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
485 ; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm0, %zmm4
486 ; AVX512VLBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5
487 ; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm5, %zmm3
488 ; AVX512VLBW-NEXT: vpsrlw $8, %zmm3, %zmm3
489 ; AVX512VLBW-NEXT: vpbroadcastb %xmm3, %zmm3
490 ; AVX512VLBW-NEXT: vpandq %zmm3, %zmm4, %zmm3
491 ; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
492 ; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
493 ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
494 ; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
495 ; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
496 ; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm5, %zmm1
497 ; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %zmm1
498 ; AVX512VLBW-NEXT: vpandq %zmm1, %zmm0, %zmm0
499 ; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
500 ; AVX512VLBW-NEXT: retq
501 %splat = shufflevector <64 x i8> %amt, <64 x i8> undef, <64 x i32> zeroinitializer
502 %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> %splat)
; Rotate right of <8 x i64> by the constant per-lane amounts
; <4,14,50,60,4,14,50,60> (llvm.fshr with %x as both value operands).
; The expected code is a single vprorvq with a RIP-relative memory operand.
510 define <8 x i64> @constant_funnnel_v8i64(<8 x i64> %x) nounwind {
511 ; AVX512-LABEL: constant_funnnel_v8i64:
513 ; AVX512-NEXT: vprorvq {{.*}}(%rip), %zmm0, %zmm0
515 %res = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x, <8 x i64> %x, <8 x i64> <i64 4, i64 14, i64 50, i64 60, i64 4, i64 14, i64 50, i64 60>)
; Rotate right of <16 x i32> by the constant per-lane amounts 4..11, repeated
; twice (llvm.fshr with %x as both value operands).
; The expected code is a single vprorvd with a RIP-relative memory operand.
519 define <16 x i32> @constant_funnnel_v16i32(<16 x i32> %x) nounwind {
520 ; AVX512-LABEL: constant_funnnel_v16i32:
522 ; AVX512-NEXT: vprorvd {{.*}}(%rip), %zmm0, %zmm0
524 %res = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x, <16 x i32> %x, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>)
; Rotate right of <32 x i16> by the constant per-lane amounts 0..15, repeated
; twice (llvm.fshr with %x as both value operands).
;  * AVX512F and AVX512VL runs: for each 256-bit half, vpmulhuw and vpmullw by
;    power-of-two tables form the two parts of the rotate; a vpblendw/vpblendd
;    pair substitutes the input for element 0 of the vpmulhuw result (whose
;    multiplier is shown as undefined, "u"), and vpor combines the parts.
;  * AVX512BW and AVX512VLBW runs: vpsrlvw and vpsllvw with RIP-relative
;    memory operands, combined with vporq.
528 define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x) nounwind {
529 ; AVX512F-LABEL: constant_funnnel_v32i16:
531 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = <u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2>
532 ; AVX512F-NEXT: vpmulhuw %ymm2, %ymm0, %ymm3
533 ; AVX512F-NEXT: vpblendw {{.*#+}} ymm4 = ymm0[0],ymm3[1,2,3,4,5,6,7],ymm0[8],ymm3[9,10,11,12,13,14,15]
534 ; AVX512F-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
535 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [1,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2]
536 ; AVX512F-NEXT: vpmullw %ymm4, %ymm0, %ymm0
537 ; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0
538 ; AVX512F-NEXT: vpmulhuw %ymm2, %ymm1, %ymm2
539 ; AVX512F-NEXT: vpblendw {{.*#+}} ymm3 = ymm1[0],ymm2[1,2,3,4,5,6,7],ymm1[8],ymm2[9,10,11,12,13,14,15]
540 ; AVX512F-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
541 ; AVX512F-NEXT: vpmullw %ymm4, %ymm1, %ymm1
542 ; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1
545 ; AVX512VL-LABEL: constant_funnnel_v32i16:
547 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2>
548 ; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm0, %ymm3
549 ; AVX512VL-NEXT: vpblendw {{.*#+}} ymm4 = ymm0[0],ymm3[1,2,3,4,5,6,7],ymm0[8],ymm3[9,10,11,12,13,14,15]
550 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
551 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [1,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2]
552 ; AVX512VL-NEXT: vpmullw %ymm4, %ymm0, %ymm0
553 ; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0
554 ; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm1, %ymm2
555 ; AVX512VL-NEXT: vpblendw {{.*#+}} ymm3 = ymm1[0],ymm2[1,2,3,4,5,6,7],ymm1[8],ymm2[9,10,11,12,13,14,15]
556 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
557 ; AVX512VL-NEXT: vpmullw %ymm4, %ymm1, %ymm1
558 ; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
559 ; AVX512VL-NEXT: retq
561 ; AVX512BW-LABEL: constant_funnnel_v32i16:
563 ; AVX512BW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm1
564 ; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
565 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
566 ; AVX512BW-NEXT: retq
568 ; AVX512VLBW-LABEL: constant_funnnel_v32i16:
569 ; AVX512VLBW: # %bb.0:
570 ; AVX512VLBW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm1
571 ; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
572 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
573 ; AVX512VLBW-NEXT: retq
574 %res = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x, <32 x i16> %x, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
; Rotate-right of <64 x i8> by a non-uniform constant amount: both value
; operands of llvm.fshr.v64i8 are %x, and the shift-amount vector repeats
; 0,1,2,...,7,8,7,...,2,1 in every group of 16 elements.
; Under the AVX512F and AVX512VL prefixes the expected code processes %ymm0
; and %ymm1 separately; under the AVX512BW and AVX512VLBW prefixes it works on
; %zmm0 as a whole and selects bytes through %k1 (vpmovb2m). In every variant a
; blend-selected left-shift chain (vpsllw $4, vpsllw $2, vpaddb) is OR-ed with
; an unpack / vpmullw-or-vpsllvw / vpsrlw $8 / vpackuswb chain.
; The check lines below are autogenerated (see the note on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
578 define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x) nounwind {
579 ; AVX512F-LABEL: constant_funnnel_v64i8:
581 ; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2
582 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
583 ; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
584 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536]
585 ; AVX512F-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm2
586 ; AVX512F-NEXT: vpsllw $2, %ymm2, %ymm5
587 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
588 ; AVX512F-NEXT: vpand %ymm6, %ymm5, %ymm5
589 ; AVX512F-NEXT: vpaddb %ymm4, %ymm4, %ymm7
590 ; AVX512F-NEXT: vpblendvb %ymm7, %ymm5, %ymm2, %ymm2
591 ; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm5
592 ; AVX512F-NEXT: vpaddb %ymm7, %ymm7, %ymm8
593 ; AVX512F-NEXT: vpblendvb %ymm8, %ymm5, %ymm2, %ymm2
594 ; AVX512F-NEXT: vpxor %xmm5, %xmm5, %xmm5
595 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm9 = ymm0[8],ymm5[8],ymm0[9],ymm5[9],ymm0[10],ymm5[10],ymm0[11],ymm5[11],ymm0[12],ymm5[12],ymm0[13],ymm5[13],ymm0[14],ymm5[14],ymm0[15],ymm5[15],ymm0[24],ymm5[24],ymm0[25],ymm5[25],ymm0[26],ymm5[26],ymm0[27],ymm5[27],ymm0[28],ymm5[28],ymm0[29],ymm5[29],ymm0[30],ymm5[30],ymm0[31],ymm5[31]
596 ; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm10 = [256,2,4,8,16,32,64,128,256,2,4,8,16,32,64,128]
597 ; AVX512F-NEXT: # ymm10 = mem[0,1,0,1]
598 ; AVX512F-NEXT: vpmullw %ymm10, %ymm9, %ymm9
599 ; AVX512F-NEXT: vpsrlw $8, %ymm9, %ymm9
600 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm5[0],ymm0[1],ymm5[1],ymm0[2],ymm5[2],ymm0[3],ymm5[3],ymm0[4],ymm5[4],ymm0[5],ymm5[5],ymm0[6],ymm5[6],ymm0[7],ymm5[7],ymm0[16],ymm5[16],ymm0[17],ymm5[17],ymm0[18],ymm5[18],ymm0[19],ymm5[19],ymm0[20],ymm5[20],ymm0[21],ymm5[21],ymm0[22],ymm5[22],ymm0[23],ymm5[23]
601 ; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm11 = [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2]
602 ; AVX512F-NEXT: # ymm11 = mem[0,1,0,1]
603 ; AVX512F-NEXT: vpmullw %ymm11, %ymm0, %ymm0
604 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0
605 ; AVX512F-NEXT: vpackuswb %ymm9, %ymm0, %ymm0
606 ; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
607 ; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm2
608 ; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
609 ; AVX512F-NEXT: vpblendvb %ymm4, %ymm2, %ymm1, %ymm2
610 ; AVX512F-NEXT: vpsllw $2, %ymm2, %ymm3
611 ; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3
612 ; AVX512F-NEXT: vpblendvb %ymm7, %ymm3, %ymm2, %ymm2
613 ; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm3
614 ; AVX512F-NEXT: vpblendvb %ymm8, %ymm3, %ymm2, %ymm2
615 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm5[8],ymm1[9],ymm5[9],ymm1[10],ymm5[10],ymm1[11],ymm5[11],ymm1[12],ymm5[12],ymm1[13],ymm5[13],ymm1[14],ymm5[14],ymm1[15],ymm5[15],ymm1[24],ymm5[24],ymm1[25],ymm5[25],ymm1[26],ymm5[26],ymm1[27],ymm5[27],ymm1[28],ymm5[28],ymm1[29],ymm5[29],ymm1[30],ymm5[30],ymm1[31],ymm5[31]
616 ; AVX512F-NEXT: vpmullw %ymm10, %ymm3, %ymm3
617 ; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
618 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm5[0],ymm1[1],ymm5[1],ymm1[2],ymm5[2],ymm1[3],ymm5[3],ymm1[4],ymm5[4],ymm1[5],ymm5[5],ymm1[6],ymm5[6],ymm1[7],ymm5[7],ymm1[16],ymm5[16],ymm1[17],ymm5[17],ymm1[18],ymm5[18],ymm1[19],ymm5[19],ymm1[20],ymm5[20],ymm1[21],ymm5[21],ymm1[22],ymm5[22],ymm1[23],ymm5[23]
619 ; AVX512F-NEXT: vpmullw %ymm11, %ymm1, %ymm1
620 ; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1
621 ; AVX512F-NEXT: vpackuswb %ymm3, %ymm1, %ymm1
622 ; AVX512F-NEXT: vpor %ymm1, %ymm2, %ymm1
625 ; AVX512VL-LABEL: constant_funnnel_v64i8:
627 ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
628 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
629 ; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
630 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536]
631 ; AVX512VL-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm2
632 ; AVX512VL-NEXT: vpsllw $2, %ymm2, %ymm5
633 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
634 ; AVX512VL-NEXT: vpand %ymm6, %ymm5, %ymm5
635 ; AVX512VL-NEXT: vpaddb %ymm4, %ymm4, %ymm7
636 ; AVX512VL-NEXT: vpblendvb %ymm7, %ymm5, %ymm2, %ymm2
637 ; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm5
638 ; AVX512VL-NEXT: vpaddb %ymm7, %ymm7, %ymm8
639 ; AVX512VL-NEXT: vpblendvb %ymm8, %ymm5, %ymm2, %ymm2
640 ; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
641 ; AVX512VL-NEXT: vpsrlw $8, %ymm5, %ymm5
642 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm9 = [256,2,4,8,16,32,64,128,256,2,4,8,16,32,64,128]
643 ; AVX512VL-NEXT: # ymm9 = mem[0,1,0,1]
644 ; AVX512VL-NEXT: vpmullw %ymm9, %ymm5, %ymm5
645 ; AVX512VL-NEXT: vpsrlw $8, %ymm5, %ymm5
646 ; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
647 ; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
648 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm10 = [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2]
649 ; AVX512VL-NEXT: # ymm10 = mem[0,1,0,1]
650 ; AVX512VL-NEXT: vpmullw %ymm10, %ymm0, %ymm0
651 ; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
652 ; AVX512VL-NEXT: vpackuswb %ymm5, %ymm0, %ymm0
653 ; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0
654 ; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm2
655 ; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
656 ; AVX512VL-NEXT: vpblendvb %ymm4, %ymm2, %ymm1, %ymm2
657 ; AVX512VL-NEXT: vpsllw $2, %ymm2, %ymm3
658 ; AVX512VL-NEXT: vpand %ymm6, %ymm3, %ymm3
659 ; AVX512VL-NEXT: vpblendvb %ymm7, %ymm3, %ymm2, %ymm2
660 ; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm3
661 ; AVX512VL-NEXT: vpblendvb %ymm8, %ymm3, %ymm2, %ymm2
662 ; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
663 ; AVX512VL-NEXT: vpsrlw $8, %ymm3, %ymm3
664 ; AVX512VL-NEXT: vpmullw %ymm9, %ymm3, %ymm3
665 ; AVX512VL-NEXT: vpsrlw $8, %ymm3, %ymm3
666 ; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
667 ; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
668 ; AVX512VL-NEXT: vpmullw %ymm10, %ymm1, %ymm1
669 ; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
670 ; AVX512VL-NEXT: vpackuswb %ymm3, %ymm1, %ymm1
671 ; AVX512VL-NEXT: vpor %ymm1, %ymm2, %ymm1
672 ; AVX512VL-NEXT: retq
674 ; AVX512BW-LABEL: constant_funnnel_v64i8:
676 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536]
677 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
678 ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
679 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
680 ; AVX512BW-NEXT: vpblendmb %zmm2, %zmm0, %zmm2 {%k1}
681 ; AVX512BW-NEXT: vpsllw $2, %zmm2, %zmm3
682 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
683 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
684 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
685 ; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
686 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
687 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
688 ; AVX512BW-NEXT: vpaddb %zmm2, %zmm2, %zmm2 {%k1}
689 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm1 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
690 ; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
691 ; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm1, %zmm1
692 ; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
693 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
694 ; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
695 ; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
696 ; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
697 ; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0
698 ; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0
699 ; AVX512BW-NEXT: retq
701 ; AVX512VLBW-LABEL: constant_funnnel_v64i8:
702 ; AVX512VLBW: # %bb.0:
703 ; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536]
704 ; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k1
705 ; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm2
706 ; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
707 ; AVX512VLBW-NEXT: vpblendmb %zmm2, %zmm0, %zmm2 {%k1}
708 ; AVX512VLBW-NEXT: vpsllw $2, %zmm2, %zmm3
709 ; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
710 ; AVX512VLBW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
711 ; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k1
712 ; AVX512VLBW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
713 ; AVX512VLBW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
714 ; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k1
715 ; AVX512VLBW-NEXT: vpaddb %zmm2, %zmm2, %zmm2 {%k1}
716 ; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} zmm1 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
717 ; AVX512VLBW-NEXT: vpsrlw $8, %zmm1, %zmm1
718 ; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %zmm1, %zmm1
719 ; AVX512VLBW-NEXT: vpsrlw $8, %zmm1, %zmm1
720 ; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
721 ; AVX512VLBW-NEXT: vpsrlw $8, %zmm0, %zmm0
722 ; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
723 ; AVX512VLBW-NEXT: vpsrlw $8, %zmm0, %zmm0
724 ; AVX512VLBW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0
725 ; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0
726 ; AVX512VLBW-NEXT: retq
727 %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1>)
732 ; Uniform Constant Shifts
; Rotate-right of <8 x i64> by a uniform constant: llvm.fshr.v8i64 is called
; with %x as both value operands and a splat shift amount of 14. All four RUN
; configurations share the common AVX512 prefix here and expect a single
; immediate rotate, vprorq $14.
735 define <8 x i64> @splatconstant_funnnel_v8i64(<8 x i64> %x) nounwind {
736 ; AVX512-LABEL: splatconstant_funnnel_v8i64:
738 ; AVX512-NEXT: vprorq $14, %zmm0, %zmm0
740 %res = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x, <8 x i64> %x, <8 x i64> <i64 14, i64 14, i64 14, i64 14, i64 14, i64 14, i64 14, i64 14>)
; Rotate-right of <16 x i32> by a uniform constant: llvm.fshr.v16i32 is called
; with %x as both value operands and a splat shift amount of 4. All four RUN
; configurations share the common AVX512 prefix here and expect a single
; immediate rotate, vprord $4.
744 define <16 x i32> @splatconstant_funnnel_v16i32(<16 x i32> %x) nounwind {
745 ; AVX512-LABEL: splatconstant_funnnel_v16i32:
747 ; AVX512-NEXT: vprord $4, %zmm0, %zmm0
749 %res = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x, <16 x i32> %x, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>)
; Rotate-right of <32 x i16> by a uniform constant: llvm.fshr.v32i16 is called
; with %x as both value operands and a splat shift amount of 7.
; The expected code is a shift/shift/or expansion: vpsrlw $7 combined with
; vpsllw $9 (7 + 9 = 16, the element width). Under the AVX512F and AVX512VL
; prefixes this is done once per 256-bit register (%ymm0, then %ymm1) with
; vpor; under the AVX512BW and AVX512VLBW prefixes it is done once on %zmm0
; with vporq.
753 define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x) nounwind {
754 ; AVX512F-LABEL: splatconstant_funnnel_v32i16:
756 ; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm2
757 ; AVX512F-NEXT: vpsllw $9, %ymm0, %ymm0
758 ; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0
759 ; AVX512F-NEXT: vpsrlw $7, %ymm1, %ymm2
760 ; AVX512F-NEXT: vpsllw $9, %ymm1, %ymm1
761 ; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1
764 ; AVX512VL-LABEL: splatconstant_funnnel_v32i16:
766 ; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm2
767 ; AVX512VL-NEXT: vpsllw $9, %ymm0, %ymm0
768 ; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0
769 ; AVX512VL-NEXT: vpsrlw $7, %ymm1, %ymm2
770 ; AVX512VL-NEXT: vpsllw $9, %ymm1, %ymm1
771 ; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
772 ; AVX512VL-NEXT: retq
774 ; AVX512BW-LABEL: splatconstant_funnnel_v32i16:
776 ; AVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm1
777 ; AVX512BW-NEXT: vpsllw $9, %zmm0, %zmm0
778 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
779 ; AVX512BW-NEXT: retq
781 ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i16:
782 ; AVX512VLBW: # %bb.0:
783 ; AVX512VLBW-NEXT: vpsrlw $7, %zmm0, %zmm1
784 ; AVX512VLBW-NEXT: vpsllw $9, %zmm0, %zmm0
785 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
786 ; AVX512VLBW-NEXT: retq
787 %res = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x, <32 x i16> %x, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
791 define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind {
792 ; AVX512F-LABEL: splatconstant_funnnel_v64i8:
794 ; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2
795 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
796 ; AVX512F-NEXT: vpandn %ymm2, %ymm3, %ymm2
797 ; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
798 ; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
799 ; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0
800 ; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm2
801 ; AVX512F-NEXT: vpandn %ymm2, %ymm3, %ymm2
802 ; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm1
803 ; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
804 ; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1
807 ; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
809 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2
810 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
811 ; AVX512VL-NEXT: vpandn %ymm2, %ymm3, %ymm2
812 ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
813 ; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
814 ; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0
815 ; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm2
816 ; AVX512VL-NEXT: vpandn %ymm2, %ymm3, %ymm2
817 ; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm1
818 ; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
819 ; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
820 ; AVX512VL-NEXT: retq
822 ; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
824 ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1
825 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
826 ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0
827 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
828 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
829 ; AVX512BW-NEXT: retq
831 ; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
832 ; AVX512VLBW: # %bb.0:
833 ; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm1
834 ; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
835 ; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0
836 ; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
837 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
838 ; AVX512VLBW-NEXT: retq
839 %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)