; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VLBW
declare <8 x i64> @llvm.fshr.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)
declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
declare <32 x i16> @llvm.fshr.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)
declare <64 x i8> @llvm.fshr.v64i8(<64 x i8>, <64 x i8>, <64 x i8>)
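; llvm.fshr concatenates its first two operands and shifts the pair right by the
; third operand, modulo the element bit width. Every test below passes %x as both
; value operands, so fshr(%x, %x, %amt) is a rotate right: e.g. for an i8 element,
; fshr(0x01, 0x01, 1) == (0x0101 >> 1) & 0xff == 0x80. The backend is therefore
; free to select the AVX512 rotate instructions where they exist.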
;
; Variable Shifts
;

define <8 x i64> @var_funnnel_v8i64(<8 x i64> %x, <8 x i64> %amt) nounwind {
; AVX512-LABEL: var_funnnel_v8i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vprorvq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
  %res = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x, <8 x i64> %x, <8 x i64> %amt)
  ret <8 x i64> %res
}
define <16 x i32> @var_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounwind {
; AVX512-LABEL: var_funnnel_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vprorvd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
  %res = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x, <16 x i32> %x, <16 x i32> %amt)
  ret <16 x i32> %res
}
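; i16 rotates: AVX512F/AVX512VL have no variable 16-bit shift, so the checks below
; zero-extend each 256-bit half to 32-bit elements, do both shifts with
; vpsllvd/vpsrlvd, and narrow back with vpmovdw. AVX512BW adds vpsrlvw/vpsllvw,
; reducing the rotate to (x >> (amt & 15)) | (x << (-amt & 15)) on one zmm.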
define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
; AVX512F-LABEL: var_funnnel_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512F-NEXT: vpsubw %ymm2, %ymm4, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm6 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpsllvd %zmm6, %zmm0, %zmm6
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm7 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512F-NEXT: vpsubw %ymm2, %ymm7, %ymm2
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512F-NEXT: vpsrlvd %zmm2, %zmm0, %zmm0
; AVX512F-NEXT: vpord %zmm0, %zmm6, %zmm0
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: vpsubw %ymm3, %ymm4, %ymm2
; AVX512F-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT: vpsllvd %zmm3, %zmm1, %zmm3
; AVX512F-NEXT: vpsubw %ymm2, %ymm7, %ymm2
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512F-NEXT: vpsrlvd %zmm2, %zmm1, %zmm1
; AVX512F-NEXT: vpord %zmm1, %zmm3, %zmm1
; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: var_funnnel_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT: vpsubw %ymm2, %ymm4, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm6 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT: vpsllvd %zmm6, %zmm0, %zmm6
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: vpsubw %ymm2, %ymm7, %ymm2
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512VL-NEXT: vpsrlvd %zmm2, %zmm0, %zmm0
; AVX512VL-NEXT: vpord %zmm0, %zmm6, %zmm0
; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
; AVX512VL-NEXT: vpsubw %ymm3, %ymm4, %ymm2
; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-NEXT: vpsllvd %zmm3, %zmm1, %zmm3
; AVX512VL-NEXT: vpsubw %ymm2, %ymm7, %ymm2
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512VL-NEXT: vpsrlvd %zmm2, %zmm1, %zmm1
; AVX512VL-NEXT: vpord %zmm1, %zmm3, %zmm1
; AVX512VL-NEXT: vpmovdw %zmm1, %ymm1
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: var_funnnel_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm3
; AVX512BW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3
; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512BW-NEXT: vpsubw %zmm1, %zmm4, %zmm1
; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm3
; AVX512VLBW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3
; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm4, %zmm1
; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
  %res = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x, <32 x i16> %x, <32 x i16> %amt)
  ret <32 x i16> %res
}
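; i8 rotates: x86 has no byte-granularity shifts, so the rotate is assembled from
; conditional rotate-by-4/2/1 stages. The negated amount is masked to 3 bits and
; moved into the byte sign bits with vpsllw $5; each stage is then selected per
; byte with vpblendvb (AVX512F/VL) or vpmovb2m plus masked moves (AVX512BW/VLBW).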
define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind {
; AVX512F-LABEL: var_funnnel_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpandn %ymm4, %ymm5, %ymm4
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm6
; AVX512F-NEXT: vpand %ymm5, %ymm6, %ymm6
; AVX512F-NEXT: vpor %ymm4, %ymm6, %ymm4
; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6
; AVX512F-NEXT: vpsubb %ymm2, %ymm6, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm7 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512F-NEXT: vpand %ymm7, %ymm2, %ymm2
; AVX512F-NEXT: vpsllw $5, %ymm2, %ymm2
; AVX512F-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm8 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512F-NEXT: vpandn %ymm4, %ymm8, %ymm4
; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm9
; AVX512F-NEXT: vpand %ymm8, %ymm9, %ymm9
; AVX512F-NEXT: vpor %ymm4, %ymm9, %ymm4
; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX512F-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm9 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512F-NEXT: vpand %ymm9, %ymm4, %ymm4
; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm10
; AVX512F-NEXT: vpor %ymm4, %ymm10, %ymm4
; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX512F-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm2
; AVX512F-NEXT: vpandn %ymm2, %ymm5, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm4
; AVX512F-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX512F-NEXT: vpor %ymm2, %ymm4, %ymm2
; AVX512F-NEXT: vpsubb %ymm3, %ymm6, %ymm3
; AVX512F-NEXT: vpand %ymm7, %ymm3, %ymm3
; AVX512F-NEXT: vpsllw $5, %ymm3, %ymm3
; AVX512F-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlw $6, %ymm1, %ymm2
; AVX512F-NEXT: vpandn %ymm2, %ymm8, %ymm2
; AVX512F-NEXT: vpsllw $2, %ymm1, %ymm4
; AVX512F-NEXT: vpand %ymm8, %ymm4, %ymm4
; AVX512F-NEXT: vpor %ymm2, %ymm4, %ymm2
; AVX512F-NEXT: vpaddb %ymm3, %ymm3, %ymm3
; AVX512F-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlw $7, %ymm1, %ymm2
; AVX512F-NEXT: vpand %ymm9, %ymm2, %ymm2
; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm4
; AVX512F-NEXT: vpor %ymm2, %ymm4, %ymm2
; AVX512F-NEXT: vpaddb %ymm3, %ymm3, %ymm3
; AVX512F-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: var_funnnel_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpandn %ymm4, %ymm5, %ymm4
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm6
; AVX512VL-NEXT: vpand %ymm5, %ymm6, %ymm6
; AVX512VL-NEXT: vpor %ymm4, %ymm6, %ymm4
; AVX512VL-NEXT: vpxor %xmm6, %xmm6, %xmm6
; AVX512VL-NEXT: vpsubb %ymm2, %ymm6, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VL-NEXT: vpand %ymm7, %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $5, %ymm2, %ymm2
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm4
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm8 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512VL-NEXT: vpandn %ymm4, %ymm8, %ymm4
; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm9
; AVX512VL-NEXT: vpand %ymm8, %ymm9, %ymm9
; AVX512VL-NEXT: vpor %ymm4, %ymm9, %ymm4
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm4
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm9 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512VL-NEXT: vpand %ymm9, %ymm4, %ymm4
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm10
; AVX512VL-NEXT: vpor %ymm4, %ymm10, %ymm4
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm2
; AVX512VL-NEXT: vpandn %ymm2, %ymm5, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm4
; AVX512VL-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX512VL-NEXT: vpor %ymm2, %ymm4, %ymm2
; AVX512VL-NEXT: vpsubb %ymm3, %ymm6, %ymm3
; AVX512VL-NEXT: vpand %ymm7, %ymm3, %ymm3
; AVX512VL-NEXT: vpsllw $5, %ymm3, %ymm3
; AVX512VL-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: vpsrlw $6, %ymm1, %ymm2
; AVX512VL-NEXT: vpandn %ymm2, %ymm8, %ymm2
; AVX512VL-NEXT: vpsllw $2, %ymm1, %ymm4
; AVX512VL-NEXT: vpand %ymm8, %ymm4, %ymm4
; AVX512VL-NEXT: vpor %ymm2, %ymm4, %ymm2
; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm3
; AVX512VL-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: vpsrlw $7, %ymm1, %ymm2
; AVX512VL-NEXT: vpand %ymm9, %ymm2, %ymm2
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm4
; AVX512VL-NEXT: vpor %ymm2, %ymm4, %ymm2
; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm3
; AVX512VL-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: var_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm3
; AVX512BW-NEXT: vpsllw $5, %zmm3, %zmm3
; AVX512BW-NEXT: vpaddb %zmm3, %zmm3, %zmm4
; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
; AVX512BW-NEXT: vpmovb2m %zmm3, %k2
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm3
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
; AVX512BW-NEXT: vpblendmb %zmm3, %zmm0, %zmm3 {%k2}
; AVX512BW-NEXT: vpsrlw $2, %zmm3, %zmm5
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
; AVX512BW-NEXT: vmovdqu8 %zmm5, %zmm3 {%k1}
; AVX512BW-NEXT: vpsrlw $1, %zmm3, %zmm5
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
; AVX512BW-NEXT: vpaddb %zmm4, %zmm4, %zmm4
; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm5, %zmm3 {%k1}
; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512BW-NEXT: vpsubb %zmm1, %zmm4, %zmm1
; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm2
; AVX512BW-NEXT: vpmovb2m %zmm2, %k1
; AVX512BW-NEXT: vpmovb2m %zmm1, %k2
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k2}
; AVX512BW-NEXT: vpsllw $2, %zmm0, %zmm1
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT: vpaddb %zmm2, %zmm2, %zmm1
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm3
; AVX512VLBW-NEXT: vpsllw $5, %zmm3, %zmm3
; AVX512VLBW-NEXT: vpaddb %zmm3, %zmm3, %zmm4
; AVX512VLBW-NEXT: vpmovb2m %zmm4, %k1
; AVX512VLBW-NEXT: vpmovb2m %zmm3, %k2
; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm3
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
; AVX512VLBW-NEXT: vpblendmb %zmm3, %zmm0, %zmm3 {%k2}
; AVX512VLBW-NEXT: vpsrlw $2, %zmm3, %zmm5
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
; AVX512VLBW-NEXT: vmovdqu8 %zmm5, %zmm3 {%k1}
; AVX512VLBW-NEXT: vpsrlw $1, %zmm3, %zmm5
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
; AVX512VLBW-NEXT: vpaddb %zmm4, %zmm4, %zmm4
; AVX512VLBW-NEXT: vpmovb2m %zmm4, %k1
; AVX512VLBW-NEXT: vmovdqu8 %zmm5, %zmm3 {%k1}
; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBW-NEXT: vpsubb %zmm1, %zmm4, %zmm1
; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsllw $5, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpaddb %zmm1, %zmm1, %zmm2
; AVX512VLBW-NEXT: vpmovb2m %zmm2, %k1
; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k2
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512VLBW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k2}
; AVX512VLBW-NEXT: vpsllw $2, %zmm0, %zmm1
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512VLBW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1}
; AVX512VLBW-NEXT: vpaddb %zmm2, %zmm2, %zmm1
; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k1
; AVX512VLBW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
  %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> %amt)
  ret <64 x i8> %res
}
;
; Uniform Variable Shifts
;
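; With a splatted amount every element rotates by the same count. i64/i32 simply
; re-broadcast the amount and reuse vprorvq/vprorvd; the narrower element types
; instead exploit the uniform count to use the scalar-count shift forms
; (vpsllw/vpsrlw with the count taken from an xmm register).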
define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %amt) nounwind {
; AVX512-LABEL: splatvar_funnnel_v8i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastq %xmm1, %zmm1
; AVX512-NEXT: vprorvq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
  %splat = shufflevector <8 x i64> %amt, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x, <8 x i64> %x, <8 x i64> %splat)
  ret <8 x i64> %res
}
define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounwind {
; AVX512-LABEL: splatvar_funnnel_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastd %xmm1, %zmm1
; AVX512-NEXT: vprorvd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
  %splat = shufflevector <16 x i32> %amt, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x, <16 x i32> %x, <16 x i32> %splat)
  ret <16 x i32> %res
}
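; Uniform i16 rotate: the splatted amount is negated and masked to 4 bits, the
; count is zero-extended into an xmm register, and both shifts use the
; scalar-count forms vpsllw/vpsrlw, applied per 256-bit half on AVX512F/VL and
; to the whole zmm on the BW targets.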
define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpsubw %xmm2, %xmm3, %xmm2
; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16]
; AVX512F-NEXT: vpsubw %xmm2, %xmm5, %xmm2
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm0, %ymm4, %ymm0
; AVX512F-NEXT: vpsllw %xmm3, %ymm1, %ymm3
; AVX512F-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: vpor %ymm1, %ymm3, %ymm1
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpsubw %xmm2, %xmm3, %xmm2
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm4
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: vpsubw %xmm2, %xmm5, %xmm2
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm0, %ymm4, %ymm0
; AVX512VL-NEXT: vpsllw %xmm3, %ymm1, %ymm3
; AVX512VL-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
; AVX512VL-NEXT: vpor %ymm1, %ymm3, %ymm1
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512BW-NEXT: vpsubw %xmm1, %xmm4, %xmm1
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %xmm1
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm4, %xmm1
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
  %splat = shufflevector <32 x i16> %amt, <32 x i16> undef, <32 x i32> zeroinitializer
  %res = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x, <32 x i16> %x, <32 x i16> %splat)
  ret <32 x i16> %res
}
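; Uniform i8 rotate: with no byte shift available, each half is shifted as words
; and the bits that crossed byte boundaries are masked off; the mask is obtained
; by applying the same shift to an all-ones vector (vpcmpeqd) and broadcasting
; the resulting byte.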
define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastb %xmm2, %xmm2
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpsubb %xmm2, %xmm3, %xmm2
; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm4
; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512F-NEXT: vpsllw %xmm3, %xmm5, %xmm6
; AVX512F-NEXT: vpbroadcastb %xmm6, %ymm6
; AVX512F-NEXT: vpand %ymm6, %ymm4, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512F-NEXT: vpsubb %xmm2, %xmm7, %xmm2
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw %xmm2, %xmm5, %xmm5
; AVX512F-NEXT: vpsrlw $8, %xmm5, %xmm5
; AVX512F-NEXT: vpbroadcastb %xmm5, %ymm5
; AVX512F-NEXT: vpand %ymm5, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm0, %ymm4, %ymm0
; AVX512F-NEXT: vpsllw %xmm3, %ymm1, %ymm3
; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm5, %ymm1, %ymm1
; AVX512F-NEXT: vpor %ymm1, %ymm3, %ymm1
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastb %xmm2, %xmm2
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpsubb %xmm2, %xmm3, %xmm2
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm4
; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512VL-NEXT: vpsllw %xmm3, %xmm5, %xmm6
; AVX512VL-NEXT: vpbroadcastb %xmm6, %ymm6
; AVX512VL-NEXT: vpand %ymm6, %ymm4, %ymm4
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512VL-NEXT: vpsubb %xmm2, %xmm7, %xmm2
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw %xmm2, %xmm5, %xmm5
; AVX512VL-NEXT: vpsrlw $8, %xmm5, %xmm5
; AVX512VL-NEXT: vpbroadcastb %xmm5, %ymm5
; AVX512VL-NEXT: vpand %ymm5, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm0, %ymm4, %ymm0
; AVX512VL-NEXT: vpsllw %xmm3, %ymm1, %ymm3
; AVX512VL-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512VL-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
; AVX512VL-NEXT: vpand %ymm5, %ymm1, %ymm1
; AVX512VL-NEXT: vpor %ymm1, %ymm3, %ymm1
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm3, %zmm0, %zmm4
; AVX512BW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512BW-NEXT: vpsrlw %xmm3, %xmm5, %xmm3
; AVX512BW-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX512BW-NEXT: vpbroadcastb %xmm3, %zmm3
; AVX512BW-NEXT: vpandq %zmm3, %zmm4, %zmm3
; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512BW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsllw %xmm1, %xmm5, %xmm1
; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm0, %zmm4
; AVX512VLBW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm5, %xmm3
; AVX512VLBW-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX512VLBW-NEXT: vpbroadcastb %xmm3, %zmm3
; AVX512VLBW-NEXT: vpandq %zmm3, %zmm4, %zmm3
; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm5, %xmm1
; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %zmm1
; AVX512VLBW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
  %splat = shufflevector <64 x i8> %amt, <64 x i8> undef, <64 x i32> zeroinitializer
  %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> %splat)
  ret <64 x i8> %res
}

;
; Constant Shifts
;
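; Per-element constant amounts fold into a memory operand for the i64/i32 vprorv
; forms. For i16 the constant shifts become multiplies: vpmulhuw by powers of two
; produces the right-shift half and vpmullw the left-shift half. The i8 cases
; unpack to words so the per-byte shifts can run at 16-bit granularity.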
define <8 x i64> @constant_funnnel_v8i64(<8 x i64> %x) nounwind {
; AVX512-LABEL: constant_funnnel_v8i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vprorvq {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: retq
  %res = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x, <8 x i64> %x, <8 x i64> <i64 4, i64 14, i64 50, i64 60, i64 4, i64 14, i64 50, i64 60>)
  ret <8 x i64> %res
}
define <16 x i32> @constant_funnnel_v16i32(<16 x i32> %x) nounwind {
; AVX512-LABEL: constant_funnnel_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vprorvd {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: retq
  %res = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x, <16 x i32> %x, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>)
  ret <16 x i32> %res
}
define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x) nounwind {
; AVX512F-LABEL: constant_funnnel_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = <u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2>
; AVX512F-NEXT: vpmulhuw %ymm2, %ymm0, %ymm3
; AVX512F-NEXT: vpblendw {{.*#+}} ymm4 = ymm0[0],ymm3[1,2,3,4,5,6,7],ymm0[8],ymm3[9,10,11,12,13,14,15]
; AVX512F-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [1,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2]
; AVX512F-NEXT: vpmullw %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vpmulhuw %ymm2, %ymm1, %ymm2
; AVX512F-NEXT: vpblendw {{.*#+}} ymm3 = ymm1[0],ymm2[1,2,3,4,5,6,7],ymm1[8],ymm2[9,10,11,12,13,14,15]
; AVX512F-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; AVX512F-NEXT: vpmullw %ymm4, %ymm1, %ymm1
; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: constant_funnnel_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2>
; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm0, %ymm3
; AVX512VL-NEXT: vpblendw {{.*#+}} ymm4 = ymm0[0],ymm3[1,2,3,4,5,6,7],ymm0[8],ymm3[9,10,11,12,13,14,15]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [1,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2]
; AVX512VL-NEXT: vpmullw %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm1, %ymm2
; AVX512VL-NEXT: vpblendw {{.*#+}} ymm3 = ymm1[0],ymm2[1,2,3,4,5,6,7],ymm1[8],ymm2[9,10,11,12,13,14,15]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; AVX512VL-NEXT: vpmullw %ymm4, %ymm1, %ymm1
; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: constant_funnnel_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm1
; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm1
; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
  %res = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x, <32 x i16> %x, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
  ret <32 x i16> %res
}
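; Constant i8 rotate: the left-shift half reuses the blend ladder with the select
; masks precomputed as constants (57344,41152,... are the per-byte amounts
; pre-shifted left by 5, packed as words); the right-shift half unpacks to words,
; shifts there (multiplies on AVX512F/VL, vpsllvw on BW), and repacks with
; vpackuswb.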
define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x) nounwind {
; AVX512F-LABEL: constant_funnnel_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm4 = [57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536]
; AVX512F-NEXT: # ymm4 = mem[0,1,0,1]
; AVX512F-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm2
; AVX512F-NEXT: vpsllw $2, %ymm2, %ymm5
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512F-NEXT: vpand %ymm6, %ymm5, %ymm5
; AVX512F-NEXT: vpaddb %ymm4, %ymm4, %ymm7
; AVX512F-NEXT: vpblendvb %ymm7, %ymm5, %ymm2, %ymm2
; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm5
; AVX512F-NEXT: vpaddb %ymm7, %ymm7, %ymm8
; AVX512F-NEXT: vpblendvb %ymm8, %ymm5, %ymm2, %ymm2
; AVX512F-NEXT: vpxor %xmm5, %xmm5, %xmm5
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm9 = ymm0[8],ymm5[8],ymm0[9],ymm5[9],ymm0[10],ymm5[10],ymm0[11],ymm5[11],ymm0[12],ymm5[12],ymm0[13],ymm5[13],ymm0[14],ymm5[14],ymm0[15],ymm5[15],ymm0[24],ymm5[24],ymm0[25],ymm5[25],ymm0[26],ymm5[26],ymm0[27],ymm5[27],ymm0[28],ymm5[28],ymm0[29],ymm5[29],ymm0[30],ymm5[30],ymm0[31],ymm5[31]
; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm10 = [256,2,4,8,16,32,64,128,256,2,4,8,16,32,64,128]
; AVX512F-NEXT: # ymm10 = mem[0,1,0,1]
; AVX512F-NEXT: vpmullw %ymm10, %ymm9, %ymm9
; AVX512F-NEXT: vpsrlw $8, %ymm9, %ymm9
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm5[0],ymm0[1],ymm5[1],ymm0[2],ymm5[2],ymm0[3],ymm5[3],ymm0[4],ymm5[4],ymm0[5],ymm5[5],ymm0[6],ymm5[6],ymm0[7],ymm5[7],ymm0[16],ymm5[16],ymm0[17],ymm5[17],ymm0[18],ymm5[18],ymm0[19],ymm5[19],ymm0[20],ymm5[20],ymm0[21],ymm5[21],ymm0[22],ymm5[22],ymm0[23],ymm5[23]
; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm11 = [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2]
; AVX512F-NEXT: # ymm11 = mem[0,1,0,1]
; AVX512F-NEXT: vpmullw %ymm11, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512F-NEXT: vpackuswb %ymm9, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm2
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpblendvb %ymm4, %ymm2, %ymm1, %ymm2
; AVX512F-NEXT: vpsllw $2, %ymm2, %ymm3
; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512F-NEXT: vpblendvb %ymm7, %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm3
; AVX512F-NEXT: vpblendvb %ymm8, %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm5[8],ymm1[9],ymm5[9],ymm1[10],ymm5[10],ymm1[11],ymm5[11],ymm1[12],ymm5[12],ymm1[13],ymm5[13],ymm1[14],ymm5[14],ymm1[15],ymm5[15],ymm1[24],ymm5[24],ymm1[25],ymm5[25],ymm1[26],ymm5[26],ymm1[27],ymm5[27],ymm1[28],ymm5[28],ymm1[29],ymm5[29],ymm1[30],ymm5[30],ymm1[31],ymm5[31]
; AVX512F-NEXT: vpmullw %ymm10, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm5[0],ymm1[1],ymm5[1],ymm1[2],ymm5[2],ymm1[3],ymm5[3],ymm1[4],ymm5[4],ymm1[5],ymm5[5],ymm1[6],ymm5[6],ymm1[7],ymm5[7],ymm1[16],ymm5[16],ymm1[17],ymm5[17],ymm1[18],ymm5[18],ymm1[19],ymm5[19],ymm1[20],ymm5[20],ymm1[21],ymm5[21],ymm1[22],ymm5[22],ymm1[23],ymm5[23]
; AVX512F-NEXT: vpmullw %ymm11, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512F-NEXT: vpackuswb %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpor %ymm1, %ymm2, %ymm1
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: constant_funnnel_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm4 = [57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536]
; AVX512VL-NEXT: # ymm4 = mem[0,1,0,1]
; AVX512VL-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm2
; AVX512VL-NEXT: vpsllw $2, %ymm2, %ymm5
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512VL-NEXT: vpand %ymm6, %ymm5, %ymm5
; AVX512VL-NEXT: vpaddb %ymm4, %ymm4, %ymm7
; AVX512VL-NEXT: vpblendvb %ymm7, %ymm5, %ymm2, %ymm2
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm5
; AVX512VL-NEXT: vpaddb %ymm7, %ymm7, %ymm8
; AVX512VL-NEXT: vpblendvb %ymm8, %ymm5, %ymm2, %ymm2
; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512VL-NEXT: vpsrlw $8, %ymm5, %ymm5
; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm9 = [256,2,4,8,16,32,64,128,256,2,4,8,16,32,64,128]
; AVX512VL-NEXT: # ymm9 = mem[0,1,0,1]
; AVX512VL-NEXT: vpmullw %ymm9, %ymm5, %ymm5
; AVX512VL-NEXT: vpsrlw $8, %ymm5, %ymm5
; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm10 = [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2]
; AVX512VL-NEXT: # ymm10 = mem[0,1,0,1]
; AVX512VL-NEXT: vpmullw %ymm10, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512VL-NEXT: vpackuswb %ymm5, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm2
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpblendvb %ymm4, %ymm2, %ymm1, %ymm2
; AVX512VL-NEXT: vpsllw $2, %ymm2, %ymm3
; AVX512VL-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512VL-NEXT: vpblendvb %ymm7, %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm3
; AVX512VL-NEXT: vpblendvb %ymm8, %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512VL-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512VL-NEXT: vpmullw %ymm9, %ymm3, %ymm3
; AVX512VL-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512VL-NEXT: vpmullw %ymm10, %ymm1, %ymm1
; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512VL-NEXT: vpackuswb %ymm3, %ymm1, %ymm1
; AVX512VL-NEXT: vpor %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: constant_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536]
; AVX512BW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT: vpblendmb %zmm2, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpsllw $2, %zmm2, %zmm3
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vpaddb %zmm2, %zmm2, %zmm2 {%k1}
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm1 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536]
; AVX512VLBW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k1
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512VLBW-NEXT: vpblendmb %zmm2, %zmm0, %zmm2 {%k1}
; AVX512VLBW-NEXT: vpsllw $2, %zmm2, %zmm3
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
; AVX512VLBW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k1
; AVX512VLBW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
; AVX512VLBW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k1
; AVX512VLBW-NEXT: vpaddb %zmm2, %zmm2, %zmm2 {%k1}
; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} zmm1 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
; AVX512VLBW-NEXT: vpsrlw $8, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsrlw $8, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
; AVX512VLBW-NEXT: vpsrlw $8, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512VLBW-NEXT: vpsrlw $8, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512VLBW-NEXT: retq
  %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1>)
  ret <64 x i8> %res
}
;
; Uniform Constant Shifts
;
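; A uniform constant amount becomes an immediate: i64/i32 use vprorq/vprord
; directly, i16 needs just one fixed vpsrlw/vpsllw pair, and i8 a fixed shift
; pair plus nibble masks.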
define <8 x i64> @splatconstant_funnnel_v8i64(<8 x i64> %x) nounwind {
; AVX512-LABEL: splatconstant_funnnel_v8i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vprorq $14, %zmm0, %zmm0
; AVX512-NEXT: retq
  %res = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x, <8 x i64> %x, <8 x i64> <i64 14, i64 14, i64 14, i64 14, i64 14, i64 14, i64 14, i64 14>)
  ret <8 x i64> %res
}
define <16 x i32> @splatconstant_funnnel_v16i32(<16 x i32> %x) nounwind {
; AVX512-LABEL: splatconstant_funnnel_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vprord $4, %zmm0, %zmm0
; AVX512-NEXT: retq
  %res = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x, <16 x i32> %x, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>)
  ret <16 x i32> %res
}
define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x) nounwind {
; AVX512F-LABEL: splatconstant_funnnel_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm2
; AVX512F-NEXT: vpsllw $9, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $7, %ymm1, %ymm2
; AVX512F-NEXT: vpsllw $9, %ymm1, %ymm1
; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm2
; AVX512VL-NEXT: vpsllw $9, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $7, %ymm1, %ymm2
; AVX512VL-NEXT: vpsllw $9, %ymm1, %ymm1
; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm1
; AVX512BW-NEXT: vpsllw $9, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsrlw $7, %zmm0, %zmm1
; AVX512VLBW-NEXT: vpsllw $9, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
  %res = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x, <32 x i16> %x, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  ret <32 x i16> %res
}
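; Uniform i8 rotate by 4 swaps nibbles: one vpsllw $4 / vpsrlw $4 pair with 0xF0
; masks (vpand/vpandn) reassembles the bytes, per 256-bit half on AVX512F/VL and
; on the full zmm for the BW targets.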
define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind {
; AVX512F-LABEL: splatconstant_funnnel_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpandn %ymm2, %ymm3, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm2
; AVX512F-NEXT: vpandn %ymm2, %ymm3, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpandn %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm2
; AVX512VL-NEXT: vpandn %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm1
; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm1
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
  %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
  ret <64 x i8> %res
}