; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,NOBW,NOVBMI,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX512,NOVBMI,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512BW,VBMI

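; A variable shuffle of <8 x i64> by a runtime index vector should lower to a single cross-lane permute on every AVX512 subtarget.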
define <8 x i64> @var_shuffle_v8i64(<8 x i64> %v, <8 x i64> %indices) nounwind {
; AVX512-LABEL: var_shuffle_v8i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermq %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
  %index0 = extractelement <8 x i64> %indices, i32 0
  %index1 = extractelement <8 x i64> %indices, i32 1
  %index2 = extractelement <8 x i64> %indices, i32 2
  %index3 = extractelement <8 x i64> %indices, i32 3
  %index4 = extractelement <8 x i64> %indices, i32 4
  %index5 = extractelement <8 x i64> %indices, i32 5
  %index6 = extractelement <8 x i64> %indices, i32 6
  %index7 = extractelement <8 x i64> %indices, i32 7
  %v0 = extractelement <8 x i64> %v, i64 %index0
  %v1 = extractelement <8 x i64> %v, i64 %index1
  %v2 = extractelement <8 x i64> %v, i64 %index2
  %v3 = extractelement <8 x i64> %v, i64 %index3
  %v4 = extractelement <8 x i64> %v, i64 %index4
  %v5 = extractelement <8 x i64> %v, i64 %index5
  %v6 = extractelement <8 x i64> %v, i64 %index6
  %v7 = extractelement <8 x i64> %v, i64 %index7
  %ret0 = insertelement <8 x i64> undef, i64 %v0, i32 0
  %ret1 = insertelement <8 x i64> %ret0, i64 %v1, i32 1
  %ret2 = insertelement <8 x i64> %ret1, i64 %v2, i32 2
  %ret3 = insertelement <8 x i64> %ret2, i64 %v3, i32 3
  %ret4 = insertelement <8 x i64> %ret3, i64 %v4, i32 4
  %ret5 = insertelement <8 x i64> %ret4, i64 %v5, i32 5
  %ret6 = insertelement <8 x i64> %ret5, i64 %v6, i32 6
  %ret7 = insertelement <8 x i64> %ret6, i64 %v7, i32 7
  ret <8 x i64> %ret7
}

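; Likewise, a variable <16 x i32> shuffle needs only a single cross-lane permute with baseline AVX512F.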
define <16 x i32> @var_shuffle_v16i32(<16 x i32> %v, <16 x i32> %indices) nounwind {
; AVX512-LABEL: var_shuffle_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermd %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
  %index0 = extractelement <16 x i32> %indices, i32 0
  %index1 = extractelement <16 x i32> %indices, i32 1
  %index2 = extractelement <16 x i32> %indices, i32 2
  %index3 = extractelement <16 x i32> %indices, i32 3
  %index4 = extractelement <16 x i32> %indices, i32 4
  %index5 = extractelement <16 x i32> %indices, i32 5
  %index6 = extractelement <16 x i32> %indices, i32 6
  %index7 = extractelement <16 x i32> %indices, i32 7
  %index8 = extractelement <16 x i32> %indices, i32 8
  %index9 = extractelement <16 x i32> %indices, i32 9
  %index10 = extractelement <16 x i32> %indices, i32 10
  %index11 = extractelement <16 x i32> %indices, i32 11
  %index12 = extractelement <16 x i32> %indices, i32 12
  %index13 = extractelement <16 x i32> %indices, i32 13
  %index14 = extractelement <16 x i32> %indices, i32 14
  %index15 = extractelement <16 x i32> %indices, i32 15
  %v0 = extractelement <16 x i32> %v, i32 %index0
  %v1 = extractelement <16 x i32> %v, i32 %index1
  %v2 = extractelement <16 x i32> %v, i32 %index2
  %v3 = extractelement <16 x i32> %v, i32 %index3
  %v4 = extractelement <16 x i32> %v, i32 %index4
  %v5 = extractelement <16 x i32> %v, i32 %index5
  %v6 = extractelement <16 x i32> %v, i32 %index6
  %v7 = extractelement <16 x i32> %v, i32 %index7
  %v8 = extractelement <16 x i32> %v, i32 %index8
  %v9 = extractelement <16 x i32> %v, i32 %index9
  %v10 = extractelement <16 x i32> %v, i32 %index10
  %v11 = extractelement <16 x i32> %v, i32 %index11
  %v12 = extractelement <16 x i32> %v, i32 %index12
  %v13 = extractelement <16 x i32> %v, i32 %index13
  %v14 = extractelement <16 x i32> %v, i32 %index14
  %v15 = extractelement <16 x i32> %v, i32 %index15
  %ret0 = insertelement <16 x i32> undef, i32 %v0, i32 0
  %ret1 = insertelement <16 x i32> %ret0, i32 %v1, i32 1
  %ret2 = insertelement <16 x i32> %ret1, i32 %v2, i32 2
  %ret3 = insertelement <16 x i32> %ret2, i32 %v3, i32 3
  %ret4 = insertelement <16 x i32> %ret3, i32 %v4, i32 4
  %ret5 = insertelement <16 x i32> %ret4, i32 %v5, i32 5
  %ret6 = insertelement <16 x i32> %ret5, i32 %v6, i32 6
  %ret7 = insertelement <16 x i32> %ret6, i32 %v7, i32 7
  %ret8 = insertelement <16 x i32> %ret7, i32 %v8, i32 8
  %ret9 = insertelement <16 x i32> %ret8, i32 %v9, i32 9
  %ret10 = insertelement <16 x i32> %ret9, i32 %v10, i32 10
  %ret11 = insertelement <16 x i32> %ret10, i32 %v11, i32 11
  %ret12 = insertelement <16 x i32> %ret11, i32 %v12, i32 12
  %ret13 = insertelement <16 x i32> %ret12, i32 %v13, i32 13
  %ret14 = insertelement <16 x i32> %ret13, i32 %v14, i32 14
  %ret15 = insertelement <16 x i32> %ret14, i32 %v15, i32 15
  ret <16 x i32> %ret15
}

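; Without AVX512BW, a variable <32 x i16> shuffle is scalarized through a stack temporary; with AVX512BW it becomes a single vpermw.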
define <32 x i16> @var_shuffle_v32i16(<32 x i16> %v, <32 x i16> %indices) nounwind {
; NOBW-LABEL: var_shuffle_v32i16:
; NOBW: # %bb.0:
; NOBW-NEXT: pushq %rbp
; NOBW-NEXT: movq %rsp, %rbp
; NOBW-NEXT: andq $-64, %rsp
; NOBW-NEXT: subq $2112, %rsp # imm = 0x840
; NOBW-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; NOBW-NEXT: vextracti128 $1, %ymm1, %xmm3
; NOBW-NEXT: vextracti128 $1, %ymm2, %xmm4
; NOBW-NEXT: vmovd %xmm4, %eax
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, (%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: movzwl 1536(%rsp,%rax,2), %eax
; NOBW-NEXT: vmovd %eax, %xmm0
; NOBW-NEXT: vpextrw $1, %xmm4, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $1, 1600(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT: vpextrw $2, %xmm4, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $2, 1664(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT: vpextrw $3, %xmm4, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $3, 1728(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT: vpextrw $4, %xmm4, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $4, 1792(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT: vpextrw $5, %xmm4, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $5, 1856(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT: vpextrw $6, %xmm4, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $6, 1920(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT: vpextrw $7, %xmm4, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $7, 1984(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT: vmovd %xmm2, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: movzwl 1024(%rsp,%rax,2), %eax
; NOBW-NEXT: vmovd %eax, %xmm4
; NOBW-NEXT: vpextrw $1, %xmm2, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $1, 1088(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $2, %xmm2, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $2, 1152(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $3, %xmm2, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $3, 1216(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $4, %xmm2, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $4, 1280(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $5, %xmm2, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $5, 1344(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $6, %xmm2, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $6, 1408(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $7, %xmm2, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $7, 1472(%rsp,%rax,2), %xmm4, %xmm2
; NOBW-NEXT: vmovd %xmm3, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: movzwl 512(%rsp,%rax,2), %eax
; NOBW-NEXT: vmovd %eax, %xmm4
; NOBW-NEXT: vpextrw $1, %xmm3, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $1, 576(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $2, %xmm3, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $2, 640(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $3, %xmm3, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $3, 704(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $4, %xmm3, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $4, 768(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $5, %xmm3, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $5, 832(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $6, %xmm3, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $6, 896(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $7, %xmm3, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $7, 960(%rsp,%rax,2), %xmm4, %xmm3
; NOBW-NEXT: vmovd %xmm1, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: movzwl (%rsp,%rax,2), %eax
; NOBW-NEXT: vmovd %eax, %xmm4
; NOBW-NEXT: vpextrw $1, %xmm1, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $1, 64(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $2, %xmm1, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $2, 128(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $3, %xmm1, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $3, 192(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $4, %xmm1, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $4, 256(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $5, %xmm1, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $5, 320(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $6, %xmm1, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $6, 384(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $7, %xmm1, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $7, 448(%rsp,%rax,2), %xmm4, %xmm1
; NOBW-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0
; NOBW-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; NOBW-NEXT: movq %rbp, %rsp
; NOBW-NEXT: popq %rbp
; NOBW-NEXT: retq
;
; AVX512BW-LABEL: var_shuffle_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
  %index0 = extractelement <32 x i16> %indices, i32 0
  %index1 = extractelement <32 x i16> %indices, i32 1
  %index2 = extractelement <32 x i16> %indices, i32 2
  %index3 = extractelement <32 x i16> %indices, i32 3
  %index4 = extractelement <32 x i16> %indices, i32 4
  %index5 = extractelement <32 x i16> %indices, i32 5
  %index6 = extractelement <32 x i16> %indices, i32 6
  %index7 = extractelement <32 x i16> %indices, i32 7
  %index8 = extractelement <32 x i16> %indices, i32 8
  %index9 = extractelement <32 x i16> %indices, i32 9
  %index10 = extractelement <32 x i16> %indices, i32 10
  %index11 = extractelement <32 x i16> %indices, i32 11
  %index12 = extractelement <32 x i16> %indices, i32 12
  %index13 = extractelement <32 x i16> %indices, i32 13
  %index14 = extractelement <32 x i16> %indices, i32 14
  %index15 = extractelement <32 x i16> %indices, i32 15
  %index16 = extractelement <32 x i16> %indices, i32 16
  %index17 = extractelement <32 x i16> %indices, i32 17
  %index18 = extractelement <32 x i16> %indices, i32 18
  %index19 = extractelement <32 x i16> %indices, i32 19
  %index20 = extractelement <32 x i16> %indices, i32 20
  %index21 = extractelement <32 x i16> %indices, i32 21
  %index22 = extractelement <32 x i16> %indices, i32 22
  %index23 = extractelement <32 x i16> %indices, i32 23
  %index24 = extractelement <32 x i16> %indices, i32 24
  %index25 = extractelement <32 x i16> %indices, i32 25
  %index26 = extractelement <32 x i16> %indices, i32 26
  %index27 = extractelement <32 x i16> %indices, i32 27
  %index28 = extractelement <32 x i16> %indices, i32 28
  %index29 = extractelement <32 x i16> %indices, i32 29
  %index30 = extractelement <32 x i16> %indices, i32 30
  %index31 = extractelement <32 x i16> %indices, i32 31
  %v0 = extractelement <32 x i16> %v, i16 %index0
  %v1 = extractelement <32 x i16> %v, i16 %index1
  %v2 = extractelement <32 x i16> %v, i16 %index2
  %v3 = extractelement <32 x i16> %v, i16 %index3
  %v4 = extractelement <32 x i16> %v, i16 %index4
  %v5 = extractelement <32 x i16> %v, i16 %index5
  %v6 = extractelement <32 x i16> %v, i16 %index6
  %v7 = extractelement <32 x i16> %v, i16 %index7
  %v8 = extractelement <32 x i16> %v, i16 %index8
  %v9 = extractelement <32 x i16> %v, i16 %index9
  %v10 = extractelement <32 x i16> %v, i16 %index10
  %v11 = extractelement <32 x i16> %v, i16 %index11
  %v12 = extractelement <32 x i16> %v, i16 %index12
  %v13 = extractelement <32 x i16> %v, i16 %index13
  %v14 = extractelement <32 x i16> %v, i16 %index14
  %v15 = extractelement <32 x i16> %v, i16 %index15
  %v16 = extractelement <32 x i16> %v, i16 %index16
  %v17 = extractelement <32 x i16> %v, i16 %index17
  %v18 = extractelement <32 x i16> %v, i16 %index18
  %v19 = extractelement <32 x i16> %v, i16 %index19
  %v20 = extractelement <32 x i16> %v, i16 %index20
  %v21 = extractelement <32 x i16> %v, i16 %index21
  %v22 = extractelement <32 x i16> %v, i16 %index22
  %v23 = extractelement <32 x i16> %v, i16 %index23
  %v24 = extractelement <32 x i16> %v, i16 %index24
  %v25 = extractelement <32 x i16> %v, i16 %index25
  %v26 = extractelement <32 x i16> %v, i16 %index26
  %v27 = extractelement <32 x i16> %v, i16 %index27
  %v28 = extractelement <32 x i16> %v, i16 %index28
  %v29 = extractelement <32 x i16> %v, i16 %index29
  %v30 = extractelement <32 x i16> %v, i16 %index30
  %v31 = extractelement <32 x i16> %v, i16 %index31
  %ret0 = insertelement <32 x i16> undef, i16 %v0, i32 0
  %ret1 = insertelement <32 x i16> %ret0, i16 %v1, i32 1
  %ret2 = insertelement <32 x i16> %ret1, i16 %v2, i32 2
  %ret3 = insertelement <32 x i16> %ret2, i16 %v3, i32 3
  %ret4 = insertelement <32 x i16> %ret3, i16 %v4, i32 4
  %ret5 = insertelement <32 x i16> %ret4, i16 %v5, i32 5
  %ret6 = insertelement <32 x i16> %ret5, i16 %v6, i32 6
  %ret7 = insertelement <32 x i16> %ret6, i16 %v7, i32 7
  %ret8 = insertelement <32 x i16> %ret7, i16 %v8, i32 8
  %ret9 = insertelement <32 x i16> %ret8, i16 %v9, i32 9
  %ret10 = insertelement <32 x i16> %ret9, i16 %v10, i32 10
  %ret11 = insertelement <32 x i16> %ret10, i16 %v11, i32 11
  %ret12 = insertelement <32 x i16> %ret11, i16 %v12, i32 12
  %ret13 = insertelement <32 x i16> %ret12, i16 %v13, i32 13
  %ret14 = insertelement <32 x i16> %ret13, i16 %v14, i32 14
  %ret15 = insertelement <32 x i16> %ret14, i16 %v15, i32 15
  %ret16 = insertelement <32 x i16> %ret15, i16 %v16, i32 16
  %ret17 = insertelement <32 x i16> %ret16, i16 %v17, i32 17
  %ret18 = insertelement <32 x i16> %ret17, i16 %v18, i32 18
  %ret19 = insertelement <32 x i16> %ret18, i16 %v19, i32 19
  %ret20 = insertelement <32 x i16> %ret19, i16 %v20, i32 20
  %ret21 = insertelement <32 x i16> %ret20, i16 %v21, i32 21
  %ret22 = insertelement <32 x i16> %ret21, i16 %v22, i32 22
  %ret23 = insertelement <32 x i16> %ret22, i16 %v23, i32 23
  %ret24 = insertelement <32 x i16> %ret23, i16 %v24, i32 24
  %ret25 = insertelement <32 x i16> %ret24, i16 %v25, i32 25
  %ret26 = insertelement <32 x i16> %ret25, i16 %v26, i32 26
  %ret27 = insertelement <32 x i16> %ret26, i16 %v27, i32 27
  %ret28 = insertelement <32 x i16> %ret27, i16 %v28, i32 28
  %ret29 = insertelement <32 x i16> %ret28, i16 %v29, i32 29
  %ret30 = insertelement <32 x i16> %ret29, i16 %v30, i32 30
  %ret31 = insertelement <32 x i16> %ret30, i16 %v31, i32 31
  ret <32 x i16> %ret31
}

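; Without AVX512VBMI, a variable <64 x i8> shuffle is likewise scalarized; with VBMI it becomes a single vpermb.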
define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind {
; NOBW-LABEL: var_shuffle_v64i8:
; NOBW: # %bb.0:
; NOBW-NEXT: pushq %rbp
; NOBW-NEXT: movq %rsp, %rbp
; NOBW-NEXT: andq $-64, %rsp
; NOBW-NEXT: subq $4160, %rsp # imm = 0x1040
; NOBW-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; NOBW-NEXT: vextracti128 $1, %ymm1, %xmm3
; NOBW-NEXT: vextracti128 $1, %ymm2, %xmm4
; NOBW-NEXT: vpextrb $0, %xmm4, %eax
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, (%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: movzbl 3072(%rsp,%rax), %eax
; NOBW-NEXT: vmovd %eax, %xmm0
; NOBW-NEXT: vpextrb $1, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $1, 3136(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $2, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $2, 3200(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $3, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $3, 3264(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $4, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $4, 3328(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $5, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $5, 3392(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $6, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $6, 3456(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $7, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $7, 3520(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $8, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $8, 3584(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $9, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $9, 3648(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $10, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $10, 3712(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $11, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $11, 3776(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $12, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $12, 3840(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $13, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $13, 3904(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $14, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $14, 3968(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $15, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $15, 4032(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $0, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: movzbl 2048(%rsp,%rax), %eax
; NOBW-NEXT: vmovd %eax, %xmm4
; NOBW-NEXT: vpextrb $1, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $1, 2112(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $2, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $2, 2176(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $3, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $3, 2240(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $4, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $4, 2304(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $5, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $5, 2368(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $6, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $6, 2432(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $7, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $7, 2496(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $8, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $8, 2560(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $9, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $9, 2624(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $10, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $10, 2688(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $11, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $11, 2752(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $12, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $12, 2816(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $13, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $13, 2880(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $14, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $14, 2944(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $15, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $15, 3008(%rsp,%rax), %xmm4, %xmm2
; NOBW-NEXT: vpextrb $0, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: movzbl 1024(%rsp,%rax), %eax
; NOBW-NEXT: vmovd %eax, %xmm4
; NOBW-NEXT: vpextrb $1, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $1, 1088(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $2, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $2, 1152(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $3, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $3, 1216(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $4, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $4, 1280(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $5, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $5, 1344(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $6, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $6, 1408(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $7, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $7, 1472(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $8, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $8, 1536(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $9, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $9, 1600(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $10, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $10, 1664(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $11, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $11, 1728(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $12, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $12, 1792(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $13, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $13, 1856(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $14, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $14, 1920(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $15, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $15, 1984(%rsp,%rax), %xmm4, %xmm3
; NOBW-NEXT: vpextrb $0, %xmm1, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: movzbl (%rsp,%rax), %eax
; NOBW-NEXT: vmovd %eax, %xmm4
; NOBW-NEXT: vpextrb $1, %xmm1, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $1, 64(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $2, %xmm1, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $2, 128(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $3, %xmm1, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $3, 192(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $4, %xmm1, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $4, 256(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $5, %xmm1, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $5, 320(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $6, %xmm1, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $6, 384(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $7, %xmm1, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $7, 448(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $8, %xmm1, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $8, 512(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $9, %xmm1, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $9, 576(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $10, %xmm1, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $10, 640(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $11, %xmm1, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $11, 704(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $12, %xmm1, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $12, 768(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $13, %xmm1, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $13, 832(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $14, %xmm1, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $14, 896(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $15, %xmm1, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $15, 960(%rsp,%rax), %xmm4, %xmm1
; NOBW-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0
; NOBW-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; NOBW-NEXT: movq %rbp, %rsp
; NOBW-NEXT: popq %rbp
; NOBW-NEXT: retq
;
; VBMI-LABEL: var_shuffle_v64i8:
; VBMI: # %bb.0:
; VBMI-NEXT: vpermb %zmm0, %zmm1, %zmm0
; VBMI-NEXT: retq
  %index0 = extractelement <64 x i8> %indices, i32 0
  %index1 = extractelement <64 x i8> %indices, i32 1
  %index2 = extractelement <64 x i8> %indices, i32 2
  %index3 = extractelement <64 x i8> %indices, i32 3
  %index4 = extractelement <64 x i8> %indices, i32 4
  %index5 = extractelement <64 x i8> %indices, i32 5
  %index6 = extractelement <64 x i8> %indices, i32 6
  %index7 = extractelement <64 x i8> %indices, i32 7
  %index8 = extractelement <64 x i8> %indices, i32 8
  %index9 = extractelement <64 x i8> %indices, i32 9
  %index10 = extractelement <64 x i8> %indices, i32 10
  %index11 = extractelement <64 x i8> %indices, i32 11
  %index12 = extractelement <64 x i8> %indices, i32 12
  %index13 = extractelement <64 x i8> %indices, i32 13
  %index14 = extractelement <64 x i8> %indices, i32 14
  %index15 = extractelement <64 x i8> %indices, i32 15
  %index16 = extractelement <64 x i8> %indices, i32 16
  %index17 = extractelement <64 x i8> %indices, i32 17
  %index18 = extractelement <64 x i8> %indices, i32 18
  %index19 = extractelement <64 x i8> %indices, i32 19
  %index20 = extractelement <64 x i8> %indices, i32 20
  %index21 = extractelement <64 x i8> %indices, i32 21
  %index22 = extractelement <64 x i8> %indices, i32 22
  %index23 = extractelement <64 x i8> %indices, i32 23
  %index24 = extractelement <64 x i8> %indices, i32 24
  %index25 = extractelement <64 x i8> %indices, i32 25
  %index26 = extractelement <64 x i8> %indices, i32 26
  %index27 = extractelement <64 x i8> %indices, i32 27
  %index28 = extractelement <64 x i8> %indices, i32 28
  %index29 = extractelement <64 x i8> %indices, i32 29
  %index30 = extractelement <64 x i8> %indices, i32 30
  %index31 = extractelement <64 x i8> %indices, i32 31
  %index32 = extractelement <64 x i8> %indices, i32 32
  %index33 = extractelement <64 x i8> %indices, i32 33
  %index34 = extractelement <64 x i8> %indices, i32 34
  %index35 = extractelement <64 x i8> %indices, i32 35
  %index36 = extractelement <64 x i8> %indices, i32 36
  %index37 = extractelement <64 x i8> %indices, i32 37
  %index38 = extractelement <64 x i8> %indices, i32 38
  %index39 = extractelement <64 x i8> %indices, i32 39
  %index40 = extractelement <64 x i8> %indices, i32 40
  %index41 = extractelement <64 x i8> %indices, i32 41
  %index42 = extractelement <64 x i8> %indices, i32 42
  %index43 = extractelement <64 x i8> %indices, i32 43
  %index44 = extractelement <64 x i8> %indices, i32 44
  %index45 = extractelement <64 x i8> %indices, i32 45
  %index46 = extractelement <64 x i8> %indices, i32 46
  %index47 = extractelement <64 x i8> %indices, i32 47
  %index48 = extractelement <64 x i8> %indices, i32 48
  %index49 = extractelement <64 x i8> %indices, i32 49
  %index50 = extractelement <64 x i8> %indices, i32 50
  %index51 = extractelement <64 x i8> %indices, i32 51
  %index52 = extractelement <64 x i8> %indices, i32 52
  %index53 = extractelement <64 x i8> %indices, i32 53
  %index54 = extractelement <64 x i8> %indices, i32 54
  %index55 = extractelement <64 x i8> %indices, i32 55
  %index56 = extractelement <64 x i8> %indices, i32 56
  %index57 = extractelement <64 x i8> %indices, i32 57
  %index58 = extractelement <64 x i8> %indices, i32 58
  %index59 = extractelement <64 x i8> %indices, i32 59
  %index60 = extractelement <64 x i8> %indices, i32 60
  %index61 = extractelement <64 x i8> %indices, i32 61
  %index62 = extractelement <64 x i8> %indices, i32 62
  %index63 = extractelement <64 x i8> %indices, i32 63
  %v0 = extractelement <64 x i8> %v, i8 %index0
  %v1 = extractelement <64 x i8> %v, i8 %index1
  %v2 = extractelement <64 x i8> %v, i8 %index2
  %v3 = extractelement <64 x i8> %v, i8 %index3
  %v4 = extractelement <64 x i8> %v, i8 %index4
  %v5 = extractelement <64 x i8> %v, i8 %index5
  %v6 = extractelement <64 x i8> %v, i8 %index6
  %v7 = extractelement <64 x i8> %v, i8 %index7
  %v8 = extractelement <64 x i8> %v, i8 %index8
  %v9 = extractelement <64 x i8> %v, i8 %index9
  %v10 = extractelement <64 x i8> %v, i8 %index10
  %v11 = extractelement <64 x i8> %v, i8 %index11
  %v12 = extractelement <64 x i8> %v, i8 %index12
  %v13 = extractelement <64 x i8> %v, i8 %index13
  %v14 = extractelement <64 x i8> %v, i8 %index14
  %v15 = extractelement <64 x i8> %v, i8 %index15
  %v16 = extractelement <64 x i8> %v, i8 %index16
  %v17 = extractelement <64 x i8> %v, i8 %index17
  %v18 = extractelement <64 x i8> %v, i8 %index18
  %v19 = extractelement <64 x i8> %v, i8 %index19
  %v20 = extractelement <64 x i8> %v, i8 %index20
  %v21 = extractelement <64 x i8> %v, i8 %index21
  %v22 = extractelement <64 x i8> %v, i8 %index22
  %v23 = extractelement <64 x i8> %v, i8 %index23
  %v24 = extractelement <64 x i8> %v, i8 %index24
  %v25 = extractelement <64 x i8> %v, i8 %index25
  %v26 = extractelement <64 x i8> %v, i8 %index26
  %v27 = extractelement <64 x i8> %v, i8 %index27
  %v28 = extractelement <64 x i8> %v, i8 %index28
  %v29 = extractelement <64 x i8> %v, i8 %index29
  %v30 = extractelement <64 x i8> %v, i8 %index30
  %v31 = extractelement <64 x i8> %v, i8 %index31
  %v32 = extractelement <64 x i8> %v, i8 %index32
  %v33 = extractelement <64 x i8> %v, i8 %index33
  %v34 = extractelement <64 x i8> %v, i8 %index34
  %v35 = extractelement <64 x i8> %v, i8 %index35
  %v36 = extractelement <64 x i8> %v, i8 %index36
  %v37 = extractelement <64 x i8> %v, i8 %index37
  %v38 = extractelement <64 x i8> %v, i8 %index38
  %v39 = extractelement <64 x i8> %v, i8 %index39
  %v40 = extractelement <64 x i8> %v, i8 %index40
  %v41 = extractelement <64 x i8> %v, i8 %index41
  %v42 = extractelement <64 x i8> %v, i8 %index42
  %v43 = extractelement <64 x i8> %v, i8 %index43
  %v44 = extractelement <64 x i8> %v, i8 %index44
  %v45 = extractelement <64 x i8> %v, i8 %index45
  %v46 = extractelement <64 x i8> %v, i8 %index46
  %v47 = extractelement <64 x i8> %v, i8 %index47
  %v48 = extractelement <64 x i8> %v, i8 %index48
  %v49 = extractelement <64 x i8> %v, i8 %index49
  %v50 = extractelement <64 x i8> %v, i8 %index50
  %v51 = extractelement <64 x i8> %v, i8 %index51
  %v52 = extractelement <64 x i8> %v, i8 %index52
  %v53 = extractelement <64 x i8> %v, i8 %index53
  %v54 = extractelement <64 x i8> %v, i8 %index54
  %v55 = extractelement <64 x i8> %v, i8 %index55
  %v56 = extractelement <64 x i8> %v, i8 %index56
  %v57 = extractelement <64 x i8> %v, i8 %index57
  %v58 = extractelement <64 x i8> %v, i8 %index58
  %v59 = extractelement <64 x i8> %v, i8 %index59
  %v60 = extractelement <64 x i8> %v, i8 %index60
  %v61 = extractelement <64 x i8> %v, i8 %index61
  %v62 = extractelement <64 x i8> %v, i8 %index62
  %v63 = extractelement <64 x i8> %v, i8 %index63
  %ret0 = insertelement <64 x i8> undef, i8 %v0, i32 0
  %ret1 = insertelement <64 x i8> %ret0, i8 %v1, i32 1
  %ret2 = insertelement <64 x i8> %ret1, i8 %v2, i32 2
  %ret3 = insertelement <64 x i8> %ret2, i8 %v3, i32 3
  %ret4 = insertelement <64 x i8> %ret3, i8 %v4, i32 4
  %ret5 = insertelement <64 x i8> %ret4, i8 %v5, i32 5
  %ret6 = insertelement <64 x i8> %ret5, i8 %v6, i32 6
  %ret7 = insertelement <64 x i8> %ret6, i8 %v7, i32 7
  %ret8 = insertelement <64 x i8> %ret7, i8 %v8, i32 8
  %ret9 = insertelement <64 x i8> %ret8, i8 %v9, i32 9
  %ret10 = insertelement <64 x i8> %ret9, i8 %v10, i32 10
  %ret11 = insertelement <64 x i8> %ret10, i8 %v11, i32 11
  %ret12 = insertelement <64 x i8> %ret11, i8 %v12, i32 12
  %ret13 = insertelement <64 x i8> %ret12, i8 %v13, i32 13
  %ret14 = insertelement <64 x i8> %ret13, i8 %v14, i32 14
  %ret15 = insertelement <64 x i8> %ret14, i8 %v15, i32 15
  %ret16 = insertelement <64 x i8> %ret15, i8 %v16, i32 16
  %ret17 = insertelement <64 x i8> %ret16, i8 %v17, i32 17
  %ret18 = insertelement <64 x i8> %ret17, i8 %v18, i32 18
  %ret19 = insertelement <64 x i8> %ret18, i8 %v19, i32 19
  %ret20 = insertelement <64 x i8> %ret19, i8 %v20, i32 20
  %ret21 = insertelement <64 x i8> %ret20, i8 %v21, i32 21
  %ret22 = insertelement <64 x i8> %ret21, i8 %v22, i32 22
  %ret23 = insertelement <64 x i8> %ret22, i8 %v23, i32 23
  %ret24 = insertelement <64 x i8> %ret23, i8 %v24, i32 24
  %ret25 = insertelement <64 x i8> %ret24, i8 %v25, i32 25
  %ret26 = insertelement <64 x i8> %ret25, i8 %v26, i32 26
  %ret27 = insertelement <64 x i8> %ret26, i8 %v27, i32 27
  %ret28 = insertelement <64 x i8> %ret27, i8 %v28, i32 28
  %ret29 = insertelement <64 x i8> %ret28, i8 %v29, i32 29
  %ret30 = insertelement <64 x i8> %ret29, i8 %v30, i32 30
  %ret31 = insertelement <64 x i8> %ret30, i8 %v31, i32 31
  %ret32 = insertelement <64 x i8> %ret31, i8 %v32, i32 32
  %ret33 = insertelement <64 x i8> %ret32, i8 %v33, i32 33
  %ret34 = insertelement <64 x i8> %ret33, i8 %v34, i32 34
  %ret35 = insertelement <64 x i8> %ret34, i8 %v35, i32 35
  %ret36 = insertelement <64 x i8> %ret35, i8 %v36, i32 36
  %ret37 = insertelement <64 x i8> %ret36, i8 %v37, i32 37
  %ret38 = insertelement <64 x i8> %ret37, i8 %v38, i32 38
  %ret39 = insertelement <64 x i8> %ret38, i8 %v39, i32 39
  %ret40 = insertelement <64 x i8> %ret39, i8 %v40, i32 40
  %ret41 = insertelement <64 x i8> %ret40, i8 %v41, i32 41
  %ret42 = insertelement <64 x i8> %ret41, i8 %v42, i32 42
  %ret43 = insertelement <64 x i8> %ret42, i8 %v43, i32 43
  %ret44 = insertelement <64 x i8> %ret43, i8 %v44, i32 44
  %ret45 = insertelement <64 x i8> %ret44, i8 %v45, i32 45
  %ret46 = insertelement <64 x i8> %ret45, i8 %v46, i32 46
  %ret47 = insertelement <64 x i8> %ret46, i8 %v47, i32 47
  %ret48 = insertelement <64 x i8> %ret47, i8 %v48, i32 48
  %ret49 = insertelement <64 x i8> %ret48, i8 %v49, i32 49
  %ret50 = insertelement <64 x i8> %ret49, i8 %v50, i32 50
  %ret51 = insertelement <64 x i8> %ret50, i8 %v51, i32 51
  %ret52 = insertelement <64 x i8> %ret51, i8 %v52, i32 52
  %ret53 = insertelement <64 x i8> %ret52, i8 %v53, i32 53
  %ret54 = insertelement <64 x i8> %ret53, i8 %v54, i32 54
  %ret55 = insertelement <64 x i8> %ret54, i8 %v55, i32 55
  %ret56 = insertelement <64 x i8> %ret55, i8 %v56, i32 56
  %ret57 = insertelement <64 x i8> %ret56, i8 %v57, i32 57
  %ret58 = insertelement <64 x i8> %ret57, i8 %v58, i32 58
  %ret59 = insertelement <64 x i8> %ret58, i8 %v59, i32 59
  %ret60 = insertelement <64 x i8> %ret59, i8 %v60, i32 60
  %ret61 = insertelement <64 x i8> %ret60, i8 %v61, i32 61
  %ret62 = insertelement <64 x i8> %ret61, i8 %v62, i32 62
  %ret63 = insertelement <64 x i8> %ret62, i8 %v63, i32 63
  ret <64 x i8> %ret63
}

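; The floating-point variants lower the same way as the integer ones: a single vpermpd for <8 x double>.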
define <8 x double> @var_shuffle_v8f64(<8 x double> %v, <8 x i64> %indices) nounwind {
; AVX512-LABEL: var_shuffle_v8f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
  %index0 = extractelement <8 x i64> %indices, i32 0
  %index1 = extractelement <8 x i64> %indices, i32 1
  %index2 = extractelement <8 x i64> %indices, i32 2
  %index3 = extractelement <8 x i64> %indices, i32 3
  %index4 = extractelement <8 x i64> %indices, i32 4
  %index5 = extractelement <8 x i64> %indices, i32 5
  %index6 = extractelement <8 x i64> %indices, i32 6
  %index7 = extractelement <8 x i64> %indices, i32 7
  %v0 = extractelement <8 x double> %v, i64 %index0
  %v1 = extractelement <8 x double> %v, i64 %index1
  %v2 = extractelement <8 x double> %v, i64 %index2
  %v3 = extractelement <8 x double> %v, i64 %index3
  %v4 = extractelement <8 x double> %v, i64 %index4
  %v5 = extractelement <8 x double> %v, i64 %index5
  %v6 = extractelement <8 x double> %v, i64 %index6
  %v7 = extractelement <8 x double> %v, i64 %index7
  %ret0 = insertelement <8 x double> undef, double %v0, i32 0
  %ret1 = insertelement <8 x double> %ret0, double %v1, i32 1
  %ret2 = insertelement <8 x double> %ret1, double %v2, i32 2
  %ret3 = insertelement <8 x double> %ret2, double %v3, i32 3
  %ret4 = insertelement <8 x double> %ret3, double %v4, i32 4
  %ret5 = insertelement <8 x double> %ret4, double %v5, i32 5
  %ret6 = insertelement <8 x double> %ret5, double %v6, i32 6
  %ret7 = insertelement <8 x double> %ret6, double %v7, i32 7
  ret <8 x double> %ret7
}

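; A variable <16 x float> shuffle similarly lowers to a single vpermps.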
define <16 x float> @var_shuffle_v16f32(<16 x float> %v, <16 x i32> %indices) nounwind {
; AVX512-LABEL: var_shuffle_v16f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermps %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
  %index0 = extractelement <16 x i32> %indices, i32 0
  %index1 = extractelement <16 x i32> %indices, i32 1
  %index2 = extractelement <16 x i32> %indices, i32 2
  %index3 = extractelement <16 x i32> %indices, i32 3
  %index4 = extractelement <16 x i32> %indices, i32 4
  %index5 = extractelement <16 x i32> %indices, i32 5
  %index6 = extractelement <16 x i32> %indices, i32 6
  %index7 = extractelement <16 x i32> %indices, i32 7
  %index8 = extractelement <16 x i32> %indices, i32 8
  %index9 = extractelement <16 x i32> %indices, i32 9
  %index10 = extractelement <16 x i32> %indices, i32 10
  %index11 = extractelement <16 x i32> %indices, i32 11
  %index12 = extractelement <16 x i32> %indices, i32 12
  %index13 = extractelement <16 x i32> %indices, i32 13
  %index14 = extractelement <16 x i32> %indices, i32 14
  %index15 = extractelement <16 x i32> %indices, i32 15
  %v0 = extractelement <16 x float> %v, i32 %index0
  %v1 = extractelement <16 x float> %v, i32 %index1
  %v2 = extractelement <16 x float> %v, i32 %index2
  %v3 = extractelement <16 x float> %v, i32 %index3
  %v4 = extractelement <16 x float> %v, i32 %index4
  %v5 = extractelement <16 x float> %v, i32 %index5
  %v6 = extractelement <16 x float> %v, i32 %index6
  %v7 = extractelement <16 x float> %v, i32 %index7
  %v8 = extractelement <16 x float> %v, i32 %index8
  %v9 = extractelement <16 x float> %v, i32 %index9
  %v10 = extractelement <16 x float> %v, i32 %index10
  %v11 = extractelement <16 x float> %v, i32 %index11
  %v12 = extractelement <16 x float> %v, i32 %index12
  %v13 = extractelement <16 x float> %v, i32 %index13
  %v14 = extractelement <16 x float> %v, i32 %index14
  %v15 = extractelement <16 x float> %v, i32 %index15
  %ret0 = insertelement <16 x float> undef, float %v0, i32 0
  %ret1 = insertelement <16 x float> %ret0, float %v1, i32 1
  %ret2 = insertelement <16 x float> %ret1, float %v2, i32 2
  %ret3 = insertelement <16 x float> %ret2, float %v3, i32 3
  %ret4 = insertelement <16 x float> %ret3, float %v4, i32 4
  %ret5 = insertelement <16 x float> %ret4, float %v5, i32 5
  %ret6 = insertelement <16 x float> %ret5, float %v6, i32 6
  %ret7 = insertelement <16 x float> %ret6, float %v7, i32 7
  %ret8 = insertelement <16 x float> %ret7, float %v8, i32 8
  %ret9 = insertelement <16 x float> %ret8, float %v9, i32 9
  %ret10 = insertelement <16 x float> %ret9, float %v10, i32 10
  %ret11 = insertelement <16 x float> %ret10, float %v11, i32 11
  %ret12 = insertelement <16 x float> %ret11, float %v12, i32 12
  %ret13 = insertelement <16 x float> %ret12, float %v13, i32 13
  %ret14 = insertelement <16 x float> %ret13, float %v14, i32 14
  %ret15 = insertelement <16 x float> %ret14, float %v15, i32 15
  ret <16 x float> %ret15
}