1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1OR2 --check-prefix=AVX1
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX2OR512VL --check-prefix=AVX512VL
; Splat test. Every result element is a[0] (all eight mask indices are 0); the mask never selects from %b.
; Expected lowering - AVX1 run uses vpermilps + vinsertf128; the AVX2 and AVX512VL runs use a single vbroadcastss.
6 define <8 x float> @shuffle_v8f32_00000000(<8 x float> %a, <8 x float> %b) {
7 ; AVX1-LABEL: shuffle_v8f32_00000000:
9 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
10 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
13 ; AVX2OR512VL-LABEL: shuffle_v8f32_00000000:
14 ; AVX2OR512VL: # BB#0:
15 ; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %ymm0
16 ; AVX2OR512VL-NEXT: retq
17 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
18 ret <8 x float> %shuffle
; Near-splat test. Result is a[0] everywhere except result element 6, which takes a[1].
; Expected lowering - AVX1 run builds both 128-bit halves with vpermilps and joins them with vinsertf128;
; the AVX2 and AVX512VL runs use vpermilps on the low half followed by a 64-bit vpermpd.
21 define <8 x float> @shuffle_v8f32_00000010(<8 x float> %a, <8 x float> %b) {
22 ; AVX1-LABEL: shuffle_v8f32_00000010:
24 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
25 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
26 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
29 ; AVX2OR512VL-LABEL: shuffle_v8f32_00000010:
30 ; AVX2OR512VL: # BB#0:
31 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
32 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
33 ; AVX2OR512VL-NEXT: retq
34 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
35 ret <8 x float> %shuffle
; Near-splat test. Result is a[0] everywhere except result element 5, which takes a[2].
; Expected lowering - AVX1 run uses two vpermilps + vinsertf128; the AVX2 and AVX512VL runs use vpermilps + vpermpd.
38 define <8 x float> @shuffle_v8f32_00000200(<8 x float> %a, <8 x float> %b) {
39 ; AVX1-LABEL: shuffle_v8f32_00000200:
41 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
42 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
43 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
46 ; AVX2OR512VL-LABEL: shuffle_v8f32_00000200:
47 ; AVX2OR512VL: # BB#0:
48 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,2]
49 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
50 ; AVX2OR512VL-NEXT: retq
51 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
52 ret <8 x float> %shuffle
; Near-splat test. Result is a[0] everywhere except result element 4, which takes a[3].
; Expected lowering - AVX1 run uses two vpermilps + vinsertf128; the AVX2 and AVX512VL runs use vpermilps + vpermpd.
55 define <8 x float> @shuffle_v8f32_00003000(<8 x float> %a, <8 x float> %b) {
56 ; AVX1-LABEL: shuffle_v8f32_00003000:
58 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
59 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
60 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
63 ; AVX2OR512VL-LABEL: shuffle_v8f32_00003000:
64 ; AVX2OR512VL: # BB#0:
65 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,3,0]
66 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
67 ; AVX2OR512VL-NEXT: retq
68 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
69 ret <8 x float> %shuffle
; Near-splat test that crosses 128-bit halves. Result is a[0] everywhere except result element 3, which takes a[4].
; Expected lowering - AVX1 run needs vpermilps, vperm2f128, vpermilps and a vblendps;
; the AVX2 and AVX512VL runs load a constant index vector and use one vpermps.
72 define <8 x float> @shuffle_v8f32_00040000(<8 x float> %a, <8 x float> %b) {
73 ; AVX1-LABEL: shuffle_v8f32_00040000:
75 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,3]
76 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
77 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
78 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
81 ; AVX2OR512VL-LABEL: shuffle_v8f32_00040000:
82 ; AVX2OR512VL: # BB#0:
83 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
84 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
85 ; AVX2OR512VL-NEXT: retq
86 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
87 ret <8 x float> %shuffle
; Near-splat test that crosses 128-bit halves. Result is a[0] everywhere except result element 2, which takes a[5].
; Expected lowering - AVX1 run uses vperm2f128 + vblendps + vpermilps; the AVX2 and AVX512VL runs use a constant-index vpermps.
90 define <8 x float> @shuffle_v8f32_00500000(<8 x float> %a, <8 x float> %b) {
91 ; AVX1-LABEL: shuffle_v8f32_00500000:
93 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
94 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
95 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
98 ; AVX2OR512VL-LABEL: shuffle_v8f32_00500000:
99 ; AVX2OR512VL: # BB#0:
100 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
101 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
102 ; AVX2OR512VL-NEXT: retq
103 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
104 ret <8 x float> %shuffle
; Near-splat test that crosses 128-bit halves. Result is a[0] everywhere except result element 1, which takes a[6].
; Expected lowering - AVX1 run uses vperm2f128 + vblendpd + vpermilps; the AVX2 and AVX512VL runs use a constant-index vpermps.
107 define <8 x float> @shuffle_v8f32_06000000(<8 x float> %a, <8 x float> %b) {
108 ; AVX1-LABEL: shuffle_v8f32_06000000:
110 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
111 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
112 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
115 ; AVX2OR512VL-LABEL: shuffle_v8f32_06000000:
116 ; AVX2OR512VL: # BB#0:
117 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
118 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
119 ; AVX2OR512VL-NEXT: retq
120 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
121 ret <8 x float> %shuffle
; Near-splat test that crosses 128-bit halves. Result element 0 takes a[7]; every other result element is a[0].
; Expected lowering - AVX1 run uses vperm2f128 + vblendpd + vpermilps; the AVX2 and AVX512VL runs
; materialize the index vector from an immediate (movl $7 / vmovd, upper indices zero) and apply it with vpermd.
124 define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) {
125 ; AVX1-LABEL: shuffle_v8f32_70000000:
127 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
128 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
129 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
132 ; AVX2OR512VL-LABEL: shuffle_v8f32_70000000:
133 ; AVX2OR512VL: # BB#0:
134 ; AVX2OR512VL-NEXT: movl $7, %eax
135 ; AVX2OR512VL-NEXT: vmovd %eax, %xmm1
136 ; AVX2OR512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0
137 ; AVX2OR512VL-NEXT: retq
138 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
139 ret <8 x float> %shuffle
; Duplicates the low float pair of each 128-bit half of %a (a[0],a[1] and a[4],a[5]).
; Every run is expected to emit a single vmovddup.
142 define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) {
143 ; ALL-LABEL: shuffle_v8f32_01014545:
145 ; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
147 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
148 ret <8 x float> %shuffle
; Spreads a[0..3] across the whole result, each element written twice in adjacent positions.
; Expected lowering - AVX1 run uses two vpermilps + vinsertf128; the AVX2 and AVX512VL runs use a constant-index vpermps.
151 define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) {
152 ; AVX1-LABEL: shuffle_v8f32_00112233:
154 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1]
155 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
156 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
159 ; AVX2OR512VL-LABEL: shuffle_v8f32_00112233:
160 ; AVX2OR512VL: # BB#0:
161 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
162 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
163 ; AVX2OR512VL-NEXT: retq
164 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
165 ret <8 x float> %shuffle
; Low 128-bit half of the result is a splat of a[0]; high half is a splat of a[1].
; Expected lowering - AVX1 run uses two vpermilps + vinsertf128; the AVX2 and AVX512VL runs use vpermilps + vpermpd.
168 define <8 x float> @shuffle_v8f32_00001111(<8 x float> %a, <8 x float> %b) {
169 ; AVX1-LABEL: shuffle_v8f32_00001111:
171 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
172 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
173 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
176 ; AVX2OR512VL-LABEL: shuffle_v8f32_00001111:
177 ; AVX2OR512VL: # BB#0:
178 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
179 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
180 ; AVX2OR512VL-NEXT: retq
181 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
182 ret <8 x float> %shuffle
; Position-preserving blend. Even result elements come from %b (mask indices 8,10,12,14), odd ones from %a.
; Every run is expected to emit a single vblendps.
185 define <8 x float> @shuffle_v8f32_81a3c5e7(<8 x float> %a, <8 x float> %b) {
186 ; ALL-LABEL: shuffle_v8f32_81a3c5e7:
188 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
190 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
191 ret <8 x float> %shuffle
; Result alternates a[0], b[0] across all eight elements.
; Expected lowering - AVX1 run uses vshufps + vpermilps + vinsertf128;
; the AVX2 and AVX512VL runs form the {a[0], b[0]} pair with vinsertps and broadcast it with vbroadcastsd.
194 define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {
195 ; AVX1-LABEL: shuffle_v8f32_08080808:
197 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
198 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
199 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
202 ; AVX2OR512VL-LABEL: shuffle_v8f32_08080808:
203 ; AVX2OR512VL: # BB#0:
204 ; AVX2OR512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
205 ; AVX2OR512VL-NEXT: vbroadcastsd %xmm0, %ymm0
206 ; AVX2OR512VL-NEXT: retq
207 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
208 ret <8 x float> %shuffle
; Low half alternates a[0], b[0]; high half alternates a[4], b[4].
; Every run is expected to emit vshufps followed by an in-half vpermilps.
211 define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) {
212 ; ALL-LABEL: shuffle_v8f32_08084c4c:
214 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
215 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
217 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
218 ret <8 x float> %shuffle
; Per 128-bit half - first two elements are the half's first element of %b repeated, last two are kept from %a.
; Every run is expected to emit a single two-source vshufps.
221 define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) {
222 ; ALL-LABEL: shuffle_v8f32_8823cc67:
224 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
226 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
227 ret <8 x float> %shuffle
; Per 128-bit half - the low pair of %b swapped, followed by the high pair of %a swapped.
; Every run is expected to emit a single two-source vshufps.
230 define <8 x float> @shuffle_v8f32_9832dc76(<8 x float> %a, <8 x float> %b) {
231 ; ALL-LABEL: shuffle_v8f32_9832dc76:
233 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
235 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
236 ret <8 x float> %shuffle
; Per 128-bit half - the low pair of %b swapped, followed by the low pair of %a swapped.
; Every run is expected to emit a single two-source vshufps.
239 define <8 x float> @shuffle_v8f32_9810dc54(<8 x float> %a, <8 x float> %b) {
240 ; ALL-LABEL: shuffle_v8f32_9810dc54:
242 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
244 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
245 ret <8 x float> %shuffle
; Per 128-bit half - interleaves the low two elements of %a with the low two elements of %b.
; Every run is expected to emit a single vunpcklps.
248 define <8 x float> @shuffle_v8f32_08194c5d(<8 x float> %a, <8 x float> %b) {
249 ; ALL-LABEL: shuffle_v8f32_08194c5d:
251 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
253 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
254 ret <8 x float> %shuffle
; Per 128-bit half - interleaves the high two elements of %a with the high two elements of %b.
; Every run is expected to emit a single vunpckhps.
257 define <8 x float> @shuffle_v8f32_2a3b6e7f(<8 x float> %a, <8 x float> %b) {
258 ; ALL-LABEL: shuffle_v8f32_2a3b6e7f:
260 ; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
262 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
263 ret <8 x float> %shuffle
; Full-width interleave of a[0..3] with b[0..3] (result spans both 128-bit halves).
; Expected lowering - the AVX1 and AVX2 runs (shared AVX1OR2 prefix) use vunpckhps + vunpcklps + vinsertf128;
; the AVX512VL run uses a constant index vector with vpermt2ps.
266 define <8 x float> @shuffle_v8f32_08192a3b(<8 x float> %a, <8 x float> %b) {
267 ; AVX1OR2-LABEL: shuffle_v8f32_08192a3b:
269 ; AVX1OR2-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
270 ; AVX1OR2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
271 ; AVX1OR2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
274 ; AVX512VL-LABEL: shuffle_v8f32_08192a3b:
276 ; AVX512VL-NEXT: vmovaps {{.*#+}} ymm2 = [0,8,1,9,2,10,3,11]
277 ; AVX512VL-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
278 ; AVX512VL-NEXT: retq
279 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
280 ret <8 x float> %shuffle
; Irregular two-source shuffle. Result is a[0],b[0],b[1],b[1],a[1],b[2],b[3],b[3].
; Each run has its own expected sequence - AVX1 works on 128-bit pieces and joins them with vinsertf128,
; AVX2 permutes each source separately and merges with vblendps, AVX512VL uses a single vpermi2ps
; after pre-shuffling %a's low half with vpermilps.
283 define <8 x float> @shuffle_v8f32_08991abb(<8 x float> %a, <8 x float> %b) {
284 ; AVX1-LABEL: shuffle_v8f32_08991abb:
286 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
287 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
288 ; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
289 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
290 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
293 ; AVX2-LABEL: shuffle_v8f32_08991abb:
295 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
296 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
297 ; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
298 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
299 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
302 ; AVX512VL-LABEL: shuffle_v8f32_08991abb:
304 ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
305 ; AVX512VL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,1,1,10,2,3,3]
306 ; AVX512VL-NEXT: vpermi2ps %ymm2, %ymm1, %ymm0
307 ; AVX512VL-NEXT: retq
308 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
309 ret <8 x float> %shuffle
; Even result elements are a[0..3] spread out; odd result elements are the same-position elements of %b (b[1],b[3],b[5],b[7]).
; Expected lowering - AVX1 and AVX2 first spread %a (vpermilps/vinsertf128 or vpermps) and then vblendps with %b;
; AVX512VL uses a constant index vector with vpermt2ps.
312 define <8 x float> @shuffle_v8f32_091b2d3f(<8 x float> %a, <8 x float> %b) {
313 ; AVX1-LABEL: shuffle_v8f32_091b2d3f:
315 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
316 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
317 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
318 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
321 ; AVX2-LABEL: shuffle_v8f32_091b2d3f:
323 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
324 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
325 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
328 ; AVX512VL-LABEL: shuffle_v8f32_091b2d3f:
330 ; AVX512VL-NEXT: vmovaps {{.*#+}} ymm2 = [0,9,1,11,2,13,3,15]
331 ; AVX512VL-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
332 ; AVX512VL-NEXT: retq
333 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
334 ret <8 x float> %shuffle
; Result is %b with result element 0 replaced by a[0] and result element 4 replaced by a[1].
; Expected lowering - AVX1 run uses vmovshdup + vinsertf128 + vblendps; the AVX2 and AVX512VL runs use vpermilps + vpermpd + vblendps.
337 define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) {
338 ; AVX1-LABEL: shuffle_v8f32_09ab1def:
340 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
341 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
342 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
345 ; AVX2OR512VL-LABEL: shuffle_v8f32_09ab1def:
346 ; AVX2OR512VL: # BB#0:
347 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
348 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
349 ; AVX2OR512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
350 ; AVX2OR512VL-NEXT: retq
351 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
352 ret <8 x float> %shuffle
; Single-source permute that stays within each 128-bit half, same pattern [0,0,0,1] in both halves.
; Every run is expected to emit one vpermilps.
355 define <8 x float> @shuffle_v8f32_00014445(<8 x float> %a, <8 x float> %b) {
356 ; ALL-LABEL: shuffle_v8f32_00014445:
358 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
360 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
361 ret <8 x float> %shuffle
; Single-source permute that stays within each 128-bit half, same pattern [0,0,2,0] in both halves.
; Every run is expected to emit one vpermilps.
364 define <8 x float> @shuffle_v8f32_00204464(<8 x float> %a, <8 x float> %b) {
365 ; ALL-LABEL: shuffle_v8f32_00204464:
367 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
369 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
370 ret <8 x float> %shuffle
; Single-source permute that stays within each 128-bit half, same pattern [0,3,0,0] in both halves.
; Every run is expected to emit one vpermilps.
373 define <8 x float> @shuffle_v8f32_03004744(<8 x float> %a, <8 x float> %b) {
374 ; ALL-LABEL: shuffle_v8f32_03004744:
376 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
378 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
379 ret <8 x float> %shuffle
; Single-source permute that stays within each 128-bit half, same pattern [1,0,0,0] in both halves.
; Every run is expected to emit one vpermilps.
382 define <8 x float> @shuffle_v8f32_10005444(<8 x float> %a, <8 x float> %b) {
383 ; ALL-LABEL: shuffle_v8f32_10005444:
385 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
387 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
388 ret <8 x float> %shuffle
; Single-source permute that stays within each 128-bit half, same pattern [2,2,0,0] in both halves.
; Every run is expected to emit one vpermilps.
391 define <8 x float> @shuffle_v8f32_22006644(<8 x float> %a, <8 x float> %b) {
392 ; ALL-LABEL: shuffle_v8f32_22006644:
394 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
396 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
397 ret <8 x float> %shuffle
; Single-source permute that stays within each 128-bit half, same pattern [3,3,3,0] in both halves.
; Every run is expected to emit one vpermilps.
400 define <8 x float> @shuffle_v8f32_33307774(<8 x float> %a, <8 x float> %b) {
401 ; ALL-LABEL: shuffle_v8f32_33307774:
403 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
405 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
406 ret <8 x float> %shuffle
; Reverses the four elements inside each 128-bit half of %a (halves stay in place).
; Every run is expected to emit one vpermilps.
409 define <8 x float> @shuffle_v8f32_32107654(<8 x float> %a, <8 x float> %b) {
410 ; ALL-LABEL: shuffle_v8f32_32107654:
412 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
414 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
415 ret <8 x float> %shuffle
; Single-source permute that stays within each 128-bit half, same pattern [0,0,2,3] in both halves.
; Every run is expected to emit one vpermilps.
418 define <8 x float> @shuffle_v8f32_00234467(<8 x float> %a, <8 x float> %b) {
419 ; ALL-LABEL: shuffle_v8f32_00234467:
421 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
423 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
424 ret <8 x float> %shuffle
; Duplicates every even-indexed element of %a into the following odd position.
; Every run is expected to emit one vmovsldup rather than a generic vpermilps.
427 define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
428 ; ALL-LABEL: shuffle_v8f32_00224466:
430 ; ALL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
432 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
433 ret <8 x float> %shuffle
; Swaps the two elements of every adjacent pair of %a.
; Every run is expected to emit one vpermilps.
436 define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) {
437 ; ALL-LABEL: shuffle_v8f32_10325476:
439 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
441 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
442 ret <8 x float> %shuffle
; Duplicates every odd-indexed element of %a into the preceding even position.
; Every run is expected to emit one vmovshdup rather than a generic vpermilps.
445 define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
446 ; ALL-LABEL: shuffle_v8f32_11335577:
448 ; ALL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
450 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
451 ret <8 x float> %shuffle
; Single-source permute that stays within each 128-bit half, same pattern [1,0,2,3] in both halves.
; Every run is expected to emit one vpermilps.
454 define <8 x float> @shuffle_v8f32_10235467(<8 x float> %a, <8 x float> %b) {
455 ; ALL-LABEL: shuffle_v8f32_10235467:
457 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
459 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
460 ret <8 x float> %shuffle
; Single-source permute that stays within each 128-bit half, same pattern [1,0,2,2] in both halves.
; Every run is expected to emit one vpermilps.
463 define <8 x float> @shuffle_v8f32_10225466(<8 x float> %a, <8 x float> %b) {
464 ; ALL-LABEL: shuffle_v8f32_10225466:
466 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
468 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
469 ret <8 x float> %shuffle
; Single-source in-half permute where the two 128-bit halves use different patterns ([0,0,0,1] low, [1,0,0,0] high).
; Every run is still expected to emit one vpermilps.
472 define <8 x float> @shuffle_v8f32_00015444(<8 x float> %a, <8 x float> %b) {
473 ; ALL-LABEL: shuffle_v8f32_00015444:
475 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
477 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
478 ret <8 x float> %shuffle
; Single-source in-half permute where the two 128-bit halves use different patterns ([0,0,2,0] low, [0,2,0,0] high).
; Every run is still expected to emit one vpermilps.
481 define <8 x float> @shuffle_v8f32_00204644(<8 x float> %a, <8 x float> %b) {
482 ; ALL-LABEL: shuffle_v8f32_00204644:
484 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
486 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
487 ret <8 x float> %shuffle
; Single-source in-half permute where the two 128-bit halves use different patterns ([0,3,0,0] low, [0,0,3,0] high).
; Every run is still expected to emit one vpermilps.
490 define <8 x float> @shuffle_v8f32_03004474(<8 x float> %a, <8 x float> %b) {
491 ; ALL-LABEL: shuffle_v8f32_03004474:
493 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
495 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
496 ret <8 x float> %shuffle
; Single-source in-half permute where the two 128-bit halves use different patterns ([1,0,0,0] low, [0,0,0,0] high).
; Every run is still expected to emit one vpermilps.
499 define <8 x float> @shuffle_v8f32_10004444(<8 x float> %a, <8 x float> %b) {
500 ; ALL-LABEL: shuffle_v8f32_10004444:
502 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
504 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
505 ret <8 x float> %shuffle
; Single-source in-half permute where the two 128-bit halves use different patterns ([2,2,0,0] low, [2,0,0,2] high).
; Every run is still expected to emit one vpermilps.
508 define <8 x float> @shuffle_v8f32_22006446(<8 x float> %a, <8 x float> %b) {
509 ; ALL-LABEL: shuffle_v8f32_22006446:
511 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
513 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
514 ret <8 x float> %shuffle
; Single-source in-half permute where the two 128-bit halves use different patterns ([3,3,3,0] low, [3,0,3,0] high).
; Every run is still expected to emit one vpermilps.
517 define <8 x float> @shuffle_v8f32_33307474(<8 x float> %a, <8 x float> %b) {
518 ; ALL-LABEL: shuffle_v8f32_33307474:
520 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
522 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
523 ret <8 x float> %shuffle
; Reverses the low 128-bit half of %a and leaves the high half unchanged.
; Every run is expected to emit one vpermilps.
526 define <8 x float> @shuffle_v8f32_32104567(<8 x float> %a, <8 x float> %b) {
527 ; ALL-LABEL: shuffle_v8f32_32104567:
529 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
531 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
532 ret <8 x float> %shuffle
; Single-source in-half permute where the two 128-bit halves use different patterns ([0,0,2,3] low, [2,3,0,0] high).
; Every run is still expected to emit one vpermilps.
535 define <8 x float> @shuffle_v8f32_00236744(<8 x float> %a, <8 x float> %b) {
536 ; ALL-LABEL: shuffle_v8f32_00236744:
538 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
540 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
541 ret <8 x float> %shuffle
; Single-source in-half permute where the two 128-bit halves use different patterns ([0,0,2,2] low, [2,2,0,0] high).
; Every run is still expected to emit one vpermilps.
544 define <8 x float> @shuffle_v8f32_00226644(<8 x float> %a, <8 x float> %b) {
545 ; ALL-LABEL: shuffle_v8f32_00226644:
547 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
549 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
550 ret <8 x float> %shuffle
; Swaps adjacent pairs in the low 128-bit half of %a and leaves the high half unchanged.
; Every run is expected to emit one vpermilps.
553 define <8 x float> @shuffle_v8f32_10324567(<8 x float> %a, <8 x float> %b) {
554 ; ALL-LABEL: shuffle_v8f32_10324567:
556 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
558 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
559 ret <8 x float> %shuffle
; Duplicates the odd elements in the low 128-bit half of %a and leaves the high half unchanged.
; Every run is expected to emit one vpermilps.
562 define <8 x float> @shuffle_v8f32_11334567(<8 x float> %a, <8 x float> %b) {
563 ; ALL-LABEL: shuffle_v8f32_11334567:
565 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
567 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
568 ret <8 x float> %shuffle
; Leaves the low 128-bit half of %a unchanged and swaps the first two elements of the high half.
; Every run is expected to emit one vpermilps.
571 define <8 x float> @shuffle_v8f32_01235467(<8 x float> %a, <8 x float> %b) {
572 ; ALL-LABEL: shuffle_v8f32_01235467:
574 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
576 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
577 ret <8 x float> %shuffle
; Leaves the low 128-bit half of %a unchanged; the high half becomes a[5],a[4],a[6],a[6].
; Every run is expected to emit one vpermilps.
580 define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) {
581 ; ALL-LABEL: shuffle_v8f32_01235466:
583 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
585 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
586 ret <8 x float> %shuffle
; In-half single-source permute with undef mask entries at result positions 3 and 5 ("u" in the name).
; Every run is expected to emit one vpermilps whose printed mask keeps the undef slots as "u".
589 define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) {
590 ; ALL-LABEL: shuffle_v8f32_002u6u44:
592 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
594 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
595 ret <8 x float> %shuffle
; In-half single-source permute with undef mask entries at result positions 2, 3, 6 and 7.
; Every run is expected to emit one vpermilps whose printed mask keeps the undef slots as "u".
598 define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) {
599 ; ALL-LABEL: shuffle_v8f32_00uu66uu:
601 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
603 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
604 ret <8 x float> %shuffle
; In-half single-source permute with undef mask entries at result positions 6 and 7.
; Every run is expected to emit one vpermilps whose printed mask keeps the undef slots as "u".
607 define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) {
608 ; ALL-LABEL: shuffle_v8f32_103245uu:
610 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
612 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
613 ret <8 x float> %shuffle
; In-half single-source permute with undef mask entries at result positions 4 and 5.
; Every run is expected to emit one vpermilps whose printed mask keeps the undef slots as "u".
616 define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) {
617 ; ALL-LABEL: shuffle_v8f32_1133uu67:
619 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
621 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
622 ret <8 x float> %shuffle
; In-half single-source permute with undef mask entries at result positions 1, 2, 6 and 7.
; Every run is expected to emit one vpermilps whose printed mask keeps the undef slots as "u".
625 define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) {
626 ; ALL-LABEL: shuffle_v8f32_0uu354uu:
628 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
630 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
631 ret <8 x float> %shuffle
; Mostly-undef in-half permute; only result elements 3 (a[3]), 6 and 7 (both a[6]) are defined.
; Every run is expected to emit one vpermilps whose printed mask keeps the undef slots as "u".
634 define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) {
635 ; ALL-LABEL: shuffle_v8f32_uuu3uu66:
637 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
639 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
640 ret <8 x float> %shuffle
; Irregular two-source shuffle that crosses 128-bit halves. Result is b[4],a[3],a[4],b[0],b[4],b[5],b[2],a[0].
; Each run has its own expected sequence - AVX1 needs six instructions built from vperm2f128/vshufps/vpermilps/blends,
; AVX2 permutes each source and merges with vblendps, AVX512VL uses one vpermi2ps plus a register copy.
643 define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) {
644 ; AVX1-LABEL: shuffle_v8f32_c348cda0:
646 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
647 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm2[0,0],ymm0[4,7],ymm2[4,4]
648 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
649 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4]
650 ; AVX1-NEXT: vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1,2],ymm2[3]
651 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
654 ; AVX2-LABEL: shuffle_v8f32_c348cda0:
656 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <4,u,u,0,4,5,2,u>
657 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
658 ; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,2,0,4,7,6,4]
659 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,1]
660 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
663 ; AVX512VL-LABEL: shuffle_v8f32_c348cda0:
665 ; AVX512VL-NEXT: vmovaps {{.*#+}} ymm2 = [4,11,12,0,4,5,2,8]
666 ; AVX512VL-NEXT: vpermi2ps %ymm0, %ymm1, %ymm2
667 ; AVX512VL-NEXT: vmovaps %ymm2, %ymm0
668 ; AVX512VL-NEXT: retq
669 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
670 ret <8 x float> %shuffle
; Irregular two-source shuffle that crosses 128-bit halves. Result is b[7],a[5],a[1],a[1],a[2],a[3],a[5],b[2].
; Each run has its own expected sequence - AVX1 needs six instructions, AVX2 permutes each source with
; vpermilps + vpermpd and merges with vblendps, AVX512VL uses a constant index vector with vpermt2ps.
673 define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) {
674 ; AVX1-LABEL: shuffle_v8f32_f511235a:
676 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6]
677 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
678 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
679 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,1,4,5,5,5]
680 ; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[3],ymm0[3]
681 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
684 ; AVX2-LABEL: shuffle_v8f32_f511235a:
686 ; AVX2-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,2,2,3,7,6,6,7]
687 ; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,1,2,0]
688 ; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,2,3,5,5,6,7]
689 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,0,1,2]
690 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
693 ; AVX512VL-LABEL: shuffle_v8f32_f511235a:
695 ; AVX512VL-NEXT: vmovaps {{.*#+}} ymm2 = [15,5,1,1,2,3,5,10]
696 ; AVX512VL-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
697 ; AVX512VL-NEXT: retq
698 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
699 ret <8 x float> %shuffle
; Both 128-bit halves of the result are the reversed low half of %a (a[3],a[2],a[1],a[0]).
; Expected lowering - AVX1 run uses vpermilps + vinsertf128; the AVX2 and AVX512VL runs use vpermilps + vpermpd.
702 define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) {
703 ; AVX1-LABEL: shuffle_v8f32_32103210:
705 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
706 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
709 ; AVX2OR512VL-LABEL: shuffle_v8f32_32103210:
710 ; AVX2OR512VL: # BB#0:
711 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
712 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
713 ; AVX2OR512VL-NEXT: retq
714 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
715 ret <8 x float> %shuffle
; Both 128-bit halves of the result are the reversed high half of %a (a[7],a[6],a[5],a[4]).
; Expected lowering - in-half reverse with vpermilps, then the high half is replicated with vperm2f128 (AVX1)
; or vpermpd (AVX2 and AVX512VL).
718 define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) {
719 ; AVX1-LABEL: shuffle_v8f32_76547654:
721 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
722 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
725 ; AVX2OR512VL-LABEL: shuffle_v8f32_76547654:
726 ; AVX2OR512VL: # BB#0:
727 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
728 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3]
729 ; AVX2OR512VL-NEXT: retq
730 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
731 ret <8 x float> %shuffle
; Full reversal of %a across both 128-bit halves.
; Expected lowering - in-half reverse with vpermilps, then the halves are swapped with vperm2f128 (AVX1)
; or vpermpd (AVX2 and AVX512VL).
734 define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) {
735 ; AVX1-LABEL: shuffle_v8f32_76543210:
737 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
738 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
741 ; AVX2OR512VL-LABEL: shuffle_v8f32_76543210:
742 ; AVX2OR512VL: # BB#0:
743 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
744 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
745 ; AVX2OR512VL-NEXT: retq
746 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
747 ret <8 x float> %shuffle
; Low half of the result is the reversed low half of %a; high half is the reversed low half of %b.
; Every run is expected to combine the two low halves with vinsertf128 and then reverse in-half with vpermilps.
750 define <8 x float> @shuffle_v8f32_3210ba98(<8 x float> %a, <8 x float> %b) {
751 ; ALL-LABEL: shuffle_v8f32_3210ba98:
753 ; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
754 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
756 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
757 ret <8 x float> %shuffle
; Low half of the result is the reversed low half of %a; high half is the reversed high half of %b.
; Every run is expected to select the halves with vblendpd and then reverse in-half with vpermilps.
760 define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) {
761 ; ALL-LABEL: shuffle_v8f32_3210fedc:
763 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
764 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
766 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
767 ret <8 x float> %shuffle
; Low half of the result is the reversed high half of %a; high half is the reversed high half of %b.
; Every run is expected to gather the two high halves with vperm2f128 and then reverse in-half with vpermilps.
770 define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) {
771 ; ALL-LABEL: shuffle_v8f32_7654fedc:
773 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
774 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
776 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
777 ret <8 x float> %shuffle
; Low half of the result is the reversed high half of %b; high half is the reversed high half of %a.
; Every run is expected to gather the two high halves with vperm2f128 (%b first) and then reverse in-half with vpermilps.
780 define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) {
781 ; ALL-LABEL: shuffle_v8f32_fedc7654:
783 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
784 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
786 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
787 ret <8 x float> %shuffle
; Gathers the odd-indexed elements of %truc into the low half of the result and the odd-indexed elements of %tchose into the high half.
; NOTE(review): the name suggests this is a regression test for bug report PR21138 - confirm against the bug tracker.
; Expected lowering - AVX1 run extracts halves and uses 128-bit vshufps before recombining with vinsertf128 + vblendpd;
; the AVX2 and AVX512VL runs use one 256-bit vshufps followed by vpermpd to fix the half ordering.
790 define <8 x float> @PR21138(<8 x float> %truc, <8 x float> %tchose) {
791 ; AVX1-LABEL: PR21138:
793 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
794 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
795 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
796 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
797 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
798 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
801 ; AVX2OR512VL-LABEL: PR21138:
802 ; AVX2OR512VL: # BB#0:
803 ; AVX2OR512VL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
804 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
805 ; AVX2OR512VL-NEXT: retq
806 %shuffle = shufflevector <8 x float> %truc, <8 x float> %tchose, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
807 ret <8 x float> %shuffle
; Low half: %b lanes 3..0 (mask indices 11..8); high half: %a lanes 7..4.
; Expected on every configuration: a 128-bit-granular blend followed by an in-lane reverse.
810 define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) {
811 ; ALL-LABEL: shuffle_v8f32_ba987654:
813 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
814 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
816 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
817 ret <8 x float> %shuffle
; Low half: %b lanes 3..0 (mask indices 11..8); high half: %a lanes 3..0.
; Expected on every configuration: vinsertf128 places %a's low half above %b's, then an in-lane reverse.
820 define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) {
821 ; ALL-LABEL: shuffle_v8f32_ba983210:
823 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
824 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
826 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
827 ret <8 x float> %shuffle
; Interleaves the low pair of each 128-bit half with %b first; result lanes 2 and 6 are undef in the mask.
; Expected on every configuration: a single vunpcklps with ymm1 as the first source.
830 define <8 x float> @shuffle_v8f32_80u1c4u5(<8 x float> %a, <8 x float> %b) {
831 ; ALL-LABEL: shuffle_v8f32_80u1c4u5:
833 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
835 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 5>
836 ret <8 x float> %shuffle
; Interleaves the high pair of each 128-bit half with %b first; only result lane 2 is undef in the mask.
; Expected on every configuration: a single vunpckhps with ymm1 as the first source.
839 define <8 x float> @shuffle_v8f32_a2u3e6f7(<8 x float> %a, <8 x float> %b) {
840 ; ALL-LABEL: shuffle_v8f32_a2u3e6f7:
842 ; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]
844 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 10, i32 2, i32 undef, i32 3, i32 14, i32 6, i32 15, i32 7>
845 ret <8 x float> %shuffle
; Low half is undef; high half is four copies of %a lane 1.
; Expected on every configuration: a 128-bit splat of lane 1, then vinsertf128 into the upper half.
848 define <8 x float> @shuffle_v8f32_uuuu1111(<8 x float> %a, <8 x float> %b) {
849 ; ALL-LABEL: shuffle_v8f32_uuuu1111:
851 ; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
852 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
854 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
855 ret <8 x float> %shuffle
; Splats %a lane 4 (first lane of the upper 128-bit half) to all eight lanes.
; AVX1 expects vpermilps + vperm2f128; AVX2/AVX512VL expect vextractf128 + vbroadcastss.
858 define <8 x float> @shuffle_v8f32_44444444(<8 x float> %a, <8 x float> %b) {
859 ; AVX1-LABEL: shuffle_v8f32_44444444:
861 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
862 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
865 ; AVX2OR512VL-LABEL: shuffle_v8f32_44444444:
866 ; AVX2OR512VL: # BB#0:
867 ; AVX2OR512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
868 ; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %ymm0
869 ; AVX2OR512VL-NEXT: retq
870 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
871 ret <8 x float> %shuffle
; Low half: %a lane 1 twice, then %b lane 0 twice (mask index 8); high half is undef.
; Expected on every configuration: a single 128-bit vshufps.
874 define <8 x float> @shuffle_v8f32_1188uuuu(<8 x float> %a, <8 x float> %b) {
875 ; ALL-LABEL: shuffle_v8f32_1188uuuu:
877 ; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0]
879 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
880 ret <8 x float> %shuffle
; Low half is undef; high half is %a lanes 3..0.
; Expected on every configuration: a 128-bit reverse, then vinsertf128 into the upper half.
883 define <8 x float> @shuffle_v8f32_uuuu3210(<8 x float> %a, <8 x float> %b) {
884 ; ALL-LABEL: shuffle_v8f32_uuuu3210:
886 ; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
887 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
889 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 2, i32 1, i32 0>
890 ret <8 x float> %shuffle
; Low half is undef; high half is %a lane 1 twice, then %b lane 0 twice (mask index 8).
; Expected on every configuration: a 128-bit vshufps, then vinsertf128 into the upper half.
893 define <8 x float> @shuffle_v8f32_uuuu1188(<8 x float> %a, <8 x float> %b) {
894 ; ALL-LABEL: shuffle_v8f32_uuuu1188:
896 ; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0]
897 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
899 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 8, i32 8>
900 ret <8 x float> %shuffle
; Low half: four copies of %a lane 1; high half is undef.
; Expected on every configuration: a single 128-bit vpermilps.
903 define <8 x float> @shuffle_v8f32_1111uuuu(<8 x float> %a, <8 x float> %b) {
904 ; ALL-LABEL: shuffle_v8f32_1111uuuu:
906 ; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
908 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
909 ret <8 x float> %shuffle
; Low half: four copies of %a lane 5 (lane 1 of the upper 128-bit half); high half is undef.
; Expected on every configuration: extract the upper half, then a 128-bit splat of its lane 1.
912 define <8 x float> @shuffle_v8f32_5555uuuu(<8 x float> %a, <8 x float> %b) {
913 ; ALL-LABEL: shuffle_v8f32_5555uuuu:
915 ; ALL-NEXT: vextractf128 $1, %ymm0, %xmm0
916 ; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
918 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
919 ret <8 x float> %shuffle
; Integer (<8 x i32>) splat of %a lane 0 to all eight lanes.
; AVX1 expects a 128-bit vpermilps + vinsertf128; AVX2/AVX512VL expect a single vbroadcastss.
922 define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) {
923 ; AVX1-LABEL: shuffle_v8i32_00000000:
925 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
926 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
929 ; AVX2OR512VL-LABEL: shuffle_v8i32_00000000:
930 ; AVX2OR512VL: # BB#0:
931 ; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %ymm0
932 ; AVX2OR512VL-NEXT: retq
933 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
934 ret <8 x i32> %shuffle
; Every result lane is %a lane 0 except lane 6, which is %a lane 1.
; AVX1 builds the two halves separately; AVX2/AVX512VL expect a 128-bit vpermilps followed by vpermpd.
937 define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) {
938 ; AVX1-LABEL: shuffle_v8i32_00000010:
940 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
941 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
942 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
945 ; AVX2OR512VL-LABEL: shuffle_v8i32_00000010:
946 ; AVX2OR512VL: # BB#0:
947 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
948 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
949 ; AVX2OR512VL-NEXT: retq
950 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
951 ret <8 x i32> %shuffle
; Every result lane is %a lane 0 except lane 5, which is %a lane 2.
; AVX1 builds the two halves separately; AVX2/AVX512VL expect a 128-bit vpermilps followed by vpermpd.
954 define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) {
955 ; AVX1-LABEL: shuffle_v8i32_00000200:
957 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
958 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
959 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
962 ; AVX2OR512VL-LABEL: shuffle_v8i32_00000200:
963 ; AVX2OR512VL: # BB#0:
964 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,2]
965 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
966 ; AVX2OR512VL-NEXT: retq
967 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
968 ret <8 x i32> %shuffle
; Every result lane is %a lane 0 except lane 4, which is %a lane 3.
; AVX1 builds the two halves separately; AVX2/AVX512VL expect a 128-bit vpermilps followed by vpermpd.
971 define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) {
972 ; AVX1-LABEL: shuffle_v8i32_00003000:
974 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
975 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
976 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
979 ; AVX2OR512VL-LABEL: shuffle_v8i32_00003000:
980 ; AVX2OR512VL: # BB#0:
981 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,3,0]
982 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
983 ; AVX2OR512VL-NEXT: retq
984 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
985 ret <8 x i32> %shuffle
; Every result lane is %a lane 0 except lane 3, which takes %a lane 4 from the other 128-bit half.
; AVX1 expects a half-swap, permutes and a blend; AVX2/AVX512VL expect vpermps with a constant index vector.
988 define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) {
989 ; AVX1-LABEL: shuffle_v8i32_00040000:
991 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,3]
992 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
993 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
994 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
997 ; AVX2OR512VL-LABEL: shuffle_v8i32_00040000:
998 ; AVX2OR512VL: # BB#0:
999 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
1000 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1001 ; AVX2OR512VL-NEXT: retq
1002 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
1003 ret <8 x i32> %shuffle
; Every result lane is %a lane 0 except lane 2, which takes %a lane 5 from the other 128-bit half.
; AVX1 expects a half-swap, blend and in-lane permute; AVX2/AVX512VL expect vpermps with a constant index vector.
1006 define <8 x i32> @shuffle_v8i32_00500000(<8 x i32> %a, <8 x i32> %b) {
1007 ; AVX1-LABEL: shuffle_v8i32_00500000:
1009 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
1010 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
1011 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
1014 ; AVX2OR512VL-LABEL: shuffle_v8i32_00500000:
1015 ; AVX2OR512VL: # BB#0:
1016 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
1017 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1018 ; AVX2OR512VL-NEXT: retq
1019 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
1020 ret <8 x i32> %shuffle
; Every result lane is %a lane 0 except lane 1, which takes %a lane 6 from the other 128-bit half.
; AVX1 expects a half-swap, blend and in-lane permute; AVX2/AVX512VL expect vpermps with a constant index vector.
1023 define <8 x i32> @shuffle_v8i32_06000000(<8 x i32> %a, <8 x i32> %b) {
1024 ; AVX1-LABEL: shuffle_v8i32_06000000:
1026 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
1027 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
1028 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
1031 ; AVX2OR512VL-LABEL: shuffle_v8i32_06000000:
1032 ; AVX2OR512VL: # BB#0:
1033 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
1034 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1035 ; AVX2OR512VL-NEXT: retq
1036 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1037 ret <8 x i32> %shuffle
; Result lane 0 is %a lane 7; every other lane is %a lane 0.
; AVX2/AVX512VL expect the index vector to be built from the scalar 7 (movl + vmovd) and applied with vpermd.
1040 define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) {
1041 ; AVX1-LABEL: shuffle_v8i32_70000000:
1043 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
1044 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
1045 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
1048 ; AVX2OR512VL-LABEL: shuffle_v8i32_70000000:
1049 ; AVX2OR512VL: # BB#0:
1050 ; AVX2OR512VL-NEXT: movl $7, %eax
1051 ; AVX2OR512VL-NEXT: vmovd %eax, %xmm1
1052 ; AVX2OR512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0
1053 ; AVX2OR512VL-NEXT: retq
1054 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1055 ret <8 x i32> %shuffle
; Duplicates the low two lanes of each 128-bit half of %a (0,1,0,1 / 4,5,4,5).
; AVX1 expects vmovddup; AVX2/AVX512VL expect an in-lane vpermilps.
1058 define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) {
1059 ; AVX1-LABEL: shuffle_v8i32_01014545:
1061 ; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
1064 ; AVX2OR512VL-LABEL: shuffle_v8i32_01014545:
1065 ; AVX2OR512VL: # BB#0:
1066 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
1067 ; AVX2OR512VL-NEXT: retq
1068 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
1069 ret <8 x i32> %shuffle
; Duplicates each of %a lanes 0..3 into adjacent result lanes (0,0,1,1,2,2,3,3).
; AVX1 builds the two halves separately; AVX2/AVX512VL expect vpermps with a constant index vector.
1072 define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) {
1073 ; AVX1-LABEL: shuffle_v8i32_00112233:
1075 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1]
1076 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
1077 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1080 ; AVX2OR512VL-LABEL: shuffle_v8i32_00112233:
1081 ; AVX2OR512VL: # BB#0:
1082 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
1083 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1084 ; AVX2OR512VL-NEXT: retq
1085 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
1086 ret <8 x i32> %shuffle
; Low half: four copies of %a lane 0; high half: four copies of %a lane 1.
; AVX1 builds the two halves separately; AVX2/AVX512VL expect a 128-bit vpermilps followed by vpermpd.
1089 define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) {
1090 ; AVX1-LABEL: shuffle_v8i32_00001111:
1092 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
1093 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1094 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1097 ; AVX2OR512VL-LABEL: shuffle_v8i32_00001111:
1098 ; AVX2OR512VL: # BB#0:
1099 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
1100 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
1101 ; AVX2OR512VL-NEXT: retq
1102 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
1103 ret <8 x i32> %shuffle
; Lane-preserving blend: even result lanes come from %b, odd result lanes from %a.
; Expected on every configuration: a single vblendps.
1106 define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) {
1107 ; ALL-LABEL: shuffle_v8i32_81a3c5e7:
1109 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
1111 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
1112 ret <8 x i32> %shuffle
; Repeats the pair (%a lane 0, %b lane 0) four times across the result.
; AVX1 expects vshufps + vpermilps + vinsertf128; AVX2/AVX512VL expect vunpcklps + vbroadcastsd.
1115 define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) {
1116 ; AVX1-LABEL: shuffle_v8i32_08080808:
1118 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
1119 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
1120 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1123 ; AVX2OR512VL-LABEL: shuffle_v8i32_08080808:
1124 ; AVX2OR512VL: # BB#0:
1125 ; AVX2OR512VL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1126 ; AVX2OR512VL-NEXT: vbroadcastsd %xmm0, %ymm0
1127 ; AVX2OR512VL-NEXT: retq
1128 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
1129 ret <8 x i32> %shuffle
; Per 128-bit half, repeats the pair formed by the first lane of %a and of %b: (0,8,0,8) then (4,12,4,12).
; AVX1 expects vshufps + vpermilps; AVX2/AVX512VL expect two in-lane permutes and a blend.
1132 define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) {
1133 ; AVX1-LABEL: shuffle_v8i32_08084c4c:
1135 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
1136 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
1139 ; AVX2OR512VL-LABEL: shuffle_v8i32_08084c4c:
1140 ; AVX2OR512VL: # BB#0:
1141 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4]
1142 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
1143 ; AVX2OR512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1144 ; AVX2OR512VL-NEXT: retq
1145 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
1146 ret <8 x i32> %shuffle
; Per 128-bit half: the first lane of %b twice, followed by lanes 2 and 3 of %a for that half.
; Expected on every configuration: a single vshufps with ymm1 as the first source.
1149 define <8 x i32> @shuffle_v8i32_8823cc67(<8 x i32> %a, <8 x i32> %b) {
1150 ; ALL-LABEL: shuffle_v8i32_8823cc67:
1152 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
1154 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
1155 ret <8 x i32> %shuffle
; Per 128-bit half: the low pair of %b swapped, followed by the high pair of %a swapped.
; Expected on every configuration: a single vshufps with ymm1 as the first source.
1158 define <8 x i32> @shuffle_v8i32_9832dc76(<8 x i32> %a, <8 x i32> %b) {
1159 ; ALL-LABEL: shuffle_v8i32_9832dc76:
1161 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
1163 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
1164 ret <8 x i32> %shuffle
; Per 128-bit half: the low pair of %b swapped, followed by the low pair of %a swapped.
; Expected on every configuration: a single vshufps with ymm1 as the first source.
1167 define <8 x i32> @shuffle_v8i32_9810dc54(<8 x i32> %a, <8 x i32> %b) {
1168 ; ALL-LABEL: shuffle_v8i32_9810dc54:
1170 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
1172 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
1173 ret <8 x i32> %shuffle
; Interleaves the low pair of each 128-bit half of %a and %b, with %a first.
; Expected on every configuration: a single vunpcklps.
1176 define <8 x i32> @shuffle_v8i32_08194c5d(<8 x i32> %a, <8 x i32> %b) {
1177 ; ALL-LABEL: shuffle_v8i32_08194c5d:
1179 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1181 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
1182 ret <8 x i32> %shuffle
; Interleaves the high pair of each 128-bit half of %a and %b, with %a first.
; Expected on every configuration: a single vunpckhps.
1185 define <8 x i32> @shuffle_v8i32_2a3b6e7f(<8 x i32> %a, <8 x i32> %b) {
1186 ; ALL-LABEL: shuffle_v8i32_2a3b6e7f:
1188 ; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1190 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1191 ret <8 x i32> %shuffle
; Interleaves the low four lanes of %a with the low four lanes of %b across the whole 256-bit result.
; AVX1 and AVX2 share one sequence (two 128-bit unpacks + vinsertf128); AVX512VL expects vpmovzxdq + vpermi2d.
1194 define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) {
1195 ; AVX1OR2-LABEL: shuffle_v8i32_08192a3b:
1197 ; AVX1OR2-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1198 ; AVX1OR2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1199 ; AVX1OR2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1200 ; AVX1OR2-NEXT: retq
1202 ; AVX512VL-LABEL: shuffle_v8i32_08192a3b:
1204 ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1205 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm0 = [0,8,2,9,4,10,6,11]
1206 ; AVX512VL-NEXT: vpermi2d %ymm1, %ymm2, %ymm0
1207 ; AVX512VL-NEXT: retq
1208 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1209 ret <8 x i32> %shuffle
; Result lanes 0 and 4 are %a lanes 0 and 1; the remaining lanes are %b lanes 0,1,1 and 2,3,3.
; Each of the three llc configurations has its own expected sequence (AVX1, AVX2 and AVX512VL prefixes).
1212 define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) {
1213 ; AVX1-LABEL: shuffle_v8i32_08991abb:
1215 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
1216 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
1217 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1218 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
1219 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1222 ; AVX2-LABEL: shuffle_v8i32_08991abb:
1224 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
1225 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
1226 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1227 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
1228 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1231 ; AVX512VL-LABEL: shuffle_v8i32_08991abb:
1233 ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
1234 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm0 = [8,0,1,1,10,2,3,3]
1235 ; AVX512VL-NEXT: vpermi2d %ymm2, %ymm1, %ymm0
1236 ; AVX512VL-NEXT: retq
1237 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
1238 ret <8 x i32> %shuffle
; Even result lanes are %a lanes 0..3 spread out; odd result lanes are the odd lanes of %b kept in place.
; AVX1 expects permutes + vinsertf128 + blend; AVX2/AVX512VL expect vpmovzxdq followed by a blend.
1241 define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
1242 ; AVX1-LABEL: shuffle_v8i32_091b2d3f:
1244 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
1245 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
1246 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1247 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1250 ; AVX2OR512VL-LABEL: shuffle_v8i32_091b2d3f:
1251 ; AVX2OR512VL: # BB#0:
1252 ; AVX2OR512VL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1253 ; AVX2OR512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1254 ; AVX2OR512VL-NEXT: retq
1255 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
1256 ret <8 x i32> %shuffle
; Result lanes 0 and 4 are %a lanes 0 and 1; every other lane is the matching lane of %b kept in place.
; AVX1 expects vpermilps + vinsertf128 + blend; AVX2/AVX512VL expect vpmovzxdq + vpermq + blend.
1259 define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {
1260 ; AVX1-LABEL: shuffle_v8i32_09ab1def:
1262 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[1,1,3,3]
1263 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1264 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1267 ; AVX2OR512VL-LABEL: shuffle_v8i32_09ab1def:
1268 ; AVX2OR512VL: # BB#0:
1269 ; AVX2OR512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1270 ; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
1271 ; AVX2OR512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1272 ; AVX2OR512VL-NEXT: retq
1273 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
1274 ret <8 x i32> %shuffle
; Single-input in-lane permute of %a; both 128-bit halves use the same pattern (0,0,0,1).
; Expected on every configuration: a single vpermilps.
1277 define <8 x i32> @shuffle_v8i32_00014445(<8 x i32> %a, <8 x i32> %b) {
1278 ; ALL-LABEL: shuffle_v8i32_00014445:
1280 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1282 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
1283 ret <8 x i32> %shuffle
; Single-input in-lane permute of %a; both 128-bit halves use the same pattern (0,0,2,0).
; Expected on every configuration: a single vpermilps.
1286 define <8 x i32> @shuffle_v8i32_00204464(<8 x i32> %a, <8 x i32> %b) {
1287 ; ALL-LABEL: shuffle_v8i32_00204464:
1289 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
1291 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
1292 ret <8 x i32> %shuffle
; Single-input in-lane permute of %a; both 128-bit halves use the same pattern (0,3,0,0).
; Expected on every configuration: a single vpermilps.
1295 define <8 x i32> @shuffle_v8i32_03004744(<8 x i32> %a, <8 x i32> %b) {
1296 ; ALL-LABEL: shuffle_v8i32_03004744:
1298 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
1300 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
1301 ret <8 x i32> %shuffle
; Single-input in-lane permute of %a; both 128-bit halves use the same pattern (1,0,0,0).
; Expected on every configuration: a single vpermilps.
1304 define <8 x i32> @shuffle_v8i32_10005444(<8 x i32> %a, <8 x i32> %b) {
1305 ; ALL-LABEL: shuffle_v8i32_10005444:
1307 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
1309 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
1310 ret <8 x i32> %shuffle
; Single-input in-lane permute of %a; both 128-bit halves use the same pattern (2,2,0,0).
; Expected on every configuration: a single vpermilps.
1313 define <8 x i32> @shuffle_v8i32_22006644(<8 x i32> %a, <8 x i32> %b) {
1314 ; ALL-LABEL: shuffle_v8i32_22006644:
1316 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
1318 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
1319 ret <8 x i32> %shuffle
; Single-input in-lane permute of %a; both 128-bit halves use the same pattern (3,3,3,0).
; Expected on every configuration: a single vpermilps.
1322 define <8 x i32> @shuffle_v8i32_33307774(<8 x i32> %a, <8 x i32> %b) {
1323 ; ALL-LABEL: shuffle_v8i32_33307774:
1325 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
1327 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
1328 ret <8 x i32> %shuffle
; Reverses the four lanes inside each 128-bit half of %a.
; Expected on every configuration: a single vpermilps.
1331 define <8 x i32> @shuffle_v8i32_32107654(<8 x i32> %a, <8 x i32> %b) {
1332 ; ALL-LABEL: shuffle_v8i32_32107654:
1334 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1336 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
1337 ret <8 x i32> %shuffle
; Single-input in-lane permute of %a; both 128-bit halves use the same pattern (0,0,2,3).
; Expected on every configuration: a single vpermilps.
1340 define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) {
1341 ; ALL-LABEL: shuffle_v8i32_00234467:
1343 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
1345 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
1346 ret <8 x i32> %shuffle
; Duplicates every even lane of %a into the following odd lane (0,0,2,2,4,4,6,6).
; AVX1 expects vmovsldup; AVX2/AVX512VL expect vpermilps with the same lane pattern.
1349 define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) {
1350 ; AVX1-LABEL: shuffle_v8i32_00224466:
1352 ; AVX1-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
1355 ; AVX2OR512VL-LABEL: shuffle_v8i32_00224466:
1356 ; AVX2OR512VL: # BB#0:
1357 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
1358 ; AVX2OR512VL-NEXT: retq
1359 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1360 ret <8 x i32> %shuffle
; Swaps each adjacent pair of lanes of %a (1,0,3,2,5,4,7,6).
; Expected on every configuration: a single vpermilps.
1363 define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) {
1364 ; ALL-LABEL: shuffle_v8i32_10325476:
1366 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1368 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
1369 ret <8 x i32> %shuffle
; Duplicates every odd lane of %a into the preceding even lane (1,1,3,3,5,5,7,7).
; AVX1 expects vmovshdup; AVX2/AVX512VL expect vpermilps with the same lane pattern.
1372 define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) {
1373 ; AVX1-LABEL: shuffle_v8i32_11335577:
1375 ; AVX1-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
1378 ; AVX2OR512VL-LABEL: shuffle_v8i32_11335577:
1379 ; AVX2OR512VL: # BB#0:
1380 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
1381 ; AVX2OR512VL-NEXT: retq
1382 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
1383 ret <8 x i32> %shuffle
; Swaps the first two lanes of each 128-bit half of %a and leaves the other two in place.
; Expected on every configuration: a single vpermilps.
1386 define <8 x i32> @shuffle_v8i32_10235467(<8 x i32> %a, <8 x i32> %b) {
1387 ; ALL-LABEL: shuffle_v8i32_10235467:
1389 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
1391 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
1392 ret <8 x i32> %shuffle
; Single-input in-lane permute of %a; both 128-bit halves use the same pattern (1,0,2,2).
; Expected on every configuration: a single vpermilps.
1395 define <8 x i32> @shuffle_v8i32_10225466(<8 x i32> %a, <8 x i32> %b) {
1396 ; ALL-LABEL: shuffle_v8i32_10225466:
1398 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
1400 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
1401 ret <8 x i32> %shuffle
; Single-input in-lane permute of %a where the halves use different patterns: (0,0,0,1) low, (1,0,0,0) high.
; AVX1 expects vpermilps; AVX2/AVX512VL expect vpermps with a constant index vector.
1404 define <8 x i32> @shuffle_v8i32_00015444(<8 x i32> %a, <8 x i32> %b) {
1405 ; AVX1-LABEL: shuffle_v8i32_00015444:
1407 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
1410 ; AVX2OR512VL-LABEL: shuffle_v8i32_00015444:
1411 ; AVX2OR512VL: # BB#0:
1412 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4]
1413 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1414 ; AVX2OR512VL-NEXT: retq
1415 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
1416 ret <8 x i32> %shuffle
; Single-input in-lane permute of %a where the halves use different patterns: (0,0,2,0) low, (0,2,0,0) high.
; AVX1 expects vpermilps; AVX2/AVX512VL expect vpermps with a constant index vector.
1419 define <8 x i32> @shuffle_v8i32_00204644(<8 x i32> %a, <8 x i32> %b) {
1420 ; AVX1-LABEL: shuffle_v8i32_00204644:
1422 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
1425 ; AVX2OR512VL-LABEL: shuffle_v8i32_00204644:
1426 ; AVX2OR512VL: # BB#0:
1427 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4]
1428 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1429 ; AVX2OR512VL-NEXT: retq
1430 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
1431 ret <8 x i32> %shuffle
; Single-input in-lane permute of %a where the halves use different patterns: (0,3,0,0) low, (0,0,3,0) high.
; AVX1 expects vpermilps; AVX2/AVX512VL expect vpermps with a constant index vector.
1434 define <8 x i32> @shuffle_v8i32_03004474(<8 x i32> %a, <8 x i32> %b) {
1435 ; AVX1-LABEL: shuffle_v8i32_03004474:
1437 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
1440 ; AVX2OR512VL-LABEL: shuffle_v8i32_03004474:
1441 ; AVX2OR512VL: # BB#0:
1442 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4]
1443 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1444 ; AVX2OR512VL-NEXT: retq
1445 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
1446 ret <8 x i32> %shuffle
; Single-input in-lane permute of %a where the halves use different patterns: (1,0,0,0) low, (0,0,0,0) high.
; AVX1 expects vpermilps; AVX2/AVX512VL expect vpermps with a constant index vector.
1449 define <8 x i32> @shuffle_v8i32_10004444(<8 x i32> %a, <8 x i32> %b) {
1450 ; AVX1-LABEL: shuffle_v8i32_10004444:
1452 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
1455 ; AVX2OR512VL-LABEL: shuffle_v8i32_10004444:
1456 ; AVX2OR512VL: # BB#0:
1457 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4]
1458 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1459 ; AVX2OR512VL-NEXT: retq
1460 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
1461 ret <8 x i32> %shuffle
; Single-input in-lane permute of %a where the halves use different patterns: (2,2,0,0) low, (2,0,0,2) high.
; AVX1 expects vpermilps; AVX2/AVX512VL expect vpermps with a constant index vector.
1464 define <8 x i32> @shuffle_v8i32_22006446(<8 x i32> %a, <8 x i32> %b) {
1465 ; AVX1-LABEL: shuffle_v8i32_22006446:
1467 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
1470 ; AVX2OR512VL-LABEL: shuffle_v8i32_22006446:
1471 ; AVX2OR512VL: # BB#0:
1472 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6]
1473 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1474 ; AVX2OR512VL-NEXT: retq
1475 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
1476 ret <8 x i32> %shuffle
; Single-input in-lane permute of %a where the halves use different patterns: (3,3,3,0) low, (3,0,3,0) high.
; AVX1 expects vpermilps; AVX2/AVX512VL expect vpermps with a constant index vector.
1479 define <8 x i32> @shuffle_v8i32_33307474(<8 x i32> %a, <8 x i32> %b) {
1480 ; AVX1-LABEL: shuffle_v8i32_33307474:
1482 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
1485 ; AVX2OR512VL-LABEL: shuffle_v8i32_33307474:
1486 ; AVX2OR512VL: # BB#0:
1487 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4]
1488 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1489 ; AVX2OR512VL-NEXT: retq
1490 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
1491 ret <8 x i32> %shuffle
; Reverses the low 128-bit half of %a and leaves the high half in its original order.
; AVX1 expects vpermilps; AVX2/AVX512VL expect vpermps with a constant index vector.
1494 define <8 x i32> @shuffle_v8i32_32104567(<8 x i32> %a, <8 x i32> %b) {
1495 ; AVX1-LABEL: shuffle_v8i32_32104567:
1497 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
1500 ; AVX2OR512VL-LABEL: shuffle_v8i32_32104567:
1501 ; AVX2OR512VL: # BB#0:
1502 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7]
1503 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1504 ; AVX2OR512VL-NEXT: retq
1505 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
1506 ret <8 x i32> %shuffle
; Single-input in-lane permute of %a where the halves use different patterns: (0,0,2,3) low, (2,3,0,0) high.
; AVX1 expects vpermilps; AVX2/AVX512VL expect vpermps with a constant index vector.
1509 define <8 x i32> @shuffle_v8i32_00236744(<8 x i32> %a, <8 x i32> %b) {
1510 ; AVX1-LABEL: shuffle_v8i32_00236744:
1512 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
1515 ; AVX2OR512VL-LABEL: shuffle_v8i32_00236744:
1516 ; AVX2OR512VL: # BB#0:
1517 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4]
1518 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1519 ; AVX2OR512VL-NEXT: retq
1520 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
1521 ret <8 x i32> %shuffle
; Single-input in-lane permute of %a where the halves use different patterns: (0,0,2,2) low, (2,2,0,0) high.
; AVX1 expects vpermilps; AVX2/AVX512VL expect vpermps with a constant index vector.
1524 define <8 x i32> @shuffle_v8i32_00226644(<8 x i32> %a, <8 x i32> %b) {
1525 ; AVX1-LABEL: shuffle_v8i32_00226644:
1527 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
1530 ; AVX2OR512VL-LABEL: shuffle_v8i32_00226644:
1531 ; AVX2OR512VL: # BB#0:
1532 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4]
1533 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1534 ; AVX2OR512VL-NEXT: retq
1535 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
1536 ret <8 x i32> %shuffle
; Swaps adjacent lane pairs in the low 128-bit half of %a and leaves the high half in its original order.
; AVX1 expects vpermilps; AVX2/AVX512VL expect vpermps with a constant index vector.
1539 define <8 x i32> @shuffle_v8i32_10324567(<8 x i32> %a, <8 x i32> %b) {
1540 ; AVX1-LABEL: shuffle_v8i32_10324567:
1542 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
1545 ; AVX2OR512VL-LABEL: shuffle_v8i32_10324567:
1546 ; AVX2OR512VL: # BB#0:
1547 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7]
1548 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1549 ; AVX2OR512VL-NEXT: retq
1550 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
1551 ret <8 x i32> %shuffle
; Duplicates the odd lanes in the low 128-bit half of %a (1,1,3,3) and leaves the high half in its original order.
; AVX1 expects vpermilps; AVX2/AVX512VL expect vpermps with a constant index vector.
1554 define <8 x i32> @shuffle_v8i32_11334567(<8 x i32> %a, <8 x i32> %b) {
1555 ; AVX1-LABEL: shuffle_v8i32_11334567:
1557 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
1560 ; AVX2OR512VL-LABEL: shuffle_v8i32_11334567:
1561 ; AVX2OR512VL: # BB#0:
1562 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7]
1563 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1564 ; AVX2OR512VL-NEXT: retq
1565 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
1566 ret <8 x i32> %shuffle
; Leaves the low 128-bit half of %a in its original order and swaps lanes 4 and 5 in the high half.
; AVX1 expects vpermilps; AVX2/AVX512VL expect vpermps with a constant index vector.
1569 define <8 x i32> @shuffle_v8i32_01235467(<8 x i32> %a, <8 x i32> %b) {
1570 ; AVX1-LABEL: shuffle_v8i32_01235467:
1572 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
1575 ; AVX2OR512VL-LABEL: shuffle_v8i32_01235467:
1576 ; AVX2OR512VL: # BB#0:
1577 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7]
1578 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1579 ; AVX2OR512VL-NEXT: retq
1580 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
1581 ret <8 x i32> %shuffle
; Leaves the low 128-bit half of %a in its original order; the high half becomes lanes 5,4,6,6.
; AVX1 expects vpermilps; AVX2/AVX512VL expect vpermps with a constant index vector.
1584 define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) {
1585 ; AVX1-LABEL: shuffle_v8i32_01235466:
1587 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
1590 ; AVX2OR512VL-LABEL: shuffle_v8i32_01235466:
1591 ; AVX2OR512VL: # BB#0:
1592 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6]
1593 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1594 ; AVX2OR512VL-NEXT: retq
1595 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
1596 ret <8 x i32> %shuffle
; Single-input in-lane permute of %a with undef result lanes 3 and 5; the halves use different patterns.
; The undef lanes appear as "u" in both the AVX1 vpermilps pattern and the AVX2/AVX512VL vpermps index constant.
1599 define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) {
1600 ; AVX1-LABEL: shuffle_v8i32_002u6u44:
1602 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
1605 ; AVX2OR512VL-LABEL: shuffle_v8i32_002u6u44:
1606 ; AVX2OR512VL: # BB#0:
1607 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4>
1608 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1609 ; AVX2OR512VL-NEXT: retq
1610 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
1611 ret <8 x i32> %shuffle
; Single-source shuffle with mask <0,0,undef,undef,6,6,undef,undef>.
; The AVX1 run expects one vpermilps; the AVX2/AVX512VL runs expect vpermps
; with a constant index vector whose undef lanes print as 'u'.
1614 define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) {
1615 ; AVX1-LABEL: shuffle_v8i32_00uu66uu:
1617 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
1620 ; AVX2OR512VL-LABEL: shuffle_v8i32_00uu66uu:
1621 ; AVX2OR512VL: # BB#0:
1622 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u>
1623 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1624 ; AVX2OR512VL-NEXT: retq
1625 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
1626 ret <8 x i32> %shuffle
; Single-source shuffle with mask <1,0,3,2,4,5,undef,undef>: adjacent pairs
; are swapped in the low four elements, elements 4 and 5 are kept.
; The AVX1 run expects one vpermilps; the AVX2/AVX512VL runs expect vpermps
; with a constant index vector.
1629 define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) {
1630 ; AVX1-LABEL: shuffle_v8i32_103245uu:
1632 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
1635 ; AVX2OR512VL-LABEL: shuffle_v8i32_103245uu:
1636 ; AVX2OR512VL: # BB#0:
1637 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u>
1638 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1639 ; AVX2OR512VL-NEXT: retq
1640 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
1641 ret <8 x i32> %shuffle
; Single-source shuffle with mask <1,1,3,3,undef,undef,6,7>.
; The AVX1 run expects one vpermilps; the AVX2/AVX512VL runs expect vpermps
; with a constant index vector whose undef lanes print as 'u'.
1644 define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) {
1645 ; AVX1-LABEL: shuffle_v8i32_1133uu67:
1647 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
1650 ; AVX2OR512VL-LABEL: shuffle_v8i32_1133uu67:
1651 ; AVX2OR512VL: # BB#0:
1652 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7>
1653 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1654 ; AVX2OR512VL-NEXT: retq
1655 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
1656 ret <8 x i32> %shuffle
; Single-source shuffle with mask <0,undef,undef,3,5,4,undef,undef>.
; The AVX1 run expects one vpermilps; the AVX2/AVX512VL runs expect vpermps
; with a constant index vector whose undef lanes print as 'u'.
1659 define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) {
1660 ; AVX1-LABEL: shuffle_v8i32_0uu354uu:
1662 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
1665 ; AVX2OR512VL-LABEL: shuffle_v8i32_0uu354uu:
1666 ; AVX2OR512VL: # BB#0:
1667 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u>
1668 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1669 ; AVX2OR512VL-NEXT: retq
1670 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
1671 ret <8 x i32> %shuffle
; Mostly-undef single-source shuffle: only lanes 3, 6 and 7 are defined
; (mask <u,u,u,3,u,u,6,6>).
; The AVX1 run expects one vpermilps; the AVX2/AVX512VL runs expect vpermps
; with a constant index vector.
1674 define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) {
1675 ; AVX1-LABEL: shuffle_v8i32_uuu3uu66:
1677 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
1680 ; AVX2OR512VL-LABEL: shuffle_v8i32_uuu3uu66:
1681 ; AVX2OR512VL: # BB#0:
1682 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6>
1683 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1684 ; AVX2OR512VL-NEXT: retq
1685 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
1686 ret <8 x i32> %shuffle
; Two-source shuffle mixing both inputs across lanes: mask
; <6,12,10,10,8,7,14,5> (indices 8-15 select from %b).
; The AVX1 and AVX2 runs expect each source to be permuted separately and the
; results merged with vblendps. The AVX512VL run expects a single vpermi2d
; driven by a constant index vector, followed by a register copy into ymm0.
1689 define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) {
1690 ; AVX1-LABEL: shuffle_v8i32_6caa87e5:
1692 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
1693 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
1694 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
1695 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[2,2],ymm2[4,4],ymm1[6,6]
1696 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
1699 ; AVX2-LABEL: shuffle_v8i32_6caa87e5:
1701 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,1,3,2]
1702 ; AVX2-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,2,2,4,4,6,6]
1703 ; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,1,0,3]
1704 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
1707 ; AVX512VL-LABEL: shuffle_v8i32_6caa87e5:
1709 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [14,4,2,2,0,15,6,13]
1710 ; AVX512VL-NEXT: vpermi2d %ymm0, %ymm1, %ymm2
1711 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
1712 ; AVX512VL-NEXT: retq
1713 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
1714 ret <8 x i32> %shuffle
; Reverses the low four elements of %a and repeats them in both halves of the
; result (mask <3,2,1,0,3,2,1,0>).
; Every run expects a 128-bit vpermilps reversal first. The AVX1 run then
; expects vinsertf128 to duplicate it; the AVX2/AVX512VL runs expect vpermpd.
1717 define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) {
1718 ; AVX1-LABEL: shuffle_v8i32_32103210:
1720 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
1721 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1724 ; AVX2OR512VL-LABEL: shuffle_v8i32_32103210:
1725 ; AVX2OR512VL: # BB#0:
1726 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
1727 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
1728 ; AVX2OR512VL-NEXT: retq
1729 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
1730 ret <8 x i32> %shuffle
; Reverses the high four elements of %a and repeats them in both halves of
; the result (mask <7,6,5,4,7,6,5,4>).
; Every run expects an in-lane vpermilps reversal, then a broadcast of the
; upper 128 bits: vperm2f128 on the AVX1 run, vpermpd on AVX2/AVX512VL.
1733 define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) {
1734 ; AVX1-LABEL: shuffle_v8i32_76547654:
1736 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1737 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
1740 ; AVX2OR512VL-LABEL: shuffle_v8i32_76547654:
1741 ; AVX2OR512VL: # BB#0:
1742 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1743 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3]
1744 ; AVX2OR512VL-NEXT: retq
1745 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
1746 ret <8 x i32> %shuffle
; Full element reversal of %a (mask <7,6,5,4,3,2,1,0>).
; Every run expects an in-lane vpermilps reversal followed by a swap of the
; two 128-bit halves: vperm2f128 on the AVX1 run, vpermpd on AVX2/AVX512VL.
1749 define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) {
1750 ; AVX1-LABEL: shuffle_v8i32_76543210:
1752 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1753 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
1756 ; AVX2OR512VL-LABEL: shuffle_v8i32_76543210:
1757 ; AVX2OR512VL: # BB#0:
1758 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1759 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
1760 ; AVX2OR512VL-NEXT: retq
1761 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1762 ret <8 x i32> %shuffle
; Reversed low half of %a followed by reversed low half of %b
; (mask <3,2,1,0,11,10,9,8>).
; All runs share one expectation: vinsertf128 places the low 128 bits of %b
; above the low 128 bits of %a, then vpermilps reverses within each lane.
1765 define <8 x i32> @shuffle_v8i32_3210ba98(<8 x i32> %a, <8 x i32> %b) {
1766 ; ALL-LABEL: shuffle_v8i32_3210ba98:
1768 ; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1769 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1771 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
1772 ret <8 x i32> %shuffle
; Reversed low half of %a followed by reversed high half of %b
; (mask <3,2,1,0,15,14,13,12>).
; Each run expects a blend that keeps the low half of %a and the high half of
; %b (vblendpd on the AVX1 run, vblendps on AVX2/AVX512VL), then an in-lane
; vpermilps reversal.
1775 define <8 x i32> @shuffle_v8i32_3210fedc(<8 x i32> %a, <8 x i32> %b) {
1776 ; AVX1-LABEL: shuffle_v8i32_3210fedc:
1778 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
1779 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1782 ; AVX2OR512VL-LABEL: shuffle_v8i32_3210fedc:
1783 ; AVX2OR512VL: # BB#0:
1784 ; AVX2OR512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1785 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1786 ; AVX2OR512VL-NEXT: retq
1787 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
1788 ret <8 x i32> %shuffle
; Reversed high half of %a followed by reversed high half of %b
; (mask <7,6,5,4,15,14,13,12>).
; The AVX1/AVX2 runs expect vperm2f128 + vpermilps; the AVX512VL run expects
; the integer forms vperm2i128 + vpshufd with the same lane and element
; selections.
1791 define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) {
1792 ; AVX1OR2-LABEL: shuffle_v8i32_7654fedc:
1794 ; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1795 ; AVX1OR2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1796 ; AVX1OR2-NEXT: retq
1798 ; AVX512VL-LABEL: shuffle_v8i32_7654fedc:
1800 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1801 ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1802 ; AVX512VL-NEXT: retq
1803 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
1804 ret <8 x i32> %shuffle
; Reversed high half of %b followed by reversed high half of %a
; (mask <15,14,13,12,7,6,5,4>); the operand-swapped twin of
; shuffle_v8i32_7654fedc.
; The AVX1/AVX2 runs expect vperm2f128 + vpermilps; the AVX512VL run expects
; vperm2i128 + vpshufd.
1807 define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) {
1808 ; AVX1OR2-LABEL: shuffle_v8i32_fedc7654:
1810 ; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
1811 ; AVX1OR2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1812 ; AVX1OR2-NEXT: retq
1814 ; AVX512VL-LABEL: shuffle_v8i32_fedc7654:
1816 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
1817 ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1818 ; AVX512VL-NEXT: retq
1819 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
1820 ret <8 x i32> %shuffle
; Reversed low half of %b followed by reversed high half of %a
; (mask <11,10,9,8,7,6,5,4>).
; Each run expects a blend that keeps the low half of %b and the high half of
; %a (vblendpd on the AVX1 run, vblendps on AVX2/AVX512VL), then an in-lane
; vpermilps reversal.
1823 define <8 x i32> @shuffle_v8i32_ba987654(<8 x i32> %a, <8 x i32> %b) {
1824 ; AVX1-LABEL: shuffle_v8i32_ba987654:
1826 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
1827 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1830 ; AVX2OR512VL-LABEL: shuffle_v8i32_ba987654:
1831 ; AVX2OR512VL: # BB#0:
1832 ; AVX2OR512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1833 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1834 ; AVX2OR512VL-NEXT: retq
1835 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
1836 ret <8 x i32> %shuffle
; Reversed low half of %b followed by reversed low half of %a
; (mask <11,10,9,8,3,2,1,0>), as spelled by the function name.
; The mask used to read <11,10,9,8,7,6,5,4>, which made this test an exact
; duplicate of shuffle_v8i32_ba987654 and left the "3210" case uncovered.
; NOTE(review): the assertions below are inferred by mirroring
; shuffle_v8i32_3210ba98 with the two operands swapped; rerun
; utils/update_llc_test_checks.py to confirm them for every RUN line.
1839 define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) {
1840 ; ALL-LABEL: shuffle_v8i32_ba983210:
1842 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1843 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1851 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
1852 ret <8 x i32> %shuffle
; Shuffle whose first operand is zeroinitializer and whose second is %a, with
; mask <0,undef,undef,8,0,undef,undef,12>: lanes 0 and 4 are zero, lanes 3
; and 7 take %a[0] and %a[4].
; The AVX1 run expects a zeroed register combined via vshufps; the
; AVX2/AVX512VL runs expect a single per-lane byte shift (vpslldq).
1855 define <8 x i32> @shuffle_v8i32_zuu8zuuc(<8 x i32> %a) {
1856 ; AVX1-LABEL: shuffle_v8i32_zuu8zuuc:
1858 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1859 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,0],ymm1[4,5],ymm0[6,4]
1862 ; AVX2OR512VL-LABEL: shuffle_v8i32_zuu8zuuc:
1863 ; AVX2OR512VL: # BB#0:
1864 ; AVX2OR512VL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19]
1865 ; AVX2OR512VL-NEXT: retq
1866 %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 8, i32 0, i32 undef, i32 undef, i32 12>
1867 ret <8 x i32> %shuffle
; Shuffle whose first operand is zeroinitializer and whose second is %a, with
; mask <9,undef,11,0,13,14,15,0>: each 128-bit half takes elements of %a
; shifted down by one position with a zero in its top lane.
; The AVX1 run expects a zeroed register and two vshufps; the AVX2/AVX512VL
; runs expect a single per-lane byte shift (vpsrldq).
1870 define <8 x i32> @shuffle_v8i32_9ubzdefz(<8 x i32> %a) {
1871 ; AVX1-LABEL: shuffle_v8i32_9ubzdefz:
1873 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1874 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[3,0],ymm1[7,4],ymm0[7,4]
1875 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
1878 ; AVX2OR512VL-LABEL: shuffle_v8i32_9ubzdefz:
1879 ; AVX2OR512VL: # BB#0:
1880 ; AVX2OR512VL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,ymm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero
1881 ; AVX2OR512VL-NEXT: retq
1882 %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 9, i32 undef, i32 11, i32 0, i32 13, i32 14, i32 15, i32 0>
1883 ret <8 x i32> %shuffle
; Interleaves the low elements of each 128-bit half of %b and %a
; (mask <8,0,undef,1,12,4,undef,undef>); all runs expect one vunpcklps with
; %b as the first source.
; NOTE(review): the name spells 'b' (index 11) at position 4, but the mask and
; the expected vunpcklps both use index 12 ('c'); the name looks stale.
1886 define <8 x i32> @shuffle_v8i32_80u1b4uu(<8 x i32> %a, <8 x i32> %b) {
1887 ; ALL-LABEL: shuffle_v8i32_80u1b4uu:
1889 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
1891 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 undef>
1892 ret <8 x i32> %shuffle
; Only the upper half of the result is defined: element 1 of %a splatted into
; lanes 4-7 (mask <u,u,u,u,1,1,1,1>).
; All runs expect a 128-bit vpermilps splat followed by vinsertf128 into the
; upper half.
1895 define <8 x i32> @shuffle_v8i32_uuuu1111(<8 x i32> %a, <8 x i32> %b) {
1896 ; ALL-LABEL: shuffle_v8i32_uuuu1111:
1898 ; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1899 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1901 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
1902 ret <8 x i32> %shuffle
; Only the lower half of the result is defined: element 2 of %a splatted into
; lanes 0-3 (mask <2,2,2,2,u,u,u,u>).
; All runs expect a single 128-bit vpermilps.
1905 define <8 x i32> @shuffle_v8i32_2222uuuu(<8 x i32> %a, <8 x i32> %b) {
1906 ; ALL-LABEL: shuffle_v8i32_2222uuuu:
1908 ; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,2,2]
1910 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 undef, i32 undef, i32 undef, i32 undef>
1911 ret <8 x i32> %shuffle
; Only the lower half of the result is defined: elements 2 and 3 of %a
; interleaved with elements 2 and 3 of %b (mask <2,10,3,11,u,u,u,u>).
; All runs expect a single 128-bit vunpckhps.
1914 define <8 x i32> @shuffle_v8i32_2A3Buuuu(<8 x i32> %a, <8 x i32> %b) {
1915 ; ALL-LABEL: shuffle_v8i32_2A3Buuuu:
1917 ; ALL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1919 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
1920 ret <8 x i32> %shuffle
; Splat of element 4 of %a, the first element of the upper 128-bit half.
; The AVX1 run expects an in-lane vpermilps splat followed by vperm2f128 to
; copy the upper half down; the AVX2/AVX512VL runs expect vextractf128 of the
; upper half followed by vbroadcastss.
1923 define <8 x i32> @shuffle_v8i32_44444444(<8 x i32> %a, <8 x i32> %b) {
1924 ; AVX1-LABEL: shuffle_v8i32_44444444:
1926 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
1927 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
1930 ; AVX2OR512VL-LABEL: shuffle_v8i32_44444444:
1931 ; AVX2OR512VL: # BB#0:
1932 ; AVX2OR512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
1933 ; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %ymm0
1934 ; AVX2OR512VL-NEXT: retq
1935 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
1936 ret <8 x i32> %shuffle
; Same element-4 splat as shuffle_v8i32_44444444, but the inputs arrive as
; <8 x float> and are bitcast to <8 x i32> before the shuffle.
; The expected instruction sequences are the same as for the non-bitcast
; variant.
1939 define <8 x i32> @shuffle_v8i32_44444444_bc(<8 x float> %a, <8 x float> %b) {
1940 ; AVX1-LABEL: shuffle_v8i32_44444444_bc:
1942 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
1943 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
1946 ; AVX2OR512VL-LABEL: shuffle_v8i32_44444444_bc:
1947 ; AVX2OR512VL: # BB#0:
1948 ; AVX2OR512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
1949 ; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %ymm0
1950 ; AVX2OR512VL-NEXT: retq
1951 %tmp0 = bitcast <8 x float> %a to <8 x i32>
1952 %tmp1 = bitcast <8 x float> %b to <8 x i32>
1953 %shuffle = shufflevector <8 x i32> %tmp0, <8 x i32> %tmp1, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
1954 ret <8 x i32> %shuffle
; Only the lower half of the result is defined: element 5 of %a splatted into
; lanes 0-3 (mask <5,5,5,5,u,u,u,u>).
; All runs expect vextractf128 of the upper half followed by a 128-bit
; vpermilps splat of its element 1.
1957 define <8 x i32> @shuffle_v8i32_5555uuuu(<8 x i32> %a, <8 x i32> %b) {
1958 ; ALL-LABEL: shuffle_v8i32_5555uuuu:
1960 ; ALL-NEXT: vextractf128 $1, %ymm0, %xmm0
1961 ; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1963 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
1964 ret <8 x i32> %shuffle
; Only lane 6 of the result is defined; it takes element 7 of %a.
; The AVX1 run expects vmovshdup; the AVX2/AVX512VL runs expect a vpermilps
; whose pattern also places element 7 in lane 6.
1968 define <8 x i32> @shuffle_v8i32_uuuuuu7u(<8 x i32> %a, <8 x i32> %b) nounwind {
1969 ; AVX1-LABEL: shuffle_v8i32_uuuuuu7u:
1971 ; AVX1-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
1974 ; AVX2OR512VL-LABEL: shuffle_v8i32_uuuuuu7u:
1975 ; AVX2OR512VL: # BB#0:
1976 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,3,3,4,5,7,7]
1977 ; AVX2OR512VL-NEXT: retq
1978 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 undef>
1979 ret <8 x i32> %shuffle
; Splat of a float loaded from memory: the scalar is inserted into lane 0 of
; a <4 x float> and then broadcast to eight lanes by a zeroinitializer mask.
; All runs expect the load to fold into a single vbroadcastss from (%rdi).
1982 define <8 x float> @splat_mem_v8f32_2(float* %p) {
1983 ; ALL-LABEL: splat_mem_v8f32_2:
1985 ; ALL-NEXT: vbroadcastss (%rdi), %ymm0
1987 %1 = load float, float* %p
1988 %2 = insertelement <4 x float> undef, float %1, i32 0
1989 %3 = shufflevector <4 x float> %2, <4 x float> undef, <8 x i32> zeroinitializer
; Widening splat: element 0 of a <4 x float> register argument is broadcast
; to eight lanes by a zeroinitializer mask.
; The AVX1 run expects a 128-bit vpermilps splat plus vinsertf128; the
; AVX2/AVX512VL runs expect a register-source vbroadcastss.
1993 define <8 x float> @splat_v8f32(<4 x float> %r) {
1994 ; AVX1-LABEL: splat_v8f32:
1996 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
1997 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2000 ; AVX2OR512VL-LABEL: splat_v8f32:
2001 ; AVX2OR512VL: # BB#0:
2002 ; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %ymm0
2003 ; AVX2OR512VL-NEXT: retq
2004 %1 = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> zeroinitializer
2009 ; Shuffle to logical bit shifts
; Shuffle of %a with zeroinitializer using mask <8,0,undef,2,8,undef,8,6>:
; even elements of %a move up one lane and zeros fill the defined even lanes.
; The AVX1 run expects a zeroed register, vshufps and vpermilps; the
; AVX2/AVX512VL runs expect a single 64-bit left shift by 32 (vpsllq).
2012 define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) {
2013 ; AVX1-LABEL: shuffle_v8i32_z0U2zUz6:
2015 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
2016 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
2017 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
2020 ; AVX2OR512VL-LABEL: shuffle_v8i32_z0U2zUz6:
2021 ; AVX2OR512VL: # BB#0:
2022 ; AVX2OR512VL-NEXT: vpsllq $32, %ymm0, %ymm0
2023 ; AVX2OR512VL-NEXT: retq
2024 %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6>
2025 ret <8 x i32> %shuffle
; Shuffle of %a with zeroinitializer using mask <1,undef,3,8,5,8,undef,undef>:
; odd elements of %a move down one lane and zeros fill the defined odd lanes.
; The AVX1 run expects a zeroed register, vshufps and vpermilps; the
; AVX2/AVX512VL runs expect a single 64-bit right shift by 32 (vpsrlq).
2028 define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) {
2029 ; AVX1-LABEL: shuffle_v8i32_1U3z5zUU:
2031 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
2032 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
2033 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
2036 ; AVX2OR512VL-LABEL: shuffle_v8i32_1U3z5zUU:
2037 ; AVX2OR512VL: # BB#0:
2038 ; AVX2OR512VL-NEXT: vpsrlq $32, %ymm0, %ymm0
2039 ; AVX2OR512VL-NEXT: retq
2040 %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef>
2041 ret <8 x i32> %shuffle
; Per-lane rotate across two sources: each 128-bit half is the last element
; of %b's half followed by the first three of %a's (mask <11,0,1,2,15,4,5,6>).
; The AVX1 run expects two vshufps; the AVX2/AVX512VL runs expect a single
; vpalignr.
2044 define <8 x i32> @shuffle_v8i32_B012F456(<8 x i32> %a, <8 x i32> %b) {
2045 ; AVX1-LABEL: shuffle_v8i32_B012F456:
2047 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[0,0],ymm1[7,4],ymm0[4,4]
2048 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[1,2],ymm1[4,6],ymm0[5,6]
2051 ; AVX2OR512VL-LABEL: shuffle_v8i32_B012F456:
2052 ; AVX2OR512VL: # BB#0:
2053 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11],ymm1[28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27]
2054 ; AVX2OR512VL-NEXT: retq
2055 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
2056 ret <8 x i32> %shuffle
; Per-lane rotate across two sources: each 128-bit half is the last three
; elements of %a's half followed by the first of %b's
; (mask <1,2,3,8,5,6,7,12>).
; The AVX1 run expects two vshufps; the AVX2/AVX512VL runs expect a single
; vpalignr.
2059 define <8 x i32> @shuffle_v8i32_1238567C(<8 x i32> %a, <8 x i32> %b) {
2060 ; AVX1-LABEL: shuffle_v8i32_1238567C:
2062 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4]
2063 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
2066 ; AVX2OR512VL-LABEL: shuffle_v8i32_1238567C:
2067 ; AVX2OR512VL: # BB#0:
2068 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3],ymm0[20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19]
2069 ; AVX2OR512VL-NEXT: retq
2070 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
2071 ret <8 x i32> %shuffle
; Operand-swapped twin of shuffle_v8i32_1238567C: each 128-bit half is the
; last three elements of %b's half followed by the first of %a's
; (mask <9,10,11,0,13,14,15,4>).
; The AVX1 run expects two vshufps; the AVX2/AVX512VL runs expect a single
; vpalignr.
2074 define <8 x i32> @shuffle_v8i32_9AB0DEF4(<8 x i32> %a, <8 x i32> %b) {
2075 ; AVX1-LABEL: shuffle_v8i32_9AB0DEF4:
2077 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[3,0],ymm0[4,4],ymm1[7,4]
2078 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,2],ymm0[2,0],ymm1[5,6],ymm0[6,4]
2081 ; AVX2OR512VL-LABEL: shuffle_v8i32_9AB0DEF4:
2082 ; AVX2OR512VL: # BB#0:
2083 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3],ymm1[20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19]
2084 ; AVX2OR512VL-NEXT: retq
2085 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 10, i32 11, i32 0, i32 13, i32 14, i32 15, i32 4>
2086 ret <8 x i32> %shuffle
; Operand-swapped twin of shuffle_v8i32_B012F456: each 128-bit half is the
; last element of %a's half followed by the first three of %b's
; (mask <3,8,9,10,7,12,13,14>).
; The AVX1 run expects two vshufps; the AVX2/AVX512VL runs expect a single
; vpalignr.
2089 define <8 x i32> @shuffle_v8i32_389A7CDE(<8 x i32> %a, <8 x i32> %b) {
2090 ; AVX1-LABEL: shuffle_v8i32_389A7CDE:
2092 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,0],ymm1[0,0],ymm0[7,4],ymm1[4,4]
2093 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[1,2],ymm0[4,6],ymm1[5,6]
2096 ; AVX2OR512VL-LABEL: shuffle_v8i32_389A7CDE:
2097 ; AVX2OR512VL: # BB#0:
2098 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11],ymm0[28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27]
2099 ; AVX2OR512VL-NEXT: retq
2100 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 8, i32 9, i32 10, i32 7, i32 12, i32 13, i32 14>
2101 ret <8 x i32> %shuffle
; Single-source rotate within each 128-bit half (mask <3,0,1,2,7,4,5,6>).
; All runs expect a single in-lane vpermilps.
2104 define <8 x i32> @shuffle_v8i32_30127456(<8 x i32> %a, <8 x i32> %b) {
2105 ; ALL-LABEL: shuffle_v8i32_30127456:
2107 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
2109 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>
2110 ret <8 x i32> %shuffle
; Single-source rotate within each 128-bit half, opposite direction to
; shuffle_v8i32_30127456 (mask <1,2,3,0,5,6,7,4>).
; All runs expect a single in-lane vpermilps.
2113 define <8 x i32> @shuffle_v8i32_12305674(<8 x i32> %a, <8 x i32> %b) {
2114 ; ALL-LABEL: shuffle_v8i32_12305674:
2116 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
2118 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
2119 ret <8 x i32> %shuffle
; Concatenates two <2 x float> loads into the low four lanes of an
; <8 x float>: each load is first widened to eight lanes, then the two
; widened vectors are merged with mask <0,1,8,9,u,u,u,u>.
; All runs expect the pair of loads to become vmovsd followed by vmovhpd.
2122 define <8x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) {
2123 ; ALL-LABEL: concat_v2f32_1:
2124 ; ALL: # BB#0: # %entry
2125 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2126 ; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
2129 %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
2130 %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
2131 %tmp73 = shufflevector <2 x float> %tmp72, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2132 %tmp75 = shufflevector <2 x float> %tmp74, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2133 %tmp76 = shufflevector <8 x float> %tmp73, <8 x float> %tmp75, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
2134 ret <8 x float> %tmp76
; Same concatenation as concat_v2f32_1, expressed as one widening shuffle of
; the two <2 x float> loads with mask <0,1,2,3,u,u,u,u>.
; All runs expect vmovsd followed by vmovhpd.
2137 define <8x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) {
2138 ; ALL-LABEL: concat_v2f32_2:
2139 ; ALL: # BB#0: # %entry
2140 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2141 ; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
2144 %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
2145 %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
2146 %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
2147 ret <8 x float> %tmp76
; Same concatenation as concat_v2f32_1, done in two steps: the two
; <2 x float> loads are joined into a <4 x float>, which is then widened to
; <8 x float> with undef upper lanes.
; All runs expect vmovsd followed by vmovhpd.
2150 define <8x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) {
2151 ; ALL-LABEL: concat_v2f32_3:
2152 ; ALL: # BB#0: # %entry
2153 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2154 ; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
2157 %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
2158 %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
2159 %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2160 %res = shufflevector <4 x float> %tmp76, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
2161 ret <8 x float> %res
; Loads an i32, places it in lane 0, and fills lanes 1-7 from
; zeroinitializer (mask <0,9,10,11,12,13,14,15>).
; All runs expect a single zero-extending scalar load (vmovss from memory).
2164 define <8 x i32> @insert_mem_and_zero_v8i32(i32* %ptr) {
2165 ; ALL-LABEL: insert_mem_and_zero_v8i32:
2167 ; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2169 %a = load i32, i32* %ptr
2170 %v = insertelement <8 x i32> undef, i32 %a, i32 0
2171 %shuffle = shufflevector <8 x i32> %v, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2172 ret <8 x i32> %shuffle
; Extracts the low half of %a and the high half of %b as <4 x i32> values and
; concatenates them back into an <8 x i32>.
; Each run expects the whole sequence to collapse to one blend: vblendpd on
; the AVX1 run, vblendps on AVX2/AVX512VL.
2175 define <8 x i32> @concat_v8i32_0123CDEF(<8 x i32> %a, <8 x i32> %b) {
2176 ; AVX1-LABEL: concat_v8i32_0123CDEF:
2178 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
2181 ; AVX2OR512VL-LABEL: concat_v8i32_0123CDEF:
2182 ; AVX2OR512VL: # BB#0:
2183 ; AVX2OR512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
2184 ; AVX2OR512VL-NEXT: retq
2185 %alo = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2186 %bhi = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2187 %shuf = shufflevector <4 x i32> %alo, <4 x i32> %bhi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Extracts the high halves of %a0 and %a1 as <4 x i32>, bitcasts each to
; <2 x i64>, concatenates them as <4 x i64>, and bitcasts back to <8 x i32>.
; The bitcasts are expected to fold away: the AVX1/AVX2 runs expect a single
; vperm2f128, the AVX512VL run a single vperm2i128.
2191 define <8 x i32> @concat_v8i32_4567CDEF_bc(<8 x i32> %a0, <8 x i32> %a1) {
2192 ; AVX1OR2-LABEL: concat_v8i32_4567CDEF_bc:
2194 ; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
2195 ; AVX1OR2-NEXT: retq
2197 ; AVX512VL-LABEL: concat_v8i32_4567CDEF_bc:
2199 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
2200 ; AVX512VL-NEXT: retq
2201 %a0hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2202 %a1hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
2203 %bc0hi = bitcast <4 x i32> %a0hi to <2 x i64>
2204 %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64>
2205 %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2206 %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x i32>
2207 ret <8 x i32> %shuffle32
; Float-typed variant of concat_v8i32_4567CDEF_bc: %f0 is viewed as <4 x i64>
; and %f1 as <8 x i32>, their high halves are extracted, concatenated as
; <4 x i64>, and bitcast back to <8 x float>.
; All runs expect a single vperm2f128.
2210 define <8 x float> @concat_v8f32_4567CDEF_bc(<8 x float> %f0, <8 x float> %f1) {
2211 ; ALL-LABEL: concat_v8f32_4567CDEF_bc:
2213 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
2215 %a0 = bitcast <8 x float> %f0 to <4 x i64>
2216 %a1 = bitcast <8 x float> %f1 to <8 x i32>
2217 %a0hi = shufflevector <4 x i64> %a0, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
2218 %a1hi = shufflevector <8 x i32> %a1, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2219 %bc0hi = bitcast <2 x i64> %a0hi to <2 x i64>
2220 %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64>
2221 %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2222 %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x float>
2223 ret <8 x float> %shuffle32
; Loads an i32, inserts it into lane 0 of a zeroed <4 x i32>, and splats lane
; 0 to eight lanes with a zeroinitializer mask.
; All runs expect the load to fold into a single vbroadcastss from (%rdi).
2226 define <8 x i32> @insert_dup_mem_v8i32(i32* %ptr) {
2227 ; ALL-LABEL: insert_dup_mem_v8i32:
2229 ; ALL-NEXT: vbroadcastss (%rdi), %ymm0
2231 %tmp = load i32, i32* %ptr, align 4
2232 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
2233 %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <8 x i32> zeroinitializer
; Whole-vector shift down by one element across two sources: elements 1-7 of
; %a followed by element 0 of %b (mask <1,2,3,4,5,6,7,8>).
; The AVX1 run expects a blend plus a cross-lane vperm2f128/vshufps sequence;
; the AVX2 run expects a blend plus a constant-index vpermps; the AVX512VL
; run expects a single valignd.
2237 define <8 x i32> @shuffle_v8i32_12345678(<8 x i32> %a, <8 x i32> %b) {
2238 ; AVX1-LABEL: shuffle_v8i32_12345678:
2240 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
2241 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
2242 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4]
2243 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
2246 ; AVX2-LABEL: shuffle_v8i32_12345678:
2248 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
2249 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,3,4,5,6,7,0]
2250 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
2253 ; AVX512VL-LABEL: shuffle_v8i32_12345678:
2255 ; AVX512VL-NEXT: valignd {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7],ymm1[0]
2256 ; AVX512VL-NEXT: retq
2257 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
2258 ret <8 x i32> %shuffle
; Single-source whole-vector rotate down by one element
; (mask <1,2,3,4,5,6,7,0>, second operand undef).
; The AVX1 run expects vperm2f128 plus two vshufps; the AVX2 run expects a
; constant-index vpermps; the AVX512VL run expects a single valignd.
2261 define <8 x i32> @shuffle_v8i32_12345670(<8 x i32> %a) {
2262 ; AVX1-LABEL: shuffle_v8i32_12345670:
2264 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
2265 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4]
2266 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
2269 ; AVX2-LABEL: shuffle_v8i32_12345670:
2271 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,3,4,5,6,7,0]
2272 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
2275 ; AVX512VL-LABEL: shuffle_v8i32_12345670:
2277 ; AVX512VL-NEXT: valignd {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,0]
2278 ; AVX512VL-NEXT: retq
2279 %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0>
2280 ret <8 x i32> %shuffle