1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1OR2 --check-prefix=AVX1
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-SLOW
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-FAST-ALL
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-FAST-PERLANE
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-SLOW
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-FAST --check-prefix=AVX512VL-FAST-ALL
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-FAST --check-prefix=AVX512VL-FAST-PERLANE
; Broadcast of lane 0 - every result element is element 0 of %a.
; Expected lowering is vshufps + vinsertf128 with plain AVX and a single
; vbroadcastss once AVX2 (or AVX512VL) is available.
10 define <8 x float> @shuffle_v8f32_00000000(<8 x float> %a, <8 x float> %b) {
11 ; AVX1-LABEL: shuffle_v8f32_00000000:
13 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
14 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
17 ; AVX2OR512VL-LABEL: shuffle_v8f32_00000000:
18 ; AVX2OR512VL: # %bb.0:
19 ; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %ymm0
20 ; AVX2OR512VL-NEXT: retq
21 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
22 ret <8 x float> %shuffle
; Element 0 of %a in every lane except lane 6, which takes element 1.
; Configurations with fast cross-lane variable shuffles are expected to use one
; vpermps with a constant index vector. The other AVX2 and AVX512VL runs use a
; vshufps/vpermpd pair, and plain AVX builds each half with vshufps and joins
; them with vinsertf128.
25 define <8 x float> @shuffle_v8f32_00000010(<8 x float> %a, <8 x float> %b) {
26 ; AVX1-LABEL: shuffle_v8f32_00000010:
28 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,0,0,0]
29 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,1,0]
30 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
33 ; AVX2-SLOW-LABEL: shuffle_v8f32_00000010:
35 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,1,0]
36 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
37 ; AVX2-SLOW-NEXT: retq
39 ; AVX2-FAST-ALL-LABEL: shuffle_v8f32_00000010:
40 ; AVX2-FAST-ALL: # %bb.0:
41 ; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
42 ; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
43 ; AVX2-FAST-ALL-NEXT: retq
45 ; AVX2-FAST-PERLANE-LABEL: shuffle_v8f32_00000010:
46 ; AVX2-FAST-PERLANE: # %bb.0:
47 ; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,1,0]
48 ; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
49 ; AVX2-FAST-PERLANE-NEXT: retq
51 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_00000010:
52 ; AVX512VL-SLOW: # %bb.0:
53 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,1,0]
54 ; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
55 ; AVX512VL-SLOW-NEXT: retq
57 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8f32_00000010:
58 ; AVX512VL-FAST-ALL: # %bb.0:
59 ; AVX512VL-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
60 ; AVX512VL-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
61 ; AVX512VL-FAST-ALL-NEXT: retq
63 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8f32_00000010:
64 ; AVX512VL-FAST-PERLANE: # %bb.0:
65 ; AVX512VL-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,1,0]
66 ; AVX512VL-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
67 ; AVX512VL-FAST-PERLANE-NEXT: retq
68 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
69 ret <8 x float> %shuffle
; Element 0 of %a in every lane except lane 5, which takes element 2.
; Runs with fast cross-lane variable shuffles expect a constant-index vpermps.
; The remaining AVX2 and AVX512VL runs expect vshufps followed by vpermpd, and
; plain AVX expects two vshufps joined by vinsertf128.
72 define <8 x float> @shuffle_v8f32_00000200(<8 x float> %a, <8 x float> %b) {
73 ; AVX1-LABEL: shuffle_v8f32_00000200:
75 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,0,0,0]
76 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,0,0]
77 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
80 ; AVX2-SLOW-LABEL: shuffle_v8f32_00000200:
82 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,2]
83 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
84 ; AVX2-SLOW-NEXT: retq
86 ; AVX2-FAST-ALL-LABEL: shuffle_v8f32_00000200:
87 ; AVX2-FAST-ALL: # %bb.0:
88 ; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
89 ; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
90 ; AVX2-FAST-ALL-NEXT: retq
92 ; AVX2-FAST-PERLANE-LABEL: shuffle_v8f32_00000200:
93 ; AVX2-FAST-PERLANE: # %bb.0:
94 ; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,2]
95 ; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
96 ; AVX2-FAST-PERLANE-NEXT: retq
98 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_00000200:
99 ; AVX512VL-SLOW: # %bb.0:
100 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,2]
101 ; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
102 ; AVX512VL-SLOW-NEXT: retq
104 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8f32_00000200:
105 ; AVX512VL-FAST-ALL: # %bb.0:
106 ; AVX512VL-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
107 ; AVX512VL-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
108 ; AVX512VL-FAST-ALL-NEXT: retq
110 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8f32_00000200:
111 ; AVX512VL-FAST-PERLANE: # %bb.0:
112 ; AVX512VL-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,2]
113 ; AVX512VL-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
114 ; AVX512VL-FAST-PERLANE-NEXT: retq
115 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
116 ret <8 x float> %shuffle
; Element 0 of %a in every lane except lane 4, which takes element 3.
; Runs with fast cross-lane variable shuffles expect a constant-index vpermps.
; The remaining AVX2 and AVX512VL runs expect vshufps followed by vpermpd, and
; plain AVX expects two vshufps joined by vinsertf128.
119 define <8 x float> @shuffle_v8f32_00003000(<8 x float> %a, <8 x float> %b) {
120 ; AVX1-LABEL: shuffle_v8f32_00003000:
122 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,0,0,0]
123 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,0,0,0]
124 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
127 ; AVX2-SLOW-LABEL: shuffle_v8f32_00003000:
128 ; AVX2-SLOW: # %bb.0:
129 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,3,0]
130 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
131 ; AVX2-SLOW-NEXT: retq
133 ; AVX2-FAST-ALL-LABEL: shuffle_v8f32_00003000:
134 ; AVX2-FAST-ALL: # %bb.0:
135 ; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
136 ; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
137 ; AVX2-FAST-ALL-NEXT: retq
139 ; AVX2-FAST-PERLANE-LABEL: shuffle_v8f32_00003000:
140 ; AVX2-FAST-PERLANE: # %bb.0:
141 ; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,3,0]
142 ; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
143 ; AVX2-FAST-PERLANE-NEXT: retq
145 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_00003000:
146 ; AVX512VL-SLOW: # %bb.0:
147 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,3,0]
148 ; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
149 ; AVX512VL-SLOW-NEXT: retq
151 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8f32_00003000:
152 ; AVX512VL-FAST-ALL: # %bb.0:
153 ; AVX512VL-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
154 ; AVX512VL-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
155 ; AVX512VL-FAST-ALL-NEXT: retq
157 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8f32_00003000:
158 ; AVX512VL-FAST-PERLANE: # %bb.0:
159 ; AVX512VL-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,3,0]
160 ; AVX512VL-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
161 ; AVX512VL-FAST-PERLANE-NEXT: retq
162 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
163 ret <8 x float> %shuffle
; Element 0 of %a in every lane except lane 3, which takes element 4 from the
; upper 128-bit half of %a. AVX2 and AVX512VL runs expect a constant-index
; vpermps, while plain AVX needs a longer multi-instruction sequence.
166 define <8 x float> @shuffle_v8f32_00040000(<8 x float> %a, <8 x float> %b) {
167 ; AVX1-LABEL: shuffle_v8f32_00040000:
169 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
170 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
171 ; AVX1-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
172 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[2,0],ymm0[4,4],ymm1[6,4]
175 ; AVX2OR512VL-LABEL: shuffle_v8f32_00040000:
176 ; AVX2OR512VL: # %bb.0:
177 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} xmm1 = [0,0,0,4]
178 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
179 ; AVX2OR512VL-NEXT: retq
180 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
181 ret <8 x float> %shuffle
; Element 0 of %a in every lane except lane 2, which takes element 5 from the
; upper 128-bit half of %a. AVX2 and AVX512VL runs expect a constant-index
; vpermps, while plain AVX combines vperm2f128, vblendps and vpermilps.
184 define <8 x float> @shuffle_v8f32_00500000(<8 x float> %a, <8 x float> %b) {
185 ; AVX1-LABEL: shuffle_v8f32_00500000:
187 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
188 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
189 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
192 ; AVX2OR512VL-LABEL: shuffle_v8f32_00500000:
193 ; AVX2OR512VL: # %bb.0:
194 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} xmm1 = [0,0,5,0]
195 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
196 ; AVX2OR512VL-NEXT: retq
197 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
198 ret <8 x float> %shuffle
; Element 0 of %a in every lane except lane 1, which takes element 6 from the
; upper 128-bit half of %a. AVX2 and AVX512VL runs expect a constant-index
; vpermps, while plain AVX combines vperm2f128, vblendps and vpermilps.
201 define <8 x float> @shuffle_v8f32_06000000(<8 x float> %a, <8 x float> %b) {
202 ; AVX1-LABEL: shuffle_v8f32_06000000:
204 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
205 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
206 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
209 ; AVX2OR512VL-LABEL: shuffle_v8f32_06000000:
210 ; AVX2OR512VL: # %bb.0:
211 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} xmm1 = [0,6,0,0]
212 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
213 ; AVX2OR512VL-NEXT: retq
214 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
215 ret <8 x float> %shuffle
; Lane 0 takes element 7 (top of the upper 128-bit half of %a) and every other
; lane takes element 0. AVX2 and AVX512VL runs expect a constant-index vpermps,
; while plain AVX combines vperm2f128, vblendps and vpermilps.
218 define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) {
219 ; AVX1-LABEL: shuffle_v8f32_70000000:
221 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
222 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
223 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
226 ; AVX2OR512VL-LABEL: shuffle_v8f32_70000000:
227 ; AVX2OR512VL: # %bb.0:
228 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} xmm1 = [7,0,0,0]
229 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
230 ; AVX2OR512VL-NEXT: retq
231 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
232 ret <8 x float> %shuffle
; Duplicates the low pair of floats within each 128-bit half of %a
; (0,1,0,1 then 4,5,4,5). Every configuration expects a single vmovddup.
235 define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) {
236 ; ALL-LABEL: shuffle_v8f32_01014545:
238 ; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
240 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
241 ret <8 x float> %shuffle
; Doubles each of the four low elements of %a into adjacent lanes
; (0,0,1,1,2,2,3,3). Plain AVX shuffles each half and inserts, and the AVX2 and
; AVX512VL runs expect a constant-index vpermps.
244 define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) {
245 ; AVX1-LABEL: shuffle_v8f32_00112233:
247 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,0,1,1]
248 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
249 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
252 ; AVX2OR512VL-LABEL: shuffle_v8f32_00112233:
253 ; AVX2OR512VL: # %bb.0:
254 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
255 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
256 ; AVX2OR512VL-NEXT: retq
257 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
258 ret <8 x float> %shuffle
; Low four lanes take element 0 of %a and high four lanes take element 1.
; Runs with fast cross-lane variable shuffles expect a constant-index vpermps.
; The remaining AVX2 and AVX512VL runs expect vshufps followed by vpermpd, and
; plain AVX expects two vshufps joined by vinsertf128.
261 define <8 x float> @shuffle_v8f32_00001111(<8 x float> %a, <8 x float> %b) {
262 ; AVX1-LABEL: shuffle_v8f32_00001111:
264 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,0,0,0]
265 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
266 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
269 ; AVX2-SLOW-LABEL: shuffle_v8f32_00001111:
270 ; AVX2-SLOW: # %bb.0:
271 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,1,1]
272 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
273 ; AVX2-SLOW-NEXT: retq
275 ; AVX2-FAST-ALL-LABEL: shuffle_v8f32_00001111:
276 ; AVX2-FAST-ALL: # %bb.0:
277 ; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
278 ; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
279 ; AVX2-FAST-ALL-NEXT: retq
281 ; AVX2-FAST-PERLANE-LABEL: shuffle_v8f32_00001111:
282 ; AVX2-FAST-PERLANE: # %bb.0:
283 ; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,1,1]
284 ; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
285 ; AVX2-FAST-PERLANE-NEXT: retq
287 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_00001111:
288 ; AVX512VL-SLOW: # %bb.0:
289 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,1,1]
290 ; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
291 ; AVX512VL-SLOW-NEXT: retq
293 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8f32_00001111:
294 ; AVX512VL-FAST-ALL: # %bb.0:
295 ; AVX512VL-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
296 ; AVX512VL-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
297 ; AVX512VL-FAST-ALL-NEXT: retq
299 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8f32_00001111:
300 ; AVX512VL-FAST-PERLANE: # %bb.0:
301 ; AVX512VL-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,1,1]
302 ; AVX512VL-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
303 ; AVX512VL-FAST-PERLANE-NEXT: retq
304 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
305 ret <8 x float> %shuffle
; Lane-preserving blend - even lanes come from %b (mask indices 8 and above
; select the second operand) and odd lanes from %a. Every configuration expects
; a single vblendps.
308 define <8 x float> @shuffle_v8f32_81a3c5e7(<8 x float> %a, <8 x float> %b) {
309 ; ALL-LABEL: shuffle_v8f32_81a3c5e7:
311 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
313 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
314 ret <8 x float> %shuffle
; Alternates element 0 of %a with element 0 of %b (mask index 8) across all
; eight lanes. Most runs interleave the two scalars in an xmm register and then
; broadcast the 64-bit pair. The AVX512VL run with fast cross-lane shuffles
; expects a two-source vpermt2ps with a broadcast index constant instead.
317 define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {
318 ; AVX1-LABEL: shuffle_v8f32_08080808:
320 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
321 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
322 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
325 ; AVX2-LABEL: shuffle_v8f32_08080808:
327 ; AVX2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
328 ; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
331 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_08080808:
332 ; AVX512VL-SLOW: # %bb.0:
333 ; AVX512VL-SLOW-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
334 ; AVX512VL-SLOW-NEXT: vbroadcastsd %xmm0, %ymm0
335 ; AVX512VL-SLOW-NEXT: retq
337 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8f32_08080808:
338 ; AVX512VL-FAST-ALL: # %bb.0:
339 ; AVX512VL-FAST-ALL-NEXT: vbroadcastsd {{.*#+}} ymm2 = [0,8,0,8,0,8,0,8]
340 ; AVX512VL-FAST-ALL-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
341 ; AVX512VL-FAST-ALL-NEXT: retq
343 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8f32_08080808:
344 ; AVX512VL-FAST-PERLANE: # %bb.0:
345 ; AVX512VL-FAST-PERLANE-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
346 ; AVX512VL-FAST-PERLANE-NEXT: vbroadcastsd %xmm0, %ymm0
347 ; AVX512VL-FAST-PERLANE-NEXT: retq
348 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
349 ret <8 x float> %shuffle
; Per 128-bit half, alternates the first element of %a with the first element
; of %b (a0,b0,a0,b0 then a4,b4,a4,b4). AVX, AVX2 and slow-shuffle AVX512VL runs
; expect two vshufps. The fast-shuffle AVX512VL runs expect one vpermt2ps.
352 define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) {
353 ; AVX1OR2-LABEL: shuffle_v8f32_08084c4c:
355 ; AVX1OR2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
356 ; AVX1OR2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
359 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_08084c4c:
360 ; AVX512VL-SLOW: # %bb.0:
361 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
362 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
363 ; AVX512VL-SLOW-NEXT: retq
365 ; AVX512VL-FAST-LABEL: shuffle_v8f32_08084c4c:
366 ; AVX512VL-FAST: # %bb.0:
367 ; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm2 = [0,8,0,8,4,12,4,12]
368 ; AVX512VL-FAST-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
369 ; AVX512VL-FAST-NEXT: retq
370 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
371 ret <8 x float> %shuffle
; Per 128-bit half, the low two lanes repeat the first element of %b and the
; high two lanes keep elements 2,3 of %a. Every configuration expects a single
; two-source vshufps.
374 define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) {
375 ; ALL-LABEL: shuffle_v8f32_8823cc67:
377 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
379 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
380 ret <8 x float> %shuffle
; Per 128-bit half, the low two lanes are the swapped low pair of %b and the
; high two lanes are the swapped high pair of %a. Every configuration expects a
; single two-source vshufps.
383 define <8 x float> @shuffle_v8f32_9832dc76(<8 x float> %a, <8 x float> %b) {
384 ; ALL-LABEL: shuffle_v8f32_9832dc76:
386 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
388 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
389 ret <8 x float> %shuffle
; Per 128-bit half, the low two lanes are the swapped low pair of %b and the
; high two lanes are the swapped low pair of %a. Every configuration expects a
; single two-source vshufps.
392 define <8 x float> @shuffle_v8f32_9810dc54(<8 x float> %a, <8 x float> %b) {
393 ; ALL-LABEL: shuffle_v8f32_9810dc54:
395 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
397 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
398 ret <8 x float> %shuffle
; Interleaves the low two elements of each 128-bit half of %a and %b
; (a0,b0,a1,b1 then a4,b4,a5,b5). Every configuration expects one vunpcklps.
401 define <8 x float> @shuffle_v8f32_08194c5d(<8 x float> %a, <8 x float> %b) {
402 ; ALL-LABEL: shuffle_v8f32_08194c5d:
404 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
406 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
407 ret <8 x float> %shuffle
; Interleaves the high two elements of each 128-bit half of %a and %b
; (a2,b2,a3,b3 then a6,b6,a7,b7). Every configuration expects one vunpckhps.
410 define <8 x float> @shuffle_v8f32_2a3b6e7f(<8 x float> %a, <8 x float> %b) {
411 ; ALL-LABEL: shuffle_v8f32_2a3b6e7f:
413 ; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
415 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
416 ret <8 x float> %shuffle
; Full interleave of the low four elements of %a and %b across the whole
; 256-bit result (a0,b0,a1,b1,a2,b2,a3,b3). AVX and AVX2 runs unpack the low and
; high pairs in xmm registers and join them with vinsertf128. AVX512VL runs
; expect a single vpermt2ps.
419 define <8 x float> @shuffle_v8f32_08192a3b(<8 x float> %a, <8 x float> %b) {
420 ; AVX1OR2-LABEL: shuffle_v8f32_08192a3b:
422 ; AVX1OR2-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
423 ; AVX1OR2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
424 ; AVX1OR2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
427 ; AVX512VL-LABEL: shuffle_v8f32_08192a3b:
429 ; AVX512VL-NEXT: vmovaps {{.*#+}} ymm2 = [0,8,1,9,2,10,3,11]
430 ; AVX512VL-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
431 ; AVX512VL-NEXT: retq
432 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
433 ret <8 x float> %shuffle
; Result is a0,b0,b1,b1,a1,b2,b3,b3 (mask indices 8 and above select from %b).
; AVX2 runs permute each source separately and merge with vblendps. AVX512VL
; runs expect a single vpermi2ps whose index constant treats %b as the first
; table operand, followed by a register move into ymm0.
436 define <8 x float> @shuffle_v8f32_08991abb(<8 x float> %a, <8 x float> %b) {
437 ; AVX1-LABEL: shuffle_v8f32_08991abb:
439 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm2 = xmm0[0],xmm1[0]
440 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
441 ; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
442 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,2,3,3]
443 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
446 ; AVX2-SLOW-LABEL: shuffle_v8f32_08991abb:
447 ; AVX2-SLOW: # %bb.0:
448 ; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm2 = [u,0,1,1,u,2,3,3]
449 ; AVX2-SLOW-NEXT: vpermps %ymm1, %ymm2, %ymm1
450 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
451 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
452 ; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
453 ; AVX2-SLOW-NEXT: retq
455 ; AVX2-FAST-ALL-LABEL: shuffle_v8f32_08991abb:
456 ; AVX2-FAST-ALL: # %bb.0:
457 ; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,u,u,u,1,u,u,u]
458 ; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
459 ; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [u,0,1,1,u,2,3,3]
460 ; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
461 ; AVX2-FAST-ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
462 ; AVX2-FAST-ALL-NEXT: retq
464 ; AVX2-FAST-PERLANE-LABEL: shuffle_v8f32_08991abb:
465 ; AVX2-FAST-PERLANE: # %bb.0:
466 ; AVX2-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm2 = [u,0,1,1,u,2,3,3]
467 ; AVX2-FAST-PERLANE-NEXT: vpermps %ymm1, %ymm2, %ymm1
468 ; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
469 ; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
470 ; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
471 ; AVX2-FAST-PERLANE-NEXT: retq
473 ; AVX512VL-LABEL: shuffle_v8f32_08991abb:
475 ; AVX512VL-NEXT: vmovaps {{.*#+}} ymm2 = [8,0,1,1,9,2,3,3]
476 ; AVX512VL-NEXT: vpermi2ps %ymm0, %ymm1, %ymm2
477 ; AVX512VL-NEXT: vmovaps %ymm2, %ymm0
478 ; AVX512VL-NEXT: retq
479 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
480 ret <8 x float> %shuffle
; Even lanes take elements 0..3 of %a in order and odd lanes keep the matching
; odd elements of %b (a0,b1,a1,b3,a2,b5,a3,b7). Most AVX2 and AVX512VL runs
; spread the low half of %a with vpmovzxdq and then blend with %b. The AVX512VL
; run with fast cross-lane shuffles expects one vpermt2ps.
483 define <8 x float> @shuffle_v8f32_091b2d3f(<8 x float> %a, <8 x float> %b) {
484 ; AVX1-LABEL: shuffle_v8f32_091b2d3f:
486 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,1,1,3]
487 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,1,3,3]
488 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
489 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
492 ; AVX2-LABEL: shuffle_v8f32_091b2d3f:
494 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
495 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
498 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_091b2d3f:
499 ; AVX512VL-SLOW: # %bb.0:
500 ; AVX512VL-SLOW-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
501 ; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
502 ; AVX512VL-SLOW-NEXT: retq
504 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8f32_091b2d3f:
505 ; AVX512VL-FAST-ALL: # %bb.0:
506 ; AVX512VL-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,9,1,11,2,13,3,15]
507 ; AVX512VL-FAST-ALL-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
508 ; AVX512VL-FAST-ALL-NEXT: retq
510 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8f32_091b2d3f:
511 ; AVX512VL-FAST-PERLANE: # %bb.0:
512 ; AVX512VL-FAST-PERLANE-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
513 ; AVX512VL-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
514 ; AVX512VL-FAST-PERLANE-NEXT: retq
515 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
516 ret <8 x float> %shuffle
; Lanes 0 and 4 take elements 0 and 1 of %a, and every other lane keeps the
; same-position element of %b (a0,b1,b2,b3,a1,b5,b6,b7). AVX and AVX2 runs move
; the two %a elements into place and then blend. AVX512VL runs expect a single
; vpermi2ps followed by a register move into ymm0.
519 define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) {
520 ; AVX1-LABEL: shuffle_v8f32_09ab1def:
522 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
523 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
524 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
527 ; AVX2-SLOW-LABEL: shuffle_v8f32_09ab1def:
528 ; AVX2-SLOW: # %bb.0:
529 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
530 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
531 ; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
532 ; AVX2-SLOW-NEXT: retq
534 ; AVX2-FAST-ALL-LABEL: shuffle_v8f32_09ab1def:
535 ; AVX2-FAST-ALL: # %bb.0:
536 ; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,u,u,u,1,u,u,u]
537 ; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
538 ; AVX2-FAST-ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
539 ; AVX2-FAST-ALL-NEXT: retq
541 ; AVX2-FAST-PERLANE-LABEL: shuffle_v8f32_09ab1def:
542 ; AVX2-FAST-PERLANE: # %bb.0:
543 ; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
544 ; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
545 ; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
546 ; AVX2-FAST-PERLANE-NEXT: retq
548 ; AVX512VL-LABEL: shuffle_v8f32_09ab1def:
550 ; AVX512VL-NEXT: vmovaps {{.*#+}} ymm2 = [8,1,2,3,9,5,6,7]
551 ; AVX512VL-NEXT: vpermi2ps %ymm0, %ymm1, %ymm2
552 ; AVX512VL-NEXT: vmovaps %ymm2, %ymm0
553 ; AVX512VL-NEXT: retq
554 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
555 ret <8 x float> %shuffle
; Single-source in-lane shuffle of %a - both 128-bit halves use the pattern
; 0,0,0,1. Every configuration expects one vshufps.
558 define <8 x float> @shuffle_v8f32_00014445(<8 x float> %a, <8 x float> %b) {
559 ; ALL-LABEL: shuffle_v8f32_00014445:
561 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
563 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
564 ret <8 x float> %shuffle
; Single-source in-lane shuffle of %a - both 128-bit halves use the pattern
; 0,0,2,0. Every configuration expects one vshufps.
567 define <8 x float> @shuffle_v8f32_00204464(<8 x float> %a, <8 x float> %b) {
568 ; ALL-LABEL: shuffle_v8f32_00204464:
570 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
572 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
573 ret <8 x float> %shuffle
; Single-source in-lane shuffle of %a - both 128-bit halves use the pattern
; 0,3,0,0. Every configuration expects one vshufps.
576 define <8 x float> @shuffle_v8f32_03004744(<8 x float> %a, <8 x float> %b) {
577 ; ALL-LABEL: shuffle_v8f32_03004744:
579 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
581 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
582 ret <8 x float> %shuffle
; Single-source in-lane shuffle of %a - both 128-bit halves use the pattern
; 1,0,0,0. Every configuration expects one vshufps.
585 define <8 x float> @shuffle_v8f32_10005444(<8 x float> %a, <8 x float> %b) {
586 ; ALL-LABEL: shuffle_v8f32_10005444:
588 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
590 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
591 ret <8 x float> %shuffle
; Single-source in-lane shuffle of %a - both 128-bit halves use the pattern
; 2,2,0,0. Every configuration expects one vshufps.
594 define <8 x float> @shuffle_v8f32_22006644(<8 x float> %a, <8 x float> %b) {
595 ; ALL-LABEL: shuffle_v8f32_22006644:
597 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
599 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
600 ret <8 x float> %shuffle
; Single-source in-lane shuffle of %a - both 128-bit halves use the pattern
; 3,3,3,0. Every configuration expects one vshufps.
603 define <8 x float> @shuffle_v8f32_33307774(<8 x float> %a, <8 x float> %b) {
604 ; ALL-LABEL: shuffle_v8f32_33307774:
606 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
608 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
609 ret <8 x float> %shuffle
; Reverses the four elements inside each 128-bit half of %a (3,2,1,0 in both
; halves). Every configuration expects one vshufps.
612 define <8 x float> @shuffle_v8f32_32107654(<8 x float> %a, <8 x float> %b) {
613 ; ALL-LABEL: shuffle_v8f32_32107654:
615 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
617 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
618 ret <8 x float> %shuffle
; Single-source in-lane shuffle of %a - both 128-bit halves use the pattern
; 0,0,2,3. Every configuration expects one vshufps.
621 define <8 x float> @shuffle_v8f32_00234467(<8 x float> %a, <8 x float> %b) {
622 ; ALL-LABEL: shuffle_v8f32_00234467:
624 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
626 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
627 ret <8 x float> %shuffle
; Duplicates every even element of %a into the following odd lane
; (0,0,2,2,4,4,6,6). Every configuration expects one vmovsldup.
630 define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
631 ; ALL-LABEL: shuffle_v8f32_00224466:
633 ; ALL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
635 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
636 ret <8 x float> %shuffle
; Same even-element duplication, but built from two <4 x float> inputs. %a and
; %b are each shuffled to 0,0,2,2 and the two results are concatenated into an
; <8 x float>. The checks expect the inputs to be joined with vinsertf128 first
; and the duplication done once on the 256-bit value with vmovsldup.
639 define <8 x float> @shuffle_v8f32_00224466_v4f32(<4 x float> %a, <4 x float> %b) {
640 ; ALL-LABEL: shuffle_v8f32_00224466_v4f32:
642 ; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
643 ; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
644 ; ALL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
646 %1 = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
647 %2 = shufflevector <4 x float> %b, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
648 %3 = shufflevector <4 x float> %1, <4 x float> %2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Widening shuffle of two <4 x float> inputs - the low four lanes splat element
; 0 of %a and the high four lanes splat element 0 of %b (mask index 4). The
; checks expect vinsertf128 to join the inputs followed by one in-lane vshufps.
652 define <8 x float> @shuffle_v8f32_00004444_v4f32(<4 x float> %a, <4 x float> %b) {
653 ; ALL-LABEL: shuffle_v8f32_00004444_v4f32:
655 ; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
656 ; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
657 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
659 %1 = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
; Swaps each adjacent pair of elements of %a (1,0,3,2 in both 128-bit halves).
; Every configuration expects one vshufps.
663 define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) {
664 ; ALL-LABEL: shuffle_v8f32_10325476:
666 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
668 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
669 ret <8 x float> %shuffle
; Duplicates every odd element of %a into the preceding even lane
; (1,1,3,3,5,5,7,7). Every configuration expects one vmovshdup.
672 define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
673 ; ALL-LABEL: shuffle_v8f32_11335577:
675 ; ALL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
677 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
678 ret <8 x float> %shuffle
; Same odd-element duplication, but built from two <4 x float> inputs. %a and
; %b are each shuffled to 1,1,3,3 and the two results are concatenated into an
; <8 x float>. The checks expect the inputs to be joined with vinsertf128 first
; and the duplication done once on the 256-bit value with vmovshdup.
681 define <8 x float> @shuffle_v8f32_11335577_v4f32(<4 x float> %a, <4 x float> %b) {
682 ; ALL-LABEL: shuffle_v8f32_11335577_v4f32:
684 ; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
685 ; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
686 ; ALL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
688 %1 = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
689 %2 = shufflevector <4 x float> %b, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
690 %3 = shufflevector <4 x float> %1, <4 x float> %2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Swaps only the low pair of each 128-bit half of %a (1,0,2,3 in both halves).
; Every configuration expects one vshufps.
694 define <8 x float> @shuffle_v8f32_10235467(<8 x float> %a, <8 x float> %b) {
695 ; ALL-LABEL: shuffle_v8f32_10235467:
697 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
699 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
700 ret <8 x float> %shuffle
; Single-source in-lane shuffle of %a - both 128-bit halves use the pattern
; 1,0,2,2. Every configuration expects one vshufps.
703 define <8 x float> @shuffle_v8f32_10225466(<8 x float> %a, <8 x float> %b) {
704 ; ALL-LABEL: shuffle_v8f32_10225466:
706 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
708 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
709 ret <8 x float> %shuffle
; Single-source in-lane shuffle of %a where the two 128-bit halves use
; different patterns (0,0,0,1 low and 1,0,0,0 high). Every configuration
; expects one vpermilps.
712 define <8 x float> @shuffle_v8f32_00015444(<8 x float> %a, <8 x float> %b) {
713 ; ALL-LABEL: shuffle_v8f32_00015444:
715 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
717 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
718 ret <8 x float> %shuffle
; Single-source in-lane shuffle of %a where the two 128-bit halves use
; different patterns (0,0,2,0 low and 0,2,0,0 high). Every configuration
; expects one vpermilps.
721 define <8 x float> @shuffle_v8f32_00204644(<8 x float> %a, <8 x float> %b) {
722 ; ALL-LABEL: shuffle_v8f32_00204644:
724 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
726 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
727 ret <8 x float> %shuffle
; Single-source in-lane shuffle of %a where the two 128-bit halves use
; different patterns (0,3,0,0 low and 0,0,3,0 high). Every configuration
; expects one vpermilps.
730 define <8 x float> @shuffle_v8f32_03004474(<8 x float> %a, <8 x float> %b) {
731 ; ALL-LABEL: shuffle_v8f32_03004474:
733 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
735 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
736 ret <8 x float> %shuffle
; Single-source in-lane shuffle of %a where the two 128-bit halves use
; different patterns (1,0,0,0 low and a splat of the first element high).
; Every configuration expects one vpermilps.
739 define <8 x float> @shuffle_v8f32_10004444(<8 x float> %a, <8 x float> %b) {
740 ; ALL-LABEL: shuffle_v8f32_10004444:
742 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
744 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
745 ret <8 x float> %shuffle
; Single-source in-lane shuffle of %a where the two 128-bit halves use
; different patterns (2,2,0,0 low and 2,0,0,2 high). Every configuration
; expects one vpermilps.
748 define <8 x float> @shuffle_v8f32_22006446(<8 x float> %a, <8 x float> %b) {
749 ; ALL-LABEL: shuffle_v8f32_22006446:
751 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
753 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
754 ret <8 x float> %shuffle
; Single-source in-lane shuffle whose pattern differs between the 128-bit
; halves (lane-relative: low 3,3,3,0 / high 3,0,3,0).
; Every run line is expected to emit one vpermilps.
757 define <8 x float> @shuffle_v8f32_33307474(<8 x float> %a, <8 x float> %b) {
758 ; ALL-LABEL: shuffle_v8f32_33307474:
760 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
762 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
763 ret <8 x float> %shuffle
; Single-source in-lane shuffle that reverses the low 128-bit half (3,2,1,0)
; and leaves the high half in place (4,5,6,7).
; Every run line is expected to emit one vpermilps.
766 define <8 x float> @shuffle_v8f32_32104567(<8 x float> %a, <8 x float> %b) {
767 ; ALL-LABEL: shuffle_v8f32_32104567:
769 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
771 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
772 ret <8 x float> %shuffle
; Single-source in-lane shuffle whose pattern differs between the 128-bit
; halves (lane-relative: low 0,0,2,3 / high 2,3,0,0).
; Every run line is expected to emit one vpermilps.
775 define <8 x float> @shuffle_v8f32_00236744(<8 x float> %a, <8 x float> %b) {
776 ; ALL-LABEL: shuffle_v8f32_00236744:
778 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
780 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
781 ret <8 x float> %shuffle
; Single-source in-lane shuffle whose pattern differs between the 128-bit
; halves (lane-relative: low 0,0,2,2 / high 2,2,0,0).
; Every run line is expected to emit one vpermilps.
784 define <8 x float> @shuffle_v8f32_00226644(<8 x float> %a, <8 x float> %b) {
785 ; ALL-LABEL: shuffle_v8f32_00226644:
787 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
789 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
790 ret <8 x float> %shuffle
; Single-source in-lane shuffle that swaps adjacent elements in the low
; 128-bit half (1,0,3,2) and leaves the high half in place (4,5,6,7).
; Every run line is expected to emit one vpermilps.
793 define <8 x float> @shuffle_v8f32_10324567(<8 x float> %a, <8 x float> %b) {
794 ; ALL-LABEL: shuffle_v8f32_10324567:
796 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
798 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
799 ret <8 x float> %shuffle
; Single-source in-lane shuffle that duplicates the odd elements of the low
; 128-bit half (1,1,3,3) and leaves the high half in place (4,5,6,7).
; Every run line is expected to emit one vpermilps.
802 define <8 x float> @shuffle_v8f32_11334567(<8 x float> %a, <8 x float> %b) {
803 ; ALL-LABEL: shuffle_v8f32_11334567:
805 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
807 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
808 ret <8 x float> %shuffle
; Single-source in-lane shuffle that keeps the low 128-bit half in place
; (0,1,2,3) and swaps the first two elements of the high half (5,4,6,7).
; Every run line is expected to emit one vpermilps.
811 define <8 x float> @shuffle_v8f32_01235467(<8 x float> %a, <8 x float> %b) {
812 ; ALL-LABEL: shuffle_v8f32_01235467:
814 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
816 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
817 ret <8 x float> %shuffle
; Single-source in-lane shuffle that keeps the low 128-bit half in place
; (0,1,2,3) and rearranges the high half as 5,4,6,6.
; Every run line is expected to emit one vpermilps.
820 define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) {
821 ; ALL-LABEL: shuffle_v8f32_01235466:
823 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
825 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
826 ret <8 x float> %shuffle
; Single-source in-lane shuffle with undef result elements 3 and 5
; (lane-relative: low 0,0,2,u / high 2,u,0,0).
; Every run line is expected to emit one vpermilps that keeps the undef slots.
829 define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) {
830 ; ALL-LABEL: shuffle_v8f32_002u6u44:
832 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
834 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
835 ret <8 x float> %shuffle
; Single-source in-lane shuffle with undef result elements 2, 3, 6 and 7
; (lane-relative: low 0,0,u,u / high 2,2,u,u).
; Every run line is expected to emit one vpermilps that keeps the undef slots.
838 define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) {
839 ; ALL-LABEL: shuffle_v8f32_00uu66uu:
841 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
843 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
844 ret <8 x float> %shuffle
; Single-source in-lane shuffle with undef result elements 6 and 7
; (lane-relative: low 1,0,3,2 / high 0,1,u,u).
; Every run line is expected to emit one vpermilps that keeps the undef slots.
847 define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) {
848 ; ALL-LABEL: shuffle_v8f32_103245uu:
850 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
852 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
853 ret <8 x float> %shuffle
; Single-source in-lane shuffle with undef result elements 4 and 5
; (lane-relative: low 1,1,3,3 / high u,u,2,3).
; Every run line is expected to emit one vpermilps that keeps the undef slots.
856 define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) {
857 ; ALL-LABEL: shuffle_v8f32_1133uu67:
859 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
861 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
862 ret <8 x float> %shuffle
; Single-source in-lane shuffle with undef result elements 1, 2, 6 and 7
; (lane-relative: low 0,u,u,3 / high 1,0,u,u).
; Every run line is expected to emit one vpermilps that keeps the undef slots.
865 define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) {
866 ; ALL-LABEL: shuffle_v8f32_0uu354uu:
868 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
870 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
871 ret <8 x float> %shuffle
; Single-source in-lane shuffle where only result elements 3, 6 and 7 are
; defined (lane-relative: low u,u,u,3 / high u,u,2,2).
; Every run line is expected to emit one vpermilps that keeps the undef slots.
874 define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) {
875 ; ALL-LABEL: shuffle_v8f32_uuu3uu66:
877 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
879 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
880 ret <8 x float> %shuffle
; Two-source, lane-crossing shuffle with mask 12,3,4,8,12,13,10,0 (mask indices
; 8-15 select from %b).
; Expected lowering per check prefix:
;  - AVX1 rearranges each input with vperm2f128/vinsertf128/vshufps and merges
;    the two halves of the work with vblendps.
;  - AVX2-SLOW and AVX2-FAST-PERLANE use vpermps on ymm1 and vshufps + vpermpd
;    on ymm0, then vblendps.
;  - AVX2-FAST-ALL uses vpermps for both inputs, then vblendps.
;  - AVX512VL (all three runs) uses one vpermi2ps with a constant index vector.
883 define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) {
884 ; AVX1-LABEL: shuffle_v8f32_c348cda0:
886 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,2,3]
887 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
888 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm2[0,1],ymm1[2,0],ymm2[4,5],ymm1[6,4]
889 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
890 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm2[0,0],ymm0[4,7],ymm2[4,4]
891 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
894 ; AVX2-SLOW-LABEL: shuffle_v8f32_c348cda0:
895 ; AVX2-SLOW: # %bb.0:
896 ; AVX2-SLOW-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4,5,2,0,4,5,2,0]
897 ; AVX2-SLOW-NEXT: # ymm2 = mem[0,1,0,1]
898 ; AVX2-SLOW-NEXT: vpermps %ymm1, %ymm2, %ymm1
899 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3,2,0,4,7,6,4]
900 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,1]
901 ; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
902 ; AVX2-SLOW-NEXT: retq
904 ; AVX2-FAST-ALL-LABEL: shuffle_v8f32_c348cda0:
905 ; AVX2-FAST-ALL: # %bb.0:
906 ; AVX2-FAST-ALL-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [0,3,4,0,0,3,4,0]
907 ; AVX2-FAST-ALL-NEXT: # ymm2 = mem[0,1,0,1]
908 ; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
909 ; AVX2-FAST-ALL-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4,5,2,0,4,5,2,0]
910 ; AVX2-FAST-ALL-NEXT: # ymm2 = mem[0,1,0,1]
911 ; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
912 ; AVX2-FAST-ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
913 ; AVX2-FAST-ALL-NEXT: retq
915 ; AVX2-FAST-PERLANE-LABEL: shuffle_v8f32_c348cda0:
916 ; AVX2-FAST-PERLANE: # %bb.0:
917 ; AVX2-FAST-PERLANE-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4,5,2,0,4,5,2,0]
918 ; AVX2-FAST-PERLANE-NEXT: # ymm2 = mem[0,1,0,1]
919 ; AVX2-FAST-PERLANE-NEXT: vpermps %ymm1, %ymm2, %ymm1
920 ; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3,2,0,4,7,6,4]
921 ; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,1]
922 ; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
923 ; AVX2-FAST-PERLANE-NEXT: retq
925 ; AVX512VL-LABEL: shuffle_v8f32_c348cda0:
927 ; AVX512VL-NEXT: vmovaps {{.*#+}} ymm2 = [4,11,12,0,4,5,2,8]
928 ; AVX512VL-NEXT: vpermi2ps %ymm0, %ymm1, %ymm2
929 ; AVX512VL-NEXT: vmovaps %ymm2, %ymm0
930 ; AVX512VL-NEXT: retq
931 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
932 ret <8 x float> %shuffle
; Two-source, lane-crossing shuffle with mask 15,5,1,1,2,3,5,10 (mask indices
; 8-15 select from %b); only result elements 0 and 7 come from %b.
; Expected lowering per check prefix:
;  - AVX1 and the AVX2 runs rearrange ymm0 and ymm1 separately and merge them
;    with vblendps (elements 0 and 7 taken from the ymm1 side).
;  - AVX2-FAST-ALL does each rearrangement with a single vpermps; the other
;    AVX2 runs use vshufps + vpermpd pairs.
;  - AVX512VL (all three runs) uses one vpermt2ps with a constant index vector.
935 define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) {
936 ; AVX1-LABEL: shuffle_v8f32_f511235a:
938 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
939 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,1,1,1,5,5,5,5]
940 ; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[3],ymm0[3]
941 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
942 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6]
943 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
946 ; AVX2-SLOW-LABEL: shuffle_v8f32_f511235a:
947 ; AVX2-SLOW: # %bb.0:
948 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,2,2,3,7,6,6,7]
949 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,1,2,0]
950 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,1,2,3,5,5,6,7]
951 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,0,1,2]
952 ; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
953 ; AVX2-SLOW-NEXT: retq
955 ; AVX2-FAST-ALL-LABEL: shuffle_v8f32_f511235a:
956 ; AVX2-FAST-ALL: # %bb.0:
957 ; AVX2-FAST-ALL-NEXT: vbroadcastsd {{.*#+}} ymm2 = [7,2,7,2,7,2,7,2]
958 ; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
959 ; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [u,5,1,1,2,3,5,u]
960 ; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
961 ; AVX2-FAST-ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
962 ; AVX2-FAST-ALL-NEXT: retq
964 ; AVX2-FAST-PERLANE-LABEL: shuffle_v8f32_f511235a:
965 ; AVX2-FAST-PERLANE: # %bb.0:
966 ; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,2,2,3,7,6,6,7]
967 ; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,1,2,0]
968 ; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,1,2,3,5,5,6,7]
969 ; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,0,1,2]
970 ; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
971 ; AVX2-FAST-PERLANE-NEXT: retq
973 ; AVX512VL-LABEL: shuffle_v8f32_f511235a:
975 ; AVX512VL-NEXT: vmovaps {{.*#+}} ymm2 = [15,5,1,1,2,3,5,10]
976 ; AVX512VL-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
977 ; AVX512VL-NEXT: retq
978 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
979 ret <8 x float> %shuffle
; Single-source shuffle that reverses the low four elements of %a and
; replicates that reversed group into both 128-bit halves of the result.
; Expected lowering per check prefix:
;  - AVX1 reverses xmm0 with vshufps, then duplicates it with vinsertf128.
;  - The slow and FAST-PERLANE runs (AVX2 and AVX512VL) use vshufps on xmm0
;    followed by vpermpd.
;  - The FAST-ALL runs use one variable permute with a broadcast index constant
;    (vpermps under AVX2, vpermd under AVX512VL).
982 define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) {
983 ; AVX1-LABEL: shuffle_v8f32_32103210:
985 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
986 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
989 ; AVX2-SLOW-LABEL: shuffle_v8f32_32103210:
990 ; AVX2-SLOW: # %bb.0:
991 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
992 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
993 ; AVX2-SLOW-NEXT: retq
995 ; AVX2-FAST-ALL-LABEL: shuffle_v8f32_32103210:
996 ; AVX2-FAST-ALL: # %bb.0:
997 ; AVX2-FAST-ALL-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
998 ; AVX2-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
999 ; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1000 ; AVX2-FAST-ALL-NEXT: retq
1002 ; AVX2-FAST-PERLANE-LABEL: shuffle_v8f32_32103210:
1003 ; AVX2-FAST-PERLANE: # %bb.0:
1004 ; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
1005 ; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
1006 ; AVX2-FAST-PERLANE-NEXT: retq
1008 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_32103210:
1009 ; AVX512VL-SLOW: # %bb.0:
1010 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
1011 ; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
1012 ; AVX512VL-SLOW-NEXT: retq
1014 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8f32_32103210:
1015 ; AVX512VL-FAST-ALL: # %bb.0:
1016 ; AVX512VL-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
1017 ; AVX512VL-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
1018 ; AVX512VL-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
1019 ; AVX512VL-FAST-ALL-NEXT: retq
1021 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8f32_32103210:
1022 ; AVX512VL-FAST-PERLANE: # %bb.0:
1023 ; AVX512VL-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
1024 ; AVX512VL-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
1025 ; AVX512VL-FAST-PERLANE-NEXT: retq
1026 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
1027 ret <8 x float> %shuffle
; Single-source shuffle that reverses the high four elements of %a and
; replicates that reversed group into both 128-bit halves of the result.
; Expected lowering per check prefix:
;  - AVX1 duplicates the high half with vperm2f128, then reverses in-lane with
;    vshufps.
;  - The slow and FAST-PERLANE runs (AVX2 and AVX512VL) reverse in-lane with
;    vshufps, then duplicate the high half with vpermpd.
;  - The FAST-ALL runs use one variable permute with a broadcast index constant
;    (vpermps under AVX2, vpermd under AVX512VL).
1030 define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) {
1031 ; AVX1-LABEL: shuffle_v8f32_76547654:
1033 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
1034 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1037 ; AVX2-SLOW-LABEL: shuffle_v8f32_76547654:
1038 ; AVX2-SLOW: # %bb.0:
1039 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1040 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3]
1041 ; AVX2-SLOW-NEXT: retq
1043 ; AVX2-FAST-ALL-LABEL: shuffle_v8f32_76547654:
1044 ; AVX2-FAST-ALL: # %bb.0:
1045 ; AVX2-FAST-ALL-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
1046 ; AVX2-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
1047 ; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1048 ; AVX2-FAST-ALL-NEXT: retq
1050 ; AVX2-FAST-PERLANE-LABEL: shuffle_v8f32_76547654:
1051 ; AVX2-FAST-PERLANE: # %bb.0:
1052 ; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1053 ; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3]
1054 ; AVX2-FAST-PERLANE-NEXT: retq
1056 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_76547654:
1057 ; AVX512VL-SLOW: # %bb.0:
1058 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1059 ; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3]
1060 ; AVX512VL-SLOW-NEXT: retq
1062 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8f32_76547654:
1063 ; AVX512VL-FAST-ALL: # %bb.0:
1064 ; AVX512VL-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
1065 ; AVX512VL-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
1066 ; AVX512VL-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
1067 ; AVX512VL-FAST-ALL-NEXT: retq
1069 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8f32_76547654:
1070 ; AVX512VL-FAST-PERLANE: # %bb.0:
1071 ; AVX512VL-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1072 ; AVX512VL-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3]
1073 ; AVX512VL-FAST-PERLANE-NEXT: retq
1074 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
1075 ret <8 x float> %shuffle
; Single-source shuffle that fully reverses the eight elements of %a.
; Expected lowering per check prefix:
;  - AVX1 swaps the 128-bit halves with vperm2f128, then reverses in-lane with
;    vshufps.
;  - The slow and FAST-PERLANE runs (AVX2 and AVX512VL) reverse in-lane with
;    vshufps, then swap the halves with vpermpd.
;  - The FAST-ALL runs load the index vector 7..0 and use a single vpermps.
1078 define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) {
1079 ; AVX1-LABEL: shuffle_v8f32_76543210:
1081 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
1082 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1085 ; AVX2-SLOW-LABEL: shuffle_v8f32_76543210:
1086 ; AVX2-SLOW: # %bb.0:
1087 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1088 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
1089 ; AVX2-SLOW-NEXT: retq
1091 ; AVX2-FAST-ALL-LABEL: shuffle_v8f32_76543210:
1092 ; AVX2-FAST-ALL: # %bb.0:
1093 ; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
1094 ; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1095 ; AVX2-FAST-ALL-NEXT: retq
1097 ; AVX2-FAST-PERLANE-LABEL: shuffle_v8f32_76543210:
1098 ; AVX2-FAST-PERLANE: # %bb.0:
1099 ; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1100 ; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
1101 ; AVX2-FAST-PERLANE-NEXT: retq
1103 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_76543210:
1104 ; AVX512VL-SLOW: # %bb.0:
1105 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1106 ; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
1107 ; AVX512VL-SLOW-NEXT: retq
1109 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8f32_76543210:
1110 ; AVX512VL-FAST-ALL: # %bb.0:
1111 ; AVX512VL-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
1112 ; AVX512VL-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1113 ; AVX512VL-FAST-ALL-NEXT: retq
1115 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8f32_76543210:
1116 ; AVX512VL-FAST-PERLANE: # %bb.0:
1117 ; AVX512VL-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1118 ; AVX512VL-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
1119 ; AVX512VL-FAST-PERLANE-NEXT: retq
1120 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1121 ret <8 x float> %shuffle
; Two-source shuffle: the low half of the result is the reversed low four
; elements of %a, the high half is the reversed low four elements of %b
; (mask indices 8-11).
; AVX1OR2, AVX512VL-SLOW and AVX512VL-FAST-PERLANE expect vinsertf128 followed
; by an in-lane vshufps; AVX512VL-FAST-ALL expects a single vpermt2ps.
1124 define <8 x float> @shuffle_v8f32_3210ba98(<8 x float> %a, <8 x float> %b) {
1125 ; AVX1OR2-LABEL: shuffle_v8f32_3210ba98:
1127 ; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1128 ; AVX1OR2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1129 ; AVX1OR2-NEXT: retq
1131 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_3210ba98:
1132 ; AVX512VL-SLOW: # %bb.0:
1133 ; AVX512VL-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1134 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1135 ; AVX512VL-SLOW-NEXT: retq
1137 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8f32_3210ba98:
1138 ; AVX512VL-FAST-ALL: # %bb.0:
1139 ; AVX512VL-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [3,2,1,0,11,10,9,8]
1140 ; AVX512VL-FAST-ALL-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
1141 ; AVX512VL-FAST-ALL-NEXT: retq
1143 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8f32_3210ba98:
1144 ; AVX512VL-FAST-PERLANE: # %bb.0:
1145 ; AVX512VL-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1146 ; AVX512VL-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1147 ; AVX512VL-FAST-PERLANE-NEXT: retq
1148 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
1149 ret <8 x float> %shuffle
; Two-source shuffle: the low half of the result is the reversed low four
; elements of %a, the high half is the reversed high four elements of %b
; (mask indices 12-15).
; AVX1OR2 and AVX512VL-SLOW expect vblendps followed by an in-lane vshufps.
; Both AVX512VL fast runs share the AVX512VL-FAST prefix here and expect a
; single vpermt2ps.
1152 define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) {
1153 ; AVX1OR2-LABEL: shuffle_v8f32_3210fedc:
1155 ; AVX1OR2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1156 ; AVX1OR2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1157 ; AVX1OR2-NEXT: retq
1159 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_3210fedc:
1160 ; AVX512VL-SLOW: # %bb.0:
1161 ; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1162 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1163 ; AVX512VL-SLOW-NEXT: retq
1165 ; AVX512VL-FAST-LABEL: shuffle_v8f32_3210fedc:
1166 ; AVX512VL-FAST: # %bb.0:
1167 ; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm2 = [3,2,1,0,15,14,13,12]
1168 ; AVX512VL-FAST-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
1169 ; AVX512VL-FAST-NEXT: retq
1170 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
1171 ret <8 x float> %shuffle
; Two-source shuffle: the low half of the result is the reversed high four
; elements of %a, the high half is the reversed high four elements of %b.
; AVX1OR2, AVX512VL-SLOW and AVX512VL-FAST-PERLANE expect vperm2f128 (to gather
; both high halves) followed by an in-lane vshufps; AVX512VL-FAST-ALL expects a
; single vpermt2ps.
1174 define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) {
1175 ; AVX1OR2-LABEL: shuffle_v8f32_7654fedc:
1177 ; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1178 ; AVX1OR2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1179 ; AVX1OR2-NEXT: retq
1181 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_7654fedc:
1182 ; AVX512VL-SLOW: # %bb.0:
1183 ; AVX512VL-SLOW-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1184 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1185 ; AVX512VL-SLOW-NEXT: retq
1187 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8f32_7654fedc:
1188 ; AVX512VL-FAST-ALL: # %bb.0:
1189 ; AVX512VL-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [7,6,5,4,15,14,13,12]
1190 ; AVX512VL-FAST-ALL-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
1191 ; AVX512VL-FAST-ALL-NEXT: retq
1193 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8f32_7654fedc:
1194 ; AVX512VL-FAST-PERLANE: # %bb.0:
1195 ; AVX512VL-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1196 ; AVX512VL-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1197 ; AVX512VL-FAST-PERLANE-NEXT: retq
1198 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
1199 ret <8 x float> %shuffle
; Two-source shuffle: the low half of the result is the reversed high four
; elements of %b, the high half is the reversed high four elements of %a
; (the operand order is swapped relative to shuffle_v8f32_7654fedc).
; AVX1OR2, AVX512VL-SLOW and AVX512VL-FAST-PERLANE expect vperm2f128 with ymm1
; as the first source followed by an in-lane vshufps; AVX512VL-FAST-ALL expects
; vpermi2ps plus a register copy of the result into ymm0.
1202 define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) {
1203 ; AVX1OR2-LABEL: shuffle_v8f32_fedc7654:
1205 ; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
1206 ; AVX1OR2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1207 ; AVX1OR2-NEXT: retq
1209 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_fedc7654:
1210 ; AVX512VL-SLOW: # %bb.0:
1211 ; AVX512VL-SLOW-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
1212 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1213 ; AVX512VL-SLOW-NEXT: retq
1215 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8f32_fedc7654:
1216 ; AVX512VL-FAST-ALL: # %bb.0:
1217 ; AVX512VL-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [7,6,5,4,15,14,13,12]
1218 ; AVX512VL-FAST-ALL-NEXT: vpermi2ps %ymm0, %ymm1, %ymm2
1219 ; AVX512VL-FAST-ALL-NEXT: vmovaps %ymm2, %ymm0
1220 ; AVX512VL-FAST-ALL-NEXT: retq
1222 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8f32_fedc7654:
1223 ; AVX512VL-FAST-PERLANE: # %bb.0:
1224 ; AVX512VL-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
1225 ; AVX512VL-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1226 ; AVX512VL-FAST-PERLANE-NEXT: retq
1227 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
1228 ret <8 x float> %shuffle
; Two-source shuffle that gathers the odd-indexed elements of %truc followed by
; the odd-indexed elements of %tchose (mask 1,3,5,7,9,11,13,15).
; NOTE(review): the name suggests a regression test for bug report 21138; the
; report itself is not visible in this file - confirm before relying on it.
; Expected lowering per check prefix:
;  - AVX1 builds two cross-lane rearrangements (vperm2f128, vinsertf128) and
;    combines them with one two-source vshufps.
;  - All AVX2 runs, AVX512VL-SLOW and AVX512VL-FAST-PERLANE use a two-source
;    vshufps followed by vpermpd.
;  - AVX512VL-FAST-ALL uses a single vpermt2ps.
1231 define <8 x float> @PR21138(<8 x float> %truc, <8 x float> %tchose) {
1232 ; AVX1-LABEL: PR21138:
1234 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
1235 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1236 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm2[1,3],ymm0[5,7],ymm2[5,7]
1239 ; AVX2-LABEL: PR21138:
1241 ; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
1242 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
1245 ; AVX512VL-SLOW-LABEL: PR21138:
1246 ; AVX512VL-SLOW: # %bb.0:
1247 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
1248 ; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
1249 ; AVX512VL-SLOW-NEXT: retq
1251 ; AVX512VL-FAST-ALL-LABEL: PR21138:
1252 ; AVX512VL-FAST-ALL: # %bb.0:
1253 ; AVX512VL-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [1,3,5,7,9,11,13,15]
1254 ; AVX512VL-FAST-ALL-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
1255 ; AVX512VL-FAST-ALL-NEXT: retq
1257 ; AVX512VL-FAST-PERLANE-LABEL: PR21138:
1258 ; AVX512VL-FAST-PERLANE: # %bb.0:
1259 ; AVX512VL-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
1260 ; AVX512VL-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
1261 ; AVX512VL-FAST-PERLANE-NEXT: retq
1262 %shuffle = shufflevector <8 x float> %truc, <8 x float> %tchose, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1263 ret <8 x float> %shuffle
; Two-source shuffle: the low half of the result is the reversed low four
; elements of %b (mask indices 11..8), the high half is the reversed high four
; elements of %a.
; AVX1OR2 and AVX512VL-SLOW expect vblendps followed by an in-lane vshufps.
; Both AVX512VL fast runs share the AVX512VL-FAST prefix here and expect
; vpermi2ps plus a register copy of the result into ymm0.
1266 define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) {
1267 ; AVX1OR2-LABEL: shuffle_v8f32_ba987654:
1269 ; AVX1OR2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1270 ; AVX1OR2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1271 ; AVX1OR2-NEXT: retq
1273 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_ba987654:
1274 ; AVX512VL-SLOW: # %bb.0:
1275 ; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1276 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1277 ; AVX512VL-SLOW-NEXT: retq
1279 ; AVX512VL-FAST-LABEL: shuffle_v8f32_ba987654:
1280 ; AVX512VL-FAST: # %bb.0:
1281 ; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm2 = [3,2,1,0,15,14,13,12]
1282 ; AVX512VL-FAST-NEXT: vpermi2ps %ymm0, %ymm1, %ymm2
1283 ; AVX512VL-FAST-NEXT: vmovaps %ymm2, %ymm0
1284 ; AVX512VL-FAST-NEXT: retq
1285 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
1286 ret <8 x float> %shuffle
; Two-source shuffle: the low half of the result is the reversed low four
; elements of %b (mask indices 11..8), the high half is the reversed low four
; elements of %a.
; AVX1OR2, AVX512VL-SLOW and AVX512VL-FAST-PERLANE expect vinsertf128 (xmm0
; placed above ymm1's low half) followed by an in-lane vshufps;
; AVX512VL-FAST-ALL expects vpermi2ps plus a register copy into ymm0.
1289 define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) {
1290 ; AVX1OR2-LABEL: shuffle_v8f32_ba983210:
1292 ; AVX1OR2-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1293 ; AVX1OR2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1294 ; AVX1OR2-NEXT: retq
1296 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_ba983210:
1297 ; AVX512VL-SLOW: # %bb.0:
1298 ; AVX512VL-SLOW-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1299 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1300 ; AVX512VL-SLOW-NEXT: retq
1302 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8f32_ba983210:
1303 ; AVX512VL-FAST-ALL: # %bb.0:
1304 ; AVX512VL-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [3,2,1,0,11,10,9,8]
1305 ; AVX512VL-FAST-ALL-NEXT: vpermi2ps %ymm0, %ymm1, %ymm2
1306 ; AVX512VL-FAST-ALL-NEXT: vmovaps %ymm2, %ymm0
1307 ; AVX512VL-FAST-ALL-NEXT: retq
1309 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8f32_ba983210:
1310 ; AVX512VL-FAST-PERLANE: # %bb.0:
1311 ; AVX512VL-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1312 ; AVX512VL-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1313 ; AVX512VL-FAST-PERLANE-NEXT: retq
1314 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
1315 ret <8 x float> %shuffle
; Two-source interleave of the low two elements of each 128-bit lane, taking
; %b first (mask 8,0,u,1,12,4,u,5); result elements 2 and 6 are undef.
; Every run line is expected to emit one vunpcklps with ymm1 as the first
; source.
1318 define <8 x float> @shuffle_v8f32_80u1c4u5(<8 x float> %a, <8 x float> %b) {
1319 ; ALL-LABEL: shuffle_v8f32_80u1c4u5:
1321 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
1323 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 5>
1324 ret <8 x float> %shuffle
; Two-source interleave of the high two elements of each 128-bit lane, taking
; %b first (mask 10,2,u,3,14,6,15,7); only result element 2 is undef.
; Every run line is expected to emit one vunpckhps with ymm1 as the first
; source.
1327 define <8 x float> @shuffle_v8f32_a2u3e6f7(<8 x float> %a, <8 x float> %b) {
1328 ; ALL-LABEL: shuffle_v8f32_a2u3e6f7:
1330 ; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]
1332 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 10, i32 2, i32 undef, i32 3, i32 14, i32 6, i32 15, i32 7>
1333 ret <8 x float> %shuffle
; Two-source, lane-crossing shuffle with mask 0,8,4,12,1,9,5,13: pairs
; (a[i], b[i]) for i = 0, 4 in the low half and i = 1, 5 in the high half.
; Expected lowering per check prefix:
;  - AVX1 starts with vunpcklps and then reorders 64-bit pairs across lanes
;    with vinsertf128, vperm2f128 and vshufpd.
;  - All AVX2 runs, AVX512VL-SLOW and AVX512VL-FAST-PERLANE use vunpcklps
;    followed by vpermpd.
;  - AVX512VL-FAST-ALL uses a single vpermt2ps.
1336 define <8 x float> @shuffle_v8f32_084c195d(<8 x float> %a, <8 x float> %b) {
1337 ; AVX1-LABEL: shuffle_v8f32_084c195d:
1339 ; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1340 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
1341 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
1342 ; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3]
1345 ; AVX2-LABEL: shuffle_v8f32_084c195d:
1347 ; AVX2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1348 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
1351 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_084c195d:
1352 ; AVX512VL-SLOW: # %bb.0:
1353 ; AVX512VL-SLOW-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1354 ; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
1355 ; AVX512VL-SLOW-NEXT: retq
1357 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8f32_084c195d:
1358 ; AVX512VL-FAST-ALL: # %bb.0:
1359 ; AVX512VL-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,8,4,12,1,9,5,13]
1360 ; AVX512VL-FAST-ALL-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
1361 ; AVX512VL-FAST-ALL-NEXT: retq
1363 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8f32_084c195d:
1364 ; AVX512VL-FAST-PERLANE: # %bb.0:
1365 ; AVX512VL-FAST-PERLANE-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1366 ; AVX512VL-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
1367 ; AVX512VL-FAST-PERLANE-NEXT: retq
1368 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 4, i32 12, i32 1, i32 9, i32 5, i32 13>
1369 ret <8 x float> %shuffle
; Single-argument shuffle (the second shufflevector operand is undef) that
; moves adjacent float pairs as units: pairs (0,1),(4,5),(2,3),(6,7), i.e. the
; two middle 64-bit chunks trade places.
; AVX1 expects vperm2f128 + vinsertf128 + vshufpd; every AVX2 and AVX512VL run
; (shared AVX2OR512VL prefix) expects a single vpermpd.
1372 define <8 x float> @shuffle_v8f32_01452367(<8 x float> %a) {
1373 ; AVX1-LABEL: shuffle_v8f32_01452367:
1375 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,2,3]
1376 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1377 ; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
1380 ; AVX2OR512VL-LABEL: shuffle_v8f32_01452367:
1381 ; AVX2OR512VL: # %bb.0:
1382 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
1383 ; AVX2OR512VL-NEXT: retq
1384 %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 2, i32 3, i32 6, i32 7>
1385 ret <8 x float> %shuffle
; Two-source shuffle: element 0 of %a followed by the first seven elements of
; %b (mask 0,8,9,...,14), i.e. %b moved up by one element with a[0] inserted.
; Expected lowering per check prefix:
;  - AVX1 rotates ymm1 with vperm2f128 and two vshufps, then blends element 0
;    of ymm0 back in with vblendps.
;  - All AVX2 runs use one vpermps on ymm1 followed by vblendps.
;  - All AVX512VL runs use vpermi2ps plus a register copy into ymm0.
1389 define <8 x float> @shuffle_v8f32_089abcde(<8 x float> %a, <8 x float> %b) {
1390 ; AVX1-LABEL: shuffle_v8f32_089abcde:
1392 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
1393 ; AVX1-NEXT: vshufps {{.*#+}} ymm2 = ymm2[3,0],ymm1[0,0],ymm2[7,4],ymm1[4,4]
1394 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm2[0,2],ymm1[1,2],ymm2[4,6],ymm1[5,6]
1395 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
1398 ; AVX2-LABEL: shuffle_v8f32_089abcde:
1400 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = [u,0,1,2,3,4,5,6]
1401 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
1402 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
1405 ; AVX512VL-LABEL: shuffle_v8f32_089abcde:
1406 ; AVX512VL: # %bb.0:
1407 ; AVX512VL-NEXT: vmovaps {{.*#+}} ymm2 = [8,0,1,2,3,4,5,6]
1408 ; AVX512VL-NEXT: vpermi2ps %ymm0, %ymm1, %ymm2
1409 ; AVX512VL-NEXT: vmovaps %ymm2, %ymm0
1410 ; AVX512VL-NEXT: retq
1411 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
1412 ret <8 x float> %shuffle
; Two-source shuffle: the first two elements of %a followed by the first six
; elements of %b (mask 0,1,8,...,13). Every moved unit is an aligned float
; pair, so the expected code works on 64-bit elements.
; AVX1OR2, AVX512VL-SLOW and AVX512VL-FAST-PERLANE expect vinsertf128 followed
; by vshufpd; AVX512VL-FAST-ALL expects vpermi2pd with the 64-bit index vector
; [4,0,1,2] plus a register copy into ymm0.
1415 define <8 x float> @shuffle_v8f32_0189abcd(<8 x float> %a, <8 x float> %b) {
1416 ; AVX1OR2-LABEL: shuffle_v8f32_0189abcd:
1418 ; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1419 ; AVX1OR2-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[2]
1420 ; AVX1OR2-NEXT: retq
1422 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_0189abcd:
1423 ; AVX512VL-SLOW: # %bb.0:
1424 ; AVX512VL-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1425 ; AVX512VL-SLOW-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[2]
1426 ; AVX512VL-SLOW-NEXT: retq
1428 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8f32_0189abcd:
1429 ; AVX512VL-FAST-ALL: # %bb.0:
1430 ; AVX512VL-FAST-ALL-NEXT: vmovapd {{.*#+}} ymm2 = [4,0,1,2]
1431 ; AVX512VL-FAST-ALL-NEXT: vpermi2pd %ymm0, %ymm1, %ymm2
1432 ; AVX512VL-FAST-ALL-NEXT: vmovapd %ymm2, %ymm0
1433 ; AVX512VL-FAST-ALL-NEXT: retq
1435 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8f32_0189abcd:
1436 ; AVX512VL-FAST-PERLANE: # %bb.0:
1437 ; AVX512VL-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1438 ; AVX512VL-FAST-PERLANE-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[2]
1439 ; AVX512VL-FAST-PERLANE-NEXT: retq
1440 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13>
1441 ret <8 x float> %shuffle
; Two-source shuffle: the first three elements of %a followed by the first
; five elements of %b (mask 0,1,2,8,...,12).
; Expected lowering per check prefix:
;  - AVX1 rotates ymm1 with vperm2f128 and two vshufps, then blends the low
;    three elements of ymm0 back in with vblendps.
;  - All AVX2 runs use one vpermps on ymm1 followed by vblendps.
;  - All AVX512VL runs use vpermi2ps plus a register copy into ymm0.
1444 define <8 x float> @shuffle_v8f32_01289abc(<8 x float> %a, <8 x float> %b) {
1445 ; AVX1-LABEL: shuffle_v8f32_01289abc:
1447 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
1448 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm2[3,0],ymm1[4,4],ymm2[7,4]
1449 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm2[1,2],ymm1[2,0],ymm2[5,6],ymm1[6,4]
1450 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
1453 ; AVX2-LABEL: shuffle_v8f32_01289abc:
1455 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = [u,u,u,0,1,2,3,4]
1456 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
1457 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
1460 ; AVX512VL-LABEL: shuffle_v8f32_01289abc:
1461 ; AVX512VL: # %bb.0:
1462 ; AVX512VL-NEXT: vmovaps {{.*#+}} ymm2 = [8,9,10,0,1,2,3,4]
1463 ; AVX512VL-NEXT: vpermi2ps %ymm0, %ymm1, %ymm2
1464 ; AVX512VL-NEXT: vmovaps %ymm2, %ymm0
1465 ; AVX512VL-NEXT: retq
1466 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 9, i32 10, i32 11, i32 12>
1467 ret <8 x float> %shuffle
; Shuffle whose low half is entirely undef and whose high half is a splat of
; element 1 of %a.
; Every run line is expected to splat within xmm0 using vshufps and then copy
; that 128-bit value into the upper half with vinsertf128.
1470 define <8 x float> @shuffle_v8f32_uuuu1111(<8 x float> %a, <8 x float> %b) {
1471 ; ALL-LABEL: shuffle_v8f32_uuuu1111:
1473 ; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1474 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1476 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
1477 ret <8 x float> %shuffle
; Splat of element 4 of %a (the first element of the upper 128-bit half) into
; all eight result elements.
; Expected lowering per check prefix:
;  - AVX1 duplicates the upper half with vperm2f128, then splats in-lane with
;    vshufps.
;  - The slow and FAST-PERLANE runs (AVX2 and AVX512VL) extract the upper half
;    with vextractf128 and broadcast its first element with vbroadcastss.
;  - The FAST-ALL runs use a single vpermps with an all-4 index vector.
1480 define <8 x float> @shuffle_v8f32_44444444(<8 x float> %a, <8 x float> %b) {
1481 ; AVX1-LABEL: shuffle_v8f32_44444444:
1483 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
1484 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
1487 ; AVX2-SLOW-LABEL: shuffle_v8f32_44444444:
1488 ; AVX2-SLOW: # %bb.0:
1489 ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm0
1490 ; AVX2-SLOW-NEXT: vbroadcastss %xmm0, %ymm0
1491 ; AVX2-SLOW-NEXT: retq
1493 ; AVX2-FAST-ALL-LABEL: shuffle_v8f32_44444444:
1494 ; AVX2-FAST-ALL: # %bb.0:
1495 ; AVX2-FAST-ALL-NEXT: vbroadcastss {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4]
1496 ; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1497 ; AVX2-FAST-ALL-NEXT: retq
1499 ; AVX2-FAST-PERLANE-LABEL: shuffle_v8f32_44444444:
1500 ; AVX2-FAST-PERLANE: # %bb.0:
1501 ; AVX2-FAST-PERLANE-NEXT: vextractf128 $1, %ymm0, %xmm0
1502 ; AVX2-FAST-PERLANE-NEXT: vbroadcastss %xmm0, %ymm0
1503 ; AVX2-FAST-PERLANE-NEXT: retq
1505 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_44444444:
1506 ; AVX512VL-SLOW: # %bb.0:
1507 ; AVX512VL-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm0
1508 ; AVX512VL-SLOW-NEXT: vbroadcastss %xmm0, %ymm0
1509 ; AVX512VL-SLOW-NEXT: retq
1511 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8f32_44444444:
1512 ; AVX512VL-FAST-ALL: # %bb.0:
1513 ; AVX512VL-FAST-ALL-NEXT: vbroadcastss {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4]
1514 ; AVX512VL-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1515 ; AVX512VL-FAST-ALL-NEXT: retq
1517 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8f32_44444444:
1518 ; AVX512VL-FAST-PERLANE: # %bb.0:
1519 ; AVX512VL-FAST-PERLANE-NEXT: vextractf128 $1, %ymm0, %xmm0
1520 ; AVX512VL-FAST-PERLANE-NEXT: vbroadcastss %xmm0, %ymm0
1521 ; AVX512VL-FAST-PERLANE-NEXT: retq
1522 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
1523 ret <8 x float> %shuffle
; Low four lanes are %a[1], %a[1], %b[0], %b[0]; the high four lanes are undef,
; so the shared checks only expect a 128-bit two-source vshufps.
1526 define <8 x float> @shuffle_v8f32_1188uuuu(<8 x float> %a, <8 x float> %b) {
1527 ; ALL-LABEL: shuffle_v8f32_1188uuuu:
1529 ; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0]
1531 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
1532 ret <8 x float> %shuffle
; Low four lanes are undef; the high four lanes are %a[3], %a[2], %a[1], %a[0]
; (the low half of %a reversed). %b is not referenced by the mask.
1535 define <8 x float> @shuffle_v8f32_uuuu3210(<8 x float> %a, <8 x float> %b) {
1536 ; ALL-LABEL: shuffle_v8f32_uuuu3210:
1538 ; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
1539 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1541 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 2, i32 1, i32 0>
1542 ret <8 x float> %shuffle
; Low four lanes are undef; the high four lanes are %a[1], %a[1], %b[0], %b[0].
; The shared checks build that quad in xmm0 and vinsertf128 it into the upper half.
1545 define <8 x float> @shuffle_v8f32_uuuu1188(<8 x float> %a, <8 x float> %b) {
1546 ; ALL-LABEL: shuffle_v8f32_uuuu1188:
1548 ; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0]
1549 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1551 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 8, i32 8>
1552 ret <8 x float> %shuffle
; Low four lanes all read %a[1]; the high four lanes are undef, so the shared
; checks only expect a 128-bit vshufps. %b is not referenced by the mask.
1555 define <8 x float> @shuffle_v8f32_1111uuuu(<8 x float> %a, <8 x float> %b) {
1556 ; ALL-LABEL: shuffle_v8f32_1111uuuu:
1558 ; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1560 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
1561 ret <8 x float> %shuffle
; Low four lanes all read %a[5] (an element of the upper half of %a); the high
; four lanes are undef. The shared checks extract the upper half, then splat
; its element 1 with a 128-bit vshufps.
1564 define <8 x float> @shuffle_v8f32_5555uuuu(<8 x float> %a, <8 x float> %b) {
1565 ; ALL-LABEL: shuffle_v8f32_5555uuuu:
1567 ; ALL-NEXT: vextractf128 $1, %ymm0, %xmm0
1568 ; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1570 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
1571 ret <8 x float> %shuffle
; Reverses each <4 x float> argument separately, then concatenates the two
; results (%a reversed in lanes 0-3, %b reversed in lanes 4-7).
; The shared checks expect the concatenation first (vinsertf128) followed by
; one in-lane vshufps that performs both reversals.
1574 define <8 x float> @shuffle_v8f32_32107654_v4f32(<4 x float> %a, <4 x float> %b) {
1575 ; ALL-LABEL: shuffle_v8f32_32107654_v4f32:
1577 ; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1578 ; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1579 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1581 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1582 %2 = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1583 %3 = shufflevector <4 x float> %1, <4 x float> %2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Shuffle whose first operand is loaded from %a1 and whose second operand is %a0.
; Mask indices >= 8 select from %a0, so the result lanes are
; a0[0], a0[3], a0[2], mem[0], a0[4], a0[7], a0[6], mem[4].
; The AVX512VL fast-shuffle checks expect the load folded into vpermt2ps; the
; other configurations use two vshufps, the first taking the memory operand.
1587 define <8 x float> @shuffle_mem_v8f32_8BA0CFE4(<8 x float> %a0, ptr %a1) {
1588 ; AVX1OR2-LABEL: shuffle_mem_v8f32_8BA0CFE4:
1590 ; AVX1OR2-NEXT: vshufps {{.*#+}} ymm1 = ymm0[2,0],mem[0,0],ymm0[6,4],mem[4,4]
1591 ; AVX1OR2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm1[0,2],ymm0[4,7],ymm1[4,6]
1592 ; AVX1OR2-NEXT: retq
1594 ; AVX512VL-SLOW-LABEL: shuffle_mem_v8f32_8BA0CFE4:
1595 ; AVX512VL-SLOW: # %bb.0:
1596 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm1 = ymm0[2,0],mem[0,0],ymm0[6,4],mem[4,4]
1597 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm1[0,2],ymm0[4,7],ymm1[4,6]
1598 ; AVX512VL-SLOW-NEXT: retq
1600 ; AVX512VL-FAST-LABEL: shuffle_mem_v8f32_8BA0CFE4:
1601 ; AVX512VL-FAST: # %bb.0:
1602 ; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [0,3,2,8,4,7,6,12]
1603 ; AVX512VL-FAST-NEXT: vpermt2ps (%rdi), %ymm1, %ymm0
1604 ; AVX512VL-FAST-NEXT: retq
1605 %1 = load <8 x float>, ptr %a1
1606 %2 = shufflevector <8 x float> %1, <8 x float> %a0, <8 x i32> <i32 8, i32 11, i32 10, i32 0, i32 12, i32 15, i32 14, i32 4>
; Integer splat of %a[0] to all eight lanes; %b is not referenced by the mask.
; AVX1 is expected to use vshufps + vinsertf128, while AVX2 and AVX512VL use
; a single vbroadcastss.
1610 define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) {
1611 ; AVX1-LABEL: shuffle_v8i32_00000000:
1613 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
1614 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1617 ; AVX2OR512VL-LABEL: shuffle_v8i32_00000000:
1618 ; AVX2OR512VL: # %bb.0:
1619 ; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %ymm0
1620 ; AVX2OR512VL-NEXT: retq
1621 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1622 ret <8 x i32> %shuffle
; Every lane reads %a[0] except lane 6, which reads %a[1]; only the low half
; of %a is used and %b is not referenced.
; Configurations with fast cross-lane variable shuffles are expected to use a
; constant-index vpermps; the other AVX2/AVX512VL ones use vshufps + vpermpd,
; and AVX1 builds the two halves separately and joins them with vinsertf128.
1625 define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) {
1626 ; AVX1-LABEL: shuffle_v8i32_00000010:
1628 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,0,0,0]
1629 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,1,0]
1630 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1633 ; AVX2-SLOW-LABEL: shuffle_v8i32_00000010:
1634 ; AVX2-SLOW: # %bb.0:
1635 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,1,0]
1636 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
1637 ; AVX2-SLOW-NEXT: retq
1639 ; AVX2-FAST-ALL-LABEL: shuffle_v8i32_00000010:
1640 ; AVX2-FAST-ALL: # %bb.0:
1641 ; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
1642 ; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1643 ; AVX2-FAST-ALL-NEXT: retq
1645 ; AVX2-FAST-PERLANE-LABEL: shuffle_v8i32_00000010:
1646 ; AVX2-FAST-PERLANE: # %bb.0:
1647 ; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,1,0]
1648 ; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
1649 ; AVX2-FAST-PERLANE-NEXT: retq
1651 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_00000010:
1652 ; AVX512VL-SLOW: # %bb.0:
1653 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,1,0]
1654 ; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
1655 ; AVX512VL-SLOW-NEXT: retq
1657 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8i32_00000010:
1658 ; AVX512VL-FAST-ALL: # %bb.0:
1659 ; AVX512VL-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
1660 ; AVX512VL-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1661 ; AVX512VL-FAST-ALL-NEXT: retq
1663 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8i32_00000010:
1664 ; AVX512VL-FAST-PERLANE: # %bb.0:
1665 ; AVX512VL-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,1,0]
1666 ; AVX512VL-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
1667 ; AVX512VL-FAST-PERLANE-NEXT: retq
1668 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
1669 ret <8 x i32> %shuffle
; Every lane reads %a[0] except lane 5, which reads %a[2]; only the low half
; of %a is used and %b is not referenced.
; Configurations with fast cross-lane variable shuffles are expected to use a
; constant-index vpermps; the other AVX2/AVX512VL ones use vshufps + vpermpd,
; and AVX1 builds the two halves separately and joins them with vinsertf128.
1672 define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) {
1673 ; AVX1-LABEL: shuffle_v8i32_00000200:
1675 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,0,0,0]
1676 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,0,0]
1677 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1680 ; AVX2-SLOW-LABEL: shuffle_v8i32_00000200:
1681 ; AVX2-SLOW: # %bb.0:
1682 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,2]
1683 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
1684 ; AVX2-SLOW-NEXT: retq
1686 ; AVX2-FAST-ALL-LABEL: shuffle_v8i32_00000200:
1687 ; AVX2-FAST-ALL: # %bb.0:
1688 ; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
1689 ; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1690 ; AVX2-FAST-ALL-NEXT: retq
1692 ; AVX2-FAST-PERLANE-LABEL: shuffle_v8i32_00000200:
1693 ; AVX2-FAST-PERLANE: # %bb.0:
1694 ; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,2]
1695 ; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
1696 ; AVX2-FAST-PERLANE-NEXT: retq
1698 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_00000200:
1699 ; AVX512VL-SLOW: # %bb.0:
1700 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,2]
1701 ; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
1702 ; AVX512VL-SLOW-NEXT: retq
1704 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8i32_00000200:
1705 ; AVX512VL-FAST-ALL: # %bb.0:
1706 ; AVX512VL-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
1707 ; AVX512VL-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1708 ; AVX512VL-FAST-ALL-NEXT: retq
1710 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8i32_00000200:
1711 ; AVX512VL-FAST-PERLANE: # %bb.0:
1712 ; AVX512VL-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,2]
1713 ; AVX512VL-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
1714 ; AVX512VL-FAST-PERLANE-NEXT: retq
1715 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
1716 ret <8 x i32> %shuffle
; Every lane reads %a[0] except lane 4, which reads %a[3]; only the low half
; of %a is used and %b is not referenced.
; Configurations with fast cross-lane variable shuffles are expected to use a
; constant-index vpermps; the other AVX2/AVX512VL ones use vshufps + vpermpd,
; and AVX1 builds the two halves separately and joins them with vinsertf128.
1719 define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) {
1720 ; AVX1-LABEL: shuffle_v8i32_00003000:
1722 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,0,0,0]
1723 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,0,0,0]
1724 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1727 ; AVX2-SLOW-LABEL: shuffle_v8i32_00003000:
1728 ; AVX2-SLOW: # %bb.0:
1729 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,3,0]
1730 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
1731 ; AVX2-SLOW-NEXT: retq
1733 ; AVX2-FAST-ALL-LABEL: shuffle_v8i32_00003000:
1734 ; AVX2-FAST-ALL: # %bb.0:
1735 ; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
1736 ; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1737 ; AVX2-FAST-ALL-NEXT: retq
1739 ; AVX2-FAST-PERLANE-LABEL: shuffle_v8i32_00003000:
1740 ; AVX2-FAST-PERLANE: # %bb.0:
1741 ; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,3,0]
1742 ; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
1743 ; AVX2-FAST-PERLANE-NEXT: retq
1745 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_00003000:
1746 ; AVX512VL-SLOW: # %bb.0:
1747 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,3,0]
1748 ; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
1749 ; AVX512VL-SLOW-NEXT: retq
1751 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8i32_00003000:
1752 ; AVX512VL-FAST-ALL: # %bb.0:
1753 ; AVX512VL-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
1754 ; AVX512VL-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1755 ; AVX512VL-FAST-ALL-NEXT: retq
1757 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8i32_00003000:
1758 ; AVX512VL-FAST-PERLANE: # %bb.0:
1759 ; AVX512VL-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,3,0]
1760 ; AVX512VL-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
1761 ; AVX512VL-FAST-PERLANE-NEXT: retq
1762 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
1763 ret <8 x i32> %shuffle
; Every lane reads %a[0] except lane 3, which reads %a[4] from the upper
; 128-bit half, so the shuffle crosses halves. %b is not referenced.
; AVX2 and AVX512VL are expected to use vpermps with a constant index vector;
; AVX1 needs a vperm2f128 / vinsertf128 / vunpcklpd / vshufps sequence.
1766 define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) {
1767 ; AVX1-LABEL: shuffle_v8i32_00040000:
1769 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
1770 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1771 ; AVX1-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
1772 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[2,0],ymm0[4,4],ymm1[6,4]
1775 ; AVX2OR512VL-LABEL: shuffle_v8i32_00040000:
1776 ; AVX2OR512VL: # %bb.0:
1777 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} xmm1 = [0,0,0,4]
1778 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1779 ; AVX2OR512VL-NEXT: retq
1780 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
1781 ret <8 x i32> %shuffle
; Every lane reads %a[0] except lane 2, which reads %a[5] from the upper
; 128-bit half, so the shuffle crosses halves. %b is not referenced.
; AVX2 and AVX512VL are expected to use vpermps with a constant index vector;
; AVX1 swaps the halves, blends, and then permutes in-lane.
1784 define <8 x i32> @shuffle_v8i32_00500000(<8 x i32> %a, <8 x i32> %b) {
1785 ; AVX1-LABEL: shuffle_v8i32_00500000:
1787 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
1788 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
1789 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
1792 ; AVX2OR512VL-LABEL: shuffle_v8i32_00500000:
1793 ; AVX2OR512VL: # %bb.0:
1794 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} xmm1 = [0,0,5,0]
1795 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1796 ; AVX2OR512VL-NEXT: retq
1797 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
1798 ret <8 x i32> %shuffle
; Every lane reads %a[0] except lane 1, which reads %a[6] from the upper
; 128-bit half, so the shuffle crosses halves. %b is not referenced.
; AVX2 and AVX512VL are expected to use vpermps with a constant index vector;
; AVX1 swaps the halves, blends, and then permutes in-lane.
1801 define <8 x i32> @shuffle_v8i32_06000000(<8 x i32> %a, <8 x i32> %b) {
1802 ; AVX1-LABEL: shuffle_v8i32_06000000:
1804 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
1805 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
1806 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
1809 ; AVX2OR512VL-LABEL: shuffle_v8i32_06000000:
1810 ; AVX2OR512VL: # %bb.0:
1811 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} xmm1 = [0,6,0,0]
1812 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1813 ; AVX2OR512VL-NEXT: retq
1814 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1815 ret <8 x i32> %shuffle
; Lane 0 reads %a[7] (last element of the upper half); every other lane reads
; %a[0], so the shuffle crosses halves. %b is not referenced.
; AVX2 and AVX512VL are expected to use vpermps with a constant index vector;
; AVX1 swaps the halves, blends, and then permutes in-lane.
1818 define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) {
1819 ; AVX1-LABEL: shuffle_v8i32_70000000:
1821 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
1822 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
1823 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
1826 ; AVX2OR512VL-LABEL: shuffle_v8i32_70000000:
1827 ; AVX2OR512VL: # %bb.0:
1828 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} xmm1 = [7,0,0,0]
1829 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1830 ; AVX2OR512VL-NEXT: retq
1831 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1832 ret <8 x i32> %shuffle
; Repeats the low element pair of each 128-bit half of %a:
; a[0], a[1], a[0], a[1], a[4], a[5], a[4], a[5]. %b is not referenced.
; AVX1 is expected to use vmovddup; AVX2 and AVX512VL use an in-lane vshufps.
1835 define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) {
1836 ; AVX1-LABEL: shuffle_v8i32_01014545:
1838 ; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
1841 ; AVX2OR512VL-LABEL: shuffle_v8i32_01014545:
1842 ; AVX2OR512VL: # %bb.0:
1843 ; AVX2OR512VL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
1844 ; AVX2OR512VL-NEXT: retq
1845 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
1846 ret <8 x i32> %shuffle
; Duplicates each of %a[0..3] into two adjacent result lanes; only the low
; half of %a is used and %b is not referenced.
; Configurations with fast cross-lane variable shuffles are expected to use a
; constant-index vpermps; the other AVX2/AVX512VL ones use vpermpd + vshufps,
; and AVX1 builds the two halves separately and joins them with vinsertf128.
1849 define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) {
1850 ; AVX1-LABEL: shuffle_v8i32_00112233:
1852 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,0,1,1]
1853 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
1854 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1857 ; AVX2-SLOW-LABEL: shuffle_v8i32_00112233:
1858 ; AVX2-SLOW: # %bb.0:
1859 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
1860 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
1861 ; AVX2-SLOW-NEXT: retq
1863 ; AVX2-FAST-ALL-LABEL: shuffle_v8i32_00112233:
1864 ; AVX2-FAST-ALL: # %bb.0:
1865 ; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
1866 ; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1867 ; AVX2-FAST-ALL-NEXT: retq
1869 ; AVX2-FAST-PERLANE-LABEL: shuffle_v8i32_00112233:
1870 ; AVX2-FAST-PERLANE: # %bb.0:
1871 ; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
1872 ; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
1873 ; AVX2-FAST-PERLANE-NEXT: retq
1875 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_00112233:
1876 ; AVX512VL-SLOW: # %bb.0:
1877 ; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
1878 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
1879 ; AVX512VL-SLOW-NEXT: retq
1881 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8i32_00112233:
1882 ; AVX512VL-FAST-ALL: # %bb.0:
1883 ; AVX512VL-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
1884 ; AVX512VL-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1885 ; AVX512VL-FAST-ALL-NEXT: retq
1887 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8i32_00112233:
1888 ; AVX512VL-FAST-PERLANE: # %bb.0:
1889 ; AVX512VL-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
1890 ; AVX512VL-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
1891 ; AVX512VL-FAST-PERLANE-NEXT: retq
1892 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
1893 ret <8 x i32> %shuffle
; Low four lanes all read %a[0] and the high four lanes all read %a[1]; only
; the low half of %a is used and %b is not referenced.
; Configurations with fast cross-lane variable shuffles are expected to use a
; constant-index vpermps; the other AVX2/AVX512VL ones use vshufps + vpermpd,
; and AVX1 builds the two halves separately and joins them with vinsertf128.
1896 define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) {
1897 ; AVX1-LABEL: shuffle_v8i32_00001111:
1899 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,0,0,0]
1900 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1901 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1904 ; AVX2-SLOW-LABEL: shuffle_v8i32_00001111:
1905 ; AVX2-SLOW: # %bb.0:
1906 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,1,1]
1907 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
1908 ; AVX2-SLOW-NEXT: retq
1910 ; AVX2-FAST-ALL-LABEL: shuffle_v8i32_00001111:
1911 ; AVX2-FAST-ALL: # %bb.0:
1912 ; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
1913 ; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1914 ; AVX2-FAST-ALL-NEXT: retq
1916 ; AVX2-FAST-PERLANE-LABEL: shuffle_v8i32_00001111:
1917 ; AVX2-FAST-PERLANE: # %bb.0:
1918 ; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,1,1]
1919 ; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
1920 ; AVX2-FAST-PERLANE-NEXT: retq
1922 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_00001111:
1923 ; AVX512VL-SLOW: # %bb.0:
1924 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,1,1]
1925 ; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
1926 ; AVX512VL-SLOW-NEXT: retq
1928 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8i32_00001111:
1929 ; AVX512VL-FAST-ALL: # %bb.0:
1930 ; AVX512VL-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
1931 ; AVX512VL-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
1932 ; AVX512VL-FAST-ALL-NEXT: retq
1934 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8i32_00001111:
1935 ; AVX512VL-FAST-PERLANE: # %bb.0:
1936 ; AVX512VL-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,1,1]
1937 ; AVX512VL-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
1938 ; AVX512VL-FAST-PERLANE-NEXT: retq
1939 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
1940 ret <8 x i32> %shuffle
; Pure blend: even result lanes come from %b and odd result lanes from %a,
; each at its own lane position. The shared checks expect one vblendps.
1943 define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) {
1944 ; ALL-LABEL: shuffle_v8i32_81a3c5e7:
1946 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
1948 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
1949 ret <8 x i32> %shuffle
; Alternates %a[0] and %b[0] across all eight lanes (a 64-bit pair splat).
; Most AVX2/AVX512VL configurations are expected to interleave with vunpcklps
; and then vbroadcastsd; the AVX512VL configuration with fast cross-lane
; variable shuffles uses vpermt2d with a broadcast index constant instead.
1952 define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) {
1953 ; AVX1-LABEL: shuffle_v8i32_08080808:
1955 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
1956 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
1957 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1960 ; AVX2-LABEL: shuffle_v8i32_08080808:
1962 ; AVX2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1963 ; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
1966 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_08080808:
1967 ; AVX512VL-SLOW: # %bb.0:
1968 ; AVX512VL-SLOW-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1969 ; AVX512VL-SLOW-NEXT: vbroadcastsd %xmm0, %ymm0
1970 ; AVX512VL-SLOW-NEXT: retq
1972 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8i32_08080808:
1973 ; AVX512VL-FAST-ALL: # %bb.0:
1974 ; AVX512VL-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,8,0,8,0,8,0,8]
1975 ; AVX512VL-FAST-ALL-NEXT: vpermt2d %ymm1, %ymm2, %ymm0
1976 ; AVX512VL-FAST-ALL-NEXT: retq
1978 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8i32_08080808:
1979 ; AVX512VL-FAST-PERLANE: # %bb.0:
1980 ; AVX512VL-FAST-PERLANE-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1981 ; AVX512VL-FAST-PERLANE-NEXT: vbroadcastsd %xmm0, %ymm0
1982 ; AVX512VL-FAST-PERLANE-NEXT: retq
1983 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
1984 ret <8 x i32> %shuffle
; In-lane variant of the 0/8 alternation: the low half alternates %a[0], %b[0]
; and the high half alternates %a[4], %b[4].
; AVX512VL configurations with fast per-lane variable shuffles are expected to
; use vpermt2d; the others use two in-lane shuffles (vunpcklps + vshufps, or
; two vshufps on AVX1).
1987 define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) {
1988 ; AVX1-LABEL: shuffle_v8i32_08084c4c:
1990 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
1991 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
1994 ; AVX2-LABEL: shuffle_v8i32_08084c4c:
1996 ; AVX2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1997 ; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
2000 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_08084c4c:
2001 ; AVX512VL-SLOW: # %bb.0:
2002 ; AVX512VL-SLOW-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
2003 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
2004 ; AVX512VL-SLOW-NEXT: retq
2006 ; AVX512VL-FAST-LABEL: shuffle_v8i32_08084c4c:
2007 ; AVX512VL-FAST: # %bb.0:
2008 ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,8,0,8,4,12,4,12]
2009 ; AVX512VL-FAST-NEXT: vpermt2d %ymm1, %ymm2, %ymm0
2010 ; AVX512VL-FAST-NEXT: retq
2011 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
2012 ret <8 x i32> %shuffle
; Result lanes are b[0], b[0], a[2], a[3], b[4], b[4], a[6], a[7]: the same
; two-source pattern in each 128-bit half, matched by one vshufps.
2015 define <8 x i32> @shuffle_v8i32_8823cc67(<8 x i32> %a, <8 x i32> %b) {
2016 ; ALL-LABEL: shuffle_v8i32_8823cc67:
2018 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
2020 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
2021 ret <8 x i32> %shuffle
; Result lanes are b[1], b[0], a[3], a[2], b[5], b[4], a[7], a[6]: the same
; two-source pattern in each 128-bit half, matched by one vshufps.
2024 define <8 x i32> @shuffle_v8i32_9832dc76(<8 x i32> %a, <8 x i32> %b) {
2025 ; ALL-LABEL: shuffle_v8i32_9832dc76:
2027 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
2029 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
2030 ret <8 x i32> %shuffle
; Result lanes are b[1], b[0], a[1], a[0], b[5], b[4], a[5], a[4]: the same
; two-source pattern in each 128-bit half, matched by one vshufps.
2033 define <8 x i32> @shuffle_v8i32_9810dc54(<8 x i32> %a, <8 x i32> %b) {
2034 ; ALL-LABEL: shuffle_v8i32_9810dc54:
2036 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
2038 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
2039 ret <8 x i32> %shuffle
; Interleaves the low two elements of each 128-bit half of %a with the
; corresponding elements of %b; the shared checks expect a single vunpcklps.
2042 define <8 x i32> @shuffle_v8i32_08194c5d(<8 x i32> %a, <8 x i32> %b) {
2043 ; ALL-LABEL: shuffle_v8i32_08194c5d:
2045 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
2047 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
2048 ret <8 x i32> %shuffle
; Interleaves the high two elements of each 128-bit half of %a with the
; corresponding elements of %b; the shared checks expect a single vunpckhps.
2051 define <8 x i32> @shuffle_v8i32_2a3b6e7f(<8 x i32> %a, <8 x i32> %b) {
2052 ; ALL-LABEL: shuffle_v8i32_2a3b6e7f:
2054 ; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
2056 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
2057 ret <8 x i32> %shuffle
; Full-width interleave of %a[0..3] with %b[0..3]; unlike the in-lane unpack
; patterns, the high result half is built from low-half source elements.
; AVX1 and AVX2 are expected to use two 128-bit unpacks joined by vinsertf128;
; AVX512VL uses vpermt2d with a constant index vector.
2060 define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) {
2061 ; AVX1OR2-LABEL: shuffle_v8i32_08192a3b:
2063 ; AVX1OR2-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2064 ; AVX1OR2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2065 ; AVX1OR2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2066 ; AVX1OR2-NEXT: retq
2068 ; AVX512VL-LABEL: shuffle_v8i32_08192a3b:
2069 ; AVX512VL: # %bb.0:
2070 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,8,1,9,2,10,3,11]
2071 ; AVX512VL-NEXT: vpermt2d %ymm1, %ymm2, %ymm0
2072 ; AVX512VL-NEXT: retq
2073 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
2074 ret <8 x i32> %shuffle
; Result lanes are a[0], b[0], b[1], b[1], a[1], b[2], b[3], b[3]; only the low
; halves of %a and %b are read, but they are spread across both result halves.
; AVX512VL is expected to use vpermi2d with a constant index vector. The AVX2
; configurations shuffle each source separately and blend the results, and
; AVX1 assembles the two 128-bit halves and joins them with vinsertf128.
2077 define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) {
2078 ; AVX1-LABEL: shuffle_v8i32_08991abb:
2080 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm2 = xmm0[0],xmm1[0]
2081 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
2082 ; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
2083 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,2,3,3]
2084 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
2087 ; AVX2-SLOW-LABEL: shuffle_v8i32_08991abb:
2088 ; AVX2-SLOW: # %bb.0:
2089 ; AVX2-SLOW-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
2090 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
2091 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3]
2092 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,1,1,4,4,5,5]
2093 ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
2094 ; AVX2-SLOW-NEXT: retq
2096 ; AVX2-FAST-ALL-LABEL: shuffle_v8i32_08991abb:
2097 ; AVX2-FAST-ALL: # %bb.0:
2098 ; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,u,u,u,1,u,u,u]
2099 ; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
2100 ; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [u,0,1,1,u,2,3,3]
2101 ; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
2102 ; AVX2-FAST-ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
2103 ; AVX2-FAST-ALL-NEXT: retq
2105 ; AVX2-FAST-PERLANE-LABEL: shuffle_v8i32_08991abb:
2106 ; AVX2-FAST-PERLANE: # %bb.0:
2107 ; AVX2-FAST-PERLANE-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
2108 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
2109 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3]
2110 ; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,1,1,4,4,5,5]
2111 ; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
2112 ; AVX2-FAST-PERLANE-NEXT: retq
2114 ; AVX512VL-LABEL: shuffle_v8i32_08991abb:
2115 ; AVX512VL: # %bb.0:
2116 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [8,0,1,1,9,2,3,3]
2117 ; AVX512VL-NEXT: vpermi2d %ymm0, %ymm1, %ymm2
2118 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
2119 ; AVX512VL-NEXT: retq
2120 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
2121 ret <8 x i32> %shuffle
; Even result lanes are a[0], a[1], a[2], a[3] (the low half of %a spread
; out); odd result lanes keep %b at its own position (b[1], b[3], b[5], b[7]).
; Most configurations are expected to widen %a with vpmovzxdq and blend with
; %b; the AVX512VL configuration with fast cross-lane variable shuffles uses
; vpermt2d with a constant index vector.
2124 define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
2125 ; AVX1-LABEL: shuffle_v8i32_091b2d3f:
2127 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
2128 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,3,3]
2129 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
2130 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
2133 ; AVX2-LABEL: shuffle_v8i32_091b2d3f:
2135 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2136 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
2139 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_091b2d3f:
2140 ; AVX512VL-SLOW: # %bb.0:
2141 ; AVX512VL-SLOW-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2142 ; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
2143 ; AVX512VL-SLOW-NEXT: retq
2145 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8i32_091b2d3f:
2146 ; AVX512VL-FAST-ALL: # %bb.0:
2147 ; AVX512VL-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,9,1,11,2,13,3,15]
2148 ; AVX512VL-FAST-ALL-NEXT: vpermt2d %ymm1, %ymm2, %ymm0
2149 ; AVX512VL-FAST-ALL-NEXT: retq
2151 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8i32_091b2d3f:
2152 ; AVX512VL-FAST-PERLANE: # %bb.0:
2153 ; AVX512VL-FAST-PERLANE-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2154 ; AVX512VL-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
2155 ; AVX512VL-FAST-PERLANE-NEXT: retq
2156 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
2157 ret <8 x i32> %shuffle
; Lanes 0 and 4 are a[0] and a[1]; every other lane keeps %b at its own
; position, so the final step on AVX1/AVX2 is a blend with %b.
; AVX512VL is expected to use vpermi2d with a constant index vector; the
; AVX1/AVX2 configurations differ only in how a[1] is moved into lane 4.
2160 define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {
2161 ; AVX1-LABEL: shuffle_v8i32_09ab1def:
2163 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[1,1,3,3]
2164 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2165 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
2168 ; AVX2-SLOW-LABEL: shuffle_v8i32_09ab1def:
2169 ; AVX2-SLOW: # %bb.0:
2170 ; AVX2-SLOW-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
2171 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
2172 ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
2173 ; AVX2-SLOW-NEXT: retq
2175 ; AVX2-FAST-ALL-LABEL: shuffle_v8i32_09ab1def:
2176 ; AVX2-FAST-ALL: # %bb.0:
2177 ; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,u,u,u,1,u,u,u]
2178 ; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
2179 ; AVX2-FAST-ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
2180 ; AVX2-FAST-ALL-NEXT: retq
2182 ; AVX2-FAST-PERLANE-LABEL: shuffle_v8i32_09ab1def:
2183 ; AVX2-FAST-PERLANE: # %bb.0:
2184 ; AVX2-FAST-PERLANE-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
2185 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
2186 ; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
2187 ; AVX2-FAST-PERLANE-NEXT: retq
2189 ; AVX512VL-LABEL: shuffle_v8i32_09ab1def:
2190 ; AVX512VL: # %bb.0:
2191 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [8,1,2,3,9,5,6,7]
2192 ; AVX512VL-NEXT: vpermi2d %ymm0, %ymm1, %ymm2
2193 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
2194 ; AVX512VL-NEXT: retq
2195 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
2196 ret <8 x i32> %shuffle
; Single-input shuffle applying the in-lane pattern [0,0,0,1] to each 128-bit
; half of %a; %b is not referenced. The shared checks expect one vshufps.
2199 define <8 x i32> @shuffle_v8i32_00014445(<8 x i32> %a, <8 x i32> %b) {
2200 ; ALL-LABEL: shuffle_v8i32_00014445:
2202 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
2204 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
2205 ret <8 x i32> %shuffle
; Single-input shuffle applying the in-lane pattern [0,0,2,0] to each 128-bit
; half of %a; %b is not referenced. The shared checks expect one vshufps.
2208 define <8 x i32> @shuffle_v8i32_00204464(<8 x i32> %a, <8 x i32> %b) {
2209 ; ALL-LABEL: shuffle_v8i32_00204464:
2211 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
2213 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
2214 ret <8 x i32> %shuffle
; Single-input shuffle applying the in-lane pattern [0,3,0,0] to each 128-bit
; half of %a; %b is not referenced. The shared checks expect one vshufps.
2217 define <8 x i32> @shuffle_v8i32_03004744(<8 x i32> %a, <8 x i32> %b) {
2218 ; ALL-LABEL: shuffle_v8i32_03004744:
2220 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
2222 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
2223 ret <8 x i32> %shuffle
; Single-input shuffle applying the in-lane pattern [1,0,0,0] to each 128-bit
; half of %a; %b is not referenced. The shared checks expect one vshufps.
2226 define <8 x i32> @shuffle_v8i32_10005444(<8 x i32> %a, <8 x i32> %b) {
2227 ; ALL-LABEL: shuffle_v8i32_10005444:
2229 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
2231 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
2232 ret <8 x i32> %shuffle
; Single-input shuffle applying the in-lane pattern [2,2,0,0] to each 128-bit
; half of %a; %b is not referenced. The shared checks expect one vshufps.
2235 define <8 x i32> @shuffle_v8i32_22006644(<8 x i32> %a, <8 x i32> %b) {
2236 ; ALL-LABEL: shuffle_v8i32_22006644:
2238 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
2240 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
2241 ret <8 x i32> %shuffle
; Single-input shuffle applying the in-lane pattern [3,3,3,0] to each 128-bit
; half of %a; %b is not referenced. The shared checks expect one vshufps.
2244 define <8 x i32> @shuffle_v8i32_33307774(<8 x i32> %a, <8 x i32> %b) {
2245 ; ALL-LABEL: shuffle_v8i32_33307774:
2247 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
2249 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
2250 ret <8 x i32> %shuffle
; Single-input shuffle that reverses the four elements within each 128-bit
; half of %a; %b is not referenced. The shared checks expect one vshufps.
2253 define <8 x i32> @shuffle_v8i32_32107654(<8 x i32> %a, <8 x i32> %b) {
2254 ; ALL-LABEL: shuffle_v8i32_32107654:
2256 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2258 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
2259 ret <8 x i32> %shuffle
; Single-input shuffle of %a (the mask never references %b).
; Both 128-bit halves use the same in-lane pattern [0,0,2,3].
2262 define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) {
2263 ; ALL-LABEL: shuffle_v8i32_00234467:
2265 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
2267 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
2268 ret <8 x i32> %shuffle
; Single-input shuffle of %a that duplicates each even-indexed element into
; the following odd slot (in-lane pattern [0,0,2,2] in both 128-bit halves).
; The AVX1 run expects vmovsldup; the AVX2/AVX512VL runs expect vshufps.
2271 define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) {
2272 ; AVX1-LABEL: shuffle_v8i32_00224466:
2274 ; AVX1-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
2277 ; AVX2OR512VL-LABEL: shuffle_v8i32_00224466:
2278 ; AVX2OR512VL: # %bb.0:
2279 ; AVX2OR512VL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
2280 ; AVX2OR512VL-NEXT: retq
2281 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
2282 ret <8 x i32> %shuffle
; Single-input shuffle of %a that swaps each adjacent pair of elements
; (in-lane pattern [1,0,3,2] in both 128-bit halves).
2285 define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) {
2286 ; ALL-LABEL: shuffle_v8i32_10325476:
2288 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
2290 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
2291 ret <8 x i32> %shuffle
; Single-input shuffle of %a that duplicates each odd-indexed element into
; the preceding even slot (in-lane pattern [1,1,3,3] in both 128-bit halves).
; The AVX1 run expects vmovshdup; the AVX2/AVX512VL runs expect vshufps.
2294 define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) {
2295 ; AVX1-LABEL: shuffle_v8i32_11335577:
2297 ; AVX1-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
2300 ; AVX2OR512VL-LABEL: shuffle_v8i32_11335577:
2301 ; AVX2OR512VL: # %bb.0:
2302 ; AVX2OR512VL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
2303 ; AVX2OR512VL-NEXT: retq
2304 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
2305 ret <8 x i32> %shuffle
; Single-input shuffle of %a (the mask never references %b).
; Both 128-bit halves use the same in-lane pattern [1,0,2,3].
2308 define <8 x i32> @shuffle_v8i32_10235467(<8 x i32> %a, <8 x i32> %b) {
2309 ; ALL-LABEL: shuffle_v8i32_10235467:
2311 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
2313 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
2314 ret <8 x i32> %shuffle
; Single-input shuffle of %a (the mask never references %b).
; Both 128-bit halves use the same in-lane pattern [1,0,2,2].
2317 define <8 x i32> @shuffle_v8i32_10225466(<8 x i32> %a, <8 x i32> %b) {
2318 ; ALL-LABEL: shuffle_v8i32_10225466:
2320 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
2322 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
2323 ret <8 x i32> %shuffle
; Single-input shuffle of %a where the two 128-bit halves use different
; in-lane patterns (low [0,0,0,1], high [1,0,0,0]); no element crosses halves.
; The AVX1 run expects vpermilps; the AVX2/AVX512VL runs expect an index
; constant fed to vpermps.
2326 define <8 x i32> @shuffle_v8i32_00015444(<8 x i32> %a, <8 x i32> %b) {
2327 ; AVX1-LABEL: shuffle_v8i32_00015444:
2329 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
2332 ; AVX2OR512VL-LABEL: shuffle_v8i32_00015444:
2333 ; AVX2OR512VL: # %bb.0:
2334 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4]
2335 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
2336 ; AVX2OR512VL-NEXT: retq
2337 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
2338 ret <8 x i32> %shuffle
; Single-input shuffle of %a with different in-lane patterns per 128-bit half
; (low [0,0,2,0], high [0,2,0,0]); no element crosses halves.
2341 define <8 x i32> @shuffle_v8i32_00204644(<8 x i32> %a, <8 x i32> %b) {
2342 ; AVX1-LABEL: shuffle_v8i32_00204644:
2344 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
2347 ; AVX2OR512VL-LABEL: shuffle_v8i32_00204644:
2348 ; AVX2OR512VL: # %bb.0:
2349 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4]
2350 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
2351 ; AVX2OR512VL-NEXT: retq
2352 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
2353 ret <8 x i32> %shuffle
; Single-input shuffle of %a with different in-lane patterns per 128-bit half
; (low [0,3,0,0], high [0,0,3,0]); no element crosses halves.
2356 define <8 x i32> @shuffle_v8i32_03004474(<8 x i32> %a, <8 x i32> %b) {
2357 ; AVX1-LABEL: shuffle_v8i32_03004474:
2359 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
2362 ; AVX2OR512VL-LABEL: shuffle_v8i32_03004474:
2363 ; AVX2OR512VL: # %bb.0:
2364 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4]
2365 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
2366 ; AVX2OR512VL-NEXT: retq
2367 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
2368 ret <8 x i32> %shuffle
; Single-input shuffle of %a with different in-lane patterns per 128-bit half
; (low [1,0,0,0], high splat of element 4); no element crosses halves.
2371 define <8 x i32> @shuffle_v8i32_10004444(<8 x i32> %a, <8 x i32> %b) {
2372 ; AVX1-LABEL: shuffle_v8i32_10004444:
2374 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
2377 ; AVX2OR512VL-LABEL: shuffle_v8i32_10004444:
2378 ; AVX2OR512VL: # %bb.0:
2379 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4]
2380 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
2381 ; AVX2OR512VL-NEXT: retq
2382 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
2383 ret <8 x i32> %shuffle
; Single-input shuffle of %a with different in-lane patterns per 128-bit half
; (low [2,2,0,0], high [2,0,0,2]); no element crosses halves.
2386 define <8 x i32> @shuffle_v8i32_22006446(<8 x i32> %a, <8 x i32> %b) {
2387 ; AVX1-LABEL: shuffle_v8i32_22006446:
2389 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
2392 ; AVX2OR512VL-LABEL: shuffle_v8i32_22006446:
2393 ; AVX2OR512VL: # %bb.0:
2394 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6]
2395 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
2396 ; AVX2OR512VL-NEXT: retq
2397 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
2398 ret <8 x i32> %shuffle
; Single-input shuffle of %a with different in-lane patterns per 128-bit half
; (low [3,3,3,0], high [3,0,3,0]); no element crosses halves.
2401 define <8 x i32> @shuffle_v8i32_33307474(<8 x i32> %a, <8 x i32> %b) {
2402 ; AVX1-LABEL: shuffle_v8i32_33307474:
2404 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
2407 ; AVX2OR512VL-LABEL: shuffle_v8i32_33307474:
2408 ; AVX2OR512VL: # %bb.0:
2409 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4]
2410 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
2411 ; AVX2OR512VL-NEXT: retq
2412 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
2413 ret <8 x i32> %shuffle
; Single-input shuffle of %a: the low 128-bit half is reversed ([3,2,1,0])
; while the high half is passed through unchanged ([4,5,6,7]).
2416 define <8 x i32> @shuffle_v8i32_32104567(<8 x i32> %a, <8 x i32> %b) {
2417 ; AVX1-LABEL: shuffle_v8i32_32104567:
2419 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
2422 ; AVX2OR512VL-LABEL: shuffle_v8i32_32104567:
2423 ; AVX2OR512VL: # %bb.0:
2424 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7]
2425 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
2426 ; AVX2OR512VL-NEXT: retq
2427 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
2428 ret <8 x i32> %shuffle
; Single-input shuffle of %a with different in-lane patterns per 128-bit half
; (low [0,0,2,3], high [2,3,0,0]); no element crosses halves.
2431 define <8 x i32> @shuffle_v8i32_00236744(<8 x i32> %a, <8 x i32> %b) {
2432 ; AVX1-LABEL: shuffle_v8i32_00236744:
2434 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
2437 ; AVX2OR512VL-LABEL: shuffle_v8i32_00236744:
2438 ; AVX2OR512VL: # %bb.0:
2439 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4]
2440 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
2441 ; AVX2OR512VL-NEXT: retq
2442 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
2443 ret <8 x i32> %shuffle
; Single-input shuffle of %a with different in-lane patterns per 128-bit half
; (low [0,0,2,2], high [2,2,0,0]); no element crosses halves.
2446 define <8 x i32> @shuffle_v8i32_00226644(<8 x i32> %a, <8 x i32> %b) {
2447 ; AVX1-LABEL: shuffle_v8i32_00226644:
2449 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
2452 ; AVX2OR512VL-LABEL: shuffle_v8i32_00226644:
2453 ; AVX2OR512VL: # %bb.0:
2454 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4]
2455 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
2456 ; AVX2OR512VL-NEXT: retq
2457 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
2458 ret <8 x i32> %shuffle
; Single-input shuffle of %a: adjacent pairs are swapped in the low 128-bit
; half ([1,0,3,2]) while the high half is passed through unchanged.
2461 define <8 x i32> @shuffle_v8i32_10324567(<8 x i32> %a, <8 x i32> %b) {
2462 ; AVX1-LABEL: shuffle_v8i32_10324567:
2464 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
2467 ; AVX2OR512VL-LABEL: shuffle_v8i32_10324567:
2468 ; AVX2OR512VL: # %bb.0:
2469 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7]
2470 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
2471 ; AVX2OR512VL-NEXT: retq
2472 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
2473 ret <8 x i32> %shuffle
; Single-input shuffle of %a: odd elements are duplicated in the low 128-bit
; half ([1,1,3,3]) while the high half is passed through unchanged.
2476 define <8 x i32> @shuffle_v8i32_11334567(<8 x i32> %a, <8 x i32> %b) {
2477 ; AVX1-LABEL: shuffle_v8i32_11334567:
2479 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
2482 ; AVX2OR512VL-LABEL: shuffle_v8i32_11334567:
2483 ; AVX2OR512VL: # %bb.0:
2484 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7]
2485 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
2486 ; AVX2OR512VL-NEXT: retq
2487 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
2488 ret <8 x i32> %shuffle
; Single-input shuffle of %a: the low 128-bit half is passed through
; unchanged while the high half swaps its first two elements ([1,0,2,3]).
2491 define <8 x i32> @shuffle_v8i32_01235467(<8 x i32> %a, <8 x i32> %b) {
2492 ; AVX1-LABEL: shuffle_v8i32_01235467:
2494 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
2497 ; AVX2OR512VL-LABEL: shuffle_v8i32_01235467:
2498 ; AVX2OR512VL: # %bb.0:
2499 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7]
2500 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
2501 ; AVX2OR512VL-NEXT: retq
2502 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
2503 ret <8 x i32> %shuffle
; Single-input shuffle of %a: the low 128-bit half is passed through
; unchanged while the high half uses in-lane pattern [1,0,2,2].
2506 define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) {
2507 ; AVX1-LABEL: shuffle_v8i32_01235466:
2509 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
2512 ; AVX2OR512VL-LABEL: shuffle_v8i32_01235466:
2513 ; AVX2OR512VL: # %bb.0:
2514 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6]
2515 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
2516 ; AVX2OR512VL-NEXT: retq
2517 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
2518 ret <8 x i32> %shuffle
; Single-input shuffle of %a with undef mask elements at result positions 3
; and 5 ('u' in the name); the defined elements stay within their own half.
2521 define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) {
2522 ; AVX1-LABEL: shuffle_v8i32_002u6u44:
2524 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
2527 ; AVX2OR512VL-LABEL: shuffle_v8i32_002u6u44:
2528 ; AVX2OR512VL: # %bb.0:
2529 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,2,u,6,u,4,4]
2530 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
2531 ; AVX2OR512VL-NEXT: retq
2532 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
2533 ret <8 x i32> %shuffle
; Single-input shuffle of %a where only result positions 0,1 (element 0) and
; 4,5 (element 6) are defined; the remaining four mask elements are undef.
2536 define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) {
2537 ; AVX1-LABEL: shuffle_v8i32_00uu66uu:
2539 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
2542 ; AVX2OR512VL-LABEL: shuffle_v8i32_00uu66uu:
2543 ; AVX2OR512VL: # %bb.0:
2544 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,u,u,6,6,u,u]
2545 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
2546 ; AVX2OR512VL-NEXT: retq
2547 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
2548 ret <8 x i32> %shuffle
; Single-input shuffle of %a: adjacent pairs swapped in the low half, the
; first two elements of the high half passed through, last two undef.
2551 define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) {
2552 ; AVX1-LABEL: shuffle_v8i32_103245uu:
2554 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
2557 ; AVX2OR512VL-LABEL: shuffle_v8i32_103245uu:
2558 ; AVX2OR512VL: # %bb.0:
2559 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [1,0,3,2,4,5,u,u]
2560 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
2561 ; AVX2OR512VL-NEXT: retq
2562 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
2563 ret <8 x i32> %shuffle
; Single-input shuffle of %a: odd elements duplicated in the low half
; ([1,1,3,3]); in the high half positions 4,5 are undef and 6,7 pass through.
2566 define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) {
2567 ; AVX1-LABEL: shuffle_v8i32_1133uu67:
2569 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
2572 ; AVX2OR512VL-LABEL: shuffle_v8i32_1133uu67:
2573 ; AVX2OR512VL: # %bb.0:
2574 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,3,3,u,u,6,7]
2575 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
2576 ; AVX2OR512VL-NEXT: retq
2577 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
2578 ret <8 x i32> %shuffle
; Single-input shuffle of %a with four undef mask elements; the defined
; positions are 0 and 3 (pass-through) and 4,5 (elements 5,4 swapped).
2581 define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) {
2582 ; AVX1-LABEL: shuffle_v8i32_0uu354uu:
2584 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
2587 ; AVX2OR512VL-LABEL: shuffle_v8i32_0uu354uu:
2588 ; AVX2OR512VL: # %bb.0:
2589 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,u,u,3,5,4,u,u]
2590 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
2591 ; AVX2OR512VL-NEXT: retq
2592 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
2593 ret <8 x i32> %shuffle
; Single-input shuffle of %a where only three result positions are defined:
; position 3 takes element 3, positions 6 and 7 both take element 6.
2596 define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) {
2597 ; AVX1-LABEL: shuffle_v8i32_uuu3uu66:
2599 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
2602 ; AVX2OR512VL-LABEL: shuffle_v8i32_uuu3uu66:
2603 ; AVX2OR512VL: # %bb.0:
2604 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [u,u,u,3,u,u,6,6]
2605 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
2606 ; AVX2OR512VL-NEXT: retq
2607 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
2608 ret <8 x i32> %shuffle
; Two-input shuffle with an irregular, lane-crossing mask. The name spells the
; mask in hexadecimal (6,c,a,a,8,7,e,5 = 6,12,10,10,8,7,14,5); indices 0-7
; select from %a and indices 8-15 select from %b.
2611 define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) {
2612 ; AVX1-LABEL: shuffle_v8i32_6caa87e5:
2614 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
2615 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[2,2],ymm2[4,4],ymm1[6,6]
2616 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
2617 ; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
2618 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
2621 ; AVX2-SLOW-LABEL: shuffle_v8i32_6caa87e5:
2622 ; AVX2-SLOW: # %bb.0:
2623 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,1,3,2]
2624 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0,2,2,4,4,6,6]
2625 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,1,0,3]
2626 ; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
2627 ; AVX2-SLOW-NEXT: retq
2629 ; AVX2-FAST-ALL-LABEL: shuffle_v8i32_6caa87e5:
2630 ; AVX2-FAST-ALL: # %bb.0:
2631 ; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [u,4,2,2,0,u,6,u]
2632 ; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
2633 ; AVX2-FAST-ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,1,3,2]
2634 ; AVX2-FAST-ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
2635 ; AVX2-FAST-ALL-NEXT: retq
2637 ; AVX2-FAST-PERLANE-LABEL: shuffle_v8i32_6caa87e5:
2638 ; AVX2-FAST-PERLANE: # %bb.0:
2639 ; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,1,3,2]
2640 ; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0,2,2,4,4,6,6]
2641 ; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,1,0,3]
2642 ; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
2643 ; AVX2-FAST-PERLANE-NEXT: retq
2645 ; AVX512VL-LABEL: shuffle_v8i32_6caa87e5:
2646 ; AVX512VL: # %bb.0:
2647 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [14,4,2,2,0,15,6,13]
2648 ; AVX512VL-NEXT: vpermi2d %ymm0, %ymm1, %ymm2
2649 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
2650 ; AVX512VL-NEXT: retq
2651 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
2652 ret <8 x i32> %shuffle
; Single-input shuffle of %a: the low four elements are reversed ([3,2,1,0])
; and that reversed group is written to both 128-bit halves of the result.
; Expected code differs per run configuration (in-lane shuffle plus a
; broadcast/insert versus a single variable cross-lane permute).
2655 define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) {
2656 ; AVX1-LABEL: shuffle_v8i32_32103210:
2658 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
2659 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2662 ; AVX2-SLOW-LABEL: shuffle_v8i32_32103210:
2663 ; AVX2-SLOW: # %bb.0:
2664 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
2665 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
2666 ; AVX2-SLOW-NEXT: retq
2668 ; AVX2-FAST-ALL-LABEL: shuffle_v8i32_32103210:
2669 ; AVX2-FAST-ALL: # %bb.0:
2670 ; AVX2-FAST-ALL-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
2671 ; AVX2-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
2672 ; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
2673 ; AVX2-FAST-ALL-NEXT: retq
2675 ; AVX2-FAST-PERLANE-LABEL: shuffle_v8i32_32103210:
2676 ; AVX2-FAST-PERLANE: # %bb.0:
2677 ; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
2678 ; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
2679 ; AVX2-FAST-PERLANE-NEXT: retq
2681 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_32103210:
2682 ; AVX512VL-SLOW: # %bb.0:
2683 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
2684 ; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
2685 ; AVX512VL-SLOW-NEXT: retq
2687 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8i32_32103210:
2688 ; AVX512VL-FAST-ALL: # %bb.0:
2689 ; AVX512VL-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
2690 ; AVX512VL-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
2691 ; AVX512VL-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
2692 ; AVX512VL-FAST-ALL-NEXT: retq
2694 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8i32_32103210:
2695 ; AVX512VL-FAST-PERLANE: # %bb.0:
2696 ; AVX512VL-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
2697 ; AVX512VL-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
2698 ; AVX512VL-FAST-PERLANE-NEXT: retq
2699 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
2700 ret <8 x i32> %shuffle
; Single-input shuffle of %a: the high four elements are reversed ([7,6,5,4])
; and that reversed group is written to both 128-bit halves of the result,
; so the low half of the result is sourced from the high half of %a.
2703 define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) {
2704 ; AVX1-LABEL: shuffle_v8i32_76547654:
2706 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
2707 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2710 ; AVX2-SLOW-LABEL: shuffle_v8i32_76547654:
2711 ; AVX2-SLOW: # %bb.0:
2712 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2713 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3]
2714 ; AVX2-SLOW-NEXT: retq
2716 ; AVX2-FAST-ALL-LABEL: shuffle_v8i32_76547654:
2717 ; AVX2-FAST-ALL: # %bb.0:
2718 ; AVX2-FAST-ALL-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
2719 ; AVX2-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
2720 ; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
2721 ; AVX2-FAST-ALL-NEXT: retq
2723 ; AVX2-FAST-PERLANE-LABEL: shuffle_v8i32_76547654:
2724 ; AVX2-FAST-PERLANE: # %bb.0:
2725 ; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2726 ; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3]
2727 ; AVX2-FAST-PERLANE-NEXT: retq
2729 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_76547654:
2730 ; AVX512VL-SLOW: # %bb.0:
2731 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2732 ; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3]
2733 ; AVX512VL-SLOW-NEXT: retq
2735 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8i32_76547654:
2736 ; AVX512VL-FAST-ALL: # %bb.0:
2737 ; AVX512VL-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
2738 ; AVX512VL-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
2739 ; AVX512VL-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
2740 ; AVX512VL-FAST-ALL-NEXT: retq
2742 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8i32_76547654:
2743 ; AVX512VL-FAST-PERLANE: # %bb.0:
2744 ; AVX512VL-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2745 ; AVX512VL-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3]
2746 ; AVX512VL-FAST-PERLANE-NEXT: retq
2747 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
2748 ret <8 x i32> %shuffle
; Single-input shuffle of %a that reverses all eight elements, so every
; element moves to the opposite 128-bit half of the result.
2751 define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) {
2752 ; AVX1-LABEL: shuffle_v8i32_76543210:
2754 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
2755 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2758 ; AVX2-SLOW-LABEL: shuffle_v8i32_76543210:
2759 ; AVX2-SLOW: # %bb.0:
2760 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2761 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
2762 ; AVX2-SLOW-NEXT: retq
2764 ; AVX2-FAST-ALL-LABEL: shuffle_v8i32_76543210:
2765 ; AVX2-FAST-ALL: # %bb.0:
2766 ; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
2767 ; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
2768 ; AVX2-FAST-ALL-NEXT: retq
2770 ; AVX2-FAST-PERLANE-LABEL: shuffle_v8i32_76543210:
2771 ; AVX2-FAST-PERLANE: # %bb.0:
2772 ; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2773 ; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
2774 ; AVX2-FAST-PERLANE-NEXT: retq
2776 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_76543210:
2777 ; AVX512VL-SLOW: # %bb.0:
2778 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2779 ; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
2780 ; AVX512VL-SLOW-NEXT: retq
2782 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8i32_76543210:
2783 ; AVX512VL-FAST-ALL: # %bb.0:
2784 ; AVX512VL-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
2785 ; AVX512VL-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
2786 ; AVX512VL-FAST-ALL-NEXT: retq
2788 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8i32_76543210:
2789 ; AVX512VL-FAST-PERLANE: # %bb.0:
2790 ; AVX512VL-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2791 ; AVX512VL-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
2792 ; AVX512VL-FAST-PERLANE-NEXT: retq
2793 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2794 ret <8 x i32> %shuffle
; Two-input shuffle: result low half is the reversed low half of %a
; (3,2,1,0); result high half is the reversed low half of %b (indices
; 11,10,9,8, written b,a,9,8 in the hexadecimal name).
2797 define <8 x i32> @shuffle_v8i32_3210ba98(<8 x i32> %a, <8 x i32> %b) {
2798 ; AVX1OR2-LABEL: shuffle_v8i32_3210ba98:
2800 ; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2801 ; AVX1OR2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2802 ; AVX1OR2-NEXT: retq
2804 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_3210ba98:
2805 ; AVX512VL-SLOW: # %bb.0:
2806 ; AVX512VL-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2807 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2808 ; AVX512VL-SLOW-NEXT: retq
2810 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8i32_3210ba98:
2811 ; AVX512VL-FAST-ALL: # %bb.0:
2812 ; AVX512VL-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm2 = [3,2,1,0,11,10,9,8]
2813 ; AVX512VL-FAST-ALL-NEXT: vpermt2d %ymm1, %ymm2, %ymm0
2814 ; AVX512VL-FAST-ALL-NEXT: retq
2816 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8i32_3210ba98:
2817 ; AVX512VL-FAST-PERLANE: # %bb.0:
2818 ; AVX512VL-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2819 ; AVX512VL-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2820 ; AVX512VL-FAST-PERLANE-NEXT: retq
2821 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
2822 ret <8 x i32> %shuffle
; Two-input shuffle: result low half is the reversed low half of %a
; (3,2,1,0); result high half is the reversed high half of %b (indices
; 15,14,13,12, written f,e,d,c in the hexadecimal name).
2825 define <8 x i32> @shuffle_v8i32_3210fedc(<8 x i32> %a, <8 x i32> %b) {
2826 ; AVX1OR2-LABEL: shuffle_v8i32_3210fedc:
2828 ; AVX1OR2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
2829 ; AVX1OR2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2830 ; AVX1OR2-NEXT: retq
2832 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_3210fedc:
2833 ; AVX512VL-SLOW: # %bb.0:
2834 ; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
2835 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2836 ; AVX512VL-SLOW-NEXT: retq
2838 ; AVX512VL-FAST-LABEL: shuffle_v8i32_3210fedc:
2839 ; AVX512VL-FAST: # %bb.0:
2840 ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [3,2,1,0,15,14,13,12]
2841 ; AVX512VL-FAST-NEXT: vpermt2d %ymm1, %ymm2, %ymm0
2842 ; AVX512VL-FAST-NEXT: retq
2843 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
2844 ret <8 x i32> %shuffle
; Two-input shuffle: result low half is the reversed high half of %a
; (7,6,5,4); result high half is the reversed high half of %b (15,14,13,12).
2847 define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) {
2848 ; AVX1OR2-LABEL: shuffle_v8i32_7654fedc:
2850 ; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
2851 ; AVX1OR2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2852 ; AVX1OR2-NEXT: retq
2854 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_7654fedc:
2855 ; AVX512VL-SLOW: # %bb.0:
2856 ; AVX512VL-SLOW-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
2857 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2858 ; AVX512VL-SLOW-NEXT: retq
2860 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8i32_7654fedc:
2861 ; AVX512VL-FAST-ALL: # %bb.0:
2862 ; AVX512VL-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm2 = [7,6,5,4,15,14,13,12]
2863 ; AVX512VL-FAST-ALL-NEXT: vpermt2d %ymm1, %ymm2, %ymm0
2864 ; AVX512VL-FAST-ALL-NEXT: retq
2866 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8i32_7654fedc:
2867 ; AVX512VL-FAST-PERLANE: # %bb.0:
2868 ; AVX512VL-FAST-PERLANE-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
2869 ; AVX512VL-FAST-PERLANE-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2870 ; AVX512VL-FAST-PERLANE-NEXT: retq
2871 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
2872 ret <8 x i32> %shuffle
; Two-input shuffle, operand-swapped counterpart of shuffle_v8i32_7654fedc:
; result low half is the reversed high half of %b (15,14,13,12); result high
; half is the reversed high half of %a (7,6,5,4).
2875 define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) {
2876 ; AVX1OR2-LABEL: shuffle_v8i32_fedc7654:
2878 ; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
2879 ; AVX1OR2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2880 ; AVX1OR2-NEXT: retq
2882 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_fedc7654:
2883 ; AVX512VL-SLOW: # %bb.0:
2884 ; AVX512VL-SLOW-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
2885 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2886 ; AVX512VL-SLOW-NEXT: retq
2888 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8i32_fedc7654:
2889 ; AVX512VL-FAST-ALL: # %bb.0:
2890 ; AVX512VL-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm2 = [7,6,5,4,15,14,13,12]
2891 ; AVX512VL-FAST-ALL-NEXT: vpermi2d %ymm0, %ymm1, %ymm2
2892 ; AVX512VL-FAST-ALL-NEXT: vmovdqa %ymm2, %ymm0
2893 ; AVX512VL-FAST-ALL-NEXT: retq
2895 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8i32_fedc7654:
2896 ; AVX512VL-FAST-PERLANE: # %bb.0:
2897 ; AVX512VL-FAST-PERLANE-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
2898 ; AVX512VL-FAST-PERLANE-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2899 ; AVX512VL-FAST-PERLANE-NEXT: retq
2900 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
2901 ret <8 x i32> %shuffle
; Two-input shuffle: result low half is the reversed low half of %b
; (11,10,9,8); result high half is the reversed high half of %a (7,6,5,4).
; Each source element stays in its own 128-bit half position.
2904 define <8 x i32> @shuffle_v8i32_ba987654(<8 x i32> %a, <8 x i32> %b) {
2905 ; AVX1OR2-LABEL: shuffle_v8i32_ba987654:
2907 ; AVX1OR2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2908 ; AVX1OR2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2909 ; AVX1OR2-NEXT: retq
2911 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_ba987654:
2912 ; AVX512VL-SLOW: # %bb.0:
2913 ; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2914 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2915 ; AVX512VL-SLOW-NEXT: retq
2917 ; AVX512VL-FAST-LABEL: shuffle_v8i32_ba987654:
2918 ; AVX512VL-FAST: # %bb.0:
2919 ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [3,2,1,0,15,14,13,12]
2920 ; AVX512VL-FAST-NEXT: vpermi2d %ymm0, %ymm1, %ymm2
2921 ; AVX512VL-FAST-NEXT: vmovdqa %ymm2, %ymm0
2922 ; AVX512VL-FAST-NEXT: retq
2923 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
2924 ret <8 x i32> %shuffle
; Two-input shuffle: result low half is the reversed low half of %b
; (11,10,9,8); result high half is the reversed low half of %a (3,2,1,0).
; The mask follows the hexadecimal name b,a,9,8,3,2,1,0, making this the
; operand-swapped counterpart of shuffle_v8i32_3210ba98 rather than a second
; copy of the ba987654 mask.
; NOTE(review): the assertions below are modelled on shuffle_v8i32_3210ba98
; with the operands swapped; regenerate them with
; utils/update_llc_test_checks.py to confirm the exact output.
2927 define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) {
2928 ; AVX1OR2-LABEL: shuffle_v8i32_ba983210:
2930 ; AVX1OR2-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2931 ; AVX1OR2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2932 ; AVX1OR2-NEXT: retq
2934 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_ba983210:
2935 ; AVX512VL-SLOW: # %bb.0:
2936 ; AVX512VL-SLOW-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2937 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2938 ; AVX512VL-SLOW-NEXT: retq
; AVX512VL-FAST-ALL-LABEL: shuffle_v8i32_ba983210:
; AVX512VL-FAST-ALL: # %bb.0:
; AVX512VL-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm2 = [3,2,1,0,11,10,9,8]
; AVX512VL-FAST-ALL-NEXT: vpermi2d %ymm0, %ymm1, %ymm2
; AVX512VL-FAST-ALL-NEXT: vmovdqa %ymm2, %ymm0
; AVX512VL-FAST-ALL-NEXT: retq
; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8i32_ba983210:
; AVX512VL-FAST-PERLANE: # %bb.0:
; AVX512VL-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX512VL-FAST-PERLANE-NEXT: retq
2946 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
2947 ret <8 x i32> %shuffle
; Two-input shuffle: result element 0 comes from %a[0]; result elements 1-7
; are %b[0..6] (mask indices 8-14), i.e. %b shifted up by one position.
2950 define <8 x i32> @shuffle_v8i32_089abcde(<8 x i32> %a, <8 x i32> %b) {
2951 ; AVX1-LABEL: shuffle_v8i32_089abcde:
2953 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
2954 ; AVX1-NEXT: vshufps {{.*#+}} ymm2 = ymm2[3,0],ymm1[0,0],ymm2[7,4],ymm1[4,4]
2955 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm2[0,2],ymm1[1,2],ymm2[4,6],ymm1[5,6]
2956 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
2959 ; AVX2-LABEL: shuffle_v8i32_089abcde:
2961 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = [u,0,1,2,3,4,5,6]
2962 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
2963 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
2966 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_089abcde:
2967 ; AVX512VL-SLOW: # %bb.0:
2968 ; AVX512VL-SLOW-NEXT: valignd {{.*#+}} ymm1 = ymm1[7,0,1,2,3,4,5,6]
2969 ; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
2970 ; AVX512VL-SLOW-NEXT: retq
2972 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8i32_089abcde:
2973 ; AVX512VL-FAST-ALL: # %bb.0:
2974 ; AVX512VL-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm2 = [8,0,1,2,3,4,5,6]
2975 ; AVX512VL-FAST-ALL-NEXT: vpermi2d %ymm0, %ymm1, %ymm2
2976 ; AVX512VL-FAST-ALL-NEXT: vmovdqa %ymm2, %ymm0
2977 ; AVX512VL-FAST-ALL-NEXT: retq
2979 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8i32_089abcde:
2980 ; AVX512VL-FAST-PERLANE: # %bb.0:
2981 ; AVX512VL-FAST-PERLANE-NEXT: valignd {{.*#+}} ymm1 = ymm1[7,0,1,2,3,4,5,6]
2982 ; AVX512VL-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
2983 ; AVX512VL-FAST-PERLANE-NEXT: retq
2984 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
2985 ret <8 x i32> %shuffle
; Two-input shuffle: result elements 0-1 come from %a[0..1]; result elements
; 2-7 are %b[0..5] (mask indices 8-13). Elements move in adjacent pairs, and
; the expected code for several runs uses 64-bit-element operations.
2988 define <8 x i32> @shuffle_v8i32_0189abcd(<8 x i32> %a, <8 x i32> %b) {
2989 ; AVX1-LABEL: shuffle_v8i32_0189abcd:
2991 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2992 ; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[2]
2995 ; AVX2-LABEL: shuffle_v8i32_0189abcd:
2997 ; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,1,2]
2998 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
3001 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_0189abcd:
3002 ; AVX512VL-SLOW: # %bb.0:
3003 ; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,1,2]
3004 ; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
3005 ; AVX512VL-SLOW-NEXT: retq
3007 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8i32_0189abcd:
3008 ; AVX512VL-FAST-ALL: # %bb.0:
3009 ; AVX512VL-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm2 = [4,0,1,2]
3010 ; AVX512VL-FAST-ALL-NEXT: vpermi2q %ymm0, %ymm1, %ymm2
3011 ; AVX512VL-FAST-ALL-NEXT: vmovdqa %ymm2, %ymm0
3012 ; AVX512VL-FAST-ALL-NEXT: retq
3014 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8i32_0189abcd:
3015 ; AVX512VL-FAST-PERLANE: # %bb.0:
3016 ; AVX512VL-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,1,2]
3017 ; AVX512VL-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
3018 ; AVX512VL-FAST-PERLANE-NEXT: retq
3019 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13>
3020 ret <8 x i32> %shuffle
; Two-input shuffle: result elements 0-2 come from %a[0..2]; result elements
; 3-7 are %b[0..4] (mask indices 8-12), i.e. %b shifted up by three positions.
3023 define <8 x i32> @shuffle_v8i32_01289abc(<8 x i32> %a, <8 x i32> %b) {
3024 ; AVX1-LABEL: shuffle_v8i32_01289abc:
3026 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
3027 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm2[3,0],ymm1[4,4],ymm2[7,4]
3028 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm2[1,2],ymm1[2,0],ymm2[5,6],ymm1[6,4]
3029 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
3032 ; AVX2-LABEL: shuffle_v8i32_01289abc:
3034 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = [u,u,u,0,1,2,3,4]
3035 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
3036 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
3039 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_01289abc:
3040 ; AVX512VL-SLOW: # %bb.0:
3041 ; AVX512VL-SLOW-NEXT: valignd {{.*#+}} ymm1 = ymm1[5,6,7,0,1,2,3,4]
3042 ; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
3043 ; AVX512VL-SLOW-NEXT: retq
3045 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8i32_01289abc:
3046 ; AVX512VL-FAST-ALL: # %bb.0:
3047 ; AVX512VL-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm2 = [8,9,10,0,1,2,3,4]
3048 ; AVX512VL-FAST-ALL-NEXT: vpermi2d %ymm0, %ymm1, %ymm2
3049 ; AVX512VL-FAST-ALL-NEXT: vmovdqa %ymm2, %ymm0
3050 ; AVX512VL-FAST-ALL-NEXT: retq
3052 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8i32_01289abc:
3053 ; AVX512VL-FAST-PERLANE: # %bb.0:
3054 ; AVX512VL-FAST-PERLANE-NEXT: valignd {{.*#+}} ymm1 = ymm1[5,6,7,0,1,2,3,4]
3055 ; AVX512VL-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
3056 ; AVX512VL-FAST-PERLANE-NEXT: retq
3057 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 9, i32 10, i32 11, i32 12>
3058 ret <8 x i32> %shuffle
; Shuffle of a zero vector (first operand) with %a (second operand): lanes 0
; and 4 are zero, lanes 3 and 7 are %a[0] and %a[4], all other lanes are undef.
3061 define <8 x i32> @shuffle_v8i32_zuu8zuuc(<8 x i32> %a) {
3062 ; AVX1-LABEL: shuffle_v8i32_zuu8zuuc:
3064 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
3065 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[0,0],ymm1[4,4],ymm0[4,4]
3068 ; AVX2OR512VL-LABEL: shuffle_v8i32_zuu8zuuc:
3069 ; AVX2OR512VL: # %bb.0:
3070 ; AVX2OR512VL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19]
3071 ; AVX2OR512VL-NEXT: retq
3072 %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 8, i32 0, i32 undef, i32 undef, i32 12>
3073 ret <8 x i32> %shuffle
; Shuffle of a zero vector (first operand) with %a (second operand): the result
; is %a[1], undef, %a[3], zero, %a[5], %a[6], %a[7], zero.
3076 define <8 x i32> @shuffle_v8i32_9ubzdefz(<8 x i32> %a) {
3077 ; AVX1-LABEL: shuffle_v8i32_9ubzdefz:
3079 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
3080 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[3,0],ymm1[7,4],ymm0[7,4]
3081 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
3084 ; AVX2OR512VL-LABEL: shuffle_v8i32_9ubzdefz:
3085 ; AVX2OR512VL: # %bb.0:
3086 ; AVX2OR512VL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,ymm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero
3087 ; AVX2OR512VL-NEXT: retq
3088 %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 9, i32 undef, i32 11, i32 0, i32 13, i32 14, i32 15, i32 0>
3089 ret <8 x i32> %shuffle
; Interleaves low elements of %b and %a in each 128-bit half: the result is
; %b[0], %a[0], undef, %a[1], %b[4], %a[4], undef, undef.
; The function name spells the fifth lane as 'b', but the mask index used there
; is 12 (hex c), i.e. %b[4].
3092 define <8 x i32> @shuffle_v8i32_80u1b4uu(<8 x i32> %a, <8 x i32> %b) {
3093 ; ALL-LABEL: shuffle_v8i32_80u1b4uu:
3095 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
3097 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 undef>
3098 ret <8 x i32> %shuffle
; Lanes 0-3 are undef; lanes 4-7 all hold %a[1]. %b is not referenced by the mask.
3101 define <8 x i32> @shuffle_v8i32_uuuu1111(<8 x i32> %a, <8 x i32> %b) {
3102 ; ALL-LABEL: shuffle_v8i32_uuuu1111:
3104 ; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
3105 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
3107 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
3108 ret <8 x i32> %shuffle
; Lanes 0-3 all hold %a[2]; lanes 4-7 are undef. %b is not referenced by the mask.
3111 define <8 x i32> @shuffle_v8i32_2222uuuu(<8 x i32> %a, <8 x i32> %b) {
3112 ; ALL-LABEL: shuffle_v8i32_2222uuuu:
3114 ; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,2,2]
3116 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 undef, i32 undef, i32 undef, i32 undef>
3117 ret <8 x i32> %shuffle
; Lanes 0-3 interleave %a[2], %b[2], %a[3], %b[3]; lanes 4-7 are undef.
3120 define <8 x i32> @shuffle_v8i32_2A3Buuuu(<8 x i32> %a, <8 x i32> %b) {
3121 ; ALL-LABEL: shuffle_v8i32_2A3Buuuu:
3123 ; ALL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3125 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
3126 ret <8 x i32> %shuffle
; Splat of %a[4] (the first element of the upper 128-bit half) to all eight
; lanes. %b is not referenced by the mask.
3129 define <8 x i32> @shuffle_v8i32_44444444(<8 x i32> %a, <8 x i32> %b) {
3130 ; AVX1-LABEL: shuffle_v8i32_44444444:
3132 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
3133 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
3136 ; AVX2-SLOW-LABEL: shuffle_v8i32_44444444:
3137 ; AVX2-SLOW: # %bb.0:
3138 ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm0
3139 ; AVX2-SLOW-NEXT: vbroadcastss %xmm0, %ymm0
3140 ; AVX2-SLOW-NEXT: retq
3142 ; AVX2-FAST-ALL-LABEL: shuffle_v8i32_44444444:
3143 ; AVX2-FAST-ALL: # %bb.0:
3144 ; AVX2-FAST-ALL-NEXT: vbroadcastss {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4]
3145 ; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
3146 ; AVX2-FAST-ALL-NEXT: retq
3148 ; AVX2-FAST-PERLANE-LABEL: shuffle_v8i32_44444444:
3149 ; AVX2-FAST-PERLANE: # %bb.0:
3150 ; AVX2-FAST-PERLANE-NEXT: vextractf128 $1, %ymm0, %xmm0
3151 ; AVX2-FAST-PERLANE-NEXT: vbroadcastss %xmm0, %ymm0
3152 ; AVX2-FAST-PERLANE-NEXT: retq
3154 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_44444444:
3155 ; AVX512VL-SLOW: # %bb.0:
3156 ; AVX512VL-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm0
3157 ; AVX512VL-SLOW-NEXT: vbroadcastss %xmm0, %ymm0
3158 ; AVX512VL-SLOW-NEXT: retq
3160 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8i32_44444444:
3161 ; AVX512VL-FAST-ALL: # %bb.0:
3162 ; AVX512VL-FAST-ALL-NEXT: vbroadcastss {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4]
3163 ; AVX512VL-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
3164 ; AVX512VL-FAST-ALL-NEXT: retq
3166 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8i32_44444444:
3167 ; AVX512VL-FAST-PERLANE: # %bb.0:
3168 ; AVX512VL-FAST-PERLANE-NEXT: vextractf128 $1, %ymm0, %xmm0
3169 ; AVX512VL-FAST-PERLANE-NEXT: vbroadcastss %xmm0, %ymm0
3170 ; AVX512VL-FAST-PERLANE-NEXT: retq
3171 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
3172 ret <8 x i32> %shuffle
; Splat of element 4 to all eight lanes, where the <8 x i32> shuffle operands
; are produced by bitcasting <8 x float> arguments. The expected instructions
; are the same as for the splat of element 4 without the bitcasts.
3175 define <8 x i32> @shuffle_v8i32_44444444_bc(<8 x float> %a, <8 x float> %b) {
3176 ; AVX1-LABEL: shuffle_v8i32_44444444_bc:
3178 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
3179 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
3182 ; AVX2-SLOW-LABEL: shuffle_v8i32_44444444_bc:
3183 ; AVX2-SLOW: # %bb.0:
3184 ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm0
3185 ; AVX2-SLOW-NEXT: vbroadcastss %xmm0, %ymm0
3186 ; AVX2-SLOW-NEXT: retq
3188 ; AVX2-FAST-ALL-LABEL: shuffle_v8i32_44444444_bc:
3189 ; AVX2-FAST-ALL: # %bb.0:
3190 ; AVX2-FAST-ALL-NEXT: vbroadcastss {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4]
3191 ; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
3192 ; AVX2-FAST-ALL-NEXT: retq
3194 ; AVX2-FAST-PERLANE-LABEL: shuffle_v8i32_44444444_bc:
3195 ; AVX2-FAST-PERLANE: # %bb.0:
3196 ; AVX2-FAST-PERLANE-NEXT: vextractf128 $1, %ymm0, %xmm0
3197 ; AVX2-FAST-PERLANE-NEXT: vbroadcastss %xmm0, %ymm0
3198 ; AVX2-FAST-PERLANE-NEXT: retq
3200 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_44444444_bc:
3201 ; AVX512VL-SLOW: # %bb.0:
3202 ; AVX512VL-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm0
3203 ; AVX512VL-SLOW-NEXT: vbroadcastss %xmm0, %ymm0
3204 ; AVX512VL-SLOW-NEXT: retq
3206 ; AVX512VL-FAST-ALL-LABEL: shuffle_v8i32_44444444_bc:
3207 ; AVX512VL-FAST-ALL: # %bb.0:
3208 ; AVX512VL-FAST-ALL-NEXT: vbroadcastss {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4]
3209 ; AVX512VL-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
3210 ; AVX512VL-FAST-ALL-NEXT: retq
3212 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8i32_44444444_bc:
3213 ; AVX512VL-FAST-PERLANE: # %bb.0:
3214 ; AVX512VL-FAST-PERLANE-NEXT: vextractf128 $1, %ymm0, %xmm0
3215 ; AVX512VL-FAST-PERLANE-NEXT: vbroadcastss %xmm0, %ymm0
3216 ; AVX512VL-FAST-PERLANE-NEXT: retq
3217 %tmp0 = bitcast <8 x float> %a to <8 x i32>
3218 %tmp1 = bitcast <8 x float> %b to <8 x i32>
3219 %shuffle = shufflevector <8 x i32> %tmp0, <8 x i32> %tmp1, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
3220 ret <8 x i32> %shuffle
; Lanes 0-3 all hold %a[5] (an element of the upper 128-bit half); lanes 4-7
; are undef. %b is not referenced by the mask.
3223 define <8 x i32> @shuffle_v8i32_5555uuuu(<8 x i32> %a, <8 x i32> %b) {
3224 ; ALL-LABEL: shuffle_v8i32_5555uuuu:
3226 ; ALL-NEXT: vextractf128 $1, %ymm0, %xmm0
3227 ; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
3229 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
3230 ret <8 x i32> %shuffle
; Irregular two-input shuffle mixing both 128-bit halves of both operands:
; the result is %a[0], %b[5], %b[4], %b[5], %a[3], %b[7], %a[1], %a[4].
3233 define <8 x i32> @shuffle_v8i32_0dcd3f14(<8 x i32> %a, <8 x i32> %b) {
3234 ; AVX1-LABEL: shuffle_v8i32_0dcd3f14:
3236 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3237 ; AVX1-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3]
3238 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[3,1,1,0]
3239 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3240 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,2,3]
3241 ; AVX1-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[0,0,3,2]
3242 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5],ymm0[6,7]
3245 ; AVX2-LABEL: shuffle_v8i32_0dcd3f14:
3247 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = [0,u,u,u,3,u,1,4]
3248 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
3249 ; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,2,3,3]
3250 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5],ymm0[6,7]
3253 ; AVX512VL-LABEL: shuffle_v8i32_0dcd3f14:
3254 ; AVX512VL: # %bb.0:
3255 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [8,5,4,5,11,7,9,12]
3256 ; AVX512VL-NEXT: vpermi2d %ymm0, %ymm1, %ymm2
3257 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
3258 ; AVX512VL-NEXT: retq
3259 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 13, i32 12, i32 13, i32 3, i32 15, i32 1, i32 4>
3260 ret <8 x i32> %shuffle
; Only lane 6 of the result is defined (it holds %a[7]); every other lane is
; undef, so any in-lane shuffle that places element 7 in lane 6 is acceptable.
3264 define <8 x i32> @shuffle_v8i32_uuuuuu7u(<8 x i32> %a, <8 x i32> %b) nounwind {
3265 ; AVX1-LABEL: shuffle_v8i32_uuuuuu7u:
3267 ; AVX1-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
3270 ; AVX2OR512VL-LABEL: shuffle_v8i32_uuuuuu7u:
3271 ; AVX2OR512VL: # %bb.0:
3272 ; AVX2OR512VL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,3,3,3,7,7,7,7]
3273 ; AVX2OR512VL-NEXT: retq
3274 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 undef>
3275 ret <8 x i32> %shuffle
; Reverses each <4 x i32> argument separately (%1 and %2) and then concatenates
; the two reversed vectors into one <8 x i32> value (%3). The check lines expect
; the two 128-bit reversals to be done as one 256-bit vshufps after the insert.
3278 define <8 x i32> @shuffle_v8i32_32107654_v4i32(<4 x i32> %a, <4 x i32> %b) {
3279 ; ALL-LABEL: shuffle_v8i32_32107654_v4i32:
3281 ; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
3282 ; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3283 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
3285 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
3286 %2 = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
3287 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Widening shuffle of two 4-element inputs: lanes 0-3 hold %a[0] and lanes 4-7
; hold %b[0] (with 4-element operands, mask index 4 is element 0 of the second
; operand). The name says v4f32, but the arguments are <4 x i32>.
3291 define <8 x i32> @shuffle_v8i32_00004444_v4f32(<4 x i32> %a, <4 x i32> %b) {
3292 ; ALL-LABEL: shuffle_v8i32_00004444_v4f32:
3294 ; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
3295 ; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3296 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
3298 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
; Loads one float from %p, inserts it into lane 0 of a <4 x float>, and splats
; that lane to eight lanes with an all-zero shuffle mask. The check lines expect
; the load to be folded into a single broadcast from memory.
3302 define <8 x float> @splat_mem_v8f32_2(ptr %p) {
3303 ; ALL-LABEL: splat_mem_v8f32_2:
3305 ; ALL-NEXT: vbroadcastss (%rdi), %ymm0
3307 %1 = load float, ptr %p
3308 %2 = insertelement <4 x float> undef, float %1, i32 0
3309 %3 = shufflevector <4 x float> %2, <4 x float> undef, <8 x i32> zeroinitializer
; Splats element 0 of the <4 x float> argument %r to all eight lanes of the
; <8 x float> result (all-zero shuffle mask).
3313 define <8 x float> @splat_v8f32(<4 x float> %r) {
3314 ; AVX1-LABEL: splat_v8f32:
3316 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
3317 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
3320 ; AVX2OR512VL-LABEL: splat_v8f32:
3321 ; AVX2OR512VL: # %bb.0:
3322 ; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %ymm0
3323 ; AVX2OR512VL-NEXT: retq
3324 %1 = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> zeroinitializer
3329 ; Shuffle to logical bit shifts
; Shuffle of %a with a zero vector: the result is zero, %a[0], undef, %a[2],
; zero, undef, zero, %a[6]. Each even 32-bit element moves up into the odd
; position of its 64-bit pair, which the non-AVX1 check lines express as a
; 64-bit left shift by 32.
3332 define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) {
3333 ; AVX1-LABEL: shuffle_v8i32_z0U2zUz6:
3335 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
3336 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
3337 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
3340 ; AVX2OR512VL-LABEL: shuffle_v8i32_z0U2zUz6:
3341 ; AVX2OR512VL: # %bb.0:
3342 ; AVX2OR512VL-NEXT: vpsllq $32, %ymm0, %ymm0
3343 ; AVX2OR512VL-NEXT: retq
3344 %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6>
3345 ret <8 x i32> %shuffle
; Shuffle of %a with a zero vector: the result is %a[1], undef, %a[3], zero,
; %a[5], zero, undef, undef. Each odd 32-bit element moves down into the even
; position of its 64-bit pair, which the non-AVX1 check lines express as a
; 64-bit logical right shift by 32.
3348 define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) {
3349 ; AVX1-LABEL: shuffle_v8i32_1U3z5zUU:
3351 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
3352 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
3353 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
3356 ; AVX2OR512VL-LABEL: shuffle_v8i32_1U3z5zUU:
3357 ; AVX2OR512VL: # %bb.0:
3358 ; AVX2OR512VL-NEXT: vpsrlq $32, %ymm0, %ymm0
3359 ; AVX2OR512VL-NEXT: retq
3360 %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef>
3361 ret <8 x i32> %shuffle
; Per 128-bit half: the last element of %b's half followed by the first three
; elements of %a's half, i.e. %b[3], %a[0..2], %b[7], %a[4..6].
3364 define <8 x i32> @shuffle_v8i32_B012F456(<8 x i32> %a, <8 x i32> %b) {
3365 ; AVX1-LABEL: shuffle_v8i32_B012F456:
3367 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[0,0],ymm1[7,4],ymm0[4,4]
3368 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[1,2],ymm1[4,6],ymm0[5,6]
3371 ; AVX2OR512VL-LABEL: shuffle_v8i32_B012F456:
3372 ; AVX2OR512VL: # %bb.0:
3373 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11],ymm1[28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27]
3374 ; AVX2OR512VL-NEXT: retq
3375 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
3376 ret <8 x i32> %shuffle
; Per 128-bit half: the last three elements of %a's half followed by the first
; element of %b's half, i.e. %a[1..3], %b[0], %a[5..7], %b[4].
3379 define <8 x i32> @shuffle_v8i32_1238567C(<8 x i32> %a, <8 x i32> %b) {
3380 ; AVX1-LABEL: shuffle_v8i32_1238567C:
3382 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4]
3383 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
3386 ; AVX2OR512VL-LABEL: shuffle_v8i32_1238567C:
3387 ; AVX2OR512VL: # %bb.0:
3388 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3],ymm0[20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19]
3389 ; AVX2OR512VL-NEXT: retq
3390 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
3391 ret <8 x i32> %shuffle
; Per 128-bit half: the last three elements of %b's half followed by the first
; element of %a's half, i.e. %b[1..3], %a[0], %b[5..7], %a[4].
3394 define <8 x i32> @shuffle_v8i32_9AB0DEF4(<8 x i32> %a, <8 x i32> %b) {
3395 ; AVX1-LABEL: shuffle_v8i32_9AB0DEF4:
3397 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[3,0],ymm0[4,4],ymm1[7,4]
3398 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,2],ymm0[2,0],ymm1[5,6],ymm0[6,4]
3401 ; AVX2OR512VL-LABEL: shuffle_v8i32_9AB0DEF4:
3402 ; AVX2OR512VL: # %bb.0:
3403 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3],ymm1[20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19]
3404 ; AVX2OR512VL-NEXT: retq
3405 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 10, i32 11, i32 0, i32 13, i32 14, i32 15, i32 4>
3406 ret <8 x i32> %shuffle
; Per 128-bit half: the last element of %a's half followed by the first three
; elements of %b's half, i.e. %a[3], %b[0..2], %a[7], %b[4..6].
3409 define <8 x i32> @shuffle_v8i32_389A7CDE(<8 x i32> %a, <8 x i32> %b) {
3410 ; AVX1-LABEL: shuffle_v8i32_389A7CDE:
3412 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,0],ymm1[0,0],ymm0[7,4],ymm1[4,4]
3413 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[1,2],ymm0[4,6],ymm1[5,6]
3416 ; AVX2OR512VL-LABEL: shuffle_v8i32_389A7CDE:
3417 ; AVX2OR512VL: # %bb.0:
3418 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11],ymm0[28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27]
3419 ; AVX2OR512VL-NEXT: retq
3420 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 8, i32 9, i32 10, i32 7, i32 12, i32 13, i32 14>
3421 ret <8 x i32> %shuffle
; Single-input rotation inside each 128-bit half: the last element of the half
; moves to the front (%a[3], %a[0..2], %a[7], %a[4..6]). %b is not referenced.
3424 define <8 x i32> @shuffle_v8i32_30127456(<8 x i32> %a, <8 x i32> %b) {
3425 ; ALL-LABEL: shuffle_v8i32_30127456:
3427 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
3429 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>
3430 ret <8 x i32> %shuffle
; Single-input rotation inside each 128-bit half: the first element of the half
; moves to the back (%a[1..3], %a[0], %a[5..7], %a[4]). %b is not referenced.
3433 define <8 x i32> @shuffle_v8i32_12305674(<8 x i32> %a, <8 x i32> %b) {
3434 ; ALL-LABEL: shuffle_v8i32_12305674:
3436 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
3438 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
3439 ret <8 x i32> %shuffle
; Concatenation of two loaded <2 x float> values via three shuffles: each load
; is first widened to <8 x float> (upper lanes undef), then lanes 0-1 of each
; widened value are combined into lanes 0-3 of the result; lanes 4-7 are undef.
3442 define <8x float> @concat_v2f32_1(ptr %tmp64, ptr %tmp65) {
3443 ; ALL-LABEL: concat_v2f32_1:
3444 ; ALL: # %bb.0: # %entry
3445 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
3446 ; ALL-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
3449 %tmp74 = load <2 x float>, ptr %tmp65, align 8
3450 %tmp72 = load <2 x float>, ptr %tmp64, align 8
3451 %tmp73 = shufflevector <2 x float> %tmp72, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
3452 %tmp75 = shufflevector <2 x float> %tmp74, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
3453 %tmp76 = shufflevector <8 x float> %tmp73, <8 x float> %tmp75, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
3454 ret <8 x float> %tmp76
; Concatenation of two loaded <2 x float> values with a single widening
; shuffle: lanes 0-3 of the <8 x float> result are the two loads back to back,
; lanes 4-7 are undef.
3457 define <8x float> @concat_v2f32_2(ptr %tmp64, ptr %tmp65) {
3458 ; ALL-LABEL: concat_v2f32_2:
3459 ; ALL: # %bb.0: # %entry
3460 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
3461 ; ALL-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
3464 %tmp74 = load <2 x float>, ptr %tmp65, align 8
3465 %tmp72 = load <2 x float>, ptr %tmp64, align 8
3466 %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
3467 ret <8 x float> %tmp76
; Concatenation of two loaded <2 x float> values in two steps: first joined
; into a <4 x float>, then widened to <8 x float> with lanes 4-7 undef.
3470 define <8x float> @concat_v2f32_3(ptr %tmp64, ptr %tmp65) {
3471 ; ALL-LABEL: concat_v2f32_3:
3472 ; ALL: # %bb.0: # %entry
3473 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
3474 ; ALL-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
3477 %tmp74 = load <2 x float>, ptr %tmp65, align 8
3478 %tmp72 = load <2 x float>, ptr %tmp64, align 8
3479 %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3480 %res = shufflevector <4 x float> %tmp76, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
3481 ret <8 x float> %res
; Loads one i32 from %ptr into lane 0 and takes lanes 1-7 from a zero vector.
; The check lines expect one scalar load that also zeroes the remaining lanes.
3484 define <8 x i32> @insert_mem_and_zero_v8i32(ptr %ptr) {
3485 ; ALL-LABEL: insert_mem_and_zero_v8i32:
3487 ; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
3489 %a = load i32, ptr %ptr
3490 %v = insertelement <8 x i32> undef, i32 %a, i32 0
3491 %shuffle = shufflevector <8 x i32> %v, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
3492 ret <8 x i32> %shuffle
; Extracts the low four elements of %a and the high four elements of %b as
; <4 x i32> values and concatenates them (%shuf). The check lines expect the
; extract/concat sequence to collapse into a single blend.
3495 define <8 x i32> @concat_v8i32_0123CDEF(<8 x i32> %a, <8 x i32> %b) {
3496 ; ALL-LABEL: concat_v8i32_0123CDEF:
3498 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
3500 %alo = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3501 %bhi = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3502 %shuf = shufflevector <4 x i32> %alo, <4 x i32> %bhi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Extracts the upper halves of %a0 (mask 4-7) and %a1 (mask 12-15 of the
; two-operand shuffle), bitcasts each to <2 x i64>, concatenates them as
; <4 x i64>, and bitcasts the result back to <8 x i32>.
3506 define <8 x i32> @concat_v8i32_4567CDEF_bc(<8 x i32> %a0, <8 x i32> %a1) {
3507 ; AVX1OR2-LABEL: concat_v8i32_4567CDEF_bc:
3509 ; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
3510 ; AVX1OR2-NEXT: retq
3512 ; AVX512VL-LABEL: concat_v8i32_4567CDEF_bc:
3513 ; AVX512VL: # %bb.0:
3514 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
3515 ; AVX512VL-NEXT: retq
3516 %a0hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3517 %a1hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
3518 %bc0hi = bitcast <4 x i32> %a0hi to <2 x i64>
3519 %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64>
3520 %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3521 %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x i32>
3522 ret <8 x i32> %shuffle32
; Float-argument variant of the upper-half concatenation: %f0 is viewed as
; <4 x i64> and %f1 as <8 x i32>, their upper halves are extracted, joined as
; <4 x i64>, and the result is bitcast to <8 x float>.
3525 define <8 x float> @concat_v8f32_4567CDEF_bc(<8 x float> %f0, <8 x float> %f1) {
3526 ; ALL-LABEL: concat_v8f32_4567CDEF_bc:
3528 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
3530 %a0 = bitcast <8 x float> %f0 to <4 x i64>
3531 %a1 = bitcast <8 x float> %f1 to <8 x i32>
3532 %a0hi = shufflevector <4 x i64> %a0, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
3533 %a1hi = shufflevector <8 x i32> %a1, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3534 %bc0hi = bitcast <2 x i64> %a0hi to <2 x i64>
3535 %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64>
3536 %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3537 %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x float>
3538 ret <8 x float> %shuffle32
; Loads one i32 from %ptr, inserts it into lane 0 of a zero <4 x i32>, and
; splats that lane to eight lanes. The check lines expect a single broadcast
; from memory.
3541 define <8 x i32> @insert_dup_mem_v8i32(ptr %ptr) {
3542 ; ALL-LABEL: insert_dup_mem_v8i32:
3544 ; ALL-NEXT: vbroadcastss (%rdi), %ymm0
3546 %tmp = load i32, ptr %ptr, align 4
3547 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
3548 %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <8 x i32> zeroinitializer
; Shift by one element across the full 256-bit width of the concatenated
; operands: the result is %a[1..7] followed by %b[0].
3552 define <8 x i32> @shuffle_v8i32_12345678(<8 x i32> %a, <8 x i32> %b) {
3553 ; AVX1-LABEL: shuffle_v8i32_12345678:
3555 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1]
3556 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4]
3557 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
3560 ; AVX2-LABEL: shuffle_v8i32_12345678:
3562 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1]
3563 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3],ymm0[20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19]
3566 ; AVX512VL-LABEL: shuffle_v8i32_12345678:
3567 ; AVX512VL: # %bb.0:
3568 ; AVX512VL-NEXT: valignd {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7],ymm1[0]
3569 ; AVX512VL-NEXT: retq
3570 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
3571 ret <8 x i32> %shuffle
; Single-input rotation by one element across the full vector: the result is
; %a[1..7] followed by %a[0].
3574 define <8 x i32> @shuffle_v8i32_12345670(<8 x i32> %a) {
3575 ; AVX1-LABEL: shuffle_v8i32_12345670:
3577 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
3578 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4]
3579 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
3582 ; AVX2-LABEL: shuffle_v8i32_12345670:
3584 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,3,4,5,6,7,0]
3585 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
3588 ; AVX512VL-LABEL: shuffle_v8i32_12345670:
3589 ; AVX512VL: # %bb.0:
3590 ; AVX512VL-NEXT: valignd {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,0]
3591 ; AVX512VL-NEXT: retq
3592 %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0>
3593 ret <8 x i32> %shuffle
; Adds the even-indexed elements of the concatenation of %a and %b to the
; odd-indexed ones, i.e. a pairwise sum with %a's pairs in lanes 0-3 and %b's
; pairs in lanes 4-7. The check lines expect a horizontal add to be formed.
3596 define <8 x float> @add_v8f32_02468ACE_13579BDF(<8 x float> %a, <8 x float> %b) {
3597 ; AVX1-LABEL: add_v8f32_02468ACE_13579BDF:
3598 ; AVX1: # %bb.0: # %entry
3599 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
3600 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3601 ; AVX1-NEXT: vhaddps %ymm2, %ymm0, %ymm0
3604 ; AVX2OR512VL-LABEL: add_v8f32_02468ACE_13579BDF:
3605 ; AVX2OR512VL: # %bb.0: # %entry
3606 ; AVX2OR512VL-NEXT: vhaddps %ymm1, %ymm0, %ymm0
3607 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
3608 ; AVX2OR512VL-NEXT: retq
3610 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
3611 %shuffle1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
3612 %add = fadd <8 x float> %shuffle, %shuffle1
3613 ret <8 x float> %add
; Pairwise sum of adjacent elements with the operand order swapped in the
; result: %b's pair sums land in lanes 0-3 and %a's pair sums in lanes 4-7.
; The check lines expect a horizontal add to be formed.
3616 define <8 x float> @add_v8f32_8ACE0246_9BDF1357(<8 x float> %a, <8 x float> %b) {
3617 ; AVX1-LABEL: add_v8f32_8ACE0246_9BDF1357:
3618 ; AVX1: # %bb.0: # %entry
3619 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3],ymm0[2,3]
3620 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3621 ; AVX1-NEXT: vhaddps %ymm2, %ymm0, %ymm0
3624 ; AVX2OR512VL-LABEL: add_v8f32_8ACE0246_9BDF1357:
3625 ; AVX2OR512VL: # %bb.0: # %entry
3626 ; AVX2OR512VL-NEXT: vhaddps %ymm1, %ymm0, %ymm0
3627 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,3,0,2]
3628 ; AVX2OR512VL-NEXT: retq
3630 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 0, i32 2, i32 4, i32 6>
3631 %shuffle1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 1, i32 3, i32 5, i32 7>
3632 %add = fadd <8 x float> %shuffle, %shuffle1
3633 ret <8 x float> %add
; Integer pairwise sum: the even-indexed elements of the concatenation of %a
; and %b are added to the odd-indexed ones, with %a's pair sums in lanes 0-3
; and %b's in lanes 4-7. The check lines expect vphaddd to be formed.
3636 define <8 x i32> @add_v8i32_02468ACE_13579BDF(<8 x i32> %a, <8 x i32> %b) {
3637 ; AVX1-LABEL: add_v8i32_02468ACE_13579BDF:
3638 ; AVX1: # %bb.0: # %entry
3639 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3640 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
3641 ; AVX1-NEXT: vphaddd %xmm2, %xmm3, %xmm2
3642 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2
3643 ; AVX1-NEXT: vphaddd %xmm1, %xmm0, %xmm0
3644 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
3645 ; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm2[0],ymm0[3],ymm2[3]
3648 ; AVX2OR512VL-LABEL: add_v8i32_02468ACE_13579BDF:
3649 ; AVX2OR512VL: # %bb.0: # %entry
3650 ; AVX2OR512VL-NEXT: vphaddd %ymm1, %ymm0, %ymm0
3651 ; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
3652 ; AVX2OR512VL-NEXT: retq
3654 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
3655 %shuffle1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
3656 %add = add <8 x i32> %shuffle, %shuffle1
; Integer pairwise sum with the operand order swapped in the result: %b's pair
; sums land in lanes 0-3 and %a's pair sums in lanes 4-7. The check lines
; expect vphaddd to be formed.
3660 define <8 x i32> @add_v8i32_8ACE0246_9BDF1357(<8 x i32> %a, <8 x i32> %b) {
3661 ; AVX1-LABEL: add_v8i32_8ACE0246_9BDF1357:
3662 ; AVX1: # %bb.0: # %entry
3663 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3664 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
3665 ; AVX1-NEXT: vphaddd %xmm2, %xmm3, %xmm2
3666 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2
3667 ; AVX1-NEXT: vphaddd %xmm1, %xmm0, %xmm0
3668 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
3669 ; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[2],ymm2[2]
3672 ; AVX2OR512VL-LABEL: add_v8i32_8ACE0246_9BDF1357:
3673 ; AVX2OR512VL: # %bb.0: # %entry
3674 ; AVX2OR512VL-NEXT: vphaddd %ymm1, %ymm0, %ymm0
3675 ; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,0,2]
3676 ; AVX2OR512VL-NEXT: retq
3678 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 0, i32 2, i32 4, i32 6>
3679 %shuffle1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 1, i32 3, i32 5, i32 7>
3680 %add = add <8 x i32> %shuffle, %shuffle1
3684 ; This test used to crash due to bad handling of concat_vectors after a bitcast
3685 ; in lowerVectorShuffleAsBroadcast.
; Concatenates %x and %y into <8 x float>, reinterprets the result as
; <4 x i64>, extracts 64-bit element 3, and takes element 1 of that value
; viewed as <2 x float>. The extracted float goes into lane 4 and %z into
; lane 5 of an otherwise undef <8 x float>.
3686 define <8 x float> @broadcast_concat_crash(<4 x float> %x, <4 x float> %y, float %z) {
3687 ; AVX1-LABEL: broadcast_concat_crash:
3688 ; AVX1: # %bb.0: # %entry
3689 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm1[3,3,3,3]
3690 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
3691 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
3694 ; AVX2-LABEL: broadcast_concat_crash:
3695 ; AVX2: # %bb.0: # %entry
3696 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3697 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
3698 ; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
3699 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
3700 ; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
3703 ; AVX512VL-SLOW-LABEL: broadcast_concat_crash:
3704 ; AVX512VL-SLOW: # %bb.0: # %entry
3705 ; AVX512VL-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3706 ; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
3707 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
3708 ; AVX512VL-SLOW-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
3709 ; AVX512VL-SLOW-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
3710 ; AVX512VL-SLOW-NEXT: retq
3712 ; AVX512VL-FAST-LABEL: broadcast_concat_crash:
3713 ; AVX512VL-FAST: # %bb.0: # %entry
3714 ; AVX512VL-FAST-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3715 ; AVX512VL-FAST-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
3716 ; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} xmm1 = [1,4,3,3]
3717 ; AVX512VL-FAST-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1
3718 ; AVX512VL-FAST-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3719 ; AVX512VL-FAST-NEXT: retq
3721 %tmp = shufflevector <4 x float> %x, <4 x float> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3722 %bc = bitcast <8 x float> %tmp to <4 x i64>
3723 %tmp1 = extractelement <4 x i64> %bc, i32 3
3724 %tmp2 = bitcast i64 %tmp1 to <2 x float>
3725 %tmp4 = extractelement <2 x float> %tmp2, i32 1
3726 %tmp5 = insertelement <8 x float> undef, float %tmp4, i32 4
3727 %tmp6 = insertelement <8 x float> %tmp5, float %z, i32 5
3728 ret <8 x float> %tmp6
3731 ; PR40434: https://bugs.llvm.org/show_bug.cgi?id=40434
; Lanes 0-3 are %x[2], %y[6], %x[3], %y[7]; lanes 4-7 are undef. The elements
; from %y live in its upper 128-bit half, so the check lines expect an extract
; followed by a 128-bit high unpack.
3733 define <8 x i32> @unpckh_v8i32(<8 x i32> %x, <8 x i32> %y) {
3734 ; ALL-LABEL: unpckh_v8i32:
3736 ; ALL-NEXT: vextractf128 $1, %ymm1, %xmm1
3737 ; ALL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3739 %unpckh = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 2, i32 14, i32 3, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
3740 ret <8 x i32> %unpckh
3743 ; Same as above but with floats.
; Float version: lanes 0-3 are %x[2], %y[6], %x[3], %y[7]; lanes 4-7 are undef.
3745 define <8 x float> @unpckh_v8f32(<8 x float> %x, <8 x float> %y) {
3746 ; ALL-LABEL: unpckh_v8f32:
3748 ; ALL-NEXT: vextractf128 $1, %ymm1, %xmm1
3749 ; ALL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3751 %unpckh = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 2, i32 14, i32 3, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
3752 ret <8 x float> %unpckh
3755 ; Alternate form of the above - make sure we don't have conflicting transforms.
; Two-step form: first blends lanes 0-5 of %x with lanes 6-7 of %y, then
; permutes the blend with <2,6,3,7,u,u,u,u>, so lanes 0-3 of %r are %x[2],
; %y[6], %x[3], %y[7]. The check lines expect the pair of shuffles to lower to
; an extract plus a 128-bit high unpack.
3757 define <8 x i32> @blend_perm_v8i32(<8 x i32> %x, <8 x i32> %y) {
3758 ; ALL-LABEL: blend_perm_v8i32:
3760 ; ALL-NEXT: vextractf128 $1, %ymm1, %xmm1
3761 ; ALL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3763 %unpckh = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 14, i32 15>
3764 %r = shufflevector <8 x i32> %unpckh, <8 x i32> undef, <8 x i32> <i32 2, i32 6, i32 3, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
3768 ; Same as above but with floats.
; Float version of the blend-then-permute form: lanes 0-3 of %r are %x[2],
; %y[6], %x[3], %y[7]; lanes 4-7 are undef.
3770 define <8 x float> @blend_perm_v8f32(<8 x float> %x, <8 x float> %y) {
3771 ; ALL-LABEL: blend_perm_v8f32:
3773 ; ALL-NEXT: vextractf128 $1, %ymm1, %xmm1
3774 ; ALL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3776 %unpckh = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 14, i32 15>
3777 %r = shufflevector <8 x float> %unpckh, <8 x float> undef, <8 x i32> <i32 2, i32 6, i32 3, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
3781 ; Another variation of the above - make sure we don't have conflicting transforms.
; Single-input form: lanes 0-3 of %r are %x[2], %x[6], %x[3], %x[7], which
; interleaves elements from the low and high 128-bit halves of the same vector;
; lanes 4-7 are undef.
3783 define <8 x i32> @unpckh_v8i32_unary(<8 x i32> %x) {
3784 ; ALL-LABEL: unpckh_v8i32_unary:
3786 ; ALL-NEXT: vextractf128 $1, %ymm0, %xmm1
3787 ; ALL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3789 %r = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32> <i32 2, i32 6, i32 3, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
3793 ; Same as above but with floats.
; Single-input mask <2,6,3,7>: elements 2 and 3 of the low half of %x are
; interleaved with elements 6 and 7 of its high half; the upper four result
; lanes are undef. The checks expect an extract of the high half of ymm0 into
; xmm1 followed by an xmm-width vunpckhps.
3795 define <8 x float> @unpckh_v8f32_unary(<8 x float> %x) {
3796 ; ALL-LABEL: unpckh_v8f32_unary:
3798 ; ALL-NEXT: vextractf128 $1, %ymm0, %xmm1
3799 ; ALL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3801 %r = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 2, i32 6, i32 3, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
3805 ; FIXME: Why are integer and FP (below) lowering different for AVX1?
; The mask <2,14,3,14> produces x[2],y[6],x[3],y[6] in the low four lanes (the
; upper four are undef). Element 6 of %y is used twice, so this is not a plain
; unpack of the two inputs.
; Expected lowerings recorded below:
;  - AVX1 extracts the upper half of %y, splats its element 2 (which is y[6])
;    with vshufps, then uses vunpckhps.
;  - AVX2 blends the low half of %x with the high half of %y and applies
;    vpermps with the index vector [2,6,3,6].
;  - AVX512VL uses a two-source vpermt2d with the index vector [2,14,3,14].
3807 define <8 x i32> @lowhalf_v8i32(<8 x i32> %x, <8 x i32> %y) {
3808 ; AVX1-LABEL: lowhalf_v8i32:
3810 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
3811 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,2,2,2]
3812 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3815 ; AVX2-LABEL: lowhalf_v8i32:
3817 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
3818 ; AVX2-NEXT: vmovaps {{.*#+}} xmm1 = [2,6,3,6]
3819 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
3822 ; AVX512VL-LABEL: lowhalf_v8i32:
3823 ; AVX512VL: # %bb.0:
3824 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [2,14,3,14]
3825 ; AVX512VL-NEXT: vpermt2d %ymm1, %ymm2, %ymm0
3826 ; AVX512VL-NEXT: retq
3827 %r = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 2, i32 14, i32 3, i32 14, i32 undef, i32 undef, i32 undef, i32 undef>
3831 ; FIXME: AVX1 lowering is better than AVX2 (and AVX512?)
; The mask <2,14,3,14> produces x[2],y[6],x[3],y[6] in the low four lanes (the
; upper four are undef), with element 6 of %y used twice.
; Expected lowerings recorded below:
;  - AVX1 extracts the upper half of %y, gathers x[2],x[3],y[6],y[6] with one
;    vshufps and reorders them to x[2],y[6],x[3],y[6] with a second vshufps.
;  - AVX2 blends the low half of %x with the high half of %y and applies
;    vpermps with the index vector [2,6,3,6], loaded into a register first.
;  - AVX512VL uses a two-source vpermt2ps with the index vector [2,14,3,14],
;    also loaded into a register first.
3833 define <8 x float> @lowhalf_v8f32(<8 x float> %x, <8 x float> %y) {
3834 ; AVX1-LABEL: lowhalf_v8f32:
3836 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
3837 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[2,2]
3838 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
3841 ; AVX2-LABEL: lowhalf_v8f32:
3843 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
3844 ; AVX2-NEXT: vmovaps {{.*#+}} xmm1 = [2,6,3,6]
3845 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
3848 ; AVX512VL-LABEL: lowhalf_v8f32:
3849 ; AVX512VL: # %bb.0:
3850 ; AVX512VL-NEXT: vmovaps {{.*#+}} xmm2 = [2,14,3,14]
3851 ; AVX512VL-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
3852 ; AVX512VL-NEXT: retq
3853 %r = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 2, i32 14, i32 3, i32 14, i32 undef, i32 undef, i32 undef, i32 undef>