1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,CHECK-SLOW
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle | FileCheck %s --check-prefixes=CHECK,CHECK-FAST
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=CHECK,CHECK-SLOW
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=CHECK,CHECK-FAST
7 ; Shuffle lowest element of some subvector into highest element of some subvector.
8 ; Mainly this is testing how well we avoid subvector extractions/insertions.
9 ; https://bugs.llvm.org/show_bug.cgi?id=50971
; Unary <2 x double> case: the all-zero mask puts %x[0] in both result lanes,
; i.e. lane 1 is overwritten with %x[0] while lane 0 is unchanged.
11 define <2 x double> @vec128_eltty_double_source_subvec_0_target_subvec_mask_1_unary(<2 x double> %x) nounwind {
12 ; CHECK-LABEL: vec128_eltty_double_source_subvec_0_target_subvec_mask_1_unary:
14 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
16 %r = shufflevector <2 x double> %x, <2 x double> poison, <2 x i32> zeroinitializer
; Binary <2 x double> case: lane 1 takes %y[0] (mask index 2); lane 0 keeps
; %x[0].
20 define <2 x double> @vec128_eltty_double_source_subvec_0_target_subvec_mask_1_binary(<2 x double> %x, <2 x double> %y) nounwind {
21 ; CHECK-LABEL: vec128_eltty_double_source_subvec_0_target_subvec_mask_1_binary:
23 ; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
25 %r = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 0, i32 2>
; Unary <2 x i64> case: the all-zero mask puts %x[0] in both result lanes,
; i.e. lane 1 is overwritten with %x[0] while lane 0 is unchanged.
29 define <2 x i64> @vec128_eltty_i64_source_subvec_0_target_subvec_mask_1_unary(<2 x i64> %x) nounwind {
30 ; CHECK-LABEL: vec128_eltty_i64_source_subvec_0_target_subvec_mask_1_unary:
32 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
34 %r = shufflevector <2 x i64> %x, <2 x i64> poison, <2 x i32> zeroinitializer
; Binary <2 x i64> case: lane 1 takes %y[0] (mask index 2); lane 0 keeps
; %x[0].
38 define <2 x i64> @vec128_eltty_i64_source_subvec_0_target_subvec_mask_1_binary(<2 x i64> %x, <2 x i64> %y) nounwind {
39 ; CHECK-LABEL: vec128_eltty_i64_source_subvec_0_target_subvec_mask_1_binary:
41 ; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
43 %r = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
; Unary <4 x float> case: lane 3 takes %x[0]; lanes 0-2 keep their own %x
; element.
47 define <4 x float> @vec128_eltty_float_source_subvec_0_target_subvec_mask_1_unary(<4 x float> %x) nounwind {
48 ; CHECK-LABEL: vec128_eltty_float_source_subvec_0_target_subvec_mask_1_unary:
50 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1,2,0]
52 %r = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
; Binary <4 x float> case: lane 3 takes %y[0] (mask index 4); lanes 0-2 keep
; their own %x element.
56 define <4 x float> @vec128_eltty_float_source_subvec_0_target_subvec_mask_1_binary(<4 x float> %x, <4 x float> %y) nounwind {
57 ; CHECK-LABEL: vec128_eltty_float_source_subvec_0_target_subvec_mask_1_binary:
59 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
61 %r = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
; Unary <4 x i32> case: lane 3 takes %x[0]; lanes 0-2 keep their own %x
; element.
65 define <4 x i32> @vec128_eltty_i32_source_subvec_0_target_subvec_mask_1_unary(<4 x i32> %x) nounwind {
66 ; CHECK-LABEL: vec128_eltty_i32_source_subvec_0_target_subvec_mask_1_unary:
68 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1,2,0]
70 %r = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
; Binary <4 x i32> case: lane 3 takes %y[0] (mask index 4); lanes 0-2 keep
; their own %x element.
74 define <4 x i32> @vec128_eltty_i32_source_subvec_0_target_subvec_mask_1_binary(<4 x i32> %x, <4 x i32> %y) nounwind {
75 ; CHECK-LABEL: vec128_eltty_i32_source_subvec_0_target_subvec_mask_1_binary:
77 ; CHECK-NEXT: vbroadcastss %xmm1, %xmm1
78 ; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
80 %r = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
; Unary <8 x i16> case: lane 7 takes %x[0]; lanes 0-6 keep their own %x
; element.
84 define <8 x i16> @vec128_eltty_i16_source_subvec_0_target_subvec_mask_1_unary(<8 x i16> %x) nounwind {
85 ; CHECK-LABEL: vec128_eltty_i16_source_subvec_0_target_subvec_mask_1_unary:
87 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,0,1]
89 %r = shufflevector <8 x i16> %x, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 0>
; Binary <8 x i16> case: lane 7 takes %y[0] (mask index 8); lanes 0-6 keep
; their own %x element.
93 define <8 x i16> @vec128_eltty_i16_source_subvec_0_target_subvec_mask_1_binary(<8 x i16> %x, <8 x i16> %y) nounwind {
94 ; CHECK-LABEL: vec128_eltty_i16_source_subvec_0_target_subvec_mask_1_binary:
96 ; CHECK-NEXT: vpbroadcastw %xmm1, %xmm1
97 ; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
99 %r = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 8>
; Unary <16 x i8> case: lane 15 takes %x[0]; lanes 0-14 keep their own %x
; element.
103 define <16 x i8> @vec128_eltty_i8_source_subvec_0_target_subvec_mask_1_unary(<16 x i8> %x) nounwind {
104 ; CHECK-LABEL: vec128_eltty_i8_source_subvec_0_target_subvec_mask_1_unary:
106 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0]
108 %r = shufflevector <16 x i8> %x, <16 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
; Binary <16 x i8> case: lane 15 takes %y[0] (mask index 16); lanes 0-14 keep
; their own %x element.
112 define <16 x i8> @vec128_eltty_i8_source_subvec_0_target_subvec_mask_1_binary(<16 x i8> %x, <16 x i8> %y) nounwind {
113 ; CHECK-LABEL: vec128_eltty_i8_source_subvec_0_target_subvec_mask_1_binary:
115 ; CHECK-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
116 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero
117 ; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0
119 %r = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16>
; Unary <4 x double> case: lane 1 takes %x[0]; lanes 0, 2 and 3 keep their own
; %x element.
123 define <4 x double> @vec256_eltty_double_source_subvec_0_target_subvec_mask_1_unary(<4 x double> %x) nounwind {
124 ; CHECK-LABEL: vec256_eltty_double_source_subvec_0_target_subvec_mask_1_unary:
126 ; CHECK-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
128 %r = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
; Binary <4 x double> case: lane 1 takes %y[0] (mask index 4); lanes 0, 2 and 3
; keep their own %x element.
132 define <4 x double> @vec256_eltty_double_source_subvec_0_target_subvec_mask_1_binary(<4 x double> %x, <4 x double> %y) nounwind {
133 ; CHECK-LABEL: vec256_eltty_double_source_subvec_0_target_subvec_mask_1_binary:
135 ; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
136 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
138 %r = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
; Unary <4 x double> case: lane 3 takes %x[0]; lanes 0-2 keep their own %x
; element.
142 define <4 x double> @vec256_eltty_double_source_subvec_0_target_subvec_mask_2_unary(<4 x double> %x) nounwind {
143 ; CHECK-LABEL: vec256_eltty_double_source_subvec_0_target_subvec_mask_2_unary:
145 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,0]
147 %r = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
; Binary <4 x double> case: lane 3 takes %y[0] (mask index 4); lanes 0-2 keep
; their own %x element.
151 define <4 x double> @vec256_eltty_double_source_subvec_0_target_subvec_mask_2_binary(<4 x double> %x, <4 x double> %y) nounwind {
152 ; CHECK-LABEL: vec256_eltty_double_source_subvec_0_target_subvec_mask_2_binary:
154 ; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
155 ; CHECK-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[2]
157 %r = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
; Unary <4 x double> case: lanes 1 and 3 both take %x[0]; lanes 0 and 2 keep
; their own %x element.
161 define <4 x double> @vec256_eltty_double_source_subvec_0_target_subvec_mask_3_unary(<4 x double> %x) nounwind {
162 ; CHECK-LABEL: vec256_eltty_double_source_subvec_0_target_subvec_mask_3_unary:
164 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
166 %r = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
; Binary <4 x double> case: lanes 1 and 3 both take %y[0] (mask index 4);
; lanes 0 and 2 keep their own %x element.
170 define <4 x double> @vec256_eltty_double_source_subvec_0_target_subvec_mask_3_binary(<4 x double> %x, <4 x double> %y) nounwind {
171 ; CHECK-LABEL: vec256_eltty_double_source_subvec_0_target_subvec_mask_3_binary:
173 ; CHECK-NEXT: vbroadcastsd %xmm1, %ymm1
174 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
176 %r = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 4>
; Unary <4 x double> case: lane 1 takes %x[2]; lanes 0, 2 and 3 keep their own
; %x element.
180 define <4 x double> @vec256_eltty_double_source_subvec_1_target_subvec_mask_1_unary(<4 x double> %x) nounwind {
181 ; CHECK-LABEL: vec256_eltty_double_source_subvec_1_target_subvec_mask_1_unary:
183 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,3]
185 %r = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 3>
; Binary <4 x double> case: lane 1 takes %y[2] (mask index 6); lanes 0, 2 and 3
; keep their own %x element.
189 define <4 x double> @vec256_eltty_double_source_subvec_1_target_subvec_mask_1_binary(<4 x double> %x, <4 x double> %y) nounwind {
190 ; CHECK-LABEL: vec256_eltty_double_source_subvec_1_target_subvec_mask_1_binary:
192 ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[2,3]
193 ; CHECK-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[3]
195 %r = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
; Unary <4 x double> case: lane 3 takes %x[2]; lanes 0-2 keep their own %x
; element.
199 define <4 x double> @vec256_eltty_double_source_subvec_1_target_subvec_mask_2_unary(<4 x double> %x) nounwind {
200 ; CHECK-LABEL: vec256_eltty_double_source_subvec_1_target_subvec_mask_2_unary:
202 ; CHECK-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0,1,2,2]
204 %r = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
; Binary <4 x double> case: lane 3 takes %y[2] (mask index 6); lanes 0-2 keep
; their own %x element.
208 define <4 x double> @vec256_eltty_double_source_subvec_1_target_subvec_mask_2_binary(<4 x double> %x, <4 x double> %y) nounwind {
209 ; CHECK-LABEL: vec256_eltty_double_source_subvec_1_target_subvec_mask_2_binary:
211 ; CHECK-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
212 ; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3]
214 %r = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
; Unary <4 x double> case: lanes 1 and 3 both take %x[2]; lanes 0 and 2 keep
; their own %x element.
218 define <4 x double> @vec256_eltty_double_source_subvec_1_target_subvec_mask_3_unary(<4 x double> %x) nounwind {
219 ; CHECK-LABEL: vec256_eltty_double_source_subvec_1_target_subvec_mask_3_unary:
221 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,2]
223 %r = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
; Binary <4 x double> case: lanes 1 and 3 both take %y[2] (mask index 6);
; lanes 0 and 2 keep their own %x element.
227 define <4 x double> @vec256_eltty_double_source_subvec_1_target_subvec_mask_3_binary(<4 x double> %x, <4 x double> %y) nounwind {
228 ; CHECK-LABEL: vec256_eltty_double_source_subvec_1_target_subvec_mask_3_binary:
230 ; CHECK-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,2,2,2]
231 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
233 %r = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 6, i32 2, i32 6>
; Unary <4 x i64> case: lane 1 takes %x[0]; lanes 0, 2 and 3 keep their own %x
; element.
237 define <4 x i64> @vec256_eltty_i64_source_subvec_0_target_subvec_mask_1_unary(<4 x i64> %x) nounwind {
238 ; CHECK-LABEL: vec256_eltty_i64_source_subvec_0_target_subvec_mask_1_unary:
240 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
242 %r = shufflevector <4 x i64> %x, <4 x i64> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
; Binary <4 x i64> case: lane 1 takes %y[0] (mask index 4); lanes 0, 2 and 3
; keep their own %x element.
246 define <4 x i64> @vec256_eltty_i64_source_subvec_0_target_subvec_mask_1_binary(<4 x i64> %x, <4 x i64> %y) nounwind {
247 ; CHECK-LABEL: vec256_eltty_i64_source_subvec_0_target_subvec_mask_1_binary:
249 ; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
250 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
252 %r = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
; Unary <4 x i64> case: lane 3 takes %x[0]; lanes 0-2 keep their own %x
; element.
256 define <4 x i64> @vec256_eltty_i64_source_subvec_0_target_subvec_mask_2_unary(<4 x i64> %x) nounwind {
257 ; CHECK-LABEL: vec256_eltty_i64_source_subvec_0_target_subvec_mask_2_unary:
259 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,0]
261 %r = shufflevector <4 x i64> %x, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
; Binary <4 x i64> case: lane 3 takes %y[0] (mask index 4); lanes 0-2 keep
; their own %x element.
265 define <4 x i64> @vec256_eltty_i64_source_subvec_0_target_subvec_mask_2_binary(<4 x i64> %x, <4 x i64> %y) nounwind {
266 ; CHECK-LABEL: vec256_eltty_i64_source_subvec_0_target_subvec_mask_2_binary:
268 ; CHECK-NEXT: vbroadcastsd %xmm1, %ymm1
269 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
271 %r = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
; Unary <4 x i64> case: lanes 1 and 3 both take %x[0]; lanes 0 and 2 keep their
; own %x element.
275 define <4 x i64> @vec256_eltty_i64_source_subvec_0_target_subvec_mask_3_unary(<4 x i64> %x) nounwind {
276 ; CHECK-LABEL: vec256_eltty_i64_source_subvec_0_target_subvec_mask_3_unary:
278 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
280 %r = shufflevector <4 x i64> %x, <4 x i64> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
; Binary <4 x i64> case: lanes 1 and 3 both take %y[0] (mask index 4); lanes 0
; and 2 keep their own %x element.
284 define <4 x i64> @vec256_eltty_i64_source_subvec_0_target_subvec_mask_3_binary(<4 x i64> %x, <4 x i64> %y) nounwind {
285 ; CHECK-LABEL: vec256_eltty_i64_source_subvec_0_target_subvec_mask_3_binary:
287 ; CHECK-NEXT: vbroadcastsd %xmm1, %ymm1
288 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
290 %r = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 4>
; Unary <4 x i64> case: lane 1 takes %x[2]; lanes 0, 2 and 3 keep their own %x
; element.
294 define <4 x i64> @vec256_eltty_i64_source_subvec_1_target_subvec_mask_1_unary(<4 x i64> %x) nounwind {
295 ; CHECK-LABEL: vec256_eltty_i64_source_subvec_1_target_subvec_mask_1_unary:
297 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,3]
299 %r = shufflevector <4 x i64> %x, <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 3>
; Binary <4 x i64> case: lane 1 takes %y[2] (mask index 6); lanes 0, 2 and 3
; keep their own %x element.
303 define <4 x i64> @vec256_eltty_i64_source_subvec_1_target_subvec_mask_1_binary(<4 x i64> %x, <4 x i64> %y) nounwind {
304 ; CHECK-LABEL: vec256_eltty_i64_source_subvec_1_target_subvec_mask_1_binary:
306 ; CHECK-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,2,2,2]
307 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
309 %r = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
; Unary <4 x i64> case: lane 3 takes %x[2]; lanes 0-2 keep their own %x
; element.
313 define <4 x i64> @vec256_eltty_i64_source_subvec_1_target_subvec_mask_2_unary(<4 x i64> %x) nounwind {
314 ; CHECK-LABEL: vec256_eltty_i64_source_subvec_1_target_subvec_mask_2_unary:
316 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,2]
318 %r = shufflevector <4 x i64> %x, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
; Binary <4 x i64> case: lane 3 takes %y[2] (mask index 6); lanes 0-2 keep
; their own %x element.
322 define <4 x i64> @vec256_eltty_i64_source_subvec_1_target_subvec_mask_2_binary(<4 x i64> %x, <4 x i64> %y) nounwind {
323 ; CHECK-LABEL: vec256_eltty_i64_source_subvec_1_target_subvec_mask_2_binary:
325 ; CHECK-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,4,5]
326 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
328 %r = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
; Unary <4 x i64> case: lanes 1 and 3 both take %x[2]; lanes 0 and 2 keep their
; own %x element.
332 define <4 x i64> @vec256_eltty_i64_source_subvec_1_target_subvec_mask_3_unary(<4 x i64> %x) nounwind {
333 ; CHECK-LABEL: vec256_eltty_i64_source_subvec_1_target_subvec_mask_3_unary:
335 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,2]
337 %r = shufflevector <4 x i64> %x, <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
; Binary <4 x i64> case: lanes 1 and 3 both take %y[2] (mask index 6); lanes 0
; and 2 keep their own %x element.
341 define <4 x i64> @vec256_eltty_i64_source_subvec_1_target_subvec_mask_3_binary(<4 x i64> %x, <4 x i64> %y) nounwind {
342 ; CHECK-LABEL: vec256_eltty_i64_source_subvec_1_target_subvec_mask_3_binary:
344 ; CHECK-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,2,2,2]
345 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
347 %r = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 0, i32 6, i32 2, i32 6>
; Unary <8 x float> case: lane 3 takes %x[0]; every other lane keeps its own %x
; element.
351 define <8 x float> @vec256_eltty_float_source_subvec_0_target_subvec_mask_1_unary(<8 x float> %x) nounwind {
352 ; CHECK-LABEL: vec256_eltty_float_source_subvec_0_target_subvec_mask_1_unary:
354 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,0,4,5,6,7]
356 %r = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7>
; Binary <8 x float> case: lane 3 takes %y[0] (mask index 8); every other lane
; keeps its own %x element.
360 define <8 x float> @vec256_eltty_float_source_subvec_0_target_subvec_mask_1_binary(<8 x float> %x, <8 x float> %y) nounwind {
361 ; CHECK-LABEL: vec256_eltty_float_source_subvec_0_target_subvec_mask_1_binary:
363 ; CHECK-NEXT: vbroadcastss %xmm1, %xmm1
364 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7]
366 %r = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 4, i32 5, i32 6, i32 7>
; Unary <8 x float> case: lane 7 takes %x[0]; lanes 0-6 keep their own %x
; element.
370 define <8 x float> @vec256_eltty_float_source_subvec_0_target_subvec_mask_2_unary(<8 x float> %x) nounwind {
371 ; CHECK-LABEL: vec256_eltty_float_source_subvec_0_target_subvec_mask_2_unary:
373 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,3,4,5,6,0]
374 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
376 %r = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 0>
; Binary <8 x float> case: lane 7 takes %y[0] (mask index 8); lanes 0-6 keep
; their own %x element.
380 define <8 x float> @vec256_eltty_float_source_subvec_0_target_subvec_mask_2_binary(<8 x float> %x, <8 x float> %y) nounwind {
381 ; CHECK-LABEL: vec256_eltty_float_source_subvec_0_target_subvec_mask_2_binary:
383 ; CHECK-NEXT: vbroadcastss %xmm1, %ymm1
384 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7]
386 %r = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 8>
; Unary <8 x float> case: lanes 3 and 7 both take %x[0]; every other lane keeps
; its own %x element.
390 define <8 x float> @vec256_eltty_float_source_subvec_0_target_subvec_mask_3_unary(<8 x float> %x) nounwind {
391 ; CHECK-LABEL: vec256_eltty_float_source_subvec_0_target_subvec_mask_3_unary:
393 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,0,4,5,6,0]
394 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
396 %r = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 0>
; Binary <8 x float> case: lanes 3 and 7 both take %y[0] (mask index 8); every
; other lane keeps its own %x element.
400 define <8 x float> @vec256_eltty_float_source_subvec_0_target_subvec_mask_3_binary(<8 x float> %x, <8 x float> %y) nounwind {
401 ; CHECK-LABEL: vec256_eltty_float_source_subvec_0_target_subvec_mask_3_binary:
403 ; CHECK-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,0,1]
404 ; CHECK-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
405 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,0],ymm0[4,5],ymm1[6,4]
407 %r = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 4, i32 5, i32 6, i32 8>
; Unary <8 x float> case: lane 3 takes %x[4]; every other lane keeps its own %x
; element.
411 define <8 x float> @vec256_eltty_float_source_subvec_1_target_subvec_mask_1_unary(<8 x float> %x) nounwind {
412 ; CHECK-LABEL: vec256_eltty_float_source_subvec_1_target_subvec_mask_1_unary:
414 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,4,4,5,6,7]
415 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
417 %r = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 4, i32 5, i32 6, i32 7>
; Binary <8 x float> case: lane 3 takes %y[4] (mask index 12); every other lane
; keeps its own %x element.
421 define <8 x float> @vec256_eltty_float_source_subvec_1_target_subvec_mask_1_binary(<8 x float> %x, <8 x float> %y) nounwind {
422 ; CHECK-LABEL: vec256_eltty_float_source_subvec_1_target_subvec_mask_1_binary:
424 ; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm1
425 ; CHECK-NEXT: vbroadcastss %xmm1, %xmm1
426 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7]
428 %r = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 12, i32 4, i32 5, i32 6, i32 7>
; Unary <8 x float> case: lane 7 takes %x[4]; lanes 0-6 keep their own %x
; element.
432 define <8 x float> @vec256_eltty_float_source_subvec_1_target_subvec_mask_2_unary(<8 x float> %x) nounwind {
433 ; CHECK-LABEL: vec256_eltty_float_source_subvec_1_target_subvec_mask_2_unary:
435 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,4]
437 %r = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>
; Binary <8 x float> case: lane 7 takes %y[4] (mask index 12); lanes 0-6 keep
; their own %x element. The expected code is checked separately under the
; CHECK-SLOW and CHECK-FAST prefixes.
441 define <8 x float> @vec256_eltty_float_source_subvec_1_target_subvec_mask_2_binary(<8 x float> %x, <8 x float> %y) nounwind {
442 ; CHECK-SLOW-LABEL: vec256_eltty_float_source_subvec_1_target_subvec_mask_2_binary:
443 ; CHECK-SLOW: # %bb.0:
444 ; CHECK-SLOW-NEXT: vextractf128 $1, %ymm1, %xmm1
445 ; CHECK-SLOW-NEXT: vbroadcastss %xmm1, %ymm1
446 ; CHECK-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7]
447 ; CHECK-SLOW-NEXT: retq
449 ; CHECK-FAST-LABEL: vec256_eltty_float_source_subvec_1_target_subvec_mask_2_binary:
450 ; CHECK-FAST: # %bb.0:
451 ; CHECK-FAST-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
452 ; CHECK-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7]
453 ; CHECK-FAST-NEXT: retq
454 %r = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 12>
; Unary <8 x float> case: lanes 3 and 7 both take %x[4]; every other lane keeps
; its own %x element.
458 define <8 x float> @vec256_eltty_float_source_subvec_1_target_subvec_mask_3_unary(<8 x float> %x) nounwind {
459 ; CHECK-LABEL: vec256_eltty_float_source_subvec_1_target_subvec_mask_3_unary:
461 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,4,4,5,6,4]
462 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
464 %r = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 4, i32 5, i32 6, i32 4>
; Binary <8 x float> case: lanes 3 and 7 both take %y[4] (mask index 12); every
; other lane keeps its own %x element.
468 define <8 x float> @vec256_eltty_float_source_subvec_1_target_subvec_mask_3_binary(<8 x float> %x, <8 x float> %y) nounwind {
469 ; CHECK-LABEL: vec256_eltty_float_source_subvec_1_target_subvec_mask_3_binary:
471 ; CHECK-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,3,2,3]
472 ; CHECK-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
473 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,0],ymm0[4,5],ymm1[6,4]
475 %r = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 12, i32 4, i32 5, i32 6, i32 12>
; Unary <8 x i32> case: lane 3 takes %x[0]; every other lane keeps its own %x
; element.
479 define <8 x i32> @vec256_eltty_i32_source_subvec_0_target_subvec_mask_1_unary(<8 x i32> %x) nounwind {
480 ; CHECK-LABEL: vec256_eltty_i32_source_subvec_0_target_subvec_mask_1_unary:
482 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,0,4,5,6,7]
483 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
485 %r = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7>
; Binary <8 x i32> case: lane 3 takes %y[0] (mask index 8); every other lane
; keeps its own %x element.
489 define <8 x i32> @vec256_eltty_i32_source_subvec_0_target_subvec_mask_1_binary(<8 x i32> %x, <8 x i32> %y) nounwind {
490 ; CHECK-LABEL: vec256_eltty_i32_source_subvec_0_target_subvec_mask_1_binary:
492 ; CHECK-NEXT: vbroadcastss %xmm1, %xmm1
493 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7]
495 %r = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 4, i32 5, i32 6, i32 7>
; Unary <8 x i32> case: lane 7 takes %x[0]; lanes 0-6 keep their own %x
; element.
499 define <8 x i32> @vec256_eltty_i32_source_subvec_0_target_subvec_mask_2_unary(<8 x i32> %x) nounwind {
500 ; CHECK-LABEL: vec256_eltty_i32_source_subvec_0_target_subvec_mask_2_unary:
502 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,3,4,5,6,0]
503 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
505 %r = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 0>
; Binary <8 x i32> case: lane 7 takes %y[0] (mask index 8); lanes 0-6 keep
; their own %x element.
509 define <8 x i32> @vec256_eltty_i32_source_subvec_0_target_subvec_mask_2_binary(<8 x i32> %x, <8 x i32> %y) nounwind {
510 ; CHECK-LABEL: vec256_eltty_i32_source_subvec_0_target_subvec_mask_2_binary:
512 ; CHECK-NEXT: vbroadcastss %xmm1, %ymm1
513 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7]
515 %r = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 8>
; Unary <8 x i32> case: lanes 3 and 7 both take %x[0]; every other lane keeps
; its own %x element.
519 define <8 x i32> @vec256_eltty_i32_source_subvec_0_target_subvec_mask_3_unary(<8 x i32> %x) nounwind {
520 ; CHECK-LABEL: vec256_eltty_i32_source_subvec_0_target_subvec_mask_3_unary:
522 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,0,4,5,6,0]
523 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
525 %r = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 0>
; Binary <8 x i32> case: lanes 3 and 7 both take %y[0] (mask index 8); every
; other lane keeps its own %x element.
529 define <8 x i32> @vec256_eltty_i32_source_subvec_0_target_subvec_mask_3_binary(<8 x i32> %x, <8 x i32> %y) nounwind {
530 ; CHECK-LABEL: vec256_eltty_i32_source_subvec_0_target_subvec_mask_3_binary:
532 ; CHECK-NEXT: vbroadcastss %xmm1, %ymm1
533 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6],ymm1[7]
535 %r = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 4, i32 5, i32 6, i32 8>
; Unary <8 x i32> case: lane 3 takes %x[4]; every other lane keeps its own %x
; element.
539 define <8 x i32> @vec256_eltty_i32_source_subvec_1_target_subvec_mask_1_unary(<8 x i32> %x) nounwind {
540 ; CHECK-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_1_unary:
542 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,4,4,5,6,7]
543 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
545 %r = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 4, i32 5, i32 6, i32 7>
; Binary <8 x i32> case: lane 3 takes %y[4] (mask index 12); every other lane
; keeps its own %x element.
549 define <8 x i32> @vec256_eltty_i32_source_subvec_1_target_subvec_mask_1_binary(<8 x i32> %x, <8 x i32> %y) nounwind {
550 ; CHECK-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_1_binary:
552 ; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm1
553 ; CHECK-NEXT: vbroadcastss %xmm1, %xmm1
554 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7]
556 %r = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 12, i32 4, i32 5, i32 6, i32 7>
; Unary <8 x i32> case: lane 7 takes %x[4]; lanes 0-6 keep their own %x
; element.
560 define <8 x i32> @vec256_eltty_i32_source_subvec_1_target_subvec_mask_2_unary(<8 x i32> %x) nounwind {
561 ; CHECK-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_2_unary:
563 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,3,4,5,6,4]
564 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
566 %r = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>
; Binary <8 x i32> case: lane 7 takes %y[4] (mask index 12); lanes 0-6 keep
; their own %x element. The expected code is checked separately under the
; CHECK-SLOW and CHECK-FAST prefixes.
570 define <8 x i32> @vec256_eltty_i32_source_subvec_1_target_subvec_mask_2_binary(<8 x i32> %x, <8 x i32> %y) nounwind {
571 ; CHECK-SLOW-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_2_binary:
572 ; CHECK-SLOW: # %bb.0:
573 ; CHECK-SLOW-NEXT: vextractf128 $1, %ymm1, %xmm1
574 ; CHECK-SLOW-NEXT: vbroadcastss %xmm1, %ymm1
575 ; CHECK-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7]
576 ; CHECK-SLOW-NEXT: retq
578 ; CHECK-FAST-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_2_binary:
579 ; CHECK-FAST: # %bb.0:
580 ; CHECK-FAST-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
581 ; CHECK-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7]
582 ; CHECK-FAST-NEXT: retq
583 %r = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 12>
; Unary <8 x i32> case: lanes 3 and 7 both take %x[4]; every other lane keeps
; its own %x element.
587 define <8 x i32> @vec256_eltty_i32_source_subvec_1_target_subvec_mask_3_unary(<8 x i32> %x) nounwind {
588 ; CHECK-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_3_unary:
590 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,4,4,5,6,4]
591 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
593 %r = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 4, i32 5, i32 6, i32 4>
; Binary <8 x i32> case: lanes 3 and 7 both take %y[4] (mask index 12); every
; other lane keeps its own %x element. The expected code is checked separately
; under the CHECK-SLOW and CHECK-FAST prefixes.
597 define <8 x i32> @vec256_eltty_i32_source_subvec_1_target_subvec_mask_3_binary(<8 x i32> %x, <8 x i32> %y) nounwind {
598 ; CHECK-SLOW-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_3_binary:
599 ; CHECK-SLOW: # %bb.0:
600 ; CHECK-SLOW-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,3,2,3]
601 ; CHECK-SLOW-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
602 ; CHECK-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6],ymm1[7]
603 ; CHECK-SLOW-NEXT: retq
605 ; CHECK-FAST-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_3_binary:
606 ; CHECK-FAST: # %bb.0:
607 ; CHECK-FAST-NEXT: vbroadcastss {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4]
608 ; CHECK-FAST-NEXT: vpermps %ymm1, %ymm2, %ymm1
609 ; CHECK-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6],ymm1[7]
610 ; CHECK-FAST-NEXT: retq
611 %r = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 12, i32 4, i32 5, i32 6, i32 12>
; Unary <16 x i16> case: lane 7 takes %x[0]; every other lane keeps its own %x
; element.
615 define <16 x i16> @vec256_eltty_i16_source_subvec_0_target_subvec_mask_1_unary(<16 x i16> %x) nounwind {
616 ; CHECK-LABEL: vec256_eltty_i16_source_subvec_0_target_subvec_mask_1_unary:
618 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,0,1,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
620 %r = shufflevector <16 x i16> %x, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 0, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Binary <16 x i16> case: lane 7 takes %y[0] (mask index 16); every other lane
; keeps its own %x element.
624 define <16 x i16> @vec256_eltty_i16_source_subvec_0_target_subvec_mask_1_binary(<16 x i16> %x, <16 x i16> %y) nounwind {
625 ; CHECK-LABEL: vec256_eltty_i16_source_subvec_0_target_subvec_mask_1_binary:
627 ; CHECK-NEXT: vpbroadcastw %xmm1, %xmm1
628 ; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,6],xmm1[7]
629 ; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
631 %r = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 16, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Unary <16 x i16> case: lane 15 takes %x[0]; lanes 0-14 keep their own %x
; element.
635 define <16 x i16> @vec256_eltty_i16_source_subvec_0_target_subvec_mask_2_unary(<16 x i16> %x) nounwind {
636 ; CHECK-LABEL: vec256_eltty_i16_source_subvec_0_target_subvec_mask_2_unary:
638 ; CHECK-NEXT: vpbroadcastw %xmm0, %ymm1
639 ; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
640 ; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
642 %r = shufflevector <16 x i16> %x, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
; Binary <16 x i16> case: lane 15 takes %y[0] (mask index 16); lanes 0-14 keep
; their own %x element.
646 define <16 x i16> @vec256_eltty_i16_source_subvec_0_target_subvec_mask_2_binary(<16 x i16> %x, <16 x i16> %y) nounwind {
647 ; CHECK-LABEL: vec256_eltty_i16_source_subvec_0_target_subvec_mask_2_binary:
649 ; CHECK-NEXT: vpbroadcastw %xmm1, %ymm1
650 ; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
651 ; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
653 %r = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16>
; Unary <16 x i16> case: lanes 7 and 15 both take %x[0]; every other lane keeps
; its own %x element.
657 define <16 x i16> @vec256_eltty_i16_source_subvec_0_target_subvec_mask_3_unary(<16 x i16> %x) nounwind {
658 ; CHECK-LABEL: vec256_eltty_i16_source_subvec_0_target_subvec_mask_3_unary:
660 ; CHECK-NEXT: vpbroadcastw %xmm0, %ymm1
661 ; CHECK-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
663 %r = shufflevector <16 x i16> %x, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 0, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
; Binary <16 x i16> case: lanes 7 and 15 both take %y[0] (mask index 16); every
; other lane keeps its own %x element.
667 define <16 x i16> @vec256_eltty_i16_source_subvec_0_target_subvec_mask_3_binary(<16 x i16> %x, <16 x i16> %y) nounwind {
668 ; CHECK-LABEL: vec256_eltty_i16_source_subvec_0_target_subvec_mask_3_binary:
670 ; CHECK-NEXT: vpbroadcastw %xmm1, %ymm1
671 ; CHECK-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
673 %r = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 16, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16>
; Unary <16 x i16> case: lane 7 takes %x[8]; every other lane keeps its own %x
; element.
677 define <16 x i16> @vec256_eltty_i16_source_subvec_1_target_subvec_mask_1_unary(<16 x i16> %x) nounwind {
678 ; CHECK-LABEL: vec256_eltty_i16_source_subvec_1_target_subvec_mask_1_unary:
680 ; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
681 ; CHECK-NEXT: vpbroadcastw %xmm1, %xmm1
682 ; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,6],xmm1[7]
683 ; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
685 %r = shufflevector <16 x i16> %x, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 8, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Binary <16 x i16> case: lane 7 takes %y[8] (mask index 24); every other lane
; keeps its own %x element.
689 define <16 x i16> @vec256_eltty_i16_source_subvec_1_target_subvec_mask_1_binary(<16 x i16> %x, <16 x i16> %y) nounwind {
690 ; CHECK-LABEL: vec256_eltty_i16_source_subvec_1_target_subvec_mask_1_binary:
692 ; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm1
693 ; CHECK-NEXT: vpbroadcastw %xmm1, %xmm1
694 ; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,6],xmm1[7]
695 ; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
697 %r = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 24, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Unary <16 x i16> case: lane 15 takes %x[8]; lanes 0-14 keep their own %x
; element.
701 define <16 x i16> @vec256_eltty_i16_source_subvec_1_target_subvec_mask_2_unary(<16 x i16> %x) nounwind {
702 ; CHECK-LABEL: vec256_eltty_i16_source_subvec_1_target_subvec_mask_2_unary:
704 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,16,17]
706 %r = shufflevector <16 x i16> %x, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>
; Binary <16 x i16> case: lane 15 takes %y[8] (mask index 24); lanes 0-14 keep
; their own %x element.
710 define <16 x i16> @vec256_eltty_i16_source_subvec_1_target_subvec_mask_2_binary(<16 x i16> %x, <16 x i16> %y) nounwind {
711 ; CHECK-LABEL: vec256_eltty_i16_source_subvec_1_target_subvec_mask_2_binary:
713 ; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm1
714 ; CHECK-NEXT: vpbroadcastw %xmm1, %ymm1
715 ; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
716 ; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
718 %r = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 24>
; Unary <16 x i16> shuffle: result elements 7 and 15 are both element 8 of %x;
; every other element keeps its position.
; The expected assembly duplicates the upper 128-bit half into both halves,
; shifts the lowest word of each half up to the top word of that half, and
; blends words 7 and 15 into %x.
722 define <16 x i16> @vec256_eltty_i16_source_subvec_1_target_subvec_mask_3_unary(<16 x i16> %x) nounwind {
723 ; CHECK-LABEL: vec256_eltty_i16_source_subvec_1_target_subvec_mask_3_unary:
725 ; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
726 ; CHECK-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17]
727 ; CHECK-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
729 %r = shufflevector <16 x i16> %x, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 8, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>
; Binary <16 x i16> shuffle: result elements 7 and 15 both use mask index 24,
; i.e. element 8 of %y; every other element is taken from %x at the same
; position.
; The expected assembly duplicates the upper 128-bit half of the second operand
; into both halves, shifts the lowest word of each half up to the top word of
; that half, and blends words 7 and 15 into %x.
733 define <16 x i16> @vec256_eltty_i16_source_subvec_1_target_subvec_mask_3_binary(<16 x i16> %x, <16 x i16> %y) nounwind {
734 ; CHECK-LABEL: vec256_eltty_i16_source_subvec_1_target_subvec_mask_3_binary:
736 ; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
737 ; CHECK-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17]
738 ; CHECK-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
740 %r = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 24, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 24>
; Unary <32 x i8> shuffle: result element 15 is element 0 of %x; every other
; element keeps its position. Source and destination both sit in the lower
; 128-bit half, and the expected assembly is a single vpshufb.
744 define <32 x i8> @vec256_eltty_i8_source_subvec_0_target_subvec_mask_1_unary(<32 x i8> %x) nounwind {
745 ; CHECK-LABEL: vec256_eltty_i8_source_subvec_0_target_subvec_mask_1_unary:
747 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
749 %r = shufflevector <32 x i8> %x, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; Binary <32 x i8> shuffle: result element 15 uses mask index 32, i.e. element 0
; of %y; every other element is taken from %x at the same position.
; The expected assembly broadcasts the lowest byte of the second operand and
; merges it with a variable byte blend whose constant mask singles out byte 15.
753 define <32 x i8> @vec256_eltty_i8_source_subvec_0_target_subvec_mask_1_binary(<32 x i8> %x, <32 x i8> %y) nounwind {
754 ; CHECK-LABEL: vec256_eltty_i8_source_subvec_0_target_subvec_mask_1_binary:
756 ; CHECK-NEXT: vpbroadcastb %xmm1, %ymm1
757 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
758 ; CHECK-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
760 %r = shufflevector <32 x i8> %x, <32 x i8> %y, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; Unary <32 x i8> shuffle: result element 31 is element 0 of %x; every other
; element keeps its position. The source is in the lower 128-bit half and the
; destination in the upper half.
; The expected assembly broadcasts the lowest byte of %x and merges it with a
; variable byte blend whose constant mask singles out byte 31.
764 define <32 x i8> @vec256_eltty_i8_source_subvec_0_target_subvec_mask_2_unary(<32 x i8> %x) nounwind {
765 ; CHECK-LABEL: vec256_eltty_i8_source_subvec_0_target_subvec_mask_2_unary:
767 ; CHECK-NEXT: vpbroadcastb %xmm0, %ymm1
768 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
769 ; CHECK-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
771 %r = shufflevector <32 x i8> %x, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 0>
; Binary <32 x i8> shuffle: result element 31 uses mask index 32, i.e. element 0
; of %y; every other element is taken from %x at the same position.
; The expected assembly broadcasts the lowest byte of the second operand and
; merges it with a variable byte blend whose constant mask singles out byte 31.
775 define <32 x i8> @vec256_eltty_i8_source_subvec_0_target_subvec_mask_2_binary(<32 x i8> %x, <32 x i8> %y) nounwind {
776 ; CHECK-LABEL: vec256_eltty_i8_source_subvec_0_target_subvec_mask_2_binary:
778 ; CHECK-NEXT: vpbroadcastb %xmm1, %ymm1
779 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
780 ; CHECK-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
782 %r = shufflevector <32 x i8> %x, <32 x i8> %y, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 32>
; Unary <32 x i8> shuffle: result elements 15 and 31 are both element 0 of %x;
; every other element keeps its position.
; The expected assembly duplicates the lower 128-bit half into both halves,
; shifts the lowest byte of each half up to the top byte of that half, and
; merges bytes 15 and 31 with a variable byte blend using a constant mask.
786 define <32 x i8> @vec256_eltty_i8_source_subvec_0_target_subvec_mask_3_unary(<32 x i8> %x) nounwind {
787 ; CHECK-LABEL: vec256_eltty_i8_source_subvec_0_target_subvec_mask_3_unary:
789 ; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,1,0,1]
790 ; CHECK-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16]
791 ; CHECK-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
792 ; CHECK-NEXT: # ymm2 = mem[0,1,0,1]
793 ; CHECK-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
795 %r = shufflevector <32 x i8> %x, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 0>
; Binary <32 x i8> shuffle: result elements 15 and 31 both use mask index 32,
; i.e. element 0 of %y; every other element is taken from %x at the same
; position.
; The expected assembly duplicates the lower 128-bit half of the second operand
; into both halves, shifts the lowest byte of each half up to the top byte of
; that half, and merges bytes 15 and 31 with a variable byte blend using a
; constant mask.
799 define <32 x i8> @vec256_eltty_i8_source_subvec_0_target_subvec_mask_3_binary(<32 x i8> %x, <32 x i8> %y) nounwind {
800 ; CHECK-LABEL: vec256_eltty_i8_source_subvec_0_target_subvec_mask_3_binary:
802 ; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,0,1]
803 ; CHECK-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16]
804 ; CHECK-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
805 ; CHECK-NEXT: # ymm2 = mem[0,1,0,1]
806 ; CHECK-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
808 %r = shufflevector <32 x i8> %x, <32 x i8> %y, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 32>
; Unary <32 x i8> shuffle: result element 15 is element 16 of %x (the lowest
; element of the upper 128-bit half); every other element keeps its position.
; The expected assembly extracts the upper half, broadcasts its lowest byte,
; and merges it with a variable byte blend whose constant mask singles out
; byte 15.
812 define <32 x i8> @vec256_eltty_i8_source_subvec_1_target_subvec_mask_1_unary(<32 x i8> %x) nounwind {
813 ; CHECK-LABEL: vec256_eltty_i8_source_subvec_1_target_subvec_mask_1_unary:
815 ; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
816 ; CHECK-NEXT: vpbroadcastb %xmm1, %ymm1
817 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
818 ; CHECK-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
820 %r = shufflevector <32 x i8> %x, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; Binary <32 x i8> shuffle: result element 15 uses mask index 48, i.e. element
; 16 of %y (the lowest element of its upper 128-bit half); every other element
; is taken from %x at the same position.
; The expected assembly extracts the upper half of the second operand, shifts
; its lowest byte up to byte 15, and merges it with a variable byte blend whose
; constant mask singles out byte 15.
824 define <32 x i8> @vec256_eltty_i8_source_subvec_1_target_subvec_mask_1_binary(<32 x i8> %x, <32 x i8> %y) nounwind {
825 ; CHECK-LABEL: vec256_eltty_i8_source_subvec_1_target_subvec_mask_1_binary:
827 ; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm1
828 ; CHECK-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
829 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
830 ; CHECK-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
832 %r = shufflevector <32 x i8> %x, <32 x i8> %y, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 48, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; Unary <32 x i8> shuffle: result element 31 is element 16 of %x; every other
; element keeps its position. Source and destination both sit in the upper
; 128-bit half, and the expected assembly is a single vpshufb.
836 define <32 x i8> @vec256_eltty_i8_source_subvec_1_target_subvec_mask_2_unary(<32 x i8> %x) nounwind {
837 ; CHECK-LABEL: vec256_eltty_i8_source_subvec_1_target_subvec_mask_2_unary:
839 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,16]
841 %r = shufflevector <32 x i8> %x, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 16>
; Binary <32 x i8> shuffle: result element 31 uses mask index 48, i.e. element
; 16 of %y; every other element is taken from %x at the same position.
; The expected assembly extracts the upper half of the second operand,
; broadcasts its lowest byte, and merges it with a variable byte blend whose
; constant mask singles out byte 31.
845 define <32 x i8> @vec256_eltty_i8_source_subvec_1_target_subvec_mask_2_binary(<32 x i8> %x, <32 x i8> %y) nounwind {
846 ; CHECK-LABEL: vec256_eltty_i8_source_subvec_1_target_subvec_mask_2_binary:
848 ; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm1
849 ; CHECK-NEXT: vpbroadcastb %xmm1, %ymm1
850 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
851 ; CHECK-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
853 %r = shufflevector <32 x i8> %x, <32 x i8> %y, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 48>
; Unary <32 x i8> shuffle: result elements 15 and 31 are both element 16 of %x;
; every other element keeps its position.
; The expected assembly duplicates the upper 128-bit half into both halves,
; shifts the lowest byte of each half up to the top byte of that half, and
; merges bytes 15 and 31 with a variable byte blend using a constant mask.
857 define <32 x i8> @vec256_eltty_i8_source_subvec_1_target_subvec_mask_3_unary(<32 x i8> %x) nounwind {
858 ; CHECK-LABEL: vec256_eltty_i8_source_subvec_1_target_subvec_mask_3_unary:
860 ; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
861 ; CHECK-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16]
862 ; CHECK-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
863 ; CHECK-NEXT: # ymm2 = mem[0,1,0,1]
864 ; CHECK-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
866 %r = shufflevector <32 x i8> %x, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 16>
; Binary <32 x i8> shuffle: result elements 15 and 31 both use mask index 48,
; i.e. element 16 of %y; every other element is taken from %x at the same
; position.
; The expected assembly duplicates the upper 128-bit half of the second operand
; into both halves, shifts the lowest byte of each half up to the top byte of
; that half, and merges bytes 15 and 31 with a variable byte blend using a
; constant mask.
870 define <32 x i8> @vec256_eltty_i8_source_subvec_1_target_subvec_mask_3_binary(<32 x i8> %x, <32 x i8> %y) nounwind {
871 ; CHECK-LABEL: vec256_eltty_i8_source_subvec_1_target_subvec_mask_3_binary:
873 ; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
874 ; CHECK-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16]
875 ; CHECK-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
876 ; CHECK-NEXT: # ymm2 = mem[0,1,0,1]
877 ; CHECK-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
879 %r = shufflevector <32 x i8> %x, <32 x i8> %y, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 48, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 48>