1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,CHECK-SLOW
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle | FileCheck %s --check-prefixes=CHECK,CHECK-FAST
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=CHECK,CHECK-SLOW
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=CHECK,CHECK-FAST
7 ; Shuffle lowest element of some subvector into highest element of some subvector.
8 ; Mainly this is testing how well we avoid subvector extractions/insertions.
9 ; https://bugs.llvm.org/show_bug.cgi?id=50971
11 define <2 x double> @vec128_eltty_double_source_subvec_0_target_subvec_mask_1_unary(<2 x double> %x) nounwind {
12 ; CHECK-LABEL: vec128_eltty_double_source_subvec_0_target_subvec_mask_1_unary:
14 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
16 %r = shufflevector <2 x double> %x, <2 x double> poison, <2 x i32> zeroinitializer
20 define <2 x double> @vec128_eltty_double_source_subvec_0_target_subvec_mask_1_binary(<2 x double> %x, <2 x double> %y) nounwind {
21 ; CHECK-LABEL: vec128_eltty_double_source_subvec_0_target_subvec_mask_1_binary:
23 ; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
25 %r = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 0, i32 2>
29 define <2 x i64> @vec128_eltty_i64_source_subvec_0_target_subvec_mask_1_unary(<2 x i64> %x) nounwind {
30 ; CHECK-LABEL: vec128_eltty_i64_source_subvec_0_target_subvec_mask_1_unary:
32 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
34 %r = shufflevector <2 x i64> %x, <2 x i64> poison, <2 x i32> zeroinitializer
38 define <2 x i64> @vec128_eltty_i64_source_subvec_0_target_subvec_mask_1_binary(<2 x i64> %x, <2 x i64> %y) nounwind {
39 ; CHECK-LABEL: vec128_eltty_i64_source_subvec_0_target_subvec_mask_1_binary:
41 ; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
43 %r = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
47 define <4 x float> @vec128_eltty_float_source_subvec_0_target_subvec_mask_1_unary(<4 x float> %x) nounwind {
48 ; CHECK-LABEL: vec128_eltty_float_source_subvec_0_target_subvec_mask_1_unary:
50 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,2,0]
52 %r = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
56 define <4 x float> @vec128_eltty_float_source_subvec_0_target_subvec_mask_1_binary(<4 x float> %x, <4 x float> %y) nounwind {
57 ; CHECK-LABEL: vec128_eltty_float_source_subvec_0_target_subvec_mask_1_binary:
59 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
61 %r = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
65 define <4 x i32> @vec128_eltty_i32_source_subvec_0_target_subvec_mask_1_unary(<4 x i32> %x) nounwind {
66 ; CHECK-LABEL: vec128_eltty_i32_source_subvec_0_target_subvec_mask_1_unary:
68 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,2,0]
70 %r = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
74 define <4 x i32> @vec128_eltty_i32_source_subvec_0_target_subvec_mask_1_binary(<4 x i32> %x, <4 x i32> %y) nounwind {
75 ; CHECK-LABEL: vec128_eltty_i32_source_subvec_0_target_subvec_mask_1_binary:
77 ; CHECK-NEXT: vbroadcastss %xmm1, %xmm1
78 ; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
80 %r = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
84 define <8 x i16> @vec128_eltty_i16_source_subvec_0_target_subvec_mask_1_unary(<8 x i16> %x) nounwind {
85 ; CHECK-LABEL: vec128_eltty_i16_source_subvec_0_target_subvec_mask_1_unary:
87 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,0,1]
89 %r = shufflevector <8 x i16> %x, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 0>
93 define <8 x i16> @vec128_eltty_i16_source_subvec_0_target_subvec_mask_1_binary(<8 x i16> %x, <8 x i16> %y) nounwind {
94 ; CHECK-LABEL: vec128_eltty_i16_source_subvec_0_target_subvec_mask_1_binary:
96 ; CHECK-NEXT: vpbroadcastw %xmm1, %xmm1
97 ; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
99 %r = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 8>
103 define <16 x i8> @vec128_eltty_i8_source_subvec_0_target_subvec_mask_1_unary(<16 x i8> %x) nounwind {
104 ; CHECK-LABEL: vec128_eltty_i8_source_subvec_0_target_subvec_mask_1_unary:
106 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0]
108 %r = shufflevector <16 x i8> %x, <16 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
112 define <16 x i8> @vec128_eltty_i8_source_subvec_0_target_subvec_mask_1_binary(<16 x i8> %x, <16 x i8> %y) nounwind {
113 ; CHECK-LABEL: vec128_eltty_i8_source_subvec_0_target_subvec_mask_1_binary:
115 ; CHECK-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
116 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero
117 ; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0
119 %r = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16>
123 define <4 x double> @vec256_eltty_double_source_subvec_0_target_subvec_mask_1_unary(<4 x double> %x) nounwind {
124 ; CHECK-LABEL: vec256_eltty_double_source_subvec_0_target_subvec_mask_1_unary:
126 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
128 %r = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
132 define <4 x double> @vec256_eltty_double_source_subvec_0_target_subvec_mask_1_binary(<4 x double> %x, <4 x double> %y) nounwind {
133 ; CHECK-LABEL: vec256_eltty_double_source_subvec_0_target_subvec_mask_1_binary:
135 ; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
136 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
138 %r = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
142 define <4 x double> @vec256_eltty_double_source_subvec_0_target_subvec_mask_2_unary(<4 x double> %x) nounwind {
143 ; CHECK-LABEL: vec256_eltty_double_source_subvec_0_target_subvec_mask_2_unary:
145 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,0]
147 %r = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
151 define <4 x double> @vec256_eltty_double_source_subvec_0_target_subvec_mask_2_binary(<4 x double> %x, <4 x double> %y) nounwind {
152 ; CHECK-LABEL: vec256_eltty_double_source_subvec_0_target_subvec_mask_2_binary:
154 ; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
155 ; CHECK-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[2]
157 %r = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
161 define <4 x double> @vec256_eltty_double_source_subvec_0_target_subvec_mask_3_unary(<4 x double> %x) nounwind {
162 ; CHECK-LABEL: vec256_eltty_double_source_subvec_0_target_subvec_mask_3_unary:
164 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
166 %r = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
170 define <4 x double> @vec256_eltty_double_source_subvec_0_target_subvec_mask_3_binary(<4 x double> %x, <4 x double> %y) nounwind {
171 ; CHECK-LABEL: vec256_eltty_double_source_subvec_0_target_subvec_mask_3_binary:
173 ; CHECK-NEXT: vbroadcastsd %xmm1, %ymm1
174 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
176 %r = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 4>
180 define <4 x double> @vec256_eltty_double_source_subvec_1_target_subvec_mask_1_unary(<4 x double> %x) nounwind {
181 ; CHECK-LABEL: vec256_eltty_double_source_subvec_1_target_subvec_mask_1_unary:
183 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,3]
185 %r = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 3>
189 define <4 x double> @vec256_eltty_double_source_subvec_1_target_subvec_mask_1_binary(<4 x double> %x, <4 x double> %y) nounwind {
190 ; CHECK-LABEL: vec256_eltty_double_source_subvec_1_target_subvec_mask_1_binary:
192 ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[2,3]
193 ; CHECK-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[3]
195 %r = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
199 define <4 x double> @vec256_eltty_double_source_subvec_1_target_subvec_mask_2_unary(<4 x double> %x) nounwind {
200 ; CHECK-LABEL: vec256_eltty_double_source_subvec_1_target_subvec_mask_2_unary:
202 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,1,2,2]
204 %r = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
208 define <4 x double> @vec256_eltty_double_source_subvec_1_target_subvec_mask_2_binary(<4 x double> %x, <4 x double> %y) nounwind {
209 ; CHECK-LABEL: vec256_eltty_double_source_subvec_1_target_subvec_mask_2_binary:
211 ; CHECK-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
212 ; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3]
214 %r = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
218 define <4 x double> @vec256_eltty_double_source_subvec_1_target_subvec_mask_3_unary(<4 x double> %x) nounwind {
219 ; CHECK-LABEL: vec256_eltty_double_source_subvec_1_target_subvec_mask_3_unary:
221 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,2]
223 %r = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
227 define <4 x double> @vec256_eltty_double_source_subvec_1_target_subvec_mask_3_binary(<4 x double> %x, <4 x double> %y) nounwind {
228 ; CHECK-LABEL: vec256_eltty_double_source_subvec_1_target_subvec_mask_3_binary:
230 ; CHECK-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,2,2,2]
231 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
233 %r = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 6, i32 2, i32 6>
237 define <4 x i64> @vec256_eltty_i64_source_subvec_0_target_subvec_mask_1_unary(<4 x i64> %x) nounwind {
238 ; CHECK-LABEL: vec256_eltty_i64_source_subvec_0_target_subvec_mask_1_unary:
240 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
242 %r = shufflevector <4 x i64> %x, <4 x i64> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
246 define <4 x i64> @vec256_eltty_i64_source_subvec_0_target_subvec_mask_1_binary(<4 x i64> %x, <4 x i64> %y) nounwind {
247 ; CHECK-LABEL: vec256_eltty_i64_source_subvec_0_target_subvec_mask_1_binary:
249 ; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
250 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
252 %r = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
256 define <4 x i64> @vec256_eltty_i64_source_subvec_0_target_subvec_mask_2_unary(<4 x i64> %x) nounwind {
257 ; CHECK-LABEL: vec256_eltty_i64_source_subvec_0_target_subvec_mask_2_unary:
259 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,0]
261 %r = shufflevector <4 x i64> %x, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
265 define <4 x i64> @vec256_eltty_i64_source_subvec_0_target_subvec_mask_2_binary(<4 x i64> %x, <4 x i64> %y) nounwind {
266 ; CHECK-LABEL: vec256_eltty_i64_source_subvec_0_target_subvec_mask_2_binary:
268 ; CHECK-NEXT: vbroadcastsd %xmm1, %ymm1
269 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
271 %r = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
275 define <4 x i64> @vec256_eltty_i64_source_subvec_0_target_subvec_mask_3_unary(<4 x i64> %x) nounwind {
276 ; CHECK-LABEL: vec256_eltty_i64_source_subvec_0_target_subvec_mask_3_unary:
278 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
280 %r = shufflevector <4 x i64> %x, <4 x i64> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
284 define <4 x i64> @vec256_eltty_i64_source_subvec_0_target_subvec_mask_3_binary(<4 x i64> %x, <4 x i64> %y) nounwind {
285 ; CHECK-LABEL: vec256_eltty_i64_source_subvec_0_target_subvec_mask_3_binary:
287 ; CHECK-NEXT: vbroadcastsd %xmm1, %ymm1
288 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
290 %r = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 4>
294 define <4 x i64> @vec256_eltty_i64_source_subvec_1_target_subvec_mask_1_unary(<4 x i64> %x) nounwind {
295 ; CHECK-LABEL: vec256_eltty_i64_source_subvec_1_target_subvec_mask_1_unary:
297 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,3]
299 %r = shufflevector <4 x i64> %x, <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 3>
303 define <4 x i64> @vec256_eltty_i64_source_subvec_1_target_subvec_mask_1_binary(<4 x i64> %x, <4 x i64> %y) nounwind {
304 ; CHECK-LABEL: vec256_eltty_i64_source_subvec_1_target_subvec_mask_1_binary:
306 ; CHECK-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,2,2,2]
307 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
309 %r = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
313 define <4 x i64> @vec256_eltty_i64_source_subvec_1_target_subvec_mask_2_unary(<4 x i64> %x) nounwind {
314 ; CHECK-LABEL: vec256_eltty_i64_source_subvec_1_target_subvec_mask_2_unary:
316 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,2]
318 %r = shufflevector <4 x i64> %x, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
322 define <4 x i64> @vec256_eltty_i64_source_subvec_1_target_subvec_mask_2_binary(<4 x i64> %x, <4 x i64> %y) nounwind {
323 ; CHECK-LABEL: vec256_eltty_i64_source_subvec_1_target_subvec_mask_2_binary:
325 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,4,5]
326 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
328 %r = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
332 define <4 x i64> @vec256_eltty_i64_source_subvec_1_target_subvec_mask_3_unary(<4 x i64> %x) nounwind {
333 ; CHECK-LABEL: vec256_eltty_i64_source_subvec_1_target_subvec_mask_3_unary:
335 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,2]
337 %r = shufflevector <4 x i64> %x, <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
341 define <4 x i64> @vec256_eltty_i64_source_subvec_1_target_subvec_mask_3_binary(<4 x i64> %x, <4 x i64> %y) nounwind {
342 ; CHECK-LABEL: vec256_eltty_i64_source_subvec_1_target_subvec_mask_3_binary:
344 ; CHECK-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,2,2,2]
345 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
347 %r = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 0, i32 6, i32 2, i32 6>
351 define <8 x float> @vec256_eltty_float_source_subvec_0_target_subvec_mask_1_unary(<8 x float> %x) nounwind {
352 ; CHECK-LABEL: vec256_eltty_float_source_subvec_0_target_subvec_mask_1_unary:
354 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,0,4,5,6,7]
356 %r = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7>
360 define <8 x float> @vec256_eltty_float_source_subvec_0_target_subvec_mask_1_binary(<8 x float> %x, <8 x float> %y) nounwind {
361 ; CHECK-LABEL: vec256_eltty_float_source_subvec_0_target_subvec_mask_1_binary:
363 ; CHECK-NEXT: vbroadcastss %xmm1, %xmm1
364 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7]
366 %r = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 4, i32 5, i32 6, i32 7>
370 define <8 x float> @vec256_eltty_float_source_subvec_0_target_subvec_mask_2_unary(<8 x float> %x) nounwind {
371 ; CHECK-LABEL: vec256_eltty_float_source_subvec_0_target_subvec_mask_2_unary:
373 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,3,4,5,6,0]
374 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
376 %r = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 0>
380 define <8 x float> @vec256_eltty_float_source_subvec_0_target_subvec_mask_2_binary(<8 x float> %x, <8 x float> %y) nounwind {
381 ; CHECK-LABEL: vec256_eltty_float_source_subvec_0_target_subvec_mask_2_binary:
383 ; CHECK-NEXT: vbroadcastss %xmm1, %ymm1
384 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7]
386 %r = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 8>
390 define <8 x float> @vec256_eltty_float_source_subvec_0_target_subvec_mask_3_unary(<8 x float> %x) nounwind {
391 ; CHECK-LABEL: vec256_eltty_float_source_subvec_0_target_subvec_mask_3_unary:
393 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,0,4,5,6,0]
394 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
396 %r = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 0>
400 define <8 x float> @vec256_eltty_float_source_subvec_0_target_subvec_mask_3_binary(<8 x float> %x, <8 x float> %y) nounwind {
401 ; CHECK-LABEL: vec256_eltty_float_source_subvec_0_target_subvec_mask_3_binary:
403 ; CHECK-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,0,1]
404 ; CHECK-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
405 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,0],ymm0[4,5],ymm1[6,4]
407 %r = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 4, i32 5, i32 6, i32 8>
411 define <8 x float> @vec256_eltty_float_source_subvec_1_target_subvec_mask_1_unary(<8 x float> %x) nounwind {
412 ; CHECK-LABEL: vec256_eltty_float_source_subvec_1_target_subvec_mask_1_unary:
414 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,4,4,5,6,7]
415 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
417 %r = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 4, i32 5, i32 6, i32 7>
421 define <8 x float> @vec256_eltty_float_source_subvec_1_target_subvec_mask_1_binary(<8 x float> %x, <8 x float> %y) nounwind {
422 ; CHECK-LABEL: vec256_eltty_float_source_subvec_1_target_subvec_mask_1_binary:
424 ; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm1
425 ; CHECK-NEXT: vbroadcastss %xmm1, %xmm1
426 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7]
428 %r = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 12, i32 4, i32 5, i32 6, i32 7>
432 define <8 x float> @vec256_eltty_float_source_subvec_1_target_subvec_mask_2_unary(<8 x float> %x) nounwind {
433 ; CHECK-LABEL: vec256_eltty_float_source_subvec_1_target_subvec_mask_2_unary:
435 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,4]
437 %r = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>
441 define <8 x float> @vec256_eltty_float_source_subvec_1_target_subvec_mask_2_binary(<8 x float> %x, <8 x float> %y) nounwind {
442 ; CHECK-LABEL: vec256_eltty_float_source_subvec_1_target_subvec_mask_2_binary:
444 ; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm1
445 ; CHECK-NEXT: vbroadcastss %xmm1, %ymm1
446 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7]
448 %r = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 12>
452 define <8 x float> @vec256_eltty_float_source_subvec_1_target_subvec_mask_3_unary(<8 x float> %x) nounwind {
453 ; CHECK-LABEL: vec256_eltty_float_source_subvec_1_target_subvec_mask_3_unary:
455 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,4,4,5,6,4]
456 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
458 %r = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 4, i32 5, i32 6, i32 4>
462 define <8 x float> @vec256_eltty_float_source_subvec_1_target_subvec_mask_3_binary(<8 x float> %x, <8 x float> %y) nounwind {
463 ; CHECK-LABEL: vec256_eltty_float_source_subvec_1_target_subvec_mask_3_binary:
465 ; CHECK-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,3,2,3]
466 ; CHECK-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
467 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,0],ymm0[4,5],ymm1[6,4]
469 %r = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 12, i32 4, i32 5, i32 6, i32 12>
473 define <8 x i32> @vec256_eltty_i32_source_subvec_0_target_subvec_mask_1_unary(<8 x i32> %x) nounwind {
474 ; CHECK-LABEL: vec256_eltty_i32_source_subvec_0_target_subvec_mask_1_unary:
476 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,0,4,5,6,7]
477 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
479 %r = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7>
483 define <8 x i32> @vec256_eltty_i32_source_subvec_0_target_subvec_mask_1_binary(<8 x i32> %x, <8 x i32> %y) nounwind {
484 ; CHECK-LABEL: vec256_eltty_i32_source_subvec_0_target_subvec_mask_1_binary:
486 ; CHECK-NEXT: vbroadcastss %xmm1, %xmm1
487 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7]
489 %r = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 4, i32 5, i32 6, i32 7>
493 define <8 x i32> @vec256_eltty_i32_source_subvec_0_target_subvec_mask_2_unary(<8 x i32> %x) nounwind {
494 ; CHECK-LABEL: vec256_eltty_i32_source_subvec_0_target_subvec_mask_2_unary:
496 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,3,4,5,6,0]
497 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
499 %r = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 0>
503 define <8 x i32> @vec256_eltty_i32_source_subvec_0_target_subvec_mask_2_binary(<8 x i32> %x, <8 x i32> %y) nounwind {
504 ; CHECK-LABEL: vec256_eltty_i32_source_subvec_0_target_subvec_mask_2_binary:
506 ; CHECK-NEXT: vbroadcastss %xmm1, %ymm1
507 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7]
509 %r = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 8>
513 define <8 x i32> @vec256_eltty_i32_source_subvec_0_target_subvec_mask_3_unary(<8 x i32> %x) nounwind {
514 ; CHECK-LABEL: vec256_eltty_i32_source_subvec_0_target_subvec_mask_3_unary:
516 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,0,4,5,6,0]
517 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
519 %r = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 0>
523 define <8 x i32> @vec256_eltty_i32_source_subvec_0_target_subvec_mask_3_binary(<8 x i32> %x, <8 x i32> %y) nounwind {
524 ; CHECK-LABEL: vec256_eltty_i32_source_subvec_0_target_subvec_mask_3_binary:
526 ; CHECK-NEXT: vbroadcastss %xmm1, %ymm1
527 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6],ymm1[7]
529 %r = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 4, i32 5, i32 6, i32 8>
533 define <8 x i32> @vec256_eltty_i32_source_subvec_1_target_subvec_mask_1_unary(<8 x i32> %x) nounwind {
534 ; CHECK-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_1_unary:
536 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,4,4,5,6,7]
537 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
539 %r = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 4, i32 5, i32 6, i32 7>
543 define <8 x i32> @vec256_eltty_i32_source_subvec_1_target_subvec_mask_1_binary(<8 x i32> %x, <8 x i32> %y) nounwind {
544 ; CHECK-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_1_binary:
546 ; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm1
547 ; CHECK-NEXT: vbroadcastss %xmm1, %xmm1
548 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7]
550 %r = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 12, i32 4, i32 5, i32 6, i32 7>
554 define <8 x i32> @vec256_eltty_i32_source_subvec_1_target_subvec_mask_2_unary(<8 x i32> %x) nounwind {
555 ; CHECK-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_2_unary:
557 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,3,4,5,6,4]
558 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
560 %r = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>
564 define <8 x i32> @vec256_eltty_i32_source_subvec_1_target_subvec_mask_2_binary(<8 x i32> %x, <8 x i32> %y) nounwind {
565 ; CHECK-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_2_binary:
567 ; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm1
568 ; CHECK-NEXT: vbroadcastss %xmm1, %ymm1
569 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7]
571 %r = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 12>
575 define <8 x i32> @vec256_eltty_i32_source_subvec_1_target_subvec_mask_3_unary(<8 x i32> %x) nounwind {
576 ; CHECK-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_3_unary:
578 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,4,4,5,6,4]
579 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
581 %r = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 4, i32 5, i32 6, i32 4>
585 define <8 x i32> @vec256_eltty_i32_source_subvec_1_target_subvec_mask_3_binary(<8 x i32> %x, <8 x i32> %y) nounwind {
586 ; CHECK-SLOW-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_3_binary:
587 ; CHECK-SLOW: # %bb.0:
588 ; CHECK-SLOW-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,3,2,3]
589 ; CHECK-SLOW-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
590 ; CHECK-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6],ymm1[7]
591 ; CHECK-SLOW-NEXT: retq
593 ; CHECK-FAST-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_3_binary:
594 ; CHECK-FAST: # %bb.0:
595 ; CHECK-FAST-NEXT: vbroadcastss {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4]
596 ; CHECK-FAST-NEXT: vpermps %ymm1, %ymm2, %ymm1
597 ; CHECK-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6],ymm1[7]
598 ; CHECK-FAST-NEXT: retq
599 %r = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 12, i32 4, i32 5, i32 6, i32 12>
603 define <16 x i16> @vec256_eltty_i16_source_subvec_0_target_subvec_mask_1_unary(<16 x i16> %x) nounwind {
604 ; CHECK-LABEL: vec256_eltty_i16_source_subvec_0_target_subvec_mask_1_unary:
606 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,0,1,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
608 %r = shufflevector <16 x i16> %x, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 0, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
612 define <16 x i16> @vec256_eltty_i16_source_subvec_0_target_subvec_mask_1_binary(<16 x i16> %x, <16 x i16> %y) nounwind {
613 ; CHECK-LABEL: vec256_eltty_i16_source_subvec_0_target_subvec_mask_1_binary:
615 ; CHECK-NEXT: vpbroadcastw %xmm1, %xmm1
616 ; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,6],xmm1[7]
617 ; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
619 %r = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 16, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
623 define <16 x i16> @vec256_eltty_i16_source_subvec_0_target_subvec_mask_2_unary(<16 x i16> %x) nounwind {
624 ; CHECK-LABEL: vec256_eltty_i16_source_subvec_0_target_subvec_mask_2_unary:
626 ; CHECK-NEXT: vpbroadcastw %xmm0, %ymm1
627 ; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
628 ; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
630 %r = shufflevector <16 x i16> %x, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
634 define <16 x i16> @vec256_eltty_i16_source_subvec_0_target_subvec_mask_2_binary(<16 x i16> %x, <16 x i16> %y) nounwind {
635 ; CHECK-LABEL: vec256_eltty_i16_source_subvec_0_target_subvec_mask_2_binary:
637 ; CHECK-NEXT: vpbroadcastw %xmm1, %ymm1
638 ; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
639 ; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
641 %r = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16>
645 define <16 x i16> @vec256_eltty_i16_source_subvec_0_target_subvec_mask_3_unary(<16 x i16> %x) nounwind {
646 ; CHECK-LABEL: vec256_eltty_i16_source_subvec_0_target_subvec_mask_3_unary:
648 ; CHECK-NEXT: vpbroadcastw %xmm0, %ymm1
649 ; CHECK-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
651 %r = shufflevector <16 x i16> %x, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 0, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
655 define <16 x i16> @vec256_eltty_i16_source_subvec_0_target_subvec_mask_3_binary(<16 x i16> %x, <16 x i16> %y) nounwind {
656 ; CHECK-LABEL: vec256_eltty_i16_source_subvec_0_target_subvec_mask_3_binary:
658 ; CHECK-NEXT: vpbroadcastw %xmm1, %ymm1
659 ; CHECK-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
661 %r = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 16, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16>
665 define <16 x i16> @vec256_eltty_i16_source_subvec_1_target_subvec_mask_1_unary(<16 x i16> %x) nounwind {
666 ; CHECK-LABEL: vec256_eltty_i16_source_subvec_1_target_subvec_mask_1_unary:
668 ; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
669 ; CHECK-NEXT: vpbroadcastw %xmm1, %xmm1
670 ; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,6],xmm1[7]
671 ; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
673 %r = shufflevector <16 x i16> %x, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 8, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
677 define <16 x i16> @vec256_eltty_i16_source_subvec_1_target_subvec_mask_1_binary(<16 x i16> %x, <16 x i16> %y) nounwind {
678 ; CHECK-LABEL: vec256_eltty_i16_source_subvec_1_target_subvec_mask_1_binary:
680 ; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm1
681 ; CHECK-NEXT: vpbroadcastw %xmm1, %xmm1
682 ; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,6],xmm1[7]
683 ; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
685 %r = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 24, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
689 define <16 x i16> @vec256_eltty_i16_source_subvec_1_target_subvec_mask_2_unary(<16 x i16> %x) nounwind {
690 ; CHECK-LABEL: vec256_eltty_i16_source_subvec_1_target_subvec_mask_2_unary:
692 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,16,17]
694 %r = shufflevector <16 x i16> %x, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>
698 define <16 x i16> @vec256_eltty_i16_source_subvec_1_target_subvec_mask_2_binary(<16 x i16> %x, <16 x i16> %y) nounwind {
699 ; CHECK-LABEL: vec256_eltty_i16_source_subvec_1_target_subvec_mask_2_binary:
701 ; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm1
702 ; CHECK-NEXT: vpbroadcastw %xmm1, %ymm1
703 ; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
704 ; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
706 %r = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 24>
710 define <16 x i16> @vec256_eltty_i16_source_subvec_1_target_subvec_mask_3_unary(<16 x i16> %x) nounwind {
711 ; CHECK-LABEL: vec256_eltty_i16_source_subvec_1_target_subvec_mask_3_unary:
713 ; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
714 ; CHECK-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17]
715 ; CHECK-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
717 %r = shufflevector <16 x i16> %x, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 8, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>
721 define <16 x i16> @vec256_eltty_i16_source_subvec_1_target_subvec_mask_3_binary(<16 x i16> %x, <16 x i16> %y) nounwind {
722 ; CHECK-LABEL: vec256_eltty_i16_source_subvec_1_target_subvec_mask_3_binary:
724 ; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
725 ; CHECK-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17]
726 ; CHECK-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
728 %r = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 24, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 24>
732 define <32 x i8> @vec256_eltty_i8_source_subvec_0_target_subvec_mask_1_unary(<32 x i8> %x) nounwind {
733 ; CHECK-LABEL: vec256_eltty_i8_source_subvec_0_target_subvec_mask_1_unary:
735 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
737 %r = shufflevector <32 x i8> %x, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
741 define <32 x i8> @vec256_eltty_i8_source_subvec_0_target_subvec_mask_1_binary(<32 x i8> %x, <32 x i8> %y) nounwind {
742 ; CHECK-LABEL: vec256_eltty_i8_source_subvec_0_target_subvec_mask_1_binary:
744 ; CHECK-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
745 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
746 ; CHECK-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
748 %r = shufflevector <32 x i8> %x, <32 x i8> %y, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
752 define <32 x i8> @vec256_eltty_i8_source_subvec_0_target_subvec_mask_2_unary(<32 x i8> %x) nounwind {
753 ; CHECK-LABEL: vec256_eltty_i8_source_subvec_0_target_subvec_mask_2_unary:
755 ; CHECK-NEXT: vpbroadcastb %xmm0, %ymm1
756 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
757 ; CHECK-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
759 %r = shufflevector <32 x i8> %x, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 0>
763 define <32 x i8> @vec256_eltty_i8_source_subvec_0_target_subvec_mask_2_binary(<32 x i8> %x, <32 x i8> %y) nounwind {
764 ; CHECK-LABEL: vec256_eltty_i8_source_subvec_0_target_subvec_mask_2_binary:
766 ; CHECK-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
767 ; CHECK-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
768 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
769 ; CHECK-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
771 %r = shufflevector <32 x i8> %x, <32 x i8> %y, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 32>
775 define <32 x i8> @vec256_eltty_i8_source_subvec_0_target_subvec_mask_3_unary(<32 x i8> %x) nounwind {
776 ; CHECK-LABEL: vec256_eltty_i8_source_subvec_0_target_subvec_mask_3_unary:
778 ; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,1,0,1]
779 ; CHECK-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16]
780 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
781 ; CHECK-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
783 %r = shufflevector <32 x i8> %x, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 0>
787 define <32 x i8> @vec256_eltty_i8_source_subvec_0_target_subvec_mask_3_binary(<32 x i8> %x, <32 x i8> %y) nounwind {
788 ; CHECK-LABEL: vec256_eltty_i8_source_subvec_0_target_subvec_mask_3_binary:
790 ; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,0,1]
791 ; CHECK-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16]
792 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
793 ; CHECK-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
795 %r = shufflevector <32 x i8> %x, <32 x i8> %y, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 32>
799 define <32 x i8> @vec256_eltty_i8_source_subvec_1_target_subvec_mask_1_unary(<32 x i8> %x) nounwind {
800 ; CHECK-LABEL: vec256_eltty_i8_source_subvec_1_target_subvec_mask_1_unary:
802 ; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
803 ; CHECK-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
804 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
805 ; CHECK-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
807 %r = shufflevector <32 x i8> %x, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
811 define <32 x i8> @vec256_eltty_i8_source_subvec_1_target_subvec_mask_1_binary(<32 x i8> %x, <32 x i8> %y) nounwind {
812 ; CHECK-LABEL: vec256_eltty_i8_source_subvec_1_target_subvec_mask_1_binary:
814 ; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm1
815 ; CHECK-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
816 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
817 ; CHECK-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
819 %r = shufflevector <32 x i8> %x, <32 x i8> %y, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 48, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
823 define <32 x i8> @vec256_eltty_i8_source_subvec_1_target_subvec_mask_2_unary(<32 x i8> %x) nounwind {
824 ; CHECK-LABEL: vec256_eltty_i8_source_subvec_1_target_subvec_mask_2_unary:
826 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,16]
828 %r = shufflevector <32 x i8> %x, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 16>
832 define <32 x i8> @vec256_eltty_i8_source_subvec_1_target_subvec_mask_2_binary(<32 x i8> %x, <32 x i8> %y) nounwind {
833 ; CHECK-LABEL: vec256_eltty_i8_source_subvec_1_target_subvec_mask_2_binary:
835 ; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm1
836 ; CHECK-NEXT: vpbroadcastb %xmm1, %ymm1
837 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
838 ; CHECK-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
840 %r = shufflevector <32 x i8> %x, <32 x i8> %y, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 48>
844 define <32 x i8> @vec256_eltty_i8_source_subvec_1_target_subvec_mask_3_unary(<32 x i8> %x) nounwind {
845 ; CHECK-LABEL: vec256_eltty_i8_source_subvec_1_target_subvec_mask_3_unary:
847 ; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
848 ; CHECK-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16]
849 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
850 ; CHECK-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
852 %r = shufflevector <32 x i8> %x, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 16>
856 define <32 x i8> @vec256_eltty_i8_source_subvec_1_target_subvec_mask_3_binary(<32 x i8> %x, <32 x i8> %y) nounwind {
857 ; CHECK-LABEL: vec256_eltty_i8_source_subvec_1_target_subvec_mask_3_binary:
859 ; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
860 ; CHECK-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16]
861 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
862 ; CHECK-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
864 %r = shufflevector <32 x i8> %x, <32 x i8> %y, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 48, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 48>