1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=ALL,AVX512F
3 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512bw,+avx512dq | FileCheck %s --check-prefixes=ALL,AVX512BW
5 target triple = "x86_64-unknown-unknown"
7 define <16 x float> @shuffle_v16f32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x float> %a, <16 x float> %b) {
8 ; ALL-LABEL: shuffle_v16f32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
10 ; ALL-NEXT: vbroadcastss %xmm0, %zmm0
12 %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
13 ret <16 x float> %shuffle
16 define <16 x float> @shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08(<16 x float> %a, <16 x float> %b) {
17 ; ALL-LABEL: shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08:
19 ; ALL-NEXT: vextractf32x4 $2, %zmm0, %xmm0
20 ; ALL-NEXT: vbroadcastss %xmm0, %zmm0
22 %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
23 ret <16 x float> %shuffle
26 define <16 x float> @shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_bc(<16 x i32> %a, <16 x i32> %b) {
27 ; ALL-LABEL: shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_bc:
29 ; ALL-NEXT: vextractf32x4 $2, %zmm0, %xmm0
30 ; ALL-NEXT: vbroadcastss %xmm0, %zmm0
32 %tmp0 = bitcast <16 x i32> %a to <16 x float>
33 %tmp1 = bitcast <16 x i32> %b to <16 x float>
34 %shuffle = shufflevector <16 x float> %tmp0, <16 x float> %tmp1, <16 x i32><i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
35 ret <16 x float> %shuffle
38 define <16 x float> @shuffle_v16f32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d(<16 x float> %a, <16 x float> %b) {
39 ; ALL-LABEL: shuffle_v16f32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d:
41 ; ALL-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
43 %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
44 ret <16 x float> %shuffle
47 define <16 x float> @shuffle_v16f32_00_zz_01_zz_04_zz_05_zz_08_zz_09_zz_0c_zz_0d_zz(<16 x float> %a, <16 x float> %b) {
48 ; ALL-LABEL: shuffle_v16f32_00_zz_01_zz_04_zz_05_zz_08_zz_09_zz_0c_zz_0d_zz:
50 ; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
51 ; ALL-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
53 %shuffle = shufflevector <16 x float> %a, <16 x float> zeroinitializer, <16 x i32><i32 0, i32 16, i32 1, i32 16, i32 4, i32 16, i32 5, i32 16, i32 8, i32 16, i32 9, i32 16, i32 12, i32 16, i32 13, i32 16>
54 ret <16 x float> %shuffle
57 define <16 x float> @shuffle_v16f32_vunpcklps_swap(<16 x float> %a, <16 x float> %b) {
58 ; ALL-LABEL: shuffle_v16f32_vunpcklps_swap:
60 ; ALL-NEXT: vunpcklps {{.*#+}} zmm0 = zmm1[0],zmm0[0],zmm1[1],zmm0[1],zmm1[4],zmm0[4],zmm1[5],zmm0[5],zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[12],zmm0[12],zmm1[13],zmm0[13]
62 %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 16, i32 0, i32 17, i32 1, i32 20, i32 4, i32 21, i32 5, i32 24, i32 8, i32 25, i32 9, i32 28, i32 12, i32 29, i32 13>
63 ret <16 x float> %shuffle
67 define <16 x float> @shuffle_v16f32_01_01_03_00_06_04_05_07_08_08_09_09_15_14_14_12(<16 x float> %a0) {
68 ; ALL-LABEL: shuffle_v16f32_01_01_03_00_06_04_05_07_08_08_09_09_15_14_14_12:
70 ; ALL-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,1,3,0,6,4,5,7,8,8,9,9,15,14,14,12]
72 %shuffle = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 0, i32 6, i32 4, i32 5, i32 7, i32 8, i32 8, i32 9, i32 9, i32 15, i32 14, i32 14, i32 12>
73 ret <16 x float> %shuffle
76 define <16 x i32> @shuffle_v16i32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d(<16 x i32> %a, <16 x i32> %b) {
77 ; ALL-LABEL: shuffle_v16i32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d:
79 ; ALL-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
81 %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
82 ret <16 x i32> %shuffle
85 define <16 x i32> @shuffle_v16i32_zz_10_zz_11_zz_14_zz_15_zz_18_zz_19_zz_1c_zz_1d(<16 x i32> %a, <16 x i32> %b) {
86 ; ALL-LABEL: shuffle_v16i32_zz_10_zz_11_zz_14_zz_15_zz_18_zz_19_zz_1c_zz_1d:
88 ; ALL-NEXT: vxorps %xmm0, %xmm0, %xmm0
89 ; ALL-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
91 %shuffle = shufflevector <16 x i32> zeroinitializer, <16 x i32> %b, <16 x i32><i32 15, i32 16, i32 13, i32 17, i32 11, i32 20, i32 9, i32 21, i32 7, i32 24, i32 5, i32 25, i32 3, i32 28, i32 1, i32 29>
92 ret <16 x i32> %shuffle
95 define <16 x float> @shuffle_v16f32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f(<16 x float> %a, <16 x float> %b) {
96 ; ALL-LABEL: shuffle_v16f32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f:
98 ; ALL-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
100 %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
101 ret <16 x float> %shuffle
104 define <16 x float> @shuffle_v16f32_zz_12_zz_13_zz_16_zz_17_zz_1a_zz_1b_zz_1e_zz_1f(<16 x float> %a, <16 x float> %b) {
105 ; ALL-LABEL: shuffle_v16f32_zz_12_zz_13_zz_16_zz_17_zz_1a_zz_1b_zz_1e_zz_1f:
107 ; ALL-NEXT: vxorps %xmm0, %xmm0, %xmm0
108 ; ALL-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
110 %shuffle = shufflevector <16 x float> zeroinitializer, <16 x float> %b, <16 x i32><i32 0, i32 18, i32 0, i32 19, i32 4, i32 22, i32 4, i32 23, i32 6, i32 26, i32 6, i32 27, i32 8, i32 30, i32 8, i32 31>
111 ret <16 x float> %shuffle
114 define <16 x float> @shuffle_v16f32_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14(<16 x float> %a, <16 x float> %b) {
115 ; ALL-LABEL: shuffle_v16f32_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
117 ; ALL-NEXT: vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
119 %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
120 ret <16 x float> %shuffle
123 define <16 x float> @shuffle_v16f32_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15(<16 x float> %a, <16 x float> %b) {
124 ; ALL-LABEL: shuffle_v16f32_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
126 ; ALL-NEXT: vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
128 %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
129 ret <16 x float> %shuffle
132 define <16 x float> @shuffle_v16f32_00_01_00_01_06_07_06_07_08_09_10_11_12_13_12_13(<16 x float> %a, <16 x float> %b) {
133 ; ALL-LABEL: shuffle_v16f32_00_01_00_01_06_07_06_07_08_09_10_11_12_13_12_13:
135 ; ALL-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,0,3,3,4,5,6,6]
137 %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 6, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 12, i32 13>
138 ret <16 x float> %shuffle
141 define <16 x float> @shuffle_v16f32_00_00_02_00_04_04_06_04_08_08_10_08_12_12_14_12(<16 x float> %a, <16 x float> %b) {
142 ; ALL-LABEL: shuffle_v16f32_00_00_02_00_04_04_06_04_08_08_10_08_12_12_14_12:
144 ; ALL-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[0,0,2,0,4,4,6,4,8,8,10,8,12,12,14,12]
146 %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4, i32 8, i32 8, i32 10, i32 8, i32 12, i32 12, i32 14, i32 12>
147 ret <16 x float> %shuffle
150 define <16 x float> @shuffle_v16f32_03_uu_uu_uu_uu_04_uu_uu_uu_uu_11_uu_uu_uu_uu_12(<16 x float> %a, <16 x float> %b) {
151 ; ALL-LABEL: shuffle_v16f32_03_uu_uu_uu_uu_04_uu_uu_uu_uu_11_uu_uu_uu_uu_12:
153 ; ALL-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,0,3,0,7,4,7,4,11,8,11,8,15,12,15,12]
155 %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 12>
156 ret <16 x float> %shuffle
160 define <16 x float> @shuffle_v16f32_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x float> %a) {
161 ; ALL-LABEL: shuffle_v16f32_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
163 ; ALL-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
165 %tmp1 = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> <i32 undef, i32 17, i32 undef, i32 19, i32 undef, i32 5, i32 undef, i32 7, i32 undef, i32 9, i32 undef, i32 11, i32 undef, i32 13, i32 undef, i32 15>
166 %tmp2 = shufflevector <16 x float> %tmp1, <16 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float undef, float 0.000000e+00, float undef, float 0.000000e+00, float undef, float 0.000000e+00, float undef, float 0.000000e+00, float undef, float 0.000000e+00, float undef, float 0.000000e+00, float undef>, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
167 ret <16 x float> %tmp2
171 define <16 x float> @shuffle_v16f32_02_03_16_17_06_07_20_21_10_11_24_25_14_15_28_29(<16 x float> %a, <16 x float> %b) {
172 ; ALL-LABEL: shuffle_v16f32_02_03_16_17_06_07_20_21_10_11_24_25_14_15_28_29:
174 ; ALL-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[1],zmm1[0],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[7],zmm1[6]
176 %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 20, i32 21, i32 10, i32 11, i32 24, i32 25, i32 14, i32 15, i32 28, i32 29>
177 ret <16 x float> %shuffle
180 define <16 x i32> @shuffle_v16i32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i32> %a, <16 x i32> %b) {
181 ; ALL-LABEL: shuffle_v16i32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
183 ; ALL-NEXT: vbroadcastss %xmm0, %zmm0
185 %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
186 ret <16 x i32> %shuffle
189 define <16 x i32> @shuffle_v16i32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04(<16 x i32> %a, <16 x i32> %b) {
190 ; ALL-LABEL: shuffle_v16i32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04:
192 ; ALL-NEXT: vextractf128 $1, %ymm0, %xmm0
193 ; ALL-NEXT: vbroadcastss %xmm0, %zmm0
195 %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
196 ret <16 x i32> %shuffle
199 define <16 x i32> @shuffle_v16i32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f(<16 x i32> %a, <16 x i32> %b) {
200 ; ALL-LABEL: shuffle_v16i32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f:
202 ; ALL-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
204 %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
205 ret <16 x i32> %shuffle
208 define <16 x i32> @shuffle_v16i32_02_zz_03_zz_06_zz_07_zz_0a_zz_0b_zz_0e_zz_0f_zz(<16 x i32> %a, <16 x i32> %b) {
209 ; ALL-LABEL: shuffle_v16i32_02_zz_03_zz_06_zz_07_zz_0a_zz_0b_zz_0e_zz_0f_zz:
211 ; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
212 ; ALL-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
214 %shuffle = shufflevector <16 x i32> %a, <16 x i32> zeroinitializer, <16 x i32><i32 2, i32 30, i32 3, i32 28, i32 6, i32 26, i32 7, i32 24, i32 10, i32 22, i32 11, i32 20, i32 14, i32 18, i32 15, i32 16>
215 ret <16 x i32> %shuffle
218 define <16 x i32> @shuffle_v16i32_01_02_03_16_05_06_07_20_09_10_11_24_13_14_15_28(<16 x i32> %a, <16 x i32> %b) {
219 ; AVX512F-LABEL: shuffle_v16i32_01_02_03_16_05_06_07_20_09_10_11_24_13_14_15_28:
221 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,2,3,16,5,6,7,20,9,10,11,24,13,14,15,28]
222 ; AVX512F-NEXT: vpermt2d %zmm1, %zmm2, %zmm0
225 ; AVX512BW-LABEL: shuffle_v16i32_01_02_03_16_05_06_07_20_09_10_11_24_13_14_15_28:
227 ; AVX512BW-NEXT: vpalignr {{.*#+}} zmm0 = zmm0[4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0,1,2,3],zmm0[20,21,22,23,24,25,26,27,28,29,30,31],zmm1[16,17,18,19],zmm0[36,37,38,39,40,41,42,43,44,45,46,47],zmm1[32,33,34,35],zmm0[52,53,54,55,56,57,58,59,60,61,62,63],zmm1[48,49,50,51]
228 ; AVX512BW-NEXT: retq
229 %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 1, i32 2, i32 3, i32 16, i32 5, i32 6, i32 7, i32 20, i32 9, i32 10, i32 11, i32 24, i32 13, i32 14, i32 15, i32 28>
230 ret <16 x i32> %shuffle
233 define <16 x float> @shuffle_v16f32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01(<16 x float> %a) {
234 ; ALL-LABEL: shuffle_v16f32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01:
236 ; ALL-NEXT: vmovaps {{.*#+}} zmm1 = <2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,1>
237 ; ALL-NEXT: vpermps %zmm0, %zmm1, %zmm0
239 %c = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
243 define <16 x i32> @shuffle_v16i32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01(<16 x i32> %a) {
244 ; ALL-LABEL: shuffle_v16i32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01:
246 ; ALL-NEXT: vmovaps {{.*#+}} zmm1 = <2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,1>
247 ; ALL-NEXT: vpermps %zmm0, %zmm1, %zmm0
249 %c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
253 define <16 x i32> @shuffle_v16i32_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18(<16 x i32> %a, <16 x i32> %b) {
254 ; ALL-LABEL: shuffle_v16i32_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18:
256 ; ALL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24]
257 ; ALL-NEXT: vpermt2d %zmm1, %zmm2, %zmm0
259 %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
263 define <16 x float> @shuffle_v16f32_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18(<16 x float> %a, <16 x float> %b) {
264 ; ALL-LABEL: shuffle_v16f32_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18:
266 ; ALL-NEXT: vmovaps {{.*#+}} zmm2 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24]
267 ; ALL-NEXT: vpermt2ps %zmm1, %zmm2, %zmm0
269 %c = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
274 define <16 x i32> @shuffle_v16i32_0b_0a_09_08_0f_0e_0d_0c_03_02_01_00_07_06_05_04(<16 x i32> %a) {
275 ; ALL-LABEL: shuffle_v16i32_0b_0a_09_08_0f_0e_0d_0c_03_02_01_00_07_06_05_04:
277 ; ALL-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
278 ; ALL-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3]
280 %1 = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
284 define <16 x float> @shuffle_v16f32_0b_0a_09_08_0f_0e_0d_0c_03_02_01_00_07_06_05_04(<16 x float> %a) {
285 ; ALL-LABEL: shuffle_v16f32_0b_0a_09_08_0f_0e_0d_0c_03_02_01_00_07_06_05_04:
287 ; ALL-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
288 ; ALL-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3]
290 %1 = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> <i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
294 define <16 x float> @shuffle_v16f32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18(<16 x float> %a, <16 x float>* %b) {
295 ; ALL-LABEL: shuffle_v16f32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18:
297 ; ALL-NEXT: vmovaps {{.*#+}} zmm1 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24]
298 ; ALL-NEXT: vpermt2ps (%rdi), %zmm1, %zmm0
300 %c = load <16 x float>, <16 x float>* %b
301 %d = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
305 define <16 x float> @shuffle_v16f32_load_08_11_10_00_12_15_14_04(<16 x float> %a0, <16 x float>* %a1) {
306 ; ALL-LABEL: shuffle_v16f32_load_08_11_10_00_12_15_14_04:
308 ; ALL-NEXT: vshufps {{.*#+}} zmm1 = zmm0[2,0],mem[0,0],zmm0[6,4],mem[4,4],zmm0[10,8],mem[8,8],zmm0[14,12],mem[12,12]
309 ; ALL-NEXT: vshufps {{.*#+}} zmm0 = zmm0[0,3],zmm1[0,2],zmm0[4,7],zmm1[4,6],zmm0[8,11],zmm1[8,10],zmm0[12,15],zmm1[12,14]
311 %1 = load <16 x float>, <16 x float>* %a1
312 %2 = shufflevector <16 x float> %1, <16 x float> %a0, <16 x i32> <i32 16, i32 19, i32 18, i32 0, i32 20, i32 23, i32 22, i32 4, i32 24, i32 27, i32 26, i32 8, i32 28, i32 31, i32 30, i32 12>
316 define <16 x i32> @shuffle_v16i32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18(<16 x i32> %a, <16 x i32>* %b) {
317 ; ALL-LABEL: shuffle_v16i32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18:
319 ; ALL-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24]
320 ; ALL-NEXT: vpermt2d (%rdi), %zmm1, %zmm0
322 %c = load <16 x i32>, <16 x i32>* %b
323 %d = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
327 define <16 x i32> @shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u(<16 x i32> %a, <16 x i32> %b) {
328 ; ALL-LABEL: shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u:
330 ; ALL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
332 %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
336 ;FIXME: can do better with vpcompress
337 define <8 x i32> @test_v16i32_1_3_5_7_9_11_13_15(<16 x i32> %v) {
338 ; ALL-LABEL: test_v16i32_1_3_5_7_9_11_13_15:
340 ; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
341 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
342 ; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
344 %res = shufflevector <16 x i32> %v, <16 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
348 ;FIXME: can do better with vpcompress
349 define <4 x i32> @test_v16i32_0_1_2_12 (<16 x i32> %v) {
350 ; ALL-LABEL: test_v16i32_0_1_2_12:
352 ; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
353 ; ALL-NEXT: vextractf128 $1, %ymm1, %xmm1
354 ; ALL-NEXT: vbroadcastss %xmm1, %xmm1
355 ; ALL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
356 ; ALL-NEXT: vzeroupper
358 %res = shufflevector <16 x i32> %v, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 12>
363 ;FIXME: can do better with vpcompress
364 define <4 x i32> @test_v16i32_0_4_8_12(<16 x i32> %v) {
365 ; ALL-LABEL: test_v16i32_0_4_8_12:
367 ; ALL-NEXT: vmovaps {{.*#+}} xmm1 = [0,4,8,12]
368 ; ALL-NEXT: vpermps %zmm0, %zmm1, %zmm0
369 ; ALL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
370 ; ALL-NEXT: vzeroupper
372 %res = shufflevector <16 x i32> %v, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
376 define <8 x float> @shuffle_v16f32_extract_256(float* %RET, float* %a) {
377 ; ALL-LABEL: shuffle_v16f32_extract_256:
379 ; ALL-NEXT: vmovups 32(%rsi), %ymm0
381 %ptr_a = bitcast float* %a to <16 x float>*
382 %v_a = load <16 x float>, <16 x float>* %ptr_a, align 4
383 %v2 = shufflevector <16 x float> %v_a, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
387 ;FIXME: can do better with vcompressp
388 define <8 x float> @test_v16f32_0_1_2_3_4_6_7_10 (<16 x float> %v) {
389 ; ALL-LABEL: test_v16f32_0_1_2_3_4_6_7_10:
391 ; ALL-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,3,4,6,7,10]
392 ; ALL-NEXT: vpermps %zmm0, %zmm1, %zmm0
393 ; ALL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
395 %res = shufflevector <16 x float> %v, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 6, i32 7, i32 10>
399 ;FIXME: can do better with vcompressp
400 define <4 x float> @test_v16f32_0_1_3_6 (<16 x float> %v) {
401 ; ALL-LABEL: test_v16f32_0_1_3_6:
403 ; ALL-NEXT: vmovaps {{.*#+}} xmm1 = [0,1,3,6]
404 ; ALL-NEXT: vpermps %zmm0, %zmm1, %zmm0
405 ; ALL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
406 ; ALL-NEXT: vzeroupper
408 %res = shufflevector <16 x float> %v, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 6>
412 define <16 x i32> @shuffle_v16i16_1_0_0_0_5_4_4_4_9_8_8_8_13_12_12_12(<16 x i32> %a, <16 x i32> %b) {
413 ; ALL-LABEL: shuffle_v16i16_1_0_0_0_5_4_4_4_9_8_8_8_13_12_12_12:
415 ; ALL-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
417 %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
421 define <16 x i32> @shuffle_v16i16_3_3_0_0_7_7_4_4_11_11_8_8_15_15_12_12(<16 x i32> %a, <16 x i32> %b) {
422 ; ALL-LABEL: shuffle_v16i16_3_3_0_0_7_7_4_4_11_11_8_8_15_15_12_12:
424 ; ALL-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
426 %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
430 define <16 x float> @shuffle_v16f32_00_01_10_10_04_05_14_14_08_09_18_18_0c_0d_1c_1c(<16 x float> %a, <16 x float> %b) {
431 ; ALL-LABEL: shuffle_v16f32_00_01_10_10_04_05_14_14_08_09_18_18_0c_0d_1c_1c:
433 ; ALL-NEXT: vshufps {{.*#+}} zmm0 = zmm0[0,1],zmm1[0,0],zmm0[4,5],zmm1[4,4],zmm0[8,9],zmm1[8,8],zmm0[12,13],zmm1[12,12]
435 %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 4, i32 5, i32 20, i32 20, i32 8, i32 9, i32 24, i32 24, i32 12, i32 13, i32 28, i32 28>
436 ret <16 x float> %shuffle
439 define <16 x i32> @insert_mem_and_zero_v16i32(i32* %ptr) {
440 ; ALL-LABEL: insert_mem_and_zero_v16i32:
442 ; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
444 %a = load i32, i32* %ptr
445 %v = insertelement <16 x i32> undef, i32 %a, i32 0
446 %shuffle = shufflevector <16 x i32> %v, <16 x i32> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
447 ret <16 x i32> %shuffle
451 define <16 x i32> @shuffle_v16i32_0zzzzzzzzzzzzzzz(<16 x i32> %a) {
452 ; ALL-LABEL: shuffle_v16i32_0zzzzzzzzzzzzzzz:
454 ; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
455 ; ALL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
457 %shuffle = shufflevector <16 x i32> %a, <16 x i32> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
458 ret <16 x i32> %shuffle
461 define <16 x float> @shuffle_v16f32_0zzzzzzzzzzzzzzz(<16 x float> %a) {
462 ; ALL-LABEL: shuffle_v16f32_0zzzzzzzzzzzzzzz:
464 ; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
465 ; ALL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
467 %shuffle = shufflevector <16 x float> %a, <16 x float> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
468 ret <16 x float> %shuffle
471 define <16 x i32> @shuffle_v16i32_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_23_zz(<16 x i32> %a) {
472 ; ALL-LABEL: shuffle_v16i32_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_23_zz:
474 ; ALL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
476 %shuffle = shufflevector <16 x i32> zeroinitializer, <16 x i32> %a, <16 x i32> <i32 16, i32 0, i32 17, i32 0, i32 18, i32 0, i32 19, i32 0, i32 20, i32 0, i32 21, i32 0, i32 22, i32 0, i32 23, i32 0>
477 ret <16 x i32> %shuffle
480 define <16 x i32> @shuffle_v16i32_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16(<16 x i32> %a, <16 x i32> %b) {
481 ; ALL-LABEL: shuffle_v16i32_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
483 ; ALL-NEXT: valignd {{.*#+}} zmm0 = zmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0]
485 %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
486 ret <16 x i32> %shuffle
489 define <16 x i32> @shuffle_v16i32_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00(<16 x i32> %a) {
490 ; ALL-LABEL: shuffle_v16i32_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
492 ; ALL-NEXT: valignd {{.*#+}} zmm0 = zmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
494 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32><i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0>
495 ret <16 x i32> %shuffle
498 define <16 x i32> @shuffle_v16i32_00_03_16_19_04_07_20_23_08_11_24_27_12_15_28_31(<16 x i32> %a, <16 x i32> %b) {
499 ; ALL-LABEL: shuffle_v16i32_00_03_16_19_04_07_20_23_08_11_24_27_12_15_28_31:
501 ; ALL-NEXT: vshufps {{.*#+}} zmm0 = zmm0[0,3],zmm1[0,3],zmm0[4,7],zmm1[4,7],zmm0[8,11],zmm1[8,11],zmm0[12,15],zmm1[12,15]
503 %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 3, i32 16, i32 19, i32 4, i32 7, i32 20, i32 23, i32 8, i32 11, i32 24, i32 27, i32 12, i32 15, i32 28, i32 31>
504 ret <16 x i32> %shuffle
507 define <16 x i32> @shuffle_v16i32_16_16_02_03_20_20_06_07_24_24_10_11_28_28_uu_uu(<16 x i32> %a, <16 x i32> %b) {
508 ; ALL-LABEL: shuffle_v16i32_16_16_02_03_20_20_06_07_24_24_10_11_28_28_uu_uu:
510 ; ALL-NEXT: vshufps {{.*#+}} zmm0 = zmm1[0,0],zmm0[2,3],zmm1[4,4],zmm0[6,7],zmm1[8,8],zmm0[10,11],zmm1[12,12],zmm0[14,15]
512 %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 16, i32 16, i32 02, i32 03, i32 20, i32 20, i32 06, i32 07, i32 24, i32 24, i32 10, i32 11, i32 28, i32 28, i32 undef, i32 undef>
513 ret <16 x i32> %shuffle
517 define <16 x i32> @shuffle_v16i32_02_03_16_17_06_07_20_21_10_11_24_25_14_15_28_29(<16 x i32> %a, <16 x i32> %b) {
518 ; AVX512F-LABEL: shuffle_v16i32_02_03_16_17_06_07_20_21_10_11_24_25_14_15_28_29:
520 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,8,3,10,5,12,7,14]
521 ; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
524 ; AVX512BW-LABEL: shuffle_v16i32_02_03_16_17_06_07_20_21_10_11_24_25_14_15_28_29:
526 ; AVX512BW-NEXT: vpalignr {{.*#+}} zmm0 = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,4,5,6,7],zmm0[24,25,26,27,28,29,30,31],zmm1[16,17,18,19,20,21,22,23],zmm0[40,41,42,43,44,45,46,47],zmm1[32,33,34,35,36,37,38,39],zmm0[56,57,58,59,60,61,62,63],zmm1[48,49,50,51,52,53,54,55]
527 ; AVX512BW-NEXT: retq
528 %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 20, i32 21, i32 10, i32 11, i32 24, i32 25, i32 14, i32 15, i32 28, i32 29>
529 ret <16 x i32> %shuffle
532 define <16 x i32> @shuffle_v8i32_17_16_01_00_21_20_05_04_25_24_09_08_29_28_13_12(<16 x i32> %a, <16 x i32> %b) {
533 ; ALL-LABEL: shuffle_v8i32_17_16_01_00_21_20_05_04_25_24_09_08_29_28_13_12:
535 ; ALL-NEXT: vshufps {{.*#+}} zmm0 = zmm1[1,0],zmm0[1,0],zmm1[5,4],zmm0[5,4],zmm1[9,8],zmm0[9,8],zmm1[13,12],zmm0[13,12]
537 %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 17, i32 16, i32 01, i32 00, i32 21, i32 20, i32 05, i32 04, i32 25, i32 24, i32 09, i32 08, i32 29, i32 28, i32 13, i32 12>
538 ret <16 x i32> %shuffle
541 define <16 x float> @shuffle_v8f32_v16f32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04(<8 x float> %a) {
542 ; ALL-LABEL: shuffle_v8f32_v16f32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04:
544 ; ALL-NEXT: vextractf128 $1, %ymm0, %xmm0
545 ; ALL-NEXT: vbroadcastss %xmm0, %zmm0
547 %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
548 ret <16 x float> %shuffle
551 define <16 x float> @insert_sub0_0(<16 x float> %base, <4 x float> %sub1, <4 x float> %sub2, <4 x float> %sub3, <4 x float> %sub4) {
552 ; ALL-LABEL: insert_sub0_0:
554 ; ALL-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
556 %sub12 = shufflevector <4 x float> %sub1, <4 x float> %sub2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
557 %sub34 = shufflevector <4 x float> %sub3, <4 x float> %sub4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
558 %sub1234 = shufflevector <8 x float> %sub12, <8 x float> %sub34, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
559 %res = shufflevector <16 x float> %base, <16 x float> %sub1234, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
560 ret <16 x float> %res
563 define <16 x float> @insert_sub1_12(<16 x float> %base, <4 x float> %sub1, <4 x float> %sub2, <4 x float> %sub3, <4 x float> %sub4) {
564 ; ALL-LABEL: insert_sub1_12:
566 ; ALL-NEXT: vinsertf32x4 $3, %xmm2, %zmm0, %zmm0
568 %sub12 = shufflevector <4 x float> %sub1, <4 x float> %sub2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
569 %sub34 = shufflevector <4 x float> %sub3, <4 x float> %sub4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
570 %sub1234 = shufflevector <8 x float> %sub12, <8 x float> %sub34, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
571 %res = shufflevector <16 x float> %base, <16 x float> %sub1234, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 20, i32 21, i32 22, i32 23>
572 ret <16 x float> %res
575 define <16 x float> @insert_sub2_4(<16 x float> %base, <4 x float> %sub1, <4 x float> %sub2, <4 x float> %sub3, <4 x float> %sub4) {
576 ; ALL-LABEL: insert_sub2_4:
578 ; ALL-NEXT: vinsertf32x4 $1, %xmm3, %zmm0, %zmm0
580 %sub12 = shufflevector <4 x float> %sub1, <4 x float> %sub2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
581 %sub34 = shufflevector <4 x float> %sub3, <4 x float> %sub4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
582 %sub1234 = shufflevector <8 x float> %sub12, <8 x float> %sub34, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
583 %res = shufflevector <16 x float> %base, <16 x float> %sub1234, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
584 ret <16 x float> %res
587 define <16 x float> @insert_sub01_8(<16 x float> %base, <4 x float> %sub1, <4 x float> %sub2, <4 x float> %sub3, <4 x float> %sub4) {
588 ; ALL-LABEL: insert_sub01_8:
590 ; ALL-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
591 ; ALL-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
592 ; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
594 %sub12 = shufflevector <4 x float> %sub1, <4 x float> %sub2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
595 %sub34 = shufflevector <4 x float> %sub3, <4 x float> %sub4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
596 %sub1234 = shufflevector <8 x float> %sub12, <8 x float> %sub34, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
597 %res = shufflevector <16 x float> %base, <16 x float> %sub1234, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
598 ret <16 x float> %res
601 define <16 x float> @insert_sub23_0(<16 x float> %base, <4 x float> %sub1, <4 x float> %sub2, <4 x float> %sub3, <4 x float> %sub4) {
602 ; ALL-LABEL: insert_sub23_0:
604 ; ALL-NEXT: # kill: def $xmm3 killed $xmm3 def $ymm3
605 ; ALL-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm1
606 ; ALL-NEXT: vinsertf64x4 $0, %ymm1, %zmm0, %zmm0
608 %sub12 = shufflevector <4 x float> %sub1, <4 x float> %sub2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
609 %sub34 = shufflevector <4 x float> %sub3, <4 x float> %sub4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
610 %sub1234 = shufflevector <8 x float> %sub12, <8 x float> %sub34, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
611 %res = shufflevector <16 x float> %base, <16 x float> %sub1234, <16 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
612 ret <16 x float> %res
615 define <16 x i32> @mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01(<16 x i32> %a, <16 x i32> %passthru, i16 %mask) {
616 ; AVX512F-LABEL: mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01:
618 ; AVX512F-NEXT: kmovw %edi, %k1
619 ; AVX512F-NEXT: valignd {{.*#+}} zmm1 {%k1} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
620 ; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
623 ; AVX512BW-LABEL: mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01:
625 ; AVX512BW-NEXT: kmovd %edi, %k1
626 ; AVX512BW-NEXT: valignd {{.*#+}} zmm1 {%k1} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
627 ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
628 ; AVX512BW-NEXT: retq
629 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32><i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1>
630 %mask.cast = bitcast i16 %mask to <16 x i1>
631 %res = select <16 x i1> %mask.cast, <16 x i32> %shuffle, <16 x i32> %passthru
635 define <16 x i32> @mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passthru, i16 %mask) {
636 ; AVX512F-LABEL: mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17:
638 ; AVX512F-NEXT: kmovw %edi, %k1
639 ; AVX512F-NEXT: valignd {{.*#+}} zmm2 {%k1} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0,1]
640 ; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0
643 ; AVX512BW-LABEL: mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17:
645 ; AVX512BW-NEXT: kmovd %edi, %k1
646 ; AVX512BW-NEXT: valignd {{.*#+}} zmm2 {%k1} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0,1]
647 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
648 ; AVX512BW-NEXT: retq
649 %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
650 %mask.cast = bitcast i16 %mask to <16 x i1>
651 %res = select <16 x i1> %mask.cast, <16 x i32> %shuffle, <16 x i32> %passthru
655 define <16 x i32> @maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01(<16 x i32> %a, i16 %mask) {
656 ; AVX512F-LABEL: maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01:
658 ; AVX512F-NEXT: kmovw %edi, %k1
659 ; AVX512F-NEXT: valignd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
662 ; AVX512BW-LABEL: maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01:
664 ; AVX512BW-NEXT: kmovd %edi, %k1
665 ; AVX512BW-NEXT: valignd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
666 ; AVX512BW-NEXT: retq
667 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32><i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1>
668 %mask.cast = bitcast i16 %mask to <16 x i1>
669 %res = select <16 x i1> %mask.cast, <16 x i32> %shuffle, <16 x i32> zeroinitializer
673 define <16 x i32> @maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
674 ; AVX512F-LABEL: maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17:
676 ; AVX512F-NEXT: kmovw %edi, %k1
677 ; AVX512F-NEXT: valignd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0,1]
680 ; AVX512BW-LABEL: maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17:
682 ; AVX512BW-NEXT: kmovd %edi, %k1
683 ; AVX512BW-NEXT: valignd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0,1]
684 ; AVX512BW-NEXT: retq
685 %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
686 %mask.cast = bitcast i16 %mask to <16 x i1>
687 %res = select <16 x i1> %mask.cast, <16 x i32> %shuffle, <16 x i32> zeroinitializer
691 define <16 x float> @test_vshuff32x4_512(<16 x float> %x, <16 x float> %x1) nounwind {
692 ; ALL-LABEL: test_vshuff32x4_512:
694 ; ALL-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[2,3,0,1]
696 %res = shufflevector <16 x float> %x, <16 x float> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19>
697 ret <16 x float> %res
700 define <16 x i32> @test_vshufi32x4_512(<16 x i32> %x, <16 x i32> %x1) nounwind {
701 ; ALL-LABEL: test_vshufi32x4_512:
703 ; ALL-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[2,3,0,1]
705 %res = shufflevector <16 x i32> %x, <16 x i32> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19>
709 define <16 x float> @test_vshuff32x4_512_mask(<16 x float> %x, <16 x float> %x1, <16 x float> %y, <16 x i1> %mask) nounwind {
710 ; AVX512F-LABEL: test_vshuff32x4_512_mask:
712 ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
713 ; AVX512F-NEXT: vpslld $31, %zmm3, %zmm3
714 ; AVX512F-NEXT: vpmovd2m %zmm3, %k1
715 ; AVX512F-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
716 ; AVX512F-NEXT: vmovaps %zmm2, %zmm0
719 ; AVX512BW-LABEL: test_vshuff32x4_512_mask:
721 ; AVX512BW-NEXT: vpsllw $7, %xmm3, %xmm3
722 ; AVX512BW-NEXT: vpmovb2m %zmm3, %k1
723 ; AVX512BW-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
724 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
725 ; AVX512BW-NEXT: retq
726 %x2 = shufflevector <16 x float> %x, <16 x float> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19>
727 %res = select <16 x i1> %mask, <16 x float> %x2, <16 x float> %y
728 ret <16 x float> %res
731 define <16 x i32> @test_vshufi32x4_512_mask(<16 x i32> %x, <16 x i32> %x1, <16 x i32> %y, <16 x i1> %mask) nounwind {
732 ; AVX512F-LABEL: test_vshufi32x4_512_mask:
734 ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
735 ; AVX512F-NEXT: vpslld $31, %zmm3, %zmm3
736 ; AVX512F-NEXT: vpmovd2m %zmm3, %k1
737 ; AVX512F-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
738 ; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0
741 ; AVX512BW-LABEL: test_vshufi32x4_512_mask:
743 ; AVX512BW-NEXT: vpsllw $7, %xmm3, %xmm3
744 ; AVX512BW-NEXT: vpmovb2m %zmm3, %k1
745 ; AVX512BW-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
746 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
747 ; AVX512BW-NEXT: retq
748 %x2 = shufflevector <16 x i32> %x, <16 x i32> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19>
749 %res = select <16 x i1> %mask, <16 x i32> %x2, <16 x i32> %y
753 define <16 x float> @mask_shuffle_v16f32_00_01_02_03_04_05_06_07_16_17_18_19_20_21_22_23(<16 x float> %a, <16 x float> %b, <16 x float> %passthru, i16 %mask) {
754 ; AVX512F-LABEL: mask_shuffle_v16f32_00_01_02_03_04_05_06_07_16_17_18_19_20_21_22_23:
756 ; AVX512F-NEXT: kmovw %edi, %k1
757 ; AVX512F-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
758 ; AVX512F-NEXT: vmovaps %zmm2, %zmm0
761 ; AVX512BW-LABEL: mask_shuffle_v16f32_00_01_02_03_04_05_06_07_16_17_18_19_20_21_22_23:
763 ; AVX512BW-NEXT: kmovd %edi, %k1
764 ; AVX512BW-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
765 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
766 ; AVX512BW-NEXT: retq
767 %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
768 %mask.cast = bitcast i16 %mask to <16 x i1>
769 %res = select <16 x i1> %mask.cast, <16 x float> %shuffle, <16 x float> %passthru
770 ret <16 x float> %res
773 define <16 x float> @mask_shuffle_v16f32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15(<16 x float> %a, <16 x float> %b, <16 x float> %passthru, i16 %mask) {
774 ; AVX512F-LABEL: mask_shuffle_v16f32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15:
776 ; AVX512F-NEXT: kmovw %edi, %k1
777 ; AVX512F-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
778 ; AVX512F-NEXT: vmovaps %zmm2, %zmm0
781 ; AVX512BW-LABEL: mask_shuffle_v16f32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15:
783 ; AVX512BW-NEXT: kmovd %edi, %k1
784 ; AVX512BW-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
785 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
786 ; AVX512BW-NEXT: retq
787 %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
788 %mask.cast = bitcast i16 %mask to <16 x i1>
789 %res = select <16 x i1> %mask.cast, <16 x float> %shuffle, <16 x float> %passthru
790 ret <16 x float> %res
793 define <16 x i32> @mask_shuffle_v16i32_00_01_02_03_04_05_06_07_16_17_18_19_20_21_22_23(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passthru, i16 %mask) {
794 ; AVX512F-LABEL: mask_shuffle_v16i32_00_01_02_03_04_05_06_07_16_17_18_19_20_21_22_23:
796 ; AVX512F-NEXT: kmovw %edi, %k1
797 ; AVX512F-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
798 ; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0
801 ; AVX512BW-LABEL: mask_shuffle_v16i32_00_01_02_03_04_05_06_07_16_17_18_19_20_21_22_23:
803 ; AVX512BW-NEXT: kmovd %edi, %k1
804 ; AVX512BW-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
805 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
806 ; AVX512BW-NEXT: retq
807 %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
808 %mask.cast = bitcast i16 %mask to <16 x i1>
809 %res = select <16 x i1> %mask.cast, <16 x i32> %shuffle, <16 x i32> %passthru
813 define <16 x i32> @mask_shuffle_v16i32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passthru, i16 %mask) {
814 ; AVX512F-LABEL: mask_shuffle_v16i32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15:
816 ; AVX512F-NEXT: kmovw %edi, %k1
817 ; AVX512F-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
818 ; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0
821 ; AVX512BW-LABEL: mask_shuffle_v16i32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15:
823 ; AVX512BW-NEXT: kmovd %edi, %k1
824 ; AVX512BW-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
825 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
826 ; AVX512BW-NEXT: retq
827 %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
828 %mask.cast = bitcast i16 %mask to <16 x i1>
829 %res = select <16 x i1> %mask.cast, <16 x i32> %shuffle, <16 x i32> %passthru
833 define <16 x i32> @mask_shuffle_v4i32_v16i32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03(<4 x i32> %a) {
834 ; ALL-LABEL: mask_shuffle_v4i32_v16i32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03:
836 ; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
837 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
838 ; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
840 %res = shufflevector <4 x i32> %a, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
844 define <16 x float> @mask_shuffle_v4f32_v16f32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03(<4 x float> %a) {
845 ; ALL-LABEL: mask_shuffle_v4f32_v16f32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03:
847 ; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
848 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
849 ; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
851 %res = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
852 ret <16 x float> %res
855 %struct.foo = type { [4 x double], [3 x [4 x double]], [4 x double] }
857 ; This test previously hung in shuffle combining. https://github.com/ispc/ispc/issues/1864
858 define void @ispc_1864(<16 x float>* %arg) {
859 ; ALL-LABEL: ispc_1864:
860 ; ALL: # %bb.0: # %bb
861 ; ALL-NEXT: pushq %rbp
862 ; ALL-NEXT: .cfi_def_cfa_offset 16
863 ; ALL-NEXT: .cfi_offset %rbp, -16
864 ; ALL-NEXT: movq %rsp, %rbp
865 ; ALL-NEXT: .cfi_def_cfa_register %rbp
866 ; ALL-NEXT: andq $-64, %rsp
867 ; ALL-NEXT: subq $4864, %rsp # imm = 0x1300
868 ; ALL-NEXT: vbroadcastss {{.*#+}} ymm0 = [-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0]
869 ; ALL-NEXT: vmulps 32(%rdi), %ymm0, %ymm0
870 ; ALL-NEXT: vcvtps2pd %ymm0, %zmm0
871 ; ALL-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,4,5,0,1,0,1]
872 ; ALL-NEXT: vmovapd %ymm0, {{[0-9]+}}(%rsp)
873 ; ALL-NEXT: movq %rbp, %rsp
874 ; ALL-NEXT: popq %rbp
875 ; ALL-NEXT: .cfi_def_cfa %rsp, 8
876 ; ALL-NEXT: vzeroupper
879 %tmp = alloca [30 x %struct.foo], align 64
880 %tmp1 = load <16 x float>, <16 x float>* %arg, align 4
881 %tmp2 = fmul <16 x float> %tmp1, <float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00>
882 %tmp3 = fpext <16 x float> %tmp2 to <16 x double>
883 %tmp4 = getelementptr inbounds [30 x %struct.foo], [30 x %struct.foo]* %tmp, i64 0, i64 3, i32 2, i64 0
884 %tmp5 = extractelement <16 x double> %tmp3, i32 10
885 store double %tmp5, double* %tmp4, align 32
886 %tmp6 = getelementptr inbounds [30 x %struct.foo], [30 x %struct.foo]* %tmp, i64 0, i64 3, i32 2, i64 1
887 %tmp7 = extractelement <16 x double> %tmp3, i32 11
888 store double %tmp7, double* %tmp6, align 8
889 %tmp8 = getelementptr inbounds [30 x %struct.foo], [30 x %struct.foo]* %tmp, i64 0, i64 3, i32 2, i64 2
890 %tmp9 = extractelement <16 x double> %tmp3, i32 12
891 store double %tmp9, double* %tmp8, align 16
892 %tmp10 = getelementptr inbounds [30 x %struct.foo], [30 x %struct.foo]* %tmp, i64 0, i64 3, i32 2, i64 3
893 %tmp11 = extractelement <16 x double> %tmp3, i32 13
894 store double %tmp11, double* %tmp10, align 8