1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck --check-prefixes=ALL,KNL %s
3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefixes=ALL,SKX %s
5 target triple = "x86_64-unknown-unknown"
; Splat of element 0: the zeroinitializer shuffle mask selects %a[0] for all
; 32 result lanes.
; Expected codegen per the checks below: the KNL run broadcasts into a ymm
; register with vpbroadcastw and then uses vinserti64x4 $1 to build the zmm
; result; the SKX run uses a single vpbroadcastw with a zmm destination.
7 define <32 x i16> @shuffle_v32i16(<32 x i16> %a) {
8 ; KNL-LABEL: shuffle_v32i16:
10 ; KNL-NEXT: vpbroadcastw %xmm0, %ymm0
11 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
14 ; SKX-LABEL: shuffle_v32i16:
16 ; SKX-NEXT: vpbroadcastw %xmm0, %zmm0
18 %c = shufflevector <32 x i16> %a, <32 x i16> undef, <32 x i32> zeroinitializer
; Splat of element 8: every index in the shuffle mask is 8.
; Expected codegen per the checks below: the KNL run first pulls the upper
; 128 bits of ymm0 with vextracti128 $1, then broadcasts with vpbroadcastw
; and widens with vinserti64x4; the SKX run materialises an index vector of
; 8s and performs one vpermw.
22 define <32 x i16> @shuffle_v32i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08(<32 x i16> %a) {
23 ; KNL-LABEL: shuffle_v32i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08:
25 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
26 ; KNL-NEXT: vpbroadcastw %xmm0, %ymm0
27 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
30 ; SKX-LABEL: shuffle_v32i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08:
32 ; SKX-NEXT: vpbroadcastw {{.*#+}} zmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
33 ; SKX-NEXT: vpermw %zmm0, %zmm1, %zmm0
35 %c = shufflevector <32 x i16> %a, <32 x i16> undef, <32 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; Single-input shuffle with several undef lanes. The 16-index pattern is used
; twice, except that the very last index is 31 (0x1f in the name) instead of 1,
; so the final lane reads from the upper part of %a.
; Expected codegen per the checks below: the KNL run needs a sequence of
; vpshufb / vpermq / vpblendvb / vpblendw / vpblendd on ymm registers before
; joining the halves with vinserti64x4; the SKX run loads the mask as an index
; constant and performs one vpermw.
39 define <32 x i16> @shuffle_v32i16_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_1f(<32 x i16> %a) {
40 ; KNL-LABEL: shuffle_v32i16_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_1f:
42 ; KNL-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[4,5,10,11,4,5,6,7,14,15,2,3,4,5,2,3,20,21,26,27,20,21,22,23,30,31,18,19,20,21,18,19]
43 ; KNL-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,0,1]
44 ; KNL-NEXT: vpshufb {{.*#+}} ymm3 = ymm2[0,1,10,11,8,9,8,9,14,15,6,7,4,5,14,15,16,17,26,27,24,25,24,25,30,31,22,23,20,21,30,31]
45 ; KNL-NEXT: vmovdqa {{.*#+}} ymm4 = <255,255,255,255,u,u,u,u,255,255,u,u,0,0,255,255,0,0,0,0,u,u,0,0,0,0,u,u,255,255,u,u>
46 ; KNL-NEXT: vpblendvb %ymm4, %ymm1, %ymm3, %ymm3
47 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
48 ; KNL-NEXT: vpblendw {{.*#+}} ymm0 = ymm3[0,1,2,3,4,5,6],ymm0[7],ymm3[8,9,10,11,12,13,14],ymm0[15]
49 ; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7]
50 ; KNL-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[0,1,10,11,8,9,8,9,14,15,2,3,4,5,2,3,16,17,26,27,24,25,24,25,30,31,18,19,20,21,18,19]
51 ; KNL-NEXT: vmovdqa {{.*#+}} ymm3 = <0,0,0,0,u,u,u,u,0,0,u,u,255,255,0,0,255,255,255,255,u,u,255,255,255,255,u,u,0,0,255,255>
52 ; KNL-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
53 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
56 ; SKX-LABEL: shuffle_v32i16_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_1f:
58 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = <2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,1,2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,31>
59 ; SKX-NEXT: vpermw %zmm0, %zmm1, %zmm0
61 %c = shufflevector <32 x i16> %a, <32 x i16> undef, <32 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1, i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 31>
; Two-input shuffle of %a and %b. The 16-index pattern is used twice, except
; that the final index is 56 (0x38 in the name), i.e. element 24 of %b.
; NOTE(review): the name spells "1e" (30) in positions 7 and 23, but the IR
; mask and the SKX index constant both use 28 there; the checks follow the
; mask, so only the name looks off - confirm before renaming.
; Expected codegen per the checks below: the KNL run works on ymm halves
; (vpermq / vpshufb / vpunpcklwd), fetches the element for the last lane via
; vextracti32x4 $3 + vpbroadcastw, blends it in and joins with vinserti64x4;
; the SKX run uses one vpermt2w with a constant index vector.
65 define <32 x i16> @shuffle_v32i16_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_38(<32 x i16> %a, <32 x i16> %b) {
66 ; KNL-LABEL: shuffle_v32i16_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_38:
68 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
69 ; KNL-NEXT: vpermq {{.*#+}} ymm2 = ymm2[3,1,2,3]
70 ; KNL-NEXT: vpshufb {{.*#+}} ymm3 = ymm2[6,7,12,13,2,3,0,1,u,u,u,u,u,u,u,u,22,23,20,21,18,19,u,u,u,u,u,u,u,u,u,u]
71 ; KNL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,2,3]
72 ; KNL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,8,9,u,u,u,u,u,u,u,u,22,23,20,21,18,19,16,17,u,u,u,u,u,u,u,u]
73 ; KNL-NEXT: vpunpcklwd {{.*#+}} ymm3 = ymm0[0],ymm3[0],ymm0[1],ymm3[1],ymm0[2],ymm3[2],ymm0[3],ymm3[3],ymm0[8],ymm3[8],ymm0[9],ymm3[9],ymm0[10],ymm3[10],ymm0[11],ymm3[11]
74 ; KNL-NEXT: vextracti32x4 $3, %zmm1, %xmm1
75 ; KNL-NEXT: vpbroadcastw %xmm1, %ymm1
76 ; KNL-NEXT: vpblendw {{.*#+}} ymm1 = ymm3[0,1,2,3,4,5,6],ymm1[7],ymm3[8,9,10,11,12,13,14],ymm1[15]
77 ; KNL-NEXT: vpblendd {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
78 ; KNL-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[6,7,12,13,2,3,0,1,u,u,u,u,u,u,u,u,22,23,20,21,18,19,16,17,u,u,u,u,u,u,u,u]
79 ; KNL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11]
80 ; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
83 ; SKX-LABEL: shuffle_v32i16_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_38:
85 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24,15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,56]
86 ; SKX-NEXT: vpermt2w %zmm1, %zmm2, %zmm0
88 %c = shufflevector <32 x i16> %a, <32 x i16> %b, <32 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24, i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 56>
; Interleaves %a[0..3] with %b[0..3] and %a[8..9] with %b[8..9]; every
; remaining lane of the mask is undef.
; NOTE(review): the name says "v16i32" but the function operates on
; <32 x i16> values.
; Both runs share the same expectation (ALL prefix) - a single ymm-wide
; vpunpcklwd of ymm0 and ymm1.
92 define <32 x i16> @shuffle_v16i32_0_32_1_33_2_34_3_35_8_40_9_41_u_u_u_u(<32 x i16> %a, <32 x i16> %b) {
93 ; ALL-LABEL: shuffle_v16i32_0_32_1_33_2_34_3_35_8_40_9_41_u_u_u_u:
95 ; ALL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
97 %c = shufflevector <32 x i16> %a, <32 x i16> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 8, i32 40, i32 9, i32 41, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; Interleaves %a[4..7] with %b[4..7] and %a[12..13] with %b[12..13]; every
; remaining lane of the mask is undef.
; NOTE(review): the name says "v16i32" but the function operates on
; <32 x i16> values.
; Both runs share the same expectation (ALL prefix) - a single ymm-wide
; vpunpckhwd of ymm0 and ymm1.
101 define <32 x i16> @shuffle_v16i32_4_36_5_37_6_38_7_39_12_44_13_45_u_u_u_u(<32 x i16> %a, <32 x i16> %b) {
102 ; ALL-LABEL: shuffle_v16i32_4_36_5_37_6_38_7_39_12_44_13_45_u_u_u_u:
104 ; ALL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
106 %c = shufflevector <32 x i16> %a, <32 x i16> %b, <32 x i32> <i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 12, i32 44, i32 13, i32 45, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; Places the odd-indexed elements of %a in the even result lanes and a zero
; (index 34, taken from the zeroinitializer second operand) in every odd
; result lane. %b is not referenced in the visible body.
; Both runs share the same expectation (ALL prefix) - the whole shuffle
; becomes a 32-bit logical right shift by 16, vpsrld $16 on zmm0.
110 define <32 x i16> @shuffle_v32i16_1_z_3_z_5_z_7_z_9_z_11_z_13_z_15_z_17_z_19_z_21_z_23_z_25_z_27_z_29_z_31_z(<32 x i16> %a, <32 x i16> %b) {
111 ; ALL-LABEL: shuffle_v32i16_1_z_3_z_5_z_7_z_9_z_11_z_13_z_15_z_17_z_19_z_21_z_23_z_25_z_27_z_29_z_31_z:
113 ; ALL-NEXT: vpsrld $16, %zmm0, %zmm0
115 %c = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 1, i32 34, i32 3, i32 34, i32 5, i32 34, i32 7, i32 34, i32 9, i32 34, i32 11, i32 34, i32 13, i32 34, i32 15, i32 34, i32 17, i32 34, i32 19, i32 34, i32 21, i32 34, i32 23, i32 34, i32 25, i32 34, i32 27, i32 34, i32 29, i32 34, i32 31, i32 34>
; Places a zero (index 34, taken from the zeroinitializer second operand) in
; every even result lane and the even-indexed elements of %a in the odd
; result lanes. %b is not referenced in the visible body.
; Both runs share the same expectation (ALL prefix) - the whole shuffle
; becomes a 32-bit left shift by 16, vpslld $16 on zmm0.
119 define <32 x i16> @shuffle_v32i16_z_0_z_2_z_4_z_6_z_8_z_10_z_12_z_14_z_16_z_18_z_20_z_22_z_24_z_26_z_28_z_30(<32 x i16> %a, <32 x i16> %b) {
120 ; ALL-LABEL: shuffle_v32i16_z_0_z_2_z_4_z_6_z_8_z_10_z_12_z_14_z_16_z_18_z_20_z_22_z_24_z_26_z_28_z_30:
122 ; ALL-NEXT: vpslld $16, %zmm0, %zmm0
124 %c = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 34, i32 0, i32 34, i32 2, i32 34, i32 4, i32 34, i32 6, i32 34, i32 8, i32 34, i32 10, i32 34, i32 12, i32 34, i32 14, i32 34, i32 16, i32 34, i32 18, i32 34, i32 20, i32 34, i32 22, i32 34, i32 24, i32 34, i32 26, i32 34, i32 28, i32 34, i32 30>
; Within every group of eight elements the low four are permuted as 1,1,0,0
; and the high four stay in place. The second shuffle operand is
; zeroinitializer, but no mask index reaches it (all indices are below 32);
; %b is not referenced in the visible body.
; Expected codegen per the checks below: the KNL run applies vpshuflw to each
; 256-bit half and joins them with vinserti64x4; the SKX run uses a single
; zmm vpshuflw.
128 define <32 x i16> @shuffle_v32i16_1_1_0_0_4_5_6_7_9_9_8_8_12_13_14_15_17_17_16_16_20_21_22_23_25_25_24_24_28_29_30_31(<32 x i16> %a, <32 x i16> %b) {
129 ; KNL-LABEL: shuffle_v32i16_1_1_0_0_4_5_6_7_9_9_8_8_12_13_14_15_17_17_16_16_20_21_22_23_25_25_24_24_28_29_30_31:
131 ; KNL-NEXT: vpshuflw {{.*#+}} ymm1 = ymm0[1,1,0,0,4,5,6,7,9,9,8,8,12,13,14,15]
132 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
133 ; KNL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,1,0,0,4,5,6,7,9,9,8,8,12,13,14,15]
134 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
137 ; SKX-LABEL: shuffle_v32i16_1_1_0_0_4_5_6_7_9_9_8_8_12_13_14_15_17_17_16_16_20_21_22_23_25_25_24_24_28_29_30_31:
139 ; SKX-NEXT: vpshuflw {{.*#+}} zmm0 = zmm0[1,1,0,0,4,5,6,7,9,9,8,8,12,13,14,15,17,17,16,16,20,21,22,23,25,25,24,24,28,29,30,31]
141 %c = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31>
; Within every group of eight elements the low four stay in place and the
; high four are permuted as 5,5,4,4. The second shuffle operand is
; zeroinitializer, but no mask index reaches it (all indices are below 32);
; %b is not referenced in the visible body.
; Expected codegen per the checks below: the KNL run applies vpshufhw to each
; 256-bit half and joins them with vinserti64x4; the SKX run uses a single
; zmm vpshufhw.
145 define <32 x i16> @shuffle_v32i16_0_1_2_3_5_5_4_4_8_9_10_11_13_13_12_12_16_17_18_19_21_21_20_20_24_25_26_27_29_29_28_28(<32 x i16> %a, <32 x i16> %b) {
146 ; KNL-LABEL: shuffle_v32i16_0_1_2_3_5_5_4_4_8_9_10_11_13_13_12_12_16_17_18_19_21_21_20_20_24_25_26_27_29_29_28_28:
148 ; KNL-NEXT: vpshufhw {{.*#+}} ymm1 = ymm0[0,1,2,3,5,5,4,4,8,9,10,11,13,13,12,12]
149 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
150 ; KNL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,4,4,8,9,10,11,13,13,12,12]
151 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
154 ; SKX-LABEL: shuffle_v32i16_0_1_2_3_5_5_4_4_8_9_10_11_13_13_12_12_16_17_18_19_21_21_20_20_24_25_26_27_29_29_28_28:
156 ; SKX-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,5,5,4,4,8,9,10,11,13,13,12,12,16,17,18,19,21,21,20,20,24,25,26,27,29,29,28,28]
158 %c = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12, i32 16, i32 17, i32 18, i32 19, i32 21, i32 21, i32 20, i32 20, i32 24, i32 25, i32 26, i32 27, i32 29, i32 29, i32 28, i32 28>
; Within every group of eight elements the low four are permuted as 1,1,0,0
; and the high four as 5,5,4,4. The second shuffle operand is zeroinitializer
; but no mask index reaches it; %b is not referenced in the visible body.
; NOTE(review): the name spells 11_11, 19_19 and 27_27 where the IR mask has
; 8,8 / 16,16 / 24,24. The checks (vpshuflw 1,1,0,0 and the SKX vpshufb byte
; indices) follow the mask, so only the name looks off - confirm before
; renaming.
; Expected codegen per the checks below: the KNL run chains vpshuflw and
; vpshufhw on each 256-bit half and joins them with vinserti64x4; the SKX run
; uses one zmm vpshufb.
162 define <32 x i16> @shuffle_v32i16_1_1_0_0_5_5_4_4_9_9_11_11_13_13_12_12_17_17_19_19_21_21_20_20_25_25_27_27_29_29_28_28(<32 x i16> %a, <32 x i16> %b) {
163 ; KNL-LABEL: shuffle_v32i16_1_1_0_0_5_5_4_4_9_9_11_11_13_13_12_12_17_17_19_19_21_21_20_20_25_25_27_27_29_29_28_28:
165 ; KNL-NEXT: vpshuflw {{.*#+}} ymm1 = ymm0[1,1,0,0,4,5,6,7,9,9,8,8,12,13,14,15]
166 ; KNL-NEXT: vpshufhw {{.*#+}} ymm1 = ymm1[0,1,2,3,5,5,4,4,8,9,10,11,13,13,12,12]
167 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
168 ; KNL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,1,0,0,4,5,6,7,9,9,8,8,12,13,14,15]
169 ; KNL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,4,4,8,9,10,11,13,13,12,12]
170 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
173 ; SKX-LABEL: shuffle_v32i16_1_1_0_0_5_5_4_4_9_9_11_11_13_13_12_12_17_17_19_19_21_21_20_20_25_25_27_27_29_29_28_28:
175 ; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[2,3,2,3,0,1,0,1,10,11,10,11,8,9,8,9,18,19,18,19,16,17,16,17,26,27,26,27,24,25,24,25,34,35,34,35,32,33,32,33,42,43,42,43,40,41,40,41,50,51,50,51,48,49,48,49,58,59,58,59,56,57,56,57]
177 %c = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 5, i32 5, i32 4, i32 4, i32 9, i32 9, i32 8, i32 8, i32 13, i32 13, i32 12, i32 12, i32 17, i32 17, i32 16, i32 16, i32 21, i32 21, i32 20, i32 20, i32 25, i32 25, i32 24, i32 24, i32 29, i32 29, i32 28, i32 28>
; Swaps every adjacent pair of i16 elements (mask 1,0,3,2,...).
; Both runs share the same expectation (ALL prefix) - the swap is emitted as
; a 32-bit rotate by 16, vprold $16 on zmm0.
181 define <32 x i16> @shuffle_v32i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14_17_16_19_18_21_20_23_22_25_24_27_26_29_28_31_30(<32 x i16> %a) {
182 ; ALL-LABEL: shuffle_v32i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14_17_16_19_18_21_20_23_22_25_24_27_26_29_28_31_30:
184 ; ALL-NEXT: vprold $16, %zmm0, %zmm0
186 %shuffle = shufflevector <32 x i16> %a, <32 x i16> undef, <32 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14, i32 17, i32 16, i32 19, i32 18, i32 21, i32 20, i32 23, i32 22, i32 25, i32 24, i32 27, i32 26, i32 29, i32 28, i32 31, i32 30>
187 ret <32 x i16> %shuffle
; Rotates every group of four i16 elements by one position (mask 3,0,1,2 per
; group).
; Both runs share the same expectation (ALL prefix) - the rotation is emitted
; as a 64-bit rotate by 16, vprolq $16 on zmm0.
190 define <32 x i16> @shuffle_v32i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30(<32 x i16> %a) {
191 ; ALL-LABEL: shuffle_v32i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30:
193 ; ALL-NEXT: vprolq $16, %zmm0, %zmm0
195 %shuffle = shufflevector <32 x i16> %a, <32 x i16> undef, <32 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 11, i32 8, i32 9, i32 10, i32 15, i32 12, i32 13, i32 14, i32 19, i32 16, i32 17, i32 18, i32 23, i32 20, i32 21, i32 22, i32 27, i32 24, i32 25, i32 26, i32 31, i32 28, i32 29, i32 30>
196 ret <32 x i16> %shuffle
; Keeps element 0 of %a and fills every other lane with zero (index 32 is
; element 0 of the zeroinitializer second operand).
; Expected codegen per the checks below: both runs load the xmm constant
; [65535,0,0,0] and AND it with zmm0; the KNL run uses vmovdqa / vpandq, the
; SKX run uses vmovaps / vandps.
199 define <32 x i16> @shuffle_v32i16_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz(<32 x i16> %a) {
200 ; KNL-LABEL: shuffle_v32i16_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
202 ; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,0,0,0]
203 ; KNL-NEXT: vpandq %zmm1, %zmm0, %zmm0
206 ; SKX-LABEL: shuffle_v32i16_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
208 ; SKX-NEXT: vmovaps {{.*#+}} xmm1 = [65535,0,0,0]
209 ; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0
211 %shuffle = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
212 ret <32 x i16> %shuffle
; Arithmetic-shifts both <16 x i32> inputs right by 25, reinterprets each
; result as <32 x i16>, and then picks the even-indexed i16 elements,
; alternating between the two inputs in groups of four.
; Expected codegen per the checks below: both runs shift with vpsrad $25 and
; express the shuffle as a signed-saturating pack (vpackssdw); the KNL run
; packs the two 256-bit halves separately and joins them with vinserti64x4,
; the SKX run packs at zmm width.
215 define <32 x i16> @shuffle_v32i16_ashr_00_02_04_06_32_34_36_38_08_10_12_14_40_42_44_46_16_18_20_22_48_50_52_54_24_26_28_30_56_58_60_62(<16 x i32> %a0, <16 x i32> %a1) nounwind {
216 ; KNL-LABEL: shuffle_v32i16_ashr_00_02_04_06_32_34_36_38_08_10_12_14_40_42_44_46_16_18_20_22_48_50_52_54_24_26_28_30_56_58_60_62:
218 ; KNL-NEXT: vpsrad $25, %zmm0, %zmm0
219 ; KNL-NEXT: vpsrad $25, %zmm1, %zmm1
220 ; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
221 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
222 ; KNL-NEXT: vpackssdw %ymm2, %ymm3, %ymm2
223 ; KNL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
224 ; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
227 ; SKX-LABEL: shuffle_v32i16_ashr_00_02_04_06_32_34_36_38_08_10_12_14_40_42_44_46_16_18_20_22_48_50_52_54_24_26_28_30_56_58_60_62:
229 ; SKX-NEXT: vpsrad $25, %zmm0, %zmm0
230 ; SKX-NEXT: vpsrad $25, %zmm1, %zmm1
231 ; SKX-NEXT: vpackssdw %zmm1, %zmm0, %zmm0
233 %1 = ashr <16 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
234 %2 = ashr <16 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
235 %3 = bitcast <16 x i32> %1 to <32 x i16>
236 %4 = bitcast <16 x i32> %2 to <32 x i16>
237 %5 = shufflevector <32 x i16> %3, <32 x i16> %4, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 32, i32 34, i32 36, i32 38, i32 8, i32 10, i32 12, i32 14, i32 40, i32 42, i32 44, i32 46, i32 16, i32 18, i32 20, i32 22, i32 48, i32 50, i32 52, i32 54, i32 24, i32 26, i32 28, i32 30, i32 56, i32 58, i32 60, i32 62>
; Logical-shifts both <16 x i32> inputs right by 25, reinterprets each result
; as <32 x i16>, and then picks the even-indexed i16 elements, alternating
; between the two inputs in groups of four.
; Expected codegen per the checks below: both runs shift with vpsrld $25 and
; express the shuffle as an unsigned-saturating pack (vpackusdw); the KNL run
; packs the two 256-bit halves separately and joins them with vinserti64x4,
; the SKX run packs at zmm width.
241 define <32 x i16> @shuffle_v32i16_lshr_00_02_04_06_32_34_36_38_08_10_12_14_40_42_44_46_16_18_20_22_48_50_52_54_24_26_28_30_56_58_60_62(<16 x i32> %a0, <16 x i32> %a1) nounwind {
242 ; KNL-LABEL: shuffle_v32i16_lshr_00_02_04_06_32_34_36_38_08_10_12_14_40_42_44_46_16_18_20_22_48_50_52_54_24_26_28_30_56_58_60_62:
244 ; KNL-NEXT: vpsrld $25, %zmm0, %zmm0
245 ; KNL-NEXT: vpsrld $25, %zmm1, %zmm1
246 ; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
247 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
248 ; KNL-NEXT: vpackusdw %ymm2, %ymm3, %ymm2
249 ; KNL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
250 ; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
253 ; SKX-LABEL: shuffle_v32i16_lshr_00_02_04_06_32_34_36_38_08_10_12_14_40_42_44_46_16_18_20_22_48_50_52_54_24_26_28_30_56_58_60_62:
255 ; SKX-NEXT: vpsrld $25, %zmm0, %zmm0
256 ; SKX-NEXT: vpsrld $25, %zmm1, %zmm1
257 ; SKX-NEXT: vpackusdw %zmm1, %zmm0, %zmm0
259 %1 = lshr <16 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
260 %2 = lshr <16 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
261 %3 = bitcast <16 x i32> %1 to <32 x i16>
262 %4 = bitcast <16 x i32> %2 to <32 x i16>
263 %5 = shufflevector <32 x i16> %3, <32 x i16> %4, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 32, i32 34, i32 36, i32 38, i32 8, i32 10, i32 12, i32 14, i32 40, i32 42, i32 44, i32 46, i32 16, i32 18, i32 20, i32 22, i32 48, i32 50, i32 52, i32 54, i32 24, i32 26, i32 28, i32 30, i32 56, i32 58, i32 60, i32 62>
; Loads an i32 from %ptr, inserts it into lane 0 of a zero <4 x i32>, views
; that as <8 x i16>, and splats i16 element 0 across 32 lanes.
; Expected codegen per the checks below: the load is folded into the
; broadcast, vpbroadcastw (%rdi); the KNL run broadcasts to ymm and widens
; with vinserti64x4, the SKX run broadcasts straight to zmm.
267 define <32 x i16> @insert_dup_mem_v32i16_i32(ptr %ptr) {
268 ; KNL-LABEL: insert_dup_mem_v32i16_i32:
270 ; KNL-NEXT: vpbroadcastw (%rdi), %ymm0
271 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
274 ; SKX-LABEL: insert_dup_mem_v32i16_i32:
276 ; SKX-NEXT: vpbroadcastw (%rdi), %zmm0
278 %tmp = load i32, ptr %ptr, align 4
279 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
280 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
281 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <32 x i32> zeroinitializer
; Loads an i16 from %ptr, sign-extends it to i32, inserts it into lane 0 of a
; zero <4 x i32>, views that as <8 x i16>, and splats i16 element 0 across 32
; lanes.
; Expected codegen per the checks below: no separate extension appears; the
; load is folded into vpbroadcastw (%rdi). The KNL run broadcasts to ymm and
; widens with vinserti64x4, the SKX run broadcasts straight to zmm.
285 define <32 x i16> @insert_dup_mem_v32i16_sext_i16(ptr %ptr) {
286 ; KNL-LABEL: insert_dup_mem_v32i16_sext_i16:
288 ; KNL-NEXT: vpbroadcastw (%rdi), %ymm0
289 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
292 ; SKX-LABEL: insert_dup_mem_v32i16_sext_i16:
294 ; SKX-NEXT: vpbroadcastw (%rdi), %zmm0
296 %tmp = load i16, ptr %ptr, align 2
297 %tmp1 = sext i16 %tmp to i32
298 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
299 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
300 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <32 x i32> zeroinitializer
; Loads an i32 from %ptr, inserts it into lane 0 of a zero <4 x i32>, views
; that as <8 x i16>, and splats i16 element 1 across 32 lanes.
; Expected codegen per the checks below: the load is folded into a broadcast
; from byte offset 2, vpbroadcastw 2(%rdi); the KNL run broadcasts to ymm and
; widens with vinserti64x4, the SKX run broadcasts straight to zmm.
; The attribute group #0 is defined outside the lines visible here.
304 define <32 x i16> @insert_dup_elt1_mem_v32i16_i32(ptr %ptr) #0 {
305 ; KNL-LABEL: insert_dup_elt1_mem_v32i16_i32:
307 ; KNL-NEXT: vpbroadcastw 2(%rdi), %ymm0
308 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
311 ; SKX-LABEL: insert_dup_elt1_mem_v32i16_i32:
313 ; SKX-NEXT: vpbroadcastw 2(%rdi), %zmm0
315 %tmp = load i32, ptr %ptr, align 4
316 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
317 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
318 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Loads an i32 from %ptr, inserts it into lane 1 (not lane 0) of a zero
; <4 x i32>, views that as <8 x i16>, and splats i16 element 3 across 32
; lanes. Element 3 of the i16 view is the second i16 of i32 lane 1, i.e. the
; second i16 of the loaded value.
; Expected codegen per the checks below: the load is folded into a broadcast
; from byte offset 2, vpbroadcastw 2(%rdi); the KNL run broadcasts to ymm and
; widens with vinserti64x4, the SKX run broadcasts straight to zmm.
; The attribute group #0 is defined outside the lines visible here.
322 define <32 x i16> @insert_dup_elt3_mem_v32i16_i32(ptr %ptr) #0 {
323 ; KNL-LABEL: insert_dup_elt3_mem_v32i16_i32:
325 ; KNL-NEXT: vpbroadcastw 2(%rdi), %ymm0
326 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
329 ; SKX-LABEL: insert_dup_elt3_mem_v32i16_i32:
331 ; SKX-NEXT: vpbroadcastw 2(%rdi), %zmm0
333 %tmp = load i32, ptr %ptr, align 4
334 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
335 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
336 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; Loads an i64 from %ptr, inserts it into lane 0 of a zero <2 x i64>, views
; that as <8 x i16>, and splats i16 element 0 across 32 lanes.
; NOTE(review): the name says "v16i16" but the function returns <32 x i16>.
; Expected codegen per the checks below: the load is folded into
; vpbroadcastw (%rdi); the KNL run broadcasts to ymm and widens with
; vinserti64x4, the SKX run broadcasts straight to zmm.
340 define <32 x i16> @insert_dup_mem_v16i16_i64(ptr %ptr) {
341 ; KNL-LABEL: insert_dup_mem_v16i16_i64:
343 ; KNL-NEXT: vpbroadcastw (%rdi), %ymm0
344 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
347 ; SKX-LABEL: insert_dup_mem_v16i16_i64:
349 ; SKX-NEXT: vpbroadcastw (%rdi), %zmm0
351 %tmp = load i64, ptr %ptr, align 4
352 %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0
353 %tmp2 = bitcast <2 x i64> %tmp1 to <8 x i16>
354 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <32 x i32> zeroinitializer
; Loads an i64 from %ptr, inserts it into lane 0 of a zero <2 x i64>, views
; that as <8 x i16>, and splats i16 element 1 across 32 lanes.
; NOTE(review): the name says "v16i16" but the function returns <32 x i16>.
; Expected codegen per the checks below: the load is folded into a broadcast
; from byte offset 2, vpbroadcastw 2(%rdi); the KNL run broadcasts to ymm and
; widens with vinserti64x4, the SKX run broadcasts straight to zmm.
358 define <32 x i16> @insert_dup_elt1_mem_v16i16_i64(ptr %ptr) {
359 ; KNL-LABEL: insert_dup_elt1_mem_v16i16_i64:
361 ; KNL-NEXT: vpbroadcastw 2(%rdi), %ymm0
362 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
365 ; SKX-LABEL: insert_dup_elt1_mem_v16i16_i64:
367 ; SKX-NEXT: vpbroadcastw 2(%rdi), %zmm0
369 %tmp = load i64, ptr %ptr, align 4
370 %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0
371 %tmp2 = bitcast <2 x i64> %tmp1 to <8 x i16>
372 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Loads an i64 from %ptr, inserts it into lane 0 of a zero <2 x i64>, views
; that as <8 x i16>, and splats i16 element 3 across 32 lanes.
; NOTE(review): the name says "v16i16" but the function returns <32 x i16>.
; Expected codegen per the checks below: the load is folded into a broadcast
; from byte offset 6, vpbroadcastw 6(%rdi); the KNL run broadcasts to ymm and
; widens with vinserti64x4, the SKX run broadcasts straight to zmm.
376 define <32 x i16> @insert_dup_elt3_mem_v16i16_i64(ptr %ptr) {
377 ; KNL-LABEL: insert_dup_elt3_mem_v16i16_i64:
379 ; KNL-NEXT: vpbroadcastw 6(%rdi), %ymm0
380 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
383 ; SKX-LABEL: insert_dup_elt3_mem_v16i16_i64:
385 ; SKX-NEXT: vpbroadcastw 6(%rdi), %zmm0
387 %tmp = load i64, ptr %ptr, align 4
388 %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0
389 %tmp2 = bitcast <2 x i64> %tmp1 to <8 x i16>
390 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; Loads an i64 from %ptr, inserts it into lane 1 (not lane 0) of a zero
; <2 x i64>, views that as <8 x i16>, and splats i16 element 7 across 32
; lanes. Element 7 of the i16 view is the fourth i16 of i64 lane 1, i.e. the
; fourth i16 of the loaded value.
; NOTE(review): the name says "v16i16" but the function returns <32 x i16>.
; Expected codegen per the checks below: the load is folded into a broadcast
; from byte offset 6, vpbroadcastw 6(%rdi); the KNL run broadcasts to ymm and
; widens with vinserti64x4, the SKX run broadcasts straight to zmm.
394 define <32 x i16> @insert_dup_elt7_mem_v16i16_i64(ptr %ptr) {
395 ; KNL-LABEL: insert_dup_elt7_mem_v16i16_i64:
397 ; KNL-NEXT: vpbroadcastw 6(%rdi), %ymm0
398 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
401 ; SKX-LABEL: insert_dup_elt7_mem_v16i16_i64:
403 ; SKX-NEXT: vpbroadcastw 6(%rdi), %zmm0
405 %tmp = load i64, ptr %ptr, align 4
406 %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 1
407 %tmp2 = bitcast <2 x i64> %tmp1 to <8 x i16>
408 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <32 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
; Loads an i16 from %ptr, sign-extends it to i64, inserts it into lane 0 of a
; zero <2 x i64>, views that as <8 x i16>, and splats i16 element 0 across 32
; lanes.
; NOTE(review): the name says "v16i16" but the function returns <32 x i16>.
; Expected codegen per the checks below: no separate extension appears; the
; load is folded into vpbroadcastw (%rdi). The KNL run broadcasts to ymm and
; widens with vinserti64x4, the SKX run broadcasts straight to zmm.
412 define <32 x i16> @insert_dup_mem_v16i16_sext_i16_i64(ptr %ptr) {
413 ; KNL-LABEL: insert_dup_mem_v16i16_sext_i16_i64:
415 ; KNL-NEXT: vpbroadcastw (%rdi), %ymm0
416 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
419 ; SKX-LABEL: insert_dup_mem_v16i16_sext_i16_i64:
421 ; SKX-NEXT: vpbroadcastw (%rdi), %zmm0
423 %tmp = load i16, ptr %ptr, align 2
424 %tmp1 = sext i16 %tmp to i64
425 %tmp2 = insertelement <2 x i64> zeroinitializer, i64 %tmp1, i32 0
426 %tmp3 = bitcast <2 x i64> %tmp2 to <8 x i16>
427 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <32 x i32> zeroinitializer
; The FIRST shuffle operand is zeroinitializer here, so index 0 yields zero
; and indices 32..39 select %a[0..7]. Each selected element is followed by
; three zero lanes.
; Both runs share the same expectation (ALL prefix) - the pattern is emitted
; as a zero extension from i16 to i64, vpmovzxwq from xmm0 into zmm0.
431 define <32 x i16> @shuffle_v32i16_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz(<32 x i16> %a) {
432 ; ALL-LABEL: shuffle_v32i16_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz:
434 ; ALL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
436 %shuffle = shufflevector <32 x i16> zeroinitializer, <32 x i16> %a, <32 x i32> <i32 32, i32 0, i32 0, i32 0, i32 33, i32 0, i32 0, i32 0, i32 34, i32 0, i32 0, i32 0, i32 35, i32 0, i32 0, i32 0, i32 36, i32 0, i32 0, i32 0, i32 37, i32 0, i32 0, i32 0, i32 38, i32 0, i32 0, i32 0, i32 39, i32 0, i32 0, i32 0>
437 ret <32 x i16> %shuffle
; The FIRST shuffle operand is zeroinitializer here, so index 0 yields zero
; and indices 32..47 select %a[0..15]. Each selected element is followed by
; one zero lane.
; Both runs share the same expectation (ALL prefix) - the pattern is emitted
; as a zero extension from i16 to i32, vpmovzxwd from ymm0 into zmm0.
440 define <32 x i16> @shuffle_v32i16_32_zz_33_zz_34_zz_35_zz_36_zz_37_zz_38_zz_39_zz_40_zz_41_zz_42_zz_43_zz_44_zz_45_zz_46_zz_47_zz(<32 x i16> %a) {
441 ; ALL-LABEL: shuffle_v32i16_32_zz_33_zz_34_zz_35_zz_36_zz_37_zz_38_zz_39_zz_40_zz_41_zz_42_zz_43_zz_44_zz_45_zz_46_zz_47_zz:
443 ; ALL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
445 %shuffle = shufflevector <32 x i16> zeroinitializer, <32 x i16> %a, <32 x i32> <i32 32, i32 0, i32 33, i32 0, i32 34, i32 0, i32 35, i32 0, i32 36, i32 0, i32 37, i32 0, i32 38, i32 0, i32 39, i32 0, i32 40, i32 0, i32 41, i32 0, i32 42, i32 0, i32 43, i32 0, i32 44, i32 0, i32 45, i32 0, i32 46, i32 0, i32 47, i32 0>
446 ret <32 x i16> %shuffle
; Extracts every fourth i16 element of %v starting at index 1 (1, 5, 9, ...,
; 29) into an <8 x i16> result.
; Presumably a regression test for the bug report the name refers to - the
; report itself is not visible here; confirm before relying on that.
; Expected codegen per the checks below: both runs shift each 64-bit lane
; right by 16 (vpsrlq $16) and then truncate qwords to words (vpmovqw); the
; SKX run additionally expects a vzeroupper, the KNL run does not.
449 define <8 x i16> @pr32967(<32 x i16> %v) {
450 ; KNL-LABEL: pr32967:
452 ; KNL-NEXT: vpsrlq $16, %zmm0, %zmm0
453 ; KNL-NEXT: vpmovqw %zmm0, %xmm0
456 ; SKX-LABEL: pr32967:
458 ; SKX-NEXT: vpsrlq $16, %zmm0, %zmm0
459 ; SKX-NEXT: vpmovqw %zmm0, %xmm0
460 ; SKX-NEXT: vzeroupper
462 %shuffle = shufflevector <32 x i16> %v, <32 x i16> undef, <8 x i32> <i32 1,i32 5,i32 9,i32 13,i32 17,i32 21,i32 25,i32 29>
463 ret <8 x i16> %shuffle
; The FIRST shuffle operand is zeroinitializer here, so index 0 yields zero
; and indices of 32 and above select from %a. Within every group of eight
; elements the odd elements of %a are taken in descending order, each
; followed by a zero lane.
; NOTE(review): the last non-zero index is 58 (%a[26]) although the name ends
; in "25" (which would be index 57). The checks follow the mask - bytes
; [20,21] in the second KNL vpshufb and [52,53] in the SKX vpshufb - so the
; irregular last lane may be deliberate; confirm before "fixing" either side.
; Expected codegen per the checks below: the KNL run applies vpshufb to each
; 256-bit half and joins them with vinserti64x4; the SKX run uses one zmm
; vpshufb.
466 define <32 x i16> @shuffle_v32i16_07_zz_05_zz_03_zz_01_zz_15_zz_13_zz_11_zz_09_zz_23_zz_21_zz_19_zz_17_zz_31_zz_29_zz_27_zz_25_zz(<32 x i16> %a) {
467 ; KNL-LABEL: shuffle_v32i16_07_zz_05_zz_03_zz_01_zz_15_zz_13_zz_11_zz_09_zz_23_zz_21_zz_19_zz_17_zz_31_zz_29_zz_27_zz_25_zz:
469 ; KNL-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[14,15],zero,zero,ymm0[10,11],zero,zero,ymm0[6,7],zero,zero,ymm0[2,3],zero,zero,ymm0[30,31],zero,zero,ymm0[26,27],zero,zero,ymm0[22,23],zero,zero,ymm0[18,19],zero,zero
470 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
471 ; KNL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15],zero,zero,ymm0[10,11],zero,zero,ymm0[6,7],zero,zero,ymm0[2,3],zero,zero,ymm0[30,31],zero,zero,ymm0[26,27],zero,zero,ymm0[22,23],zero,zero,ymm0[20,21],zero,zero
472 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
475 ; SKX-LABEL: shuffle_v32i16_07_zz_05_zz_03_zz_01_zz_15_zz_13_zz_11_zz_09_zz_23_zz_21_zz_19_zz_17_zz_31_zz_29_zz_27_zz_25_zz:
477 ; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[14,15],zero,zero,zmm0[10,11],zero,zero,zmm0[6,7],zero,zero,zmm0[2,3],zero,zero,zmm0[30,31],zero,zero,zmm0[26,27],zero,zero,zmm0[22,23],zero,zero,zmm0[18,19],zero,zero,zmm0[46,47],zero,zero,zmm0[42,43],zero,zero,zmm0[38,39],zero,zero,zmm0[34,35],zero,zero,zmm0[62,63],zero,zero,zmm0[58,59],zero,zero,zmm0[54,55],zero,zero,zmm0[52,53],zero,zero
479 %shuffle = shufflevector <32 x i16> zeroinitializer, <32 x i16> %a, <32 x i32> <i32 39, i32 0, i32 37, i32 0, i32 35, i32 0, i32 33, i32 0, i32 47, i32 0, i32 45, i32 0, i32 43, i32 0, i32 41, i32 0, i32 55, i32 0, i32 53, i32 0, i32 51, i32 0, i32 49, i32 0, i32 63, i32 0, i32 61, i32 0, i32 59, i32 0, i32 58, i32 0>
480 ret <32 x i16> %shuffle