1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX1OR2,AVX1
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX1OR2,AVX2OR512VL,AVX2,AVX2-SLOW
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX1OR2,AVX2OR512VL,AVX2,AVX2-FAST,AVX2-FAST-ALL
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX1OR2,AVX2OR512VL,AVX2,AVX2-FAST,AVX2-FAST-PERLANE
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VL-SLOW
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VL-FAST
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VL-FAST
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=ALL,XOP,XOPAVX1
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=ALL,XOP,XOPAVX2
; Splat test. All sixteen mask indices are 0, so every result lane is element 0
; of %a. No mask index reaches 16, so %b is never selected from.
12 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
13 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
15 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
16 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
17 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
20 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
21 ; AVX2OR512VL: # %bb.0:
22 ; AVX2OR512VL-NEXT: vpbroadcastw %xmm0, %ymm0
23 ; AVX2OR512VL-NEXT: retq
25 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
27 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
28 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
29 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
32 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
34 ; XOPAVX2-NEXT: vpbroadcastw %xmm0, %ymm0
36 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
37 ret <16 x i16> %shuffle
; Near-splat of element 0 of %a. Mask lane 14 is 1 and every other lane is 0, so
; only the upper 128-bit half of the result differs from a plain splat.
; All mask indices are below 16, so %b is never selected from.
40 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) {
41 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
43 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
44 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
45 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
46 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
47 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
50 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
52 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
53 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
54 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
55 ; AVX2-SLOW-NEXT: retq
57 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
58 ; AVX2-FAST-ALL: # %bb.0:
59 ; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
60 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,1]
61 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
62 ; AVX2-FAST-ALL-NEXT: retq
64 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
65 ; AVX2-FAST-PERLANE: # %bb.0:
66 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
67 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
68 ; AVX2-FAST-PERLANE-NEXT: retq
70 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
72 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
73 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
76 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
78 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
79 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
80 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
81 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
82 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
85 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
87 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
88 ; XOPAVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
89 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
91 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
92 ret <16 x i16> %shuffle
; Near-splat of element 0 of %a. Mask lane 13 is 2 and every other lane is 0, so
; only the upper 128-bit half of the result differs from a plain splat.
; All mask indices are below 16, so %b is never selected from.
95 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) {
96 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
98 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
99 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
100 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
101 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
102 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
105 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
106 ; AVX2-SLOW: # %bb.0:
107 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
108 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
109 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
110 ; AVX2-SLOW-NEXT: retq
112 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
113 ; AVX2-FAST-ALL: # %bb.0:
114 ; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
115 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
116 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
117 ; AVX2-FAST-ALL-NEXT: retq
119 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
120 ; AVX2-FAST-PERLANE: # %bb.0:
121 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
122 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
123 ; AVX2-FAST-PERLANE-NEXT: retq
125 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
127 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
128 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
129 ; AVX512VL-NEXT: retq
131 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
133 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
134 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
135 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
136 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
137 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
140 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
142 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
143 ; XOPAVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
144 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
146 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
147 ret <16 x i16> %shuffle
; Near-splat of element 0 of %a. Mask lane 12 is 3 and every other lane is 0, so
; only the upper 128-bit half of the result differs from a plain splat.
; All mask indices are below 16, so %b is never selected from.
150 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) {
151 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
153 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
154 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
155 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
156 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
157 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
160 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
161 ; AVX2-SLOW: # %bb.0:
162 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
163 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
164 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
165 ; AVX2-SLOW-NEXT: retq
167 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
168 ; AVX2-FAST-ALL: # %bb.0:
169 ; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
170 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
171 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
172 ; AVX2-FAST-ALL-NEXT: retq
174 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
175 ; AVX2-FAST-PERLANE: # %bb.0:
176 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
177 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
178 ; AVX2-FAST-PERLANE-NEXT: retq
180 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
182 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
183 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
184 ; AVX512VL-NEXT: retq
186 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
188 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
189 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
190 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
191 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
192 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
195 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
197 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
198 ; XOPAVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
199 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
201 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
202 ret <16 x i16> %shuffle
; Near-splat of element 0 of %a. Mask lane 11 is 4 and every other lane is 0, so
; only the upper 128-bit half of the result differs from a plain splat.
; All mask indices are below 16, so %b is never selected from.
205 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
206 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
208 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
209 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
210 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
211 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
214 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
216 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9]
217 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
220 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
222 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
223 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
224 ; AVX512VL-NEXT: retq
226 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
228 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
229 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
230 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
231 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
234 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
236 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9]
237 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
239 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
240 ret <16 x i16> %shuffle
; Near-splat of element 0 of %a. Mask lane 10 is 5 and every other lane is 0, so
; only the upper 128-bit half of the result differs from a plain splat.
; All mask indices are below 16, so %b is never selected from.
243 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
244 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
246 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
247 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
248 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
249 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
252 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
254 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,10,11,0,1]
255 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
258 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
260 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
261 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
262 ; AVX512VL-NEXT: retq
264 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
266 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
267 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
268 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
269 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
272 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
274 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,10,11,0,1]
275 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
277 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
278 ret <16 x i16> %shuffle
; Near-splat of element 0 of %a. Mask lane 9 is 6 and every other lane is 0, so
; only the upper 128-bit half of the result differs from a plain splat.
; All mask indices are below 16, so %b is never selected from.
281 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
282 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
284 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
285 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
286 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
287 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
290 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
292 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,12,13,0,1,0,1]
293 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
296 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
298 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
299 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
300 ; AVX512VL-NEXT: retq
302 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
304 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
305 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
306 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
307 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
310 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
312 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,12,13,0,1,0,1]
313 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
315 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
316 ret <16 x i16> %shuffle
; Near-splat of element 0 of %a. Mask lane 8 is 7 and every other lane is 0, so
; only the upper 128-bit half of the result differs from a plain splat.
; All mask indices are below 16, so %b is never selected from.
319 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
320 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
322 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
323 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
324 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
325 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
328 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
330 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1]
331 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
334 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
336 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
337 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
338 ; AVX512VL-NEXT: retq
340 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
342 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
343 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
344 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
345 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
348 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
350 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1]
351 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
353 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
354 ret <16 x i16> %shuffle
; Near-splat of element 0 of %a. Mask lane 7 is 8 and every other lane is 0, so a
; word from the upper 128-bit half of %a must land in the lower half of the result.
; All mask indices are below 16, so %b is never selected from.
357 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
358 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
360 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
361 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
362 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,1,4,5,6,7]
363 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,1]
364 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
365 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
366 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
369 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
371 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
372 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
375 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
377 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,8]
378 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
379 ; AVX512VL-NEXT: retq
381 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
383 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
384 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1],xmm1[0,1]
385 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
386 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
387 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
390 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
392 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
393 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
395 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
396 ret <16 x i16> %shuffle
; Near-splat of element 0 of %a. Mask lane 6 is 9 and every other lane is 0, so a
; word from the upper 128-bit half of %a must land in the lower half of the result.
; All mask indices are below 16, so %b is never selected from.
399 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
400 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
402 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
403 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
404 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,3,0,4,5,6,7]
405 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,1]
406 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
407 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
408 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
411 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
413 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
414 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,10,11,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
417 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
419 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,9,0]
420 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
421 ; AVX512VL-NEXT: retq
423 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
425 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
426 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1],xmm1[2,3],xmm0[0,1]
427 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
428 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
429 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
432 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
434 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
435 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,10,11,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
437 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
438 ret <16 x i16> %shuffle
; Near-splat of element 0 of %a. Mask lane 5 is 10 and every other lane is 0, so a
; word from the upper 128-bit half of %a must land in the lower half of the result.
; All mask indices are below 16, so %b is never selected from.
441 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
442 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
444 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
445 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
446 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
447 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
448 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
449 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
452 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
454 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
455 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,12,13,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
458 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
460 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,10,0,0]
461 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
462 ; AVX512VL-NEXT: retq
464 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
466 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
467 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1],xmm1[4,5],xmm0[0,1,0,1]
468 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
469 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
470 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
473 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
475 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
476 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,12,13,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
478 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
479 ret <16 x i16> %shuffle
; Near-splat of element 0 of %a. Mask lane 4 is 11 and every other lane is 0, so a
; word from the upper 128-bit half of %a must land in the lower half of the result.
; All mask indices are below 16, so %b is never selected from.
482 define <16 x i16> @shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
483 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
485 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
486 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
487 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1]
488 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
489 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
490 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
493 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
495 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
496 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
499 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
501 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,11,0,0,0]
502 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
503 ; AVX512VL-NEXT: retq
505 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
507 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
508 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1],xmm1[6,7],xmm0[0,1,0,1,0,1]
509 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
510 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
511 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
514 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
516 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
517 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
519 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
520 ret <16 x i16> %shuffle
; Near-splat of element 0 of %a. Mask lane 3 is 12 and every other lane is 0, so a
; word from the upper 128-bit half of %a must land in the lower half of the result.
; All mask indices are below 16, so %b is never selected from.
523 define <16 x i16> @shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
524 ; AVX1-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
526 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
527 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
528 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
529 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
530 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
531 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
534 ; AVX2-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
536 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
537 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
540 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
542 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,12,0,0,0,0]
543 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
544 ; AVX512VL-NEXT: retq
546 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
548 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
549 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1],xmm1[8,9],xmm0[0,1,0,1,0,1,0,1]
550 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
551 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
552 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
555 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
557 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
558 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
560 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
561 ret <16 x i16> %shuffle
; Near-splat of element 0 of %a. Mask lane 2 is 13 and every other lane is 0, so a
; word from the upper 128-bit half of %a must land in the lower half of the result.
; All mask indices are below 16, so %b is never selected from.
564 define <16 x i16> @shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
565 ; AVX1-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
567 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
568 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
569 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
570 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
571 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
572 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
575 ; AVX2-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
577 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
578 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
581 ; AVX512VL-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
583 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,13,0,0,0,0,0]
584 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
585 ; AVX512VL-NEXT: retq
587 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
589 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
590 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm0[0,1,0,1],xmm1[10,11],xmm0[0,1,0,1,0,1,0,1,0,1]
591 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
592 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
593 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
596 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
598 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
599 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
601 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
602 ret <16 x i16> %shuffle
; Near-splat of element 0 of %a. Mask lane 1 is 14 and every other lane is 0, so a
; word from the upper 128-bit half of %a must land in the lower half of the result.
; All mask indices are below 16, so %b is never selected from.
605 define <16 x i16> @shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
606 ; AVX1-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
608 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
609 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
610 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
611 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
612 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
613 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
616 ; AVX2-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
618 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
619 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
622 ; AVX512VL-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
624 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [0,14,0,0,0,0,0,0]
625 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
626 ; AVX512VL-NEXT: retq
628 ; XOPAVX1-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
630 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
631 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm0[0,1],xmm1[12,13],xmm0[0,1,0,1,0,1,0,1,0,1,0,1]
632 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
633 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
634 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
637 ; XOPAVX2-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
639 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
640 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
642 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
643 ret <16 x i16> %shuffle
; Near-splat of element 0 of %a. Mask lane 0 is 15 and every other lane is 0, so
; the last word of %a (in its upper 128-bit half) must land in the first result lane.
; All mask indices are below 16, so %b is never selected from.
646 define <16 x i16> @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
647 ; AVX1-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
649 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
650 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
651 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
652 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
653 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
654 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
657 ; AVX2-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
659 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3]
660 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,8,9,8,9,8,9,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
663 ; AVX512VL-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
665 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [15,0,0,0]
666 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
667 ; AVX512VL-NEXT: retq
669 ; XOPAVX1-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
671 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
672 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[14,15],xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1]
673 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
674 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
675 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
678 ; XOPAVX2-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
680 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3]
681 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,8,9,8,9,8,9,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
683 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
684 ret <16 x i16> %shuffle
; Single-input shuffle of %a; the function name spells out the mask, two
; digits per result lane. Result lanes 0-7 all take element 0 and result
; lanes 8-15 all take element 8, i.e. the first i16 of each 128-bit half is
; splatted across that same half. %b is never selected by the mask.
; The expected-assembly comments in the body are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of hand-editing.
687 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
688 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
690 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
691 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
692 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
693 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
694 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
697 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
698 ; AVX2-SLOW: # %bb.0:
699 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
700 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
701 ; AVX2-SLOW-NEXT: retq
703 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
704 ; AVX2-FAST: # %bb.0:
705 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
706 ; AVX2-FAST-NEXT: retq
708 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
709 ; AVX512VL-SLOW: # %bb.0:
710 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
711 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
712 ; AVX512VL-SLOW-NEXT: retq
714 ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
715 ; AVX512VL-FAST: # %bb.0:
716 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
717 ; AVX512VL-FAST-NEXT: retq
719 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
721 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
722 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
723 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
724 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
725 ; XOPAVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
728 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
730 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
731 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
733 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
734 ret <16 x i16> %shuffle
; Single-input shuffle of %a; the function name spells out the mask, two
; digits per result lane. Result lanes 0-7 all take element 7 and result
; lanes 8-15 all take element 15, i.e. the last i16 of each 128-bit half is
; splatted across that same half. %b is never selected by the mask.
; The expected-assembly comments in the body are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of hand-editing.
737 define <16 x i16> @shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15(<16 x i16> %a, <16 x i16> %b) {
738 ; AVX1-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
740 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,7,7,7]
741 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
742 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
743 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
744 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
747 ; AVX2-SLOW-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
748 ; AVX2-SLOW: # %bb.0:
749 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
750 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
751 ; AVX2-SLOW-NEXT: retq
753 ; AVX2-FAST-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
754 ; AVX2-FAST: # %bb.0:
755 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,30,31,30,31,30,31,30,31,30,31,30,31,30,31,30,31]
756 ; AVX2-FAST-NEXT: retq
758 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
759 ; AVX512VL-SLOW: # %bb.0:
760 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
761 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
762 ; AVX512VL-SLOW-NEXT: retq
764 ; AVX512VL-FAST-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
765 ; AVX512VL-FAST: # %bb.0:
766 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,30,31,30,31,30,31,30,31,30,31,30,31,30,31,30,31]
767 ; AVX512VL-FAST-NEXT: retq
769 ; XOPAVX1-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
771 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,7,7,7]
772 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
773 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
774 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
775 ; XOPAVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
778 ; XOPAVX2-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
780 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
781 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
783 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
784 ret <16 x i16> %shuffle
; Single-input shuffle of %a; the function name spells out the mask, two
; digits per result lane. Each run of four consecutive result lanes repeats
; the first element of the matching run of four in %a (elements 0, 4, 8 and
; 12). %b is never selected by the mask.
; The expected-assembly comments in the body are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of hand-editing.
787 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
788 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
790 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
791 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
792 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
793 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
794 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
795 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
798 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
799 ; AVX2-SLOW: # %bb.0:
800 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
801 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
802 ; AVX2-SLOW-NEXT: retq
804 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
805 ; AVX2-FAST: # %bb.0:
806 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
807 ; AVX2-FAST-NEXT: retq
809 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
810 ; AVX512VL-SLOW: # %bb.0:
811 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
812 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
813 ; AVX512VL-SLOW-NEXT: retq
815 ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
816 ; AVX512VL-FAST: # %bb.0:
817 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
818 ; AVX512VL-FAST-NEXT: retq
820 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
822 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
823 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
824 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
825 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
826 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
827 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
830 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
832 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
833 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
835 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
836 ret <16 x i16> %shuffle
; Single-input shuffle of %a; the function name spells out the mask, two
; digits per result lane. Each run of four consecutive result lanes repeats
; the last element of the matching run of four in %a (elements 3, 7, 11 and
; 15). %b is never selected by the mask.
; The expected-assembly comments in the body are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of hand-editing.
839 define <16 x i16> @shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15(<16 x i16> %a, <16 x i16> %b) {
840 ; AVX1-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
842 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
843 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,7,7,7]
844 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
845 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
846 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
847 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
850 ; AVX2-SLOW-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
851 ; AVX2-SLOW: # %bb.0:
852 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
853 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
854 ; AVX2-SLOW-NEXT: retq
856 ; AVX2-FAST-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
857 ; AVX2-FAST: # %bb.0:
858 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,6,7,6,7,6,7,14,15,14,15,14,15,14,15,22,23,22,23,22,23,22,23,30,31,30,31,30,31,30,31]
859 ; AVX2-FAST-NEXT: retq
861 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
862 ; AVX512VL-SLOW: # %bb.0:
863 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
864 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
865 ; AVX512VL-SLOW-NEXT: retq
867 ; AVX512VL-FAST-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
868 ; AVX512VL-FAST: # %bb.0:
869 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,6,7,6,7,6,7,14,15,14,15,14,15,14,15,22,23,22,23,22,23,22,23,30,31,30,31,30,31,30,31]
870 ; AVX512VL-FAST-NEXT: retq
872 ; XOPAVX1-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
874 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
875 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,7,7,7]
876 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
877 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
878 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
879 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
882 ; XOPAVX2-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
884 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
885 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
887 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7, i32 11, i32 11, i32 11, i32 11, i32 15, i32 15, i32 15, i32 15>
888 ret <16 x i16> %shuffle
; Single-input shuffle of %a; the function name spells out the mask, two
; digits per result lane. Every even-indexed element of %a is duplicated
; into its own lane and the odd lane that follows it (0,0,2,2,...,14,14).
; %b is never selected by the mask.
; The expected-assembly comments in the body are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of hand-editing.
891 define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14(<16 x i16> %a, <16 x i16> %b) {
892 ; AVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
894 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
895 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
896 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
897 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
898 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
899 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
902 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
903 ; AVX2-SLOW: # %bb.0:
904 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,2,4,5,6,7,8,8,10,10,12,13,14,15]
905 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,6,8,9,10,11,12,12,14,14]
906 ; AVX2-SLOW-NEXT: retq
908 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
909 ; AVX2-FAST: # %bb.0:
910 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13,16,17,16,17,20,21,20,21,24,25,24,25,28,29,28,29]
911 ; AVX2-FAST-NEXT: retq
913 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
914 ; AVX512VL-SLOW: # %bb.0:
915 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,2,4,5,6,7,8,8,10,10,12,13,14,15]
916 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,6,8,9,10,11,12,12,14,14]
917 ; AVX512VL-SLOW-NEXT: retq
919 ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
920 ; AVX512VL-FAST: # %bb.0:
921 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13,16,17,16,17,20,21,20,21,24,25,24,25,28,29,28,29]
922 ; AVX512VL-FAST-NEXT: retq
924 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
926 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
927 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
928 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
929 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
930 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
931 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
934 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
936 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,2,4,5,6,7,8,8,10,10,12,13,14,15]
937 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,6,8,9,10,11,12,12,14,14]
939 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
940 ret <16 x i16> %shuffle
; Single-input shuffle of %a; the function name spells out the mask, two
; digits per result lane. Every odd-indexed element of %a is duplicated
; into its own lane and the even lane that precedes it (1,1,3,3,...,15,15).
; %b is never selected by the mask.
; The expected-assembly comments in the body are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of hand-editing.
943 define <16 x i16> @shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15(<16 x i16> %a, <16 x i16> %b) {
944 ; AVX1-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
946 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,3,3,4,5,6,7]
947 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,7,7]
948 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
949 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,3,3,4,5,6,7]
950 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7]
951 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
954 ; AVX2-SLOW-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
955 ; AVX2-SLOW: # %bb.0:
956 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15]
957 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,7,7,8,9,10,11,13,13,15,15]
958 ; AVX2-SLOW-NEXT: retq
960 ; AVX2-FAST-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
961 ; AVX2-FAST: # %bb.0:
962 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15,18,19,18,19,22,23,22,23,26,27,26,27,30,31,30,31]
963 ; AVX2-FAST-NEXT: retq
965 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
966 ; AVX512VL-SLOW: # %bb.0:
967 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15]
968 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,7,7,8,9,10,11,13,13,15,15]
969 ; AVX512VL-SLOW-NEXT: retq
971 ; AVX512VL-FAST-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
972 ; AVX512VL-FAST: # %bb.0:
973 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15,18,19,18,19,22,23,22,23,26,27,26,27,30,31,30,31]
974 ; AVX512VL-FAST-NEXT: retq
976 ; XOPAVX1-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
978 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,3,3,4,5,6,7]
979 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,7,7]
980 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
981 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,3,3,4,5,6,7]
982 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7]
983 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
986 ; XOPAVX2-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
988 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15]
989 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,7,7,8,9,10,11,13,13,15,15]
991 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
992 ret <16 x i16> %shuffle
; Single-input shuffle of %a; the function name spells out the mask, two
; digits per result lane. Both 8-lane halves of the result get the same
; pattern, sourced only from the low 128 bits of %a. Result lanes 6 and 14
; take element 1; every other lane takes element 0. %b is never selected by
; the mask.
; The expected-assembly comments in the body are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of hand-editing.
995 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) {
996 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
998 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
999 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
1000 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1003 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
1004 ; AVX2-SLOW: # %bb.0:
1005 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
1006 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
1007 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1008 ; AVX2-SLOW-NEXT: retq
1010 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
1011 ; AVX2-FAST-ALL: # %bb.0:
1012 ; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
1013 ; AVX2-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,1,0,0,0,1]
1014 ; AVX2-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
1015 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
1016 ; AVX2-FAST-ALL-NEXT: retq
1018 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
1019 ; AVX2-FAST-PERLANE: # %bb.0:
1020 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
1021 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1022 ; AVX2-FAST-PERLANE-NEXT: retq
1024 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
1025 ; AVX512VL: # %bb.0:
1026 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0]
1027 ; AVX512VL-NEXT: # ymm1 = mem[0,1,0,1]
1028 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
1029 ; AVX512VL-NEXT: retq
1031 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
1033 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
1034 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
1035 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1036 ; XOPAVX1-NEXT: retq
1038 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
1040 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
1041 ; XOPAVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
1042 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1043 ; XOPAVX2-NEXT: retq
1044 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
1045 ret <16 x i16> %shuffle
; Single-input shuffle of %a; the function name spells out the mask, two
; digits per result lane. Both 8-lane halves of the result get the same
; pattern, sourced only from the low 128 bits of %a. Result lanes 5 and 13
; take element 2; every other lane takes element 0. %b is never selected by
; the mask.
; The expected-assembly comments in the body are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of hand-editing.
1048 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) {
1049 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
1051 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
1052 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
1053 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1056 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
1057 ; AVX2-SLOW: # %bb.0:
1058 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
1059 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
1060 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1061 ; AVX2-SLOW-NEXT: retq
1063 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
1064 ; AVX2-FAST-ALL: # %bb.0:
1065 ; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
1066 ; AVX2-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,1,0,0,0,1,0]
1067 ; AVX2-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
1068 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
1069 ; AVX2-FAST-ALL-NEXT: retq
1071 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
1072 ; AVX2-FAST-PERLANE: # %bb.0:
1073 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
1074 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1075 ; AVX2-FAST-PERLANE-NEXT: retq
1077 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
1078 ; AVX512VL: # %bb.0:
1079 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0]
1080 ; AVX512VL-NEXT: # ymm1 = mem[0,1,0,1]
1081 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
1082 ; AVX512VL-NEXT: retq
1084 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
1086 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
1087 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
1088 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1089 ; XOPAVX1-NEXT: retq
1091 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
1093 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
1094 ; XOPAVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
1095 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1096 ; XOPAVX2-NEXT: retq
1097 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
1098 ret <16 x i16> %shuffle
; Single-input shuffle of %a; the function name spells out the mask, two
; digits per result lane. Both 8-lane halves of the result get the same
; pattern, sourced only from the low 128 bits of %a. Result lanes 4 and 12
; take element 3; every other lane takes element 0. %b is never selected by
; the mask.
; The expected-assembly comments in the body are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of hand-editing.
1101 define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) {
1102 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
1104 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
1105 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
1106 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1109 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
1110 ; AVX2-SLOW: # %bb.0:
1111 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
1112 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
1113 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1114 ; AVX2-SLOW-NEXT: retq
1116 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
1117 ; AVX2-FAST-ALL: # %bb.0:
1118 ; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
1119 ; AVX2-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,1,0,0,0,1,0]
1120 ; AVX2-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
1121 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
1122 ; AVX2-FAST-ALL-NEXT: retq
1124 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
1125 ; AVX2-FAST-PERLANE: # %bb.0:
1126 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
1127 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1128 ; AVX2-FAST-PERLANE-NEXT: retq
1130 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
1131 ; AVX512VL: # %bb.0:
1132 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0]
1133 ; AVX512VL-NEXT: # ymm1 = mem[0,1,0,1]
1134 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
1135 ; AVX512VL-NEXT: retq
1137 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
1139 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
1140 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
1141 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1142 ; XOPAVX1-NEXT: retq
1144 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
1146 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
1147 ; XOPAVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
1148 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1149 ; XOPAVX2-NEXT: retq
1150 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
1151 ret <16 x i16> %shuffle
; Single-input shuffle of %a; the function name spells out the mask, two
; digits per result lane. Both 8-lane halves of the result get the same
; pattern, sourced only from the low 128 bits of %a. Result lanes 3 and 11
; take element 4; every other lane takes element 0. %b is never selected by
; the mask.
; The expected-assembly comments in the body are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of hand-editing.
1154 define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
1155 ; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
1157 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
1158 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1161 ; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
1163 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
1164 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1167 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
1168 ; AVX512VL: # %bb.0:
1169 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0,0,0,0,4,0,0,0,0]
1170 ; AVX512VL-NEXT: # ymm1 = mem[0,1,0,1]
1171 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
1172 ; AVX512VL-NEXT: retq
1174 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
1176 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
1177 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1178 ; XOPAVX1-NEXT: retq
1180 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
1182 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
1183 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1184 ; XOPAVX2-NEXT: retq
1185 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
1186 ret <16 x i16> %shuffle
; Single-input shuffle of %a; the function name spells out the mask, two
; digits per result lane. Both 8-lane halves of the result get the same
; pattern, sourced only from the low 128 bits of %a. Result lanes 2 and 10
; take element 5; every other lane takes element 0. %b is never selected by
; the mask.
; The expected-assembly comments in the body are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of hand-editing.
1189 define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
1190 ; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
1192 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
1193 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1196 ; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
1198 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
1199 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1202 ; AVX512VL-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
1203 ; AVX512VL: # %bb.0:
1204 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0,0,0,5,0,0,0,0,0]
1205 ; AVX512VL-NEXT: # ymm1 = mem[0,1,0,1]
1206 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
1207 ; AVX512VL-NEXT: retq
1209 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
1211 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
1212 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1213 ; XOPAVX1-NEXT: retq
1215 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
1217 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
1218 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1219 ; XOPAVX2-NEXT: retq
1220 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
1221 ret <16 x i16> %shuffle
; Single-input shuffle of %a; the function name spells out the mask, two
; digits per result lane. Both 8-lane halves of the result get the same
; pattern, sourced only from the low 128 bits of %a. Result lanes 1 and 9
; take element 6; every other lane takes element 0. %b is never selected by
; the mask.
; The expected-assembly comments in the body are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of hand-editing.
1224 define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
1225 ; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
1227 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
1228 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1231 ; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
1233 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
1234 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1237 ; AVX512VL-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
1238 ; AVX512VL: # %bb.0:
1239 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
1240 ; AVX512VL-NEXT: # ymm1 = mem[0,1,0,1]
1241 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
1242 ; AVX512VL-NEXT: retq
1244 ; XOPAVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
1246 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
1247 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1248 ; XOPAVX1-NEXT: retq
1250 ; XOPAVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
1252 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
1253 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1254 ; XOPAVX2-NEXT: retq
1255 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1256 ret <16 x i16> %shuffle
; Single-input shuffle of %a; the function name spells out the mask, two
; digits per result lane. Both 8-lane halves of the result get the same
; pattern, sourced only from the low 128 bits of %a. Result lanes 0 and 8
; take element 7; every other lane takes element 0. %b is never selected by
; the mask.
; The expected-assembly comments in the body are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of hand-editing.
1259 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
1260 ; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
1262 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
1263 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1266 ; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
1268 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
1269 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1272 ; AVX512VL-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
1273 ; AVX512VL: # %bb.0:
1274 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [7,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
1275 ; AVX512VL-NEXT: # ymm1 = mem[0,1,0,1]
1276 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
1277 ; AVX512VL-NEXT: retq
1279 ; XOPAVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
1281 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
1282 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1283 ; XOPAVX1-NEXT: retq
1285 ; XOPAVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
1287 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
1288 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1289 ; XOPAVX2-NEXT: retq
1290 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1291 ret <16 x i16> %shuffle
; Two-input shuffle; the function name spells out the mask, two digits per
; result lane. Mask indices 0-15 select from %a and 16-31 select from %b, so
; this is an in-place blend. Even result lanes keep the element of %a at
; that position; odd result lanes take the element of %b at that position
; (17 is element 1 of %b, 19 is element 3, and so on).
; The expected-assembly comments in the body are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of hand-editing.
1294 define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x i16> %a, <16 x i16> %b) {
1295 ; AVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
1297 ; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
1298 ; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
1299 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
1300 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1303 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
1304 ; AVX2OR512VL: # %bb.0:
1305 ; AVX2OR512VL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
1306 ; AVX2OR512VL-NEXT: retq
1308 ; XOPAVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
1310 ; XOPAVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
1311 ; XOPAVX1-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
1312 ; XOPAVX1-NEXT: retq
1314 ; XOPAVX2-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
1316 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
1317 ; XOPAVX2-NEXT: retq
1318 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
1319 ret <16 x i16> %shuffle
; Mirror image of the even/odd blend - even-numbered result elements come from
; %b (mask indices 16..30) and odd-numbered ones from %a. The expected code is
; the same shape as for the opposite blend with the roles of ymm0 and ymm1
; swapped.
1322 define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15(<16 x i16> %a, <16 x i16> %b) {
1323 ; AVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15:
1325 ; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
1326 ; AVX1-NEXT: vandnps %ymm0, %ymm2, %ymm0
1327 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
1328 ; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
1331 ; AVX2OR512VL-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15:
1332 ; AVX2OR512VL: # %bb.0:
1333 ; AVX2OR512VL-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
1334 ; AVX2OR512VL-NEXT: retq
1336 ; XOPAVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15:
1338 ; XOPAVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
1339 ; XOPAVX1-NEXT: vpcmov %ymm2, %ymm0, %ymm1, %ymm0
1340 ; XOPAVX1-NEXT: retq
1342 ; XOPAVX2-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15:
1344 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
1345 ; XOPAVX2-NEXT: retq
1346 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
1347 ret <16 x i16> %shuffle
; Blend in which adjacent pairs of i16 elements alternate between %a and %b,
; i.e. the selection is uniform over each 32-bit chunk. Every run is expected
; to produce one 32-bit-granular vblendps.
1350 define <16 x i16> @shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31(<16 x i16> %a, <16 x i16> %b) {
1351 ; ALL-LABEL: shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31:
1353 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1355 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
1356 ret <16 x i16> %shuffle
; Blend in which groups of four i16 elements (64 bits) alternate, starting with
; %b - elements 0-3 and 8-11 come from %b, elements 4-7 and 12-15 from %a.
; Every run is expected to produce one vblendps.
1359 define <16 x i16> @shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
1360 ; ALL-LABEL: shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15:
1362 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1364 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 12, i32 13, i32 14, i32 15>
1365 ret <16 x i16> %shuffle
; Identity of %a except for the last element - result element 15 is taken from
; element 15 of %b (mask index 31). Because only the upper 128-bit half needs
; the word blend, the AVX2-capable runs expect a vpblendw into a temporary
; followed by a vpblendd that keeps the low half of %a untouched.
1368 define <16 x i16> @shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31(<16 x i16> %a, <16 x i16> %b) {
1369 ; AVX1-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
1371 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,0]
1372 ; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
1373 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
1374 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1377 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
1378 ; AVX2OR512VL: # %bb.0:
1379 ; AVX2OR512VL-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
1380 ; AVX2OR512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1381 ; AVX2OR512VL-NEXT: retq
1383 ; XOPAVX1-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
1385 ; XOPAVX1-NEXT: vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0, %ymm0
1386 ; XOPAVX1-NEXT: retq
1388 ; XOPAVX2-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
1390 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
1391 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1392 ; XOPAVX2-NEXT: retq
1393 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31>
1394 ret <16 x i16> %shuffle
; Identity of %a except for the first element - result element 0 is taken from
; element 0 of %b (mask index 16). Only the low 128-bit half is affected, so the
; AVX2-capable runs expect a 128-bit vpblendw followed by a vpblendd that
; re-attaches the unmodified upper half of %a.
1397 define <16 x i16> @shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
1398 ; AVX1-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
1400 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [0,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535]
1401 ; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
1402 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
1403 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1406 ; AVX2OR512VL-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
1407 ; AVX2OR512VL: # %bb.0:
1408 ; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
1409 ; AVX2OR512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1410 ; AVX2OR512VL-NEXT: retq
1412 ; XOPAVX1-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
1414 ; XOPAVX1-NEXT: vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0, %ymm0
1415 ; XOPAVX1-NEXT: retq
1417 ; XOPAVX2-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
1419 ; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
1420 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1421 ; XOPAVX2-NEXT: retq
1422 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1423 ret <16 x i16> %shuffle
; Even/odd blend whose pattern differs between the two 128-bit halves - in the
; low half even elements come from %a and odd ones from %b, in the high half
; the roles are reversed. The AVX2 checks therefore expect a byte-mask
; vpblendvb rather than an immediate vpblendw, and the AVX512VL checks expect a
; masked vmovdqu16 with k-mask 0x55AA (one bit per element taken from %b).
1426 define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15(<16 x i16> %a, <16 x i16> %b) {
1427 ; AVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15:
1429 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,0,65535,0,65535,0,65535,0,0,65535,0,65535,0,65535,0,65535]
1430 ; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
1431 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
1432 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1435 ; AVX2-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15:
1437 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255]
1438 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
1441 ; AVX512VL-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15:
1442 ; AVX512VL: # %bb.0:
1443 ; AVX512VL-NEXT: movw $21930, %ax # imm = 0x55AA
1444 ; AVX512VL-NEXT: kmovd %eax, %k1
1445 ; AVX512VL-NEXT: vmovdqu16 %ymm1, %ymm0 {%k1}
1446 ; AVX512VL-NEXT: retq
1448 ; XOPAVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15:
1450 ; XOPAVX1-NEXT: vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0, %ymm0
1451 ; XOPAVX1-NEXT: retq
1453 ; XOPAVX2-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15:
1455 ; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255]
1456 ; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
1457 ; XOPAVX2-NEXT: retq
1458 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
1459 ret <16 x i16> %shuffle
; Even/odd blend with a different pattern per 128-bit half, mirrored relative
; to the 00_17..._24_09 variant - in the low half even elements come from %b
; and odd ones from %a, in the high half even elements come from %a and odd
; ones from %b. The AVX512VL checks expect k-mask 0xAA55 (one bit per element
; taken from %b).
1462 define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31(<16 x i16> %a, <16 x i16> %b) {
1463 ; AVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31:
1465 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [0,65535,0,65535,0,65535,0,65535,65535,0,65535,0,65535,0,65535,0]
1466 ; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
1467 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
1468 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1471 ; AVX2-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31:
1473 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0]
1474 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
1477 ; AVX512VL-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31:
1478 ; AVX512VL: # %bb.0:
1479 ; AVX512VL-NEXT: movw $-21931, %ax # imm = 0xAA55
1480 ; AVX512VL-NEXT: kmovd %eax, %k1
1481 ; AVX512VL-NEXT: vmovdqu16 %ymm1, %ymm0 {%k1}
1482 ; AVX512VL-NEXT: retq
1484 ; XOPAVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31:
1486 ; XOPAVX1-NEXT: vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0, %ymm0
1487 ; XOPAVX1-NEXT: retq
1489 ; XOPAVX2-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31:
1491 ; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0]
1492 ; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
1493 ; XOPAVX2-NEXT: retq
1494 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
1495 ret <16 x i16> %shuffle
; Blend that is uniform over each 32-bit chunk but uses a different chunk
; pattern in each 128-bit half (a,b,b,a in the low half and a,b,a,b in the high
; half). Every run is still expected to produce one vblendps.
1498 define <16 x i16> @shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31(<16 x i16> %a, <16 x i16> %b) {
1499 ; ALL-LABEL: shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31:
1501 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4],ymm1[5],ymm0[6],ymm1[7]
1503 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 6, i32 7, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
1504 ret <16 x i16> %shuffle
; Repeats the pair (element 0 of %a, element 0 of %b) across the whole vector.
; The expected code interleaves the low words with vpunpcklwd and then
; replicates the resulting first 32-bit chunk (vpbroadcastd where available,
; otherwise vpshufd plus vinsertf128).
1507 define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16(<16 x i16> %a, <16 x i16> %b) {
1508 ; AVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16:
1510 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1511 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
1512 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1515 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16:
1516 ; AVX2OR512VL: # %bb.0:
1517 ; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1518 ; AVX2OR512VL-NEXT: vpbroadcastd %xmm0, %ymm0
1519 ; AVX2OR512VL-NEXT: retq
1521 ; XOPAVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16:
1523 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1524 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
1525 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1526 ; XOPAVX1-NEXT: retq
1528 ; XOPAVX2-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16:
1530 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1531 ; XOPAVX2-NEXT: vpbroadcastd %xmm0, %ymm0
1532 ; XOPAVX2-NEXT: retq
1533 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16>
1534 ret <16 x i16> %shuffle
; Per-half variant of the interleaved splat - the low 128-bit half repeats the
; pair (element 0 of %a, element 0 of %b) and the high half repeats the pair
; (element 8 of %a, element 8 of %b). No element crosses between halves, so the
; AVX2 checks expect in-lane vpunpcklwd plus vpshufd, while the AVX512VL checks
; expect a single two-source vpermt2w.
1537 define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24(<16 x i16> %a, <16 x i16> %b) {
1538 ; AVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
1540 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1541 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1542 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
1543 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1544 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1545 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
1548 ; AVX2-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
1550 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
1551 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
1554 ; AVX512VL-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
1555 ; AVX512VL: # %bb.0:
1556 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,16,0,16,0,16,0,16,8,24,8,24,8,24,8,24]
1557 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
1558 ; AVX512VL-NEXT: retq
1560 ; XOPAVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
1562 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1563 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1564 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
1565 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1566 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1567 ; XOPAVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
1568 ; XOPAVX1-NEXT: retq
1570 ; XOPAVX2-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
1572 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
1573 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
1574 ; XOPAVX2-NEXT: retq
1575 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24>
1576 ret <16 x i16> %shuffle
; In each 128-bit half, the first four result elements are a splat of the first
; element of the matching half of %b (element 0 resp. 8 of %b) and the last four
; elements pass through unchanged from %a. The AVX2 checks expect an in-lane
; vpshuflw on %b followed by a 32-bit-granular blend with %a.
1579 define <16 x i16> @shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
1580 ; AVX1-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15:
1582 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1583 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1584 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7]
1585 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
1586 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1587 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1588 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1591 ; AVX2-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15:
1593 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
1594 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1597 ; AVX512VL-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15:
1598 ; AVX512VL: # %bb.0:
1599 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,20,21,22,23,8,8,8,8,28,29,30,31]
1600 ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
1601 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
1602 ; AVX512VL-NEXT: retq
1604 ; XOPAVX1-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15:
1606 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1607 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1608 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,0,1,0,1,0,1,24,25,26,27,28,29,30,31]
1609 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm3, %xmm2
1610 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm0, %xmm1, %xmm0
1611 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1612 ; XOPAVX1-NEXT: retq
1614 ; XOPAVX2-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15:
1616 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
1617 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1618 ; XOPAVX2-NEXT: retq
1619 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 4, i32 5, i32 6, i32 7, i32 24, i32 24, i32 24, i32 24, i32 12, i32 13, i32 14, i32 15>
1620 ret <16 x i16> %shuffle
; In each 128-bit half, the first four result elements are the first four
; elements of the matching half of %b in reverse order, and the last four are
; the last four elements of the matching half of %a in reverse order. This
; test has separate expectations for the slow and fast variable-shuffle AVX2
; configurations - the slow one expects vpshuflw plus vpshufhw after the
; blend, the fast one expects a single vpshufb.
1623 define <16 x i16> @shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12(<16 x i16> %a, <16 x i16> %b) {
1624 ; AVX1-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
1626 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1627 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
1628 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1629 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
1630 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
1631 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
1632 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
1633 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1634 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1637 ; AVX2-SLOW-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
1638 ; AVX2-SLOW: # %bb.0:
1639 ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1640 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
1641 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12]
1642 ; AVX2-SLOW-NEXT: retq
1644 ; AVX2-FAST-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
1645 ; AVX2-FAST: # %bb.0:
1646 ; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1647 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,14,15,12,13,10,11,8,9,22,23,20,21,18,19,16,17,30,31,28,29,26,27,24,25]
1648 ; AVX2-FAST-NEXT: retq
1650 ; AVX512VL-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
1651 ; AVX512VL: # %bb.0:
1652 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [3,2,1,0,23,22,21,20,11,10,9,8,31,30,29,28]
1653 ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
1654 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
1655 ; AVX512VL-NEXT: retq
1657 ; XOPAVX1-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
1659 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1660 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1661 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [6,7,4,5,2,3,0,1,30,31,28,29,26,27,24,25]
1662 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm3, %xmm2
1663 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm0, %xmm1, %xmm0
1664 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1665 ; XOPAVX1-NEXT: retq
1667 ; XOPAVX2-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
1669 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1670 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
1671 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12]
1672 ; XOPAVX2-NEXT: retq
1673 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 7, i32 6, i32 5, i32 4, i32 27, i32 26, i32 25, i32 24, i32 15, i32 14, i32 13, i32 12>
1674 ret <16 x i16> %shuffle
; In each 128-bit half, the first four result elements are the first four
; elements of the matching half of %b reversed, followed by the first four
; elements of the matching half of %a reversed. Both inputs contribute their
; low 64 bits per half, so the AVX2 checks expect a vpshuflw on each input
; followed by vpunpcklqdq.
1677 define <16 x i16> @shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08(<16 x i16> %a, <16 x i16> %b) {
1678 ; AVX1-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
1680 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1681 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1682 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
1683 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [12,13,8,9,4,5,0,1,14,15,10,11,6,7,2,3]
1684 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
1685 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1686 ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
1687 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1690 ; AVX2-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
1692 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
1693 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
1694 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
1697 ; AVX512VL-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
1698 ; AVX512VL: # %bb.0:
1699 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [3,2,1,0,19,18,17,16,11,10,9,8,27,26,25,24]
1700 ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
1701 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
1702 ; AVX512VL-NEXT: retq
1704 ; XOPAVX1-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
1706 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1707 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1708 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [6,7,4,5,2,3,0,1,22,23,20,21,18,19,16,17]
1709 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm3, %xmm2
1710 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm0, %xmm1, %xmm0
1711 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1712 ; XOPAVX1-NEXT: retq
1714 ; XOPAVX2-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
1716 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
1717 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
1718 ; XOPAVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
1719 ; XOPAVX2-NEXT: retq
1720 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 3, i32 2, i32 1, i32 0, i32 27, i32 26, i32 25, i32 24, i32 11, i32 10, i32 9, i32 8>
1721 ret <16 x i16> %shuffle
; Single-input shuffle (every mask index is below 16, so %b is never read).
; Each 128-bit half becomes a splat of that half's first element, except that
; position 6 within the half takes the half's second element. Runs with fast
; variable shuffles expect one vpshufb; the other AVX2-capable runs expect
; vpshuflw plus vpshufd.
1724 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08(<16 x i16> %a, <16 x i16> %b) {
1725 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
1727 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,0,4,5,6,7]
1728 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1729 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
1730 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1731 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1734 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
1735 ; AVX2-SLOW: # %bb.0:
1736 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,1,0,4,5,6,7,8,8,9,8,12,13,14,15]
1737 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1738 ; AVX2-SLOW-NEXT: retq
1740 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
1741 ; AVX2-FAST: # %bb.0:
1742 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,18,19,16,17]
1743 ; AVX2-FAST-NEXT: retq
1745 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
1746 ; AVX512VL-SLOW: # %bb.0:
1747 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,1,0,4,5,6,7,8,8,9,8,12,13,14,15]
1748 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1749 ; AVX512VL-SLOW-NEXT: retq
1751 ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
1752 ; AVX512VL-FAST: # %bb.0:
1753 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,18,19,16,17]
1754 ; AVX512VL-FAST-NEXT: retq
1756 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
1758 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,0,4,5,6,7]
1759 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1760 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
1761 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1762 ; XOPAVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1763 ; XOPAVX1-NEXT: retq
1765 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
1767 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,1,0,4,5,6,7,8,8,9,8,12,13,14,15]
1768 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1769 ; XOPAVX2-NEXT: retq
1770 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 8>
1771 ret <16 x i16> %shuffle
; Single-input shuffle (every mask index is below 16, so %b is never read).
; Each 128-bit half becomes a splat of that half's first element, except that
; position 5 within the half takes the half's element 2. Runs with fast
; variable shuffles expect one vpshufb; the other AVX2-capable runs expect
; vpshuflw plus vpshufd.
1774 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08(<16 x i16> %a, <16 x i16> %b) {
1775 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
1777 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,2,4,5,6,7]
1778 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1779 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
1780 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1781 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4]
1784 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
1785 ; AVX2-SLOW: # %bb.0:
1786 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,2,4,5,6,7,8,8,8,10,12,13,14,15]
1787 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4]
1788 ; AVX2-SLOW-NEXT: retq
1790 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
1791 ; AVX2-FAST: # %bb.0:
1792 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,20,21,16,17,16,17]
1793 ; AVX2-FAST-NEXT: retq
1795 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
1796 ; AVX512VL-SLOW: # %bb.0:
1797 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,2,4,5,6,7,8,8,8,10,12,13,14,15]
1798 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4]
1799 ; AVX512VL-SLOW-NEXT: retq
1801 ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
1802 ; AVX512VL-FAST: # %bb.0:
1803 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,20,21,16,17,16,17]
1804 ; AVX512VL-FAST-NEXT: retq
1806 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
1808 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,2,4,5,6,7]
1809 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1810 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
1811 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1812 ; XOPAVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4]
1813 ; XOPAVX1-NEXT: retq
1815 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
1817 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,2,4,5,6,7,8,8,8,10,12,13,14,15]
1818 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4]
1819 ; XOPAVX2-NEXT: retq
1820 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 10, i32 8, i32 8>
1821 ret <16 x i16> %shuffle
; Single-input shuffle (every mask index is below 16, so %b is never read).
; Each 128-bit half becomes a splat of that half's first element, except that
; position 4 within the half takes the half's element 3. Runs with fast
; variable shuffles expect one vpshufb; the other AVX2-capable runs expect
; vpshuflw plus vpshufd.
1824 define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08(<16 x i16> %a, <16 x i16> %b) {
1825 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
1827 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,3,0,4,5,6,7]
1828 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1829 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
1830 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1831 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4]
1834 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
1835 ; AVX2-SLOW: # %bb.0:
1836 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,3,0,4,5,6,7,8,8,11,8,12,13,14,15]
1837 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4]
1838 ; AVX2-SLOW-NEXT: retq
1840 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
1841 ; AVX2-FAST: # %bb.0:
1842 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,22,23,16,17,16,17,16,17]
1843 ; AVX2-FAST-NEXT: retq
1845 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
1846 ; AVX512VL-SLOW: # %bb.0:
1847 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,3,0,4,5,6,7,8,8,11,8,12,13,14,15]
1848 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4]
1849 ; AVX512VL-SLOW-NEXT: retq
1851 ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
1852 ; AVX512VL-FAST: # %bb.0:
1853 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,22,23,16,17,16,17,16,17]
1854 ; AVX512VL-FAST-NEXT: retq
1856 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
1858 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,3,0,4,5,6,7]
1859 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1860 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
1861 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1862 ; XOPAVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4]
1863 ; XOPAVX1-NEXT: retq
1865 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
1867 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,3,0,4,5,6,7,8,8,11,8,12,13,14,15]
1868 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4]
1869 ; XOPAVX2-NEXT: retq
1870 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8>
1871 ret <16 x i16> %shuffle
; Single-input shuffle (every mask index is below 16, so %b is never read).
; Each 128-bit half becomes a splat of that half's first element, except that
; position 3 within the half takes the half's element 4. The odd element
; crosses from the upper to the lower 64 bits of its half, and the checks
; expect a byte shuffle (vpshufb) in every configuration.
1874 define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
1875 ; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08:
1877 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1878 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
1879 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1880 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1881 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1884 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08:
1885 ; AVX2OR512VL: # %bb.0:
1886 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,24,25,16,17,16,17,16,17,16,17]
1887 ; AVX2OR512VL-NEXT: retq
1889 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08:
1891 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1892 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
1893 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1894 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1895 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1896 ; XOPAVX1-NEXT: retq
1898 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08:
1900 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,24,25,16,17,16,17,16,17,16,17]
1901 ; XOPAVX2-NEXT: retq
1902 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8, i32 8>
1903 ret <16 x i16> %shuffle
; Single-input shuffle (every mask index is below 16, so %b is never read).
; Each 128-bit half becomes a splat of that half's first element, except that
; position 2 within the half takes the half's element 5. The checks expect a
; byte shuffle (vpshufb) in every configuration.
1906 define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
1907 ; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08:
1909 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1910 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
1911 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1912 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1913 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1916 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08:
1917 ; AVX2OR512VL: # %bb.0:
1918 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,26,27,16,17,16,17,16,17,16,17,16,17]
1919 ; AVX2OR512VL-NEXT: retq
1921 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08:
1923 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1924 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
1925 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1926 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1927 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1928 ; XOPAVX1-NEXT: retq
1930 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08:
1932 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,26,27,16,17,16,17,16,17,16,17,16,17]
1933 ; XOPAVX2-NEXT: retq
1934 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 13, i32 8, i32 8, i32 8, i32 8, i32 8>
1935 ret <16 x i16> %shuffle
; Single-source, in-lane shuffle: every mask index is below 16, so only %a is
; selected from. Each 8 x i16 half is filled with its own element 0 (index
; 0 / 8), except result slot 1, which takes element 6 of that half (6 / 14).
; Both halves use the same pattern, so the AVX1 and XOP+AVX1 checks expect one
; byte mask reused by two xmm vpshufb ops; the remaining checks expect a
; single ymm vpshufb.
1938 define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
1939 ; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08:
1941 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1942 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
1943 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1944 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1945 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1948 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08:
1949 ; AVX2OR512VL: # %bb.0:
1950 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,28,29,16,17,16,17,16,17,16,17,16,17,16,17]
1951 ; AVX2OR512VL-NEXT: retq
1953 ; XOPAVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08:
1955 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1956 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
1957 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1958 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1959 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1960 ; XOPAVX1-NEXT: retq
1962 ; XOPAVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08:
1964 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,28,29,16,17,16,17,16,17,16,17,16,17,16,17]
1965 ; XOPAVX2-NEXT: retq
1966 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 14, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
1967 ret <16 x i16> %shuffle
; Single-source, in-lane shuffle: every mask index is below 16, so only %a is
; selected from. Result slot 0 of each 8 x i16 half takes that half's last
; element (index 7 / 15); all other slots take the half's element 0 (0 / 8).
; Both halves use the same pattern, so the AVX1 and XOP+AVX1 checks expect one
; byte mask reused by two xmm vpshufb ops; the remaining checks expect a
; single ymm vpshufb.
1970 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
1971 ; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08:
1973 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1974 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
1975 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1976 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1977 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1980 ; AVX2OR512VL-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08:
1981 ; AVX2OR512VL: # %bb.0:
1982 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,30,31,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
1983 ; AVX2OR512VL-NEXT: retq
1985 ; XOPAVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08:
1987 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1988 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
1989 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1990 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1991 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1992 ; XOPAVX1-NEXT: retq
1994 ; XOPAVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08:
1996 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,30,31,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
1997 ; XOPAVX2-NEXT: retq
1998 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
1999 ret <16 x i16> %shuffle
; Two-source interleave: mask indices 0-15 select from %a and 16-31 from %b.
; Within each 8 x i16 half, the first four elements of %a and %b are
; alternated (a0,b0,a1,b1,... and a8,b8,a9,b9,...).
; The checks expect this to be matched as a low word unpack (vpunpcklwd):
; one ymm instruction where available, otherwise one xmm unpack per half
; recombined with vinsertf128.
2002 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
2003 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27:
2005 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2006 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2007 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
2008 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2009 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2012 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27:
2013 ; AVX2OR512VL: # %bb.0:
2014 ; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
2015 ; AVX2OR512VL-NEXT: retq
2017 ; XOPAVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27:
2019 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2020 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2021 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
2022 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2023 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2024 ; XOPAVX1-NEXT: retq
2026 ; XOPAVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27:
2028 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
2029 ; XOPAVX2-NEXT: retq
2030 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
2031 ret <16 x i16> %shuffle
; Two-source interleave: mask indices 0-15 select from %a and 16-31 from %b.
; Within each 8 x i16 half, the last four elements of %a and %b are
; alternated (a4,b4,a5,b5,... and a12,b12,a13,b13,...).
; The checks expect this to be matched as a high word unpack (vpunpckhwd):
; one ymm instruction where available, otherwise one xmm unpack per half
; recombined with vinsertf128.
2034 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
2035 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31:
2037 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2038 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2039 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2040 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2041 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2044 ; AVX2OR512VL-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31:
2045 ; AVX2OR512VL: # %bb.0:
2046 ; AVX2OR512VL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
2047 ; AVX2OR512VL-NEXT: retq
2049 ; XOPAVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31:
2051 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2052 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2053 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2054 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2055 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2056 ; XOPAVX1-NEXT: retq
2058 ; XOPAVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31:
2060 ; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
2061 ; XOPAVX2-NEXT: retq
2062 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
2063 ret <16 x i16> %shuffle
; Two-source interleave with a different pattern per half: the low half
; alternates the first four elements of %a and %b (a0,b0,...,a3,b3), the high
; half alternates the last four (a12,b12,...,a15,b15). Because the halves
; disagree, this is not a single ymm unpack.
; Expected lowerings visible in the checks below:
;  - AVX1 / XOP+AVX1 runs: xmm vpunpcklwd for the low half, xmm vpunpckhwd for
;    the extracted high half, then vinsertf128.
;  - AVX2 / XOP+AVX2 runs: vpermq [0,1,3,3] on both inputs, then one ymm
;    vpunpcklwd.
;  - AVX512VL runs: a constant index vector fed to vpermt2w.
2066 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
2067 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31:
2069 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2070 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2071 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2072 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2073 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2076 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31:
2078 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,3,3]
2079 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,3,3]
2080 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
2083 ; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31:
2084 ; AVX512VL: # %bb.0:
2085 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,16,1,17,2,18,3,19,12,28,13,29,14,30,15,31]
2086 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
2087 ; AVX512VL-NEXT: retq
2089 ; XOPAVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31:
2091 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2092 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2093 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2094 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2095 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2096 ; XOPAVX1-NEXT: retq
2098 ; XOPAVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31:
2100 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,3,3]
2101 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,3,3]
2102 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
2103 ; XOPAVX2-NEXT: retq
2104 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
2105 ret <16 x i16> %shuffle
; Two-source interleave with a different pattern per half: the low half
; alternates the last four elements of the inputs' low halves
; (a4,b4,...,a7,b7), the high half alternates the first four elements of
; their high halves (a8,b8,...,a11,b11). Because the halves disagree, this is
; not a single ymm unpack.
; Expected lowerings visible in the checks below:
;  - AVX1 / XOP+AVX1 runs: xmm vpunpckhwd for the low half, xmm vpunpcklwd for
;    the extracted high half, then vinsertf128.
;  - AVX2 / XOP+AVX2 runs: vpermq [1,1,2,3] on both inputs, then one ymm
;    vpunpcklwd.
;  - AVX512VL runs: a constant index vector fed to vpermt2w.
2108 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
2109 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27:
2111 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2112 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2113 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
2114 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2115 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2118 ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27:
2120 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,1,2,3]
2121 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,1,2,3]
2122 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
2125 ; AVX512VL-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27:
2126 ; AVX512VL: # %bb.0:
2127 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [4,20,5,21,6,22,7,23,8,24,9,25,10,26,11,27]
2128 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
2129 ; AVX512VL-NEXT: retq
2131 ; XOPAVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27:
2133 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2134 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2135 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
2136 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2137 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2138 ; XOPAVX1-NEXT: retq
2140 ; XOPAVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27:
2142 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,1,2,3]
2143 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,1,2,3]
2144 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
2145 ; XOPAVX2-NEXT: retq
2146 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
2147 ret <16 x i16> %shuffle
; Single-source, in-lane shuffle (every mask index is below 16, so only %a is
; selected from) with a different pattern in each 8 x i16 half:
;  - low half: all slots take element 0, except slot 6, which takes element 1;
;  - high half: all slots take element 8, except slot 1, which takes element 9.
; The AVX1 and XOP+AVX1 checks expect a separate vpshuflw + vpshufd pair per
; half; the remaining checks expect a single ymm vpshufb.
2150 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
2151 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08:
2153 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,0,4,5,6,7]
2154 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,1]
2155 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2156 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,0,0,4,5,6,7]
2157 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,1]
2158 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2161 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08:
2162 ; AVX2OR512VL: # %bb.0:
2163 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,18,19,16,17,16,17,16,17,16,17,16,17,16,17]
2164 ; AVX2OR512VL-NEXT: retq
2166 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08:
2168 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,0,4,5,6,7]
2169 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,1]
2170 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2171 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,0,0,4,5,6,7]
2172 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,1]
2173 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2174 ; XOPAVX1-NEXT: retq
2176 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08:
2178 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,18,19,16,17,16,17,16,17,16,17,16,17,16,17]
2179 ; XOPAVX2-NEXT: retq
2180 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 9, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
2181 ret <16 x i16> %shuffle
; Single-source, in-lane shuffle (every mask index is below 16, so only %a is
; selected from) with a different pattern in each 8 x i16 half:
;  - low half: all slots take element 0, except slot 5, which takes element 2;
;  - high half: all slots take element 8, except slot 2, which takes element 10.
; The AVX1 and XOP+AVX1 checks expect a separate vpshuflw + vpshufd pair per
; half; the remaining checks expect a single ymm vpshufb.
2184 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
2185 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08:
2187 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,2,4,5,6,7]
2188 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,0]
2189 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2190 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,0,4,5,6,7]
2191 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0]
2192 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2195 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08:
2196 ; AVX2OR512VL: # %bb.0:
2197 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,20,21,16,17,16,17,16,17,16,17,16,17]
2198 ; AVX2OR512VL-NEXT: retq
2200 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08:
2202 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,2,4,5,6,7]
2203 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,0]
2204 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2205 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,0,4,5,6,7]
2206 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0]
2207 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2208 ; XOPAVX1-NEXT: retq
2210 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08:
2212 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,20,21,16,17,16,17,16,17,16,17,16,17]
2213 ; XOPAVX2-NEXT: retq
2214 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 10, i32 8, i32 8, i32 8, i32 8, i32 8>
2215 ret <16 x i16> %shuffle
; Single-source, in-lane shuffle (every mask index is below 16, so only %a is
; selected from) with a different pattern in each 8 x i16 half:
;  - low half: all slots take element 0, except slot 4, which takes element 3;
;  - high half: all slots take element 8, except slot 3, which takes element 11.
; The AVX1 and XOP+AVX1 checks expect a separate vpshuflw + vpshufd pair per
; half; the remaining checks expect a single ymm vpshufb.
2218 define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
2219 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08:
2221 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,3,0,4,5,6,7]
2222 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,0]
2223 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2224 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,3,4,5,6,7]
2225 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0]
2226 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2229 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08:
2230 ; AVX2OR512VL: # %bb.0:
2231 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,22,23,16,17,16,17,16,17,16,17]
2232 ; AVX2OR512VL-NEXT: retq
2234 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08:
2236 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,3,0,4,5,6,7]
2237 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,0]
2238 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2239 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,3,4,5,6,7]
2240 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0]
2241 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2242 ; XOPAVX1-NEXT: retq
2244 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08:
2246 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,22,23,16,17,16,17,16,17,16,17]
2247 ; XOPAVX2-NEXT: retq
2248 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8, i32 8>
2249 ret <16 x i16> %shuffle
; Single-source, in-lane shuffle (every mask index is below 16, so only %a is
; selected from) with a different pattern in each 8 x i16 half:
;  - low half: all slots take element 0, except slot 3, which takes element 4;
;  - high half: all slots take element 8, except slot 4, which takes element 12.
; The AVX1 and XOP+AVX1 checks expect one xmm vpshufb per half, each with its
; own byte mask; the remaining checks expect a single ymm vpshufb.
2252 define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08(<16 x i16> %a, <16 x i16> %b) {
2253 ; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08:
2255 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
2256 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2257 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,0,1,0,1,0,1]
2258 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2261 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08:
2262 ; AVX2OR512VL: # %bb.0:
2263 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,16,17,16,17,16,17]
2264 ; AVX2OR512VL-NEXT: retq
2266 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08:
2268 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
2269 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2270 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,0,1,0,1,0,1]
2271 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2272 ; XOPAVX1-NEXT: retq
2274 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08:
2276 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,16,17,16,17,16,17]
2277 ; XOPAVX2-NEXT: retq
2278 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8>
2279 ret <16 x i16> %shuffle
; Single-source, in-lane shuffle (every mask index is below 16, so only %a is
; selected from) with a different pattern in each 8 x i16 half:
;  - low half: all slots take element 0, except slot 2, which takes element 5;
;  - high half: all slots take element 8, except slot 5, which takes element 13.
; The AVX1 and XOP+AVX1 checks expect one xmm vpshufb per half, each with its
; own byte mask; the remaining checks expect a single ymm vpshufb.
2282 define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08(<16 x i16> %a, <16 x i16> %b) {
2283 ; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08:
2285 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
2286 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2287 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
2288 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2291 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08:
2292 ; AVX2OR512VL: # %bb.0:
2293 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,26,27,16,17,16,17]
2294 ; AVX2OR512VL-NEXT: retq
2296 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08:
2298 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
2299 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2300 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
2301 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2302 ; XOPAVX1-NEXT: retq
2304 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08:
2306 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,26,27,16,17,16,17]
2307 ; XOPAVX2-NEXT: retq
2308 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 13, i32 8, i32 8>
2309 ret <16 x i16> %shuffle
; Single-source, in-lane shuffle (every mask index is below 16, so only %a is
; selected from) with a different pattern in each 8 x i16 half:
;  - low half: all slots take element 0, except slot 1, which takes element 6;
;  - high half: all slots take element 8, except slot 6, which takes element 14.
; The AVX1 and XOP+AVX1 checks expect one xmm vpshufb per half, each with its
; own byte mask; the remaining checks expect a single ymm vpshufb.
2312 define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08(<16 x i16> %a, <16 x i16> %b) {
2313 ; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08:
2315 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
2316 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2317 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,12,13,0,1]
2318 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2321 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08:
2322 ; AVX2OR512VL: # %bb.0:
2323 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,28,29,16,17]
2324 ; AVX2OR512VL-NEXT: retq
2326 ; XOPAVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08:
2328 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
2329 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2330 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,12,13,0,1]
2331 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2332 ; XOPAVX1-NEXT: retq
2334 ; XOPAVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08:
2336 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,28,29,16,17]
2337 ; XOPAVX2-NEXT: retq
2338 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 14, i32 8>
2339 ret <16 x i16> %shuffle
; Single-source, in-lane shuffle (every mask index is below 16, so only %a is
; selected from) with a different pattern in each 8 x i16 half:
;  - low half: slot 0 takes element 7, all other slots take element 0;
;  - high half: slot 7 takes element 15, all other slots take element 8.
; The AVX1 and XOP+AVX1 checks expect one xmm vpshufb per half, each with its
; own byte mask; the remaining checks expect a single ymm vpshufb.
2342 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15(<16 x i16> %a, <16 x i16> %b) {
2343 ; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15:
2345 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2346 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2347 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,14,15]
2348 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2351 ; AVX2OR512VL-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15:
2352 ; AVX2OR512VL: # %bb.0:
2353 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,30,31]
2354 ; AVX2OR512VL-NEXT: retq
2356 ; XOPAVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15:
2358 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2359 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2360 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,14,15]
2361 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2362 ; XOPAVX1-NEXT: retq
2364 ; XOPAVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15:
2366 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,30,31]
2367 ; XOPAVX2-NEXT: retq
2368 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 15>
2369 ret <16 x i16> %shuffle
; Single-source, in-lane shuffle (every mask index is below 16, so only %a is
; selected from) that duplicates the even-indexed elements of each half:
;  - low half: pairs in ascending order (0,0,2,2,4,4,6,6);
;  - high half: pairs in descending order (14,14,12,12,10,10,8,8).
; The AVX1 and XOP+AVX1 checks expect vpshuflw + vpshufhw for the low half and
; an xmm vpshufb for the high half; the remaining checks expect a single ymm
; vpshufb.
2372 define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08(<16 x i16> %a, <16 x i16> %b) {
2373 ; AVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08:
2375 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
2376 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
2377 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2378 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,8,9,8,9,4,5,4,5,0,1,0,1]
2379 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2382 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08:
2383 ; AVX2OR512VL: # %bb.0:
2384 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13,28,29,28,29,24,25,24,25,20,21,20,21,16,17,16,17]
2385 ; AVX2OR512VL-NEXT: retq
2387 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08:
2389 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
2390 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
2391 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2392 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,8,9,8,9,4,5,4,5,0,1,0,1]
2393 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2394 ; XOPAVX1-NEXT: retq
2396 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08:
2398 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13,28,29,28,29,24,25,24,25,20,21,20,21,16,17,16,17]
2399 ; XOPAVX2-NEXT: retq
2400 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 14, i32 14, i32 12, i32 12, i32 10, i32 10, i32 8, i32 8>
2401 ret <16 x i16> %shuffle
; Single-source, in-lane shuffle (every mask index is below 16, so only %a is
; selected from) that splats one element across each group of four slots:
;  - low half: element 4 into slots 0-3, element 0 into slots 4-7;
;  - high half: element 8 into slots 0-3, element 12 into slots 4-7.
; The AVX1 and XOP+AVX1 checks expect an xmm vpshufb for the low half and
; vpshuflw + vpshufhw for the high half; the remaining checks expect a single
; ymm vpshufb.
2404 define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
2405 ; AVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12:
2407 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
2408 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2409 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2410 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2411 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2414 ; AVX2OR512VL-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12:
2415 ; AVX2OR512VL: # %bb.0:
2416 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
2417 ; AVX2OR512VL-NEXT: retq
2419 ; XOPAVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12:
2421 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
2422 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2423 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2424 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2425 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2426 ; XOPAVX1-NEXT: retq
2428 ; XOPAVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12:
2430 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
2431 ; XOPAVX2-NEXT: retq
2432 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
2433 ret <16 x i16> %shuffle
; Single-source, in-lane shuffle with undef mask entries ("uu" in the name):
;  - low half: every defined slot takes element 0; slots 1 and 2 are undef;
;  - high half: defined slots take element 8, except slot 6, which takes
;    element 14; slots 2 and 3 are undef.
; In the AVX1 and XOP+AVX1 checks the low half is expected to become a full
; splat of element 0 (vpshuflw + vpshufd) and the high half an xmm vpshufb.
; In the remaining checks a single ymm vpshufb is expected, with the undef
; slots printed as `u` bytes in the decoded mask.
2436 define <16 x i16> @shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08(<16 x i16> %a, <16 x i16> %b) {
2437 ; AVX1-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08:
2439 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
2440 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
2441 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2442 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,12,13,0,1]
2443 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2446 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08:
2447 ; AVX2OR512VL: # %bb.0:
2448 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,28,29,16,17]
2449 ; AVX2OR512VL-NEXT: retq
2451 ; XOPAVX1-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08:
2453 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
2454 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
2455 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2456 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,12,13,0,1]
2457 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2458 ; XOPAVX1-NEXT: retq
2460 ; XOPAVX2-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08:
2462 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,28,29,16,17]
2463 ; XOPAVX2-NEXT: retq
2464 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 undef, i32 undef, i32 8, i32 8, i32 14, i32 8>
2465 ret <16 x i16> %shuffle
; Single-source, in-lane shuffle with undef mask entries ("uu" in the name):
;  - low half: slot 0 takes element 7, slot 1 is undef, the rest take
;    element 0;
;  - high half: slot 7 takes element 15, slots 2 and 3 are undef, the rest
;    take element 8.
; The AVX1 and XOP+AVX1 checks expect one xmm vpshufb per half with fully
; defined byte masks; the remaining checks expect a single ymm vpshufb, with
; the undef slots printed as `u` bytes in the decoded mask.
2468 define <16 x i16> @shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15(<16 x i16> %a, <16 x i16> %b) {
2469 ; AVX1-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15:
2471 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[14,15,2,3,0,1,0,1,0,1,0,1,0,1,0,1]
2472 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2473 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,14,15]
2474 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2477 ; AVX2OR512VL-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15:
2478 ; AVX2OR512VL: # %bb.0:
2479 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,u,u,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,16,17,30,31]
2480 ; AVX2OR512VL-NEXT: retq
2482 ; XOPAVX1-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15:
2484 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[14,15,2,3,0,1,0,1,0,1,0,1,0,1,0,1]
2485 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2486 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,14,15]
2487 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2488 ; XOPAVX1-NEXT: retq
2490 ; XOPAVX2-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15:
2492 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,u,u,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,16,17,30,31]
2493 ; XOPAVX2-NEXT: retq
2494 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 undef, i32 undef, i32 8, i32 8, i32 8, i32 15>
2495 ret <16 x i16> %shuffle
; Single-source, in-lane shuffle with undef mask entries ("uu" in the name).
; It is the even-element duplication pattern (ascending pairs in the low
; half, descending pairs in the high half) with slots 1, 2, 6 and 10 left
; undef.
; The AVX1 and XOP+AVX1 checks expect vpshuflw + vpshufhw for the low half and
; an xmm vpshufb for the high half; the remaining checks expect a single ymm
; vpshufb, with the undef slots printed as `u` bytes in the decoded mask.
2498 define <16 x i16> @shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08(<16 x i16> %a, <16 x i16> %b) {
2499 ; AVX1-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08:
2501 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,1,2,2,4,5,6,7]
2502 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
2503 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2504 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,12,13,8,9,4,5,4,5,0,1,0,1]
2505 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2508 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08:
2509 ; AVX2OR512VL: # %bb.0:
2510 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,4,5,8,9,8,9,u,u,12,13,28,29,28,29,u,u,24,25,20,21,20,21,16,17,16,17]
2511 ; AVX2OR512VL-NEXT: retq
2513 ; XOPAVX1-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08:
2515 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,1,2,2,4,5,6,7]
2516 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
2517 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2518 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,12,13,8,9,4,5,4,5,0,1,0,1]
2519 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2520 ; XOPAVX1-NEXT: retq
2522 ; XOPAVX2-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08:
2524 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,4,5,8,9,8,9,u,u,12,13,28,29,28,29,u,u,24,25,20,21,20,21,16,17,16,17]
2525 ; XOPAVX2-NEXT: retq
2526 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 undef, i32 2, i32 4, i32 4, i32 undef, i32 6, i32 14, i32 14, i32 undef, i32 12, i32 10, i32 10, i32 8, i32 8>
2527 ret <16 x i16> %shuffle
; Single-source shuffle with runs of undef lanes ("uu"). Only %a is read (every
; defined mask index is below 16). The low result half splats element 4 into
; its first four lanes; the high result half uses elements 8 and 12, so each
; result half reads only from the matching 128-bit half of %a.
2530 define <16 x i16> @shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12(<16 x i16> %a, <16 x i16> %b) {
2531 ; AVX1-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12:
2533 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,4,4,4]
2534 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,2,2]
2535 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2536 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2537 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2538 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2541 ; AVX2OR512VL-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12:
2542 ; AVX2OR512VL: # %bb.0:
2543 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,u,u,u,u,u,u,u,u,16,17,16,17,16,17,u,u,u,u,24,25,24,25,24,25]
2544 ; AVX2OR512VL-NEXT: retq
2546 ; XOPAVX1-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12:
2548 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,4,4,4]
2549 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,2,2]
2550 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2551 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2552 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2553 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2554 ; XOPAVX1-NEXT: retq
2556 ; XOPAVX2-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12:
2558 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,u,u,u,u,u,u,u,u,16,17,16,17,16,17,u,u,u,u,24,25,24,25,24,25]
2559 ; XOPAVX2-NEXT: retq
2560 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 12, i32 12, i32 12>
2561 ret <16 x i16> %shuffle
; Two-source shuffle. Mask indices 16 and above select from %b (index 16 is
; %b[0]). The low result half splats %a[0] and %a[4] in groups of four; the
; high result half does the same with %b[0] and %b[4]. Both inputs therefore
; come from the low 128-bit halves of their sources.
2564 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20(<16 x i16> %a, <16 x i16> %b) {
2565 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
2567 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2568 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2569 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
2570 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
2571 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2574 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
2575 ; AVX2-SLOW: # %bb.0:
2576 ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2577 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
2578 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
2579 ; AVX2-SLOW-NEXT: retq
2581 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
2582 ; AVX2-FAST: # %bb.0:
2583 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2584 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
2585 ; AVX2-FAST-NEXT: retq
2587 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
2588 ; AVX512VL: # %bb.0:
2589 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,4,4,4,4,16,16,16,16,20,20,20,20]
2590 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
2591 ; AVX512VL-NEXT: retq
2593 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
2595 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2596 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2597 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
2598 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
2599 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2600 ; XOPAVX1-NEXT: retq
2602 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
2604 ; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2605 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
2606 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
2607 ; XOPAVX2-NEXT: retq
2608 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20>
2609 ret <16 x i16> %shuffle
; Two-source shuffle that crosses 128-bit halves. The low result half splats
; %a[8] and %a[12] (taken from the high half of %a); the high result half
; splats %b[0] and %b[4] (mask indices 16 and 20, taken from the low half
; of %b).
2612 define <16 x i16> @shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20(<16 x i16> %a, <16 x i16> %b) {
2613 ; AVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
2615 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2616 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2617 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2618 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
2619 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
2620 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2623 ; AVX2-SLOW-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
2624 ; AVX2-SLOW: # %bb.0:
2625 ; AVX2-SLOW-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
2626 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
2627 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
2628 ; AVX2-SLOW-NEXT: retq
2630 ; AVX2-FAST-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
2631 ; AVX2-FAST: # %bb.0:
2632 ; AVX2-FAST-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
2633 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
2634 ; AVX2-FAST-NEXT: retq
2636 ; AVX512VL-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
2637 ; AVX512VL: # %bb.0:
2638 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [8,8,8,8,12,12,12,12,16,16,16,16,20,20,20,20]
2639 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
2640 ; AVX512VL-NEXT: retq
2642 ; XOPAVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
2644 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2645 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2646 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2647 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
2648 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
2649 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2650 ; XOPAVX1-NEXT: retq
2652 ; XOPAVX2-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
2654 ; XOPAVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
2655 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
2656 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
2657 ; XOPAVX2-NEXT: retq
2658 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20>
2659 ret <16 x i16> %shuffle
; Two-source shuffle that reads only the high 128-bit halves of both inputs.
; The low result half splats %a[8] and %a[12]; the high result half splats
; %b[8] and %b[12] (mask indices 24 and 28).
2662 define <16 x i16> @shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28(<16 x i16> %a, <16 x i16> %b) {
2663 ; AVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
2665 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2666 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2667 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2668 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2669 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
2670 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
2671 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2674 ; AVX2-SLOW-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
2675 ; AVX2-SLOW: # %bb.0:
2676 ; AVX2-SLOW-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
2677 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
2678 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
2679 ; AVX2-SLOW-NEXT: retq
2681 ; AVX2-FAST-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
2682 ; AVX2-FAST: # %bb.0:
2683 ; AVX2-FAST-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
2684 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
2685 ; AVX2-FAST-NEXT: retq
2687 ; AVX512VL-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
2688 ; AVX512VL: # %bb.0:
2689 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [8,8,8,8,12,12,12,12,24,24,24,24,28,28,28,28]
2690 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
2691 ; AVX512VL-NEXT: retq
2693 ; XOPAVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
2695 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2696 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2697 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2698 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2699 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
2700 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
2701 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2702 ; XOPAVX1-NEXT: retq
2704 ; XOPAVX2-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
2706 ; XOPAVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
2707 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
2708 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
2709 ; XOPAVX2-NEXT: retq
2710 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28>
2711 ret <16 x i16> %shuffle
; Two-source shuffle where each result half reads from the matching half of a
; different input. The low result half splats %a[0] and %a[4]; the high result
; half splats %b[8] and %b[12] (mask indices 24 and 28).
2714 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28(<16 x i16> %a, <16 x i16> %b) {
2715 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
2717 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2718 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2719 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2720 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
2721 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
2722 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2725 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
2726 ; AVX2-SLOW: # %bb.0:
2727 ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
2728 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
2729 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
2730 ; AVX2-SLOW-NEXT: retq
2732 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
2733 ; AVX2-FAST: # %bb.0:
2734 ; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
2735 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
2736 ; AVX2-FAST-NEXT: retq
2738 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
2739 ; AVX512VL: # %bb.0:
2740 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,4,4,4,4,24,24,24,24,28,28,28,28]
2741 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
2742 ; AVX512VL-NEXT: retq
2744 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
2746 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2747 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2748 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2749 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
2750 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
2751 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2752 ; XOPAVX1-NEXT: retq
2754 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
2756 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
2757 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
2758 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
2759 ; XOPAVX2-NEXT: retq
2760 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28>
2761 ret <16 x i16> %shuffle
; Interleaves the first eight elements of %a with the first eight elements of
; %b (a0,b0,a1,b1,...,a7,b7). All inputs come from the low 128-bit halves of
; the sources, so the high result half (a4,b4,...,a7,b7) is built from
; low-half data rather than from the matching high half.
2764 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i16> %a, <16 x i16> %b) {
2765 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
2767 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2768 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2769 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2772 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
2774 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2775 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2776 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
2779 ; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
2780 ; AVX512VL: # %bb.0:
2781 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23]
2782 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
2783 ; AVX512VL-NEXT: retq
2785 ; XOPAVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
2787 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2788 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2789 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2790 ; XOPAVX1-NEXT: retq
2792 ; XOPAVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
2794 ; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2795 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2796 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
2797 ; XOPAVX2-NEXT: retq
2798 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
2799 ret <16 x i16> %shuffle
; Shuffle with a zero vector as the FIRST operand and %a as the second, so mask
; index 0 yields a zero element ("zz" in the name) and indices 16 and 24 select
; %a[0] and %a[8]. Each 128-bit half of the result is seven zero words followed
; by the first word of the matching half of %a. The checks expect a per-half
; byte shift left (vpslldq).
2802 define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) {
2803 ; AVX1-LABEL: shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24:
2805 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
2806 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2807 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
2808 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2811 ; AVX2OR512VL-LABEL: shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24:
2812 ; AVX2OR512VL: # %bb.0:
2813 ; AVX2OR512VL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
2814 ; AVX2OR512VL-NEXT: retq
2816 ; XOPAVX1-LABEL: shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24:
2818 ; XOPAVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
2819 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2820 ; XOPAVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
2821 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2822 ; XOPAVX1-NEXT: retq
2824 ; XOPAVX2-LABEL: shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24:
2826 ; XOPAVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
2827 ; XOPAVX2-NEXT: retq
2828 %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24>
2829 ret <16 x i16> %shuffle
; Shuffle with a zero vector as the first operand and %a as the second. Mask
; indices 17-23 and 25-31 select %a[1..7] and %a[9..15]; index 0 yields a zero
; element ("zz"). Each 128-bit half of the result is the matching half of %a
; moved down by one word with a zero word filled in at the top. The checks
; expect a per-half byte shift right (vpsrldq).
2832 define <16 x i16> @shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz(<16 x i16> %a) {
2833 ; AVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz:
2835 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
2836 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2837 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
2838 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2841 ; AVX2OR512VL-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz:
2842 ; AVX2OR512VL: # %bb.0:
2843 ; AVX2OR512VL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero
2844 ; AVX2OR512VL-NEXT: retq
2846 ; XOPAVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz:
2848 ; XOPAVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
2849 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2850 ; XOPAVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
2851 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2852 ; XOPAVX1-NEXT: retq
2854 ; XOPAVX2-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz:
2856 ; XOPAVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero
2857 ; XOPAVX2-NEXT: retq
2858 %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0>
2859 ret <16 x i16> %shuffle
; Single-source shuffle (the second operand is undef) that applies the same
; eight-element permutation (6,7,1,2,7,0,4,5) inside each 128-bit half of %a:
; the high-half mask indices are the low-half indices plus 8.
2862 define <16 x i16> @shuffle_v16i16_06_07_01_02_07_00_04_05_14_15_09_10_15_08_12_13(<16 x i16> %a) {
2863 ; AVX1-LABEL: shuffle_v16i16_06_07_01_02_07_00_04_05_14_15_09_10_15_08_12_13:
2865 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2866 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [12,13,14,15,2,3,4,5,14,15,0,1,8,9,10,11]
2867 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2868 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2869 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2872 ; AVX2OR512VL-LABEL: shuffle_v16i16_06_07_01_02_07_00_04_05_14_15_09_10_15_08_12_13:
2873 ; AVX2OR512VL: # %bb.0:
2874 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[12,13,14,15,2,3,4,5,14,15,0,1,8,9,10,11,28,29,30,31,18,19,20,21,30,31,16,17,24,25,26,27]
2875 ; AVX2OR512VL-NEXT: retq
2877 ; XOPAVX1-LABEL: shuffle_v16i16_06_07_01_02_07_00_04_05_14_15_09_10_15_08_12_13:
2879 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2880 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [12,13,14,15,2,3,4,5,14,15,0,1,8,9,10,11]
2881 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2882 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2883 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2884 ; XOPAVX1-NEXT: retq
2886 ; XOPAVX2-LABEL: shuffle_v16i16_06_07_01_02_07_00_04_05_14_15_09_10_15_08_12_13:
2888 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[12,13,14,15,2,3,4,5,14,15,0,1,8,9,10,11,28,29,30,31,18,19,20,21,30,31,16,17,24,25,26,27]
2889 ; XOPAVX2-NEXT: retq
2890 %1 = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 6, i32 7, i32 1, i32 2, i32 7, i32 0, i32 4, i32 5, i32 14, i32 15, i32 9, i32 10, i32 15, i32 8, i32 12, i32 13>
2895 ; Shuffle to logical bit shifts
; %a shuffled with a zero vector as the second operand: mask index 16 yields a
; zero element ("zz"). Each pair of result words is (zero, even element of %a),
; which equals every 32-bit element of %a shifted left by 16 bits. The checks
; expect vpslld $16.
2898 define <16 x i16> @shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14(<16 x i16> %a) {
2899 ; AVX1-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
2901 ; AVX1-NEXT: vpslld $16, %xmm0, %xmm1
2902 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2903 ; AVX1-NEXT: vpslld $16, %xmm0, %xmm0
2904 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2907 ; AVX2OR512VL-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
2908 ; AVX2OR512VL: # %bb.0:
2909 ; AVX2OR512VL-NEXT: vpslld $16, %ymm0, %ymm0
2910 ; AVX2OR512VL-NEXT: retq
2912 ; XOPAVX1-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
2914 ; XOPAVX1-NEXT: vpslld $16, %xmm0, %xmm1
2915 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2916 ; XOPAVX1-NEXT: vpslld $16, %xmm0, %xmm0
2917 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2918 ; XOPAVX1-NEXT: retq
2920 ; XOPAVX2-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
2922 ; XOPAVX2-NEXT: vpslld $16, %ymm0, %ymm0
2923 ; XOPAVX2-NEXT: retq
2924 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14>
2925 ret <16 x i16> %shuffle
; %a shuffled with a zero vector as the second operand (mask index 16 is a zero
; element, "zz"). Each group of four result words is (zero, zero, zero, first
; word of the group in %a), which equals every 64-bit element of %a shifted
; left by 48 bits. The checks expect vpsllq $48.
2928 define <16 x i16> @shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12(<16 x i16> %a) {
2929 ; AVX1-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
2931 ; AVX1-NEXT: vpsllq $48, %xmm0, %xmm1
2932 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2933 ; AVX1-NEXT: vpsllq $48, %xmm0, %xmm0
2934 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2937 ; AVX2OR512VL-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
2938 ; AVX2OR512VL: # %bb.0:
2939 ; AVX2OR512VL-NEXT: vpsllq $48, %ymm0, %ymm0
2940 ; AVX2OR512VL-NEXT: retq
2942 ; XOPAVX1-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
2944 ; XOPAVX1-NEXT: vpsllq $48, %xmm0, %xmm1
2945 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2946 ; XOPAVX1-NEXT: vpsllq $48, %xmm0, %xmm0
2947 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2948 ; XOPAVX1-NEXT: retq
2950 ; XOPAVX2-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
2952 ; XOPAVX2-NEXT: vpsllq $48, %ymm0, %ymm0
2953 ; XOPAVX2-NEXT: retq
2954 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12>
2955 ret <16 x i16> %shuffle
; %a shuffled with a zero vector as the second operand (mask index 16 is a zero
; element, "zz"). Each pair of result words is (odd element of %a, zero), which
; equals every 32-bit element of %a logically shifted right by 16 bits. The
; checks expect vpsrld $16.
2958 define <16 x i16> @shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz(<16 x i16> %a) {
2959 ; AVX1-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz:
2961 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
2962 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2963 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
2964 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2967 ; AVX2OR512VL-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz:
2968 ; AVX2OR512VL: # %bb.0:
2969 ; AVX2OR512VL-NEXT: vpsrld $16, %ymm0, %ymm0
2970 ; AVX2OR512VL-NEXT: retq
2972 ; XOPAVX1-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz:
2974 ; XOPAVX1-NEXT: vpsrld $16, %xmm0, %xmm1
2975 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2976 ; XOPAVX1-NEXT: vpsrld $16, %xmm0, %xmm0
2977 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2978 ; XOPAVX1-NEXT: retq
2980 ; XOPAVX2-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz:
2982 ; XOPAVX2-NEXT: vpsrld $16, %ymm0, %ymm0
2983 ; XOPAVX2-NEXT: retq
2984 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 3, i32 16, i32 5, i32 16, i32 7, i32 16, i32 9, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16>
2985 ret <16 x i16> %shuffle
; %a shuffled with a zero vector as the second operand (mask index 16 is a zero
; element, "zz"). Each group of four result words is (third word, fourth word,
; zero, zero) of the same group in %a, which equals every 64-bit element of %a
; logically shifted right by 32 bits. The AVX2 and AVX512VL checks expect
; vpsrlq $32; the AVX1-only checks instead expect a vshufps against a zeroed
; register followed by vpermilps.
2988 define <16 x i16> @shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz(<16 x i16> %a) {
2989 ; AVX1-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
2991 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
2992 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
2993 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
2996 ; AVX2OR512VL-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
2997 ; AVX2OR512VL: # %bb.0:
2998 ; AVX2OR512VL-NEXT: vpsrlq $32, %ymm0, %ymm0
2999 ; AVX2OR512VL-NEXT: retq
3001 ; XOPAVX1-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
3003 ; XOPAVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
3004 ; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
3005 ; XOPAVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
3006 ; XOPAVX1-NEXT: retq
3008 ; XOPAVX2-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
3010 ; XOPAVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
3011 ; XOPAVX2-NEXT: retq
3012 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 16, i32 16, i32 14, i32 15, i32 16, i32 16>
3013 ret <16 x i16> %shuffle
; Shuffle with a zero vector as the first operand and %a as the second. Mask
; indices 16-19 select %a[0..3]; index 0 yields a zero element ("zz"). The
; result is %a[0..3], each followed by three zero words, i.e. the first four
; words of %a zero-extended to 64 bits. The checks expect vpmovzxwq.
3016 define <16 x i16> @shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz(<16 x i16> %a) {
3017 ; AVX1-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
3019 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
3020 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
3021 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
3022 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3025 ; AVX2OR512VL-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
3026 ; AVX2OR512VL: # %bb.0:
3027 ; AVX2OR512VL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3028 ; AVX2OR512VL-NEXT: retq
3030 ; XOPAVX1-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
3032 ; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
3033 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
3034 ; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
3035 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3036 ; XOPAVX1-NEXT: retq
3038 ; XOPAVX2-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
3040 ; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3041 ; XOPAVX2-NEXT: retq
3042 %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 16, i32 0, i32 0, i32 0, i32 17, i32 0, i32 0, i32 0, i32 18, i32 0, i32 0, i32 0, i32 19, i32 0, i32 0, i32 0>
3043 ret <16 x i16> %shuffle
; Shuffle with a zero vector as the first operand and %a as the second. Mask
; indices 16-23 select %a[0..7]; index 0 yields a zero element ("zz"). The
; result is %a[0..7], each followed by one zero word, i.e. the first eight
; words of %a zero-extended to 32 bits. The AVX2 and AVX512VL checks expect a
; single vpmovzxwd.
;
; NOTE(review): the function name ends in "_22_zz_22_zz", but the last defined
; mask index is 23, not 22 (the mask ends "i32 22, i32 0, i32 23, i32 0"), and
; the checked code reads xmm0[7]. The name looks like a typo for
; "_22_zz_23_zz"; it is left unchanged here so the label checks keep matching.
3046 define <16 x i16> @shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz(<16 x i16> %a) {
3047 ; AVX1-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz:
3049 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
3050 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3051 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
3052 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3055 ; AVX2OR512VL-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz:
3056 ; AVX2OR512VL: # %bb.0:
3057 ; AVX2OR512VL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3058 ; AVX2OR512VL-NEXT: retq
3060 ; XOPAVX1-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz:
3062 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
3063 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3064 ; XOPAVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
3065 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3066 ; XOPAVX1-NEXT: retq
3068 ; XOPAVX2-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz:
3070 ; XOPAVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3071 ; XOPAVX2-NEXT: retq
3072 %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 16, i32 0, i32 17, i32 0, i32 18, i32 0, i32 19, i32 0, i32 20, i32 0, i32 21, i32 0, i32 22, i32 0, i32 23, i32 0>
3073 ret <16 x i16> %shuffle
; Shuffle with a zero vector as the first operand and %a as the second. Mask
; indices 28-31 select %a[12..15]; index 0 yields a zero element ("zz"). The
; result is the last four words of %a, each followed by three zero words
; (a zero-extension to 64 bits). Unlike the variant that extends %a[0..3],
; the source words live in the high 128-bit half of %a, so the checked code
; first moves that half down (vextractf128 or vpermq) or uses a full two-source
; word permute (vpermt2w) against a zeroed register.
3076 define <16 x i16> @shuffle_v16i16_28_zz_zz_zz_29_zz_zz_zz_30_zz_zz_zz_31_zz_zz_zz(<16 x i16> %a) {
3077 ; AVX1-LABEL: shuffle_v16i16_28_zz_zz_zz_29_zz_zz_zz_30_zz_zz_zz_31_zz_zz_zz:
3079 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3080 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
3081 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
3082 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
3083 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
3084 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3087 ; AVX2-LABEL: shuffle_v16i16_28_zz_zz_zz_29_zz_zz_zz_30_zz_zz_zz_31_zz_zz_zz:
3089 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
3090 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9],zero,zero,zero,zero,zero,zero,ymm0[10,11],zero,zero,zero,zero,zero,zero,ymm0[28,29],zero,zero,zero,zero,zero,zero,ymm0[30,31],zero,zero,zero,zero,zero,zero
3093 ; AVX512VL-LABEL: shuffle_v16i16_28_zz_zz_zz_29_zz_zz_zz_30_zz_zz_zz_31_zz_zz_zz:
3094 ; AVX512VL: # %bb.0:
3095 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [28,1,2,3,29,5,6,7,30,9,10,11,31,13,14,15]
3096 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
3097 ; AVX512VL-NEXT: vpermt2w %ymm0, %ymm2, %ymm1
3098 ; AVX512VL-NEXT: vmovdqa %ymm1, %ymm0
3099 ; AVX512VL-NEXT: retq
3101 ; XOPAVX1-LABEL: shuffle_v16i16_28_zz_zz_zz_29_zz_zz_zz_30_zz_zz_zz_31_zz_zz_zz:
3103 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3104 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
3105 ; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
3106 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
3107 ; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
3108 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3109 ; XOPAVX1-NEXT: retq
3111 ; XOPAVX2-LABEL: shuffle_v16i16_28_zz_zz_zz_29_zz_zz_zz_30_zz_zz_zz_31_zz_zz_zz:
3113 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
3114 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9],zero,zero,zero,zero,zero,zero,ymm0[10,11],zero,zero,zero,zero,zero,zero,ymm0[28,29],zero,zero,zero,zero,zero,zero,ymm0[30,31],zero,zero,zero,zero,zero,zero
3115 ; XOPAVX2-NEXT: retq
3116 %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 28, i32 0, i32 0, i32 0, i32 29, i32 0, i32 0, i32 0, i32 30, i32 0, i32 0, i32 0, i32 31, i32 0, i32 0, i32 0>
3117 ret <16 x i16> %shuffle
; Two-source shuffle. In each 128-bit half, the result is the last word of the
; matching half of %b (mask indices 23 and 31 are %b[7] and %b[15]) followed by
; the first seven words of the matching half of %a. The checks expect a
; per-half vpalignr of %b and %a.
3120 define <16 x i16> @shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14(<16 x i16> %a, <16 x i16> %b) {
3121 ; AVX1-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14:
3123 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3124 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
3125 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3126 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3127 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3130 ; AVX2OR512VL-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14:
3131 ; AVX2OR512VL: # %bb.0:
3132 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm1[30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
3133 ; AVX2OR512VL-NEXT: retq
3135 ; XOPAVX1-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14:
3137 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3138 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
3139 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3140 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3141 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3142 ; XOPAVX1-NEXT: retq
3144 ; XOPAVX2-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14:
3146 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm1[30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
3147 ; XOPAVX2-NEXT: retq
3148 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
3149 ret <16 x i16> %shuffle
; Two-input shuffle (mask indices 16-31 select from %b). In each 128-bit lane
; the result is i16 elements 1..7 of %a's lane followed by the first i16 of
; %b's lane. The assertions below expect one ymm vpalignr where AVX2 is
; available, and two xmm vpalignr ops rejoined with vinsertf128 otherwise.
3152 define <16 x i16> @shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24(<16 x i16> %a, <16 x i16> %b) {
3153 ; AVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24:
3155 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3156 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
3157 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1]
3158 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
3159 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3162 ; AVX2OR512VL-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24:
3163 ; AVX2OR512VL: # %bb.0:
3164 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1],ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17]
3165 ; AVX2OR512VL-NEXT: retq
3167 ; XOPAVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24:
3169 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3170 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
3171 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1]
3172 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
3173 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3174 ; XOPAVX1-NEXT: retq
3176 ; XOPAVX2-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24:
3178 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1],ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17]
3179 ; XOPAVX2-NEXT: retq
3180 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24>
3181 ret <16 x i16> %shuffle
; Two-input shuffle (mask indices 16-31 select from %b). In each 128-bit lane
; the result is i16 elements 1..7 of %b's lane followed by the first i16 of
; %a's lane. The assertions below expect one ymm vpalignr where AVX2 is
; available, and two xmm vpalignr ops rejoined with vinsertf128 otherwise.
; NOTE(review): the function name ends in "_8" and the mask spells "i32 00",
; unlike the zero-padded "_08" / "i32 0" spelling of the neighbouring tests.
; Renaming would also have to touch every LABEL line, so it is left as is.
3184 define <16 x i16> @shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8(<16 x i16> %a, <16 x i16> %b) {
3185 ; AVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8:
3187 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3188 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
3189 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1]
3190 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
3191 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3194 ; AVX2OR512VL-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8:
3195 ; AVX2OR512VL: # %bb.0:
3196 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
3197 ; AVX2OR512VL-NEXT: retq
3199 ; XOPAVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8:
3201 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3202 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
3203 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1]
3204 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
3205 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3206 ; XOPAVX1-NEXT: retq
3208 ; XOPAVX2-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8:
3210 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
3211 ; XOPAVX2-NEXT: retq
3212 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 00, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 8>
3213 ret <16 x i16> %shuffle
; Two-input shuffle (mask indices 16-31 select from %b). In each 128-bit lane
; the result is the last i16 of %a's lane followed by the first seven i16s of
; %b's lane. The assertions below expect one ymm vpalignr where AVX2 is
; available, and two xmm vpalignr ops rejoined with vinsertf128 otherwise.
3216 define <16 x i16> @shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30(<16 x i16> %a, <16 x i16> %b) {
3217 ; AVX1-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30:
3219 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3220 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
3221 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3222 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3223 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3226 ; AVX2OR512VL-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30:
3227 ; AVX2OR512VL: # %bb.0:
3228 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm0[30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
3229 ; AVX2OR512VL-NEXT: retq
3231 ; XOPAVX1-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30:
3233 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3234 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
3235 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3236 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3237 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3238 ; XOPAVX1-NEXT: retq
3240 ; XOPAVX2-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30:
3242 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm0[30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
3243 ; XOPAVX2-NEXT: retq
3244 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
3245 ret <16 x i16> %shuffle
; Two-input shuffle (mask indices 16-31 select from %b). The low 128-bit lane
; of the result is the low eight i16s of %a rotated by one element
; (1,...,7,0); the high lane is the low eight i16s of %b rotated the same way.
; The high halves of both inputs are not referenced. The assertions below
; expect per-half xmm vpalignr rotates on the AVX-only runs, vinserti128 plus
; one ymm vpalignr on the AVX2 runs, and vpermt2w with a constant index
; vector on the AVX512VL runs.
3248 define <16 x i16> @shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16(<16 x i16> %a, <16 x i16> %b) {
3249 ; AVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16:
3251 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
3252 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
3253 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3256 ; AVX2-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16:
3258 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3259 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16,17]
3262 ; AVX512VL-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16:
3263 ; AVX512VL: # %bb.0:
3264 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,2,3,4,5,6,7,0,17,18,19,20,21,22,23,16]
3265 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
3266 ; AVX512VL-NEXT: retq
3268 ; XOPAVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16:
3270 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
3271 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
3272 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3273 ; XOPAVX1-NEXT: retq
3275 ; XOPAVX2-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16:
3277 ; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3278 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16,17]
3279 ; XOPAVX2-NEXT: retq
3280 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16>
3281 ret <16 x i16> %shuffle
; Two-input shuffle (mask indices 16-31 select from %b). The low 128-bit lane
; of the result is the low eight i16s of %a rotated by one element in the
; opposite direction (7,0,...,6); the high lane is the low eight i16s of %b
; rotated the same way. The high halves of both inputs are not referenced.
; The assertions below expect per-half xmm vpalignr rotates on the AVX-only
; runs, vinserti128 plus one ymm vpalignr on the AVX2 runs, and vpermt2w with
; a constant index vector on the AVX512VL runs.
3284 define <16 x i16> @shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22(<16 x i16> %a, <16 x i16> %b) {
3285 ; AVX1-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22:
3287 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3288 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3289 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3292 ; AVX2-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22:
3294 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3295 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,30,31,16,17,18,19,20,21,22,23,24,25,26,27,28,29]
3298 ; AVX512VL-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22:
3299 ; AVX512VL: # %bb.0:
3300 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [7,0,1,2,3,4,5,6,23,16,17,18,19,20,21,22]
3301 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
3302 ; AVX512VL-NEXT: retq
3304 ; XOPAVX1-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22:
3306 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3307 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3308 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3309 ; XOPAVX1-NEXT: retq
3311 ; XOPAVX2-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22:
3313 ; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3314 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,30,31,16,17,18,19,20,21,22,23,24,25,26,27,28,29]
3315 ; XOPAVX2-NEXT: retq
3316 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 23, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
3317 ret <16 x i16> %shuffle
; Single-input shuffle: every mask index is below 16, so only %a is read and
; %b is unused. Both 128-bit lanes follow the in-lane pattern
; 0,1,0,1,2,3,2,3, except result element 7, which reads index 11 (upper lane)
; instead of index 3. That single lane-crossing element is what separates
; this test from a purely in-lane repeated shuffle.
3320 define <16 x i16> @shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11(<16 x i16> %a, <16 x i16> %b) {
3321 ; AVX1-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
3323 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3324 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3325 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
3326 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
3327 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
3328 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3331 ; AVX2-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
3333 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3334 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,0,1,2,3,4,5,6,7,4,5,14,15,16,17,18,19,16,17,18,19,20,21,22,23,20,21,22,23]
3337 ; AVX512VL-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
3338 ; AVX512VL: # %bb.0:
3339 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,0,1,2,3,2,11,8,9,8,9,10,11,10,11]
3340 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
3341 ; AVX512VL-NEXT: retq
3343 ; XOPAVX1-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
3345 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3346 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,2,3,0,1,2,3,4,5,6,7,4,5],xmm1[6,7]
3347 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
3348 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3349 ; XOPAVX1-NEXT: retq
3351 ; XOPAVX2-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
3353 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3354 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,0,1,2,3,4,5,6,7,4,5,14,15,16,17,18,19,16,17,18,19,20,21,22,23,20,21,22,23]
3355 ; XOPAVX2-NEXT: retq
3356 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 11, i32 8, i32 9, i32 8, i32 9, i32 10, i32 11, i32 10, i32 11>
3357 ret <16 x i16> %shuffle
; Single-input shuffle: every mask index is below 16, so only %a is read and
; %b is unused. Both 128-bit lanes follow the in-lane pattern
; 6,7,4,5,2,3,0,1 (pairs of i16 in reversed order), except result element 7,
; which reads index 9 (upper lane) instead of index 1. The assertions below
; expect that one element to be blended in before a per-lane dword reversal
; on the non-AVX512 runs, and a single vpermw on the AVX512VL runs.
3360 define <16 x i16> @shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09(<16 x i16> %a, <16 x i16> %b) {
3361 ; AVX1-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
3363 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3364 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
3365 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3366 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
3369 ; AVX2-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
3371 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3372 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3,4,5,6,7,8],ymm1[9],ymm0[10,11,12,13,14,15]
3373 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
3376 ; AVX512VL-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
3377 ; AVX512VL: # %bb.0:
3378 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [6,7,4,5,2,3,0,9,14,15,12,13,10,11,8,9]
3379 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
3380 ; AVX512VL-NEXT: retq
3382 ; XOPAVX1-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
3384 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3385 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
3386 ; XOPAVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3387 ; XOPAVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
3388 ; XOPAVX1-NEXT: retq
3390 ; XOPAVX2-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
3392 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3393 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3,4,5,6,7,8],ymm1[9],ymm0[10,11,12,13,14,15]
3394 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
3395 ; XOPAVX2-NEXT: retq
3396 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 9, i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9>
3397 ret <16 x i16> %shuffle
; Two-input shuffle (mask indices 16-31 select from %b). In each 128-bit lane
; the result is the upper four i16s of %a's lane followed by the lower four
; i16s of %b's lane, except result element 7, which reads index 27 (element
; 11 of %b, upper lane) instead of index 19 (element 3 of %b). The assertions
; below expect vpermt2w on the AVX512VL runs and blend-based sequences that
; pull in the lane-crossing element on the other runs.
3400 define <16 x i16> @shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27(<16 x i16> %a, <16 x i16> %b) {
3401 ; AVX1-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
3403 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3404 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
3405 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7]
3406 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
3407 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,0,1,4,5,8,9,14,15]
3408 ; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
3409 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3412 ; AVX2-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
3414 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
3415 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
3416 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
3417 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
3420 ; AVX512VL-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
3421 ; AVX512VL: # %bb.0:
3422 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [4,5,6,7,16,17,18,27,12,13,14,15,24,25,26,27]
3423 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
3424 ; AVX512VL-NEXT: retq
3426 ; XOPAVX1-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
3428 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3429 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
3430 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7]
3431 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
3432 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,4,5,8,9,14,15]
3433 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3434 ; XOPAVX1-NEXT: retq
3436 ; XOPAVX2-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
3438 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
3439 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
3440 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
3441 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
3442 ; XOPAVX2-NEXT: retq
3443 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 27, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27>
3444 ret <16 x i16> %shuffle
; Single-input shuffle: every mask index is below 16, so only %a is read and
; %b is unused. Each 128-bit lane is a splat of its own first i16 (index 0
; for the low lane, index 8 for the high lane), except result element 7,
; which reads index 8 from the upper lane instead of index 0.
3447 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
3448 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
3450 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3451 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3452 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,1,4,5,6,7]
3453 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
3454 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
3455 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
3456 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3459 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
3461 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3462 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
3465 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
3466 ; AVX512VL: # %bb.0:
3467 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,8]
3468 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
3469 ; AVX512VL-NEXT: retq
3471 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
3473 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3474 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1],xmm1[0,1]
3475 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
3476 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
3477 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3478 ; XOPAVX1-NEXT: retq
3480 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
3482 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3483 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
3484 ; XOPAVX2-NEXT: retq
3485 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
3486 ret <16 x i16> %shuffle
; Single-input shuffle: every mask index is below 16, so only %a is read and
; %b is unused. Both 128-bit lanes follow the in-lane pattern
; 0,0,0,0,4,4,4,4, except result element 7, which reads index 12 (upper lane)
; instead of index 4. This test has separate expectations for the slow,
; fast-all and fast-perlane AVX2 run configurations, which are expected to
; pick different instruction sequences here.
3489 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
3490 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
3492 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3493 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
3494 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3495 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
3496 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
3497 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
3498 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
3499 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3502 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
3503 ; AVX2-SLOW: # %bb.0:
3504 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3505 ; AVX2-SLOW-NEXT: vpsllq $48, %ymm1, %ymm1
3506 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
3507 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
3508 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
3509 ; AVX2-SLOW-NEXT: retq
3511 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
3512 ; AVX2-FAST-ALL: # %bb.0:
3513 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,6,u,4,6,u,u>
3514 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
3515 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,4,5,4,5,4,5,8,9,16,17,16,17,16,17,16,17,20,21,20,21,20,21,20,21]
3516 ; AVX2-FAST-ALL-NEXT: retq
3518 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
3519 ; AVX2-FAST-PERLANE: # %bb.0:
3520 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,u,u,16,17,16,17,16,17,16,17,24,25,24,25,24,25,u,u]
3521 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
3522 ; AVX2-FAST-PERLANE-NEXT: vpsllq $48, %ymm0, %ymm0
3523 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
3524 ; AVX2-FAST-PERLANE-NEXT: retq
3526 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
3527 ; AVX512VL: # %bb.0:
3528 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,4,4,4,12,8,8,8,8,12,12,12,12]
3529 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
3530 ; AVX512VL-NEXT: retq
3532 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
3534 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3535 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9],xmm1[8,9]
3536 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
3537 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
3538 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3539 ; XOPAVX1-NEXT: retq
3541 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
3543 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3544 ; XOPAVX2-NEXT: vpsllq $48, %ymm1, %ymm1
3545 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
3546 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
3547 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
3548 ; XOPAVX2-NEXT: retq
3549 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
3550 ret <16 x i16> %shuffle
; Single-input shuffle with undef lanes: every even result element is undef
; and every defined mask index is below 16, so only %a is read and %b is
; unused. The odd result elements follow the in-lane pattern 0,1,2,3 in both
; 128-bit lanes, except result element 7, which reads index 11 (upper lane)
; instead of index 3. The undef elements show up as "u" in the expected
; vpshufb / vpermw operands below.
3553 define <16 x i16> @shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11(<16 x i16> %a, <16 x i16> %b) {
3554 ; AVX1-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
3556 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3557 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm1[0,0,1,1,2,2,3,3]
3558 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3559 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
3560 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
3561 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3564 ; AVX2-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
3566 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3567 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
3570 ; AVX512VL-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
3571 ; AVX512VL: # %bb.0:
3572 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <u,0,u,1,u,2,u,11,u,8,u,9,u,10,u,11>
3573 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
3574 ; AVX512VL-NEXT: retq
3576 ; XOPAVX1-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
3578 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3579 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm1[0,0,1,1,2,2,3,3]
3580 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,0,1,2,3,2,3,4,5,4,5,6,7],xmm1[6,7]
3581 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3582 ; XOPAVX1-NEXT: retq
3584 ; XOPAVX2-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
3586 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3587 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
3588 ; XOPAVX2-NEXT: retq
3589 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 11, i32 undef, i32 8, i32 undef, i32 9, i32 undef, i32 10, i32 undef, i32 11>
3590 ret <16 x i16> %shuffle
; Single-input shuffle with undef lanes: every even result element is undef
; and every defined mask index is below 16, so only %a is read and %b is
; unused. The odd result elements follow the in-lane pattern 4,5,6,7 (upper
; half of each 128-bit lane), except result element 7, which reads index 15
; (upper lane) instead of index 7. The undef elements show up as "u" in the
; expected vpshufb / vpermw operands below.
3593 define <16 x i16> @shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15(<16 x i16> %a, <16 x i16> %b) {
3594 ; AVX1-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
3596 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3597 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4,4,5,5,6,6,7,7]
3598 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3599 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
3600 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
3601 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3604 ; AVX2-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
3606 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,3,3]
3607 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
3610 ; AVX512VL-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
3611 ; AVX512VL: # %bb.0:
3612 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <u,4,u,5,u,6,u,15,u,12,u,13,u,14,u,15>
3613 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
3614 ; AVX512VL-NEXT: retq
3616 ; XOPAVX1-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
3618 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3619 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4,4,5,5,6,6,7,7]
3620 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8,9,8,9,10,11,10,11,12,13,12,13,14,15],xmm1[14,15]
3621 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3622 ; XOPAVX1-NEXT: retq
3624 ; XOPAVX2-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
3626 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,3,3]
3627 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
3628 ; XOPAVX2-NEXT: retq
3629 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 15, i32 undef, i32 12, i32 undef, i32 13, i32 undef, i32 14, i32 undef, i32 15>
3630 ret <16 x i16> %shuffle
; Single-input shuffle: every mask index is below 16, so only %a is read and
; %b is unused. Both 128-bit lanes follow the in-lane pattern
; 3,1,2,0,6,7,4,5, except result element 7, which reads index 13 (upper lane)
; instead of index 5. The slow AVX2 run has its own expectations, while both
; fast-shuffle AVX2 runs share one set that ends in a single vpshufb.
3633 define <16 x i16> @shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
3634 ; AVX1-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
3636 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3637 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
3638 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
3639 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
3640 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3641 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,3,2,4,5,7,6]
3644 ; AVX2-SLOW-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
3645 ; AVX2-SLOW: # %bb.0:
3646 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3647 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6,7,8,9,10,11,12],ymm1[13],ymm0[14,15]
3648 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,1,2,0,4,5,6,7,11,9,10,8,12,13,14,15]
3649 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,3,2,4,5,7,6]
3650 ; AVX2-SLOW-NEXT: retq
3652 ; AVX2-FAST-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
3653 ; AVX2-FAST: # %bb.0:
3654 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3655 ; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6,7,8,9,10,11,12],ymm1[13],ymm0[14,15]
3656 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9,10,11,22,23,18,19,20,21,16,17,28,29,30,31,24,25,26,27]
3657 ; AVX2-FAST-NEXT: retq
3659 ; AVX512VL-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
3660 ; AVX512VL: # %bb.0:
3661 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [3,1,2,0,6,7,4,13,11,9,10,8,14,15,12,13]
3662 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
3663 ; AVX512VL-NEXT: retq
3665 ; XOPAVX1-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
3667 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3668 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9],xmm1[10,11]
3669 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
3670 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
3671 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3672 ; XOPAVX1-NEXT: retq
3674 ; XOPAVX2-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
3676 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3677 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6,7,8,9,10,11,12],ymm1[13],ymm0[14,15]
3678 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,1,2,0,4,5,6,7,11,9,10,8,12,13,14,15]
3679 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,3,2,4,5,7,6]
3680 ; XOPAVX2-NEXT: retq
3681 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 13, i32 11, i32 9, i32 10, i32 8, i32 14, i32 15, i32 12, i32 13>
3682 ret <16 x i16> %shuffle
; Single-input shuffle: every mask index is below 16, so only %a is read and
; %b is unused. Both 128-bit lanes follow the in-lane pattern
; 4,4,4,4,0,0,0,0, except result element 7, which reads index 8 (upper lane)
; instead of index 0. This test has separate expectations for the slow,
; fast-all and fast-perlane AVX2 run configurations; only the fast-all one
; is expected to use a cross-lane vpermd.
3685 define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
3686 ; AVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
3688 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3689 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
3690 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,u,u]
3691 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
3692 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
3693 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3696 ; AVX2-SLOW-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
3697 ; AVX2-SLOW: # %bb.0:
3698 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,u,u,24,25,24,25,24,25,24,25,16,17,16,17,16,17,u,u]
3699 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
3700 ; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
3701 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
3702 ; AVX2-SLOW-NEXT: retq
3704 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
3705 ; AVX2-FAST-ALL: # %bb.0:
3706 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,4,u,6,4,u,u>
3707 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
3708 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,4,5,4,5,4,5,8,9,16,17,16,17,16,17,16,17,20,21,20,21,20,21,20,21]
3709 ; AVX2-FAST-ALL-NEXT: retq
3711 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
3712 ; AVX2-FAST-PERLANE: # %bb.0:
3713 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,u,u,24,25,24,25,24,25,24,25,16,17,16,17,16,17,u,u]
3714 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
3715 ; AVX2-FAST-PERLANE-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
3716 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
3717 ; AVX2-FAST-PERLANE-NEXT: retq
3719 ; AVX512VL-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
3720 ; AVX512VL: # %bb.0:
3721 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,0,0,0,8,12,12,12,12,8,8,8,8]
3722 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
3723 ; AVX512VL-NEXT: retq
3725 ; XOPAVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
3727 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3728 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1],xmm1[0,1]
3729 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
3730 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3731 ; XOPAVX1-NEXT: retq
3733 ; XOPAVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
3735 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,u,u,24,25,24,25,24,25,24,25,16,17,16,17,16,17,u,u]
3736 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
3737 ; XOPAVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
3738 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
3739 ; XOPAVX2-NEXT: retq
3740 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 8, i32 12, i32 12, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8>
3741 ret <16 x i16> %shuffle
; Single-input v16i16 shuffle; the function name spells out the mask.
; Every mask index is below 16, so all result elements come from %a and %b
; is never selected from.
; Result elements 0-6 read the low 128-bit half of %a (indices 0-7) and
; elements 8-15 read the high half (indices 8-15). Result element 7 is the
; exception: it takes index 13 from the high half, the same index used by
; result element 15.
; The checks under the AVX512VL prefix expect one vpermw whose constant index
; vector equals the mask; the other prefixes first move the high-half word
; into the low half (vextractf128 or vpermq, then vpblendw) and finish with an
; in-lane dword permute.
3744 define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
3745 ; AVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
3747 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3748 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
3749 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3750 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
3753 ; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
3755 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3756 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6,7,8,9,10,11,12],ymm1[13],ymm0[14,15]
3757 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
3760 ; AVX512VL-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
3761 ; AVX512VL: # %bb.0:
3762 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [2,3,0,1,6,7,4,13,10,11,8,9,14,15,12,13]
3763 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
3764 ; AVX512VL-NEXT: retq
3766 ; XOPAVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
3768 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3769 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
3770 ; XOPAVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3771 ; XOPAVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
3772 ; XOPAVX1-NEXT: retq
3774 ; XOPAVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
3776 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3777 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6,7,8,9,10,11,12],ymm1[13],ymm0[14,15]
3778 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
3779 ; XOPAVX2-NEXT: retq
3780 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
3781 ret <16 x i16> %shuffle
; Single-input v16i16 shuffle; the function name spells out the mask.
; Every mask index is below 16, so all result elements come from %a and %b
; is never selected from.
; Result elements 0-6 read the low 128-bit half of %a and elements 8-15 read
; the high half. Result element 7 is the exception: it takes index 13 from the
; high half, the same index used by result element 15.
; Index 2 (and 10 in the high half) is used twice, so the in-half pattern is
; not a pure dword permute. The expected code reflects that: the AVX2-SLOW and
; XOPAVX2 checks use vpshuflw + vpshufd after the blend, while the AVX2-FAST
; checks use a single vpshufb. The checks under the AVX512VL prefix expect one
; vpermw whose constant index vector equals the mask.
3784 define <16 x i16> @shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
3785 ; AVX1-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
3787 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3788 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
3789 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
3790 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
3791 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3792 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,3,2,4,5,7,6]
3795 ; AVX2-SLOW-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
3796 ; AVX2-SLOW: # %bb.0:
3797 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3798 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6,7,8,9,10,11,12],ymm1[13],ymm0[14,15]
3799 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[2,3,0,2,4,5,6,7,10,11,8,10,12,13,14,15]
3800 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,3,2,4,5,7,6]
3801 ; AVX2-SLOW-NEXT: retq
3803 ; AVX2-FAST-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
3804 ; AVX2-FAST: # %bb.0:
3805 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3806 ; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6,7,8,9,10,11,12],ymm1[13],ymm0[14,15]
3807 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9,10,11,20,21,22,23,16,17,20,21,28,29,30,31,24,25,26,27]
3808 ; AVX2-FAST-NEXT: retq
3810 ; AVX512VL-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
3811 ; AVX512VL: # %bb.0:
3812 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [2,3,0,2,6,7,4,13,10,11,8,10,14,15,12,13]
3813 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
3814 ; AVX512VL-NEXT: retq
3816 ; XOPAVX1-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
3818 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3819 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9],xmm1[10,11]
3820 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
3821 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
3822 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3823 ; XOPAVX1-NEXT: retq
3825 ; XOPAVX2-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
3827 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3828 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6,7,8,9,10,11,12],ymm1[13],ymm0[14,15]
3829 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[2,3,0,2,4,5,6,7,10,11,8,10,12,13,14,15]
3830 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,3,2,4,5,7,6]
3831 ; XOPAVX2-NEXT: retq
3832 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 10, i32 14, i32 15, i32 12, i32 13>
3833 ret <16 x i16> %shuffle
; Single-input v16i16 shuffle; the function name spells out the mask.
; Every mask index is below 16, so all result elements come from %a and %b
; is never selected from.
; Result elements 0-6 read the low 128-bit half of %a and elements 8-15 read
; the high half. Result element 7 is the exception: it takes index 15 from the
; high half, the same index used by result element 15.
; In the AVX2 and XOPAVX2 checks the in-lane dword permute (vpshufd) comes
; first and the vpblendw that inserts the high-half word into position 7 comes
; last. The checks under the AVX512VL prefix expect one vpermw whose constant
; index vector equals the mask.
3836 define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15(<16 x i16> %a, <16 x i16> %b) {
3837 ; AVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
3839 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3840 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
3841 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
3842 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
3843 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
3844 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3847 ; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
3849 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3850 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
3851 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
3854 ; AVX512VL-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
3855 ; AVX512VL: # %bb.0:
3856 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [2,3,0,1,6,7,4,15,10,11,8,9,14,15,12,15]
3857 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
3858 ; AVX512VL-NEXT: retq
3860 ; XOPAVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
3862 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3863 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
3864 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
3865 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
3866 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
3867 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3868 ; XOPAVX1-NEXT: retq
3870 ; XOPAVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
3872 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3873 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
3874 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
3875 ; XOPAVX2-NEXT: retq
3876 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 15, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 15>
3877 ret <16 x i16> %shuffle
; Single-input v16i16 shuffle; the function name spells out the mask.
; Every mask index is below 16, so all result elements come from %a and %b
; is never selected from.
; Result elements 0-6 read the low 128-bit half of %a and elements 8-15 read
; the high half. Result element 7 is the exception: it takes index 8 from the
; high half, the same index used by result element 15.
; The AVX1 checks load one 16-byte vpshufb control into xmm2 and apply it to
; both halves; the AVX2 and XOPAVX2 checks blend the high-half word in and
; then use one 32-byte vpshufb. The checks under the AVX512VL prefix expect
; one vpermw whose constant index vector equals the mask.
3880 define <16 x i16> @shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08(<16 x i16> %a, <16 x i16> %b) {
3881 ; AVX1-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
3883 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3884 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
3885 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
3886 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
3887 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
3888 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
3891 ; AVX2-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
3893 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3894 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
3895 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1,30,31,26,27,28,29,24,25,22,23,18,19,20,21,16,17]
3898 ; AVX512VL-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
3899 ; AVX512VL: # %bb.0:
3900 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [7,5,6,4,3,1,2,8,15,13,14,12,11,9,10,8]
3901 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
3902 ; AVX512VL-NEXT: retq
3904 ; XOPAVX1-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
3906 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3907 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5],xmm1[0,1]
3908 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
3909 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3910 ; XOPAVX1-NEXT: retq
3912 ; XOPAVX2-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
3914 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3915 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
3916 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1,30,31,26,27,28,29,24,25,22,23,18,19,20,21,16,17]
3917 ; XOPAVX2-NEXT: retq
3918 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 8, i32 15, i32 13, i32 14, i32 12, i32 11, i32 9, i32 10, i32 8>
3919 ret <16 x i16> %shuffle
; Single-input v16i16 shuffle; the function name spells out the mask.
; Every mask index is below 16, so all result elements come from %a and %b
; is never selected from.
; Result elements 0-6 read the low 128-bit half of %a and elements 8-15 read
; the high half. Result element 7 is the exception: it takes index 8 from the
; high half, the same index used by result element 15.
; The mask only uses words 0, 1, 4 and 5 of each half plus word 8. The
; AVX2-FAST-ALL checks therefore gather the needed dwords with vpermd and
; finish with a vpshufb, while the AVX2-SLOW, AVX2-FAST-PERLANE and XOPAVX2
; checks shift word 8 into position with vpslldq and blend it in. The checks
; under the AVX512VL prefix expect one vpermw whose constant index vector
; equals the mask.
3922 define <16 x i16> @shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08(<16 x i16> %a, <16 x i16> %b) {
3923 ; AVX1-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
3925 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3926 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
3927 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,u,u]
3928 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
3929 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
3930 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3933 ; AVX2-SLOW-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
3934 ; AVX2-SLOW: # %bb.0:
3935 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,u,u,18,19,16,17,26,27,24,25,26,27,24,25,18,19,u,u]
3936 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
3937 ; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
3938 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
3939 ; AVX2-SLOW-NEXT: retq
3941 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
3942 ; AVX2-FAST-ALL: # %bb.0:
3943 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,4,u,4,6,u,u>
3944 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
3945 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,6,7,4,5,2,3,8,9,18,19,16,17,22,23,20,21,22,23,20,21,18,19,16,17]
3946 ; AVX2-FAST-ALL-NEXT: retq
3948 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
3949 ; AVX2-FAST-PERLANE: # %bb.0:
3950 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,u,u,18,19,16,17,26,27,24,25,26,27,24,25,18,19,u,u]
3951 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
3952 ; AVX2-FAST-PERLANE-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
3953 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
3954 ; AVX2-FAST-PERLANE-NEXT: retq
3956 ; AVX512VL-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
3957 ; AVX512VL: # %bb.0:
3958 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,5,4,5,4,1,8,9,8,13,12,13,12,9,8]
3959 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
3960 ; AVX512VL-NEXT: retq
3962 ; XOPAVX1-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
3964 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3965 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3],xmm1[0,1]
3966 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
3967 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3968 ; XOPAVX1-NEXT: retq
3970 ; XOPAVX2-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
3972 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,u,u,18,19,16,17,26,27,24,25,26,27,24,25,18,19,u,u]
3973 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
3974 ; XOPAVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
3975 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
3976 ; XOPAVX2-NEXT: retq
3977 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 8, i32 9, i32 8, i32 13, i32 12, i32 13, i32 12, i32 9, i32 8>
3978 ret <16 x i16> %shuffle
; Single-input v16i16 shuffle; the function name spells out the mask.
; Every mask index is below 16, so all result elements come from %a and %b
; is never selected from.
; Result elements 0-6 read the low 128-bit half of %a and elements 8-15 read
; the high half. Result element 7 is the exception: it takes index 8 from the
; high half, the same index used by result element 15.
; The mask only uses words 0, 1, 4 and 5 of each half plus word 8. The
; AVX2-FAST-ALL checks gather the needed dwords with vpermd and finish with a
; vpshufb, while the AVX2-SLOW, AVX2-FAST-PERLANE and XOPAVX2 checks shift
; word 8 into position with vpslldq and blend it in. The checks under the
; AVX512VL prefix expect one vpermw whose constant index vector equals the
; mask.
3981 define <16 x i16> @shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08(<16 x i16> %a, <16 x i16> %b) {
3982 ; AVX1-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
3984 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3985 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
3986 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,u,u]
3987 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
3988 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
3989 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3992 ; AVX2-SLOW-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
3993 ; AVX2-SLOW: # %bb.0:
3994 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,u,u,26,27,24,25,18,19,16,17,26,27,24,25,18,19,u,u]
3995 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
3996 ; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
3997 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
3998 ; AVX2-SLOW-NEXT: retq
4000 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
4001 ; AVX2-FAST-ALL: # %bb.0:
4002 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,4,u,6,4,u,u>
4003 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4004 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,2,3,0,1,6,7,8,9,18,19,16,17,22,23,20,21,18,19,16,17,22,23,20,21]
4005 ; AVX2-FAST-ALL-NEXT: retq
4007 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
4008 ; AVX2-FAST-PERLANE: # %bb.0:
4009 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,u,u,26,27,24,25,18,19,16,17,26,27,24,25,18,19,u,u]
4010 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4011 ; AVX2-FAST-PERLANE-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
4012 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4013 ; AVX2-FAST-PERLANE-NEXT: retq
4015 ; AVX512VL-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
4016 ; AVX512VL: # %bb.0:
4017 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [5,4,1,0,5,4,1,8,13,12,9,8,13,12,9,8]
4018 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4019 ; AVX512VL-NEXT: retq
4021 ; XOPAVX1-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
4023 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4024 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3],xmm1[0,1]
4025 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
4026 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4027 ; XOPAVX1-NEXT: retq
4029 ; XOPAVX2-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
4031 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,u,u,26,27,24,25,18,19,16,17,26,27,24,25,18,19,u,u]
4032 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4033 ; XOPAVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
4034 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4035 ; XOPAVX2-NEXT: retq
4036 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 8, i32 13, i32 12, i32 9, i32 8, i32 13, i32 12, i32 9, i32 8>
4037 ret <16 x i16> %shuffle
; Single-input v16i16 shuffle; the function name spells out the mask.
; Every mask index is below 16, so all result elements come from %a and %b
; is never selected from.
; Result elements 0-6 read the low 128-bit half of %a and elements 8-15 read
; the high half. Result element 7 is the exception: it takes index 12 from the
; high half, the same index used by result element 15.
; Word 12 is word 4 of the high half, so the AVX1, AVX2-SLOW,
; AVX2-FAST-PERLANE and XOPAVX2 checks move it to word 7 of its 64-bit group
; with a 48-bit left shift (vpsllq $48) before blending. The AVX2-FAST-ALL
; checks use vpermd + vpshufb instead, and the checks under the AVX512VL
; prefix expect one vpermw whose constant index vector equals the mask.
4040 define <16 x i16> @shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12(<16 x i16> %a, <16 x i16> %b) {
4041 ; AVX1-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
4043 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4044 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
4045 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,u,u]
4046 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
4047 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
4048 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4051 ; AVX2-SLOW-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
4052 ; AVX2-SLOW: # %bb.0:
4053 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,u,u,26,27,24,25,18,19,16,17,18,19,16,17,26,27,u,u]
4054 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4055 ; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0
4056 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4057 ; AVX2-SLOW-NEXT: retq
4059 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
4060 ; AVX2-FAST-ALL: # %bb.0:
4061 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,6,u,6,4,u,u>
4062 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4063 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,6,7,4,5,2,3,8,9,18,19,16,17,22,23,20,21,22,23,20,21,18,19,16,17]
4064 ; AVX2-FAST-ALL-NEXT: retq
4066 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
4067 ; AVX2-FAST-PERLANE: # %bb.0:
4068 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,u,u,26,27,24,25,18,19,16,17,18,19,16,17,26,27,u,u]
4069 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4070 ; AVX2-FAST-PERLANE-NEXT: vpsllq $48, %ymm0, %ymm0
4071 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4072 ; AVX2-FAST-PERLANE-NEXT: retq
4074 ; AVX512VL-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
4075 ; AVX512VL: # %bb.0:
4076 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [5,4,1,0,1,0,5,12,13,12,9,8,9,8,13,12]
4077 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4078 ; AVX512VL-NEXT: retq
4080 ; XOPAVX1-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
4082 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4083 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11],xmm1[8,9]
4084 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
4085 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4086 ; XOPAVX1-NEXT: retq
4088 ; XOPAVX2-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
4090 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,u,u,26,27,24,25,18,19,16,17,18,19,16,17,26,27,u,u]
4091 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4092 ; XOPAVX2-NEXT: vpsllq $48, %ymm0, %ymm0
4093 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4094 ; XOPAVX2-NEXT: retq
4095 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 12, i32 13, i32 12, i32 9, i32 8, i32 9, i32 8, i32 13, i32 12>
4096 ret <16 x i16> %shuffle
; Single-input v16i16 shuffle; the function name spells out the mask.
; Every mask index is below 16, so all result elements come from %a and %b
; is never selected from.
; Result elements 0-6 read the low 128-bit half of %a and elements 8-15 read
; the high half. Result element 7 is the exception: it takes index 8 from the
; high half, the same index used by result element 15.
; Only words 0 and 4 of each half are referenced. The AVX2-FAST-ALL checks
; gather the needed dwords with vpermd and finish with a vpshufb, while the
; AVX2-SLOW, AVX2-FAST-PERLANE and XOPAVX2 checks shift word 8 into position
; with vpslldq and blend it in. The checks under the AVX512VL prefix expect
; one vpermw whose constant index vector equals the mask.
4099 define <16 x i16> @shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08(<16 x i16> %a, <16 x i16> %b) {
4100 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
4102 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4103 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
4104 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,u,u]
4105 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
4106 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
4107 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4110 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
4111 ; AVX2-SLOW: # %bb.0:
4112 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,u,u,16,17,24,25,24,25,16,17,16,17,24,25,24,25,u,u]
4113 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4114 ; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
4115 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4116 ; AVX2-SLOW-NEXT: retq
4118 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
4119 ; AVX2-FAST-ALL: # %bb.0:
4120 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,4,u,4,6,u,u>
4121 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4122 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,4,5,0,1,0,1,4,5,4,5,8,9,16,17,20,21,20,21,16,17,16,17,20,21,20,21,16,17]
4123 ; AVX2-FAST-ALL-NEXT: retq
4125 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
4126 ; AVX2-FAST-PERLANE: # %bb.0:
4127 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,u,u,16,17,24,25,24,25,16,17,16,17,24,25,24,25,u,u]
4128 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4129 ; AVX2-FAST-PERLANE-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
4130 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4131 ; AVX2-FAST-PERLANE-NEXT: retq
4133 ; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
4134 ; AVX512VL: # %bb.0:
4135 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,4,4,0,0,4,4,8,8,12,12,8,8,12,12,8]
4136 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4137 ; AVX512VL-NEXT: retq
4139 ; XOPAVX1-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
4141 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4142 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9],xmm1[0,1]
4143 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
4144 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4145 ; XOPAVX1-NEXT: retq
4147 ; XOPAVX2-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
4149 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,u,u,16,17,24,25,24,25,16,17,16,17,24,25,24,25,u,u]
4150 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4151 ; XOPAVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
4152 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4153 ; XOPAVX2-NEXT: retq
4154 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8>
4155 ret <16 x i16> %shuffle
; Single-input v16i16 shuffle; the function name spells out the mask.
; Every mask index is below 16, so all result elements come from %a and %b
; is never selected from.
; Result elements 0-6 read the low 128-bit half of %a and elements 8-15 read
; the high half. Result element 7 is the exception: it takes index 12 from the
; high half, the same index used by result element 15.
; Only words 0 and 4 of each half are referenced. Word 12 is word 4 of the
; high half, so the AVX1, AVX2-SLOW, AVX2-FAST-PERLANE and XOPAVX2 checks move
; it to word 7 of its 64-bit group with vpsllq $48 before blending. The
; AVX2-FAST-ALL checks use vpermd + vpshufb instead, and the checks under the
; AVX512VL prefix expect one vpermw whose constant index vector equals the
; mask.
4158 define <16 x i16> @shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12(<16 x i16> %a, <16 x i16> %b) {
4159 ; AVX1-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
4161 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4162 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
4163 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,u,u]
4164 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
4165 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
4166 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4169 ; AVX2-SLOW-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
4170 ; AVX2-SLOW: # %bb.0:
4171 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,u,u,24,25,16,17,16,17,24,25,24,25,16,17,16,17,u,u]
4172 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4173 ; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0
4174 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4175 ; AVX2-SLOW-NEXT: retq
4177 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
4178 ; AVX2-FAST-ALL: # %bb.0:
4179 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,6,u,6,4,u,u>
4180 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4181 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,4,5,0,1,0,1,4,5,4,5,8,9,16,17,20,21,20,21,16,17,16,17,20,21,20,21,16,17]
4182 ; AVX2-FAST-ALL-NEXT: retq
4184 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
4185 ; AVX2-FAST-PERLANE: # %bb.0:
4186 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,u,u,24,25,16,17,16,17,24,25,24,25,16,17,16,17,u,u]
4187 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4188 ; AVX2-FAST-PERLANE-NEXT: vpsllq $48, %ymm0, %ymm0
4189 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4190 ; AVX2-FAST-PERLANE-NEXT: retq
4192 ; AVX512VL-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
4193 ; AVX512VL: # %bb.0:
4194 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,0,0,4,4,0,0,12,12,8,8,12,12,8,8,12]
4195 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4196 ; AVX512VL-NEXT: retq
4198 ; XOPAVX1-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
4200 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4201 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1],xmm1[8,9]
4202 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
4203 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4204 ; XOPAVX1-NEXT: retq
4206 ; XOPAVX2-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
4208 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,u,u,24,25,16,17,16,17,24,25,24,25,16,17,16,17,u,u]
4209 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4210 ; XOPAVX2-NEXT: vpsllq $48, %ymm0, %ymm0
4211 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4212 ; XOPAVX2-NEXT: retq
4213 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12>
4214 ret <16 x i16> %shuffle
; Single-input v16i16 shuffle; the function name spells out the mask.
; Every mask index is below 16, so all result elements come from %a and %b
; is never selected from.
; Result elements 0-6 read the low 128-bit half of %a and elements 8-15 read
; the high half. Result element 7 is the exception: it takes index 11 from the
; high half, the same index used by result element 15.
; Low-half word 3 is not referenced by the mask, so the AVX1, AVX2 and XOPAVX2
; checks blend high-half word 3 (index 11) into that slot and then apply the
; same byte shuffle to both halves. The checks under the AVX512VL prefix
; expect one vpermw whose constant index vector equals the mask.
4217 define <16 x i16> @shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11(<16 x i16> %a, <16 x i16> %b) {
4218 ; AVX1-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
4220 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4221 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
4222 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
4223 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
4224 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
4225 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
4228 ; AVX2-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
4230 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
4231 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
4232 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7,20,21,28,29,24,25,16,17,26,27,18,19,30,31,22,23]
4235 ; AVX512VL-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
4236 ; AVX512VL: # %bb.0:
4237 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [2,6,4,0,5,1,7,11,10,14,12,8,13,9,15,11]
4238 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4239 ; AVX512VL-NEXT: retq
4241 ; XOPAVX1-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
4243 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4244 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15],xmm1[6,7]
4245 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
4246 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4247 ; XOPAVX1-NEXT: retq
4249 ; XOPAVX2-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
4251 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
4252 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
4253 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7,20,21,28,29,24,25,16,17,26,27,18,19,30,31,22,23]
4254 ; XOPAVX2-NEXT: retq
4255 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 11, i32 10, i32 14, i32 12, i32 8, i32 13, i32 9, i32 15, i32 11>
4256 ret <16 x i16> %shuffle
; Single-input v16i16 shuffle; the function name spells out the mask.
; Every mask index is below 16, so all result elements come from %a and %b
; is never selected from.
; Result elements 0-6 read the low 128-bit half of %a and elements 8-15 read
; the high half. Result element 7 is the exception: it takes index 11 from the
; high half, the same index used by result element 15.
; Low-half word 3 is not referenced by the mask, so the AVX1, AVX2 and XOPAVX2
; checks blend high-half word 3 (index 11) into that slot and then apply the
; same byte shuffle to both halves. The checks under the AVX512VL prefix
; expect one vpermw whose constant index vector equals the mask.
4259 define <16 x i16> @shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11(<16 x i16> %a, <16 x i16> %b) {
4260 ; AVX1-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
4262 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4263 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
4264 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
4265 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
4266 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
4267 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
4270 ; AVX2-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
4272 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
4273 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
4274 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7,20,21,16,17,28,29,24,25,26,27,18,19,30,31,22,23]
4277 ; AVX512VL-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
4278 ; AVX512VL: # %bb.0:
4279 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [2,0,6,4,5,1,7,11,10,8,14,12,13,9,15,11]
4280 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4281 ; AVX512VL-NEXT: retq
4283 ; XOPAVX1-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
4285 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4286 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15],xmm1[6,7]
4287 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
4288 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4289 ; XOPAVX1-NEXT: retq
4291 ; XOPAVX2-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
4293 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
4294 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
4295 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7,20,21,16,17,28,29,24,25,26,27,18,19,30,31,22,23]
4296 ; XOPAVX2-NEXT: retq
4297 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 11, i32 10, i32 8, i32 14, i32 12, i32 13, i32 9, i32 15, i32 11>
4298 ret <16 x i16> %shuffle
; Single-input v16i16 shuffle: every mask index is below 16, so only %a is read
; and %b is unused. Each 128-bit half applies the word pattern <2,6,4,0,1,3,7,5>
; to its own half of %a, except that result element 7 reads element 13 (from the
; upper half). The assertion lines below are autogenerated (see the file header);
; regenerate them with the update script rather than editing by hand.
4301 define <16 x i16> @shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13(<16 x i16> %a, <16 x i16> %b) {
4302 ; AVX1-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
4304 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4305 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
4306 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
4307 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
4308 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
4309 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
4312 ; AVX2-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
4314 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
4315 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6,7,8,9,10,11,12],ymm1[13],ymm0[14,15]
4316 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11,20,21,28,29,24,25,16,17,18,19,22,23,30,31,26,27]
4319 ; AVX512VL-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
4320 ; AVX512VL: # %bb.0:
4321 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [2,6,4,0,1,3,7,13,10,14,12,8,9,11,15,13]
4322 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4323 ; AVX512VL-NEXT: retq
4325 ; XOPAVX1-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
4327 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4328 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15],xmm1[10,11]
4329 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
4330 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4331 ; XOPAVX1-NEXT: retq
4333 ; XOPAVX2-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
4335 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
4336 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6,7,8,9,10,11,12],ymm1[13],ymm0[14,15]
4337 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11,20,21,28,29,24,25,16,17,18,19,22,23,30,31,26,27]
4338 ; XOPAVX2-NEXT: retq
4339 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 13, i32 10, i32 14, i32 12, i32 8, i32 9, i32 11, i32 15, i32 13>
4340 ret <16 x i16> %shuffle
; Single-input v16i16 shuffle: every mask index is below 16, so only %a is read
; and %b is unused. Each 128-bit half applies the word pattern <6,6,7,5,1,6,4,3>
; to its own half of %a, except that result element 7 reads element 11 (from the
; upper half). The three AVX2 run configurations have separate assertion blocks
; here because their expected instruction sequences differ. The assertion lines
; below are autogenerated (see the file header); regenerate them with the update
; script rather than editing by hand.
4343 define <16 x i16> @shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11(<16 x i16> %a, <16 x i16> %b) {
4344 ; AVX1-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
4346 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4347 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
4348 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
4349 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
4350 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
4351 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
4354 ; AVX2-SLOW-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
4355 ; AVX2-SLOW: # %bb.0:
4356 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,5,2,3,4,5,6,7]
4357 ; AVX2-SLOW-NEXT: vpermd %ymm0, %ymm1, %ymm0
4358 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7,28,29,28,29,30,31,26,27,18,19,28,29,24,25,22,23]
4359 ; AVX2-SLOW-NEXT: retq
4361 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
4362 ; AVX2-FAST-ALL: # %bb.0:
4363 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,0,5,7,6,4,5]
4364 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4365 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,2,3,6,7,10,11,0,1,4,5,14,15,16,17,16,17,18,19,22,23,26,27,16,17,20,21,30,31]
4366 ; AVX2-FAST-ALL-NEXT: retq
4368 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
4369 ; AVX2-FAST-PERLANE: # %bb.0:
4370 ; AVX2-FAST-PERLANE-NEXT: vmovdqa {{.*#+}} ymm1 = [0,5,2,3,4,5,6,7]
4371 ; AVX2-FAST-PERLANE-NEXT: vpermd %ymm0, %ymm1, %ymm0
4372 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7,28,29,28,29,30,31,26,27,18,19,28,29,24,25,22,23]
4373 ; AVX2-FAST-PERLANE-NEXT: retq
4375 ; AVX512VL-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
4376 ; AVX512VL: # %bb.0:
4377 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [6,6,7,5,1,6,4,11,14,14,15,13,9,14,12,11]
4378 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4379 ; AVX512VL-NEXT: retq
4381 ; XOPAVX1-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
4383 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4384 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9],xmm1[6,7]
4385 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
4386 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4387 ; XOPAVX1-NEXT: retq
4389 ; XOPAVX2-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
4391 ; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,5,2,3,4,5,6,7]
4392 ; XOPAVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
4393 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7,28,29,28,29,30,31,26,27,18,19,28,29,24,25,22,23]
4394 ; XOPAVX2-NEXT: retq
4395 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 11, i32 14, i32 14, i32 15, i32 13, i32 9, i32 14, i32 12, i32 11>
4396 ret <16 x i16> %shuffle
; Single-input v16i16 shuffle: every mask index is below 16, so only %a is read
; and %b is unused. Each 128-bit half applies the word pattern <0,0,4,4,4,4,4,4>
; to its own half of %a, except that result element 7 reads element 12 (from the
; upper half). The assertion lines below are autogenerated (see the file header);
; regenerate them with the update script rather than editing by hand.
4399 define <16 x i16> @shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
4400 ; AVX1-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
4402 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4403 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
4404 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,u,u]
4405 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
4406 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
4407 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4410 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
4411 ; AVX2-SLOW: # %bb.0:
4412 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,u,u,16,17,16,17,24,25,24,25,24,25,24,25,24,25,u,u]
4413 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4414 ; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0
4415 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4416 ; AVX2-SLOW-NEXT: retq
4418 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
4419 ; AVX2-FAST-ALL: # %bb.0:
4420 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,6,u,4,6,u,u>
4421 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4422 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,4,5,4,5,4,5,8,9,16,17,16,17,20,21,20,21,20,21,20,21,20,21,20,21]
4423 ; AVX2-FAST-ALL-NEXT: retq
4425 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
4426 ; AVX2-FAST-PERLANE: # %bb.0:
4427 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,u,u,16,17,16,17,24,25,24,25,24,25,24,25,24,25,u,u]
4428 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4429 ; AVX2-FAST-PERLANE-NEXT: vpsllq $48, %ymm0, %ymm0
4430 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4431 ; AVX2-FAST-PERLANE-NEXT: retq
4433 ; AVX512VL-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
4434 ; AVX512VL: # %bb.0:
4435 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,4,4,4,4,4,12,8,8,12,12,12,12,12,12]
4436 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4437 ; AVX512VL-NEXT: retq
4439 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
4441 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4442 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9],xmm1[8,9]
4443 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
4444 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4445 ; XOPAVX1-NEXT: retq
4447 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
4449 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,u,u,16,17,16,17,24,25,24,25,24,25,24,25,24,25,u,u]
4450 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4451 ; XOPAVX2-NEXT: vpsllq $48, %ymm0, %ymm0
4452 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4453 ; XOPAVX2-NEXT: retq
4454 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
4455 ret <16 x i16> %shuffle
; Single-input v16i16 shuffle: every mask index is below 16, so only %a is read
; and %b is unused. Each 128-bit half applies the word pattern <4,4,0,0,4,4,4,4>
; to its own half of %a, except that result element 7 reads element 12 (from the
; upper half). The assertion lines below are autogenerated (see the file header);
; regenerate them with the update script rather than editing by hand.
4458 define <16 x i16> @shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
4459 ; AVX1-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
4461 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4462 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
4463 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,u,u]
4464 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
4465 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
4466 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4469 ; AVX2-SLOW-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
4470 ; AVX2-SLOW: # %bb.0:
4471 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,u,u,24,25,24,25,16,17,16,17,24,25,24,25,24,25,u,u]
4472 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4473 ; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0
4474 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4475 ; AVX2-SLOW-NEXT: retq
4477 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
4478 ; AVX2-FAST-ALL: # %bb.0:
4479 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,6,u,6,4,u,u>
4480 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4481 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,0,1,0,1,0,1,8,9,16,17,16,17,20,21,20,21,16,17,16,17,16,17,16,17]
4482 ; AVX2-FAST-ALL-NEXT: retq
4484 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
4485 ; AVX2-FAST-PERLANE: # %bb.0:
4486 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,u,u,24,25,24,25,16,17,16,17,24,25,24,25,24,25,u,u]
4487 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4488 ; AVX2-FAST-PERLANE-NEXT: vpsllq $48, %ymm0, %ymm0
4489 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4490 ; AVX2-FAST-PERLANE-NEXT: retq
4492 ; AVX512VL-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
4493 ; AVX512VL: # %bb.0:
4494 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,0,0,4,4,4,12,12,12,8,8,12,12,12,12]
4495 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4496 ; AVX512VL-NEXT: retq
4498 ; XOPAVX1-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
4500 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4501 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9],xmm1[8,9]
4502 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
4503 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4504 ; XOPAVX1-NEXT: retq
4506 ; XOPAVX2-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
4508 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,u,u,24,25,24,25,16,17,16,17,24,25,24,25,24,25,u,u]
4509 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4510 ; XOPAVX2-NEXT: vpsllq $48, %ymm0, %ymm0
4511 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4512 ; XOPAVX2-NEXT: retq
4513 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
4514 ret <16 x i16> %shuffle
; Single-input v16i16 shuffle: every mask index is below 16, so only %a is read
; and %b is unused. Each 128-bit half applies the word pattern <0,4,4,0,4,4,4,4>
; to its own half of %a, except that result element 7 reads element 12 (from the
; upper half). The assertion lines below are autogenerated (see the file header);
; regenerate them with the update script rather than editing by hand.
4517 define <16 x i16> @shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
4518 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
4520 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4521 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
4522 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u]
4523 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
4524 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
4525 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4528 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
4529 ; AVX2-SLOW: # %bb.0:
4530 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u,16,17,24,25,24,25,16,17,24,25,24,25,24,25,u,u]
4531 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4532 ; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0
4533 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4534 ; AVX2-SLOW-NEXT: retq
4536 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
4537 ; AVX2-FAST-ALL: # %bb.0:
4538 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,6,u,4,6,u,u>
4539 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4540 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,4,5,0,1,4,5,4,5,4,5,8,9,16,17,20,21,20,21,16,17,20,21,20,21,20,21,20,21]
4541 ; AVX2-FAST-ALL-NEXT: retq
4543 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
4544 ; AVX2-FAST-PERLANE: # %bb.0:
4545 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u,16,17,24,25,24,25,16,17,24,25,24,25,24,25,u,u]
4546 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4547 ; AVX2-FAST-PERLANE-NEXT: vpsllq $48, %ymm0, %ymm0
4548 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4549 ; AVX2-FAST-PERLANE-NEXT: retq
4551 ; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
4552 ; AVX512VL: # %bb.0:
4553 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,4,4,0,4,4,4,12,8,12,12,8,12,12,12,12]
4554 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4555 ; AVX512VL-NEXT: retq
4557 ; XOPAVX1-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
4559 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4560 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9],xmm1[8,9]
4561 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
4562 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4563 ; XOPAVX1-NEXT: retq
4565 ; XOPAVX2-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
4567 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u,16,17,24,25,24,25,16,17,24,25,24,25,24,25,u,u]
4568 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4569 ; XOPAVX2-NEXT: vpsllq $48, %ymm0, %ymm0
4570 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4571 ; XOPAVX2-NEXT: retq
4572 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 12, i32 8, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12>
4573 ret <16 x i16> %shuffle
; Single-input v16i16 shuffle: every mask index is below 16, so only %a is read
; and %b is unused. Each 128-bit half applies the word pattern <0,4,4,0,0,0,0,0>
; to its own half of %a, except that result element 7 reads element 8 (the first
; word of the upper half). The assertion lines below are autogenerated (see the
; file header); regenerate them with the update script rather than editing by hand.
4576 define <16 x i16> @shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
4577 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
4579 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4580 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
4581 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,u,u]
4582 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
4583 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
4584 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4587 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
4588 ; AVX2-SLOW: # %bb.0:
4589 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,u,u,16,17,24,25,24,25,16,17,16,17,16,17,16,17,u,u]
4590 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4591 ; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
4592 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4593 ; AVX2-SLOW-NEXT: retq
4595 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
4596 ; AVX2-FAST-ALL: # %bb.0:
4597 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,4,u,4,6,u,u>
4598 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4599 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,4,5,0,1,0,1,0,1,0,1,8,9,16,17,20,21,20,21,16,17,16,17,16,17,16,17,16,17]
4600 ; AVX2-FAST-ALL-NEXT: retq
4602 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
4603 ; AVX2-FAST-PERLANE: # %bb.0:
4604 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,u,u,16,17,24,25,24,25,16,17,16,17,16,17,16,17,u,u]
4605 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4606 ; AVX2-FAST-PERLANE-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
4607 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4608 ; AVX2-FAST-PERLANE-NEXT: retq
4610 ; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
4611 ; AVX512VL: # %bb.0:
4612 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,4,4,0,0,0,0,8,8,12,12,8,8,8,8,8]
4613 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4614 ; AVX512VL-NEXT: retq
4616 ; XOPAVX1-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
4618 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4619 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1],xmm1[0,1]
4620 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
4621 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4622 ; XOPAVX1-NEXT: retq
4624 ; XOPAVX2-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
4626 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,u,u,16,17,24,25,24,25,16,17,16,17,16,17,16,17,u,u]
4627 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4628 ; XOPAVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
4629 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4630 ; XOPAVX2-NEXT: retq
4631 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8, i32 8>
4632 ret <16 x i16> %shuffle
; Single-input v16i16 shuffle: every mask index is below 16, so only %a is read
; and %b is unused. Each 128-bit half applies the word pattern <0,4,4,0,4,5,6,7>
; to its own half of %a, except that result element 7 reads element 15 (the last
; word of the upper half). The assertion lines below are autogenerated (see the
; file header); regenerate them with the update script rather than editing by hand.
4635 define <16 x i16> @shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
4636 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
4638 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4639 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
4640 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
4641 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
4642 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
4643 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
4644 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4647 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
4648 ; AVX2-SLOW: # %bb.0:
4649 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
4650 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
4651 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,2,2,0,4,5,6,7,8,10,10,8,12,13,14,15]
4652 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
4653 ; AVX2-SLOW-NEXT: retq
4655 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
4656 ; AVX2-FAST-ALL: # %bb.0:
4657 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,3,7,4,6,7,u>
4658 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4659 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,4,5,0,1,4,5,6,7,8,9,14,15,16,17,20,21,20,21,16,17,20,21,22,23,24,25,26,27]
4660 ; AVX2-FAST-ALL-NEXT: retq
4662 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
4663 ; AVX2-FAST-PERLANE: # %bb.0:
4664 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
4665 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,u,u,16,17,24,25,24,25,16,17,24,25,26,27,28,29,u,u]
4666 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
4667 ; AVX2-FAST-PERLANE-NEXT: retq
4669 ; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
4670 ; AVX512VL: # %bb.0:
4671 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,4,4,0,4,5,6,15,8,12,12,8,12,13,14,15]
4672 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4673 ; AVX512VL-NEXT: retq
4675 ; XOPAVX1-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
4677 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4678 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13],xmm1[14,15]
4679 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
4680 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
4681 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4682 ; XOPAVX1-NEXT: retq
4684 ; XOPAVX2-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
4686 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
4687 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
4688 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,2,2,0,4,5,6,7,8,10,10,8,12,13,14,15]
4689 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
4690 ; XOPAVX2-NEXT: retq
4691 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 15, i32 8, i32 12, i32 12, i32 8, i32 12, i32 13, i32 14, i32 15>
4692 ret <16 x i16> %shuffle
; Single-input v16i16 shuffle with undef lanes: every defined mask index is below
; 16, so only %a is read and %b is unused. Each 128-bit half applies the word
; pattern <0,undef,4,4,4,4,4,4> to its own half of %a, except that result element
; 7 reads element 12 (from the upper half). Result elements 1 and 9 are undef
; ("uu" in the test name). The assertion lines below are autogenerated (see the
; file header); regenerate them with the update script rather than editing by hand.
4695 define <16 x i16> @shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
4696 ; AVX1-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
4698 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4699 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
4700 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,u,u]
4701 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
4702 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
4703 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4706 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
4707 ; AVX2-SLOW: # %bb.0:
4708 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,u,u,16,17,18,19,24,25,24,25,24,25,24,25,24,25,u,u]
4709 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4710 ; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0
4711 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4712 ; AVX2-SLOW-NEXT: retq
4714 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
4715 ; AVX2-FAST-ALL: # %bb.0:
4716 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,6,u,4,6,u,u>
4717 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4718 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,4,5,4,5,4,5,4,5,4,5,8,9,16,17,u,u,20,21,20,21,20,21,20,21,20,21,20,21]
4719 ; AVX2-FAST-ALL-NEXT: retq
4721 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
4722 ; AVX2-FAST-PERLANE: # %bb.0:
4723 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,u,u,16,17,18,19,24,25,24,25,24,25,24,25,24,25,u,u]
4724 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4725 ; AVX2-FAST-PERLANE-NEXT: vpsllq $48, %ymm0, %ymm0
4726 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4727 ; AVX2-FAST-PERLANE-NEXT: retq
4729 ; AVX512VL-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
4730 ; AVX512VL: # %bb.0:
4731 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,u,4,4,4,4,4,12,8,u,12,12,12,12,12,12>
4732 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4733 ; AVX512VL-NEXT: retq
4735 ; XOPAVX1-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
4737 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4738 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9],xmm1[8,9]
4739 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
4740 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4741 ; XOPAVX1-NEXT: retq
4743 ; XOPAVX2-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
4745 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,u,u,16,17,18,19,24,25,24,25,24,25,24,25,24,25,u,u]
4746 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4747 ; XOPAVX2-NEXT: vpsllq $48, %ymm0, %ymm0
4748 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4749 ; XOPAVX2-NEXT: retq
4750 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 undef, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
4751 ret <16 x i16> %shuffle
; Single-input v16i16 shuffle with undef lanes: every defined mask index is below
; 16, so only %a is read and %b is unused. Each 128-bit half applies the word
; pattern <4,4,undef,0,4,4,4,4> to its own half of %a, except that result element
; 7 reads element 12 (from the upper half). Result elements 2 and 10 are undef
; ("uu" in the test name). The assertion lines below are autogenerated (see the
; file header); regenerate them with the update script rather than editing by hand.
4754 define <16 x i16> @shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
4755 ; AVX1-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
4757 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4758 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
4759 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,u,u]
4760 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
4761 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
4762 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4765 ; AVX2-SLOW-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
4766 ; AVX2-SLOW: # %bb.0:
4767 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,u,u,24,25,24,25,24,25,16,17,24,25,24,25,24,25,u,u]
4768 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4769 ; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0
4770 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4771 ; AVX2-SLOW-NEXT: retq
4773 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
4774 ; AVX2-FAST-ALL: # %bb.0:
4775 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,6,u,6,4,u,u>
4776 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4777 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,u,u,4,5,0,1,0,1,0,1,8,9,16,17,16,17,u,u,20,21,16,17,16,17,16,17,16,17]
4778 ; AVX2-FAST-ALL-NEXT: retq
4780 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
4781 ; AVX2-FAST-PERLANE: # %bb.0:
4782 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,u,u,24,25,24,25,24,25,16,17,24,25,24,25,24,25,u,u]
4783 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4784 ; AVX2-FAST-PERLANE-NEXT: vpsllq $48, %ymm0, %ymm0
4785 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4786 ; AVX2-FAST-PERLANE-NEXT: retq
4788 ; AVX512VL-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
4789 ; AVX512VL: # %bb.0:
4790 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <4,4,u,0,4,4,4,12,12,12,u,8,12,12,12,12>
4791 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4792 ; AVX512VL-NEXT: retq
4794 ; XOPAVX1-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
4796 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4797 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9],xmm1[8,9]
4798 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
4799 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4800 ; XOPAVX1-NEXT: retq
4802 ; XOPAVX2-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
4804 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,u,u,24,25,24,25,24,25,16,17,24,25,24,25,24,25,u,u]
4805 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4806 ; XOPAVX2-NEXT: vpsllq $48, %ymm0, %ymm0
4807 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4808 ; XOPAVX2-NEXT: retq
4809 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 undef, i32 8, i32 12, i32 12, i32 12, i32 12>
4810 ret <16 x i16> %shuffle
; Single-source shuffle of %a (%b is unused). Result elements 0 and 8 are undef.
; Result element 7 reads %a element 12, the only low-half result element whose
; source lies in the upper eight elements of %a.
4813 define <16 x i16> @shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
4814 ; AVX1-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
4816 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4817 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
4818 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u]
4819 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
4820 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
4821 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4824 ; AVX2-SLOW-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
4825 ; AVX2-SLOW: # %bb.0:
4826 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u,16,17,24,25,24,25,16,17,24,25,24,25,24,25,u,u]
4827 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4828 ; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0
4829 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4830 ; AVX2-SLOW-NEXT: retq
4832 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
4833 ; AVX2-FAST-ALL: # %bb.0:
4834 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,6,u,6,4,u,u>
4835 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4836 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,0,1,4,5,0,1,0,1,0,1,8,9,u,u,16,17,16,17,20,21,16,17,16,17,16,17,16,17]
4837 ; AVX2-FAST-ALL-NEXT: retq
4839 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
4840 ; AVX2-FAST-PERLANE: # %bb.0:
4841 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u,16,17,24,25,24,25,16,17,24,25,24,25,24,25,u,u]
4842 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4843 ; AVX2-FAST-PERLANE-NEXT: vpsllq $48, %ymm0, %ymm0
4844 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4845 ; AVX2-FAST-PERLANE-NEXT: retq
4847 ; AVX512VL-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
4848 ; AVX512VL: # %bb.0:
4849 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <u,4,4,0,4,4,4,12,u,12,12,8,12,12,12,12>
4850 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4851 ; AVX512VL-NEXT: retq
4853 ; XOPAVX1-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
4855 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4856 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9],xmm1[8,9]
4857 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
4858 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4859 ; XOPAVX1-NEXT: retq
4861 ; XOPAVX2-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
4863 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u,16,17,24,25,24,25,16,17,24,25,24,25,24,25,u,u]
4864 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4865 ; XOPAVX2-NEXT: vpsllq $48, %ymm0, %ymm0
4866 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4867 ; XOPAVX2-NEXT: retq
4868 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 12, i32 undef, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12>
4869 ret <16 x i16> %shuffle
; Single-source shuffle of %a (%b is unused). Result elements 0-3 read %a
; elements 0,1,2,7 and result elements 8-11 read %a elements 8,9,10,15, so no
; element moves between the two eight-element halves. All other elements are undef.
4872 define <16 x i16> @shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
4873 ; AVX1-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
4875 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4876 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
4877 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
4878 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
4879 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4882 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
4883 ; AVX2OR512VL: # %bb.0:
4884 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15,16,17,18,19,20,21,30,31,20,21,30,31,28,29,30,31]
4885 ; AVX2OR512VL-NEXT: retq
4887 ; XOPAVX1-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
4889 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4890 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
4891 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
4892 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
4893 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4894 ; XOPAVX1-NEXT: retq
4896 ; XOPAVX2-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
4898 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15,16,17,18,19,20,21,30,31,20,21,30,31,28,29,30,31]
4899 ; XOPAVX2-NEXT: retq
4900 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
4901 ret <16 x i16> %shuffle
; Single-source shuffle of %a (%b is unused). Result elements 0-3 and 8-11 are
; undef. Result elements 7 and 15 both read %a element 11, so element 7 takes its
; value from the upper eight elements of %a.
4904 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11(<16 x i16> %a, <16 x i16> %b) {
4905 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
4907 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4908 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
4909 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
4910 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
4911 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4914 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
4916 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,2]
4917 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,8,9,10,11,0,1,2,3,4,5,14,15,16,17,18,19,24,25,26,27,16,17,18,19,20,21,30,31]
4920 ; AVX512VL-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
4921 ; AVX512VL: # %bb.0:
4922 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,5,8,9,4,5,6,11,12,13,8,9,12,13,14,11]
4923 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4924 ; AVX512VL-NEXT: retq
4926 ; XOPAVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
4928 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4929 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
4930 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
4931 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
4932 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4933 ; XOPAVX1-NEXT: retq
4935 ; XOPAVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
4937 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,2]
4938 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,8,9,10,11,0,1,2,3,4,5,14,15,16,17,18,19,24,25,26,27,16,17,18,19,20,21,30,31]
4939 ; XOPAVX2-NEXT: retq
4940 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11>
4941 ret <16 x i16> %shuffle
; Single-source shuffle of %a (%b is unused). Result elements 0-3 read %a
; elements 4,5,6,3 and result elements 8-11 read %a elements 12,13,14,11, so each
; eight-element half only uses its own elements. All other elements are undef.
4944 define <16 x i16> @shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
4945 ; AVX1-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
4947 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4948 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
4949 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
4950 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
4951 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4954 ; AVX2OR512VL-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
4955 ; AVX2OR512VL: # %bb.0:
4956 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3,24,25,26,27,28,29,22,23,24,25,26,27,16,17,18,19]
4957 ; AVX2OR512VL-NEXT: retq
4959 ; XOPAVX1-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
4961 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4962 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
4963 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
4964 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
4965 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4966 ; XOPAVX1-NEXT: retq
4968 ; XOPAVX2-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
4970 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3,24,25,26,27,28,29,22,23,24,25,26,27,16,17,18,19]
4971 ; XOPAVX2-NEXT: retq
4972 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
4973 ret <16 x i16> %shuffle
; Shuffle of %a with a zeroinitializer second operand. Mask index 16 selects
; element 0 of the zero vector, so result elements 1 and 3 are zero (the zz in the
; name). Element 5 is undef, elements 0 and 2 read %a elements 1 and 2, and
; elements 4 and 6-15 keep their original positions.
4976 define <16 x i16> @shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15(<16 x i16> %a) {
4977 ; AVX1-LABEL: shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15:
4979 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,2,3,4,5,6,7]
4980 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
4981 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4,5,6,7]
4982 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
4985 ; AVX2OR512VL-LABEL: shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15:
4986 ; AVX2OR512VL: # %bb.0:
4987 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3],zero,zero,ymm0[4,5],zero,zero,ymm0[8,9,u,u,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
4988 ; AVX2OR512VL-NEXT: retq
4990 ; XOPAVX1-LABEL: shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15:
4992 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,2,3,4,5,6,7]
4993 ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
4994 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4,5,6,7]
4995 ; XOPAVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
4996 ; XOPAVX1-NEXT: retq
4998 ; XOPAVX2-LABEL: shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15:
5000 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3],zero,zero,ymm0[4,5],zero,zero,ymm0[8,9,u,u,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
5001 ; XOPAVX2-NEXT: retq
5002 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 2, i32 16, i32 4, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5003 ret <16 x i16> %shuffle
; Single-source shuffle of %a (%b is unused) with a fully defined mask. Result
; elements 7 and 15 both read %a element 11, so element 7 is the one low-half
; result element sourced from the upper eight elements of %a.
5006 define <16 x i16> @shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11(<16 x i16> %a, <16 x i16> %b) {
5007 ; AVX1-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
5009 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
5010 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
5011 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
5012 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
5013 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
5014 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5017 ; AVX2-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
5019 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
5020 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
5021 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7,16,17,18,19,20,21,30,31,24,25,26,27,28,29,22,23]
5024 ; AVX512VL-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
5025 ; AVX512VL: # %bb.0:
5026 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,7,4,5,6,11,8,9,10,15,12,13,14,11]
5027 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
5028 ; AVX512VL-NEXT: retq
5030 ; XOPAVX1-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
5032 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
5033 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13],xmm1[6,7]
5034 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
5035 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
5036 ; XOPAVX1-NEXT: retq
5038 ; XOPAVX2-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
5040 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
5041 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
5042 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7,16,17,18,19,20,21,30,31,24,25,26,27,28,29,22,23]
5043 ; XOPAVX2-NEXT: retq
5044 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 11, i32 8, i32 9, i32 10, i32 15, i32 12, i32 13, i32 14, i32 11>
5045 ret <16 x i16> %shuffle
; Single-source shuffle of %a (%b is unused) with a fully defined mask. Result
; elements 7 and 15 both read %a element 15, so element 7 is the one low-half
; result element sourced from the upper eight elements of %a.
5048 define <16 x i16> @shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15(<16 x i16> %a, <16 x i16> %b) {
5049 ; AVX1-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
5051 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
5052 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,u,u]
5053 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
5054 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
5055 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
5058 ; AVX2-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
5060 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
5061 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,u,u,24,25,26,27,28,29,22,23,16,17,18,19,20,21,u,u]
5062 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
5065 ; AVX512VL-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
5066 ; AVX512VL: # %bb.0:
5067 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,5,6,3,0,1,2,15,12,13,14,11,8,9,10,15]
5068 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
5069 ; AVX512VL-NEXT: retq
5071 ; XOPAVX1-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
5073 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
5074 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5],xmm1[14,15]
5075 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
5076 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
5077 ; XOPAVX1-NEXT: retq
5079 ; XOPAVX2-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
5081 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
5082 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,u,u,24,25,26,27,28,29,22,23,16,17,18,19,20,21,u,u]
5083 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
5084 ; XOPAVX2-NEXT: retq
5085 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 15, i32 12, i32 13, i32 14, i32 11, i32 8, i32 9, i32 10, i32 15>
5086 ret <16 x i16> %shuffle
; Single-source shuffle of %a (%b is unused) with a fully defined, irregular
; mask. Result elements 7 and 15 both read %a element 13, so element 7 is the one
; low-half result element sourced from the upper eight elements of %a.
5089 define <16 x i16> @shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13(<16 x i16> %a, <16 x i16> %b) {
5090 ; AVX1-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
5092 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
5093 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
5094 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
5095 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
5096 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
5097 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5100 ; AVX2-SLOW-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
5101 ; AVX2-SLOW: # %bb.0:
5102 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,6,3,4,5,6,7]
5103 ; AVX2-SLOW-NEXT: vpermd %ymm0, %ymm1, %ymm0
5104 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11,22,23,30,31,18,19,16,17,20,21,30,31,22,23,26,27]
5105 ; AVX2-SLOW-NEXT: retq
5107 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
5108 ; AVX2-FAST-ALL: # %bb.0:
5109 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,0,6,5,7,4,6]
5110 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
5111 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,6,7,10,11,8,9,0,1,6,7,2,3,14,15,18,19,22,23,26,27,24,25,16,17,22,23,18,19,30,31]
5112 ; AVX2-FAST-ALL-NEXT: retq
5114 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
5115 ; AVX2-FAST-PERLANE: # %bb.0:
5116 ; AVX2-FAST-PERLANE-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,6,3,4,5,6,7]
5117 ; AVX2-FAST-PERLANE-NEXT: vpermd %ymm0, %ymm1, %ymm0
5118 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11,22,23,30,31,18,19,16,17,20,21,30,31,22,23,26,27]
5119 ; AVX2-FAST-PERLANE-NEXT: retq
5121 ; AVX512VL-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
5122 ; AVX512VL: # %bb.0:
5123 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [3,7,1,0,2,7,3,13,11,15,9,8,10,15,11,13]
5124 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
5125 ; AVX512VL-NEXT: retq
5127 ; XOPAVX1-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
5129 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
5130 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7],xmm1[10,11]
5131 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
5132 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
5133 ; XOPAVX1-NEXT: retq
5135 ; XOPAVX2-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
5137 ; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,6,3,4,5,6,7]
5138 ; XOPAVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
5139 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11,22,23,30,31,18,19,16,17,20,21,30,31,22,23,26,27]
5140 ; XOPAVX2-NEXT: retq
5141 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 13, i32 11, i32 15, i32 9, i32 8, i32 10, i32 15, i32 11, i32 13>
5142 ret <16 x i16> %shuffle
; Two-source shuffle; mask indices 16-31 select from %b. Even result elements
; read %a elements 0-3 and 8-11, odd result elements read the matching %b
; elements, except result element 7 which reads %b element 11 (mask index 27)
; rather than %b element 3.
5145 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
5146 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
5148 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5149 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5150 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
5151 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3],xmm1[4,5,6,7]
5152 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
5153 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5156 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
5158 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
5159 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,14,15,u,u,u,u,u,u,u,u,16,17,18,19,20,21,22,23,u,u,u,u,u,u,u,u]
5160 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
5163 ; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
5164 ; AVX512VL: # %bb.0:
5165 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,16,1,17,2,18,3,27,8,24,9,25,10,26,11,27]
5166 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
5167 ; AVX512VL-NEXT: retq
5169 ; XOPAVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
5171 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5172 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5173 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
5174 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3],xmm1[4,5,6,7]
5175 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
5176 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5177 ; XOPAVX1-NEXT: retq
5179 ; XOPAVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
5181 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
5182 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,14,15,u,u,u,u,u,u,u,u,16,17,18,19,20,21,22,23,u,u,u,u,u,u,u,u]
5183 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
5184 ; XOPAVX2-NEXT: retq
5185 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 27, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
5186 ret <16 x i16> %shuffle
; Two-source shuffle; mask indices 16-31 select from %b. Even result elements
; read %a elements 0-3 and 8-11, odd result elements read %b elements 4-6 and
; 12-15, except result element 7 which reads %b element 15 (mask index 31)
; rather than %b element 7.
5189 define <16 x i16> @shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31(<16 x i16> %a, <16 x i16> %b) {
5190 ; AVX1-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
5192 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
5193 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
5194 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
5195 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
5196 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
5197 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,u,u,u,u,u,u,u,u]
5198 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
5199 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5202 ; AVX2-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
5204 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
5205 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,u,u,16,17,24,25,18,19,26,27,20,21,28,29,22,23,u,u]
5206 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
5207 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
5210 ; AVX512VL-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
5211 ; AVX512VL: # %bb.0:
5212 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,20,1,21,2,22,3,31,8,28,9,29,10,30,11,31]
5213 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
5214 ; AVX512VL-NEXT: retq
5216 ; XOPAVX1-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
5218 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
5219 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
5220 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
5221 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
5222 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13],xmm3[14,15],xmm1[u,u,u,u,u,u,u,u]
5223 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
5224 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5225 ; XOPAVX1-NEXT: retq
5227 ; XOPAVX2-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
5229 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
5230 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,u,u,16,17,24,25,18,19,26,27,20,21,28,29,22,23,u,u]
5231 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
5232 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
5233 ; XOPAVX2-NEXT: retq
5234 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 20, i32 1, i32 21, i32 2, i32 22, i32 3, i32 31, i32 8, i32 28, i32 9, i32 29, i32 10, i32 30, i32 11, i32 31>
5235 ret <16 x i16> %shuffle
; Two-source shuffle; mask indices 16-31 select from %b. Even result elements
; read %a elements 4-7 and 12-15, odd result elements read the matching %b
; elements, except result element 7 which reads %b element 15 (mask index 31)
; rather than %b element 7.
5238 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
5239 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
5241 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5242 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5243 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
5244 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6],xmm2[7]
5245 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5246 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5249 ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
5251 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
5252 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,3,3,3]
5253 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,14,15,u,u,u,u,u,u,u,u,16,17,18,19,20,21,22,23,u,u,u,u,u,u,u,u]
5254 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
5257 ; AVX512VL-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
5258 ; AVX512VL: # %bb.0:
5259 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [4,20,5,21,6,22,7,31,12,28,13,29,14,30,15,31]
5260 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
5261 ; AVX512VL-NEXT: retq
5263 ; XOPAVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
5265 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5266 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5267 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
5268 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6],xmm2[7]
5269 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5270 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5271 ; XOPAVX1-NEXT: retq
5273 ; XOPAVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
5275 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
5276 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,3,3,3]
5277 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,14,15,u,u,u,u,u,u,u,u,16,17,18,19,20,21,22,23,u,u,u,u,u,u,u,u]
5278 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
5279 ; XOPAVX2-NEXT: retq
5280 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 31, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
5281 ret <16 x i16> %shuffle
; Two-source shuffle; mask indices 16-31 select from %b. Even result elements
; read %a elements 4-7 and 12-15, odd result elements read %b elements 0-2 and
; 8-11, except result element 7 which reads %b element 11 (mask index 27)
; rather than %b element 3.
5284 define <16 x i16> @shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27(<16 x i16> %a, <16 x i16> %b) {
5285 ; AVX1-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
5287 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5288 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5289 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
5290 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
5291 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
5292 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,0,1,4,5,8,9,14,15]
5293 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5294 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5297 ; AVX2-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
5299 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
5300 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
5301 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
5302 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,0,1,10,11,2,3,12,13,4,5,14,15,6,7,24,25,16,17,26,27,18,19,28,29,20,21,30,31,22,23]
5305 ; AVX512VL-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
5306 ; AVX512VL: # %bb.0:
5307 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [4,16,5,17,6,18,7,27,12,24,13,25,14,26,15,27]
5308 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
5309 ; AVX512VL-NEXT: retq
5311 ; XOPAVX1-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
5313 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5314 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5315 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
5316 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
5317 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,0,1,2,3,4,5],xmm2[6,7]
5318 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5319 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5320 ; XOPAVX1-NEXT: retq
5322 ; XOPAVX2-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
5324 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
5325 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
5326 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
5327 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,0,1,10,11,2,3,12,13,4,5,14,15,6,7,24,25,16,17,26,27,18,19,28,29,20,21,30,31,22,23]
5328 ; XOPAVX2-NEXT: retq
5329 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 16, i32 5, i32 17, i32 6, i32 18, i32 7, i32 27, i32 12, i32 24, i32 13, i32 25, i32 14, i32 26, i32 15, i32 27>
5330 ret <16 x i16> %shuffle
; Two-source shuffle; mask indices 16-31 select from %b. Even result elements
; read %a elements 0,1,6,7 and 8,9,14,15, odd result elements read the matching
; %b elements, except result element 7 which reads %b element 15 (mask index 31)
; rather than %b element 7.
5333 define <16 x i16> @shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
5334 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
5336 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5337 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[0,3,2,3]
5338 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
5339 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[0,3,2,3]
5340 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
5341 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,1,4,5,6,7]
5342 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,6,6]
5343 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
5344 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u]
5345 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
5346 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
5347 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5350 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
5351 ; AVX2-SLOW: # %bb.0:
5352 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3]
5353 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,6],ymm2[7],ymm1[8,9,10,11,12,13,14],ymm2[15]
5354 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,3,2,3,4,7,6,7]
5355 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,2,3,4,7,6,7]
5356 ; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
5357 ; AVX2-SLOW-NEXT: retq
5359 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
5360 ; AVX2-FAST-ALL: # %bb.0:
5361 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm2 = <0,3,7,u,4,7,u,u>
5362 ; AVX2-FAST-ALL-NEXT: vpermd %ymm1, %ymm2, %ymm1
5363 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,10,11,u,u,u,u,u,u,u,u,16,17,18,19,20,21,22,23,u,u,u,u,u,u,u,u]
5364 ; AVX2-FAST-ALL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,2,3,4,7,6,7]
5365 ; AVX2-FAST-ALL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
5366 ; AVX2-FAST-ALL-NEXT: retq
5368 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
5369 ; AVX2-FAST-PERLANE: # %bb.0:
5370 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3]
5371 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,6],ymm2[7],ymm1[8,9,10,11,12,13,14],ymm2[15]
5372 ; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,3,2,3,4,7,6,7]
5373 ; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,2,3,4,7,6,7]
5374 ; AVX2-FAST-PERLANE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
5375 ; AVX2-FAST-PERLANE-NEXT: retq
5377 ; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
5378 ; AVX512VL: # %bb.0:
5379 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,16,1,17,6,22,7,31,8,24,9,25,14,30,15,31]
5380 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
5381 ; AVX512VL-NEXT: retq
5383 ; XOPAVX1-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
5385 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5386 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5387 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm3 = xmm3[0,1],xmm2[0,1],xmm3[2,3],xmm2[2,3],xmm3[12,13],xmm2[12,13],xmm3[14,15],xmm2[14,15]
5388 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[0,1,2,3,12,13],xmm2[14,15],xmm1[u,u,u,u,u,u,u,u]
5389 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
5390 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
5391 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5392 ; XOPAVX1-NEXT: retq
5394 ; XOPAVX2-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
5396 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3]
5397 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,6],ymm2[7],ymm1[8,9,10,11,12,13,14],ymm2[15]
5398 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,3,2,3,4,7,6,7]
5399 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,2,3,4,7,6,7]
5400 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
5401 ; XOPAVX2-NEXT: retq
5402 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 6, i32 22, i32 7, i32 31, i32 8, i32 24, i32 9, i32 25, i32 14, i32 30, i32 15, i32 31>
5403 ret <16 x i16> %shuffle
; Two-source v16i16 shuffle: mask indices 0-15 select from %a, 16-31 from %b.
; Result elements 7 and 15 both use index 25 (%b element 9), so the low
; 128-bit half of the result needs one element from the high half of %b.
5406 define <16 x i16> @shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25(<16 x i16> %a, <16 x i16> %b) {
5407 ; AVX1-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
5409 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5410 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,0,2,3]
5411 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
5412 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[0,3,2,3]
5413 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
5414 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7]
5415 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
5416 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
5417 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
5418 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5421 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
5422 ; AVX2-SLOW: # %bb.0:
5423 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3]
5424 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2,3,4,5,6,7,8],ymm2[9],ymm1[10,11,12,13,14,15]
5425 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,0,2,3,6,4,6,7]
5426 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,2,3,4,7,6,7]
5427 ; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
5428 ; AVX2-SLOW-NEXT: retq
5430 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
5431 ; AVX2-FAST-ALL: # %bb.0:
5432 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm2 = <2,0,4,u,6,4,u,u>
5433 ; AVX2-FAST-ALL-NEXT: vpermd %ymm1, %ymm2, %ymm1
5434 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,10,11,u,u,u,u,u,u,u,u,16,17,18,19,20,21,22,23,u,u,u,u,u,u,u,u]
5435 ; AVX2-FAST-ALL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,2,3,4,7,6,7]
5436 ; AVX2-FAST-ALL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
5437 ; AVX2-FAST-ALL-NEXT: retq
5439 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
5440 ; AVX2-FAST-PERLANE: # %bb.0:
5441 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3]
5442 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2,3,4,5,6,7,8],ymm2[9],ymm1[10,11,12,13,14,15]
5443 ; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,0,2,3,6,4,6,7]
5444 ; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,2,3,4,7,6,7]
5445 ; AVX2-FAST-PERLANE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
5446 ; AVX2-FAST-PERLANE-NEXT: retq
5448 ; AVX512VL-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
5449 ; AVX512VL: # %bb.0:
5450 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,20,1,21,6,16,7,25,8,28,9,29,14,24,15,25]
5451 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
5452 ; AVX512VL-NEXT: retq
5454 ; XOPAVX1-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
5456 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5457 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5458 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm3 = xmm3[0,1],xmm2[8,9],xmm3[2,3],xmm2[10,11],xmm3[12,13],xmm2[0,1],xmm3[14,15],xmm2[2,3]
5459 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[8,9,10,11,0,1],xmm2[2,3],xmm1[u,u,u,u,u,u,u,u]
5460 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
5461 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
5462 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5463 ; XOPAVX1-NEXT: retq
5465 ; XOPAVX2-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
5467 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3]
5468 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2,3,4,5,6,7,8],ymm2[9],ymm1[10,11,12,13,14,15]
5469 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,0,2,3,6,4,6,7]
5470 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,2,3,4,7,6,7]
5471 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
5472 ; XOPAVX2-NEXT: retq
5473 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 20, i32 1, i32 21, i32 6, i32 16, i32 7, i32 25, i32 8, i32 28, i32 9, i32 29, i32 14, i32 24, i32 15, i32 25>
5474 ret <16 x i16> %shuffle
; Two-source v16i16 shuffle: mask indices 0-15 select from %a, 16-31 from %b.
; Result elements 7 and 15 both use index 26 (%b element 10), so the low
; 128-bit half of the result needs one element from the high half of %b.
5477 define <16 x i16> @shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26(<16 x i16> %a, <16 x i16> %b) {
5478 ; AVX1-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
5480 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5481 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
5482 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,12,13,10,11,u,u,u,u,u,u,u,u]
5483 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[1,0,3,2,4,5,6,7]
5484 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
5485 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
5486 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
5487 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
5488 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
5489 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
5492 ; AVX2-SLOW-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
5493 ; AVX2-SLOW: # %bb.0:
5494 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
5495 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,2,3,0,1,u,u,u,u,6,7,12,13,u,u,u,u,18,19,16,17,u,u,u,u,22,23,20,21]
5496 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
5497 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,5,7]
5498 ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
5499 ; AVX2-SLOW-NEXT: retq
5501 ; AVX2-FAST-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
5502 ; AVX2-FAST: # %bb.0:
5503 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,u,u,u,u,6,7,4,5,u,u,u,u,18,19,16,17,u,u,u,u,22,23,20,21,u,u,u,u]
5504 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
5505 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,2,3,0,1,u,u,u,u,6,7,12,13,u,u,u,u,18,19,16,17,u,u,u,u,22,23,20,21]
5506 ; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
5507 ; AVX2-FAST-NEXT: retq
5509 ; AVX512VL-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
5510 ; AVX512VL: # %bb.0:
5511 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,0,17,16,3,2,19,26,9,8,25,24,11,10,27,26]
5512 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
5513 ; AVX512VL-NEXT: retq
5515 ; XOPAVX1-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
5517 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5518 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5519 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm3 = xmm3[2,3,0,1],xmm2[2,3,0,1],xmm3[6,7,4,5],xmm2[6,7,4,5]
5520 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[2,3,0,1,6,7],xmm2[4,5],xmm1[u,u,u,u,u,u,u,u]
5521 ; XOPAVX1-NEXT: vprotd $16, %xmm0, %xmm0
5522 ; XOPAVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
5523 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5524 ; XOPAVX1-NEXT: retq
5526 ; XOPAVX2-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
5528 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
5529 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,2,3,0,1,u,u,u,u,6,7,12,13,u,u,u,u,18,19,16,17,u,u,u,u,22,23,20,21]
5530 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
5531 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,5,7]
5532 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
5533 ; XOPAVX2-NEXT: retq
5534 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 17, i32 16, i32 3, i32 2, i32 19, i32 26, i32 9, i32 8, i32 25, i32 24, i32 11, i32 10, i32 27, i32 26>
5535 ret <16 x i16> %shuffle
; Two-source v16i16 shuffle: mask indices 0-15 select from %a, 16-31 from %b.
; The mask alternates %b and %a elements, starting with %b. Result elements 7
; and 15 both use index 11 (%a element 11), so the low 128-bit half of the
; result needs one element from the high half of %a.
5538 define <16 x i16> @shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11(<16 x i16> %a, <16 x i16> %b) {
5539 ; AVX1-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
5541 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
5542 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
5543 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
5544 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm2[3],xmm0[4,5,6,7]
5545 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
5546 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5549 ; AVX2-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
5551 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
5552 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,u,u,u,u,u,u,u,u,16,17,18,19,20,21,22,23,u,u,u,u,u,u,u,u]
5553 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11]
5556 ; AVX512VL-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
5557 ; AVX512VL: # %bb.0:
5558 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,16,1,17,2,18,3,27,8,24,9,25,10,26,11,27]
5559 ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
5560 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
5561 ; AVX512VL-NEXT: retq
5563 ; XOPAVX1-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
5565 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
5566 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
5567 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
5568 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm2[3],xmm0[4,5,6,7]
5569 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
5570 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5571 ; XOPAVX1-NEXT: retq
5573 ; XOPAVX2-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
5575 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
5576 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,u,u,u,u,u,u,u,u,16,17,18,19,20,21,22,23,u,u,u,u,u,u,u,u]
5577 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11]
5578 ; XOPAVX2-NEXT: retq
5579 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 0, i32 17, i32 1, i32 18, i32 2, i32 19, i32 11, i32 24, i32 8, i32 25, i32 9, i32 26, i32 10, i32 27, i32 11>
5580 ret <16 x i16> %shuffle
; Two-source v16i16 shuffle: mask indices 0-15 select from %a, 16-31 from %b.
; The mask alternates %b and %a elements, starting with %b. Result elements 7
; and 15 both use index 15 (%a element 15), so the low 128-bit half of the
; result needs one element from the high half of %a.
5583 define <16 x i16> @shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15(<16 x i16> %a, <16 x i16> %b) {
5584 ; AVX1-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
5586 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
5587 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
5588 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
5589 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
5590 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
5591 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5594 ; AVX2-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
5596 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
5597 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,3,3]
5598 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,u,u,u,u,u,u,u,u,16,17,18,19,20,21,22,23,u,u,u,u,u,u,u,u]
5599 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11]
5602 ; AVX512VL-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
5603 ; AVX512VL: # %bb.0:
5604 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [4,20,5,21,6,22,7,31,12,28,13,29,14,30,15,31]
5605 ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
5606 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
5607 ; AVX512VL-NEXT: retq
5609 ; XOPAVX1-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
5611 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
5612 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
5613 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
5614 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
5615 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
5616 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5617 ; XOPAVX1-NEXT: retq
5619 ; XOPAVX2-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
5621 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
5622 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,3,3]
5623 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,u,u,u,u,u,u,u,u,16,17,18,19,20,21,22,23,u,u,u,u,u,u,u,u]
5624 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11]
5625 ; XOPAVX2-NEXT: retq
5626 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 4, i32 21, i32 5, i32 22, i32 6, i32 23, i32 15, i32 28, i32 12, i32 29, i32 13, i32 30, i32 14, i32 31, i32 15>
5627 ret <16 x i16> %shuffle
; Two-source v16i16 shuffle: mask indices 0-15 select from %a, 16-31 from %b.
; In each 128-bit half the first four result elements come from %a and the
; last four from %b. Result elements 7 and 15 both use index 31 (%b element
; 15), so the low half of the result needs one element from the high half of %b.
5630 define <16 x i16> @shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31(<16 x i16> %a, <16 x i16> %b) {
5631 ; AVX1-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
5633 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5634 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm2[0,1,2,3,4,6,5,7]
5635 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
5636 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[0,2,1,3,4,5,6,7]
5637 ; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
5638 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
5639 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,4,5,14,15,u,u,u,u,u,u,u,u]
5640 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
5641 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
5642 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5645 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
5646 ; AVX2-SLOW: # %bb.0:
5647 ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
5648 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,2,1,3,4,5,6,7,8,10,9,11,12,13,14,15]
5649 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,5,7,8,9,10,11,12,14,13,15]
5650 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
5651 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
5652 ; AVX2-SLOW-NEXT: retq
5654 ; AVX2-FAST-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
5655 ; AVX2-FAST: # %bb.0:
5656 ; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
5657 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,2,3,6,7,8,9,12,13,10,11,u,u,16,17,20,21,18,19,22,23,24,25,28,29,26,27,u,u]
5658 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
5659 ; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
5660 ; AVX2-FAST-NEXT: retq
5662 ; AVX512VL-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
5663 ; AVX512VL: # %bb.0:
5664 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,1,3,20,22,21,31,8,10,9,11,28,30,29,31]
5665 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
5666 ; AVX512VL-NEXT: retq
5668 ; XOPAVX1-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
5670 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5671 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5672 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm3 = xmm3[0,1,4,5,2,3,6,7],xmm2[8,9,12,13,10,11,14,15]
5673 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11],xmm2[14,15],xmm1[u,u,u,u,u,u,u,u]
5674 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
5675 ; XOPAVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
5676 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5677 ; XOPAVX1-NEXT: retq
5679 ; XOPAVX2-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
5681 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
5682 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,2,1,3,4,5,6,7,8,10,9,11,12,13,14,15]
5683 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,5,7,8,9,10,11,12,14,13,15]
5684 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
5685 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
5686 ; XOPAVX2-NEXT: retq
5687 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 2, i32 1, i32 3, i32 20, i32 22, i32 21, i32 31, i32 8, i32 10, i32 9, i32 11, i32 28, i32 30, i32 29, i32 31>
5688 ret <16 x i16> %shuffle
; Two-source v16i16 shuffle: mask indices 0-15 select from %a, 16-31 from %b.
; Only the first four elements of each 128-bit half are defined; the rest are
; undef. The defined entries repeat per half (a4,a4,a3,b2 / a12,a12,a11,b10),
; so every defined element stays within its own half.
5691 define <16 x i16> @shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
5692 ; AVX1-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
5694 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
5695 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,1,2,3]
5696 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,3,2,4,5,6,7]
5697 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
5698 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
5699 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
5700 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
5701 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
5702 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
5705 ; AVX2-SLOW-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
5706 ; AVX2-SLOW: # %bb.0:
5707 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
5708 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,1,2,3,6,5,6,7]
5709 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,3,2,4,5,6,7,8,8,11,10,12,13,14,15]
5710 ; AVX2-SLOW-NEXT: retq
5712 ; AVX2-FAST-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
5713 ; AVX2-FAST: # %bb.0:
5714 ; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
5715 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,6,7,4,5,8,9,10,11,12,13,14,15,24,25,24,25,22,23,20,21,24,25,26,27,28,29,30,31]
5716 ; AVX2-FAST-NEXT: retq
5718 ; AVX512VL-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
5719 ; AVX512VL: # %bb.0:
5720 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <4,4,3,18,u,u,u,u,12,12,11,26,u,u,u,u>
5721 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
5722 ; AVX512VL-NEXT: retq
5724 ; XOPAVX1-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
5726 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5727 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5728 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [8,9,8,9,6,7,20,21,8,9,10,11,12,13,14,15]
5729 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm3, %xmm2
5730 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm1, %xmm0, %xmm0
5731 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5732 ; XOPAVX1-NEXT: retq
5734 ; XOPAVX2-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
5736 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
5737 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,1,2,3,6,5,6,7]
5738 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,3,2,4,5,6,7,8,8,11,10,12,13,14,15]
5739 ; XOPAVX2-NEXT: retq
5740 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 3, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 12, i32 11, i32 26, i32 undef, i32 undef, i32 undef, i32 undef>
5741 ret <16 x i16> %shuffle
; Two-source v16i16 shuffle: mask indices 0-15 select from %a, 16-31 from %b.
; Only the first four elements of each 128-bit half are defined; the rest are
; undef. The defined entries repeat per half (a0,a3,a2,b5 / a8,a11,a10,b13),
; so every defined element stays within its own half.
5744 define <16 x i16> @shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
5745 ; AVX1-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
5747 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5748 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5749 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
5750 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
5751 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
5752 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
5753 ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
5754 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5757 ; AVX2-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
5759 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
5760 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3,16,17,22,23,20,21,26,27,16,17,26,27,16,17,18,19]
5763 ; AVX512VL-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
5764 ; AVX512VL: # %bb.0:
5765 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <0,3,2,21,u,u,u,u,8,11,10,29,u,u,u,u>
5766 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
5767 ; AVX512VL-NEXT: retq
5769 ; XOPAVX1-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
5771 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5772 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5773 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,6,7,4,5,26,27,0,1,26,27,0,1,2,3]
5774 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm3, %xmm2
5775 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm1, %xmm0, %xmm0
5776 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5777 ; XOPAVX1-NEXT: retq
5779 ; XOPAVX2-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
5781 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
5782 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3,16,17,22,23,20,21,26,27,16,17,26,27,16,17,18,19]
5783 ; XOPAVX2-NEXT: retq
5784 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 3, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 11, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
5785 ret <16 x i16> %shuffle
; Two-source v16i16 shuffle: mask indices 0-15 select from %a, 16-31 from %b.
; Only result elements 3 and 11 are defined (indices 21 and 29, i.e. %b
; elements 5 and 13); every other mask entry is undef, so %a is never read.
; The checks use the prefix common to every RUN line.
5788 define <16 x i16> @shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
5789 ; ALL-LABEL: shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu:
5791 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm1[2,2,2,2,6,6,6,6]
5793 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
5794 ret <16 x i16> %shuffle
; Two-source v16i16 shuffle: mask indices 0-15 select from %a, 16-31 from %b.
; Only the first four elements of each 128-bit half are defined; the rest are
; undef. Elements 0-2 and 8-10 keep %a in place, while elements 3 and 11 take
; %b elements 5 and 13 (indices 21 and 29), each from the matching half of %b.
5797 define <16 x i16> @shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
5798 ; AVX1-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu:
5800 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
5801 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
5802 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
5803 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[3],xmm2[4,5,6,7]
5804 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
5805 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
5806 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5809 ; AVX2-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu:
5811 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,2,2,2,6,6,6,6]
5812 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
5815 ; AVX512VL-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu:
5816 ; AVX512VL: # %bb.0:
5817 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <0,1,2,21,u,u,u,u,8,9,10,29,u,u,u,u>
5818 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
5819 ; AVX512VL-NEXT: retq
5821 ; XOPAVX1-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu:
5823 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
5824 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
5825 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
5826 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[3],xmm2[4,5,6,7]
5827 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
5828 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
5829 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5830 ; XOPAVX1-NEXT: retq
5832 ; XOPAVX2-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu:
5834 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,2,2,2,6,6,6,6]
5835 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
5836 ; XOPAVX2-NEXT: retq
5837 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
5838 ret <16 x i16> %shuffle
; Two-source v16i16 shuffle: mask indices 0-15 select from %a, 16-31 from %b.
; Only the last four elements of each 128-bit half are defined; the rest are
; undef. Elements 4-6 and 12-14 keep %b in place. Result elements 7 and 15
; both use index 11 (%a element 11), so the low half of the result needs one
; element from the high half of %a.
5841 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11(<16 x i16> %a, <16 x i16> %b) {
5842 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
5844 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5845 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
5846 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
5847 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm0[7]
5848 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
5849 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5852 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
5854 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
5855 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
5858 ; AVX512VL-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
5859 ; AVX512VL: # %bb.0:
5860 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <u,u,u,u,4,5,6,27,u,u,u,u,12,13,14,27>
5861 ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
5862 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
5863 ; AVX512VL-NEXT: retq
5865 ; XOPAVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
5867 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5868 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
5869 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
5870 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm0[7]
5871 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
5872 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5873 ; XOPAVX1-NEXT: retq
5875 ; XOPAVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
5877 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
5878 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
5879 ; XOPAVX2-NEXT: retq
5880 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 20, i32 21, i32 22, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11>
5881 ret <16 x i16> %shuffle
; Two-source v16i16 shuffle: mask indices 0-15 select from %a, 16-31 from %b.
; Only the first four elements of each 128-bit half are defined; the rest are
; undef. The defined entries repeat per half (b4,b5,b6,a3 / b12,b13,b14,a11),
; so every defined element stays within its own half.
5884 define <16 x i16> @shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
5885 ; AVX1-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
5887 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
5888 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
5889 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
5890 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[3],xmm3[4,5,6,7]
5891 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
5892 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
5893 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5896 ; AVX2-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
5898 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
5899 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6,7,8,9,10],ymm0[11],ymm1[12,13,14,15]
5902 ; AVX512VL-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
5903 ; AVX512VL: # %bb.0:
5904 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <4,5,6,19,u,u,u,u,12,13,14,27,u,u,u,u>
5905 ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
5906 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
5907 ; AVX512VL-NEXT: retq
5909 ; XOPAVX1-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
5911 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
5912 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
5913 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
5914 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[3],xmm3[4,5,6,7]
5915 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
5916 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
5917 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5918 ; XOPAVX1-NEXT: retq
5920 ; XOPAVX2-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
5922 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
5923 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6,7,8,9,10],ymm0[11],ymm1[12,13,14,15]
5924 ; XOPAVX2-NEXT: retq
5925 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 21, i32 22, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
5926 ret <16 x i16> %shuffle
; Two-input shuffle: mask indices 0-15 select from %a and 16-31 from %b.
; Result elements 0-6 and 8-14 use the same pattern in each 8-element half
; (offset by 8), but element 7 and element 15 both read %a[11], so the low
; half needs one word from the upper half of %a.
5929 define <16 x i16> @shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11(<16 x i16> %a, <16 x i16> %b) {
5930 ; AVX1-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
5932 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
5933 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
5934 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[0,2,2,3]
5935 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3,4,5,6],xmm0[7]
5936 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
5937 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
5938 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
5939 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
5942 ; AVX2-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
5944 ; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
5945 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
5946 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6,7,8,9,10],ymm0[11],ymm1[12,13,14,15]
5947 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7,16,17,18,19,20,21,26,27,24,25,26,27,28,29,22,23]
5950 ; AVX512VL-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
5951 ; AVX512VL: # %bb.0:
5952 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,21,20,21,22,11,8,9,10,29,28,29,30,11]
5953 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
5954 ; AVX512VL-NEXT: retq
5956 ; XOPAVX1-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
5958 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5959 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5960 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm2 = xmm3[0,1,2,3,4,5],xmm2[10,11,8,9,10,11,12,13],xmm3[6,7]
5961 ; XOPAVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
5962 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
5963 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3,4,5,6],xmm0[7]
5964 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5965 ; XOPAVX1-NEXT: retq
5967 ; XOPAVX2-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
5969 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
5970 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
5971 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6,7,8,9,10],ymm0[11],ymm1[12,13,14,15]
5972 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7,16,17,18,19,20,21,26,27,24,25,26,27,28,29,22,23]
5973 ; XOPAVX2-NEXT: retq
5974 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 21, i32 20, i32 21, i32 22, i32 11, i32 8, i32 9, i32 10, i32 29, i32 28, i32 29, i32 30, i32 11>
5975 ret <16 x i16> %shuffle
; Two-input shuffle that is an in-place word blend of %a (indices 0-15) and
; %b (indices 16-31) for every result element except element 7, which reads
; %a[15] from the upper half of %a instead of %a[7].
5978 define <16 x i16> @shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15(<16 x i16> %a, <16 x i16> %b) {
5979 ; AVX1-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15:
5981 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5982 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5983 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2,3],xmm2[4,5,6],xmm3[7]
5984 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
5985 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
5986 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5989 ; AVX2-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15:
5991 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
5992 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4,5,6],ymm0[7,8],ymm1[9],ymm0[10,11],ymm1[12,13,14],ymm0[15]
5995 ; AVX512VL-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15:
5996 ; AVX512VL: # %bb.0:
5997 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,17,2,3,20,21,22,15,8,25,10,11,28,29,30,15]
5998 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
5999 ; AVX512VL-NEXT: retq
6001 ; XOPAVX1-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15:
6003 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
6004 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
6005 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2,3],xmm2[4,5,6],xmm3[7]
6006 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
6007 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
6008 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6009 ; XOPAVX1-NEXT: retq
6011 ; XOPAVX2-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15:
6013 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
6014 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4,5,6],ymm0[7,8],ymm1[9],ymm0[10,11],ymm1[12,13,14],ymm0[15]
6015 ; XOPAVX2-NEXT: retq
6016 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 20, i32 21, i32 22, i32 15, i32 8, i32 25, i32 10, i32 11, i32 28, i32 29, i32 30, i32 15>
6017 ret <16 x i16> %shuffle
; Two-input shuffle with undef elements. The defined %a elements follow the
; same pattern in both 8-element halves (1,5,7 and 9,13,15), while the last
; element of each half reads mask index 25, i.e. %b[9]; for the low half that
; word comes from the upper half of %b.
6020 define <16 x i16> @shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25(<16 x i16> %a, <16 x i16> %b) {
6021 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
6023 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
6024 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
6025 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
6026 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[1,1,1,1,4,5,6,7]
6027 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,7,7]
6028 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm1[7]
6029 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
6030 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
6031 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
6032 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6035 ; AVX2-SLOW-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
6036 ; AVX2-SLOW: # %bb.0:
6037 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
6038 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
6039 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,1,1,1,4,5,6,7,9,9,9,9,12,13,14,15]
6040 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,7,7,8,9,10,11,12,13,15,15]
6041 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
6042 ; AVX2-SLOW-NEXT: retq
6044 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
6045 ; AVX2-FAST-ALL: # %bb.0:
6046 ; AVX2-FAST-ALL-NEXT: vpbroadcastd {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4]
6047 ; AVX2-FAST-ALL-NEXT: vpermd %ymm1, %ymm2, %ymm1
6048 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,2,3,2,3,2,3,8,9,10,11,14,15,u,u,18,19,18,19,18,19,18,19,24,25,26,27,30,31,u,u]
6049 ; AVX2-FAST-ALL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
6050 ; AVX2-FAST-ALL-NEXT: retq
6052 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
6053 ; AVX2-FAST-PERLANE: # %bb.0:
6054 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,2,3,2,3,2,3,8,9,10,11,14,15,u,u,18,19,18,19,18,19,18,19,24,25,26,27,30,31,u,u]
6055 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
6056 ; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
6057 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
6058 ; AVX2-FAST-PERLANE-NEXT: retq
6060 ; AVX512VL-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
6061 ; AVX512VL: # %bb.0:
6062 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <u,u,u,1,u,5,7,25,u,u,u,9,u,13,15,25>
6063 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
6064 ; AVX512VL-NEXT: retq
6066 ; XOPAVX1-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
6068 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
6069 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
6070 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
6071 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [2,3,2,3,2,3,2,3,8,9,10,11,14,15,30,31]
6072 ; XOPAVX1-NEXT: vpperm %xmm3, %xmm1, %xmm2, %xmm2
6073 ; XOPAVX1-NEXT: vpperm %xmm3, %xmm1, %xmm0, %xmm0
6074 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6075 ; XOPAVX1-NEXT: retq
6077 ; XOPAVX2-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
6079 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
6080 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
6081 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,1,1,1,4,5,6,7,9,9,9,9,12,13,14,15]
6082 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,7,7,8,9,10,11,12,13,15,15]
6083 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
6084 ; XOPAVX2-NEXT: retq
6085 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 25, i32 undef, i32 undef, i32 undef, i32 9, i32 undef, i32 13, i32 15, i32 25>
6086 ret <16 x i16> %shuffle
; Two-input shuffle with undef elements whose defined mask entries repeat
; exactly across the two 8-element halves (upper-half indices are the
; lower-half indices plus 8): one word from %a followed by the even words
; 0,2,4 of %b (mask indices 16,18,20).
6089 define <16 x i16> @shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu(<16 x i16> %a, <16 x i16> %b) {
6090 ; AVX1-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
6092 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
6093 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = <u,u,u,u,u,u,u,u,0,1,4,5,8,9,4,5>
6094 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
6095 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
6096 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3]
6097 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
6098 ; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1
6099 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
6100 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
6101 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6104 ; AVX2-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
6106 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,0,1,4,5,8,9,4,5,u,u,u,u,u,u,u,u,16,17,20,21,24,25,20,21]
6107 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
6108 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
6111 ; AVX512VL-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
6112 ; AVX512VL: # %bb.0:
6113 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <u,u,20,u,0,2,4,u,u,u,28,u,8,10,12,u>
6114 ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
6115 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
6116 ; AVX512VL-NEXT: retq
6118 ; XOPAVX1-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
6120 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
6121 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
6122 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [8,9,10,11,8,9,10,11,16,17,20,21,24,25,20,21]
6123 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm3, %xmm2
6124 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm1, %xmm0, %xmm0
6125 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6126 ; XOPAVX1-NEXT: retq
6128 ; XOPAVX2-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
6130 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,0,1,4,5,8,9,4,5,u,u,u,u,u,u,u,u,16,17,20,21,24,25,20,21]
6131 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
6132 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
6133 ; XOPAVX2-NEXT: retq
6134 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 16, i32 18, i32 20, i32 undef, i32 undef, i32 undef, i32 12, i32 undef, i32 24, i32 26, i32 28, i32 undef>
6135 ret <16 x i16> %shuffle
; Two-input shuffle: in each 8-element half the result is words 5-7 of %b
; followed by words 0-3 of %a (indices taken within that half), but the final
; element of both halves is mask index 12, so element 7 reads %a[12] from the
; upper half of %a rather than %a[4].
6138 define <16 x i16> @shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12(<16 x i16> %a, <16 x i16> %b) {
6139 ; AVX1-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
6141 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
6142 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
6143 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
6144 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
6145 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
6146 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6149 ; AVX2-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
6151 ; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3,4],ymm1[5,6,7],ymm0[8,9,10,11,12],ymm1[13,14,15]
6152 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
6153 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4],ymm1[5,6,7,8,9,10,11],ymm0[12],ymm1[13,14,15]
6154 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
6157 ; AVX512VL-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
6158 ; AVX512VL: # %bb.0:
6159 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [21,22,23,0,1,2,3,12,29,30,31,8,9,10,11,12]
6160 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
6161 ; AVX512VL-NEXT: retq
6163 ; XOPAVX1-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
6165 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
6166 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
6167 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
6168 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
6169 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
6170 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6171 ; XOPAVX1-NEXT: retq
6173 ; XOPAVX2-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
6175 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3,4],ymm1[5,6,7],ymm0[8,9,10,11,12],ymm1[13,14,15]
6176 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
6177 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4],ymm1[5,6,7,8,9,10,11],ymm0[12],ymm1[13,14,15]
6178 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
6179 ; XOPAVX2-NEXT: retq
6180 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 12, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12>
6181 ret <16 x i16> %shuffle
; Two-input shuffle with undef elements: the defined entries (%b word 6, then
; %a words 1-3, repeated with +8 in the upper half) are compatible with a
; per-half byte rotation of %b:%a, and the 256-bit integer run lines below
; expect a single vpalignr.
6184 define <16 x i16> @shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) {
6185 ; AVX1-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu:
6187 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
6188 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
6189 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
6190 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
6191 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6194 ; AVX2OR512VL-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu:
6195 ; AVX2OR512VL: # %bb.0:
6196 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9],ymm1[26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25]
6197 ; AVX2OR512VL-NEXT: retq
6199 ; XOPAVX1-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu:
6201 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
6202 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
6203 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
6204 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
6205 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6206 ; XOPAVX1-NEXT: retq
6208 ; XOPAVX2-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu:
6210 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9],ymm1[26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25]
6211 ; XOPAVX2-NEXT: retq
6212 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 22, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 30, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
6213 ret <16 x i16> %shuffle
; Single-input shuffle (all mask indices are below 16, so %b is not read).
; Each 8-element half is words 5,6,7,0,1,2,3 of that half of %a, but the last
; element of both halves is index 12, so element 7 reads %a[12] from the upper
; half.
6216 define <16 x i16> @shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12(<16 x i16> %a, <16 x i16> %b) {
6217 ; AVX1-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
6219 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
6220 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
6221 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
6222 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
6223 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
6226 ; AVX2-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
6228 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
6229 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6,7,8,9,10,11],ymm1[12],ymm0[13,14,15]
6230 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
6233 ; AVX512VL-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
6234 ; AVX512VL: # %bb.0:
6235 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [5,6,7,0,1,2,3,12,13,14,15,8,9,10,11,12]
6236 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
6237 ; AVX512VL-NEXT: retq
6239 ; XOPAVX1-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
6241 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
6242 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
6243 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
6244 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
6245 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
6246 ; XOPAVX1-NEXT: retq
6248 ; XOPAVX2-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
6250 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
6251 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6,7,8,9,10,11],ymm1[12],ymm0[13,14,15]
6252 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
6253 ; XOPAVX2-NEXT: retq
6254 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12>
6255 ret <16 x i16> %shuffle
; Single-input shuffle with undef elements (all defined indices are below 16,
; so %b is not read). The defined entries repeat with +8 in the upper half and
; the 256-bit integer run lines below expect one vpalignr of %a with itself.
6258 define <16 x i16> @shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) {
6259 ; AVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu:
6261 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
6262 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
6263 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
6264 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
6267 ; AVX2OR512VL-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu:
6268 ; AVX2OR512VL: # %bb.0:
6269 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
6270 ; AVX2OR512VL-NEXT: retq
6272 ; XOPAVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu:
6274 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
6275 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
6276 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
6277 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
6278 ; XOPAVX1-NEXT: retq
6280 ; XOPAVX2-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu:
6282 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
6283 ; XOPAVX2-NEXT: retq
6284 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 14, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
6285 ret <16 x i16> %shuffle
; Single-input shuffle with undef elements (%b is not read): the only defined
; entries place words 1-3 of each 8-element half of %a at positions 4-6 of
; that half. Every run line below expects a 6-byte vpslldq per 128-bit half.
6288 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) {
6289 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu:
6291 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
6292 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
6293 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
6294 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
6297 ; AVX2OR512VL-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu:
6298 ; AVX2OR512VL: # %bb.0:
6299 ; AVX2OR512VL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9],zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25]
6300 ; AVX2OR512VL-NEXT: retq
6302 ; XOPAVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu:
6304 ; XOPAVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
6305 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
6306 ; XOPAVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
6307 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
6308 ; XOPAVX1-NEXT: retq
6310 ; XOPAVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu:
6312 ; XOPAVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9],zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25]
6313 ; XOPAVX2-NEXT: retq
6314 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
6315 ret <16 x i16> %shuffle
; Two-input shuffle: in each 8-element half the result is words 3-7 of %b
; followed by words 0-1 of %a (indices taken within that half), but the final
; element of both halves is mask index 10, so element 7 reads %a[10] from the
; upper half of %a rather than %a[2].
6318 define <16 x i16> @shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10(<16 x i16> %a, <16 x i16> %b) {
6319 ; AVX1-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10:
6321 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
6322 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
6323 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
6324 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
6325 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,0,1,4,5,10,11]
6326 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
6327 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4],xmm0[5,6,7]
6328 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6331 ; AVX2-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10:
6333 ; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2],ymm1[3,4,5,6,7],ymm0[8,9,10],ymm1[11,12,13,14,15]
6334 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
6335 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3,4,5,6,7,8,9],ymm0[10],ymm1[11,12,13,14,15]
6336 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
6339 ; AVX512VL-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10:
6340 ; AVX512VL: # %bb.0:
6341 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [3,4,5,6,7,16,17,26,11,12,13,14,15,24,25,26]
6342 ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
6343 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
6344 ; AVX512VL-NEXT: retq
6346 ; XOPAVX1-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10:
6348 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
6349 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
6350 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
6351 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
6352 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,4,5,10,11]
6353 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6354 ; XOPAVX1-NEXT: retq
6356 ; XOPAVX2-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10:
6358 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2],ymm1[3,4,5,6,7],ymm0[8,9,10],ymm1[11,12,13,14,15]
6359 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
6360 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3,4,5,6,7,8,9],ymm0[10],ymm1[11,12,13,14,15]
6361 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
6362 ; XOPAVX2-NEXT: retq
6363 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 10, i32 27, i32 28, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10>
6364 ret <16 x i16> %shuffle
; Two-input shuffle with undef elements: the defined entries (%b words 4-6,
; then %a word 1, repeated with +8 in the upper half) are compatible with a
; per-half byte rotation of %b:%a, and the 256-bit integer run lines below
; expect a single vpalignr.
6367 define <16 x i16> @shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu(<16 x i16> %a, <16 x i16> %b) {
6368 ; AVX1-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu:
6370 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
6371 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
6372 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
6373 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
6374 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6377 ; AVX2OR512VL-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu:
6378 ; AVX2OR512VL: # %bb.0:
6379 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5],ymm1[22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21]
6380 ; AVX2OR512VL-NEXT: retq
6382 ; XOPAVX1-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu:
6384 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
6385 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
6386 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
6387 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
6388 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6389 ; XOPAVX1-NEXT: retq
6391 ; XOPAVX2-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu:
6393 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5],ymm1[22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21]
6394 ; XOPAVX2-NEXT: retq
6395 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 20, i32 21, i32 22, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 undef, i32 undef, i32 9, i32 undef>
6396 ret <16 x i16> %shuffle
; Single-input shuffle (all mask indices are below 16, so %b is not read).
; Each 8-element half is words 3,4,5,6,7,0,1 of that half of %a, but the last
; element of both halves is index 10, so element 7 reads %a[10] from the upper
; half.
6399 define <16 x i16> @shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10(<16 x i16> %a, <16 x i16> %b) {
6400 ; AVX1-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
6402 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
6403 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
6404 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
6405 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
6406 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
6409 ; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
6411 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
6412 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
6413 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
6416 ; AVX512VL-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
6417 ; AVX512VL: # %bb.0:
6418 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [3,4,5,6,7,0,1,10,11,12,13,14,15,8,9,10]
6419 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
6420 ; AVX512VL-NEXT: retq
6422 ; XOPAVX1-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
6424 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
6425 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
6426 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
6427 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
6428 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
6429 ; XOPAVX1-NEXT: retq
6431 ; XOPAVX2-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
6433 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
6434 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
6435 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
6436 ; XOPAVX2-NEXT: retq
6437 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10>
6438 ret <16 x i16> %shuffle
; Single-input shuffle with undef elements (all defined indices are below 16,
; so %b is not read). The defined entries repeat with +8 in the upper half and
; the 256-bit integer run lines below expect one vpalignr of %a with itself.
6441 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu(<16 x i16> %a, <16 x i16> %b) {
6442 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu:
6444 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
6445 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
6446 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
6447 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
6450 ; AVX2OR512VL-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu:
6451 ; AVX2OR512VL: # %bb.0:
6452 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
6453 ; AVX2OR512VL-NEXT: retq
6455 ; XOPAVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu:
6457 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
6458 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
6459 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
6460 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
6461 ; XOPAVX1-NEXT: retq
6463 ; XOPAVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu:
6465 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
6466 ; XOPAVX2-NEXT: retq
6467 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 9, i32 undef>
6468 ret <16 x i16> %shuffle
; Single-input shuffle with undef elements (%b is not read): the only defined
; entries place words 4-6 of each 8-element half of %a at positions 1-3 of
; that half. Every run line below expects a 6-byte vpsrldq per 128-bit half.
6471 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
6472 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu:
6474 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
6475 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
6476 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
6477 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
6480 ; AVX2OR512VL-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu:
6481 ; AVX2OR512VL: # %bb.0:
6482 ; AVX2OR512VL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,ymm0[22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero
6483 ; AVX2OR512VL-NEXT: retq
6485 ; XOPAVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu:
6487 ; XOPAVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
6488 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
6489 ; XOPAVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
6490 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
6491 ; XOPAVX1-NEXT: retq
6493 ; XOPAVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu:
6495 ; XOPAVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,ymm0[22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero
6496 ; XOPAVX2-NEXT: retq
6497 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 undef, i32 undef>
6498 ret <16 x i16> %shuffle
; Two-input mask: the low half is %a elements 3-7 followed by %b elements 0, 1
; and 10; the high half is %a elements 11-15 followed by %b elements 8, 9 and
; 10. Result element 7 reads %b element 10, which lives in the other 128-bit
; half of %b, so this is not a plain per-half vpalignr. The expectations show a
; blend + vpermq + blend + vpalignr sequence on the AVX2-level prefixes and a
; single vpermt2w with a constant index vector on the AVX512VL prefix.
6501 define <16 x i16> @shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26(<16 x i16> %a, <16 x i16> %b) {
6502 ; AVX1-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26:
6504 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
6505 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
6506 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
6507 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
6508 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,u,u,0,1,4,5,10,11]
6509 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
6510 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5,6,7]
6511 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6514 ; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26:
6516 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
6517 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
6518 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
6519 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
6522 ; AVX512VL-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26:
6523 ; AVX512VL: # %bb.0:
6524 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [3,4,5,6,7,16,17,26,11,12,13,14,15,24,25,26]
6525 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
6526 ; AVX512VL-NEXT: retq
6528 ; XOPAVX1-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26:
6530 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
6531 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
6532 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
6533 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
6534 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,4,5,10,11]
6535 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6536 ; XOPAVX1-NEXT: retq
6538 ; XOPAVX2-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26:
6540 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
6541 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
6542 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
6543 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
6544 ; XOPAVX2-NEXT: retq
6545 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 26, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26>
6546 ret <16 x i16> %shuffle
; Partially-undef two-input mask: the defined lanes are %a elements 4-6 plus
; %b element 1 in the low half, and %a elements 12-14 plus %b element 9 in the
; high half. Every defined lane is compatible with a per-half vpalignr of %a
; and %b by 6 bytes, which is what the expectations contain (one ymm vpalignr
; on the AVX2-level prefixes, two xmm vpalignr on the AVX1-level prefixes).
6549 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu(<16 x i16> %a, <16 x i16> %b) {
6550 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu:
6552 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
6553 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
6554 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
6555 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
6556 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6559 ; AVX2OR512VL-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu:
6560 ; AVX2OR512VL: # %bb.0:
6561 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3,4,5],ymm0[22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19,20,21]
6562 ; AVX2OR512VL-NEXT: retq
6564 ; XOPAVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu:
6566 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
6567 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
6568 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
6569 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
6570 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6571 ; XOPAVX1-NEXT: retq
6573 ; XOPAVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu:
6575 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3,4,5],ymm0[22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19,20,21]
6576 ; XOPAVX2-NEXT: retq
6577 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 17, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 25, i32 undef>
6578 ret <16 x i16> %shuffle
; Two-input mask: the low half is %a elements 5-7 followed by %b elements 0-3
; and 12; the high half is %a elements 13-15 followed by %b elements 8-12.
; Result element 7 reads %b element 12 from the other 128-bit half of %b, so a
; plain per-half vpalignr by 10 bytes does not cover it. The expectations use
; blend + vpermq + blend + vpalignr on the AVX2-level prefixes; the AVX512VL
; prefix expects vpermi2w with the index vector written for swapped operands
; (indices 16 and above select from %ymm0 there).
6581 define <16 x i16> @shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28(<16 x i16> %a, <16 x i16> %b) {
6582 ; AVX1-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28:
6584 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
6585 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
6586 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
6587 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
6588 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
6589 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6592 ; AVX2-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28:
6594 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4],ymm0[5,6,7],ymm1[8,9,10,11,12],ymm0[13,14,15]
6595 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
6596 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6,7,8,9,10,11],ymm1[12],ymm0[13,14,15]
6597 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
6600 ; AVX512VL-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28:
6601 ; AVX512VL: # %bb.0:
6602 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [21,22,23,0,1,2,3,12,29,30,31,8,9,10,11,12]
6603 ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
6604 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
6605 ; AVX512VL-NEXT: retq
6607 ; XOPAVX1-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28:
6609 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
6610 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
6611 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
6612 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
6613 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
6614 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6615 ; XOPAVX1-NEXT: retq
6617 ; XOPAVX2-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28:
6619 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4],ymm0[5,6,7],ymm1[8,9,10,11,12],ymm0[13,14,15]
6620 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
6621 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6,7,8,9,10,11],ymm1[12],ymm0[13,14,15]
6622 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
6623 ; XOPAVX2-NEXT: retq
6624 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 28, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28>
6625 ret <16 x i16> %shuffle
; Two-input mask whose halves come from different sources: the low half holds
; the even-indexed then the odd-indexed elements of %a's low half (0,2,4,6,
; 1,3,5,7), and the high half holds %b elements 15 down to 8 (mask indices
; 31..24), i.e. %b's upper half reversed. The AVX2-level expectations blend the
; two inputs by half and apply a single vpshufb; the AVX512VL prefix expects
; one vpermt2w.
6628 define <16 x i16> @shuffle_v16i16_00_02_04_06_01_03_05_07_31_30_29_28_27_26_25_24(<16 x i16> %a, <16 x i16> %b) {
6629 ; AVX1-LABEL: shuffle_v16i16_00_02_04_06_01_03_05_07_31_30_29_28_27_26_25_24:
6631 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,2,3,6,7,10,11,14,15]
6632 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
6633 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1]
6634 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
6637 ; AVX2-LABEL: shuffle_v16i16_00_02_04_06_01_03_05_07_31_30_29_28_27_26_25_24:
6639 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
6640 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,2,3,6,7,10,11,14,15,30,31,28,29,26,27,24,25,22,23,20,21,18,19,16,17]
6643 ; AVX512VL-LABEL: shuffle_v16i16_00_02_04_06_01_03_05_07_31_30_29_28_27_26_25_24:
6644 ; AVX512VL: # %bb.0:
6645 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,1,3,5,7,31,30,29,28,27,26,25,24]
6646 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
6647 ; AVX512VL-NEXT: retq
6649 ; XOPAVX1-LABEL: shuffle_v16i16_00_02_04_06_01_03_05_07_31_30_29_28_27_26_25_24:
6651 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,2,3,6,7,10,11,14,15]
6652 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
6653 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1]
6654 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
6655 ; XOPAVX1-NEXT: retq
6657 ; XOPAVX2-LABEL: shuffle_v16i16_00_02_04_06_01_03_05_07_31_30_29_28_27_26_25_24:
6659 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
6660 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,2,3,6,7,10,11,14,15,30,31,28,29,26,27,24,25,22,23,20,21,18,19,16,17]
6661 ; XOPAVX2-NEXT: retq
6662 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24>
6663 ret <16 x i16> %shuffle
; Partially-undef two-input mask: the defined lanes are %a element 6 and %b
; elements 1-3 in the low half, and %a element 14 and %b elements 9-11 in the
; high half. All defined lanes are compatible with a per-half vpalignr of %a
; and %b by 10 bytes, which is the expected lowering (one ymm vpalignr on the
; AVX2-level prefixes, two xmm vpalignr on the AVX1-level prefixes).
6666 define <16 x i16> @shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu(<16 x i16> %a, <16 x i16> %b) {
6667 ; AVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu:
6669 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
6670 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
6671 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
6672 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
6673 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6676 ; AVX2OR512VL-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu:
6677 ; AVX2OR512VL: # %bb.0:
6678 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9],ymm0[26,27,28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25]
6679 ; AVX2OR512VL-NEXT: retq
6681 ; XOPAVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu:
6683 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
6684 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
6685 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
6686 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
6687 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6688 ; XOPAVX1-NEXT: retq
6690 ; XOPAVX2-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu:
6692 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9],ymm0[26,27,28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25]
6693 ; XOPAVX2-NEXT: retq
6694 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 14, i32 undef, i32 undef, i32 25, i32 26, i32 27, i32 undef>
6695 ret <16 x i16> %shuffle
; Partially-undef two-input mask that repeats the same pattern in both 128-bit
; halves (high-half indices are the low-half indices plus 8): %b element 7,
; %a element 3, %b element 4 twice, %a element 5, with the remaining lanes
; undef. The AVX2-level expectations are a vpblendw followed by a vpshufb; the
; XOP AVX1 prefix expects two vpperm sharing one selector constant; the
; AVX512VL prefix expects vpermi2w with an operand-swapped index vector.
6698 define <16 x i16> @shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu(<16 x i16> %a, <16 x i16> %b) {
6699 ; AVX1-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu:
6701 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
6702 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
6703 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
6704 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,5,4,4]
6705 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
6706 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
6707 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
6708 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
6709 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6712 ; AVX2-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu:
6714 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6],ymm1[7],ymm0[8,9,10,11],ymm1[12],ymm0[13,14],ymm1[15]
6715 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15,30,31,30,31,22,23,22,23,24,25,24,25,26,27,30,31]
6718 ; AVX512VL-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu:
6719 ; AVX512VL: # %bb.0:
6720 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <7,u,19,u,4,4,21,u,15,u,27,u,12,12,29,u>
6721 ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
6722 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
6723 ; AVX512VL-NEXT: retq
6725 ; XOPAVX1-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu:
6727 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
6728 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
6729 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [14,15,10,11,22,23,24,25,8,9,8,9,26,27,28,29]
6730 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm3, %xmm2
6731 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm0, %xmm1, %xmm0
6732 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6733 ; XOPAVX1-NEXT: retq
6735 ; XOPAVX2-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu:
6737 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6],ymm1[7],ymm0[8,9,10,11],ymm1[12],ymm0[13,14],ymm1[15]
6738 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15,30,31,30,31,22,23,22,23,24,25,24,25,26,27,30,31]
6739 ; XOPAVX2-NEXT: retq
6740 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 undef, i32 3, i32 undef, i32 20, i32 20, i32 5, i32 undef, i32 31, i32 undef, i32 11, i32 undef, i32 28, i32 28, i32 13, i32 undef>
6741 ret <16 x i16> %shuffle
; The low half of the result is undef; the high half interleaves the first
; four elements of %a and %b (0,16,1,17,2,18,3,19). The expected code performs
; the interleave with a 128-bit vpunpcklwd and then inserts that xmm result
; into the upper half (vinsertf128 on the AVX1-level prefixes, vinserti128 on
; the AVX2-level ones).
6744 define <16 x i16> @shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19(<16 x i16> %a, <16 x i16> %b) {
6745 ; AVX1-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19:
6747 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6748 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
6751 ; AVX2OR512VL-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19:
6752 ; AVX2OR512VL: # %bb.0:
6753 ; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6754 ; AVX2OR512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
6755 ; AVX2OR512VL-NEXT: retq
6757 ; XOPAVX1-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19:
6759 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6760 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
6761 ; XOPAVX1-NEXT: retq
6763 ; XOPAVX2-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19:
6765 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6766 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
6767 ; XOPAVX2-NEXT: retq
6768 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19>
6769 ret <16 x i16> %shuffle
; The low half of the result is undef; the high half is a splat of %a element
; 3. The expected code differs by tuning prefix: the -SLOW and XOP AVX2
; prefixes expect vpshuflw followed by vpbroadcastd, the -FAST prefixes expect
; a single xmm vpshufb followed by vinserti128, and the AVX1-level prefixes
; expect vpshuflw + vpshufd + vinsertf128.
6772 define <16 x i16> @shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3(<16 x i16> %a, <16 x i16> %b) {
6773 ; AVX1-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
6775 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
6776 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
6777 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
6780 ; AVX2-SLOW-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
6781 ; AVX2-SLOW: # %bb.0:
6782 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
6783 ; AVX2-SLOW-NEXT: vpbroadcastd %xmm0, %ymm0
6784 ; AVX2-SLOW-NEXT: retq
6786 ; AVX2-FAST-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
6787 ; AVX2-FAST: # %bb.0:
6788 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
6789 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
6790 ; AVX2-FAST-NEXT: retq
6792 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
6793 ; AVX512VL-SLOW: # %bb.0:
6794 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
6795 ; AVX512VL-SLOW-NEXT: vpbroadcastd %xmm0, %ymm0
6796 ; AVX512VL-SLOW-NEXT: retq
6798 ; AVX512VL-FAST-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
6799 ; AVX512VL-FAST: # %bb.0:
6800 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
6801 ; AVX512VL-FAST-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
6802 ; AVX512VL-FAST-NEXT: retq
6804 ; XOPAVX1-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
6806 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
6807 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
6808 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
6809 ; XOPAVX1-NEXT: retq
6811 ; XOPAVX2-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
6813 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
6814 ; XOPAVX2-NEXT: vpbroadcastd %xmm0, %ymm0
6815 ; XOPAVX2-NEXT: retq
6816 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
6817 ret <16 x i16> %shuffle
; Full-width splat of %a element 8, the first element of %a's upper 128-bit
; half. Every expectation first extracts the upper half and then splats its
; element 0: vpbroadcastw on the AVX2-level prefixes, vpshuflw + vpshufd +
; vinsertf128 on the AVX1-level prefixes.
6820 define <16 x i16> @shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8(<16 x i16> %a, <16 x i16> %b) {
6821 ; AVX1-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
6823 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
6824 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
6825 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
6826 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
6829 ; AVX2OR512VL-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
6830 ; AVX2OR512VL: # %bb.0:
6831 ; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
6832 ; AVX2OR512VL-NEXT: vpbroadcastw %xmm0, %ymm0
6833 ; AVX2OR512VL-NEXT: retq
6835 ; XOPAVX1-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
6837 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
6838 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
6839 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
6840 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
6841 ; XOPAVX1-NEXT: retq
6843 ; XOPAVX2-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
6845 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
6846 ; XOPAVX2-NEXT: vpbroadcastw %xmm0, %ymm0
6847 ; XOPAVX2-NEXT: retq
6848 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
6849 ret <16 x i16> %shuffle
; The high half of the result is undef; the low half interleaves elements 4-7
; of %a and %b (4,20,5,21,6,22,7,23). Because only the low 128 bits are
; defined, a single xmm vpunpckhwd is expected, checked under the prefix that
; is common to every RUN line.
6852 define <16 x i16> @shuffle_v16i16_4_20_5_21_6_22_7_23_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) {
6853 ; ALL-LABEL: shuffle_v16i16_4_20_5_21_6_22_7_23_u_u_u_u_u_u_u_u:
6855 ; ALL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
6857 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
6858 ret <16 x i16> %shuffle
; The low half of the result is a splat of %a element 3; the high half is
; undef, so every expectation stays in xmm registers with no insert. The -SLOW
; and XOP AVX2 prefixes expect vpshuflw + vpbroadcastd, the -FAST prefixes
; expect one vpshufb, and the AVX1-level prefixes expect vpshuflw + vpshufd.
6861 define <16 x i16> @shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) {
6862 ; AVX1-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u:
6864 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
6865 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
6868 ; AVX2-SLOW-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u:
6869 ; AVX2-SLOW: # %bb.0:
6870 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
6871 ; AVX2-SLOW-NEXT: vpbroadcastd %xmm0, %xmm0
6872 ; AVX2-SLOW-NEXT: retq
6874 ; AVX2-FAST-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u:
6875 ; AVX2-FAST: # %bb.0:
6876 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
6877 ; AVX2-FAST-NEXT: retq
6879 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u:
6880 ; AVX512VL-SLOW: # %bb.0:
6881 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
6882 ; AVX512VL-SLOW-NEXT: vpbroadcastd %xmm0, %xmm0
6883 ; AVX512VL-SLOW-NEXT: retq
6885 ; AVX512VL-FAST-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u:
6886 ; AVX512VL-FAST: # %bb.0:
6887 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
6888 ; AVX512VL-FAST-NEXT: retq
6890 ; XOPAVX1-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u:
6892 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
6893 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
6894 ; XOPAVX1-NEXT: retq
6896 ; XOPAVX2-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u:
6898 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
6899 ; XOPAVX2-NEXT: vpbroadcastd %xmm0, %xmm0
6900 ; XOPAVX2-NEXT: retq
6901 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
6902 ret <16 x i16> %shuffle
; The low half of the result is a splat of %a element 9 (element 1 of %a's
; upper 128-bit half); the high half is undef. Every expectation first
; extracts the upper half, then splats its element 1 within an xmm register:
; vpshuflw + vpbroadcastd on the -SLOW and XOP AVX2 prefixes, a single vpshufb
; on the -FAST prefixes, vpshuflw + vpshufd on the AVX1-level prefixes.
6905 define <16 x i16> @shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) {
6906 ; AVX1-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
6908 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
6909 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
6910 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
6913 ; AVX2-SLOW-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
6914 ; AVX2-SLOW: # %bb.0:
6915 ; AVX2-SLOW-NEXT: vextracti128 $1, %ymm0, %xmm0
6916 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
6917 ; AVX2-SLOW-NEXT: vpbroadcastd %xmm0, %xmm0
6918 ; AVX2-SLOW-NEXT: retq
6920 ; AVX2-FAST-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
6921 ; AVX2-FAST: # %bb.0:
6922 ; AVX2-FAST-NEXT: vextracti128 $1, %ymm0, %xmm0
6923 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
6924 ; AVX2-FAST-NEXT: retq
6926 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
6927 ; AVX512VL-SLOW: # %bb.0:
6928 ; AVX512VL-SLOW-NEXT: vextracti128 $1, %ymm0, %xmm0
6929 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
6930 ; AVX512VL-SLOW-NEXT: vpbroadcastd %xmm0, %xmm0
6931 ; AVX512VL-SLOW-NEXT: retq
6933 ; AVX512VL-FAST-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
6934 ; AVX512VL-FAST: # %bb.0:
6935 ; AVX512VL-FAST-NEXT: vextracti128 $1, %ymm0, %xmm0
6936 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
6937 ; AVX512VL-FAST-NEXT: retq
6939 ; XOPAVX1-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
6941 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
6942 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
6943 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
6944 ; XOPAVX1-NEXT: retq
6946 ; XOPAVX2-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
6948 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
6949 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
6950 ; XOPAVX2-NEXT: vpbroadcastd %xmm0, %xmm0
6951 ; XOPAVX2-NEXT: retq
6952 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
6953 ret <16 x i16> %shuffle
; Per-half interleave of the low four elements of %a0 and %a1, with the two
; 64-bit groups of each half swapped: 2,18,3,19 precede 0,16,1,17 in the low
; half, and 10,26,11,27 precede 8,24,9,25 in the high half. The AVX2-level
; expectations are a ymm vpunpcklwd followed by a dword permute [2,3,0,1] in
; each half; the AVX1-level prefixes build the interleave from two xmm unpacks
; and finish with vpermilps; the AVX512VL prefix expects a single vpermt2w.
6956 define <16 x i16> @shuffle_v16i16_02_18_03_19_00_16_01_17_10_26_11_27_08_24_09_25(<16 x i16> %a0, <16 x i16> %a1) {
6957 ; AVX1-LABEL: shuffle_v16i16_02_18_03_19_00_16_01_17_10_26_11_27_08_24_09_25:
6959 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
6960 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
6961 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
6962 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6963 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6964 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
6967 ; AVX2-LABEL: shuffle_v16i16_02_18_03_19_00_16_01_17_10_26_11_27_08_24_09_25:
6969 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
6970 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
6973 ; AVX512VL-LABEL: shuffle_v16i16_02_18_03_19_00_16_01_17_10_26_11_27_08_24_09_25:
6974 ; AVX512VL: # %bb.0:
6975 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [2,18,3,19,0,16,1,17,10,26,11,27,8,24,9,25]
6976 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
6977 ; AVX512VL-NEXT: retq
6979 ; XOPAVX1-LABEL: shuffle_v16i16_02_18_03_19_00_16_01_17_10_26_11_27_08_24_09_25:
6981 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
6982 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
6983 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
6984 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6985 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6986 ; XOPAVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
6987 ; XOPAVX1-NEXT: retq
6989 ; XOPAVX2-LABEL: shuffle_v16i16_02_18_03_19_00_16_01_17_10_26_11_27_08_24_09_25:
6991 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
6992 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
6993 ; XOPAVX2-NEXT: retq
6994 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 0, i32 16, i32 1, i32 17, i32 10, i32 26, i32 11, i32 27, i32 8, i32 24, i32 9, i32 25>
; Two chained shuffles: an i16 shuffle producing 2,18,3,19,0,16,1,17,10,26,
; 11,27,8,24,9,25, followed (through a bitcast to <4 x i64>) by a 64-bit
; element permute <0,2,1,3> that swaps the two middle quadwords. The combined
; effect is the mask spelled out in the function name. The AVX2-level
; expectations show both steps folded into a ymm vpunpcklwd plus one vpermq
; [1,3,0,2]; the AVX512VL prefix expects a single vpermt2w whose index vector
; equals the combined mask.
6998 define <16 x i16> @shuffle_v16i16_02_18_03_19_10_26_11_27_00_16_01_17_08_24_09_25(<16 x i16> %a0, <16 x i16> %a1) {
6999 ; AVX1-LABEL: shuffle_v16i16_02_18_03_19_10_26_11_27_00_16_01_17_08_24_09_25:
7001 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
7002 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
7003 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
7004 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,2,4,5,6,7]
7005 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
7006 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
7007 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[0,1,0,1]
7008 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,7,7]
7009 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
7010 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
7011 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,2,4,5,6,7]
7012 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
7013 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7]
7014 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
7015 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
7018 ; AVX2-LABEL: shuffle_v16i16_02_18_03_19_10_26_11_27_00_16_01_17_08_24_09_25:
7020 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
7021 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,0,2]
7024 ; AVX512VL-LABEL: shuffle_v16i16_02_18_03_19_10_26_11_27_00_16_01_17_08_24_09_25:
7025 ; AVX512VL: # %bb.0:
7026 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [2,18,3,19,10,26,11,27,0,16,1,17,8,24,9,25]
7027 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
7028 ; AVX512VL-NEXT: retq
7030 ; XOPAVX1-LABEL: shuffle_v16i16_02_18_03_19_10_26_11_27_00_16_01_17_08_24_09_25:
7032 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
7033 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
7034 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
7035 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
7036 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm2 = xmm0[0,1],xmm1[0,1],xmm0[4,5],xmm1[4,5],xmm0[2,3],xmm1[2,3],xmm0[6,7],xmm1[6,7]
7037 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8,9],xmm1[8,9],xmm0[12,13],xmm1[12,13],xmm0[10,11],xmm1[10,11],xmm0[14,15],xmm1[14,15]
7038 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
7039 ; XOPAVX1-NEXT: retq
7041 ; XOPAVX2-LABEL: shuffle_v16i16_02_18_03_19_10_26_11_27_00_16_01_17_08_24_09_25:
7043 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
7044 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,0,2]
7045 ; XOPAVX2-NEXT: retq
7046 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 0, i32 16, i32 1, i32 17, i32 10, i32 26, i32 11, i32 27, i32 8, i32 24, i32 9, i32 25>
7047 %2 = bitcast <16 x i16> %1 to <4 x i64>
7048 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
7049 %4 = bitcast <4 x i64> %3 to <16 x i16>
; Both <8 x i32> inputs are arithmetically shifted right by 25, reinterpreted
; as <16 x i16>, and shuffled so that only the even-indexed i16 elements are
; kept, taking four from the first value and four from the second in each
; 128-bit half. The expectations show this shift + truncating shuffle being
; matched to vpsrad followed by the signed saturating pack vpackssdw.
; NOTE(review): presumably the pack is usable because an arithmetic shift by
; 25 leaves values that already fit in a signed 16-bit element - confirm
; against the X86 shuffle-combining code.
7053 define <16 x i16> @shuffle_v16i16_ashr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30(<8 x i32> %a0, <8 x i32> %a1) {
7054 ; AVX1-LABEL: shuffle_v16i16_ashr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30:
7056 ; AVX1-NEXT: vpsrad $25, %xmm0, %xmm2
7057 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7058 ; AVX1-NEXT: vpsrad $25, %xmm0, %xmm0
7059 ; AVX1-NEXT: vpsrad $25, %xmm1, %xmm3
7060 ; AVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
7061 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
7062 ; AVX1-NEXT: vpsrad $25, %xmm1, %xmm1
7063 ; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
7064 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
7067 ; AVX2OR512VL-LABEL: shuffle_v16i16_ashr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30:
7068 ; AVX2OR512VL: # %bb.0:
7069 ; AVX2OR512VL-NEXT: vpsrad $25, %ymm0, %ymm0
7070 ; AVX2OR512VL-NEXT: vpsrad $25, %ymm1, %ymm1
7071 ; AVX2OR512VL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
7072 ; AVX2OR512VL-NEXT: retq
7074 ; XOPAVX1-LABEL: shuffle_v16i16_ashr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30:
7076 ; XOPAVX1-NEXT: vpsrad $25, %xmm0, %xmm2
7077 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7078 ; XOPAVX1-NEXT: vpsrad $25, %xmm0, %xmm0
7079 ; XOPAVX1-NEXT: vpsrad $25, %xmm1, %xmm3
7080 ; XOPAVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
7081 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
7082 ; XOPAVX1-NEXT: vpsrad $25, %xmm1, %xmm1
7083 ; XOPAVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
7084 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
7085 ; XOPAVX1-NEXT: retq
7087 ; XOPAVX2-LABEL: shuffle_v16i16_ashr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30:
7089 ; XOPAVX2-NEXT: vpsrad $25, %ymm0, %ymm0
7090 ; XOPAVX2-NEXT: vpsrad $25, %ymm1, %ymm1
7091 ; XOPAVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
7092 ; XOPAVX2-NEXT: retq
7093 %1 = ashr <8 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
7094 %2 = ashr <8 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
7095 %3 = bitcast <8 x i32> %1 to <16 x i16>
7096 %4 = bitcast <8 x i32> %2 to <16 x i16>
7097 %5 = shufflevector <16 x i16> %3, <16 x i16> %4, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; Logical-shift counterpart of the ashr test: both <8 x i32> inputs are
; shifted right logically by 25, reinterpreted as <16 x i16>, and shuffled so
; that only the even-indexed i16 elements are kept, four from each value per
; 128-bit half. The expectations show vpsrld followed by the unsigned
; saturating pack vpackusdw.
; NOTE(review): presumably the pack is usable because a logical shift by 25
; leaves values that already fit in an unsigned 16-bit element - confirm
; against the X86 shuffle-combining code.
7101 define <16 x i16> @shuffle_v16i16_lshr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30(<8 x i32> %a0, <8 x i32> %a1) {
7102 ; AVX1-LABEL: shuffle_v16i16_lshr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30:
7104 ; AVX1-NEXT: vpsrld $25, %xmm0, %xmm2
7105 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7106 ; AVX1-NEXT: vpsrld $25, %xmm0, %xmm0
7107 ; AVX1-NEXT: vpsrld $25, %xmm1, %xmm3
7108 ; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2
7109 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
7110 ; AVX1-NEXT: vpsrld $25, %xmm1, %xmm1
7111 ; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
7112 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
7115 ; AVX2OR512VL-LABEL: shuffle_v16i16_lshr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30:
7116 ; AVX2OR512VL: # %bb.0:
7117 ; AVX2OR512VL-NEXT: vpsrld $25, %ymm0, %ymm0
7118 ; AVX2OR512VL-NEXT: vpsrld $25, %ymm1, %ymm1
7119 ; AVX2OR512VL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
7120 ; AVX2OR512VL-NEXT: retq
7122 ; XOPAVX1-LABEL: shuffle_v16i16_lshr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30:
7124 ; XOPAVX1-NEXT: vpsrld $25, %xmm0, %xmm2
7125 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7126 ; XOPAVX1-NEXT: vpsrld $25, %xmm0, %xmm0
7127 ; XOPAVX1-NEXT: vpsrld $25, %xmm1, %xmm3
7128 ; XOPAVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2
7129 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
7130 ; XOPAVX1-NEXT: vpsrld $25, %xmm1, %xmm1
7131 ; XOPAVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
7132 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
7133 ; XOPAVX1-NEXT: retq
7135 ; XOPAVX2-LABEL: shuffle_v16i16_lshr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30:
7137 ; XOPAVX2-NEXT: vpsrld $25, %ymm0, %ymm0
7138 ; XOPAVX2-NEXT: vpsrld $25, %ymm1, %ymm1
7139 ; XOPAVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
7140 ; XOPAVX2-NEXT: retq
7141 %1 = lshr <8 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
7142 %2 = lshr <8 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
7143 %3 = bitcast <8 x i32> %1 to <16 x i16>
7144 %4 = bitcast <8 x i32> %2 to <16 x i16>
7145 %5 = shufflevector <16 x i16> %3, <16 x i16> %4, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; Single-input shuffle with undef lanes: the mask only references %a (the
; zeroinitializer operand is never selected) and repeats the same word
; pattern 4,6,7,u,u,6,7,5 in each 128-bit half.
; The checks expect the slow-shuffle AVX2/AVX512VL runs and the XOP+AVX2 run
; to use vpshufhw followed by a dword shuffle, and the runs with the
; fast-variable-shuffle features to use a single vpshufb.
7149 define <16 x i16> @shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13(<16 x i16> %a) {
7150 ; AVX1-LABEL: shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13:
7152 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,6,7,5]
7153 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7154 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
7155 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
7156 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
7159 ; AVX2-SLOW-LABEL: shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13:
7160 ; AVX2-SLOW: # %bb.0:
7161 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,7,5,8,9,10,11,12,14,15,13]
7162 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
7163 ; AVX2-SLOW-NEXT: retq
7165 ; AVX2-FAST-LABEL: shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13:
7166 ; AVX2-FAST: # %bb.0:
7167 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11,24,25,28,29,30,31,26,27,24,25,28,29,30,31,26,27]
7168 ; AVX2-FAST-NEXT: retq
7170 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13:
7171 ; AVX512VL-SLOW: # %bb.0:
7172 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,7,5,8,9,10,11,12,14,15,13]
7173 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
7174 ; AVX512VL-SLOW-NEXT: retq
7176 ; AVX512VL-FAST-LABEL: shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13:
7177 ; AVX512VL-FAST: # %bb.0:
7178 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11,24,25,28,29,30,31,26,27,24,25,28,29,30,31,26,27]
7179 ; AVX512VL-FAST-NEXT: retq
7181 ; XOPAVX1-LABEL: shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13:
7183 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,6,7,5]
7184 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7185 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
7186 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
7187 ; XOPAVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
7188 ; XOPAVX1-NEXT: retq
7190 ; XOPAVX2-LABEL: shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13:
7192 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,7,5,8,9,10,11,12,14,15,13]
7193 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
7194 ; XOPAVX2-NEXT: retq
7195 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 4, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7, i32 5, i32 12, i32 14, i32 15, i32 undef, i32 undef, i32 14, i32 15, i32 13>
7196 ret <16 x i16> %shuffle
; Reverses the low four words of each <8 x i16> input separately, then
; concatenates the two shuffled halves into one <16 x i16> result.
; The checks expect the AVX-only runs to shuffle each xmm with vpshuflw and
; then insert, while the AVX2-capable runs concatenate first and apply a
; single ymm vpshuflw.
7199 define <16 x i16> @shuffle_v16i16_03_02_01_00_04_05_06_07_11_10_09_08_12_13_14_15_v8i16(<8 x i16> %a, <8 x i16> %b) {
7200 ; AVX1-LABEL: shuffle_v16i16_03_02_01_00_04_05_06_07_11_10_09_08_12_13_14_15_v8i16:
7202 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
7203 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
7204 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
7207 ; AVX2OR512VL-LABEL: shuffle_v16i16_03_02_01_00_04_05_06_07_11_10_09_08_12_13_14_15_v8i16:
7208 ; AVX2OR512VL: # %bb.0:
7209 ; AVX2OR512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
7210 ; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
7211 ; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
7212 ; AVX2OR512VL-NEXT: retq
7214 ; XOPAVX1-LABEL: shuffle_v16i16_03_02_01_00_04_05_06_07_11_10_09_08_12_13_14_15_v8i16:
7216 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
7217 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
7218 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
7219 ; XOPAVX1-NEXT: retq
7221 ; XOPAVX2-LABEL: shuffle_v16i16_03_02_01_00_04_05_06_07_11_10_09_08_12_13_14_15_v8i16:
7223 ; XOPAVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
7224 ; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
7225 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
7226 ; XOPAVX2-NEXT: retq
7227 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
7228 %2 = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
7229 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Reverses the high four words of each <8 x i16> input separately, then
; concatenates the two shuffled halves into one <16 x i16> result.
; The checks expect the AVX-only runs to shuffle each xmm with vpshufhw and
; then insert, while the AVX2-capable runs concatenate first and apply a
; single ymm vpshufhw.
; NOTE(review): the function name spells "..._02_04_07_..." but the masks
; below select element 3 in that position (0,1,2,3,7,6,5,4) - the name looks
; like a typo for "_03_"; left unchanged because the label lines depend on it.
7233 define <16 x i16> @shuffle_v16i16_00_01_02_04_07_06_05_04_08_09_10_11_15_14_13_12_v8i16(<8 x i16> %a, <8 x i16> %b) {
7234 ; AVX1-LABEL: shuffle_v16i16_00_01_02_04_07_06_05_04_08_09_10_11_15_14_13_12_v8i16:
7236 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
7237 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
7238 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
7241 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_01_02_04_07_06_05_04_08_09_10_11_15_14_13_12_v8i16:
7242 ; AVX2OR512VL: # %bb.0:
7243 ; AVX2OR512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
7244 ; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
7245 ; AVX2OR512VL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12]
7246 ; AVX2OR512VL-NEXT: retq
7248 ; XOPAVX1-LABEL: shuffle_v16i16_00_01_02_04_07_06_05_04_08_09_10_11_15_14_13_12_v8i16:
7250 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
7251 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
7252 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
7253 ; XOPAVX1-NEXT: retq
7255 ; XOPAVX2-LABEL: shuffle_v16i16_00_01_02_04_07_06_05_04_08_09_10_11_15_14_13_12_v8i16:
7257 ; XOPAVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
7258 ; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
7259 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12]
7260 ; XOPAVX2-NEXT: retq
7261 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4>
7262 %2 = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4>
7263 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Swaps every adjacent pair of i16 lanes, which is the same as rotating each
; 32-bit element by 16 bits.
; The checks expect the AVX512VL runs to recognise the rotate (vprold $16) and
; the XOP+AVX run to use vprotd $16 per 128-bit half; the remaining runs use
; vpshuflw/vpshufhw pairs or a single vpshufb.
7267 define <16 x i16> @shuffle_v16i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14(<16 x i16> %a) {
7268 ; AVX1-LABEL: shuffle_v16i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14:
7270 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7]
7271 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,4,7,6]
7272 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7273 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
7274 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
7275 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
7278 ; AVX2-SLOW-LABEL: shuffle_v16i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14:
7279 ; AVX2-SLOW: # %bb.0:
7280 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
7281 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14]
7282 ; AVX2-SLOW-NEXT: retq
7284 ; AVX2-FAST-LABEL: shuffle_v16i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14:
7285 ; AVX2-FAST: # %bb.0:
7286 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13,18,19,16,17,22,23,20,21,26,27,24,25,30,31,28,29]
7287 ; AVX2-FAST-NEXT: retq
7289 ; AVX512VL-LABEL: shuffle_v16i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14:
7290 ; AVX512VL: # %bb.0:
7291 ; AVX512VL-NEXT: vprold $16, %ymm0, %ymm0
7292 ; AVX512VL-NEXT: retq
7294 ; XOPAVX1-LABEL: shuffle_v16i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14:
7296 ; XOPAVX1-NEXT: vprotd $16, %xmm0, %xmm1
7297 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7298 ; XOPAVX1-NEXT: vprotd $16, %xmm0, %xmm0
7299 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
7300 ; XOPAVX1-NEXT: retq
7302 ; XOPAVX2-LABEL: shuffle_v16i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14:
7304 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
7305 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14]
7306 ; XOPAVX2-NEXT: retq
7307 %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
7308 ret <16 x i16> %shuffle
; Rotates each group of four i16 lanes by one position (lane 3 moves to lane
; 0), which is the same as rotating each 64-bit element by 16 bits.
; The checks expect the AVX512VL runs to recognise the rotate (vprolq $16) and
; the XOP+AVX run to use vprotq $16 per 128-bit half; the remaining runs use
; vpshuflw/vpshufhw pairs or a single vpshufb.
7311 define <16 x i16> @shuffle_v16i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14(<16 x i16> %a) {
7312 ; AVX1-LABEL: shuffle_v16i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14:
7314 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,0,1,2,4,5,6,7]
7315 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,4,5,6]
7316 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7317 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,0,1,2,4,5,6,7]
7318 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6]
7319 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
7322 ; AVX2-SLOW-LABEL: shuffle_v16i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14:
7323 ; AVX2-SLOW: # %bb.0:
7324 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,0,1,2,4,5,6,7,11,8,9,10,12,13,14,15]
7325 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14]
7326 ; AVX2-SLOW-NEXT: retq
7328 ; AVX2-FAST-LABEL: shuffle_v16i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14:
7329 ; AVX2-FAST: # %bb.0:
7330 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,0,1,2,3,4,5,14,15,8,9,10,11,12,13,22,23,16,17,18,19,20,21,30,31,24,25,26,27,28,29]
7331 ; AVX2-FAST-NEXT: retq
7333 ; AVX512VL-LABEL: shuffle_v16i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14:
7334 ; AVX512VL: # %bb.0:
7335 ; AVX512VL-NEXT: vprolq $16, %ymm0, %ymm0
7336 ; AVX512VL-NEXT: retq
7338 ; XOPAVX1-LABEL: shuffle_v16i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14:
7340 ; XOPAVX1-NEXT: vprotq $16, %xmm0, %xmm1
7341 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7342 ; XOPAVX1-NEXT: vprotq $16, %xmm0, %xmm0
7343 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
7344 ; XOPAVX1-NEXT: retq
7346 ; XOPAVX2-LABEL: shuffle_v16i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14:
7348 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,0,1,2,4,5,6,7,11,8,9,10,12,13,14,15]
7349 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14]
7350 ; XOPAVX2-NEXT: retq
7351 %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 11, i32 8, i32 9, i32 10, i32 15, i32 12, i32 13, i32 14>
7352 ret <16 x i16> %shuffle
; Loads a single i16 from %ptr and inserts it into lane 0 of an all-zero
; <16 x i16> vector. The shared checks expect every run configuration to emit
; a zero-extending scalar load (movzwl) followed by vmovd.
7355 define <16 x i16> @insert_v16i16_0elt_into_zero_vector(i16* %ptr) {
7356 ; ALL-LABEL: insert_v16i16_0elt_into_zero_vector:
7358 ; ALL-NEXT: movzwl (%rdi), %eax
7359 ; ALL-NEXT: vmovd %eax, %xmm0
7361 %val = load i16, i16* %ptr
7362 %i0 = insertelement <16 x i16> zeroinitializer, i16 %val, i32 0
; Extracts the low eight words of %a and the high eight words of %b, then
; concatenates them. The shared checks expect every run configuration to fold
; the three shuffles into a single vblendps of the two ymm inputs.
7366 define <16 x i16> @concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31(<16 x i16> %a, <16 x i16> %b) {
7367 ; ALL-LABEL: concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31:
7369 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
7371 %alo = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7372 %bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7373 %shuf = shufflevector <8 x i16> %alo, <8 x i16> %bhi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7374 ret <16 x i16> %shuf
; Extracts the high eight words of both %a and %b, bitcasts each half to
; <16 x i8>, concatenates them with a byte-level identity shuffle, and
; bitcasts back to <16 x i16>. The checks expect the bitcasts to be looked
; through so that the whole sequence becomes one 128-bit lane permute
; (vperm2f128, or vperm2i128 on the AVX512VL runs).
7377 define <16 x i16> @concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc(<16 x i16> %a, <16 x i16> %b) {
7378 ; AVX1OR2-LABEL: concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc:
7380 ; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
7381 ; AVX1OR2-NEXT: retq
7383 ; AVX512VL-LABEL: concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc:
7384 ; AVX512VL: # %bb.0:
7385 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
7386 ; AVX512VL-NEXT: retq
7388 ; XOP-LABEL: concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc:
7390 ; XOP-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
7392 %ahi = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7393 %bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7394 %bc0hi = bitcast <8 x i16> %ahi to <16 x i8>
7395 %bc1hi = bitcast <8 x i16> %bhi to <16 x i8>
7396 %shuffle8 = shufflevector <16 x i8> %bc0hi, <16 x i8> %bc1hi, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7397 %shuffle16 = bitcast <32 x i8> %shuffle8 to <16 x i16>
7398 ret <16 x i16> %shuffle16
; Two-input <16 x i16> shuffle with an irregular mask that pulls words from
; both 128-bit halves of both %a and %b (no repeated per-lane pattern).
; The checks expect the AVX512VL runs to handle it with a single vpermi2w and
; the other runs to fall back to multi-instruction shuffle/blend sequences.
; NOTE(review): presumably a regression test for bug report 24935, judging
; only by the function name - confirm against the bug tracker.
7401 define <16 x i16> @PR24935(<16 x i16> %a, <16 x i16> %b) {
7402 ; AVX1-LABEL: PR24935:
7404 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,0,1,1]
7405 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
7406 ; AVX1-NEXT: vpalignr {{.*#+}} xmm4 = xmm3[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
7407 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0,1],xmm2[2],xmm4[3,4,5,6,7]
7408 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
7409 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm5 = xmm4[0,1,2,3,5,5,5,5]
7410 ; AVX1-NEXT: vpshufb {{.*#+}} xmm6 = xmm0[2,3,u,u,u,u,u,u,u,u,8,9,0,1,u,u]
7411 ; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm6[0],xmm5[1],xmm6[2,3],xmm5[4],xmm6[5,6,7]
7412 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm5[0,1],xmm2[2,3],xmm5[4,5,6],xmm2[7]
7413 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
7414 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
7415 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3,4,5],xmm1[6,7]
7416 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[6,7,4,5,u,u,10,11,4,5,14,15,u,u,0,1]
7417 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3,4,5],xmm0[6],xmm1[7]
7418 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
7421 ; AVX2-SLOW-LABEL: PR24935:
7422 ; AVX2-SLOW: # %bb.0:
7423 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,zero,zero,ymm1[8,9],zero,zero,zero,zero,ymm1[14,15,12,13,0,1,24,25,24,25],zero,zero,ymm1[24,25,16,17,30,31,28,29,16,17]
7424 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
7425 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5],zero,zero,ymm1[10,11,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
7426 ; AVX2-SLOW-NEXT: vpor %ymm2, %ymm1, %ymm1
7427 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm2 = ymm0[1,1,1,1,4,5,6,7,9,9,9,9,12,13,14,15]
7428 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm2 = ymm2[0,1,2,3,5,5,5,5,8,9,10,11,13,13,13,13]
7429 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
7430 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,6,7,u,u,18,19,u,u,u,u,u,u,u,u,24,25,16,17,u,u]
7431 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2],ymm0[3],ymm2[4],ymm0[5,6,7,8],ymm2[9,10],ymm0[11],ymm2[12],ymm0[13,14,15]
7432 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,0,255,255,255,255,255,255,0,0,255,255,0,0,0,0,255,255,255,255,0,0,0,0,0,0,255,255]
7433 ; AVX2-SLOW-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
7434 ; AVX2-SLOW-NEXT: retq
7436 ; AVX2-FAST-ALL-LABEL: PR24935:
7437 ; AVX2-FAST-ALL: # %bb.0:
7438 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm2 = <0,5,u,u,0,4,6,2>
7439 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm2, %ymm0
7440 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,ymm0[2,3],zero,zero,zero,zero,zero,zero,ymm0[6,7],zero,zero,ymm0[18,19,22,23],zero,zero,zero,zero,ymm0[26,27,28,29,16,17],zero,zero
7441 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm2 = <5,6,3,0,0,6,4,u>
7442 ; AVX2-FAST-ALL-NEXT: vpermd %ymm1, %ymm2, %ymm1
7443 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[2,3,0,1],zero,zero,ymm1[6,7,0,1,10,11],zero,zero,ymm1[12,13],zero,zero,zero,zero,ymm1[16,17,20,21],zero,zero,zero,zero,zero,zero,ymm1[24,25]
7444 ; AVX2-FAST-ALL-NEXT: vpor %ymm0, %ymm1, %ymm0
7445 ; AVX2-FAST-ALL-NEXT: retq
7447 ; AVX2-FAST-PERLANE-LABEL: PR24935:
7448 ; AVX2-FAST-PERLANE: # %bb.0:
7449 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,zero,zero,ymm1[8,9],zero,zero,zero,zero,ymm1[14,15,12,13,0,1,24,25,24,25],zero,zero,ymm1[24,25,16,17,30,31,28,29,16,17]
7450 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
7451 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5],zero,zero,ymm1[10,11,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
7452 ; AVX2-FAST-PERLANE-NEXT: vpor %ymm2, %ymm1, %ymm1
7453 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[u,u,2,3,2,3,u,u,10,11,u,u,u,u,u,u,u,u,18,19,18,19,u,u,26,27,u,u,u,u,u,u]
7454 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
7455 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,6,7,u,u,18,19,u,u,u,u,u,u,u,u,24,25,16,17,u,u]
7456 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2],ymm0[3],ymm2[4],ymm0[5,6,7,8],ymm2[9,10],ymm0[11],ymm2[12],ymm0[13,14,15]
7457 ; AVX2-FAST-PERLANE-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,0,255,255,255,255,255,255,0,0,255,255,0,0,0,0,255,255,255,255,0,0,0,0,0,0,255,255]
7458 ; AVX2-FAST-PERLANE-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
7459 ; AVX2-FAST-PERLANE-NEXT: retq
7461 ; AVX512VL-LABEL: PR24935:
7462 ; AVX512VL: # %bb.0:
7463 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [11,10,17,13,10,7,27,0,17,25,0,12,29,20,16,8]
7464 ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
7465 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
7466 ; AVX512VL-NEXT: retq
7468 ; XOPAVX1-LABEL: PR24935:
7470 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
7471 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm3 = xmm2[u,u,u,u],xmm1[0,1],xmm2[8,9,u,u,u,u,u,u,0,1]
7472 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
7473 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm5 = xmm0[2,3],xmm4[2,3],xmm0[u,u,u,u],xmm4[10,11],xmm0[8,9,0,1,u,u]
7474 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm5[0,1],xmm3[2,3],xmm5[4,5,6],xmm3[7]
7475 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm2[6,7,4,5,u,u,10,11,4,5],xmm1[14,15],xmm2[u,u],xmm1[0,1]
7476 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
7477 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
7478 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3,4,5],xmm0[6],xmm1[7]
7479 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
7480 ; XOPAVX1-NEXT: retq
7482 ; XOPAVX2-LABEL: PR24935:
7484 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,zero,zero,ymm1[8,9],zero,zero,zero,zero,ymm1[14,15,12,13,0,1,24,25,24,25],zero,zero,ymm1[24,25,16,17,30,31,28,29,16,17]
7485 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
7486 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5],zero,zero,ymm1[10,11,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
7487 ; XOPAVX2-NEXT: vpor %ymm2, %ymm1, %ymm1
7488 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm2 = ymm0[1,1,1,1,4,5,6,7,9,9,9,9,12,13,14,15]
7489 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm2 = ymm2[0,1,2,3,5,5,5,5,8,9,10,11,13,13,13,13]
7490 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
7491 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,6,7,u,u,18,19,u,u,u,u,u,u,u,u,24,25,16,17,u,u]
7492 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2],ymm0[3],ymm2[4],ymm0[5,6,7,8],ymm2[9,10],ymm0[11],ymm2[12],ymm0[13,14,15]
7493 ; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,0,255,255,255,255,255,255,0,0,255,255,0,0,0,0,255,255,255,255,0,0,0,0,0,0,255,255]
7494 ; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
7495 ; XOPAVX2-NEXT: retq
7496 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 27, i32 26, i32 1, i32 29, i32 26, i32 23, i32 11, i32 16, i32 1, i32 9, i32 16, i32 28, i32 13, i32 4, i32 0, i32 24>
7497 ret <16 x i16> %shuffle
; Applies an irregular single-input word shuffle to %vec, then keeps each
; shuffled lane only where the matching lane of %mask equals zero; every other
; lane of the result is zero.
; The checks expect the AVX512VL runs to fold the compare and select into a
; zero-masked vpermw (mask built with vptestnmw), and the other runs to
; compare against zero and AND the result with the shuffled vector.
; NOTE(review): presumably a regression test for bug report 34369, judging
; only by the function name - confirm against the bug tracker.
7500 define <16 x i16> @PR34369(<16 x i16> %vec, <16 x i16> %mask) {
7501 ; AVX1-LABEL: PR34369:
7503 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
7504 ; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm2[u,u,u,u,u,u,10,11,u,u,u,u,u,u,4,5]
7505 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,0,1,0,1,u,u,10,11,4,5,4,5,u,u]
7506 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3],xmm0[4,5,6],xmm3[7]
7507 ; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[14,15,0,1,12,13,0,1,2,3,4,5,8,9,8,9]
7508 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
7509 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
7510 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
7511 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2
7512 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1
7513 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
7514 ; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0
7517 ; AVX2-SLOW-LABEL: PR34369:
7518 ; AVX2-SLOW: # %bb.0:
7519 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[6,7,0,1,0,1,u,u,10,11,4,5,4,5,u,u,30,31,16,17,28,29,16,17,18,19,20,21,24,25,24,25]
7520 ; AVX2-SLOW-NEXT: vextracti128 $1, %ymm0, %xmm0
7521 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,1]
7522 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,6]
7523 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[3],xmm2[4,5,6],xmm0[7]
7524 ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
7525 ; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2
7526 ; AVX2-SLOW-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
7527 ; AVX2-SLOW-NEXT: vpand %ymm0, %ymm1, %ymm0
7528 ; AVX2-SLOW-NEXT: retq
7530 ; AVX2-FAST-LABEL: PR34369:
7531 ; AVX2-FAST: # %bb.0:
7532 ; AVX2-FAST-NEXT: vextracti128 $1, %ymm0, %xmm2
7533 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u,10,11,u,u,u,u,u,u,4,5]
7534 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,0,1,0,1,u,u,10,11,4,5,4,5,u,u,30,31,16,17,28,29,16,17,18,19,20,21,24,25,24,25]
7535 ; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0,1,2],xmm2[3],xmm0[4,5,6],xmm2[7]
7536 ; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
7537 ; AVX2-FAST-NEXT: vpxor %xmm2, %xmm2, %xmm2
7538 ; AVX2-FAST-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
7539 ; AVX2-FAST-NEXT: vpand %ymm0, %ymm1, %ymm0
7540 ; AVX2-FAST-NEXT: retq
7542 ; AVX512VL-LABEL: PR34369:
7543 ; AVX512VL: # %bb.0:
7544 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [3,0,0,13,5,2,2,10,15,8,14,8,9,10,12,12]
7545 ; AVX512VL-NEXT: vptestnmw %ymm1, %ymm1, %k1
7546 ; AVX512VL-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z}
7547 ; AVX512VL-NEXT: retq
7549 ; XOPAVX1-LABEL: PR34369:
7551 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
7552 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[6,7,0,1,0,1],xmm2[10,11],xmm0[10,11,4,5,4,5],xmm2[4,5]
7553 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[14,15,0,1,12,13,0,1,2,3,4,5,8,9,8,9]
7554 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
7555 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
7556 ; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
7557 ; XOPAVX1-NEXT: vpcomeqw %xmm3, %xmm2, %xmm2
7558 ; XOPAVX1-NEXT: vpcomeqw %xmm3, %xmm1, %xmm1
7559 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
7560 ; XOPAVX1-NEXT: vandps %ymm0, %ymm1, %ymm0
7561 ; XOPAVX1-NEXT: retq
7563 ; XOPAVX2-LABEL: PR34369:
7565 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
7566 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,0,1,0,1,u,u,10,11,4,5,4,5,u,u,30,31,16,17,28,29,16,17,18,19,20,21,24,25,24,25]
7567 ; XOPAVX2-NEXT: vpperm {{.*#+}} xmm2 = xmm0[0,1,2,3,4,5],xmm2[10,11],xmm0[8,9,10,11,12,13],xmm2[4,5]
7568 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
7569 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
7570 ; XOPAVX2-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
7571 ; XOPAVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
7572 ; XOPAVX2-NEXT: retq
7573 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 9, i32 10, i32 12, i32 12>
7574 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
7575 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
; Loads an i32, inserts it into dword 0 of a zero <4 x i32>, reinterprets the
; vector as <8 x i16>, and splats word 0 across all sixteen result lanes.
; The checks expect the AVX2-capable runs to fold the load into a single
; vpbroadcastw from (%rdi); the AVX-only runs load with vmovd and splat with
; vpshuflw/vpshufd/vinsertf128.
7579 define <16 x i16> @insert_dup_mem_v16i16_i32(i32* %ptr) {
7580 ; AVX1-LABEL: insert_dup_mem_v16i16_i32:
7582 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
7583 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
7584 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7585 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7588 ; AVX2OR512VL-LABEL: insert_dup_mem_v16i16_i32:
7589 ; AVX2OR512VL: # %bb.0:
7590 ; AVX2OR512VL-NEXT: vpbroadcastw (%rdi), %ymm0
7591 ; AVX2OR512VL-NEXT: retq
7593 ; XOPAVX1-LABEL: insert_dup_mem_v16i16_i32:
7595 ; XOPAVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
7596 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
7597 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7598 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7599 ; XOPAVX1-NEXT: retq
7601 ; XOPAVX2-LABEL: insert_dup_mem_v16i16_i32:
7603 ; XOPAVX2-NEXT: vpbroadcastw (%rdi), %ymm0
7604 ; XOPAVX2-NEXT: retq
7605 %tmp = load i32, i32* %ptr, align 4
7606 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
7607 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
7608 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> zeroinitializer
7609 ret <16 x i16> %tmp3
; Loads an i16, sign-extends it to i32, inserts that into dword 0 of a zero
; <4 x i32>, reinterprets the vector as <8 x i16>, and splats word 0. Word 0
; is the low half of the extended value, so only the originally loaded 16
; bits are ever observed.
; The checks expect the AVX2-capable runs to emit a single vpbroadcastw from
; (%rdi); the AVX-only runs use movzwl + vmovd and then splat with shuffles.
7612 define <16 x i16> @insert_dup_mem_v16i16_sext_i16(i16* %ptr) {
7613 ; AVX1-LABEL: insert_dup_mem_v16i16_sext_i16:
7615 ; AVX1-NEXT: movzwl (%rdi), %eax
7616 ; AVX1-NEXT: vmovd %eax, %xmm0
7617 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
7618 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7619 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7622 ; AVX2OR512VL-LABEL: insert_dup_mem_v16i16_sext_i16:
7623 ; AVX2OR512VL: # %bb.0:
7624 ; AVX2OR512VL-NEXT: vpbroadcastw (%rdi), %ymm0
7625 ; AVX2OR512VL-NEXT: retq
7627 ; XOPAVX1-LABEL: insert_dup_mem_v16i16_sext_i16:
7629 ; XOPAVX1-NEXT: movzwl (%rdi), %eax
7630 ; XOPAVX1-NEXT: vmovd %eax, %xmm0
7631 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
7632 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7633 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7634 ; XOPAVX1-NEXT: retq
7636 ; XOPAVX2-LABEL: insert_dup_mem_v16i16_sext_i16:
7638 ; XOPAVX2-NEXT: vpbroadcastw (%rdi), %ymm0
7639 ; XOPAVX2-NEXT: retq
7640 %tmp = load i16, i16* %ptr, align 2
7641 %tmp1 = sext i16 %tmp to i32
7642 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
7643 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
7644 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <16 x i32> zeroinitializer
7645 ret <16 x i16> %tmp4
; Loads an i32, inserts it into dword 0 of a zero <4 x i32>, reinterprets the
; vector as <8 x i16>, and splats word 1 (the upper half of the loaded dword).
; The checks expect the AVX2-capable runs to broadcast directly from byte
; offset 2 of the pointer (vpbroadcastw 2(%rdi)); the AVX-only runs load with
; vmovd and splat element 1 with shuffles.
; Attribute group #0 is defined outside this excerpt.
7648 define <16 x i16> @insert_dup_elt1_mem_v16i16_i32(i32* %ptr) #0 {
7649 ; AVX1-LABEL: insert_dup_elt1_mem_v16i16_i32:
7651 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
7652 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
7653 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7654 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7657 ; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v16i16_i32:
7658 ; AVX2OR512VL: # %bb.0:
7659 ; AVX2OR512VL-NEXT: vpbroadcastw 2(%rdi), %ymm0
7660 ; AVX2OR512VL-NEXT: retq
7662 ; XOPAVX1-LABEL: insert_dup_elt1_mem_v16i16_i32:
7664 ; XOPAVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
7665 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
7666 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7667 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7668 ; XOPAVX1-NEXT: retq
7670 ; XOPAVX2-LABEL: insert_dup_elt1_mem_v16i16_i32:
7672 ; XOPAVX2-NEXT: vpbroadcastw 2(%rdi), %ymm0
7673 ; XOPAVX2-NEXT: retq
7674 %tmp = load i32, i32* %ptr, align 4
7675 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
7676 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
7677 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
7678 ret <16 x i16> %tmp3
; Loads an i32, inserts it into dword 1 of a zero <4 x i32>, reinterprets the
; vector as <8 x i16>, and splats word 3 (the upper half of that dword, i.e.
; the upper half of the loaded value).
; The checks expect the AVX2-capable runs to broadcast directly from byte
; offset 2 of the pointer (vpbroadcastw 2(%rdi)); the AVX-only runs load with
; vbroadcastss and splat element 3 with shuffles.
; Attribute group #0 is defined outside this excerpt.
7681 define <16 x i16> @insert_dup_elt3_mem_v16i16_i32(i32* %ptr) #0 {
7682 ; AVX1-LABEL: insert_dup_elt3_mem_v16i16_i32:
7684 ; AVX1-NEXT: vbroadcastss (%rdi), %xmm0
7685 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
7686 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7687 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7690 ; AVX2OR512VL-LABEL: insert_dup_elt3_mem_v16i16_i32:
7691 ; AVX2OR512VL: # %bb.0:
7692 ; AVX2OR512VL-NEXT: vpbroadcastw 2(%rdi), %ymm0
7693 ; AVX2OR512VL-NEXT: retq
7695 ; XOPAVX1-LABEL: insert_dup_elt3_mem_v16i16_i32:
7697 ; XOPAVX1-NEXT: vbroadcastss (%rdi), %xmm0
7698 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
7699 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7700 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7701 ; XOPAVX1-NEXT: retq
7703 ; XOPAVX2-LABEL: insert_dup_elt3_mem_v16i16_i32:
7705 ; XOPAVX2-NEXT: vpbroadcastw 2(%rdi), %ymm0
7706 ; XOPAVX2-NEXT: retq
7707 %tmp = load i32, i32* %ptr, align 4
7708 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
7709 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
7710 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
7711 ret <16 x i16> %tmp3
; Loads an i64, inserts it into qword 0 of a zero <2 x i64>, reinterprets the
; vector as <8 x i16>, and splats word 0 across all sixteen result lanes.
; The checks expect the AVX2-capable runs to fold the load into a single
; vpbroadcastw from (%rdi); the AVX-only runs load with vmovq and splat with
; vpshuflw/vpshufd/vinsertf128.
7714 define <16 x i16> @insert_dup_mem_v16i16_i64(i64* %ptr) {
7715 ; AVX1-LABEL: insert_dup_mem_v16i16_i64:
7717 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
7718 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
7719 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7720 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7723 ; AVX2OR512VL-LABEL: insert_dup_mem_v16i16_i64:
7724 ; AVX2OR512VL: # %bb.0:
7725 ; AVX2OR512VL-NEXT: vpbroadcastw (%rdi), %ymm0
7726 ; AVX2OR512VL-NEXT: retq
7728 ; XOPAVX1-LABEL: insert_dup_mem_v16i16_i64:
7730 ; XOPAVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
7731 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
7732 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7733 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7734 ; XOPAVX1-NEXT: retq
7736 ; XOPAVX2-LABEL: insert_dup_mem_v16i16_i64:
7738 ; XOPAVX2-NEXT: vpbroadcastw (%rdi), %ymm0
7739 ; XOPAVX2-NEXT: retq
7740 %tmp = load i64, i64* %ptr, align 4
7741 %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0
7742 %tmp2 = bitcast <2 x i64> %tmp1 to <8 x i16>
7743 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> zeroinitializer
7744 ret <16 x i16> %tmp3
; Splat of i16 element 1 taken from an i64 loaded from %ptr.
; Same construction as the element-0 variant (i64 inserted into lane 0 of a
; zero <2 x i64>, bitcast to <8 x i16>), but the shuffle mask selects element 1
; for all 16 lanes. The AVX2/AVX512VL and XOP+AVX2 expectations broadcast
; directly from byte offset 2 of the pointer; the AVX1 and XOP+AVX1
; expectations load with vmovq and pick word 1 via vpshuflw.
7747 define <16 x i16> @insert_dup_elt1_mem_v16i16_i64(i64* %ptr) {
7748 ; AVX1-LABEL: insert_dup_elt1_mem_v16i16_i64:
7750 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
7751 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
7752 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7753 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7756 ; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v16i16_i64:
7757 ; AVX2OR512VL: # %bb.0:
7758 ; AVX2OR512VL-NEXT: vpbroadcastw 2(%rdi), %ymm0
7759 ; AVX2OR512VL-NEXT: retq
7761 ; XOPAVX1-LABEL: insert_dup_elt1_mem_v16i16_i64:
7763 ; XOPAVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
7764 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
7765 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7766 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7767 ; XOPAVX1-NEXT: retq
7769 ; XOPAVX2-LABEL: insert_dup_elt1_mem_v16i16_i64:
7771 ; XOPAVX2-NEXT: vpbroadcastw 2(%rdi), %ymm0
7772 ; XOPAVX2-NEXT: retq
7773 %tmp = load i64, i64* %ptr, align 4
7774 %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0
7775 %tmp2 = bitcast <2 x i64> %tmp1 to <8 x i16>
7776 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
7777 ret <16 x i16> %tmp3
; Splat of i16 element 3 taken from an i64 loaded from %ptr.
; The i64 occupies lane 0 of a zero <2 x i64>, so i16 element 3 is the highest
; 16-bit word of the loaded value. The AVX2/AVX512VL and XOP+AVX2 expectations
; broadcast directly from byte offset 6 of the pointer; the AVX1 and XOP+AVX1
; expectations load with vmovq and pick word 3 via vpshuflw.
7780 define <16 x i16> @insert_dup_elt3_mem_v16i16_i64(i64* %ptr) {
7781 ; AVX1-LABEL: insert_dup_elt3_mem_v16i16_i64:
7783 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
7784 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
7785 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7786 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7789 ; AVX2OR512VL-LABEL: insert_dup_elt3_mem_v16i16_i64:
7790 ; AVX2OR512VL: # %bb.0:
7791 ; AVX2OR512VL-NEXT: vpbroadcastw 6(%rdi), %ymm0
7792 ; AVX2OR512VL-NEXT: retq
7794 ; XOPAVX1-LABEL: insert_dup_elt3_mem_v16i16_i64:
7796 ; XOPAVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
7797 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
7798 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7799 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7800 ; XOPAVX1-NEXT: retq
7802 ; XOPAVX2-LABEL: insert_dup_elt3_mem_v16i16_i64:
7804 ; XOPAVX2-NEXT: vpbroadcastw 6(%rdi), %ymm0
7805 ; XOPAVX2-NEXT: retq
7806 %tmp = load i64, i64* %ptr, align 4
7807 %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0
7808 %tmp2 = bitcast <2 x i64> %tmp1 to <8 x i16>
7809 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
7810 ret <16 x i16> %tmp3
; Splat of i16 element 7 where the loaded i64 sits in the UPPER lane.
; Unlike the sibling i64 tests, the value is inserted into lane 1 of the zero
; <2 x i64>, so i16 element 7 of the bitcast vector is the highest 16-bit word
; of the loaded value. The AVX2/AVX512VL and XOP+AVX2 expectations therefore
; broadcast from byte offset 6 of the pointer. The AVX1 and XOP+AVX1
; expectations load the i64 into the low half with vmovq and replicate
; bytes 6-7 with a single vpshufb.
7813 define <16 x i16> @insert_dup_elt7_mem_v16i16_i64(i64* %ptr) {
7814 ; AVX1-LABEL: insert_dup_elt7_mem_v16i16_i64:
7816 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
7817 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
7818 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7821 ; AVX2OR512VL-LABEL: insert_dup_elt7_mem_v16i16_i64:
7822 ; AVX2OR512VL: # %bb.0:
7823 ; AVX2OR512VL-NEXT: vpbroadcastw 6(%rdi), %ymm0
7824 ; AVX2OR512VL-NEXT: retq
7826 ; XOPAVX1-LABEL: insert_dup_elt7_mem_v16i16_i64:
7828 ; XOPAVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
7829 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
7830 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7831 ; XOPAVX1-NEXT: retq
7833 ; XOPAVX2-LABEL: insert_dup_elt7_mem_v16i16_i64:
7835 ; XOPAVX2-NEXT: vpbroadcastw 6(%rdi), %ymm0
7836 ; XOPAVX2-NEXT: retq
7837 %tmp = load i64, i64* %ptr, align 4
7838 %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 1
7839 %tmp2 = bitcast <2 x i64> %tmp1 to <8 x i16>
7840 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
7841 ret <16 x i16> %tmp3
; Splat of a loaded i16 that has been sign-extended to i64 before insertion.
; The i16 is loaded, sign-extended to i64, inserted into lane 0 of a zero
; <2 x i64>, bitcast to <8 x i16>, and element 0 (the low 16 bits of the
; extended value) is replicated into all 16 lanes. The AVX2/AVX512VL and
; XOP+AVX2 expectations reduce this to one vpbroadcastw from (%rdi); the AVX1
; and XOP+AVX1 expectations use a scalar movzwl + vmovd followed by
; vpshuflw + vpshufd + vinsertf128.
7844 define <16 x i16> @insert_dup_mem_v16i16_sext_i16_i64(i16* %ptr) {
7845 ; AVX1-LABEL: insert_dup_mem_v16i16_sext_i16_i64:
7847 ; AVX1-NEXT: movzwl (%rdi), %eax
7848 ; AVX1-NEXT: vmovd %eax, %xmm0
7849 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
7850 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7851 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7854 ; AVX2OR512VL-LABEL: insert_dup_mem_v16i16_sext_i16_i64:
7855 ; AVX2OR512VL: # %bb.0:
7856 ; AVX2OR512VL-NEXT: vpbroadcastw (%rdi), %ymm0
7857 ; AVX2OR512VL-NEXT: retq
7859 ; XOPAVX1-LABEL: insert_dup_mem_v16i16_sext_i16_i64:
7861 ; XOPAVX1-NEXT: movzwl (%rdi), %eax
7862 ; XOPAVX1-NEXT: vmovd %eax, %xmm0
7863 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
7864 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7865 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7866 ; XOPAVX1-NEXT: retq
7868 ; XOPAVX2-LABEL: insert_dup_mem_v16i16_sext_i16_i64:
7870 ; XOPAVX2-NEXT: vpbroadcastw (%rdi), %ymm0
7871 ; XOPAVX2-NEXT: retq
7872 %tmp = load i16, i16* %ptr, align 2
7873 %tmp1 = sext i16 %tmp to i64
7874 %tmp2 = insertelement <2 x i64> zeroinitializer, i64 %tmp1, i32 0
7875 %tmp3 = bitcast <2 x i64> %tmp2 to <8 x i16>
7876 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <16 x i32> zeroinitializer
7877 ret <16 x i16> %tmp4
; Interleave of %x[4..7] with %y[12..15] into the low eight result lanes; the
; upper eight lanes are undef. Mask indices 28-31 address the second operand
; (index minus 16 gives %y elements 12-15), i.e. the high words of %y's upper
; 128-bit half. Every expectation extracts the upper half of ymm1 and then
; performs one 128-bit vpunpckhwd (vextractf128 on the AVX1-based
; configurations, vextracti128 on the AVX2-based ones).
7880 define <16 x i16> @unpckh_v16i16(<16 x i16> %x, <16 x i16> %y) {
7881 ; AVX1-LABEL: unpckh_v16i16:
7883 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
7884 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
7887 ; AVX2OR512VL-LABEL: unpckh_v16i16:
7888 ; AVX2OR512VL: # %bb.0:
7889 ; AVX2OR512VL-NEXT: vextracti128 $1, %ymm1, %xmm1
7890 ; AVX2OR512VL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
7891 ; AVX2OR512VL-NEXT: retq
7893 ; XOPAVX1-LABEL: unpckh_v16i16:
7895 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
7896 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
7897 ; XOPAVX1-NEXT: retq
7899 ; XOPAVX2-LABEL: unpckh_v16i16:
7901 ; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
7902 ; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
7903 ; XOPAVX2-NEXT: retq
7904 %unpckh = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 4, i32 28, i32 5, i32 29, i32 6, i32 30, i32 7, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
7905 ret <16 x i16> %unpckh
; Variable logical right shift followed by a shuffle that keeps one lane.
; NOTE(review): the name suggests a regression test for bug report 43230;
; confirm against the bug tracker, the report itself is not visible here.
; The shuffle's first operand is zeroinitializer, so mask indices 0-13 and 15
; yield zero; index 30 selects element 14 of %shr (30 minus 16). Only result
; lane 14 can therefore be non-zero.
; Expected lowering differs per configuration
;  - AVX1 and both XOP variants shift only the upper 128-bit halves of the
;    inputs, re-insert the result as the upper half, and clear the remaining
;    lanes by ANDing with a constant-pool mask.
;  - AVX2 widens the high words to dwords against a zero register, shifts with
;    vpsrlvd, and uses vpshufb to place bytes 26-27 at byte positions 28-29
;    (lane 14) while zeroing every other byte.
;  - AVX512VL shifts all lanes with vpsrlvw and then masks with a
;    constant-pool value.
7908 define <16 x i16> @pr43230(<16 x i16> %a, <16 x i16> %b) {
7909 ; AVX1-LABEL: pr43230:
7911 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
7912 ; AVX1-NEXT: vpsllw $12, %xmm1, %xmm2
7913 ; AVX1-NEXT: vpsllw $4, %xmm1, %xmm1
7914 ; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1
7915 ; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm2
7916 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7917 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm3
7918 ; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
7919 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm1
7920 ; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
7921 ; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm1
7922 ; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
7923 ; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
7924 ; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1
7925 ; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
7926 ; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
7927 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7928 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
7931 ; AVX2-LABEL: pr43230:
7933 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
7934 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
7935 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
7936 ; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
7937 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[26,27],zero,zero
7940 ; AVX512VL-LABEL: pr43230:
7941 ; AVX512VL: # %bb.0:
7942 ; AVX512VL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
7943 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
7944 ; AVX512VL-NEXT: retq
7946 ; XOPAVX1-LABEL: pr43230:
7948 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
7949 ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
7950 ; XOPAVX1-NEXT: vpsubw %xmm1, %xmm2, %xmm1
7951 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7952 ; XOPAVX1-NEXT: vpshlw %xmm1, %xmm0, %xmm0
7953 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7954 ; XOPAVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
7955 ; XOPAVX1-NEXT: retq
7957 ; XOPAVX2-LABEL: pr43230:
7959 ; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
7960 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
7961 ; XOPAVX2-NEXT: vpsubw %xmm1, %xmm2, %xmm1
7962 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
7963 ; XOPAVX2-NEXT: vpshlw %xmm1, %xmm0, %xmm0
7964 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
7965 ; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
7966 ; XOPAVX2-NEXT: retq
7967 %shr = lshr <16 x i16> %a, %b
7968 %shuf = shufflevector <16 x i16> zeroinitializer, <16 x i16> %shr, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 30, i32 15>
7969 ret <16 x i16> %shuf