1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX1OR2,AVX1
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX1OR2,AVX2OR512VL,AVX2,AVX2-SLOW
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX1OR2,AVX2OR512VL,AVX2,AVX2-FAST,AVX2-FAST-ALL
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX1OR2,AVX2OR512VL,AVX2,AVX2-FAST,AVX2-FAST-PERLANE
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VL-SLOW
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VL-FAST,AVX512VL-FAST-CROSSLANE
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VL-FAST,AVX512VL-FAST-PERLANE
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=ALL,XOP,XOPAVX1
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=ALL,XOP,XOPAVX2
12 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
13 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
15 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
16 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
17 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
20 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
21 ; AVX2OR512VL: # %bb.0:
22 ; AVX2OR512VL-NEXT: vpbroadcastw %xmm0, %ymm0
23 ; AVX2OR512VL-NEXT: retq
25 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
27 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
28 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
29 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
32 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
34 ; XOPAVX2-NEXT: vpbroadcastw %xmm0, %ymm0
36 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
37 ret <16 x i16> %shuffle
40 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) {
41 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
43 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
44 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
45 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
46 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
47 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
50 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
52 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
53 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
54 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
55 ; AVX2-SLOW-NEXT: retq
57 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
58 ; AVX2-FAST-ALL: # %bb.0:
59 ; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
60 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,1]
61 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
62 ; AVX2-FAST-ALL-NEXT: retq
64 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
65 ; AVX2-FAST-PERLANE: # %bb.0:
66 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
67 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
68 ; AVX2-FAST-PERLANE-NEXT: retq
70 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
72 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
73 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
76 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
78 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
79 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
80 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
81 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
82 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
85 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
87 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
88 ; XOPAVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
89 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
91 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
92 ret <16 x i16> %shuffle
95 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) {
96 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
98 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
99 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
100 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
101 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
102 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
105 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
106 ; AVX2-SLOW: # %bb.0:
107 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
108 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
109 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
110 ; AVX2-SLOW-NEXT: retq
112 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
113 ; AVX2-FAST-ALL: # %bb.0:
114 ; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
115 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
116 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
117 ; AVX2-FAST-ALL-NEXT: retq
119 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
120 ; AVX2-FAST-PERLANE: # %bb.0:
121 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
122 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
123 ; AVX2-FAST-PERLANE-NEXT: retq
125 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
127 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
128 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
129 ; AVX512VL-NEXT: retq
131 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
133 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
134 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
135 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
136 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
137 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
140 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
142 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
143 ; XOPAVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
144 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
146 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
147 ret <16 x i16> %shuffle
150 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) {
151 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
153 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
154 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
155 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
156 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
157 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
160 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
161 ; AVX2-SLOW: # %bb.0:
162 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
163 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
164 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
165 ; AVX2-SLOW-NEXT: retq
167 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
168 ; AVX2-FAST-ALL: # %bb.0:
169 ; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
170 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
171 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
172 ; AVX2-FAST-ALL-NEXT: retq
174 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
175 ; AVX2-FAST-PERLANE: # %bb.0:
176 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
177 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
178 ; AVX2-FAST-PERLANE-NEXT: retq
180 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
182 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
183 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
184 ; AVX512VL-NEXT: retq
186 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
188 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
189 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
190 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
191 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
192 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
195 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
197 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
198 ; XOPAVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
199 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
201 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
202 ret <16 x i16> %shuffle
205 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
206 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
208 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
209 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
210 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
211 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
214 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
216 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9]
217 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
220 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
222 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
223 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
224 ; AVX512VL-NEXT: retq
226 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
228 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
229 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
230 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
231 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
234 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
236 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9]
237 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
239 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
240 ret <16 x i16> %shuffle
243 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
244 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
246 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
247 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
248 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
249 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
252 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
254 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,10,11,0,1]
255 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
258 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
260 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
261 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
262 ; AVX512VL-NEXT: retq
264 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
266 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
267 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
268 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
269 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
272 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
274 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,10,11,0,1]
275 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
277 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
278 ret <16 x i16> %shuffle
281 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
282 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
284 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
285 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
286 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
287 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
290 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
292 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,12,13,0,1,0,1]
293 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
296 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
298 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
299 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
300 ; AVX512VL-NEXT: retq
302 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
304 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
305 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
306 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
307 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
310 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
312 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,12,13,0,1,0,1]
313 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
315 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
316 ret <16 x i16> %shuffle
319 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
320 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
322 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
323 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
324 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
325 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
328 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
330 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1]
331 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
334 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
336 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
337 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
338 ; AVX512VL-NEXT: retq
340 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
342 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
343 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
344 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
345 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
348 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
350 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1]
351 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
353 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
354 ret <16 x i16> %shuffle
357 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
358 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
360 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
361 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
362 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,1,4,5,6,7]
363 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,1]
364 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
365 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
366 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
369 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
371 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
372 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
375 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
377 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,8]
378 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
379 ; AVX512VL-NEXT: retq
381 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
383 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
384 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1],xmm1[0,1]
385 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
386 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
387 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
390 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
392 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
393 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
395 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
396 ret <16 x i16> %shuffle
399 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
400 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
402 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
403 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
404 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,3,0,4,5,6,7]
405 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,1]
406 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
407 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
408 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
411 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
413 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
414 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,10,11,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
417 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
419 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,9,0]
420 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
421 ; AVX512VL-NEXT: retq
423 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
425 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
426 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1],xmm1[2,3],xmm0[0,1]
427 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
428 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
429 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
432 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
434 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
435 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,10,11,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
437 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
438 ret <16 x i16> %shuffle
441 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
442 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
444 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
445 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
446 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
447 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
448 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
449 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
452 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
454 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
455 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,12,13,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
458 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
460 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,10,0,0]
461 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
462 ; AVX512VL-NEXT: retq
464 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
466 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
467 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1],xmm1[4,5],xmm0[0,1,0,1]
468 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
469 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
470 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
473 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
475 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
476 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,12,13,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
478 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
479 ret <16 x i16> %shuffle
482 define <16 x i16> @shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
483 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
485 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
486 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
487 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1]
488 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
489 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
490 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
493 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
495 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
496 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
499 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
501 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,11,0,0,0]
502 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
503 ; AVX512VL-NEXT: retq
505 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
507 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
508 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1],xmm1[6,7],xmm0[0,1,0,1,0,1]
509 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
510 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
511 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
514 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
516 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
517 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
519 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
520 ret <16 x i16> %shuffle
523 define <16 x i16> @shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
524 ; AVX1-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
526 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
527 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
528 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
529 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
530 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
531 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
534 ; AVX2-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
536 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
537 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
540 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
542 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,12,0,0,0,0]
543 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
544 ; AVX512VL-NEXT: retq
546 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
548 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
549 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1],xmm1[8,9],xmm0[0,1,0,1,0,1,0,1]
550 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
551 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
552 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
555 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
557 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
558 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
560 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
561 ret <16 x i16> %shuffle
564 define <16 x i16> @shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
565 ; AVX1-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
567 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
568 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
569 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
570 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
571 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
572 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
575 ; AVX2-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
577 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
578 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
581 ; AVX512VL-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
583 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,13,0,0,0,0,0]
584 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
585 ; AVX512VL-NEXT: retq
587 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
589 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
590 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm0[0,1,0,1],xmm1[10,11],xmm0[0,1,0,1,0,1,0,1,0,1]
591 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
592 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
593 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
596 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
598 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
599 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
601 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
602 ret <16 x i16> %shuffle
605 define <16 x i16> @shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
606 ; AVX1-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
608 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
609 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
610 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
611 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
612 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
613 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
616 ; AVX2-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
618 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
619 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
622 ; AVX512VL-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
624 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [0,14,0,0,0,0,0,0]
625 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
626 ; AVX512VL-NEXT: retq
628 ; XOPAVX1-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
630 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
631 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm0[0,1],xmm1[12,13],xmm0[0,1,0,1,0,1,0,1,0,1,0,1]
632 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
633 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
634 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
637 ; XOPAVX2-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
639 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
640 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
642 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
643 ret <16 x i16> %shuffle
646 define <16 x i16> @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
647 ; AVX1-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
649 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
650 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
651 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
652 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
653 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
654 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
657 ; AVX2-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
659 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3]
660 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,8,9,8,9,8,9,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
663 ; AVX512VL-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
665 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [15,0,0,0]
666 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
667 ; AVX512VL-NEXT: retq
669 ; XOPAVX1-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
671 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
672 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[14,15],xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1]
673 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
674 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
675 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
678 ; XOPAVX2-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
680 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3]
681 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,8,9,8,9,8,9,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
683 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
684 ret <16 x i16> %shuffle
687 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
688 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
690 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
691 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
692 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
693 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
694 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
697 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
698 ; AVX2-SLOW: # %bb.0:
699 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
700 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
701 ; AVX2-SLOW-NEXT: retq
703 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
704 ; AVX2-FAST: # %bb.0:
705 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
706 ; AVX2-FAST-NEXT: retq
708 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
709 ; AVX512VL-SLOW: # %bb.0:
710 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
711 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
712 ; AVX512VL-SLOW-NEXT: retq
714 ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
715 ; AVX512VL-FAST: # %bb.0:
716 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
717 ; AVX512VL-FAST-NEXT: retq
719 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
721 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
722 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
723 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
724 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
725 ; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
728 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
730 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
731 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
733 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
734 ret <16 x i16> %shuffle
737 define <16 x i16> @shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15(<16 x i16> %a, <16 x i16> %b) {
738 ; AVX1-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
740 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,7,7,7]
741 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
742 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
743 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
744 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
747 ; AVX2-SLOW-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
748 ; AVX2-SLOW: # %bb.0:
749 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
750 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
751 ; AVX2-SLOW-NEXT: retq
753 ; AVX2-FAST-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
754 ; AVX2-FAST: # %bb.0:
755 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,30,31,30,31,30,31,30,31,30,31,30,31,30,31,30,31]
756 ; AVX2-FAST-NEXT: retq
758 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
759 ; AVX512VL-SLOW: # %bb.0:
760 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
761 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
762 ; AVX512VL-SLOW-NEXT: retq
764 ; AVX512VL-FAST-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
765 ; AVX512VL-FAST: # %bb.0:
766 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,30,31,30,31,30,31,30,31,30,31,30,31,30,31,30,31]
767 ; AVX512VL-FAST-NEXT: retq
769 ; XOPAVX1-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
771 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,7,7,7]
772 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
773 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
774 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
775 ; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
778 ; XOPAVX2-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
780 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
781 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
783 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
784 ret <16 x i16> %shuffle
787 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
788 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
790 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
791 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
792 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
793 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
794 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
795 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
798 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
799 ; AVX2-SLOW: # %bb.0:
800 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
801 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
802 ; AVX2-SLOW-NEXT: retq
804 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
805 ; AVX2-FAST: # %bb.0:
806 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
807 ; AVX2-FAST-NEXT: retq
809 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
810 ; AVX512VL-SLOW: # %bb.0:
811 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
812 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
813 ; AVX512VL-SLOW-NEXT: retq
815 ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
816 ; AVX512VL-FAST: # %bb.0:
817 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
818 ; AVX512VL-FAST-NEXT: retq
820 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
822 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
823 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
824 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
825 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
826 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
827 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
830 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
832 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
833 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
835 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
836 ret <16 x i16> %shuffle
839 define <16 x i16> @shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15(<16 x i16> %a, <16 x i16> %b) {
840 ; AVX1-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
842 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
843 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,7,7,7]
844 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
845 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
846 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
847 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
850 ; AVX2-SLOW-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
851 ; AVX2-SLOW: # %bb.0:
852 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
853 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
854 ; AVX2-SLOW-NEXT: retq
856 ; AVX2-FAST-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
857 ; AVX2-FAST: # %bb.0:
858 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,6,7,6,7,6,7,14,15,14,15,14,15,14,15,22,23,22,23,22,23,22,23,30,31,30,31,30,31,30,31]
859 ; AVX2-FAST-NEXT: retq
861 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
862 ; AVX512VL-SLOW: # %bb.0:
863 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
864 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
865 ; AVX512VL-SLOW-NEXT: retq
867 ; AVX512VL-FAST-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
868 ; AVX512VL-FAST: # %bb.0:
869 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,6,7,6,7,6,7,14,15,14,15,14,15,14,15,22,23,22,23,22,23,22,23,30,31,30,31,30,31,30,31]
870 ; AVX512VL-FAST-NEXT: retq
872 ; XOPAVX1-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
874 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
875 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,7,7,7]
876 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
877 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
878 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
879 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
882 ; XOPAVX2-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
884 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
885 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
887 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7, i32 11, i32 11, i32 11, i32 11, i32 15, i32 15, i32 15, i32 15>
888 ret <16 x i16> %shuffle
891 define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14(<16 x i16> %a, <16 x i16> %b) {
892 ; AVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
894 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
895 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
896 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
897 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
898 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
899 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
902 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
903 ; AVX2-SLOW: # %bb.0:
904 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,2,4,5,6,7,8,8,10,10,12,13,14,15]
905 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,6,8,9,10,11,12,12,14,14]
906 ; AVX2-SLOW-NEXT: retq
908 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
909 ; AVX2-FAST: # %bb.0:
910 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13,16,17,16,17,20,21,20,21,24,25,24,25,28,29,28,29]
911 ; AVX2-FAST-NEXT: retq
913 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
914 ; AVX512VL-SLOW: # %bb.0:
915 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,2,4,5,6,7,8,8,10,10,12,13,14,15]
916 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,6,8,9,10,11,12,12,14,14]
917 ; AVX512VL-SLOW-NEXT: retq
919 ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
920 ; AVX512VL-FAST: # %bb.0:
921 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13,16,17,16,17,20,21,20,21,24,25,24,25,28,29,28,29]
922 ; AVX512VL-FAST-NEXT: retq
924 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
926 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
927 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
928 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
929 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
930 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
931 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
934 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
936 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,2,4,5,6,7,8,8,10,10,12,13,14,15]
937 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,6,8,9,10,11,12,12,14,14]
939 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
940 ret <16 x i16> %shuffle
943 define <16 x i16> @shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15(<16 x i16> %a, <16 x i16> %b) {
944 ; AVX1-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
946 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,3,3,4,5,6,7]
947 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,7,7]
948 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
949 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,3,3,4,5,6,7]
950 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7]
951 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
954 ; AVX2-SLOW-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
955 ; AVX2-SLOW: # %bb.0:
956 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15]
957 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,7,7,8,9,10,11,13,13,15,15]
958 ; AVX2-SLOW-NEXT: retq
960 ; AVX2-FAST-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
961 ; AVX2-FAST: # %bb.0:
962 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15,18,19,18,19,22,23,22,23,26,27,26,27,30,31,30,31]
963 ; AVX2-FAST-NEXT: retq
965 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
966 ; AVX512VL-SLOW: # %bb.0:
967 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15]
968 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,7,7,8,9,10,11,13,13,15,15]
969 ; AVX512VL-SLOW-NEXT: retq
971 ; AVX512VL-FAST-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
972 ; AVX512VL-FAST: # %bb.0:
973 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15,18,19,18,19,22,23,22,23,26,27,26,27,30,31,30,31]
974 ; AVX512VL-FAST-NEXT: retq
976 ; XOPAVX1-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
978 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,3,3,4,5,6,7]
979 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,7,7]
980 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
981 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,3,3,4,5,6,7]
982 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7]
983 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
986 ; XOPAVX2-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
988 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15]
989 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,7,7,8,9,10,11,13,13,15,15]
991 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
992 ret <16 x i16> %shuffle
995 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) {
996 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
998 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
999 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
1000 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1003 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
1004 ; AVX2-SLOW: # %bb.0:
1005 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
1006 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
1007 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1008 ; AVX2-SLOW-NEXT: retq
1010 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
1011 ; AVX2-FAST-ALL: # %bb.0:
1012 ; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
1013 ; AVX2-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,1,0,0,0,1]
1014 ; AVX2-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
1015 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
1016 ; AVX2-FAST-ALL-NEXT: retq
1018 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
1019 ; AVX2-FAST-PERLANE: # %bb.0:
1020 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
1021 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1022 ; AVX2-FAST-PERLANE-NEXT: retq
1024 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
1025 ; AVX512VL: # %bb.0:
1026 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0]
1027 ; AVX512VL-NEXT: # ymm1 = mem[0,1,0,1]
1028 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
1029 ; AVX512VL-NEXT: retq
1031 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
1033 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
1034 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
1035 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1036 ; XOPAVX1-NEXT: retq
1038 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
1040 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
1041 ; XOPAVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
1042 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1043 ; XOPAVX2-NEXT: retq
1044 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
1045 ret <16 x i16> %shuffle
1048 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) {
1049 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
1051 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
1052 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
1053 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1056 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
1057 ; AVX2-SLOW: # %bb.0:
1058 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
1059 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
1060 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1061 ; AVX2-SLOW-NEXT: retq
1063 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
1064 ; AVX2-FAST-ALL: # %bb.0:
1065 ; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
1066 ; AVX2-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,1,0,0,0,1,0]
1067 ; AVX2-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
1068 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
1069 ; AVX2-FAST-ALL-NEXT: retq
1071 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
1072 ; AVX2-FAST-PERLANE: # %bb.0:
1073 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
1074 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1075 ; AVX2-FAST-PERLANE-NEXT: retq
1077 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
1078 ; AVX512VL: # %bb.0:
1079 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0]
1080 ; AVX512VL-NEXT: # ymm1 = mem[0,1,0,1]
1081 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
1082 ; AVX512VL-NEXT: retq
1084 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
1086 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
1087 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
1088 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1089 ; XOPAVX1-NEXT: retq
1091 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
1093 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
1094 ; XOPAVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
1095 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1096 ; XOPAVX2-NEXT: retq
1097 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
1098 ret <16 x i16> %shuffle
1101 define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) {
1102 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
1104 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
1105 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
1106 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1109 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
1110 ; AVX2-SLOW: # %bb.0:
1111 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
1112 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
1113 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1114 ; AVX2-SLOW-NEXT: retq
1116 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
1117 ; AVX2-FAST-ALL: # %bb.0:
1118 ; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
1119 ; AVX2-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,1,0,0,0,1,0]
1120 ; AVX2-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
1121 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
1122 ; AVX2-FAST-ALL-NEXT: retq
1124 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
1125 ; AVX2-FAST-PERLANE: # %bb.0:
1126 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
1127 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1128 ; AVX2-FAST-PERLANE-NEXT: retq
1130 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
1131 ; AVX512VL: # %bb.0:
1132 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0]
1133 ; AVX512VL-NEXT: # ymm1 = mem[0,1,0,1]
1134 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
1135 ; AVX512VL-NEXT: retq
1137 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
1139 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
1140 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
1141 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1142 ; XOPAVX1-NEXT: retq
1144 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
1146 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
1147 ; XOPAVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
1148 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1149 ; XOPAVX2-NEXT: retq
1150 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
1151 ret <16 x i16> %shuffle
1154 define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
1155 ; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
1157 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
1158 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1161 ; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
1163 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
1164 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1167 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
1168 ; AVX512VL: # %bb.0:
1169 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0,0,0,0,4,0,0,0,0]
1170 ; AVX512VL-NEXT: # ymm1 = mem[0,1,0,1]
1171 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
1172 ; AVX512VL-NEXT: retq
1174 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
1176 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
1177 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1178 ; XOPAVX1-NEXT: retq
1180 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
1182 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
1183 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1184 ; XOPAVX2-NEXT: retq
1185 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
1186 ret <16 x i16> %shuffle
1189 define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
1190 ; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
1192 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
1193 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1196 ; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
1198 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
1199 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1202 ; AVX512VL-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
1203 ; AVX512VL: # %bb.0:
1204 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0,0,0,5,0,0,0,0,0]
1205 ; AVX512VL-NEXT: # ymm1 = mem[0,1,0,1]
1206 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
1207 ; AVX512VL-NEXT: retq
1209 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
1211 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
1212 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1213 ; XOPAVX1-NEXT: retq
1215 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
1217 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
1218 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1219 ; XOPAVX2-NEXT: retq
1220 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
1221 ret <16 x i16> %shuffle
1224 define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
1225 ; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
1227 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
1228 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1231 ; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
1233 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
1234 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1237 ; AVX512VL-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
1238 ; AVX512VL: # %bb.0:
1239 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
1240 ; AVX512VL-NEXT: # ymm1 = mem[0,1,0,1]
1241 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
1242 ; AVX512VL-NEXT: retq
1244 ; XOPAVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
1246 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
1247 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1248 ; XOPAVX1-NEXT: retq
1250 ; XOPAVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
1252 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
1253 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1254 ; XOPAVX2-NEXT: retq
1255 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1256 ret <16 x i16> %shuffle
1259 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
1260 ; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
1262 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
1263 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1266 ; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
1268 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
1269 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1272 ; AVX512VL-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
1273 ; AVX512VL: # %bb.0:
1274 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [7,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
1275 ; AVX512VL-NEXT: # ymm1 = mem[0,1,0,1]
1276 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
1277 ; AVX512VL-NEXT: retq
1279 ; XOPAVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
1281 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
1282 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1283 ; XOPAVX1-NEXT: retq
1285 ; XOPAVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
1287 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
1288 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1289 ; XOPAVX2-NEXT: retq
1290 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1291 ret <16 x i16> %shuffle
1294 define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x i16> %a, <16 x i16> %b) {
1295 ; AVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
1297 ; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0]
1298 ; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
1299 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
1300 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1303 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
1304 ; AVX2OR512VL: # %bb.0:
1305 ; AVX2OR512VL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
1306 ; AVX2OR512VL-NEXT: retq
1308 ; XOPAVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
1310 ; XOPAVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0]
1311 ; XOPAVX1-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
1312 ; XOPAVX1-NEXT: retq
1314 ; XOPAVX2-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
1316 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
1317 ; XOPAVX2-NEXT: retq
1318 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
1319 ret <16 x i16> %shuffle
1322 define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15(<16 x i16> %a, <16 x i16> %b) {
1323 ; AVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15:
1325 ; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0]
1326 ; AVX1-NEXT: vandnps %ymm0, %ymm2, %ymm0
1327 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
1328 ; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
1331 ; AVX2OR512VL-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15:
1332 ; AVX2OR512VL: # %bb.0:
1333 ; AVX2OR512VL-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
1334 ; AVX2OR512VL-NEXT: retq
1336 ; XOPAVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15:
1338 ; XOPAVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0]
1339 ; XOPAVX1-NEXT: vpcmov %ymm2, %ymm0, %ymm1, %ymm0
1340 ; XOPAVX1-NEXT: retq
1342 ; XOPAVX2-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15:
1344 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
1345 ; XOPAVX2-NEXT: retq
1346 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
1347 ret <16 x i16> %shuffle
1350 define <16 x i16> @shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31(<16 x i16> %a, <16 x i16> %b) {
1351 ; ALL-LABEL: shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31:
1353 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1355 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
1356 ret <16 x i16> %shuffle
1359 define <16 x i16> @shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
1360 ; ALL-LABEL: shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15:
1362 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1364 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 12, i32 13, i32 14, i32 15>
1365 ret <16 x i16> %shuffle
1368 define <16 x i16> @shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31(<16 x i16> %a, <16 x i16> %b) {
1369 ; AVX1-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
1371 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,0]
1372 ; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
1373 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
1374 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1377 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
1378 ; AVX2OR512VL: # %bb.0:
1379 ; AVX2OR512VL-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
1380 ; AVX2OR512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1381 ; AVX2OR512VL-NEXT: retq
1383 ; XOPAVX1-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
1385 ; XOPAVX1-NEXT: vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0, %ymm0
1386 ; XOPAVX1-NEXT: retq
1388 ; XOPAVX2-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
1390 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
1391 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1392 ; XOPAVX2-NEXT: retq
1393 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31>
1394 ret <16 x i16> %shuffle
1397 define <16 x i16> @shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
1398 ; AVX1-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
1400 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [0,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535]
1401 ; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
1402 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
1403 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1406 ; AVX2OR512VL-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
1407 ; AVX2OR512VL: # %bb.0:
1408 ; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
1409 ; AVX2OR512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1410 ; AVX2OR512VL-NEXT: retq
1412 ; XOPAVX1-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
1414 ; XOPAVX1-NEXT: vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0, %ymm0
1415 ; XOPAVX1-NEXT: retq
1417 ; XOPAVX2-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
1419 ; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
1420 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1421 ; XOPAVX2-NEXT: retq
1422 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1423 ret <16 x i16> %shuffle
1426 define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15(<16 x i16> %a, <16 x i16> %b) {
1427 ; AVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15:
1429 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,0,65535,0,65535,0,65535,0,0,65535,0,65535,0,65535,0,65535]
1430 ; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
1431 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
1432 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1435 ; AVX2-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15:
1437 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255]
1438 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
1441 ; AVX512VL-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15:
1442 ; AVX512VL: # %bb.0:
1443 ; AVX512VL-NEXT: movw $21930, %ax # imm = 0x55AA
1444 ; AVX512VL-NEXT: kmovd %eax, %k1
1445 ; AVX512VL-NEXT: vmovdqu16 %ymm1, %ymm0 {%k1}
1446 ; AVX512VL-NEXT: retq
1448 ; XOPAVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15:
1450 ; XOPAVX1-NEXT: vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0, %ymm0
1451 ; XOPAVX1-NEXT: retq
1453 ; XOPAVX2-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15:
1455 ; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255]
1456 ; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
1457 ; XOPAVX2-NEXT: retq
1458 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
1459 ret <16 x i16> %shuffle
1462 define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31(<16 x i16> %a, <16 x i16> %b) {
1463 ; AVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31:
1465 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [0,65535,0,65535,0,65535,0,65535,65535,0,65535,0,65535,0,65535,0]
1466 ; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
1467 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
1468 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1471 ; AVX2-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31:
1473 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0]
1474 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
1477 ; AVX512VL-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31:
1478 ; AVX512VL: # %bb.0:
1479 ; AVX512VL-NEXT: movw $-21931, %ax # imm = 0xAA55
1480 ; AVX512VL-NEXT: kmovd %eax, %k1
1481 ; AVX512VL-NEXT: vmovdqu16 %ymm1, %ymm0 {%k1}
1482 ; AVX512VL-NEXT: retq
1484 ; XOPAVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31:
1486 ; XOPAVX1-NEXT: vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0, %ymm0
1487 ; XOPAVX1-NEXT: retq
1489 ; XOPAVX2-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31:
1491 ; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0]
1492 ; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
1493 ; XOPAVX2-NEXT: retq
1494 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
1495 ret <16 x i16> %shuffle
1498 define <16 x i16> @shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31(<16 x i16> %a, <16 x i16> %b) {
1499 ; ALL-LABEL: shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31:
1501 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4],ymm1[5],ymm0[6],ymm1[7]
1503 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 6, i32 7, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
1504 ret <16 x i16> %shuffle
1507 define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16(<16 x i16> %a, <16 x i16> %b) {
1508 ; AVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16:
1510 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1511 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
1512 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1515 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16:
1516 ; AVX2OR512VL: # %bb.0:
1517 ; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1518 ; AVX2OR512VL-NEXT: vpbroadcastd %xmm0, %ymm0
1519 ; AVX2OR512VL-NEXT: retq
1521 ; XOPAVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16:
1523 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1524 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
1525 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1526 ; XOPAVX1-NEXT: retq
1528 ; XOPAVX2-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16:
1530 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1531 ; XOPAVX2-NEXT: vpbroadcastd %xmm0, %ymm0
1532 ; XOPAVX2-NEXT: retq
1533 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16>
1534 ret <16 x i16> %shuffle
1537 define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24(<16 x i16> %a, <16 x i16> %b) {
1538 ; AVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
1540 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1541 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1542 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
1543 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1544 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1545 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
1548 ; AVX2-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
1550 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
1551 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
1554 ; AVX512VL-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
1555 ; AVX512VL: # %bb.0:
1556 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,16,0,16,0,16,0,16,8,24,8,24,8,24,8,24]
1557 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
1558 ; AVX512VL-NEXT: retq
1560 ; XOPAVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
1562 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1563 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1564 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
1565 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1566 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1567 ; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
1568 ; XOPAVX1-NEXT: retq
1570 ; XOPAVX2-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
1572 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
1573 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
1574 ; XOPAVX2-NEXT: retq
1575 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24>
1576 ret <16 x i16> %shuffle
1579 define <16 x i16> @shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
1580 ; AVX1-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15:
1582 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1583 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1584 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7]
1585 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
1586 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1587 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1588 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1591 ; AVX2-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15:
1593 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
1594 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1597 ; AVX512VL-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15:
1598 ; AVX512VL: # %bb.0:
1599 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,20,21,22,23,8,8,8,8,28,29,30,31]
1600 ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
1601 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
1602 ; AVX512VL-NEXT: retq
1604 ; XOPAVX1-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15:
1606 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1607 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1608 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,0,1,0,1,0,1,24,25,26,27,28,29,30,31]
1609 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm3, %xmm2
1610 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm0, %xmm1, %xmm0
1611 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1612 ; XOPAVX1-NEXT: retq
1614 ; XOPAVX2-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15:
1616 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
1617 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1618 ; XOPAVX2-NEXT: retq
1619 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 4, i32 5, i32 6, i32 7, i32 24, i32 24, i32 24, i32 24, i32 12, i32 13, i32 14, i32 15>
1620 ret <16 x i16> %shuffle
1623 define <16 x i16> @shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12(<16 x i16> %a, <16 x i16> %b) {
1624 ; AVX1-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
1626 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1627 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
1628 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1629 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
1630 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
1631 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
1632 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
1633 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1634 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1637 ; AVX2-SLOW-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
1638 ; AVX2-SLOW: # %bb.0:
1639 ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1640 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
1641 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12]
1642 ; AVX2-SLOW-NEXT: retq
1644 ; AVX2-FAST-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
1645 ; AVX2-FAST: # %bb.0:
1646 ; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1647 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,14,15,12,13,10,11,8,9,22,23,20,21,18,19,16,17,30,31,28,29,26,27,24,25]
1648 ; AVX2-FAST-NEXT: retq
1650 ; AVX512VL-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
1651 ; AVX512VL: # %bb.0:
1652 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [3,2,1,0,23,22,21,20,11,10,9,8,31,30,29,28]
1653 ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
1654 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
1655 ; AVX512VL-NEXT: retq
1657 ; XOPAVX1-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
1659 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1660 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1661 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [6,7,4,5,2,3,0,1,30,31,28,29,26,27,24,25]
1662 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm3, %xmm2
1663 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm0, %xmm1, %xmm0
1664 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1665 ; XOPAVX1-NEXT: retq
1667 ; XOPAVX2-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
1669 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1670 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
1671 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12]
1672 ; XOPAVX2-NEXT: retq
1673 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 7, i32 6, i32 5, i32 4, i32 27, i32 26, i32 25, i32 24, i32 15, i32 14, i32 13, i32 12>
1674 ret <16 x i16> %shuffle
1677 define <16 x i16> @shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08(<16 x i16> %a, <16 x i16> %b) {
1678 ; AVX1-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
1680 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1681 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1682 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
1683 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [12,13,8,9,4,5,0,1,14,15,10,11,6,7,2,3]
1684 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
1685 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1686 ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
1687 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1690 ; AVX2-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
1692 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
1693 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
1694 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
1697 ; AVX512VL-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
1698 ; AVX512VL: # %bb.0:
1699 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [3,2,1,0,19,18,17,16,11,10,9,8,27,26,25,24]
1700 ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
1701 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
1702 ; AVX512VL-NEXT: retq
1704 ; XOPAVX1-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
1706 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1707 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1708 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [6,7,4,5,2,3,0,1,22,23,20,21,18,19,16,17]
1709 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm3, %xmm2
1710 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm0, %xmm1, %xmm0
1711 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1712 ; XOPAVX1-NEXT: retq
1714 ; XOPAVX2-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
1716 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
1717 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
1718 ; XOPAVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
1719 ; XOPAVX2-NEXT: retq
1720 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 3, i32 2, i32 1, i32 0, i32 27, i32 26, i32 25, i32 24, i32 11, i32 10, i32 9, i32 8>
1721 ret <16 x i16> %shuffle
1724 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08(<16 x i16> %a, <16 x i16> %b) {
1725 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
1727 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,0,4,5,6,7]
1728 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1729 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
1730 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1731 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1734 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
1735 ; AVX2-SLOW: # %bb.0:
1736 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,1,0,4,5,6,7,8,8,9,8,12,13,14,15]
1737 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1738 ; AVX2-SLOW-NEXT: retq
1740 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
1741 ; AVX2-FAST: # %bb.0:
1742 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,18,19,16,17]
1743 ; AVX2-FAST-NEXT: retq
1745 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
1746 ; AVX512VL-SLOW: # %bb.0:
1747 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,1,0,4,5,6,7,8,8,9,8,12,13,14,15]
1748 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1749 ; AVX512VL-SLOW-NEXT: retq
1751 ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
1752 ; AVX512VL-FAST: # %bb.0:
1753 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,18,19,16,17]
1754 ; AVX512VL-FAST-NEXT: retq
1756 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
1758 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,0,4,5,6,7]
1759 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1760 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
1761 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1762 ; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1763 ; XOPAVX1-NEXT: retq
1765 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
1767 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,1,0,4,5,6,7,8,8,9,8,12,13,14,15]
1768 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1769 ; XOPAVX2-NEXT: retq
1770 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 8>
1771 ret <16 x i16> %shuffle
1774 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08(<16 x i16> %a, <16 x i16> %b) {
1775 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
1777 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,2,4,5,6,7]
1778 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1779 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
1780 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1781 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4]
1784 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
1785 ; AVX2-SLOW: # %bb.0:
1786 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,2,4,5,6,7,8,8,8,10,12,13,14,15]
1787 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4]
1788 ; AVX2-SLOW-NEXT: retq
1790 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
1791 ; AVX2-FAST: # %bb.0:
1792 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,20,21,16,17,16,17]
1793 ; AVX2-FAST-NEXT: retq
1795 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
1796 ; AVX512VL-SLOW: # %bb.0:
1797 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,2,4,5,6,7,8,8,8,10,12,13,14,15]
1798 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4]
1799 ; AVX512VL-SLOW-NEXT: retq
1801 ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
1802 ; AVX512VL-FAST: # %bb.0:
1803 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,20,21,16,17,16,17]
1804 ; AVX512VL-FAST-NEXT: retq
1806 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
1808 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,2,4,5,6,7]
1809 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1810 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
1811 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1812 ; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4]
1813 ; XOPAVX1-NEXT: retq
1815 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
1817 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,2,4,5,6,7,8,8,8,10,12,13,14,15]
1818 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4]
1819 ; XOPAVX2-NEXT: retq
1820 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 10, i32 8, i32 8>
1821 ret <16 x i16> %shuffle
1824 define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08(<16 x i16> %a, <16 x i16> %b) {
1825 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
1827 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,3,0,4,5,6,7]
1828 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1829 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
1830 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1831 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4]
1834 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
1835 ; AVX2-SLOW: # %bb.0:
1836 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,3,0,4,5,6,7,8,8,11,8,12,13,14,15]
1837 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4]
1838 ; AVX2-SLOW-NEXT: retq
1840 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
1841 ; AVX2-FAST: # %bb.0:
1842 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,22,23,16,17,16,17,16,17]
1843 ; AVX2-FAST-NEXT: retq
1845 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
1846 ; AVX512VL-SLOW: # %bb.0:
1847 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,3,0,4,5,6,7,8,8,11,8,12,13,14,15]
1848 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4]
1849 ; AVX512VL-SLOW-NEXT: retq
1851 ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
1852 ; AVX512VL-FAST: # %bb.0:
1853 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,22,23,16,17,16,17,16,17]
1854 ; AVX512VL-FAST-NEXT: retq
1856 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
1858 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,3,0,4,5,6,7]
1859 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1860 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
1861 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1862 ; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4]
1863 ; XOPAVX1-NEXT: retq
1865 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
1867 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,3,0,4,5,6,7,8,8,11,8,12,13,14,15]
1868 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4]
1869 ; XOPAVX2-NEXT: retq
1870 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8>
1871 ret <16 x i16> %shuffle
1874 define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
1875 ; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08:
1877 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1878 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
1879 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1880 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1881 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1884 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08:
1885 ; AVX2OR512VL: # %bb.0:
1886 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,24,25,16,17,16,17,16,17,16,17]
1887 ; AVX2OR512VL-NEXT: retq
1889 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08:
1891 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1892 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
1893 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1894 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1895 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1896 ; XOPAVX1-NEXT: retq
1898 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08:
1900 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,24,25,16,17,16,17,16,17,16,17]
1901 ; XOPAVX2-NEXT: retq
1902 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8, i32 8>
1903 ret <16 x i16> %shuffle
1906 define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
1907 ; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08:
1909 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1910 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
1911 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1912 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1913 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1916 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08:
1917 ; AVX2OR512VL: # %bb.0:
1918 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,26,27,16,17,16,17,16,17,16,17,16,17]
1919 ; AVX2OR512VL-NEXT: retq
1921 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08:
1923 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1924 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
1925 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1926 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1927 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1928 ; XOPAVX1-NEXT: retq
1930 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08:
1932 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,26,27,16,17,16,17,16,17,16,17,16,17]
1933 ; XOPAVX2-NEXT: retq
1934 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 13, i32 8, i32 8, i32 8, i32 8, i32 8>
1935 ret <16 x i16> %shuffle
1938 define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
1939 ; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08:
1941 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1942 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
1943 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1944 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1945 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1948 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08:
1949 ; AVX2OR512VL: # %bb.0:
1950 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,28,29,16,17,16,17,16,17,16,17,16,17,16,17]
1951 ; AVX2OR512VL-NEXT: retq
1953 ; XOPAVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08:
1955 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1956 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
1957 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1958 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1959 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1960 ; XOPAVX1-NEXT: retq
1962 ; XOPAVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08:
1964 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,28,29,16,17,16,17,16,17,16,17,16,17,16,17]
1965 ; XOPAVX2-NEXT: retq
1966 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 14, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
1967 ret <16 x i16> %shuffle
1970 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
1971 ; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08:
1973 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1974 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
1975 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1976 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1977 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1980 ; AVX2OR512VL-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08:
1981 ; AVX2OR512VL: # %bb.0:
1982 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,30,31,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
1983 ; AVX2OR512VL-NEXT: retq
1985 ; XOPAVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08:
1987 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1988 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
1989 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1990 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1991 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1992 ; XOPAVX1-NEXT: retq
1994 ; XOPAVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08:
1996 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,30,31,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
1997 ; XOPAVX2-NEXT: retq
1998 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
1999 ret <16 x i16> %shuffle
2002 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
2003 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27:
2005 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2006 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2007 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
2008 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2009 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2012 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27:
2013 ; AVX2OR512VL: # %bb.0:
2014 ; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
2015 ; AVX2OR512VL-NEXT: retq
2017 ; XOPAVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27:
2019 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2020 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2021 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
2022 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2023 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2024 ; XOPAVX1-NEXT: retq
2026 ; XOPAVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27:
2028 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
2029 ; XOPAVX2-NEXT: retq
2030 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
2031 ret <16 x i16> %shuffle
2034 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
2035 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31:
2037 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2038 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2039 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2040 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2041 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2044 ; AVX2OR512VL-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31:
2045 ; AVX2OR512VL: # %bb.0:
2046 ; AVX2OR512VL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
2047 ; AVX2OR512VL-NEXT: retq
2049 ; XOPAVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31:
2051 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2052 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2053 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2054 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2055 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2056 ; XOPAVX1-NEXT: retq
2058 ; XOPAVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31:
2060 ; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
2061 ; XOPAVX2-NEXT: retq
2062 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
2063 ret <16 x i16> %shuffle
2066 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
2067 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31:
2069 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2070 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2071 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2072 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2073 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2076 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31:
2078 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,3,3]
2079 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,3,3]
2080 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
2083 ; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31:
2084 ; AVX512VL: # %bb.0:
2085 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,16,1,17,2,18,3,19,12,28,13,29,14,30,15,31]
2086 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
2087 ; AVX512VL-NEXT: retq
2089 ; XOPAVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31:
2091 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2092 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2093 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2094 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2095 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2096 ; XOPAVX1-NEXT: retq
2098 ; XOPAVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31:
2100 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,3,3]
2101 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,3,3]
2102 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
2103 ; XOPAVX2-NEXT: retq
2104 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
2105 ret <16 x i16> %shuffle
2108 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
2109 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27:
2111 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2112 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2113 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
2114 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2115 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2118 ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27:
2120 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,1,2,3]
2121 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,1,2,3]
2122 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
2125 ; AVX512VL-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27:
2126 ; AVX512VL: # %bb.0:
2127 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [4,20,5,21,6,22,7,23,8,24,9,25,10,26,11,27]
2128 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
2129 ; AVX512VL-NEXT: retq
2131 ; XOPAVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27:
2133 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2134 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2135 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
2136 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2137 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2138 ; XOPAVX1-NEXT: retq
2140 ; XOPAVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27:
2142 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,1,2,3]
2143 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,1,2,3]
2144 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
2145 ; XOPAVX2-NEXT: retq
2146 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
2147 ret <16 x i16> %shuffle
2150 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
2151 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08:
2153 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,0,4,5,6,7]
2154 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,1]
2155 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2156 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,0,0,4,5,6,7]
2157 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,1]
2158 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2161 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08:
2162 ; AVX2OR512VL: # %bb.0:
2163 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,18,19,16,17,16,17,16,17,16,17,16,17,16,17]
2164 ; AVX2OR512VL-NEXT: retq
2166 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08:
2168 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,0,4,5,6,7]
2169 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,1]
2170 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2171 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,0,0,4,5,6,7]
2172 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,1]
2173 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2174 ; XOPAVX1-NEXT: retq
2176 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08:
2178 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,18,19,16,17,16,17,16,17,16,17,16,17,16,17]
2179 ; XOPAVX2-NEXT: retq
2180 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 9, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
2181 ret <16 x i16> %shuffle
2184 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
2185 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08:
2187 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,2,4,5,6,7]
2188 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,0]
2189 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2190 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,0,4,5,6,7]
2191 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0]
2192 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2195 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08:
2196 ; AVX2OR512VL: # %bb.0:
2197 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,20,21,16,17,16,17,16,17,16,17,16,17]
2198 ; AVX2OR512VL-NEXT: retq
2200 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08:
2202 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,2,4,5,6,7]
2203 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,0]
2204 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2205 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,0,4,5,6,7]
2206 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0]
2207 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2208 ; XOPAVX1-NEXT: retq
2210 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08:
2212 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,20,21,16,17,16,17,16,17,16,17,16,17]
2213 ; XOPAVX2-NEXT: retq
2214 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 10, i32 8, i32 8, i32 8, i32 8, i32 8>
2215 ret <16 x i16> %shuffle
2218 define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
2219 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08:
2221 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,3,0,4,5,6,7]
2222 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,0]
2223 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2224 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,3,4,5,6,7]
2225 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0]
2226 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2229 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08:
2230 ; AVX2OR512VL: # %bb.0:
2231 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,22,23,16,17,16,17,16,17,16,17]
2232 ; AVX2OR512VL-NEXT: retq
2234 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08:
2236 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,3,0,4,5,6,7]
2237 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,0]
2238 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2239 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,3,4,5,6,7]
2240 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0]
2241 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2242 ; XOPAVX1-NEXT: retq
2244 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08:
2246 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,22,23,16,17,16,17,16,17,16,17]
2247 ; XOPAVX2-NEXT: retq
2248 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8, i32 8>
2249 ret <16 x i16> %shuffle
2252 define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08(<16 x i16> %a, <16 x i16> %b) {
2253 ; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08:
2255 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
2256 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2257 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,0,1,0,1,0,1]
2258 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2261 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08:
2262 ; AVX2OR512VL: # %bb.0:
2263 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,16,17,16,17,16,17]
2264 ; AVX2OR512VL-NEXT: retq
2266 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08:
2268 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
2269 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2270 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,0,1,0,1,0,1]
2271 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2272 ; XOPAVX1-NEXT: retq
2274 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08:
2276 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,16,17,16,17,16,17]
2277 ; XOPAVX2-NEXT: retq
2278 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8>
2279 ret <16 x i16> %shuffle
2282 define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08(<16 x i16> %a, <16 x i16> %b) {
2283 ; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08:
2285 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
2286 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2287 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
2288 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2291 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08:
2292 ; AVX2OR512VL: # %bb.0:
2293 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,26,27,16,17,16,17]
2294 ; AVX2OR512VL-NEXT: retq
2296 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08:
2298 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
2299 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2300 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
2301 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2302 ; XOPAVX1-NEXT: retq
2304 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08:
2306 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,26,27,16,17,16,17]
2307 ; XOPAVX2-NEXT: retq
2308 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 13, i32 8, i32 8>
2309 ret <16 x i16> %shuffle
2312 define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08(<16 x i16> %a, <16 x i16> %b) {
2313 ; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08:
2315 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
2316 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2317 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,12,13,0,1]
2318 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2321 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08:
2322 ; AVX2OR512VL: # %bb.0:
2323 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,28,29,16,17]
2324 ; AVX2OR512VL-NEXT: retq
2326 ; XOPAVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08:
2328 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
2329 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2330 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,12,13,0,1]
2331 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2332 ; XOPAVX1-NEXT: retq
2334 ; XOPAVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08:
2336 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,28,29,16,17]
2337 ; XOPAVX2-NEXT: retq
2338 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 14, i32 8>
2339 ret <16 x i16> %shuffle
2342 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15(<16 x i16> %a, <16 x i16> %b) {
2343 ; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15:
2345 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2346 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2347 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,14,15]
2348 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2351 ; AVX2OR512VL-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15:
2352 ; AVX2OR512VL: # %bb.0:
2353 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,30,31]
2354 ; AVX2OR512VL-NEXT: retq
2356 ; XOPAVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15:
2358 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2359 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2360 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,14,15]
2361 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2362 ; XOPAVX1-NEXT: retq
2364 ; XOPAVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15:
2366 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,30,31]
2367 ; XOPAVX2-NEXT: retq
2368 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 15>
2369 ret <16 x i16> %shuffle
2372 define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08(<16 x i16> %a, <16 x i16> %b) {
2373 ; AVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08:
2375 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
2376 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
2377 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2378 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,8,9,8,9,4,5,4,5,0,1,0,1]
2379 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2382 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08:
2383 ; AVX2OR512VL: # %bb.0:
2384 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13,28,29,28,29,24,25,24,25,20,21,20,21,16,17,16,17]
2385 ; AVX2OR512VL-NEXT: retq
2387 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08:
2389 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
2390 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
2391 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2392 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,8,9,8,9,4,5,4,5,0,1,0,1]
2393 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2394 ; XOPAVX1-NEXT: retq
2396 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08:
2398 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13,28,29,28,29,24,25,24,25,20,21,20,21,16,17,16,17]
2399 ; XOPAVX2-NEXT: retq
2400 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 14, i32 14, i32 12, i32 12, i32 10, i32 10, i32 8, i32 8>
2401 ret <16 x i16> %shuffle
2404 define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
2405 ; AVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12:
2407 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
2408 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2409 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2410 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2411 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2414 ; AVX2OR512VL-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12:
2415 ; AVX2OR512VL: # %bb.0:
2416 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
2417 ; AVX2OR512VL-NEXT: retq
2419 ; XOPAVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12:
2421 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
2422 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2423 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2424 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2425 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2426 ; XOPAVX1-NEXT: retq
2428 ; XOPAVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12:
2430 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
2431 ; XOPAVX2-NEXT: retq
2432 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
2433 ret <16 x i16> %shuffle
2436 define <16 x i16> @shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08(<16 x i16> %a, <16 x i16> %b) {
2437 ; AVX1-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08:
2439 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
2440 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
2441 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2442 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,12,13,0,1]
2443 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2446 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08:
2447 ; AVX2OR512VL: # %bb.0:
2448 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,28,29,16,17]
2449 ; AVX2OR512VL-NEXT: retq
2451 ; XOPAVX1-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08:
2453 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
2454 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
2455 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2456 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,12,13,0,1]
2457 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2458 ; XOPAVX1-NEXT: retq
2460 ; XOPAVX2-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08:
2462 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,28,29,16,17]
2463 ; XOPAVX2-NEXT: retq
2464 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 undef, i32 undef, i32 8, i32 8, i32 14, i32 8>
2465 ret <16 x i16> %shuffle
2468 define <16 x i16> @shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15(<16 x i16> %a, <16 x i16> %b) {
2469 ; AVX1-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15:
2471 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[14,15,2,3,0,1,0,1,0,1,0,1,0,1,0,1]
2472 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2473 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,14,15]
2474 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2477 ; AVX2OR512VL-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15:
2478 ; AVX2OR512VL: # %bb.0:
2479 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,u,u,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,16,17,30,31]
2480 ; AVX2OR512VL-NEXT: retq
2482 ; XOPAVX1-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15:
2484 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[14,15,2,3,0,1,0,1,0,1,0,1,0,1,0,1]
2485 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2486 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,14,15]
2487 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2488 ; XOPAVX1-NEXT: retq
2490 ; XOPAVX2-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15:
2492 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,u,u,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,16,17,30,31]
2493 ; XOPAVX2-NEXT: retq
2494 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 undef, i32 undef, i32 8, i32 8, i32 8, i32 15>
2495 ret <16 x i16> %shuffle
2498 define <16 x i16> @shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08(<16 x i16> %a, <16 x i16> %b) {
2499 ; AVX1-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08:
2501 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,1,2,2,4,5,6,7]
2502 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
2503 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2504 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,12,13,8,9,4,5,4,5,0,1,0,1]
2505 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2508 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08:
2509 ; AVX2OR512VL: # %bb.0:
2510 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,4,5,8,9,8,9,u,u,12,13,28,29,28,29,u,u,24,25,20,21,20,21,16,17,16,17]
2511 ; AVX2OR512VL-NEXT: retq
2513 ; XOPAVX1-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08:
2515 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,1,2,2,4,5,6,7]
2516 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
2517 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2518 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,12,13,8,9,4,5,4,5,0,1,0,1]
2519 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2520 ; XOPAVX1-NEXT: retq
2522 ; XOPAVX2-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08:
2524 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,4,5,8,9,8,9,u,u,12,13,28,29,28,29,u,u,24,25,20,21,20,21,16,17,16,17]
2525 ; XOPAVX2-NEXT: retq
2526 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 undef, i32 2, i32 4, i32 4, i32 undef, i32 6, i32 14, i32 14, i32 undef, i32 12, i32 10, i32 10, i32 8, i32 8>
2527 ret <16 x i16> %shuffle
2530 define <16 x i16> @shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12(<16 x i16> %a, <16 x i16> %b) {
2531 ; AVX1-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12:
2533 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,4,4,4]
2534 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,2,2]
2535 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2536 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2537 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2538 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2541 ; AVX2OR512VL-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12:
2542 ; AVX2OR512VL: # %bb.0:
2543 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,u,u,u,u,u,u,u,u,16,17,16,17,16,17,u,u,u,u,24,25,24,25,24,25]
2544 ; AVX2OR512VL-NEXT: retq
2546 ; XOPAVX1-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12:
2548 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,4,4,4]
2549 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,2,2]
2550 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2551 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2552 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2553 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2554 ; XOPAVX1-NEXT: retq
2556 ; XOPAVX2-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12:
2558 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,u,u,u,u,u,u,u,u,16,17,16,17,16,17,u,u,u,u,24,25,24,25,24,25]
2559 ; XOPAVX2-NEXT: retq
2560 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 12, i32 12, i32 12>
2561 ret <16 x i16> %shuffle
2564 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20(<16 x i16> %a, <16 x i16> %b) {
2565 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
2567 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2568 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2569 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
2570 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
2571 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2574 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
2575 ; AVX2-SLOW: # %bb.0:
2576 ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2577 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
2578 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
2579 ; AVX2-SLOW-NEXT: retq
2581 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
2582 ; AVX2-FAST: # %bb.0:
2583 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2584 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
2585 ; AVX2-FAST-NEXT: retq
2587 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
2588 ; AVX512VL: # %bb.0:
2589 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,4,4,4,4,16,16,16,16,20,20,20,20]
2590 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
2591 ; AVX512VL-NEXT: retq
2593 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
2595 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2596 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2597 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
2598 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
2599 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2600 ; XOPAVX1-NEXT: retq
2602 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
2604 ; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2605 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
2606 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
2607 ; XOPAVX2-NEXT: retq
2608 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20>
2609 ret <16 x i16> %shuffle
2612 define <16 x i16> @shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20(<16 x i16> %a, <16 x i16> %b) {
2613 ; AVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
2615 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2616 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2617 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2618 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
2619 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
2620 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2623 ; AVX2-SLOW-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
2624 ; AVX2-SLOW: # %bb.0:
2625 ; AVX2-SLOW-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
2626 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
2627 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
2628 ; AVX2-SLOW-NEXT: retq
2630 ; AVX2-FAST-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
2631 ; AVX2-FAST: # %bb.0:
2632 ; AVX2-FAST-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
2633 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
2634 ; AVX2-FAST-NEXT: retq
2636 ; AVX512VL-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
2637 ; AVX512VL: # %bb.0:
2638 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [8,8,8,8,12,12,12,12,16,16,16,16,20,20,20,20]
2639 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
2640 ; AVX512VL-NEXT: retq
2642 ; XOPAVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
2644 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2645 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2646 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2647 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
2648 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
2649 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2650 ; XOPAVX1-NEXT: retq
2652 ; XOPAVX2-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
2654 ; XOPAVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
2655 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
2656 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
2657 ; XOPAVX2-NEXT: retq
2658 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20>
2659 ret <16 x i16> %shuffle
2662 define <16 x i16> @shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28(<16 x i16> %a, <16 x i16> %b) {
2663 ; AVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
2665 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2666 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2667 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2668 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2669 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
2670 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
2671 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2674 ; AVX2-SLOW-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
2675 ; AVX2-SLOW: # %bb.0:
2676 ; AVX2-SLOW-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
2677 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
2678 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
2679 ; AVX2-SLOW-NEXT: retq
2681 ; AVX2-FAST-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
2682 ; AVX2-FAST: # %bb.0:
2683 ; AVX2-FAST-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
2684 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
2685 ; AVX2-FAST-NEXT: retq
2687 ; AVX512VL-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
2688 ; AVX512VL: # %bb.0:
2689 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [8,8,8,8,12,12,12,12,24,24,24,24,28,28,28,28]
2690 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
2691 ; AVX512VL-NEXT: retq
2693 ; XOPAVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
2695 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2696 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2697 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2698 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2699 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
2700 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
2701 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2702 ; XOPAVX1-NEXT: retq
2704 ; XOPAVX2-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
2706 ; XOPAVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
2707 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
2708 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
2709 ; XOPAVX2-NEXT: retq
2710 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28>
2711 ret <16 x i16> %shuffle
2714 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28(<16 x i16> %a, <16 x i16> %b) {
2715 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
2717 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2718 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2719 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2720 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
2721 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
2722 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2725 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
2726 ; AVX2-SLOW: # %bb.0:
2727 ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
2728 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
2729 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
2730 ; AVX2-SLOW-NEXT: retq
2732 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
2733 ; AVX2-FAST: # %bb.0:
2734 ; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
2735 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
2736 ; AVX2-FAST-NEXT: retq
2738 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
2739 ; AVX512VL: # %bb.0:
2740 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,4,4,4,4,24,24,24,24,28,28,28,28]
2741 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
2742 ; AVX512VL-NEXT: retq
2744 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
2746 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2747 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2748 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2749 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
2750 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
2751 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2752 ; XOPAVX1-NEXT: retq
2754 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
2756 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
2757 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
2758 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
2759 ; XOPAVX2-NEXT: retq
2760 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28>
2761 ret <16 x i16> %shuffle
2764 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i16> %a, <16 x i16> %b) {
2765 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
2767 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2768 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2769 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2772 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
2774 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2775 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2776 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
2779 ; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
2780 ; AVX512VL: # %bb.0:
2781 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23]
2782 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
2783 ; AVX512VL-NEXT: retq
2785 ; XOPAVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
2787 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2788 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2789 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2790 ; XOPAVX1-NEXT: retq
2792 ; XOPAVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
2794 ; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2795 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2796 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
2797 ; XOPAVX2-NEXT: retq
2798 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
2799 ret <16 x i16> %shuffle
2802 define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) {
2803 ; AVX1-LABEL: shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24:
2805 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
2806 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2807 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
2808 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2811 ; AVX2OR512VL-LABEL: shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24:
2812 ; AVX2OR512VL: # %bb.0:
2813 ; AVX2OR512VL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
2814 ; AVX2OR512VL-NEXT: retq
2816 ; XOPAVX1-LABEL: shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24:
2818 ; XOPAVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
2819 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2820 ; XOPAVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
2821 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2822 ; XOPAVX1-NEXT: retq
2824 ; XOPAVX2-LABEL: shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24:
2826 ; XOPAVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
2827 ; XOPAVX2-NEXT: retq
2828 %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24>
2829 ret <16 x i16> %shuffle
2832 define <16 x i16> @shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz(<16 x i16> %a) {
2833 ; AVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz:
2835 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
2836 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2837 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
2838 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2841 ; AVX2OR512VL-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz:
2842 ; AVX2OR512VL: # %bb.0:
2843 ; AVX2OR512VL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero
2844 ; AVX2OR512VL-NEXT: retq
2846 ; XOPAVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz:
2848 ; XOPAVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
2849 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2850 ; XOPAVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
2851 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2852 ; XOPAVX1-NEXT: retq
2854 ; XOPAVX2-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz:
2856 ; XOPAVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero
2857 ; XOPAVX2-NEXT: retq
2858 %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0>
2859 ret <16 x i16> %shuffle
2862 define <16 x i16> @shuffle_v16i16_06_07_01_02_07_00_04_05_14_15_09_10_15_08_12_13(<16 x i16> %a) {
2863 ; AVX1-LABEL: shuffle_v16i16_06_07_01_02_07_00_04_05_14_15_09_10_15_08_12_13:
2865 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2866 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [12,13,14,15,2,3,4,5,14,15,0,1,8,9,10,11]
2867 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2868 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2869 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2872 ; AVX2OR512VL-LABEL: shuffle_v16i16_06_07_01_02_07_00_04_05_14_15_09_10_15_08_12_13:
2873 ; AVX2OR512VL: # %bb.0:
2874 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[12,13,14,15,2,3,4,5,14,15,0,1,8,9,10,11,28,29,30,31,18,19,20,21,30,31,16,17,24,25,26,27]
2875 ; AVX2OR512VL-NEXT: retq
2877 ; XOPAVX1-LABEL: shuffle_v16i16_06_07_01_02_07_00_04_05_14_15_09_10_15_08_12_13:
2879 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2880 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [12,13,14,15,2,3,4,5,14,15,0,1,8,9,10,11]
2881 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2882 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2883 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2884 ; XOPAVX1-NEXT: retq
2886 ; XOPAVX2-LABEL: shuffle_v16i16_06_07_01_02_07_00_04_05_14_15_09_10_15_08_12_13:
2888 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[12,13,14,15,2,3,4,5,14,15,0,1,8,9,10,11,28,29,30,31,18,19,20,21,30,31,16,17,24,25,26,27]
2889 ; XOPAVX2-NEXT: retq
2890 %1 = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 6, i32 7, i32 1, i32 2, i32 7, i32 0, i32 4, i32 5, i32 14, i32 15, i32 9, i32 10, i32 15, i32 8, i32 12, i32 13>
2895 ; Shuffle to logical bit shifts
2898 define <16 x i16> @shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14(<16 x i16> %a) {
2899 ; AVX1-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
2901 ; AVX1-NEXT: vpslld $16, %xmm0, %xmm1
2902 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2903 ; AVX1-NEXT: vpslld $16, %xmm0, %xmm0
2904 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2907 ; AVX2OR512VL-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
2908 ; AVX2OR512VL: # %bb.0:
2909 ; AVX2OR512VL-NEXT: vpslld $16, %ymm0, %ymm0
2910 ; AVX2OR512VL-NEXT: retq
2912 ; XOPAVX1-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
2914 ; XOPAVX1-NEXT: vpslld $16, %xmm0, %xmm1
2915 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2916 ; XOPAVX1-NEXT: vpslld $16, %xmm0, %xmm0
2917 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2918 ; XOPAVX1-NEXT: retq
2920 ; XOPAVX2-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
2922 ; XOPAVX2-NEXT: vpslld $16, %ymm0, %ymm0
2923 ; XOPAVX2-NEXT: retq
2924 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14>
2925 ret <16 x i16> %shuffle
2928 define <16 x i16> @shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12(<16 x i16> %a) {
2929 ; AVX1-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
2931 ; AVX1-NEXT: vpsllq $48, %xmm0, %xmm1
2932 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2933 ; AVX1-NEXT: vpsllq $48, %xmm0, %xmm0
2934 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2937 ; AVX2OR512VL-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
2938 ; AVX2OR512VL: # %bb.0:
2939 ; AVX2OR512VL-NEXT: vpsllq $48, %ymm0, %ymm0
2940 ; AVX2OR512VL-NEXT: retq
2942 ; XOPAVX1-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
2944 ; XOPAVX1-NEXT: vpsllq $48, %xmm0, %xmm1
2945 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2946 ; XOPAVX1-NEXT: vpsllq $48, %xmm0, %xmm0
2947 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2948 ; XOPAVX1-NEXT: retq
2950 ; XOPAVX2-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
2952 ; XOPAVX2-NEXT: vpsllq $48, %ymm0, %ymm0
2953 ; XOPAVX2-NEXT: retq
2954 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12>
2955 ret <16 x i16> %shuffle
2958 define <16 x i16> @shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz(<16 x i16> %a) {
2959 ; AVX1-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz:
2961 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
2962 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2963 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
2964 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2967 ; AVX2OR512VL-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz:
2968 ; AVX2OR512VL: # %bb.0:
2969 ; AVX2OR512VL-NEXT: vpsrld $16, %ymm0, %ymm0
2970 ; AVX2OR512VL-NEXT: retq
2972 ; XOPAVX1-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz:
2974 ; XOPAVX1-NEXT: vpsrld $16, %xmm0, %xmm1
2975 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2976 ; XOPAVX1-NEXT: vpsrld $16, %xmm0, %xmm0
2977 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2978 ; XOPAVX1-NEXT: retq
2980 ; XOPAVX2-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz:
2982 ; XOPAVX2-NEXT: vpsrld $16, %ymm0, %ymm0
2983 ; XOPAVX2-NEXT: retq
2984 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 3, i32 16, i32 5, i32 16, i32 7, i32 16, i32 9, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16>
2985 ret <16 x i16> %shuffle
2988 define <16 x i16> @shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz(<16 x i16> %a) {
2989 ; AVX1-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
2991 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
2992 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
2993 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
2996 ; AVX2OR512VL-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
2997 ; AVX2OR512VL: # %bb.0:
2998 ; AVX2OR512VL-NEXT: vpsrlq $32, %ymm0, %ymm0
2999 ; AVX2OR512VL-NEXT: retq
3001 ; XOPAVX1-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
3003 ; XOPAVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
3004 ; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
3005 ; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
3006 ; XOPAVX1-NEXT: retq
3008 ; XOPAVX2-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
3010 ; XOPAVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
3011 ; XOPAVX2-NEXT: retq
3012 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 16, i32 16, i32 14, i32 15, i32 16, i32 16>
3013 ret <16 x i16> %shuffle
3016 define <16 x i16> @shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz(<16 x i16> %a) {
3017 ; AVX1-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
3019 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
3020 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
3021 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
3022 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3025 ; AVX2OR512VL-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
3026 ; AVX2OR512VL: # %bb.0:
3027 ; AVX2OR512VL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3028 ; AVX2OR512VL-NEXT: retq
3030 ; XOPAVX1-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
3032 ; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
3033 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
3034 ; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
3035 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3036 ; XOPAVX1-NEXT: retq
3038 ; XOPAVX2-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
3040 ; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3041 ; XOPAVX2-NEXT: retq
3042 %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 16, i32 0, i32 0, i32 0, i32 17, i32 0, i32 0, i32 0, i32 18, i32 0, i32 0, i32 0, i32 19, i32 0, i32 0, i32 0>
3043 ret <16 x i16> %shuffle
3046 define <16 x i16> @shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz(<16 x i16> %a) {
3047 ; AVX1-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz:
3049 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
3050 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3051 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
3052 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3055 ; AVX2OR512VL-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz:
3056 ; AVX2OR512VL: # %bb.0:
3057 ; AVX2OR512VL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3058 ; AVX2OR512VL-NEXT: retq
3060 ; XOPAVX1-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz:
3062 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
3063 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3064 ; XOPAVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
3065 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3066 ; XOPAVX1-NEXT: retq
3068 ; XOPAVX2-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz:
3070 ; XOPAVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3071 ; XOPAVX2-NEXT: retq
3072 %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 16, i32 0, i32 17, i32 0, i32 18, i32 0, i32 19, i32 0, i32 20, i32 0, i32 21, i32 0, i32 22, i32 0, i32 23, i32 0>
3073 ret <16 x i16> %shuffle
3076 define <16 x i16> @shuffle_v16i16_28_zz_zz_zz_29_zz_zz_zz_30_zz_zz_zz_31_zz_zz_zz(<16 x i16> %a) {
3077 ; AVX1-LABEL: shuffle_v16i16_28_zz_zz_zz_29_zz_zz_zz_30_zz_zz_zz_31_zz_zz_zz:
3079 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3080 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
3081 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
3082 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
3083 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
3084 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3087 ; AVX2-LABEL: shuffle_v16i16_28_zz_zz_zz_29_zz_zz_zz_30_zz_zz_zz_31_zz_zz_zz:
3089 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
3090 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9],zero,zero,zero,zero,zero,zero,ymm0[10,11],zero,zero,zero,zero,zero,zero,ymm0[28,29],zero,zero,zero,zero,zero,zero,ymm0[30,31],zero,zero,zero,zero,zero,zero
3093 ; AVX512VL-LABEL: shuffle_v16i16_28_zz_zz_zz_29_zz_zz_zz_30_zz_zz_zz_31_zz_zz_zz:
3094 ; AVX512VL: # %bb.0:
3095 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [28,1,2,3,29,5,6,7,30,9,10,11,31,13,14,15]
3096 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
3097 ; AVX512VL-NEXT: vpermt2w %ymm0, %ymm2, %ymm1
3098 ; AVX512VL-NEXT: vmovdqa %ymm1, %ymm0
3099 ; AVX512VL-NEXT: retq
3101 ; XOPAVX1-LABEL: shuffle_v16i16_28_zz_zz_zz_29_zz_zz_zz_30_zz_zz_zz_31_zz_zz_zz:
3103 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3104 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
3105 ; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
3106 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
3107 ; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
3108 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3109 ; XOPAVX1-NEXT: retq
3111 ; XOPAVX2-LABEL: shuffle_v16i16_28_zz_zz_zz_29_zz_zz_zz_30_zz_zz_zz_31_zz_zz_zz:
3113 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
3114 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9],zero,zero,zero,zero,zero,zero,ymm0[10,11],zero,zero,zero,zero,zero,zero,ymm0[28,29],zero,zero,zero,zero,zero,zero,ymm0[30,31],zero,zero,zero,zero,zero,zero
3115 ; XOPAVX2-NEXT: retq
3116 %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 28, i32 0, i32 0, i32 0, i32 29, i32 0, i32 0, i32 0, i32 30, i32 0, i32 0, i32 0, i32 31, i32 0, i32 0, i32 0>
3117 ret <16 x i16> %shuffle
3120 define <16 x i16> @shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14(<16 x i16> %a, <16 x i16> %b) {
3121 ; AVX1-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14:
3123 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3124 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
3125 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3126 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3127 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3130 ; AVX2OR512VL-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14:
3131 ; AVX2OR512VL: # %bb.0:
3132 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm1[30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
3133 ; AVX2OR512VL-NEXT: retq
3135 ; XOPAVX1-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14:
3137 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3138 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
3139 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3140 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3141 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3142 ; XOPAVX1-NEXT: retq
3144 ; XOPAVX2-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14:
3146 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm1[30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
3147 ; XOPAVX2-NEXT: retq
3148 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
3149 ret <16 x i16> %shuffle
3152 define <16 x i16> @shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24(<16 x i16> %a, <16 x i16> %b) {
3153 ; AVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24:
3155 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3156 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
3157 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1]
3158 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
3159 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3162 ; AVX2OR512VL-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24:
3163 ; AVX2OR512VL: # %bb.0:
3164 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1],ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17]
3165 ; AVX2OR512VL-NEXT: retq
3167 ; XOPAVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24:
3169 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3170 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
3171 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1]
3172 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
3173 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3174 ; XOPAVX1-NEXT: retq
3176 ; XOPAVX2-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24:
3178 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1],ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17]
3179 ; XOPAVX2-NEXT: retq
3180 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24>
3181 ret <16 x i16> %shuffle
3184 define <16 x i16> @shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8(<16 x i16> %a, <16 x i16> %b) {
3185 ; AVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8:
3187 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3188 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
3189 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1]
3190 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
3191 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3194 ; AVX2OR512VL-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8:
3195 ; AVX2OR512VL: # %bb.0:
3196 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
3197 ; AVX2OR512VL-NEXT: retq
3199 ; XOPAVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8:
3201 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3202 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
3203 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1]
3204 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
3205 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3206 ; XOPAVX1-NEXT: retq
3208 ; XOPAVX2-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8:
3210 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
3211 ; XOPAVX2-NEXT: retq
3212 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 00, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 8>
3213 ret <16 x i16> %shuffle
3216 define <16 x i16> @shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30(<16 x i16> %a, <16 x i16> %b) {
3217 ; AVX1-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30:
3219 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3220 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
3221 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3222 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3223 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3226 ; AVX2OR512VL-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30:
3227 ; AVX2OR512VL: # %bb.0:
3228 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm0[30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
3229 ; AVX2OR512VL-NEXT: retq
3231 ; XOPAVX1-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30:
3233 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3234 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
3235 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3236 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3237 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3238 ; XOPAVX1-NEXT: retq
3240 ; XOPAVX2-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30:
3242 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm0[30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
3243 ; XOPAVX2-NEXT: retq
3244 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
3245 ret <16 x i16> %shuffle
3248 define <16 x i16> @shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16(<16 x i16> %a, <16 x i16> %b) {
3249 ; AVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16:
3251 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
3252 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
3253 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3256 ; AVX2-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16:
3258 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3259 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16,17]
3262 ; AVX512VL-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16:
3263 ; AVX512VL: # %bb.0:
3264 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,2,3,4,5,6,7,0,17,18,19,20,21,22,23,16]
3265 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
3266 ; AVX512VL-NEXT: retq
3268 ; XOPAVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16:
3270 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
3271 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
3272 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3273 ; XOPAVX1-NEXT: retq
3275 ; XOPAVX2-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16:
3277 ; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3278 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16,17]
3279 ; XOPAVX2-NEXT: retq
3280 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16>
3281 ret <16 x i16> %shuffle
3284 define <16 x i16> @shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22(<16 x i16> %a, <16 x i16> %b) {
3285 ; AVX1-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22:
3287 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3288 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3289 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3292 ; AVX2-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22:
3294 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3295 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,30,31,16,17,18,19,20,21,22,23,24,25,26,27,28,29]
3298 ; AVX512VL-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22:
3299 ; AVX512VL: # %bb.0:
3300 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [7,0,1,2,3,4,5,6,23,16,17,18,19,20,21,22]
3301 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
3302 ; AVX512VL-NEXT: retq
3304 ; XOPAVX1-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22:
3306 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3307 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3308 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3309 ; XOPAVX1-NEXT: retq
3311 ; XOPAVX2-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22:
3313 ; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3314 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,30,31,16,17,18,19,20,21,22,23,24,25,26,27,28,29]
3315 ; XOPAVX2-NEXT: retq
3316 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 23, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
3317 ret <16 x i16> %shuffle
3320 define <16 x i16> @shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11(<16 x i16> %a, <16 x i16> %b) {
3321 ; AVX1-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
3323 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3324 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3325 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
3326 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
3327 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
3328 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3331 ; AVX2-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
3333 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3334 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,0,1,2,3,4,5,6,7,4,5,14,15,16,17,18,19,16,17,18,19,20,21,22,23,20,21,22,23]
3337 ; AVX512VL-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
3338 ; AVX512VL: # %bb.0:
3339 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,0,1,2,3,2,11,8,9,8,9,10,11,10,11]
3340 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
3341 ; AVX512VL-NEXT: retq
3343 ; XOPAVX1-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
3345 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3346 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,2,3,0,1,2,3,4,5,6,7,4,5],xmm1[6,7]
3347 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
3348 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3349 ; XOPAVX1-NEXT: retq
3351 ; XOPAVX2-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
3353 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3354 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,0,1,2,3,4,5,6,7,4,5,14,15,16,17,18,19,16,17,18,19,20,21,22,23,20,21,22,23]
3355 ; XOPAVX2-NEXT: retq
3356 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 11, i32 8, i32 9, i32 8, i32 9, i32 10, i32 11, i32 10, i32 11>
3357 ret <16 x i16> %shuffle
3360 define <16 x i16> @shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09(<16 x i16> %a, <16 x i16> %b) {
3361 ; AVX1-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
3363 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3364 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
3365 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3366 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
3369 ; AVX2-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
3371 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3372 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3,4,5,6,7,8],ymm1[9],ymm0[10,11,12,13,14,15]
3373 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
3376 ; AVX512VL-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
3377 ; AVX512VL: # %bb.0:
3378 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [6,7,4,5,2,3,0,9,14,15,12,13,10,11,8,9]
3379 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
3380 ; AVX512VL-NEXT: retq
3382 ; XOPAVX1-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
3384 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3385 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
3386 ; XOPAVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3387 ; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
3388 ; XOPAVX1-NEXT: retq
3390 ; XOPAVX2-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
3392 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3393 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3,4,5,6,7,8],ymm1[9],ymm0[10,11,12,13,14,15]
3394 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
3395 ; XOPAVX2-NEXT: retq
3396 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 9, i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9>
3397 ret <16 x i16> %shuffle
3400 define <16 x i16> @shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27(<16 x i16> %a, <16 x i16> %b) {
3401 ; AVX1-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
3403 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3404 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
3405 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7]
3406 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
3407 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,0,1,4,5,8,9,14,15]
3408 ; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
3409 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3412 ; AVX2-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
3414 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
3415 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
3416 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
3417 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
3420 ; AVX512VL-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
3421 ; AVX512VL: # %bb.0:
3422 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [4,5,6,7,16,17,18,27,12,13,14,15,24,25,26,27]
3423 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
3424 ; AVX512VL-NEXT: retq
3426 ; XOPAVX1-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
3428 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3429 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
3430 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7]
3431 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
3432 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,4,5,8,9,14,15]
3433 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3434 ; XOPAVX1-NEXT: retq
3436 ; XOPAVX2-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
3438 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
3439 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
3440 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
3441 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
3442 ; XOPAVX2-NEXT: retq
3443 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 27, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27>
3444 ret <16 x i16> %shuffle
3447 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
3448 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
3450 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3451 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3452 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,1,4,5,6,7]
3453 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
3454 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
3455 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
3456 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3459 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
3461 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3462 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
3465 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
3466 ; AVX512VL: # %bb.0:
3467 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,8]
3468 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
3469 ; AVX512VL-NEXT: retq
3471 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
3473 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3474 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1],xmm1[0,1]
3475 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
3476 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
3477 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3478 ; XOPAVX1-NEXT: retq
3480 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
3482 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3483 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
3484 ; XOPAVX2-NEXT: retq
3485 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
3486 ret <16 x i16> %shuffle
3489 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
3490 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
3492 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3493 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
3494 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3495 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
3496 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
3497 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
3498 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
3499 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3502 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
3503 ; AVX2-SLOW: # %bb.0:
3504 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3505 ; AVX2-SLOW-NEXT: vpsllq $48, %ymm1, %ymm1
3506 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
3507 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
3508 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
3509 ; AVX2-SLOW-NEXT: retq
3511 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
3512 ; AVX2-FAST-ALL: # %bb.0:
3513 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,6,u,4,6,u,u>
3514 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
3515 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,4,5,4,5,4,5,8,9,16,17,16,17,16,17,16,17,20,21,20,21,20,21,20,21]
3516 ; AVX2-FAST-ALL-NEXT: retq
3518 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
3519 ; AVX2-FAST-PERLANE: # %bb.0:
3520 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,u,u,16,17,16,17,16,17,16,17,24,25,24,25,24,25,u,u]
3521 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
3522 ; AVX2-FAST-PERLANE-NEXT: vpsllq $48, %ymm0, %ymm0
3523 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
3524 ; AVX2-FAST-PERLANE-NEXT: retq
3526 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
3527 ; AVX512VL: # %bb.0:
3528 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,4,4,4,12,8,8,8,8,12,12,12,12]
3529 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
3530 ; AVX512VL-NEXT: retq
3532 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
3534 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3535 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9],xmm1[8,9]
3536 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
3537 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
3538 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3539 ; XOPAVX1-NEXT: retq
3541 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
3543 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3544 ; XOPAVX2-NEXT: vpsllq $48, %ymm1, %ymm1
3545 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
3546 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
3547 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
3548 ; XOPAVX2-NEXT: retq
3549 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
3550 ret <16 x i16> %shuffle
3553 define <16 x i16> @shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11(<16 x i16> %a, <16 x i16> %b) {
3554 ; AVX1-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
3556 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3557 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm1[0,0,1,1,2,2,3,3]
3558 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3559 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
3560 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
3561 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3564 ; AVX2-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
3566 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3567 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
3570 ; AVX512VL-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
3571 ; AVX512VL: # %bb.0:
3572 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <u,0,u,1,u,2,u,11,u,8,u,9,u,10,u,11>
3573 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
3574 ; AVX512VL-NEXT: retq
3576 ; XOPAVX1-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
3578 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3579 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm1[0,0,1,1,2,2,3,3]
3580 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,0,1,2,3,2,3,4,5,4,5,6,7],xmm1[6,7]
3581 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3582 ; XOPAVX1-NEXT: retq
3584 ; XOPAVX2-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
3586 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3587 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
3588 ; XOPAVX2-NEXT: retq
3589 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 11, i32 undef, i32 8, i32 undef, i32 9, i32 undef, i32 10, i32 undef, i32 11>
3590 ret <16 x i16> %shuffle
3593 define <16 x i16> @shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15(<16 x i16> %a, <16 x i16> %b) {
3594 ; AVX1-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
3596 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3597 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4,4,5,5,6,6,7,7]
3598 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3599 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
3600 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
3601 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3604 ; AVX2-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
3606 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,3,3]
3607 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
3610 ; AVX512VL-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
3611 ; AVX512VL: # %bb.0:
3612 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <u,4,u,5,u,6,u,15,u,12,u,13,u,14,u,15>
3613 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
3614 ; AVX512VL-NEXT: retq
3616 ; XOPAVX1-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
3618 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3619 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4,4,5,5,6,6,7,7]
3620 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8,9,8,9,10,11,10,11,12,13,12,13,14,15],xmm1[14,15]
3621 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3622 ; XOPAVX1-NEXT: retq
3624 ; XOPAVX2-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
3626 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,3,3]
3627 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
3628 ; XOPAVX2-NEXT: retq
3629 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 15, i32 undef, i32 12, i32 undef, i32 13, i32 undef, i32 14, i32 undef, i32 15>
3630 ret <16 x i16> %shuffle
3633 define <16 x i16> @shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
3634 ; AVX1-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
3636 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3637 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
3638 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
3639 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
3640 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3641 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,1,3,2,4,5,7,6]
3644 ; AVX2-SLOW-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
3645 ; AVX2-SLOW: # %bb.0:
3646 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3647 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6,7,8,9,10,11,12],ymm1[13],ymm0[14,15]
3648 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,1,2,0,4,5,6,7,11,9,10,8,12,13,14,15]
3649 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,3,2,4,5,7,6]
3650 ; AVX2-SLOW-NEXT: retq
3652 ; AVX2-FAST-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
3653 ; AVX2-FAST: # %bb.0:
3654 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3655 ; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6,7,8,9,10,11,12],ymm1[13],ymm0[14,15]
3656 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9,10,11,22,23,18,19,20,21,16,17,28,29,30,31,24,25,26,27]
3657 ; AVX2-FAST-NEXT: retq
3659 ; AVX512VL-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
3660 ; AVX512VL: # %bb.0:
3661 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [3,1,2,0,6,7,4,13,11,9,10,8,14,15,12,13]
3662 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
3663 ; AVX512VL-NEXT: retq
3665 ; XOPAVX1-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
3667 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3668 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9],xmm1[10,11]
3669 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
3670 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
3671 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3672 ; XOPAVX1-NEXT: retq
3674 ; XOPAVX2-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
3676 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3677 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6,7,8,9,10,11,12],ymm1[13],ymm0[14,15]
3678 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,1,2,0,4,5,6,7,11,9,10,8,12,13,14,15]
3679 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,3,2,4,5,7,6]
3680 ; XOPAVX2-NEXT: retq
3681 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 13, i32 11, i32 9, i32 10, i32 8, i32 14, i32 15, i32 12, i32 13>
3682 ret <16 x i16> %shuffle
3685 define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
3686 ; AVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
3688 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3689 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
3690 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,u,u]
3691 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
3692 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
3693 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3696 ; AVX2-SLOW-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
3697 ; AVX2-SLOW: # %bb.0:
3698 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,u,u,24,25,24,25,24,25,24,25,16,17,16,17,16,17,u,u]
3699 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
3700 ; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
3701 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
3702 ; AVX2-SLOW-NEXT: retq
3704 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
3705 ; AVX2-FAST-ALL: # %bb.0:
3706 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,4,u,6,4,u,u>
3707 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
3708 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,4,5,4,5,4,5,8,9,16,17,16,17,16,17,16,17,20,21,20,21,20,21,20,21]
3709 ; AVX2-FAST-ALL-NEXT: retq
3711 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
3712 ; AVX2-FAST-PERLANE: # %bb.0:
3713 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,u,u,24,25,24,25,24,25,24,25,16,17,16,17,16,17,u,u]
3714 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
3715 ; AVX2-FAST-PERLANE-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
3716 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
3717 ; AVX2-FAST-PERLANE-NEXT: retq
3719 ; AVX512VL-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
3720 ; AVX512VL: # %bb.0:
3721 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,0,0,0,8,12,12,12,12,8,8,8,8]
3722 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
3723 ; AVX512VL-NEXT: retq
3725 ; XOPAVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
3727 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3728 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1],xmm1[0,1]
3729 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
3730 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3731 ; XOPAVX1-NEXT: retq
3733 ; XOPAVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
3735 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,u,u,24,25,24,25,24,25,24,25,16,17,16,17,16,17,u,u]
3736 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
3737 ; XOPAVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
3738 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
3739 ; XOPAVX2-NEXT: retq
3740 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 8, i32 12, i32 12, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8>
3741 ret <16 x i16> %shuffle
3744 define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
3745 ; AVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
3747 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3748 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
3749 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3750 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
3753 ; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
3755 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3756 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6,7,8,9,10,11,12],ymm1[13],ymm0[14,15]
3757 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
3760 ; AVX512VL-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
3761 ; AVX512VL: # %bb.0:
3762 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [2,3,0,1,6,7,4,13,10,11,8,9,14,15,12,13]
3763 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
3764 ; AVX512VL-NEXT: retq
3766 ; XOPAVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
3768 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3769 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
3770 ; XOPAVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3771 ; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
3772 ; XOPAVX1-NEXT: retq
3774 ; XOPAVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
3776 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3777 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6,7,8,9,10,11,12],ymm1[13],ymm0[14,15]
3778 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
3779 ; XOPAVX2-NEXT: retq
3780 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
3781 ret <16 x i16> %shuffle
3784 define <16 x i16> @shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
3785 ; AVX1-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
3787 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3788 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
3789 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
3790 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
3791 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3792 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,1,3,2,4,5,7,6]
3795 ; AVX2-SLOW-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
3796 ; AVX2-SLOW: # %bb.0:
3797 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3798 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6,7,8,9,10,11,12],ymm1[13],ymm0[14,15]
3799 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[2,3,0,2,4,5,6,7,10,11,8,10,12,13,14,15]
3800 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,3,2,4,5,7,6]
3801 ; AVX2-SLOW-NEXT: retq
3803 ; AVX2-FAST-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
3804 ; AVX2-FAST: # %bb.0:
3805 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3806 ; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6,7,8,9,10,11,12],ymm1[13],ymm0[14,15]
3807 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9,10,11,20,21,22,23,16,17,20,21,28,29,30,31,24,25,26,27]
3808 ; AVX2-FAST-NEXT: retq
3810 ; AVX512VL-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
3811 ; AVX512VL: # %bb.0:
3812 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [2,3,0,2,6,7,4,13,10,11,8,10,14,15,12,13]
3813 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
3814 ; AVX512VL-NEXT: retq
3816 ; XOPAVX1-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
3818 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3819 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9],xmm1[10,11]
3820 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
3821 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
3822 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3823 ; XOPAVX1-NEXT: retq
3825 ; XOPAVX2-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
3827 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3828 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6,7,8,9,10,11,12],ymm1[13],ymm0[14,15]
3829 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[2,3,0,2,4,5,6,7,10,11,8,10,12,13,14,15]
3830 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,3,2,4,5,7,6]
3831 ; XOPAVX2-NEXT: retq
3832 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 10, i32 14, i32 15, i32 12, i32 13>
3833 ret <16 x i16> %shuffle
3836 define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15(<16 x i16> %a, <16 x i16> %b) {
3837 ; AVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
3839 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3840 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
3841 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
3842 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
3843 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
3844 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3847 ; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
3849 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3850 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
3851 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
3854 ; AVX512VL-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
3855 ; AVX512VL: # %bb.0:
3856 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [2,3,0,1,6,7,4,15,10,11,8,9,14,15,12,15]
3857 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
3858 ; AVX512VL-NEXT: retq
3860 ; XOPAVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
3862 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3863 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
3864 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
3865 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
3866 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
3867 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3868 ; XOPAVX1-NEXT: retq
3870 ; XOPAVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
3872 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3873 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
3874 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
3875 ; XOPAVX2-NEXT: retq
3876 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 15, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 15>
3877 ret <16 x i16> %shuffle
3880 define <16 x i16> @shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08(<16 x i16> %a, <16 x i16> %b) {
3881 ; AVX1-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
3883 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3884 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
3885 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
3886 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
3887 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
3888 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
3891 ; AVX2-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
3893 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3894 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
3895 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1,30,31,26,27,28,29,24,25,22,23,18,19,20,21,16,17]
3898 ; AVX512VL-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
3899 ; AVX512VL: # %bb.0:
3900 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [7,5,6,4,3,1,2,8,15,13,14,12,11,9,10,8]
3901 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
3902 ; AVX512VL-NEXT: retq
3904 ; XOPAVX1-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
3906 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3907 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5],xmm1[0,1]
3908 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
3909 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3910 ; XOPAVX1-NEXT: retq
3912 ; XOPAVX2-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
3914 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
3915 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
3916 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1,30,31,26,27,28,29,24,25,22,23,18,19,20,21,16,17]
3917 ; XOPAVX2-NEXT: retq
3918 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 8, i32 15, i32 13, i32 14, i32 12, i32 11, i32 9, i32 10, i32 8>
3919 ret <16 x i16> %shuffle
3922 define <16 x i16> @shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08(<16 x i16> %a, <16 x i16> %b) {
3923 ; AVX1-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
3925 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3926 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
3927 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,u,u]
3928 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
3929 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
3930 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3933 ; AVX2-SLOW-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
3934 ; AVX2-SLOW: # %bb.0:
3935 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,u,u,18,19,16,17,26,27,24,25,26,27,24,25,18,19,u,u]
3936 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
3937 ; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
3938 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
3939 ; AVX2-SLOW-NEXT: retq
3941 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
3942 ; AVX2-FAST-ALL: # %bb.0:
3943 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,4,u,4,6,u,u>
3944 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
3945 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,6,7,4,5,2,3,8,9,18,19,16,17,22,23,20,21,22,23,20,21,18,19,16,17]
3946 ; AVX2-FAST-ALL-NEXT: retq
3948 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
3949 ; AVX2-FAST-PERLANE: # %bb.0:
3950 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,u,u,18,19,16,17,26,27,24,25,26,27,24,25,18,19,u,u]
3951 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
3952 ; AVX2-FAST-PERLANE-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
3953 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
3954 ; AVX2-FAST-PERLANE-NEXT: retq
3956 ; AVX512VL-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
3957 ; AVX512VL: # %bb.0:
3958 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,5,4,5,4,1,8,9,8,13,12,13,12,9,8]
3959 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
3960 ; AVX512VL-NEXT: retq
3962 ; XOPAVX1-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
3964 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3965 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3],xmm1[0,1]
3966 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
3967 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3968 ; XOPAVX1-NEXT: retq
3970 ; XOPAVX2-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
3972 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,u,u,18,19,16,17,26,27,24,25,26,27,24,25,18,19,u,u]
3973 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
3974 ; XOPAVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
3975 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
3976 ; XOPAVX2-NEXT: retq
3977 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 8, i32 9, i32 8, i32 13, i32 12, i32 13, i32 12, i32 9, i32 8>
3978 ret <16 x i16> %shuffle
3981 define <16 x i16> @shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08(<16 x i16> %a, <16 x i16> %b) {
3982 ; AVX1-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
3984 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3985 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
3986 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,u,u]
3987 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
3988 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
3989 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3992 ; AVX2-SLOW-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
3993 ; AVX2-SLOW: # %bb.0:
3994 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,u,u,26,27,24,25,18,19,16,17,26,27,24,25,18,19,u,u]
3995 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
3996 ; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
3997 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
3998 ; AVX2-SLOW-NEXT: retq
4000 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
4001 ; AVX2-FAST-ALL: # %bb.0:
4002 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,4,u,6,4,u,u>
4003 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4004 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,2,3,0,1,6,7,8,9,18,19,16,17,22,23,20,21,18,19,16,17,22,23,20,21]
4005 ; AVX2-FAST-ALL-NEXT: retq
4007 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
4008 ; AVX2-FAST-PERLANE: # %bb.0:
4009 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,u,u,26,27,24,25,18,19,16,17,26,27,24,25,18,19,u,u]
4010 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4011 ; AVX2-FAST-PERLANE-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
4012 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4013 ; AVX2-FAST-PERLANE-NEXT: retq
4015 ; AVX512VL-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
4016 ; AVX512VL: # %bb.0:
4017 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [5,4,1,0,5,4,1,8,13,12,9,8,13,12,9,8]
4018 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4019 ; AVX512VL-NEXT: retq
4021 ; XOPAVX1-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
4023 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4024 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3],xmm1[0,1]
4025 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
4026 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4027 ; XOPAVX1-NEXT: retq
4029 ; XOPAVX2-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
4031 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,u,u,26,27,24,25,18,19,16,17,26,27,24,25,18,19,u,u]
4032 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4033 ; XOPAVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
4034 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4035 ; XOPAVX2-NEXT: retq
4036 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 8, i32 13, i32 12, i32 9, i32 8, i32 13, i32 12, i32 9, i32 8>
4037 ret <16 x i16> %shuffle
4040 define <16 x i16> @shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12(<16 x i16> %a, <16 x i16> %b) {
4041 ; AVX1-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
4043 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4044 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
4045 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,u,u]
4046 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
4047 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
4048 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4051 ; AVX2-SLOW-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
4052 ; AVX2-SLOW: # %bb.0:
4053 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,u,u,26,27,24,25,18,19,16,17,18,19,16,17,26,27,u,u]
4054 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4055 ; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0
4056 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4057 ; AVX2-SLOW-NEXT: retq
4059 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
4060 ; AVX2-FAST-ALL: # %bb.0:
4061 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,6,u,6,4,u,u>
4062 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4063 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,6,7,4,5,2,3,8,9,18,19,16,17,22,23,20,21,22,23,20,21,18,19,16,17]
4064 ; AVX2-FAST-ALL-NEXT: retq
4066 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
4067 ; AVX2-FAST-PERLANE: # %bb.0:
4068 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,u,u,26,27,24,25,18,19,16,17,18,19,16,17,26,27,u,u]
4069 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4070 ; AVX2-FAST-PERLANE-NEXT: vpsllq $48, %ymm0, %ymm0
4071 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4072 ; AVX2-FAST-PERLANE-NEXT: retq
4074 ; AVX512VL-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
4075 ; AVX512VL: # %bb.0:
4076 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [5,4,1,0,1,0,5,12,13,12,9,8,9,8,13,12]
4077 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4078 ; AVX512VL-NEXT: retq
4080 ; XOPAVX1-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
4082 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4083 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11],xmm1[8,9]
4084 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
4085 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4086 ; XOPAVX1-NEXT: retq
4088 ; XOPAVX2-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
4090 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,u,u,26,27,24,25,18,19,16,17,18,19,16,17,26,27,u,u]
4091 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4092 ; XOPAVX2-NEXT: vpsllq $48, %ymm0, %ymm0
4093 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4094 ; XOPAVX2-NEXT: retq
4095 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 12, i32 13, i32 12, i32 9, i32 8, i32 9, i32 8, i32 13, i32 12>
4096 ret <16 x i16> %shuffle
4099 define <16 x i16> @shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08(<16 x i16> %a, <16 x i16> %b) {
4100 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
4102 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4103 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
4104 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,u,u]
4105 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
4106 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
4107 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4110 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
4111 ; AVX2-SLOW: # %bb.0:
4112 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,u,u,16,17,24,25,24,25,16,17,16,17,24,25,24,25,u,u]
4113 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4114 ; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
4115 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4116 ; AVX2-SLOW-NEXT: retq
4118 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
4119 ; AVX2-FAST-ALL: # %bb.0:
4120 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,4,u,4,6,u,u>
4121 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4122 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,4,5,0,1,0,1,4,5,4,5,8,9,16,17,20,21,20,21,16,17,16,17,20,21,20,21,16,17]
4123 ; AVX2-FAST-ALL-NEXT: retq
4125 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
4126 ; AVX2-FAST-PERLANE: # %bb.0:
4127 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,u,u,16,17,24,25,24,25,16,17,16,17,24,25,24,25,u,u]
4128 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4129 ; AVX2-FAST-PERLANE-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
4130 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4131 ; AVX2-FAST-PERLANE-NEXT: retq
4133 ; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
4134 ; AVX512VL: # %bb.0:
4135 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,4,4,0,0,4,4,8,8,12,12,8,8,12,12,8]
4136 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4137 ; AVX512VL-NEXT: retq
4139 ; XOPAVX1-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
4141 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4142 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9],xmm1[0,1]
4143 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
4144 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4145 ; XOPAVX1-NEXT: retq
4147 ; XOPAVX2-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
4149 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,u,u,16,17,24,25,24,25,16,17,16,17,24,25,24,25,u,u]
4150 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4151 ; XOPAVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
4152 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4153 ; XOPAVX2-NEXT: retq
4154 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8>
4155 ret <16 x i16> %shuffle
4158 define <16 x i16> @shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12(<16 x i16> %a, <16 x i16> %b) {
4159 ; AVX1-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
4161 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4162 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
4163 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,u,u]
4164 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
4165 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
4166 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4169 ; AVX2-SLOW-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
4170 ; AVX2-SLOW: # %bb.0:
4171 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,u,u,24,25,16,17,16,17,24,25,24,25,16,17,16,17,u,u]
4172 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4173 ; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0
4174 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4175 ; AVX2-SLOW-NEXT: retq
4177 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
4178 ; AVX2-FAST-ALL: # %bb.0:
4179 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,6,u,6,4,u,u>
4180 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4181 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,4,5,0,1,0,1,4,5,4,5,8,9,16,17,20,21,20,21,16,17,16,17,20,21,20,21,16,17]
4182 ; AVX2-FAST-ALL-NEXT: retq
4184 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
4185 ; AVX2-FAST-PERLANE: # %bb.0:
4186 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,u,u,24,25,16,17,16,17,24,25,24,25,16,17,16,17,u,u]
4187 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4188 ; AVX2-FAST-PERLANE-NEXT: vpsllq $48, %ymm0, %ymm0
4189 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4190 ; AVX2-FAST-PERLANE-NEXT: retq
4192 ; AVX512VL-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
4193 ; AVX512VL: # %bb.0:
4194 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,0,0,4,4,0,0,12,12,8,8,12,12,8,8,12]
4195 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4196 ; AVX512VL-NEXT: retq
4198 ; XOPAVX1-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
4200 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4201 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1],xmm1[8,9]
4202 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
4203 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4204 ; XOPAVX1-NEXT: retq
4206 ; XOPAVX2-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
4208 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,u,u,24,25,16,17,16,17,24,25,24,25,16,17,16,17,u,u]
4209 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4210 ; XOPAVX2-NEXT: vpsllq $48, %ymm0, %ymm0
4211 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4212 ; XOPAVX2-NEXT: retq
4213 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12>
4214 ret <16 x i16> %shuffle
4217 define <16 x i16> @shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11(<16 x i16> %a, <16 x i16> %b) {
4218 ; AVX1-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
4220 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4221 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
4222 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
4223 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
4224 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
4225 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
4228 ; AVX2-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
4230 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
4231 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
4232 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7,20,21,28,29,24,25,16,17,26,27,18,19,30,31,22,23]
4235 ; AVX512VL-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
4236 ; AVX512VL: # %bb.0:
4237 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [2,6,4,0,5,1,7,11,10,14,12,8,13,9,15,11]
4238 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4239 ; AVX512VL-NEXT: retq
4241 ; XOPAVX1-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
4243 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4244 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15],xmm1[6,7]
4245 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
4246 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4247 ; XOPAVX1-NEXT: retq
4249 ; XOPAVX2-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
4251 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
4252 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
4253 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7,20,21,28,29,24,25,16,17,26,27,18,19,30,31,22,23]
4254 ; XOPAVX2-NEXT: retq
4255 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 11, i32 10, i32 14, i32 12, i32 8, i32 13, i32 9, i32 15, i32 11>
4256 ret <16 x i16> %shuffle
4259 define <16 x i16> @shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11(<16 x i16> %a, <16 x i16> %b) {
4260 ; AVX1-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
4262 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4263 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
4264 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
4265 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
4266 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
4267 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
4270 ; AVX2-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
4272 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
4273 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
4274 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7,20,21,16,17,28,29,24,25,26,27,18,19,30,31,22,23]
4277 ; AVX512VL-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
4278 ; AVX512VL: # %bb.0:
4279 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [2,0,6,4,5,1,7,11,10,8,14,12,13,9,15,11]
4280 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4281 ; AVX512VL-NEXT: retq
4283 ; XOPAVX1-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
4285 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4286 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15],xmm1[6,7]
4287 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
4288 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4289 ; XOPAVX1-NEXT: retq
4291 ; XOPAVX2-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
4293 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
4294 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
4295 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7,20,21,16,17,28,29,24,25,26,27,18,19,30,31,22,23]
4296 ; XOPAVX2-NEXT: retq
4297 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 11, i32 10, i32 8, i32 14, i32 12, i32 13, i32 9, i32 15, i32 11>
4298 ret <16 x i16> %shuffle
4301 define <16 x i16> @shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13(<16 x i16> %a, <16 x i16> %b) {
4302 ; AVX1-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
4304 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4305 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
4306 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
4307 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
4308 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
4309 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
4312 ; AVX2-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
4314 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
4315 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6,7,8,9,10,11,12],ymm1[13],ymm0[14,15]
4316 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11,20,21,28,29,24,25,16,17,18,19,22,23,30,31,26,27]
4319 ; AVX512VL-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
4320 ; AVX512VL: # %bb.0:
4321 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [2,6,4,0,1,3,7,13,10,14,12,8,9,11,15,13]
4322 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4323 ; AVX512VL-NEXT: retq
4325 ; XOPAVX1-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
4327 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4328 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15],xmm1[10,11]
4329 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
4330 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4331 ; XOPAVX1-NEXT: retq
4333 ; XOPAVX2-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
4335 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
4336 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6,7,8,9,10,11,12],ymm1[13],ymm0[14,15]
4337 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11,20,21,28,29,24,25,16,17,18,19,22,23,30,31,26,27]
4338 ; XOPAVX2-NEXT: retq
4339 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 13, i32 10, i32 14, i32 12, i32 8, i32 9, i32 11, i32 15, i32 13>
4340 ret <16 x i16> %shuffle
4343 define <16 x i16> @shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11(<16 x i16> %a, <16 x i16> %b) {
4344 ; AVX1-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
4346 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4347 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
4348 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
4349 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
4350 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
4351 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
4354 ; AVX2-SLOW-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
4355 ; AVX2-SLOW: # %bb.0:
4356 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,5,2,3,4,5,6,7]
4357 ; AVX2-SLOW-NEXT: vpermd %ymm0, %ymm1, %ymm0
4358 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7,28,29,28,29,30,31,26,27,18,19,28,29,24,25,22,23]
4359 ; AVX2-SLOW-NEXT: retq
4361 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
4362 ; AVX2-FAST-ALL: # %bb.0:
4363 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,0,5,7,6,4,5]
4364 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4365 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,2,3,6,7,10,11,0,1,4,5,14,15,16,17,16,17,18,19,22,23,26,27,16,17,20,21,30,31]
4366 ; AVX2-FAST-ALL-NEXT: retq
4368 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
4369 ; AVX2-FAST-PERLANE: # %bb.0:
4370 ; AVX2-FAST-PERLANE-NEXT: vmovdqa {{.*#+}} ymm1 = [0,5,2,3,4,5,6,7]
4371 ; AVX2-FAST-PERLANE-NEXT: vpermd %ymm0, %ymm1, %ymm0
4372 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7,28,29,28,29,30,31,26,27,18,19,28,29,24,25,22,23]
4373 ; AVX2-FAST-PERLANE-NEXT: retq
4375 ; AVX512VL-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
4376 ; AVX512VL: # %bb.0:
4377 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [6,6,7,5,1,6,4,11,14,14,15,13,9,14,12,11]
4378 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4379 ; AVX512VL-NEXT: retq
4381 ; XOPAVX1-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
4383 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4384 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9],xmm1[6,7]
4385 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
4386 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4387 ; XOPAVX1-NEXT: retq
4389 ; XOPAVX2-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
4391 ; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,5,2,3,4,5,6,7]
4392 ; XOPAVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
4393 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7,28,29,28,29,30,31,26,27,18,19,28,29,24,25,22,23]
4394 ; XOPAVX2-NEXT: retq
4395 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 11, i32 14, i32 14, i32 15, i32 13, i32 9, i32 14, i32 12, i32 11>
4396 ret <16 x i16> %shuffle
4399 define <16 x i16> @shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
4400 ; AVX1-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
4402 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4403 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
4404 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,u,u]
4405 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
4406 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
4407 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4410 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
4411 ; AVX2-SLOW: # %bb.0:
4412 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,u,u,16,17,16,17,24,25,24,25,24,25,24,25,24,25,u,u]
4413 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4414 ; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0
4415 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4416 ; AVX2-SLOW-NEXT: retq
4418 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
4419 ; AVX2-FAST-ALL: # %bb.0:
4420 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,6,u,4,6,u,u>
4421 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4422 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,4,5,4,5,4,5,8,9,16,17,16,17,20,21,20,21,20,21,20,21,20,21,20,21]
4423 ; AVX2-FAST-ALL-NEXT: retq
4425 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
4426 ; AVX2-FAST-PERLANE: # %bb.0:
4427 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,u,u,16,17,16,17,24,25,24,25,24,25,24,25,24,25,u,u]
4428 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4429 ; AVX2-FAST-PERLANE-NEXT: vpsllq $48, %ymm0, %ymm0
4430 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4431 ; AVX2-FAST-PERLANE-NEXT: retq
4433 ; AVX512VL-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
4434 ; AVX512VL: # %bb.0:
4435 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,4,4,4,4,4,12,8,8,12,12,12,12,12,12]
4436 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4437 ; AVX512VL-NEXT: retq
4439 ; XOPAVX1-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
4441 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4442 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9],xmm1[8,9]
4443 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
4444 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4445 ; XOPAVX1-NEXT: retq
4447 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
4449 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,u,u,16,17,16,17,24,25,24,25,24,25,24,25,24,25,u,u]
4450 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4451 ; XOPAVX2-NEXT: vpsllq $48, %ymm0, %ymm0
4452 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4453 ; XOPAVX2-NEXT: retq
4454 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
4455 ret <16 x i16> %shuffle
4458 define <16 x i16> @shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
4459 ; AVX1-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
4461 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4462 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
4463 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,u,u]
4464 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
4465 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
4466 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4469 ; AVX2-SLOW-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
4470 ; AVX2-SLOW: # %bb.0:
4471 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,u,u,24,25,24,25,16,17,16,17,24,25,24,25,24,25,u,u]
4472 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4473 ; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0
4474 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4475 ; AVX2-SLOW-NEXT: retq
4477 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
4478 ; AVX2-FAST-ALL: # %bb.0:
4479 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,6,u,6,4,u,u>
4480 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4481 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,0,1,0,1,0,1,8,9,16,17,16,17,20,21,20,21,16,17,16,17,16,17,16,17]
4482 ; AVX2-FAST-ALL-NEXT: retq
4484 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
4485 ; AVX2-FAST-PERLANE: # %bb.0:
4486 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,u,u,24,25,24,25,16,17,16,17,24,25,24,25,24,25,u,u]
4487 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4488 ; AVX2-FAST-PERLANE-NEXT: vpsllq $48, %ymm0, %ymm0
4489 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4490 ; AVX2-FAST-PERLANE-NEXT: retq
4492 ; AVX512VL-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
4493 ; AVX512VL: # %bb.0:
4494 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,0,0,4,4,4,12,12,12,8,8,12,12,12,12]
4495 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4496 ; AVX512VL-NEXT: retq
4498 ; XOPAVX1-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
4500 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4501 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9],xmm1[8,9]
4502 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
4503 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4504 ; XOPAVX1-NEXT: retq
4506 ; XOPAVX2-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
4508 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,u,u,24,25,24,25,16,17,16,17,24,25,24,25,24,25,u,u]
4509 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4510 ; XOPAVX2-NEXT: vpsllq $48, %ymm0, %ymm0
4511 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4512 ; XOPAVX2-NEXT: retq
4513 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
4514 ret <16 x i16> %shuffle
4517 define <16 x i16> @shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
4518 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
4520 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4521 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
4522 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u]
4523 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
4524 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
4525 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4528 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
4529 ; AVX2-SLOW: # %bb.0:
4530 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u,16,17,24,25,24,25,16,17,24,25,24,25,24,25,u,u]
4531 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4532 ; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0
4533 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4534 ; AVX2-SLOW-NEXT: retq
4536 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
4537 ; AVX2-FAST-ALL: # %bb.0:
4538 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,6,u,4,6,u,u>
4539 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4540 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,4,5,0,1,4,5,4,5,4,5,8,9,16,17,20,21,20,21,16,17,20,21,20,21,20,21,20,21]
4541 ; AVX2-FAST-ALL-NEXT: retq
4543 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
4544 ; AVX2-FAST-PERLANE: # %bb.0:
4545 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u,16,17,24,25,24,25,16,17,24,25,24,25,24,25,u,u]
4546 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4547 ; AVX2-FAST-PERLANE-NEXT: vpsllq $48, %ymm0, %ymm0
4548 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4549 ; AVX2-FAST-PERLANE-NEXT: retq
4551 ; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
4552 ; AVX512VL: # %bb.0:
4553 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,4,4,0,4,4,4,12,8,12,12,8,12,12,12,12]
4554 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4555 ; AVX512VL-NEXT: retq
4557 ; XOPAVX1-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
4559 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4560 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9],xmm1[8,9]
4561 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
4562 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4563 ; XOPAVX1-NEXT: retq
4565 ; XOPAVX2-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
4567 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u,16,17,24,25,24,25,16,17,24,25,24,25,24,25,u,u]
4568 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4569 ; XOPAVX2-NEXT: vpsllq $48, %ymm0, %ymm0
4570 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4571 ; XOPAVX2-NEXT: retq
4572 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 12, i32 8, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12>
4573 ret <16 x i16> %shuffle
4576 define <16 x i16> @shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
4577 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
4579 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4580 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
4581 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,u,u]
4582 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
4583 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
4584 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4587 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
4588 ; AVX2-SLOW: # %bb.0:
4589 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,u,u,16,17,24,25,24,25,16,17,16,17,16,17,16,17,u,u]
4590 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4591 ; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
4592 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4593 ; AVX2-SLOW-NEXT: retq
4595 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
4596 ; AVX2-FAST-ALL: # %bb.0:
4597 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,4,u,4,6,u,u>
4598 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4599 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,4,5,0,1,0,1,0,1,0,1,8,9,16,17,20,21,20,21,16,17,16,17,16,17,16,17,16,17]
4600 ; AVX2-FAST-ALL-NEXT: retq
4602 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
4603 ; AVX2-FAST-PERLANE: # %bb.0:
4604 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,u,u,16,17,24,25,24,25,16,17,16,17,16,17,16,17,u,u]
4605 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4606 ; AVX2-FAST-PERLANE-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
4607 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4608 ; AVX2-FAST-PERLANE-NEXT: retq
4610 ; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
4611 ; AVX512VL: # %bb.0:
4612 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,4,4,0,0,0,0,8,8,12,12,8,8,8,8,8]
4613 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4614 ; AVX512VL-NEXT: retq
4616 ; XOPAVX1-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
4618 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4619 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1],xmm1[0,1]
4620 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
4621 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4622 ; XOPAVX1-NEXT: retq
4624 ; XOPAVX2-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
4626 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,u,u,16,17,24,25,24,25,16,17,16,17,16,17,16,17,u,u]
4627 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4628 ; XOPAVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
4629 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4630 ; XOPAVX2-NEXT: retq
4631 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8, i32 8>
4632 ret <16 x i16> %shuffle
4635 define <16 x i16> @shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
4636 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
4638 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4639 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
4640 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
4641 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
4642 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
4643 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
4644 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4647 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
4648 ; AVX2-SLOW: # %bb.0:
4649 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
4650 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
4651 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,2,2,0,4,5,6,7,8,10,10,8,12,13,14,15]
4652 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
4653 ; AVX2-SLOW-NEXT: retq
4655 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
4656 ; AVX2-FAST-ALL: # %bb.0:
4657 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,3,7,4,6,7,u>
4658 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4659 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,4,5,0,1,4,5,6,7,8,9,14,15,16,17,20,21,20,21,16,17,20,21,22,23,24,25,26,27]
4660 ; AVX2-FAST-ALL-NEXT: retq
4662 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
4663 ; AVX2-FAST-PERLANE: # %bb.0:
4664 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
4665 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,u,u,16,17,24,25,24,25,16,17,24,25,26,27,28,29,u,u]
4666 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
4667 ; AVX2-FAST-PERLANE-NEXT: retq
4669 ; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
4670 ; AVX512VL: # %bb.0:
4671 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,4,4,0,4,5,6,15,8,12,12,8,12,13,14,15]
4672 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4673 ; AVX512VL-NEXT: retq
4675 ; XOPAVX1-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
4677 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4678 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13],xmm1[14,15]
4679 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
4680 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
4681 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4682 ; XOPAVX1-NEXT: retq
4684 ; XOPAVX2-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
4686 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
4687 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
4688 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,2,2,0,4,5,6,7,8,10,10,8,12,13,14,15]
4689 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
4690 ; XOPAVX2-NEXT: retq
4691 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 15, i32 8, i32 12, i32 12, i32 8, i32 12, i32 13, i32 14, i32 15>
4692 ret <16 x i16> %shuffle
4695 define <16 x i16> @shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
4696 ; AVX1-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
4698 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4699 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
4700 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,u,u]
4701 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
4702 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
4703 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4706 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
4707 ; AVX2-SLOW: # %bb.0:
4708 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,u,u,16,17,18,19,24,25,24,25,24,25,24,25,24,25,u,u]
4709 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4710 ; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0
4711 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4712 ; AVX2-SLOW-NEXT: retq
4714 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
4715 ; AVX2-FAST-ALL: # %bb.0:
4716 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,6,u,4,6,u,u>
4717 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4718 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,4,5,4,5,4,5,4,5,4,5,8,9,16,17,u,u,20,21,20,21,20,21,20,21,20,21,20,21]
4719 ; AVX2-FAST-ALL-NEXT: retq
4721 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
4722 ; AVX2-FAST-PERLANE: # %bb.0:
4723 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,u,u,16,17,18,19,24,25,24,25,24,25,24,25,24,25,u,u]
4724 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4725 ; AVX2-FAST-PERLANE-NEXT: vpsllq $48, %ymm0, %ymm0
4726 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4727 ; AVX2-FAST-PERLANE-NEXT: retq
4729 ; AVX512VL-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
4730 ; AVX512VL: # %bb.0:
4731 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,u,4,4,4,4,4,12,8,u,12,12,12,12,12,12>
4732 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4733 ; AVX512VL-NEXT: retq
4735 ; XOPAVX1-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
4737 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4738 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9],xmm1[8,9]
4739 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
4740 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4741 ; XOPAVX1-NEXT: retq
4743 ; XOPAVX2-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
4745 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,u,u,16,17,18,19,24,25,24,25,24,25,24,25,24,25,u,u]
4746 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4747 ; XOPAVX2-NEXT: vpsllq $48, %ymm0, %ymm0
4748 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4749 ; XOPAVX2-NEXT: retq
4750 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 undef, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
4751 ret <16 x i16> %shuffle
4754 define <16 x i16> @shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
4755 ; AVX1-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
4757 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4758 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
4759 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,u,u]
4760 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
4761 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
4762 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4765 ; AVX2-SLOW-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
4766 ; AVX2-SLOW: # %bb.0:
4767 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,u,u,24,25,24,25,24,25,16,17,24,25,24,25,24,25,u,u]
4768 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4769 ; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0
4770 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4771 ; AVX2-SLOW-NEXT: retq
4773 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
4774 ; AVX2-FAST-ALL: # %bb.0:
4775 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,6,u,6,4,u,u>
4776 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4777 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,u,u,4,5,0,1,0,1,0,1,8,9,16,17,16,17,u,u,20,21,16,17,16,17,16,17,16,17]
4778 ; AVX2-FAST-ALL-NEXT: retq
4780 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
4781 ; AVX2-FAST-PERLANE: # %bb.0:
4782 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,u,u,24,25,24,25,24,25,16,17,24,25,24,25,24,25,u,u]
4783 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4784 ; AVX2-FAST-PERLANE-NEXT: vpsllq $48, %ymm0, %ymm0
4785 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4786 ; AVX2-FAST-PERLANE-NEXT: retq
4788 ; AVX512VL-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
4789 ; AVX512VL: # %bb.0:
4790 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <4,4,u,0,4,4,4,12,12,12,u,8,12,12,12,12>
4791 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4792 ; AVX512VL-NEXT: retq
4794 ; XOPAVX1-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
4796 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4797 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9],xmm1[8,9]
4798 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
4799 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4800 ; XOPAVX1-NEXT: retq
4802 ; XOPAVX2-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
4804 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,u,u,24,25,24,25,24,25,16,17,24,25,24,25,24,25,u,u]
4805 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4806 ; XOPAVX2-NEXT: vpsllq $48, %ymm0, %ymm0
4807 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4808 ; XOPAVX2-NEXT: retq
4809 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 undef, i32 8, i32 12, i32 12, i32 12, i32 12>
4810 ret <16 x i16> %shuffle
4813 define <16 x i16> @shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
4814 ; AVX1-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
4816 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4817 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
4818 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u]
4819 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
4820 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
4821 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4824 ; AVX2-SLOW-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
4825 ; AVX2-SLOW: # %bb.0:
4826 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u,16,17,24,25,24,25,16,17,24,25,24,25,24,25,u,u]
4827 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4828 ; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0
4829 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4830 ; AVX2-SLOW-NEXT: retq
4832 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
4833 ; AVX2-FAST-ALL: # %bb.0:
4834 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,6,u,6,4,u,u>
4835 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4836 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,0,1,4,5,0,1,0,1,0,1,8,9,u,u,16,17,16,17,20,21,16,17,16,17,16,17,16,17]
4837 ; AVX2-FAST-ALL-NEXT: retq
4839 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
4840 ; AVX2-FAST-PERLANE: # %bb.0:
4841 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u,16,17,24,25,24,25,16,17,24,25,24,25,24,25,u,u]
4842 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4843 ; AVX2-FAST-PERLANE-NEXT: vpsllq $48, %ymm0, %ymm0
4844 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4845 ; AVX2-FAST-PERLANE-NEXT: retq
4847 ; AVX512VL-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
4848 ; AVX512VL: # %bb.0:
4849 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <u,4,4,0,4,4,4,12,u,12,12,8,12,12,12,12>
4850 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4851 ; AVX512VL-NEXT: retq
4853 ; XOPAVX1-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
4855 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4856 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9],xmm1[8,9]
4857 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
4858 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4859 ; XOPAVX1-NEXT: retq
4861 ; XOPAVX2-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
4863 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u,16,17,24,25,24,25,16,17,24,25,24,25,24,25,u,u]
4864 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4865 ; XOPAVX2-NEXT: vpsllq $48, %ymm0, %ymm0
4866 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
4867 ; XOPAVX2-NEXT: retq
4868 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 12, i32 undef, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12>
4869 ret <16 x i16> %shuffle
4872 define <16 x i16> @shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
4873 ; AVX1-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
4875 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4876 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
4877 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
4878 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
4879 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4882 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
4883 ; AVX2OR512VL: # %bb.0:
4884 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15,16,17,18,19,20,21,30,31,20,21,30,31,28,29,30,31]
4885 ; AVX2OR512VL-NEXT: retq
4887 ; XOPAVX1-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
4889 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4890 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
4891 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
4892 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
4893 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4894 ; XOPAVX1-NEXT: retq
4896 ; XOPAVX2-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
4898 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15,16,17,18,19,20,21,30,31,20,21,30,31,28,29,30,31]
4899 ; XOPAVX2-NEXT: retq
4900 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
4901 ret <16 x i16> %shuffle
4904 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11(<16 x i16> %a, <16 x i16> %b) {
4905 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
4907 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4908 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
4909 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
4910 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
4911 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4914 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
4916 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,2]
4917 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,8,9,10,11,0,1,2,3,4,5,14,15,16,17,18,19,24,25,26,27,16,17,18,19,20,21,30,31]
4920 ; AVX512VL-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
4921 ; AVX512VL: # %bb.0:
4922 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,5,8,9,4,5,6,11,12,13,8,9,12,13,14,11]
4923 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4924 ; AVX512VL-NEXT: retq
4926 ; XOPAVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
4928 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4929 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
4930 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
4931 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
4932 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4933 ; XOPAVX1-NEXT: retq
4935 ; XOPAVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
4937 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,2]
4938 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,8,9,10,11,0,1,2,3,4,5,14,15,16,17,18,19,24,25,26,27,16,17,18,19,20,21,30,31]
4939 ; XOPAVX2-NEXT: retq
4940 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11>
4941 ret <16 x i16> %shuffle
4944 define <16 x i16> @shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
4945 ; AVX1-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
4947 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4948 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
4949 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
4950 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
4951 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4954 ; AVX2OR512VL-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
4955 ; AVX2OR512VL: # %bb.0:
4956 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3,24,25,26,27,28,29,22,23,24,25,26,27,16,17,18,19]
4957 ; AVX2OR512VL-NEXT: retq
4959 ; XOPAVX1-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
4961 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4962 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
4963 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
4964 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
4965 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4966 ; XOPAVX1-NEXT: retq
4968 ; XOPAVX2-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
4970 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3,24,25,26,27,28,29,22,23,24,25,26,27,16,17,18,19]
4971 ; XOPAVX2-NEXT: retq
4972 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
4973 ret <16 x i16> %shuffle
4976 define <16 x i16> @shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15(<16 x i16> %a) {
4977 ; AVX1-LABEL: shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15:
4979 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,2,3,4,5,6,7]
4980 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
4981 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4,5,6,7]
4982 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
4985 ; AVX2OR512VL-LABEL: shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15:
4986 ; AVX2OR512VL: # %bb.0:
4987 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3],zero,zero,ymm0[4,5],zero,zero,ymm0[8,9,u,u,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
4988 ; AVX2OR512VL-NEXT: retq
4990 ; XOPAVX1-LABEL: shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15:
4992 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,2,3,4,5,6,7]
4993 ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
4994 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4,5,6,7]
4995 ; XOPAVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
4996 ; XOPAVX1-NEXT: retq
4998 ; XOPAVX2-LABEL: shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15:
5000 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3],zero,zero,ymm0[4,5],zero,zero,ymm0[8,9,u,u,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
5001 ; XOPAVX2-NEXT: retq
5002 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 2, i32 16, i32 4, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5003 ret <16 x i16> %shuffle
5006 define <16 x i16> @shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11(<16 x i16> %a, <16 x i16> %b) {
5007 ; AVX1-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
5009 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
5010 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
5011 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
5012 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
5013 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
5014 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5017 ; AVX2-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
5019 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
5020 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
5021 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7,16,17,18,19,20,21,30,31,24,25,26,27,28,29,22,23]
5024 ; AVX512VL-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
5025 ; AVX512VL: # %bb.0:
5026 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,7,4,5,6,11,8,9,10,15,12,13,14,11]
5027 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
5028 ; AVX512VL-NEXT: retq
5030 ; XOPAVX1-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
5032 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
5033 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13],xmm1[6,7]
5034 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
5035 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
5036 ; XOPAVX1-NEXT: retq
5038 ; XOPAVX2-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
5040 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
5041 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
5042 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7,16,17,18,19,20,21,30,31,24,25,26,27,28,29,22,23]
5043 ; XOPAVX2-NEXT: retq
5044 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 11, i32 8, i32 9, i32 10, i32 15, i32 12, i32 13, i32 14, i32 11>
5045 ret <16 x i16> %shuffle
5048 define <16 x i16> @shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15(<16 x i16> %a, <16 x i16> %b) {
5049 ; AVX1-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
5051 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
5052 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,u,u]
5053 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
5054 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
5055 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
5058 ; AVX2-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
5060 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
5061 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,u,u,24,25,26,27,28,29,22,23,16,17,18,19,20,21,u,u]
5062 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
5065 ; AVX512VL-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
5066 ; AVX512VL: # %bb.0:
5067 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,5,6,3,0,1,2,15,12,13,14,11,8,9,10,15]
5068 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
5069 ; AVX512VL-NEXT: retq
5071 ; XOPAVX1-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
5073 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
5074 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5],xmm1[14,15]
5075 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
5076 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
5077 ; XOPAVX1-NEXT: retq
5079 ; XOPAVX2-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
5081 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
5082 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,u,u,24,25,26,27,28,29,22,23,16,17,18,19,20,21,u,u]
5083 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
5084 ; XOPAVX2-NEXT: retq
5085 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 15, i32 12, i32 13, i32 14, i32 11, i32 8, i32 9, i32 10, i32 15>
5086 ret <16 x i16> %shuffle
5089 define <16 x i16> @shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13(<16 x i16> %a, <16 x i16> %b) {
5090 ; AVX1-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
5092 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
5093 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
5094 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
5095 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
5096 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
5097 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5100 ; AVX2-SLOW-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
5101 ; AVX2-SLOW: # %bb.0:
5102 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,6,3,4,5,6,7]
5103 ; AVX2-SLOW-NEXT: vpermd %ymm0, %ymm1, %ymm0
5104 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11,22,23,30,31,18,19,16,17,20,21,30,31,22,23,26,27]
5105 ; AVX2-SLOW-NEXT: retq
5107 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
5108 ; AVX2-FAST-ALL: # %bb.0:
5109 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,0,6,5,7,4,6]
5110 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
5111 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,6,7,10,11,8,9,0,1,6,7,2,3,14,15,18,19,22,23,26,27,24,25,16,17,22,23,18,19,30,31]
5112 ; AVX2-FAST-ALL-NEXT: retq
5114 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
5115 ; AVX2-FAST-PERLANE: # %bb.0:
5116 ; AVX2-FAST-PERLANE-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,6,3,4,5,6,7]
5117 ; AVX2-FAST-PERLANE-NEXT: vpermd %ymm0, %ymm1, %ymm0
5118 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11,22,23,30,31,18,19,16,17,20,21,30,31,22,23,26,27]
5119 ; AVX2-FAST-PERLANE-NEXT: retq
5121 ; AVX512VL-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
5122 ; AVX512VL: # %bb.0:
5123 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [3,7,1,0,2,7,3,13,11,15,9,8,10,15,11,13]
5124 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
5125 ; AVX512VL-NEXT: retq
5127 ; XOPAVX1-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
5129 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
5130 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7],xmm1[10,11]
5131 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
5132 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
5133 ; XOPAVX1-NEXT: retq
5135 ; XOPAVX2-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
5137 ; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,6,3,4,5,6,7]
5138 ; XOPAVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
5139 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11,22,23,30,31,18,19,16,17,20,21,30,31,22,23,26,27]
5140 ; XOPAVX2-NEXT: retq
5141 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 13, i32 11, i32 15, i32 9, i32 8, i32 10, i32 15, i32 11, i32 13>
5142 ret <16 x i16> %shuffle
5145 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
5146 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
5148 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5149 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5150 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
5151 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3],xmm1[4,5,6,7]
5152 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
5153 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5156 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
5158 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
5159 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
5160 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,6],xmm1[7]
5161 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
5164 ; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
5165 ; AVX512VL: # %bb.0:
5166 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,16,1,17,2,18,3,27,8,24,9,25,10,26,11,27]
5167 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
5168 ; AVX512VL-NEXT: retq
5170 ; XOPAVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
5172 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5173 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5174 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
5175 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3],xmm1[4,5,6,7]
5176 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
5177 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5178 ; XOPAVX1-NEXT: retq
5180 ; XOPAVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
5182 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
5183 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
5184 ; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,6],xmm1[7]
5185 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
5186 ; XOPAVX2-NEXT: retq
5187 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 27, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
5188 ret <16 x i16> %shuffle
5191 define <16 x i16> @shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31(<16 x i16> %a, <16 x i16> %b) {
5192 ; AVX1-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
5194 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
5195 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
5196 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
5197 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
5198 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
5199 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,u,u,u,u,u,u,u,u]
5200 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
5201 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5204 ; AVX2-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
5206 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
5207 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,u,u,16,17,24,25,18,19,26,27,20,21,28,29,22,23,u,u]
5208 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
5209 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
5212 ; AVX512VL-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
5213 ; AVX512VL: # %bb.0:
5214 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,20,1,21,2,22,3,31,8,28,9,29,10,30,11,31]
5215 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
5216 ; AVX512VL-NEXT: retq
5218 ; XOPAVX1-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
5220 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
5221 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
5222 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
5223 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
5224 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13],xmm3[14,15],xmm1[u,u,u,u,u,u,u,u]
5225 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
5226 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5227 ; XOPAVX1-NEXT: retq
5229 ; XOPAVX2-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
5231 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
5232 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,u,u,16,17,24,25,18,19,26,27,20,21,28,29,22,23,u,u]
5233 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
5234 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
5235 ; XOPAVX2-NEXT: retq
5236 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 20, i32 1, i32 21, i32 2, i32 22, i32 3, i32 31, i32 8, i32 28, i32 9, i32 29, i32 10, i32 30, i32 11, i32 31>
5237 ret <16 x i16> %shuffle
5240 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
5241 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
5243 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5244 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5245 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
5246 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6],xmm2[7]
5247 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5248 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5251 ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
5253 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
5254 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
5255 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,6],xmm1[7]
5256 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
5259 ; AVX512VL-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
5260 ; AVX512VL: # %bb.0:
5261 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [4,20,5,21,6,22,7,31,12,28,13,29,14,30,15,31]
5262 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
5263 ; AVX512VL-NEXT: retq
5265 ; XOPAVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
5267 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5268 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5269 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
5270 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6],xmm2[7]
5271 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5272 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5273 ; XOPAVX1-NEXT: retq
5275 ; XOPAVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
5277 ; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
5278 ; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
5279 ; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,6],xmm1[7]
5280 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
5281 ; XOPAVX2-NEXT: retq
5282 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 31, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
5283 ret <16 x i16> %shuffle
5286 define <16 x i16> @shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27(<16 x i16> %a, <16 x i16> %b) {
5287 ; AVX1-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
5289 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5290 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5291 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
5292 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
5293 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
5294 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,0,1,4,5,8,9,14,15]
5295 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5296 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5299 ; AVX2-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
5301 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
5302 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
5303 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
5304 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,0,1,10,11,2,3,12,13,4,5,14,15,6,7,24,25,16,17,26,27,18,19,28,29,20,21,30,31,22,23]
5307 ; AVX512VL-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
5308 ; AVX512VL: # %bb.0:
5309 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [4,16,5,17,6,18,7,27,12,24,13,25,14,26,15,27]
5310 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
5311 ; AVX512VL-NEXT: retq
5313 ; XOPAVX1-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
5315 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5316 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5317 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
5318 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
5319 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,0,1,2,3,4,5],xmm2[6,7]
5320 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5321 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5322 ; XOPAVX1-NEXT: retq
5324 ; XOPAVX2-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
5326 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
5327 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
5328 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
5329 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,0,1,10,11,2,3,12,13,4,5,14,15,6,7,24,25,16,17,26,27,18,19,28,29,20,21,30,31,22,23]
5330 ; XOPAVX2-NEXT: retq
5331 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 16, i32 5, i32 17, i32 6, i32 18, i32 7, i32 27, i32 12, i32 24, i32 13, i32 25, i32 14, i32 26, i32 15, i32 27>
5332 ret <16 x i16> %shuffle
5335 define <16 x i16> @shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
5336 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
5338 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5339 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[0,3,2,3]
5340 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
5341 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[0,3,2,3]
5342 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
5343 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,1,4,5,6,7]
5344 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,6,6]
5345 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
5346 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u]
5347 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
5348 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
5349 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5352 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
5353 ; AVX2-SLOW: # %bb.0:
5354 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3]
5355 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,6],ymm2[7],ymm1[8,9,10,11,12,13,14],ymm2[15]
5356 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,3,2,3,4,7,6,7]
5357 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,2,3,4,7,6,7]
5358 ; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
5359 ; AVX2-SLOW-NEXT: retq
5361 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
5362 ; AVX2-FAST-ALL: # %bb.0:
5363 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm2 = <0,3,7,u,4,7,u,u>
5364 ; AVX2-FAST-ALL-NEXT: vpermd %ymm1, %ymm2, %ymm1
5365 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,10,11,u,u,u,u,u,u,u,u,16,17,18,19,20,21,22,23,u,u,u,u,u,u,u,u]
5366 ; AVX2-FAST-ALL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,2,3,4,7,6,7]
5367 ; AVX2-FAST-ALL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
5368 ; AVX2-FAST-ALL-NEXT: retq
5370 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
5371 ; AVX2-FAST-PERLANE: # %bb.0:
5372 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3]
5373 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,6],ymm2[7],ymm1[8,9,10,11,12,13,14],ymm2[15]
5374 ; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,3,2,3,4,7,6,7]
5375 ; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,2,3,4,7,6,7]
5376 ; AVX2-FAST-PERLANE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
5377 ; AVX2-FAST-PERLANE-NEXT: retq
5379 ; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
5380 ; AVX512VL: # %bb.0:
5381 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,16,1,17,6,22,7,31,8,24,9,25,14,30,15,31]
5382 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
5383 ; AVX512VL-NEXT: retq
5385 ; XOPAVX1-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
5387 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5388 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5389 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm3 = xmm3[0,1],xmm2[0,1],xmm3[2,3],xmm2[2,3],xmm3[12,13],xmm2[12,13],xmm3[14,15],xmm2[14,15]
5390 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[0,1,2,3,12,13],xmm2[14,15],xmm1[u,u,u,u,u,u,u,u]
5391 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
5392 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
5393 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5394 ; XOPAVX1-NEXT: retq
5396 ; XOPAVX2-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
5398 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3]
5399 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,6],ymm2[7],ymm1[8,9,10,11,12,13,14],ymm2[15]
5400 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,3,2,3,4,7,6,7]
5401 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,2,3,4,7,6,7]
5402 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
5403 ; XOPAVX2-NEXT: retq
5404 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 6, i32 22, i32 7, i32 31, i32 8, i32 24, i32 9, i32 25, i32 14, i32 30, i32 15, i32 31>
5405 ret <16 x i16> %shuffle
5408 define <16 x i16> @shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25(<16 x i16> %a, <16 x i16> %b) {
5409 ; AVX1-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
5411 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5412 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,0,2,3]
5413 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
5414 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[0,3,2,3]
5415 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
5416 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7]
5417 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
5418 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
5419 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
5420 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5423 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
5424 ; AVX2-SLOW: # %bb.0:
5425 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3]
5426 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2,3,4,5,6,7,8],ymm2[9],ymm1[10,11,12,13,14,15]
5427 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,0,2,3,6,4,6,7]
5428 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,2,3,4,7,6,7]
5429 ; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
5430 ; AVX2-SLOW-NEXT: retq
5432 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
5433 ; AVX2-FAST-ALL: # %bb.0:
5434 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm2 = <2,0,4,u,6,4,u,u>
5435 ; AVX2-FAST-ALL-NEXT: vpermd %ymm1, %ymm2, %ymm1
5436 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,10,11,u,u,u,u,u,u,u,u,16,17,18,19,20,21,22,23,u,u,u,u,u,u,u,u]
5437 ; AVX2-FAST-ALL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,2,3,4,7,6,7]
5438 ; AVX2-FAST-ALL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
5439 ; AVX2-FAST-ALL-NEXT: retq
5441 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
5442 ; AVX2-FAST-PERLANE: # %bb.0:
5443 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3]
5444 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2,3,4,5,6,7,8],ymm2[9],ymm1[10,11,12,13,14,15]
5445 ; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,0,2,3,6,4,6,7]
5446 ; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,2,3,4,7,6,7]
5447 ; AVX2-FAST-PERLANE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
5448 ; AVX2-FAST-PERLANE-NEXT: retq
5450 ; AVX512VL-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
5451 ; AVX512VL: # %bb.0:
5452 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,20,1,21,6,16,7,25,8,28,9,29,14,24,15,25]
5453 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
5454 ; AVX512VL-NEXT: retq
5456 ; XOPAVX1-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
5458 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5459 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5460 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm3 = xmm3[0,1],xmm2[8,9],xmm3[2,3],xmm2[10,11],xmm3[12,13],xmm2[0,1],xmm3[14,15],xmm2[2,3]
5461 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[8,9,10,11,0,1],xmm2[2,3],xmm1[u,u,u,u,u,u,u,u]
5462 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
5463 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
5464 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5465 ; XOPAVX1-NEXT: retq
5467 ; XOPAVX2-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
5469 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3]
5470 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2,3,4,5,6,7,8],ymm2[9],ymm1[10,11,12,13,14,15]
5471 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,0,2,3,6,4,6,7]
5472 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,2,3,4,7,6,7]
5473 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
5474 ; XOPAVX2-NEXT: retq
5475 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 20, i32 1, i32 21, i32 6, i32 16, i32 7, i32 25, i32 8, i32 28, i32 9, i32 29, i32 14, i32 24, i32 15, i32 25>
5476 ret <16 x i16> %shuffle
5479 define <16 x i16> @shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26(<16 x i16> %a, <16 x i16> %b) {
5480 ; AVX1-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
5482 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5483 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
5484 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,12,13,10,11,u,u,u,u,u,u,u,u]
5485 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[1,0,3,2,4,5,6,7]
5486 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
5487 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
5488 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
5489 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
5490 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
5491 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
5494 ; AVX2-SLOW-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
5495 ; AVX2-SLOW: # %bb.0:
5496 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
5497 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,2,3,0,1,u,u,u,u,6,7,12,13,u,u,u,u,18,19,16,17,u,u,u,u,22,23,20,21]
5498 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
5499 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,5,7]
5500 ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
5501 ; AVX2-SLOW-NEXT: retq
5503 ; AVX2-FAST-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
5504 ; AVX2-FAST: # %bb.0:
5505 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,u,u,u,u,6,7,4,5,u,u,u,u,18,19,16,17,u,u,u,u,22,23,20,21,u,u,u,u]
5506 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
5507 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,2,3,0,1,u,u,u,u,6,7,12,13,u,u,u,u,18,19,16,17,u,u,u,u,22,23,20,21]
5508 ; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
5509 ; AVX2-FAST-NEXT: retq
5511 ; AVX512VL-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
5512 ; AVX512VL: # %bb.0:
5513 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,0,17,16,3,2,19,26,9,8,25,24,11,10,27,26]
5514 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
5515 ; AVX512VL-NEXT: retq
5517 ; XOPAVX1-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
5519 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5520 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5521 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm3 = xmm3[2,3,0,1],xmm2[2,3,0,1],xmm3[6,7,4,5],xmm2[6,7,4,5]
5522 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[2,3,0,1,6,7],xmm2[4,5],xmm1[u,u,u,u,u,u,u,u]
5523 ; XOPAVX1-NEXT: vprotd $16, %xmm0, %xmm0
5524 ; XOPAVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
5525 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5526 ; XOPAVX1-NEXT: retq
5528 ; XOPAVX2-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
5530 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
5531 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,2,3,0,1,u,u,u,u,6,7,12,13,u,u,u,u,18,19,16,17,u,u,u,u,22,23,20,21]
5532 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
5533 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,5,7]
5534 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
5535 ; XOPAVX2-NEXT: retq
5536 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 17, i32 16, i32 3, i32 2, i32 19, i32 26, i32 9, i32 8, i32 25, i32 24, i32 11, i32 10, i32 27, i32 26>
5537 ret <16 x i16> %shuffle
5540 define <16 x i16> @shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11(<16 x i16> %a, <16 x i16> %b) {
5541 ; AVX1-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
5543 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
5544 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
5545 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
5546 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm2[3],xmm0[4,5,6,7]
5547 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
5548 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5551 ; AVX2-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
5553 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11]
5554 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
5555 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,6],xmm1[7]
5556 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
5559 ; AVX512VL-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
5560 ; AVX512VL: # %bb.0:
5561 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,16,1,17,2,18,3,27,8,24,9,25,10,26,11,27]
5562 ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
5563 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
5564 ; AVX512VL-NEXT: retq
5566 ; XOPAVX1-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
5568 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
5569 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
5570 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
5571 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm2[3],xmm0[4,5,6,7]
5572 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
5573 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5574 ; XOPAVX1-NEXT: retq
5576 ; XOPAVX2-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
5578 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11]
5579 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
5580 ; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,6],xmm1[7]
5581 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
5582 ; XOPAVX2-NEXT: retq
5583 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 0, i32 17, i32 1, i32 18, i32 2, i32 19, i32 11, i32 24, i32 8, i32 25, i32 9, i32 26, i32 10, i32 27, i32 11>
5584 ret <16 x i16> %shuffle
5587 define <16 x i16> @shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15(<16 x i16> %a, <16 x i16> %b) {
5588 ; AVX1-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
5590 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
5591 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
5592 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
5593 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
5594 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
5595 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5598 ; AVX2-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
5600 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15]
5601 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
5602 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
5603 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
5606 ; AVX512VL-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
5607 ; AVX512VL: # %bb.0:
5608 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [4,20,5,21,6,22,7,31,12,28,13,29,14,30,15,31]
5609 ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
5610 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
5611 ; AVX512VL-NEXT: retq
5613 ; XOPAVX1-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
5615 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
5616 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
5617 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
5618 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
5619 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
5620 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5621 ; XOPAVX1-NEXT: retq
5623 ; XOPAVX2-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
5625 ; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15]
5626 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
5627 ; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
5628 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
5629 ; XOPAVX2-NEXT: retq
5630 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 4, i32 21, i32 5, i32 22, i32 6, i32 23, i32 15, i32 28, i32 12, i32 29, i32 13, i32 30, i32 14, i32 31, i32 15>
5631 ret <16 x i16> %shuffle
5634 define <16 x i16> @shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31(<16 x i16> %a, <16 x i16> %b) {
5635 ; AVX1-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
5637 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5638 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm2[0,1,2,3,4,6,5,7]
5639 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
5640 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[0,2,1,3,4,5,6,7]
5641 ; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
5642 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
5643 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,4,5,14,15,u,u,u,u,u,u,u,u]
5644 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
5645 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
5646 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5649 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
5650 ; AVX2-SLOW: # %bb.0:
5651 ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
5652 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,2,1,3,4,5,6,7,8,10,9,11,12,13,14,15]
5653 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,5,7,8,9,10,11,12,14,13,15]
5654 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
5655 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
5656 ; AVX2-SLOW-NEXT: retq
5658 ; AVX2-FAST-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
5659 ; AVX2-FAST: # %bb.0:
5660 ; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
5661 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,2,3,6,7,8,9,12,13,10,11,u,u,16,17,20,21,18,19,22,23,24,25,28,29,26,27,u,u]
5662 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
5663 ; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
5664 ; AVX2-FAST-NEXT: retq
5666 ; AVX512VL-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
5667 ; AVX512VL: # %bb.0:
5668 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,1,3,20,22,21,31,8,10,9,11,28,30,29,31]
5669 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
5670 ; AVX512VL-NEXT: retq
5672 ; XOPAVX1-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
5674 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5675 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5676 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm3 = xmm3[0,1,4,5,2,3,6,7],xmm2[8,9,12,13,10,11,14,15]
5677 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11],xmm2[14,15],xmm1[u,u,u,u,u,u,u,u]
5678 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
5679 ; XOPAVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
5680 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5681 ; XOPAVX1-NEXT: retq
5683 ; XOPAVX2-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
5685 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
5686 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,2,1,3,4,5,6,7,8,10,9,11,12,13,14,15]
5687 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,5,7,8,9,10,11,12,14,13,15]
5688 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
5689 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
5690 ; XOPAVX2-NEXT: retq
5691 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 2, i32 1, i32 3, i32 20, i32 22, i32 21, i32 31, i32 8, i32 10, i32 9, i32 11, i32 28, i32 30, i32 29, i32 31>
5692 ret <16 x i16> %shuffle
5695 define <16 x i16> @shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
5696 ; AVX1-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
5698 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
5699 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,1,2,3]
5700 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,3,2,4,5,6,7]
5701 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
5702 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
5703 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
5704 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
5705 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
5706 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
5709 ; AVX2-SLOW-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
5710 ; AVX2-SLOW: # %bb.0:
5711 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
5712 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,1,2,3,6,5,6,7]
5713 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,3,2,4,5,6,7,8,8,11,10,12,13,14,15]
5714 ; AVX2-SLOW-NEXT: retq
5716 ; AVX2-FAST-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
5717 ; AVX2-FAST: # %bb.0:
5718 ; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
5719 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,6,7,4,5,8,9,10,11,12,13,14,15,24,25,24,25,22,23,20,21,24,25,26,27,28,29,30,31]
5720 ; AVX2-FAST-NEXT: retq
5722 ; AVX512VL-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
5723 ; AVX512VL: # %bb.0:
5724 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <4,4,3,18,u,u,u,u,12,12,11,26,u,u,u,u>
5725 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
5726 ; AVX512VL-NEXT: retq
5728 ; XOPAVX1-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
5730 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5731 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5732 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [8,9,8,9,6,7,20,21,8,9,10,11,12,13,14,15]
5733 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm3, %xmm2
5734 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm1, %xmm0, %xmm0
5735 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5736 ; XOPAVX1-NEXT: retq
5738 ; XOPAVX2-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
5740 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
5741 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,1,2,3,6,5,6,7]
5742 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,3,2,4,5,6,7,8,8,11,10,12,13,14,15]
5743 ; XOPAVX2-NEXT: retq
5744 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 3, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 12, i32 11, i32 26, i32 undef, i32 undef, i32 undef, i32 undef>
5745 ret <16 x i16> %shuffle
5748 define <16 x i16> @shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
5749 ; AVX1-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
5751 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5752 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5753 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
5754 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
5755 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
5756 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
5757 ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
5758 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5761 ; AVX2-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
5763 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
5764 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3,16,17,22,23,20,21,26,27,16,17,26,27,16,17,18,19]
5767 ; AVX512VL-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
5768 ; AVX512VL: # %bb.0:
5769 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <0,3,2,21,u,u,u,u,8,11,10,29,u,u,u,u>
5770 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
5771 ; AVX512VL-NEXT: retq
5773 ; XOPAVX1-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
5775 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5776 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5777 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,6,7,4,5,26,27,0,1,26,27,0,1,2,3]
5778 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm3, %xmm2
5779 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm1, %xmm0, %xmm0
5780 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5781 ; XOPAVX1-NEXT: retq
5783 ; XOPAVX2-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
5785 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
5786 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3,16,17,22,23,20,21,26,27,16,17,26,27,16,17,18,19]
5787 ; XOPAVX2-NEXT: retq
5788 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 3, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 11, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
5789 ret <16 x i16> %shuffle
5792 define <16 x i16> @shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
5793 ; ALL-LABEL: shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu:
5795 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[2,2,2,2,6,6,6,6]
5797 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
5798 ret <16 x i16> %shuffle
5801 define <16 x i16> @shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
5802 ; AVX1-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu:
5804 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
5805 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
5806 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
5807 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[3],xmm2[4,5,6,7]
5808 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
5809 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
5810 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5813 ; AVX2-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu:
5815 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,2,2,2,6,6,6,6]
5816 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
5819 ; AVX512VL-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu:
5820 ; AVX512VL: # %bb.0:
5821 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <0,1,2,21,u,u,u,u,8,9,10,29,u,u,u,u>
5822 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
5823 ; AVX512VL-NEXT: retq
5825 ; XOPAVX1-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu:
5827 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
5828 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
5829 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
5830 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[3],xmm2[4,5,6,7]
5831 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
5832 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
5833 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5834 ; XOPAVX1-NEXT: retq
5836 ; XOPAVX2-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu:
5838 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,2,2,2,6,6,6,6]
5839 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
5840 ; XOPAVX2-NEXT: retq
5841 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
5842 ret <16 x i16> %shuffle
5845 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11(<16 x i16> %a, <16 x i16> %b) {
5846 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
5848 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5849 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
5850 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
5851 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm0[7]
5852 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
5853 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5856 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
5858 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
5859 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
5862 ; AVX512VL-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
5863 ; AVX512VL: # %bb.0:
5864 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <u,u,u,u,4,5,6,27,u,u,u,u,12,13,14,27>
5865 ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
5866 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
5867 ; AVX512VL-NEXT: retq
5869 ; XOPAVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
5871 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5872 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
5873 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
5874 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm0[7]
5875 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
5876 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5877 ; XOPAVX1-NEXT: retq
5879 ; XOPAVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
5881 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
5882 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
5883 ; XOPAVX2-NEXT: retq
5884 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 20, i32 21, i32 22, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11>
5885 ret <16 x i16> %shuffle
5888 define <16 x i16> @shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
5889 ; AVX1-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
5891 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
5892 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
5893 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
5894 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[3],xmm3[4,5,6,7]
5895 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
5896 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
5897 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5900 ; AVX2-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
5902 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
5903 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6,7,8,9,10],ymm0[11],ymm1[12,13,14,15]
5906 ; AVX512VL-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
5907 ; AVX512VL: # %bb.0:
5908 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <4,5,6,19,u,u,u,u,12,13,14,27,u,u,u,u>
5909 ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
5910 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
5911 ; AVX512VL-NEXT: retq
5913 ; XOPAVX1-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
5915 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
5916 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
5917 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
5918 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[3],xmm3[4,5,6,7]
5919 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
5920 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
5921 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5922 ; XOPAVX1-NEXT: retq
5924 ; XOPAVX2-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
5926 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
5927 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6,7,8,9,10],ymm0[11],ymm1[12,13,14,15]
5928 ; XOPAVX2-NEXT: retq
5929 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 21, i32 22, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
5930 ret <16 x i16> %shuffle
5933 define <16 x i16> @shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11(<16 x i16> %a, <16 x i16> %b) {
5934 ; AVX1-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
5936 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
5937 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
5938 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[0,2,2,3]
5939 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3,4,5,6],xmm0[7]
5940 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
5941 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
5942 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
5943 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
5946 ; AVX2-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
5948 ; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
5949 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
5950 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6,7,8,9,10],ymm0[11],ymm1[12,13,14,15]
5951 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7,16,17,18,19,20,21,26,27,24,25,26,27,28,29,22,23]
5954 ; AVX512VL-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
5955 ; AVX512VL: # %bb.0:
5956 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,21,20,21,22,11,8,9,10,29,28,29,30,11]
5957 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
5958 ; AVX512VL-NEXT: retq
5960 ; XOPAVX1-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
5962 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5963 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5964 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm2 = xmm3[0,1,2,3,4,5],xmm2[10,11,8,9,10,11,12,13],xmm3[6,7]
5965 ; XOPAVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
5966 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
5967 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3,4,5,6],xmm0[7]
5968 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5969 ; XOPAVX1-NEXT: retq
5971 ; XOPAVX2-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
5973 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
5974 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
5975 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6,7,8,9,10],ymm0[11],ymm1[12,13,14,15]
5976 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7,16,17,18,19,20,21,26,27,24,25,26,27,28,29,22,23]
5977 ; XOPAVX2-NEXT: retq
5978 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 21, i32 20, i32 21, i32 22, i32 11, i32 8, i32 9, i32 10, i32 29, i32 28, i32 29, i32 30, i32 11>
5979 ret <16 x i16> %shuffle
5982 define <16 x i16> @shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15(<16 x i16> %a, <16 x i16> %b) {
5983 ; AVX1-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15:
5985 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
5986 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
5987 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2,3],xmm2[4,5,6],xmm3[7]
5988 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
5989 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
5990 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5993 ; AVX2-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15:
5995 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
5996 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4,5,6],ymm0[7,8],ymm1[9],ymm0[10,11],ymm1[12,13,14],ymm0[15]
5999 ; AVX512VL-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15:
6000 ; AVX512VL: # %bb.0:
6001 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,17,2,3,20,21,22,15,8,25,10,11,28,29,30,15]
6002 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
6003 ; AVX512VL-NEXT: retq
6005 ; XOPAVX1-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15:
6007 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
6008 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
6009 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2,3],xmm2[4,5,6],xmm3[7]
6010 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
6011 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
6012 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6013 ; XOPAVX1-NEXT: retq
6015 ; XOPAVX2-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15:
6017 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
6018 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4,5,6],ymm0[7,8],ymm1[9],ymm0[10,11],ymm1[12,13,14],ymm0[15]
6019 ; XOPAVX2-NEXT: retq
6020 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 20, i32 21, i32 22, i32 15, i32 8, i32 25, i32 10, i32 11, i32 28, i32 29, i32 30, i32 15>
6021 ret <16 x i16> %shuffle
6024 define <16 x i16> @shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25(<16 x i16> %a, <16 x i16> %b) {
6025 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
6027 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
6028 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
6029 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
6030 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[1,1,1,1,4,5,6,7]
6031 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,7,7]
6032 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm1[7]
6033 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
6034 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
6035 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
6036 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6039 ; AVX2-SLOW-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
6040 ; AVX2-SLOW: # %bb.0:
6041 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
6042 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
6043 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,1,1,1,4,5,6,7,9,9,9,9,12,13,14,15]
6044 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,7,7,8,9,10,11,12,13,15,15]
6045 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
6046 ; AVX2-SLOW-NEXT: retq
6048 ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
6049 ; AVX2-FAST-ALL: # %bb.0:
6050 ; AVX2-FAST-ALL-NEXT: vpbroadcastd {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4]
6051 ; AVX2-FAST-ALL-NEXT: vpermd %ymm1, %ymm2, %ymm1
6052 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,2,3,2,3,2,3,8,9,10,11,14,15,u,u,18,19,18,19,18,19,18,19,24,25,26,27,30,31,u,u]
6053 ; AVX2-FAST-ALL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
6054 ; AVX2-FAST-ALL-NEXT: retq
6056 ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
6057 ; AVX2-FAST-PERLANE: # %bb.0:
6058 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,2,3,2,3,2,3,8,9,10,11,14,15,u,u,18,19,18,19,18,19,18,19,24,25,26,27,30,31,u,u]
6059 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
6060 ; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
6061 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
6062 ; AVX2-FAST-PERLANE-NEXT: retq
6064 ; AVX512VL-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
6065 ; AVX512VL: # %bb.0:
6066 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <u,u,u,1,u,5,7,25,u,u,u,9,u,13,15,25>
6067 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
6068 ; AVX512VL-NEXT: retq
6070 ; XOPAVX1-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
6072 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
6073 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
6074 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
6075 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [2,3,2,3,2,3,2,3,8,9,10,11,14,15,30,31]
6076 ; XOPAVX1-NEXT: vpperm %xmm3, %xmm1, %xmm2, %xmm2
6077 ; XOPAVX1-NEXT: vpperm %xmm3, %xmm1, %xmm0, %xmm0
6078 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6079 ; XOPAVX1-NEXT: retq
6081 ; XOPAVX2-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
6083 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
6084 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
6085 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,1,1,1,4,5,6,7,9,9,9,9,12,13,14,15]
6086 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,7,7,8,9,10,11,12,13,15,15]
6087 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
6088 ; XOPAVX2-NEXT: retq
6089 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 25, i32 undef, i32 undef, i32 undef, i32 9, i32 undef, i32 13, i32 15, i32 25>
6090 ret <16 x i16> %shuffle
6093 define <16 x i16> @shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu(<16 x i16> %a, <16 x i16> %b) {
6094 ; AVX1-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
6096 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
6097 ; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [0,1,4,5,8,9,4,5,0,1,4,5,8,9,4,5]
6098 ; AVX1-NEXT: # xmm3 = mem[0,0]
6099 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
6100 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
6101 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3]
6102 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
6103 ; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1
6104 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
6105 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
6106 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6109 ; AVX2-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
6111 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,0,1,4,5,8,9,4,5,u,u,u,u,u,u,u,u,16,17,20,21,24,25,20,21]
6112 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
6113 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
6116 ; AVX512VL-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
6117 ; AVX512VL: # %bb.0:
6118 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <u,u,20,u,0,2,4,u,u,u,28,u,8,10,12,u>
6119 ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
6120 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
6121 ; AVX512VL-NEXT: retq
6123 ; XOPAVX1-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
6125 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
6126 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
6127 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [8,9,10,11,8,9,10,11,16,17,20,21,24,25,20,21]
6128 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm3, %xmm2
6129 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm1, %xmm0, %xmm0
6130 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6131 ; XOPAVX1-NEXT: retq
6133 ; XOPAVX2-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
6135 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,0,1,4,5,8,9,4,5,u,u,u,u,u,u,u,u,16,17,20,21,24,25,20,21]
6136 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
6137 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
6138 ; XOPAVX2-NEXT: retq
6139 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 16, i32 18, i32 20, i32 undef, i32 undef, i32 undef, i32 12, i32 undef, i32 24, i32 26, i32 28, i32 undef>
6140 ret <16 x i16> %shuffle
6143 define <16 x i16> @shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12(<16 x i16> %a, <16 x i16> %b) {
6144 ; AVX1-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
6146 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
6147 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
6148 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
6149 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
6150 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
6151 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6154 ; AVX2-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
6156 ; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3,4],ymm1[5,6,7],ymm0[8,9,10,11,12],ymm1[13,14,15]
6157 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
6158 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4],ymm1[5,6,7,8,9,10,11],ymm0[12],ymm1[13,14,15]
6159 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
6162 ; AVX512VL-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
6163 ; AVX512VL: # %bb.0:
6164 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [21,22,23,0,1,2,3,12,29,30,31,8,9,10,11,12]
6165 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
6166 ; AVX512VL-NEXT: retq
6168 ; XOPAVX1-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
6170 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
6171 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
6172 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
6173 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
6174 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
6175 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6176 ; XOPAVX1-NEXT: retq
6178 ; XOPAVX2-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
6180 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3,4],ymm1[5,6,7],ymm0[8,9,10,11,12],ymm1[13,14,15]
6181 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
6182 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4],ymm1[5,6,7,8,9,10,11],ymm0[12],ymm1[13,14,15]
6183 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
6184 ; XOPAVX2-NEXT: retq
6185 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 12, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12>
6186 ret <16 x i16> %shuffle
6189 define <16 x i16> @shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) {
6190 ; AVX1-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu:
6192 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
6193 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
6194 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
6195 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
6196 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6199 ; AVX2OR512VL-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu:
6200 ; AVX2OR512VL: # %bb.0:
6201 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9],ymm1[26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25]
6202 ; AVX2OR512VL-NEXT: retq
6204 ; XOPAVX1-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu:
6206 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
6207 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
6208 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
6209 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
6210 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6211 ; XOPAVX1-NEXT: retq
6213 ; XOPAVX2-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu:
6215 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9],ymm1[26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25]
6216 ; XOPAVX2-NEXT: retq
6217 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 22, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 30, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
6218 ret <16 x i16> %shuffle
6221 define <16 x i16> @shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12(<16 x i16> %a, <16 x i16> %b) {
6222 ; AVX1-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
6224 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
6225 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
6226 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
6227 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
6228 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
6231 ; AVX2-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
6233 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
6234 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6,7,8,9,10,11],ymm1[12],ymm0[13,14,15]
6235 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
6238 ; AVX512VL-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
6239 ; AVX512VL: # %bb.0:
6240 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [5,6,7,0,1,2,3,12,13,14,15,8,9,10,11,12]
6241 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
6242 ; AVX512VL-NEXT: retq
6244 ; XOPAVX1-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
6246 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
6247 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
6248 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
6249 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
6250 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
6251 ; XOPAVX1-NEXT: retq
6253 ; XOPAVX2-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
6255 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
6256 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6,7,8,9,10,11],ymm1[12],ymm0[13,14,15]
6257 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
6258 ; XOPAVX2-NEXT: retq
6259 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12>
6260 ret <16 x i16> %shuffle
6263 define <16 x i16> @shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) {
6264 ; AVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu:
6266 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
6267 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
6268 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
6269 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
6272 ; AVX2OR512VL-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu:
6273 ; AVX2OR512VL: # %bb.0:
6274 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
6275 ; AVX2OR512VL-NEXT: retq
6277 ; XOPAVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu:
6279 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
6280 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
6281 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
6282 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
6283 ; XOPAVX1-NEXT: retq
6285 ; XOPAVX2-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu:
6287 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
6288 ; XOPAVX2-NEXT: retq
6289 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 14, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
6290 ret <16 x i16> %shuffle
6293 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) {
6294 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu:
6296 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
6297 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
6298 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
6299 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
6302 ; AVX2OR512VL-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu:
6303 ; AVX2OR512VL: # %bb.0:
6304 ; AVX2OR512VL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9],zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25]
6305 ; AVX2OR512VL-NEXT: retq
6307 ; XOPAVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu:
6309 ; XOPAVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
6310 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
6311 ; XOPAVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
6312 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
6313 ; XOPAVX1-NEXT: retq
6315 ; XOPAVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu:
6317 ; XOPAVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9],zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25]
6318 ; XOPAVX2-NEXT: retq
6319 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
6320 ret <16 x i16> %shuffle
6323 define <16 x i16> @shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10(<16 x i16> %a, <16 x i16> %b) {
6324 ; AVX1-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10:
6326 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
6327 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
6328 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
6329 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
6330 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,0,1,4,5,10,11]
6331 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
6332 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4],xmm0[5,6,7]
6333 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6336 ; AVX2-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10:
6338 ; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2],ymm1[3,4,5,6,7],ymm0[8,9,10],ymm1[11,12,13,14,15]
6339 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
6340 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3,4,5,6,7,8,9],ymm0[10],ymm1[11,12,13,14,15]
6341 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
6344 ; AVX512VL-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10:
6345 ; AVX512VL: # %bb.0:
6346 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [3,4,5,6,7,16,17,26,11,12,13,14,15,24,25,26]
6347 ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
6348 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
6349 ; AVX512VL-NEXT: retq
6351 ; XOPAVX1-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10:
6353 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
6354 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
6355 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
6356 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
6357 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,4,5,10,11]
6358 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6359 ; XOPAVX1-NEXT: retq
6361 ; XOPAVX2-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10:
6363 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2],ymm1[3,4,5,6,7],ymm0[8,9,10],ymm1[11,12,13,14,15]
6364 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
6365 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3,4,5,6,7,8,9],ymm0[10],ymm1[11,12,13,14,15]
6366 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
6367 ; XOPAVX2-NEXT: retq
6368 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 10, i32 27, i32 28, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10>
6369 ret <16 x i16> %shuffle
6372 define <16 x i16> @shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu(<16 x i16> %a, <16 x i16> %b) {
6373 ; AVX1-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu:
6375 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
6376 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
6377 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
6378 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
6379 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6382 ; AVX2OR512VL-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu:
6383 ; AVX2OR512VL: # %bb.0:
6384 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5],ymm1[22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21]
6385 ; AVX2OR512VL-NEXT: retq
6387 ; XOPAVX1-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu:
6389 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
6390 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
6391 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
6392 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
6393 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6394 ; XOPAVX1-NEXT: retq
6396 ; XOPAVX2-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu:
6398 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5],ymm1[22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21]
6399 ; XOPAVX2-NEXT: retq
6400 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 20, i32 21, i32 22, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 undef, i32 undef, i32 9, i32 undef>
6401 ret <16 x i16> %shuffle
6404 define <16 x i16> @shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10(<16 x i16> %a, <16 x i16> %b) {
6405 ; AVX1-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
6407 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
6408 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
6409 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
6410 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
6411 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
6414 ; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
6416 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
6417 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
6418 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
6421 ; AVX512VL-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
6422 ; AVX512VL: # %bb.0:
6423 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [3,4,5,6,7,0,1,10,11,12,13,14,15,8,9,10]
6424 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
6425 ; AVX512VL-NEXT: retq
6427 ; XOPAVX1-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
6429 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
6430 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
6431 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
6432 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
6433 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
6434 ; XOPAVX1-NEXT: retq
6436 ; XOPAVX2-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
6438 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
6439 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
6440 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
6441 ; XOPAVX2-NEXT: retq
6442 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10>
6443 ret <16 x i16> %shuffle
6446 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu(<16 x i16> %a, <16 x i16> %b) {
6447 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu:
6449 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
6450 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
6451 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
6452 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
6455 ; AVX2OR512VL-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu:
6456 ; AVX2OR512VL: # %bb.0:
6457 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
6458 ; AVX2OR512VL-NEXT: retq
6460 ; XOPAVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu:
6462 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
6463 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
6464 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
6465 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
6466 ; XOPAVX1-NEXT: retq
6468 ; XOPAVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu:
6470 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
6471 ; XOPAVX2-NEXT: retq
6472 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 9, i32 undef>
6473 ret <16 x i16> %shuffle
6476 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
6477 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu:
6479 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
6480 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
6481 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
6482 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
6485 ; AVX2OR512VL-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu:
6486 ; AVX2OR512VL: # %bb.0:
6487 ; AVX2OR512VL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,ymm0[22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero
6488 ; AVX2OR512VL-NEXT: retq
6490 ; XOPAVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu:
6492 ; XOPAVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
6493 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
6494 ; XOPAVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
6495 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
6496 ; XOPAVX1-NEXT: retq
6498 ; XOPAVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu:
6500 ; XOPAVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,ymm0[22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero
6501 ; XOPAVX2-NEXT: retq
6502 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 undef, i32 undef>
6503 ret <16 x i16> %shuffle
6506 define <16 x i16> @shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26(<16 x i16> %a, <16 x i16> %b) {
6507 ; AVX1-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26:
6509 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
6510 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
6511 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
6512 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
6513 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,u,u,0,1,4,5,10,11]
6514 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
6515 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5,6,7]
6516 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6519 ; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26:
6521 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
6522 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
6523 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
6524 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
6527 ; AVX512VL-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26:
6528 ; AVX512VL: # %bb.0:
6529 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [3,4,5,6,7,16,17,26,11,12,13,14,15,24,25,26]
6530 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
6531 ; AVX512VL-NEXT: retq
6533 ; XOPAVX1-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26:
6535 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
6536 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
6537 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
6538 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
6539 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,4,5,10,11]
6540 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6541 ; XOPAVX1-NEXT: retq
6543 ; XOPAVX2-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26:
6545 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
6546 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
6547 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
6548 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
6549 ; XOPAVX2-NEXT: retq
6550 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 26, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26>
6551 ret <16 x i16> %shuffle
6554 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu(<16 x i16> %a, <16 x i16> %b) {
6555 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu:
6557 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
6558 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
6559 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
6560 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
6561 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6564 ; AVX2OR512VL-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu:
6565 ; AVX2OR512VL: # %bb.0:
6566 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3,4,5],ymm0[22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19,20,21]
6567 ; AVX2OR512VL-NEXT: retq
6569 ; XOPAVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu:
6571 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
6572 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
6573 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
6574 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
6575 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6576 ; XOPAVX1-NEXT: retq
6578 ; XOPAVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu:
6580 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3,4,5],ymm0[22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19,20,21]
6581 ; XOPAVX2-NEXT: retq
6582 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 17, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 25, i32 undef>
6583 ret <16 x i16> %shuffle
6586 define <16 x i16> @shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28(<16 x i16> %a, <16 x i16> %b) {
6587 ; AVX1-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28:
6589 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
6590 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
6591 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
6592 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
6593 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
6594 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6597 ; AVX2-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28:
6599 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4],ymm0[5,6,7],ymm1[8,9,10,11,12],ymm0[13,14,15]
6600 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
6601 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6,7,8,9,10,11],ymm1[12],ymm0[13,14,15]
6602 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
6605 ; AVX512VL-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28:
6606 ; AVX512VL: # %bb.0:
6607 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [21,22,23,0,1,2,3,12,29,30,31,8,9,10,11,12]
6608 ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
6609 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
6610 ; AVX512VL-NEXT: retq
6612 ; XOPAVX1-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28:
6614 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
6615 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
6616 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
6617 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
6618 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
6619 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6620 ; XOPAVX1-NEXT: retq
6622 ; XOPAVX2-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28:
6624 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4],ymm0[5,6,7],ymm1[8,9,10,11,12],ymm0[13,14,15]
6625 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
6626 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6,7,8,9,10,11],ymm1[12],ymm0[13,14,15]
6627 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
6628 ; XOPAVX2-NEXT: retq
6629 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 28, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28>
6630 ret <16 x i16> %shuffle
6633 define <16 x i16> @shuffle_v16i16_00_02_04_06_01_03_05_07_31_30_29_28_27_26_25_24(<16 x i16> %a, <16 x i16> %b) {
6634 ; AVX1-LABEL: shuffle_v16i16_00_02_04_06_01_03_05_07_31_30_29_28_27_26_25_24:
6636 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,2,3,6,7,10,11,14,15]
6637 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
6638 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1]
6639 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
6642 ; AVX2-LABEL: shuffle_v16i16_00_02_04_06_01_03_05_07_31_30_29_28_27_26_25_24:
6644 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
6645 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,2,3,6,7,10,11,14,15,30,31,28,29,26,27,24,25,22,23,20,21,18,19,16,17]
6648 ; AVX512VL-LABEL: shuffle_v16i16_00_02_04_06_01_03_05_07_31_30_29_28_27_26_25_24:
6649 ; AVX512VL: # %bb.0:
6650 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,1,3,5,7,31,30,29,28,27,26,25,24]
6651 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
6652 ; AVX512VL-NEXT: retq
6654 ; XOPAVX1-LABEL: shuffle_v16i16_00_02_04_06_01_03_05_07_31_30_29_28_27_26_25_24:
6656 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,2,3,6,7,10,11,14,15]
6657 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
6658 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1]
6659 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
6660 ; XOPAVX1-NEXT: retq
6662 ; XOPAVX2-LABEL: shuffle_v16i16_00_02_04_06_01_03_05_07_31_30_29_28_27_26_25_24:
6664 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
6665 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,2,3,6,7,10,11,14,15,30,31,28,29,26,27,24,25,22,23,20,21,18,19,16,17]
6666 ; XOPAVX2-NEXT: retq
6667 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24>
6668 ret <16 x i16> %shuffle
6671 define <16 x i16> @shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu(<16 x i16> %a, <16 x i16> %b) {
6672 ; AVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu:
6674 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
6675 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
6676 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
6677 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
6678 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6681 ; AVX2OR512VL-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu:
6682 ; AVX2OR512VL: # %bb.0:
6683 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9],ymm0[26,27,28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25]
6684 ; AVX2OR512VL-NEXT: retq
6686 ; XOPAVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu:
6688 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
6689 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
6690 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
6691 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
6692 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6693 ; XOPAVX1-NEXT: retq
6695 ; XOPAVX2-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu:
6697 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9],ymm0[26,27,28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25]
6698 ; XOPAVX2-NEXT: retq
6699 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 14, i32 undef, i32 undef, i32 25, i32 26, i32 27, i32 undef>
6700 ret <16 x i16> %shuffle
6703 define <16 x i16> @shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu(<16 x i16> %a, <16 x i16> %b) {
6704 ; AVX1-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu:
6706 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
6707 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
6708 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
6709 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,5,4,4]
6710 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
6711 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
6712 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
6713 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
6714 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6717 ; AVX2-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu:
6719 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6],ymm1[7],ymm0[8,9,10,11],ymm1[12],ymm0[13,14],ymm1[15]
6720 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15,30,31,30,31,22,23,22,23,24,25,24,25,26,27,30,31]
6723 ; AVX512VL-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu:
6724 ; AVX512VL: # %bb.0:
6725 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <7,u,19,u,4,4,21,u,15,u,27,u,12,12,29,u>
6726 ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
6727 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
6728 ; AVX512VL-NEXT: retq
6730 ; XOPAVX1-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu:
6732 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
6733 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
6734 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [14,15,10,11,22,23,24,25,8,9,8,9,26,27,28,29]
6735 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm3, %xmm2
6736 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm0, %xmm1, %xmm0
6737 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6738 ; XOPAVX1-NEXT: retq
6740 ; XOPAVX2-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu:
6742 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6],ymm1[7],ymm0[8,9,10,11],ymm1[12],ymm0[13,14],ymm1[15]
6743 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15,30,31,30,31,22,23,22,23,24,25,24,25,26,27,30,31]
6744 ; XOPAVX2-NEXT: retq
6745 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 undef, i32 3, i32 undef, i32 20, i32 20, i32 5, i32 undef, i32 31, i32 undef, i32 11, i32 undef, i32 28, i32 28, i32 13, i32 undef>
6746 ret <16 x i16> %shuffle
6749 define <16 x i16> @shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19(<16 x i16> %a, <16 x i16> %b) {
6750 ; AVX1-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19:
6752 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6753 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
6756 ; AVX2OR512VL-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19:
6757 ; AVX2OR512VL: # %bb.0:
6758 ; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6759 ; AVX2OR512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
6760 ; AVX2OR512VL-NEXT: retq
6762 ; XOPAVX1-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19:
6764 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6765 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
6766 ; XOPAVX1-NEXT: retq
6768 ; XOPAVX2-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19:
6770 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6771 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
6772 ; XOPAVX2-NEXT: retq
6773 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19>
6774 ret <16 x i16> %shuffle
6777 define <16 x i16> @shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3(<16 x i16> %a, <16 x i16> %b) {
6778 ; AVX1-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
6780 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
6781 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
6782 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
6785 ; AVX2-SLOW-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
6786 ; AVX2-SLOW: # %bb.0:
6787 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
6788 ; AVX2-SLOW-NEXT: vpbroadcastd %xmm0, %ymm0
6789 ; AVX2-SLOW-NEXT: retq
6791 ; AVX2-FAST-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
6792 ; AVX2-FAST: # %bb.0:
6793 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
6794 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
6795 ; AVX2-FAST-NEXT: retq
6797 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
6798 ; AVX512VL-SLOW: # %bb.0:
6799 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
6800 ; AVX512VL-SLOW-NEXT: vpbroadcastd %xmm0, %ymm0
6801 ; AVX512VL-SLOW-NEXT: retq
6803 ; AVX512VL-FAST-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
6804 ; AVX512VL-FAST: # %bb.0:
6805 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
6806 ; AVX512VL-FAST-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
6807 ; AVX512VL-FAST-NEXT: retq
6809 ; XOPAVX1-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
6811 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
6812 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
6813 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
6814 ; XOPAVX1-NEXT: retq
6816 ; XOPAVX2-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
6818 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
6819 ; XOPAVX2-NEXT: vpbroadcastd %xmm0, %ymm0
6820 ; XOPAVX2-NEXT: retq
6821 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
6822 ret <16 x i16> %shuffle
6825 define <16 x i16> @shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8(<16 x i16> %a, <16 x i16> %b) {
6826 ; AVX1-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
6828 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
6829 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
6830 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
6831 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
6834 ; AVX2-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
6836 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
6837 ; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
6840 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
6841 ; AVX512VL-SLOW: # %bb.0:
6842 ; AVX512VL-SLOW-NEXT: vextracti128 $1, %ymm0, %xmm0
6843 ; AVX512VL-SLOW-NEXT: vpbroadcastw %xmm0, %ymm0
6844 ; AVX512VL-SLOW-NEXT: retq
6846 ; AVX512VL-FAST-CROSSLANE-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
6847 ; AVX512VL-FAST-CROSSLANE: # %bb.0:
6848 ; AVX512VL-FAST-CROSSLANE-NEXT: vpbroadcastw {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
6849 ; AVX512VL-FAST-CROSSLANE-NEXT: vpermw %ymm0, %ymm1, %ymm0
6850 ; AVX512VL-FAST-CROSSLANE-NEXT: retq
6852 ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
6853 ; AVX512VL-FAST-PERLANE: # %bb.0:
6854 ; AVX512VL-FAST-PERLANE-NEXT: vextracti128 $1, %ymm0, %xmm0
6855 ; AVX512VL-FAST-PERLANE-NEXT: vpbroadcastw %xmm0, %ymm0
6856 ; AVX512VL-FAST-PERLANE-NEXT: retq
6858 ; XOPAVX1-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
6860 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
6861 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
6862 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
6863 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
6864 ; XOPAVX1-NEXT: retq
6866 ; XOPAVX2-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
6868 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
6869 ; XOPAVX2-NEXT: vpbroadcastw %xmm0, %ymm0
6870 ; XOPAVX2-NEXT: retq
6871 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
6872 ret <16 x i16> %shuffle
6875 define <16 x i16> @shuffle_v16i16_4_20_5_21_6_22_7_23_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) {
6876 ; ALL-LABEL: shuffle_v16i16_4_20_5_21_6_22_7_23_u_u_u_u_u_u_u_u:
6878 ; ALL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
6880 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
6881 ret <16 x i16> %shuffle
6884 define <16 x i16> @shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) {
6885 ; AVX1-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u:
6887 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
6888 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
6891 ; AVX2-SLOW-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u:
6892 ; AVX2-SLOW: # %bb.0:
6893 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
6894 ; AVX2-SLOW-NEXT: vpbroadcastd %xmm0, %xmm0
6895 ; AVX2-SLOW-NEXT: retq
6897 ; AVX2-FAST-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u:
6898 ; AVX2-FAST: # %bb.0:
6899 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
6900 ; AVX2-FAST-NEXT: retq
6902 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u:
6903 ; AVX512VL-SLOW: # %bb.0:
6904 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
6905 ; AVX512VL-SLOW-NEXT: vpbroadcastd %xmm0, %xmm0
6906 ; AVX512VL-SLOW-NEXT: retq
6908 ; AVX512VL-FAST-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u:
6909 ; AVX512VL-FAST: # %bb.0:
6910 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
6911 ; AVX512VL-FAST-NEXT: retq
6913 ; XOPAVX1-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u:
6915 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
6916 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
6917 ; XOPAVX1-NEXT: retq
6919 ; XOPAVX2-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u:
6921 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
6922 ; XOPAVX2-NEXT: vpbroadcastd %xmm0, %xmm0
6923 ; XOPAVX2-NEXT: retq
6924 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
6925 ret <16 x i16> %shuffle
6928 define <16 x i16> @shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) {
6929 ; AVX1-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
6931 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
6932 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
6933 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
6936 ; AVX2-SLOW-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
6937 ; AVX2-SLOW: # %bb.0:
6938 ; AVX2-SLOW-NEXT: vextracti128 $1, %ymm0, %xmm0
6939 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
6940 ; AVX2-SLOW-NEXT: vpbroadcastd %xmm0, %xmm0
6941 ; AVX2-SLOW-NEXT: retq
6943 ; AVX2-FAST-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
6944 ; AVX2-FAST: # %bb.0:
6945 ; AVX2-FAST-NEXT: vextracti128 $1, %ymm0, %xmm0
6946 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
6947 ; AVX2-FAST-NEXT: retq
6949 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
6950 ; AVX512VL-SLOW: # %bb.0:
6951 ; AVX512VL-SLOW-NEXT: vextracti128 $1, %ymm0, %xmm0
6952 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
6953 ; AVX512VL-SLOW-NEXT: vpbroadcastd %xmm0, %xmm0
6954 ; AVX512VL-SLOW-NEXT: retq
6956 ; AVX512VL-FAST-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
6957 ; AVX512VL-FAST: # %bb.0:
6958 ; AVX512VL-FAST-NEXT: vextracti128 $1, %ymm0, %xmm0
6959 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
6960 ; AVX512VL-FAST-NEXT: retq
6962 ; XOPAVX1-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
6964 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
6965 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
6966 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
6967 ; XOPAVX1-NEXT: retq
6969 ; XOPAVX2-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
6971 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
6972 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
6973 ; XOPAVX2-NEXT: vpbroadcastd %xmm0, %xmm0
6974 ; XOPAVX2-NEXT: retq
6975 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
6976 ret <16 x i16> %shuffle
6979 define <16 x i16> @shuffle_v16i16_02_18_03_19_00_16_01_17_10_26_11_27_08_24_09_25(<16 x i16> %a0, <16 x i16> %a1) {
6980 ; AVX1-LABEL: shuffle_v16i16_02_18_03_19_00_16_01_17_10_26_11_27_08_24_09_25:
6982 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
6983 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
6984 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
6985 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6986 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6987 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
6990 ; AVX2-LABEL: shuffle_v16i16_02_18_03_19_00_16_01_17_10_26_11_27_08_24_09_25:
6992 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
6993 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
6996 ; AVX512VL-LABEL: shuffle_v16i16_02_18_03_19_00_16_01_17_10_26_11_27_08_24_09_25:
6997 ; AVX512VL: # %bb.0:
6998 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [2,18,3,19,0,16,1,17,10,26,11,27,8,24,9,25]
6999 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
7000 ; AVX512VL-NEXT: retq
7002 ; XOPAVX1-LABEL: shuffle_v16i16_02_18_03_19_00_16_01_17_10_26_11_27_08_24_09_25:
7004 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
7005 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
7006 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
7007 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7008 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
7009 ; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
7010 ; XOPAVX1-NEXT: retq
7012 ; XOPAVX2-LABEL: shuffle_v16i16_02_18_03_19_00_16_01_17_10_26_11_27_08_24_09_25:
7014 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
7015 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
7016 ; XOPAVX2-NEXT: retq
7017 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 0, i32 16, i32 1, i32 17, i32 10, i32 26, i32 11, i32 27, i32 8, i32 24, i32 9, i32 25>
7021 define <16 x i16> @shuffle_v16i16_02_18_03_19_10_26_11_27_00_16_01_17_08_24_09_25(<16 x i16> %a0, <16 x i16> %a1) {
7022 ; AVX1-LABEL: shuffle_v16i16_02_18_03_19_10_26_11_27_00_16_01_17_08_24_09_25:
7024 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
7025 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
7026 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
7027 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,2,4,5,6,7]
7028 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
7029 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
7030 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[0,1,0,1]
7031 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,7,7]
7032 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
7033 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
7034 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,2,4,5,6,7]
7035 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
7036 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7]
7037 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
7038 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
7041 ; AVX2-LABEL: shuffle_v16i16_02_18_03_19_10_26_11_27_00_16_01_17_08_24_09_25:
7043 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
7044 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,0,2]
7047 ; AVX512VL-LABEL: shuffle_v16i16_02_18_03_19_10_26_11_27_00_16_01_17_08_24_09_25:
7048 ; AVX512VL: # %bb.0:
7049 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [2,18,3,19,10,26,11,27,0,16,1,17,8,24,9,25]
7050 ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
7051 ; AVX512VL-NEXT: retq
7053 ; XOPAVX1-LABEL: shuffle_v16i16_02_18_03_19_10_26_11_27_00_16_01_17_08_24_09_25:
7055 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
7056 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
7057 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
7058 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
7059 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm2 = xmm0[0,1],xmm1[0,1],xmm0[4,5],xmm1[4,5],xmm0[2,3],xmm1[2,3],xmm0[6,7],xmm1[6,7]
7060 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8,9],xmm1[8,9],xmm0[12,13],xmm1[12,13],xmm0[10,11],xmm1[10,11],xmm0[14,15],xmm1[14,15]
7061 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
7062 ; XOPAVX1-NEXT: retq
7064 ; XOPAVX2-LABEL: shuffle_v16i16_02_18_03_19_10_26_11_27_00_16_01_17_08_24_09_25:
7066 ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
7067 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,0,2]
7068 ; XOPAVX2-NEXT: retq
7069 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 0, i32 16, i32 1, i32 17, i32 10, i32 26, i32 11, i32 27, i32 8, i32 24, i32 9, i32 25>
7070 %2 = bitcast <16 x i16> %1 to <4 x i64>
7071 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
7072 %4 = bitcast <4 x i64> %3 to <16 x i16>
7076 define <16 x i16> @shuffle_v16i16_ashr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30(<8 x i32> %a0, <8 x i32> %a1) {
7077 ; AVX1-LABEL: shuffle_v16i16_ashr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30:
7079 ; AVX1-NEXT: vpsrad $25, %xmm0, %xmm2
7080 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7081 ; AVX1-NEXT: vpsrad $25, %xmm0, %xmm0
7082 ; AVX1-NEXT: vpsrad $25, %xmm1, %xmm3
7083 ; AVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
7084 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
7085 ; AVX1-NEXT: vpsrad $25, %xmm1, %xmm1
7086 ; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
7087 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
7090 ; AVX2OR512VL-LABEL: shuffle_v16i16_ashr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30:
7091 ; AVX2OR512VL: # %bb.0:
7092 ; AVX2OR512VL-NEXT: vpsrad $25, %ymm0, %ymm0
7093 ; AVX2OR512VL-NEXT: vpsrad $25, %ymm1, %ymm1
7094 ; AVX2OR512VL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
7095 ; AVX2OR512VL-NEXT: retq
7097 ; XOPAVX1-LABEL: shuffle_v16i16_ashr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30:
7099 ; XOPAVX1-NEXT: vpsrad $25, %xmm0, %xmm2
7100 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7101 ; XOPAVX1-NEXT: vpsrad $25, %xmm0, %xmm0
7102 ; XOPAVX1-NEXT: vpsrad $25, %xmm1, %xmm3
7103 ; XOPAVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
7104 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
7105 ; XOPAVX1-NEXT: vpsrad $25, %xmm1, %xmm1
7106 ; XOPAVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
7107 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
7108 ; XOPAVX1-NEXT: retq
7110 ; XOPAVX2-LABEL: shuffle_v16i16_ashr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30:
7112 ; XOPAVX2-NEXT: vpsrad $25, %ymm0, %ymm0
7113 ; XOPAVX2-NEXT: vpsrad $25, %ymm1, %ymm1
7114 ; XOPAVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
7115 ; XOPAVX2-NEXT: retq
7116 %1 = ashr <8 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
7117 %2 = ashr <8 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
7118 %3 = bitcast <8 x i32> %1 to <16 x i16>
7119 %4 = bitcast <8 x i32> %2 to <16 x i16>
7120 %5 = shufflevector <16 x i16> %3, <16 x i16> %4, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
7124 define <16 x i16> @shuffle_v16i16_lshr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30(<8 x i32> %a0, <8 x i32> %a1) {
7125 ; AVX1-LABEL: shuffle_v16i16_lshr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30:
7127 ; AVX1-NEXT: vpsrld $25, %xmm0, %xmm2
7128 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7129 ; AVX1-NEXT: vpsrld $25, %xmm0, %xmm0
7130 ; AVX1-NEXT: vpsrld $25, %xmm1, %xmm3
7131 ; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2
7132 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
7133 ; AVX1-NEXT: vpsrld $25, %xmm1, %xmm1
7134 ; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
7135 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
7138 ; AVX2OR512VL-LABEL: shuffle_v16i16_lshr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30:
7139 ; AVX2OR512VL: # %bb.0:
7140 ; AVX2OR512VL-NEXT: vpsrld $25, %ymm0, %ymm0
7141 ; AVX2OR512VL-NEXT: vpsrld $25, %ymm1, %ymm1
7142 ; AVX2OR512VL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
7143 ; AVX2OR512VL-NEXT: retq
7145 ; XOPAVX1-LABEL: shuffle_v16i16_lshr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30:
7147 ; XOPAVX1-NEXT: vpsrld $25, %xmm0, %xmm2
7148 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7149 ; XOPAVX1-NEXT: vpsrld $25, %xmm0, %xmm0
7150 ; XOPAVX1-NEXT: vpsrld $25, %xmm1, %xmm3
7151 ; XOPAVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2
7152 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
7153 ; XOPAVX1-NEXT: vpsrld $25, %xmm1, %xmm1
7154 ; XOPAVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
7155 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
7156 ; XOPAVX1-NEXT: retq
7158 ; XOPAVX2-LABEL: shuffle_v16i16_lshr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30:
7160 ; XOPAVX2-NEXT: vpsrld $25, %ymm0, %ymm0
7161 ; XOPAVX2-NEXT: vpsrld $25, %ymm1, %ymm1
7162 ; XOPAVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
7163 ; XOPAVX2-NEXT: retq
7164 %1 = lshr <8 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
7165 %2 = lshr <8 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
7166 %3 = bitcast <8 x i32> %1 to <16 x i16>
7167 %4 = bitcast <8 x i32> %2 to <16 x i16>
7168 %5 = shufflevector <16 x i16> %3, <16 x i16> %4, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
7172 define <16 x i16> @shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13(<16 x i16> %a) {
7173 ; AVX1-LABEL: shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13:
7175 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,6,7,5]
7176 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7177 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
7178 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
7179 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
7182 ; AVX2-SLOW-LABEL: shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13:
7183 ; AVX2-SLOW: # %bb.0:
7184 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,7,5,8,9,10,11,12,14,15,13]
7185 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
7186 ; AVX2-SLOW-NEXT: retq
7188 ; AVX2-FAST-LABEL: shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13:
7189 ; AVX2-FAST: # %bb.0:
7190 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11,24,25,28,29,30,31,26,27,24,25,28,29,30,31,26,27]
7191 ; AVX2-FAST-NEXT: retq
7193 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13:
7194 ; AVX512VL-SLOW: # %bb.0:
7195 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,7,5,8,9,10,11,12,14,15,13]
7196 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
7197 ; AVX512VL-SLOW-NEXT: retq
7199 ; AVX512VL-FAST-LABEL: shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13:
7200 ; AVX512VL-FAST: # %bb.0:
7201 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11,24,25,28,29,30,31,26,27,24,25,28,29,30,31,26,27]
7202 ; AVX512VL-FAST-NEXT: retq
7204 ; XOPAVX1-LABEL: shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13:
7206 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,6,7,5]
7207 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7208 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
7209 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
7210 ; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
7211 ; XOPAVX1-NEXT: retq
7213 ; XOPAVX2-LABEL: shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13:
7215 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,7,5,8,9,10,11,12,14,15,13]
7216 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
7217 ; XOPAVX2-NEXT: retq
7218 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 4, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7, i32 5, i32 12, i32 14, i32 15, i32 undef, i32 undef, i32 14, i32 15, i32 13>
7219 ret <16 x i16> %shuffle
7222 define <16 x i16> @shuffle_v16i16_03_02_01_00_04_05_06_07_11_10_09_08_12_13_14_15_v8i16(<8 x i16> %a, <8 x i16> %b) {
7223 ; AVX1-LABEL: shuffle_v16i16_03_02_01_00_04_05_06_07_11_10_09_08_12_13_14_15_v8i16:
7225 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
7226 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
7227 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
7230 ; AVX2OR512VL-LABEL: shuffle_v16i16_03_02_01_00_04_05_06_07_11_10_09_08_12_13_14_15_v8i16:
7231 ; AVX2OR512VL: # %bb.0:
7232 ; AVX2OR512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
7233 ; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
7234 ; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
7235 ; AVX2OR512VL-NEXT: retq
7237 ; XOPAVX1-LABEL: shuffle_v16i16_03_02_01_00_04_05_06_07_11_10_09_08_12_13_14_15_v8i16:
7239 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
7240 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
7241 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
7242 ; XOPAVX1-NEXT: retq
7244 ; XOPAVX2-LABEL: shuffle_v16i16_03_02_01_00_04_05_06_07_11_10_09_08_12_13_14_15_v8i16:
7246 ; XOPAVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
7247 ; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
7248 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
7249 ; XOPAVX2-NEXT: retq
7250 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
7251 %2 = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
7252 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7256 define <16 x i16> @shuffle_v16i16_00_01_02_04_07_06_05_04_08_09_10_11_15_14_13_12_v8i16(<8 x i16> %a, <8 x i16> %b) {
7257 ; AVX1-LABEL: shuffle_v16i16_00_01_02_04_07_06_05_04_08_09_10_11_15_14_13_12_v8i16:
7259 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
7260 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
7261 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
7264 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_01_02_04_07_06_05_04_08_09_10_11_15_14_13_12_v8i16:
7265 ; AVX2OR512VL: # %bb.0:
7266 ; AVX2OR512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
7267 ; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
7268 ; AVX2OR512VL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12]
7269 ; AVX2OR512VL-NEXT: retq
7271 ; XOPAVX1-LABEL: shuffle_v16i16_00_01_02_04_07_06_05_04_08_09_10_11_15_14_13_12_v8i16:
7273 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
7274 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
7275 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
7276 ; XOPAVX1-NEXT: retq
7278 ; XOPAVX2-LABEL: shuffle_v16i16_00_01_02_04_07_06_05_04_08_09_10_11_15_14_13_12_v8i16:
7280 ; XOPAVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
7281 ; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
7282 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12]
7283 ; XOPAVX2-NEXT: retq
7284 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4>
7285 %2 = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4>
7286 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7290 define <16 x i16> @shuffle_v16i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14(<16 x i16> %a) {
7291 ; AVX1-LABEL: shuffle_v16i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14:
7293 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7]
7294 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,4,7,6]
7295 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7296 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
7297 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
7298 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
7301 ; AVX2-SLOW-LABEL: shuffle_v16i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14:
7302 ; AVX2-SLOW: # %bb.0:
7303 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
7304 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14]
7305 ; AVX2-SLOW-NEXT: retq
7307 ; AVX2-FAST-LABEL: shuffle_v16i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14:
7308 ; AVX2-FAST: # %bb.0:
7309 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13,18,19,16,17,22,23,20,21,26,27,24,25,30,31,28,29]
7310 ; AVX2-FAST-NEXT: retq
7312 ; AVX512VL-LABEL: shuffle_v16i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14:
7313 ; AVX512VL: # %bb.0:
7314 ; AVX512VL-NEXT: vprold $16, %ymm0, %ymm0
7315 ; AVX512VL-NEXT: retq
7317 ; XOPAVX1-LABEL: shuffle_v16i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14:
7319 ; XOPAVX1-NEXT: vprotd $16, %xmm0, %xmm1
7320 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7321 ; XOPAVX1-NEXT: vprotd $16, %xmm0, %xmm0
7322 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
7323 ; XOPAVX1-NEXT: retq
7325 ; XOPAVX2-LABEL: shuffle_v16i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14:
7327 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
7328 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14]
7329 ; XOPAVX2-NEXT: retq
7330 %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
7331 ret <16 x i16> %shuffle
7334 define <16 x i16> @shuffle_v16i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14(<16 x i16> %a) {
7335 ; AVX1-LABEL: shuffle_v16i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14:
7337 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,0,1,2,4,5,6,7]
7338 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,4,5,6]
7339 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7340 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,0,1,2,4,5,6,7]
7341 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6]
7342 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
7345 ; AVX2-SLOW-LABEL: shuffle_v16i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14:
7346 ; AVX2-SLOW: # %bb.0:
7347 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,0,1,2,4,5,6,7,11,8,9,10,12,13,14,15]
7348 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14]
7349 ; AVX2-SLOW-NEXT: retq
7351 ; AVX2-FAST-LABEL: shuffle_v16i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14:
7352 ; AVX2-FAST: # %bb.0:
7353 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,0,1,2,3,4,5,14,15,8,9,10,11,12,13,22,23,16,17,18,19,20,21,30,31,24,25,26,27,28,29]
7354 ; AVX2-FAST-NEXT: retq
7356 ; AVX512VL-LABEL: shuffle_v16i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14:
7357 ; AVX512VL: # %bb.0:
7358 ; AVX512VL-NEXT: vprolq $16, %ymm0, %ymm0
7359 ; AVX512VL-NEXT: retq
7361 ; XOPAVX1-LABEL: shuffle_v16i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14:
7363 ; XOPAVX1-NEXT: vprotq $16, %xmm0, %xmm1
7364 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7365 ; XOPAVX1-NEXT: vprotq $16, %xmm0, %xmm0
7366 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
7367 ; XOPAVX1-NEXT: retq
7369 ; XOPAVX2-LABEL: shuffle_v16i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14:
7371 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,0,1,2,4,5,6,7,11,8,9,10,12,13,14,15]
7372 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14]
7373 ; XOPAVX2-NEXT: retq
7374 %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 11, i32 8, i32 9, i32 10, i32 15, i32 12, i32 13, i32 14>
7375 ret <16 x i16> %shuffle
7378 define <16 x i16> @insert_v16i16_0elt_into_zero_vector(ptr %ptr) {
7379 ; ALL-LABEL: insert_v16i16_0elt_into_zero_vector:
7381 ; ALL-NEXT: movzwl (%rdi), %eax
7382 ; ALL-NEXT: vmovd %eax, %xmm0
7384 %val = load i16, ptr %ptr
7385 %i0 = insertelement <16 x i16> zeroinitializer, i16 %val, i32 0
7389 define <16 x i16> @concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31(<16 x i16> %a, <16 x i16> %b) {
7390 ; ALL-LABEL: concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31:
7392 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
7394 %alo = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7395 %bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7396 %shuf = shufflevector <8 x i16> %alo, <8 x i16> %bhi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7397 ret <16 x i16> %shuf
7400 define <16 x i16> @concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc(<16 x i16> %a, <16 x i16> %b) {
7401 ; AVX1OR2-LABEL: concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc:
7403 ; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
7404 ; AVX1OR2-NEXT: retq
7406 ; AVX512VL-LABEL: concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc:
7407 ; AVX512VL: # %bb.0:
7408 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
7409 ; AVX512VL-NEXT: retq
7411 ; XOP-LABEL: concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc:
7413 ; XOP-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
7415 %ahi = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7416 %bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7417 %bc0hi = bitcast <8 x i16> %ahi to <16 x i8>
7418 %bc1hi = bitcast <8 x i16> %bhi to <16 x i8>
7419 %shuffle8 = shufflevector <16 x i8> %bc0hi, <16 x i8> %bc1hi, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7420 %shuffle16 = bitcast <32 x i8> %shuffle8 to <16 x i16>
7421 ret <16 x i16> %shuffle16
7424 define <16 x i16> @PR24935(<16 x i16> %a, <16 x i16> %b) {
7425 ; AVX1-LABEL: PR24935:
7427 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,0,1,1]
7428 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
7429 ; AVX1-NEXT: vpalignr {{.*#+}} xmm4 = xmm3[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
7430 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0,1],xmm2[2],xmm4[3,4,5,6,7]
7431 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
7432 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm5 = xmm4[0,1,2,3,5,5,5,5]
7433 ; AVX1-NEXT: vpshufb {{.*#+}} xmm6 = xmm0[2,3,u,u,u,u,u,u,u,u,8,9,0,1,u,u]
7434 ; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm6[0],xmm5[1],xmm6[2,3],xmm5[4],xmm6[5,6,7]
7435 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm5[0,1],xmm2[2,3],xmm5[4,5,6],xmm2[7]
7436 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
7437 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
7438 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3,4,5],xmm1[6,7]
7439 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[6,7,4,5,u,u,10,11,4,5,14,15,u,u,0,1]
7440 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3,4,5],xmm0[6],xmm1[7]
7441 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
7444 ; AVX2-SLOW-LABEL: PR24935:
7445 ; AVX2-SLOW: # %bb.0:
7446 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,zero,zero,ymm1[8,9],zero,zero,zero,zero,ymm1[14,15,12,13,0,1,24,25,24,25],zero,zero,ymm1[24,25,16,17,30,31,28,29,16,17]
7447 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
7448 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5],zero,zero,ymm1[10,11,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
7449 ; AVX2-SLOW-NEXT: vpor %ymm2, %ymm1, %ymm1
7450 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm2 = ymm0[1,1,1,1,4,5,6,7,9,9,9,9,12,13,14,15]
7451 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm2 = ymm2[0,1,2,3,5,5,5,5,8,9,10,11,13,13,13,13]
7452 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
7453 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,6,7,u,u,18,19,u,u,u,u,u,u,u,u,24,25,16,17,u,u]
7454 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2],ymm0[3],ymm2[4],ymm0[5,6,7,8],ymm2[9,10],ymm0[11],ymm2[12],ymm0[13,14,15]
7455 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,0,255,255,255,255,255,255,0,0,255,255,0,0,0,0,255,255,255,255,0,0,0,0,0,0,255,255]
7456 ; AVX2-SLOW-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
7457 ; AVX2-SLOW-NEXT: retq
7459 ; AVX2-FAST-ALL-LABEL: PR24935:
7460 ; AVX2-FAST-ALL: # %bb.0:
7461 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm2 = <0,5,u,u,0,4,6,2>
7462 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm2, %ymm0
7463 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,ymm0[2,3],zero,zero,zero,zero,zero,zero,ymm0[6,7],zero,zero,ymm0[18,19,22,23],zero,zero,zero,zero,ymm0[26,27,28,29,16,17],zero,zero
7464 ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm2 = <5,6,3,0,0,6,4,u>
7465 ; AVX2-FAST-ALL-NEXT: vpermd %ymm1, %ymm2, %ymm1
7466 ; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[2,3,0,1],zero,zero,ymm1[6,7,0,1,10,11],zero,zero,ymm1[12,13],zero,zero,zero,zero,ymm1[16,17,20,21],zero,zero,zero,zero,zero,zero,ymm1[24,25]
7467 ; AVX2-FAST-ALL-NEXT: vpor %ymm0, %ymm1, %ymm0
7468 ; AVX2-FAST-ALL-NEXT: retq
7470 ; AVX2-FAST-PERLANE-LABEL: PR24935:
7471 ; AVX2-FAST-PERLANE: # %bb.0:
7472 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,zero,zero,ymm1[8,9],zero,zero,zero,zero,ymm1[14,15,12,13,0,1,24,25,24,25],zero,zero,ymm1[24,25,16,17,30,31,28,29,16,17]
7473 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
7474 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5],zero,zero,ymm1[10,11,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
7475 ; AVX2-FAST-PERLANE-NEXT: vpor %ymm2, %ymm1, %ymm1
7476 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[u,u,2,3,2,3,u,u,10,11,u,u,u,u,u,u,u,u,18,19,18,19,u,u,26,27,u,u,u,u,u,u]
7477 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
7478 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,6,7,u,u,18,19,u,u,u,u,u,u,u,u,24,25,16,17,u,u]
7479 ; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2],ymm0[3],ymm2[4],ymm0[5,6,7,8],ymm2[9,10],ymm0[11],ymm2[12],ymm0[13,14,15]
7480 ; AVX2-FAST-PERLANE-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,0,255,255,255,255,255,255,0,0,255,255,0,0,0,0,255,255,255,255,0,0,0,0,0,0,255,255]
7481 ; AVX2-FAST-PERLANE-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
7482 ; AVX2-FAST-PERLANE-NEXT: retq
7484 ; AVX512VL-LABEL: PR24935:
7485 ; AVX512VL: # %bb.0:
7486 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [11,10,17,13,10,7,27,0,17,25,0,12,29,20,16,8]
7487 ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
7488 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
7489 ; AVX512VL-NEXT: retq
7491 ; XOPAVX1-LABEL: PR24935:
7493 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
7494 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm3 = xmm2[u,u,u,u],xmm1[0,1],xmm2[8,9,u,u,u,u,u,u,0,1]
7495 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
7496 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm5 = xmm0[2,3],xmm4[2,3],xmm0[u,u,u,u],xmm4[10,11],xmm0[8,9,0,1,u,u]
7497 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm5[0,1],xmm3[2,3],xmm5[4,5,6],xmm3[7]
7498 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm2[6,7,4,5,u,u,10,11,4,5],xmm1[14,15],xmm2[u,u],xmm1[0,1]
7499 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
7500 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
7501 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3,4,5],xmm0[6],xmm1[7]
7502 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
7503 ; XOPAVX1-NEXT: retq
7505 ; XOPAVX2-LABEL: PR24935:
7507 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,zero,zero,ymm1[8,9],zero,zero,zero,zero,ymm1[14,15,12,13,0,1,24,25,24,25],zero,zero,ymm1[24,25,16,17,30,31,28,29,16,17]
7508 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
7509 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5],zero,zero,ymm1[10,11,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
7510 ; XOPAVX2-NEXT: vpor %ymm2, %ymm1, %ymm1
7511 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm2 = ymm0[1,1,1,1,4,5,6,7,9,9,9,9,12,13,14,15]
7512 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm2 = ymm2[0,1,2,3,5,5,5,5,8,9,10,11,13,13,13,13]
7513 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
7514 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,6,7,u,u,18,19,u,u,u,u,u,u,u,u,24,25,16,17,u,u]
7515 ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2],ymm0[3],ymm2[4],ymm0[5,6,7,8],ymm2[9,10],ymm0[11],ymm2[12],ymm0[13,14,15]
7516 ; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,0,255,255,255,255,255,255,0,0,255,255,0,0,0,0,255,255,255,255,0,0,0,0,0,0,255,255]
7517 ; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
7518 ; XOPAVX2-NEXT: retq
7519 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 27, i32 26, i32 1, i32 29, i32 26, i32 23, i32 11, i32 16, i32 1, i32 9, i32 16, i32 28, i32 13, i32 4, i32 0, i32 24>
7520 ret <16 x i16> %shuffle
7523 define <16 x i16> @PR34369(<16 x i16> %vec, <16 x i16> %mask) {
7524 ; AVX1-LABEL: PR34369:
7526 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
7527 ; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm2[u,u,u,u,u,u,10,11,u,u,u,u,u,u,4,5]
7528 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,0,1,0,1,u,u,10,11,4,5,4,5,u,u]
7529 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3],xmm0[4,5,6],xmm3[7]
7530 ; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[14,15,0,1,12,13,0,1,2,3,4,5,8,9,8,9]
7531 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
7532 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
7533 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
7534 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2
7535 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1
7536 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
7537 ; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0
7540 ; AVX2-SLOW-LABEL: PR34369:
7541 ; AVX2-SLOW: # %bb.0:
7542 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[6,7,0,1,0,1,u,u,10,11,4,5,4,5,u,u,30,31,16,17,28,29,16,17,18,19,20,21,24,25,24,25]
7543 ; AVX2-SLOW-NEXT: vextracti128 $1, %ymm0, %xmm0
7544 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,1]
7545 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,6]
7546 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[3],xmm2[4,5,6],xmm0[7]
7547 ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
7548 ; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2
7549 ; AVX2-SLOW-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
7550 ; AVX2-SLOW-NEXT: vpand %ymm0, %ymm1, %ymm0
7551 ; AVX2-SLOW-NEXT: retq
7553 ; AVX2-FAST-LABEL: PR34369:
7554 ; AVX2-FAST: # %bb.0:
7555 ; AVX2-FAST-NEXT: vextracti128 $1, %ymm0, %xmm2
7556 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u,10,11,u,u,u,u,u,u,4,5]
7557 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,0,1,0,1,u,u,10,11,4,5,4,5,u,u,30,31,16,17,28,29,16,17,18,19,20,21,24,25,24,25]
7558 ; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0,1,2],xmm2[3],xmm0[4,5,6],xmm2[7]
7559 ; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
7560 ; AVX2-FAST-NEXT: vpxor %xmm2, %xmm2, %xmm2
7561 ; AVX2-FAST-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
7562 ; AVX2-FAST-NEXT: vpand %ymm0, %ymm1, %ymm0
7563 ; AVX2-FAST-NEXT: retq
7565 ; AVX512VL-LABEL: PR34369:
7566 ; AVX512VL: # %bb.0:
7567 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [3,0,0,13,5,2,2,10,15,8,14,8,9,10,12,12]
7568 ; AVX512VL-NEXT: vptestnmw %ymm1, %ymm1, %k1
7569 ; AVX512VL-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z}
7570 ; AVX512VL-NEXT: retq
7572 ; XOPAVX1-LABEL: PR34369:
7574 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
7575 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[6,7,0,1,0,1],xmm2[10,11],xmm0[10,11,4,5,4,5],xmm2[4,5]
7576 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[14,15,0,1,12,13,0,1,2,3,4,5,8,9,8,9]
7577 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
7578 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
7579 ; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
7580 ; XOPAVX1-NEXT: vpcomeqw %xmm3, %xmm2, %xmm2
7581 ; XOPAVX1-NEXT: vpcomeqw %xmm3, %xmm1, %xmm1
7582 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
7583 ; XOPAVX1-NEXT: vandps %ymm0, %ymm1, %ymm0
7584 ; XOPAVX1-NEXT: retq
7586 ; XOPAVX2-LABEL: PR34369:
7588 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
7589 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,0,1,0,1,u,u,10,11,4,5,4,5,u,u,30,31,16,17,28,29,16,17,18,19,20,21,24,25,24,25]
7590 ; XOPAVX2-NEXT: vpperm {{.*#+}} xmm2 = xmm0[0,1,2,3,4,5],xmm2[10,11],xmm0[8,9,10,11,12,13],xmm2[4,5]
7591 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
7592 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
7593 ; XOPAVX2-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
7594 ; XOPAVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
7595 ; XOPAVX2-NEXT: retq
7596 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 9, i32 10, i32 12, i32 12>
7597 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
7598 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
7602 define <16 x i16> @insert_dup_mem_v16i16_i32(ptr %ptr) {
7603 ; AVX1-LABEL: insert_dup_mem_v16i16_i32:
7605 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
7606 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
7607 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7608 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7611 ; AVX2OR512VL-LABEL: insert_dup_mem_v16i16_i32:
7612 ; AVX2OR512VL: # %bb.0:
7613 ; AVX2OR512VL-NEXT: vpbroadcastw (%rdi), %ymm0
7614 ; AVX2OR512VL-NEXT: retq
7616 ; XOPAVX1-LABEL: insert_dup_mem_v16i16_i32:
7618 ; XOPAVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
7619 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
7620 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7621 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7622 ; XOPAVX1-NEXT: retq
7624 ; XOPAVX2-LABEL: insert_dup_mem_v16i16_i32:
7626 ; XOPAVX2-NEXT: vpbroadcastw (%rdi), %ymm0
7627 ; XOPAVX2-NEXT: retq
7628 %tmp = load i32, ptr %ptr, align 4
7629 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
7630 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
7631 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> zeroinitializer
7632 ret <16 x i16> %tmp3
7635 define <16 x i16> @insert_dup_mem_v16i16_sext_i16(ptr %ptr) {
7636 ; AVX1-LABEL: insert_dup_mem_v16i16_sext_i16:
7638 ; AVX1-NEXT: movzwl (%rdi), %eax
7639 ; AVX1-NEXT: vmovd %eax, %xmm0
7640 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
7641 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7642 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7645 ; AVX2OR512VL-LABEL: insert_dup_mem_v16i16_sext_i16:
7646 ; AVX2OR512VL: # %bb.0:
7647 ; AVX2OR512VL-NEXT: vpbroadcastw (%rdi), %ymm0
7648 ; AVX2OR512VL-NEXT: retq
7650 ; XOPAVX1-LABEL: insert_dup_mem_v16i16_sext_i16:
7652 ; XOPAVX1-NEXT: movzwl (%rdi), %eax
7653 ; XOPAVX1-NEXT: vmovd %eax, %xmm0
7654 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
7655 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7656 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7657 ; XOPAVX1-NEXT: retq
7659 ; XOPAVX2-LABEL: insert_dup_mem_v16i16_sext_i16:
7661 ; XOPAVX2-NEXT: vpbroadcastw (%rdi), %ymm0
7662 ; XOPAVX2-NEXT: retq
7663 %tmp = load i16, ptr %ptr, align 2
7664 %tmp1 = sext i16 %tmp to i32
7665 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
7666 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
7667 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <16 x i32> zeroinitializer
7668 ret <16 x i16> %tmp4
7671 define <16 x i16> @insert_dup_elt1_mem_v16i16_i32(ptr %ptr) #0 {
7672 ; AVX1-LABEL: insert_dup_elt1_mem_v16i16_i32:
7674 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
7675 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
7676 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7677 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7680 ; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v16i16_i32:
7681 ; AVX2OR512VL: # %bb.0:
7682 ; AVX2OR512VL-NEXT: vpbroadcastw 2(%rdi), %ymm0
7683 ; AVX2OR512VL-NEXT: retq
7685 ; XOPAVX1-LABEL: insert_dup_elt1_mem_v16i16_i32:
7687 ; XOPAVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
7688 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
7689 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7690 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7691 ; XOPAVX1-NEXT: retq
7693 ; XOPAVX2-LABEL: insert_dup_elt1_mem_v16i16_i32:
7695 ; XOPAVX2-NEXT: vpbroadcastw 2(%rdi), %ymm0
7696 ; XOPAVX2-NEXT: retq
7697 %tmp = load i32, ptr %ptr, align 4
7698 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
7699 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
7700 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
7701 ret <16 x i16> %tmp3
7704 define <16 x i16> @insert_dup_elt3_mem_v16i16_i32(ptr %ptr) #0 {
7705 ; AVX1-LABEL: insert_dup_elt3_mem_v16i16_i32:
7707 ; AVX1-NEXT: vbroadcastss (%rdi), %xmm0
7708 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
7709 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7710 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7713 ; AVX2OR512VL-LABEL: insert_dup_elt3_mem_v16i16_i32:
7714 ; AVX2OR512VL: # %bb.0:
7715 ; AVX2OR512VL-NEXT: vpbroadcastw 2(%rdi), %ymm0
7716 ; AVX2OR512VL-NEXT: retq
7718 ; XOPAVX1-LABEL: insert_dup_elt3_mem_v16i16_i32:
7720 ; XOPAVX1-NEXT: vbroadcastss (%rdi), %xmm0
7721 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
7722 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7723 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7724 ; XOPAVX1-NEXT: retq
7726 ; XOPAVX2-LABEL: insert_dup_elt3_mem_v16i16_i32:
7728 ; XOPAVX2-NEXT: vpbroadcastw 2(%rdi), %ymm0
7729 ; XOPAVX2-NEXT: retq
7730 %tmp = load i32, ptr %ptr, align 4
7731 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
7732 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
7733 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
7734 ret <16 x i16> %tmp3
7737 define <16 x i16> @insert_dup_mem_v16i16_i64(ptr %ptr) {
7738 ; AVX1-LABEL: insert_dup_mem_v16i16_i64:
7740 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
7741 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
7742 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7743 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7746 ; AVX2OR512VL-LABEL: insert_dup_mem_v16i16_i64:
7747 ; AVX2OR512VL: # %bb.0:
7748 ; AVX2OR512VL-NEXT: vpbroadcastw (%rdi), %ymm0
7749 ; AVX2OR512VL-NEXT: retq
7751 ; XOPAVX1-LABEL: insert_dup_mem_v16i16_i64:
7753 ; XOPAVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
7754 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
7755 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7756 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7757 ; XOPAVX1-NEXT: retq
7759 ; XOPAVX2-LABEL: insert_dup_mem_v16i16_i64:
7761 ; XOPAVX2-NEXT: vpbroadcastw (%rdi), %ymm0
7762 ; XOPAVX2-NEXT: retq
7763 %tmp = load i64, ptr %ptr, align 4
7764 %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0
7765 %tmp2 = bitcast <2 x i64> %tmp1 to <8 x i16>
7766 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> zeroinitializer
7767 ret <16 x i16> %tmp3
7770 define <16 x i16> @insert_dup_elt1_mem_v16i16_i64(ptr %ptr) {
7771 ; AVX1-LABEL: insert_dup_elt1_mem_v16i16_i64:
7773 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
7774 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
7775 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7776 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7779 ; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v16i16_i64:
7780 ; AVX2OR512VL: # %bb.0:
7781 ; AVX2OR512VL-NEXT: vpbroadcastw 2(%rdi), %ymm0
7782 ; AVX2OR512VL-NEXT: retq
7784 ; XOPAVX1-LABEL: insert_dup_elt1_mem_v16i16_i64:
7786 ; XOPAVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
7787 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
7788 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7789 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7790 ; XOPAVX1-NEXT: retq
7792 ; XOPAVX2-LABEL: insert_dup_elt1_mem_v16i16_i64:
7794 ; XOPAVX2-NEXT: vpbroadcastw 2(%rdi), %ymm0
7795 ; XOPAVX2-NEXT: retq
7796 %tmp = load i64, ptr %ptr, align 4
7797 %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0
7798 %tmp2 = bitcast <2 x i64> %tmp1 to <8 x i16>
7799 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
7800 ret <16 x i16> %tmp3
7803 define <16 x i16> @insert_dup_elt3_mem_v16i16_i64(ptr %ptr) {
7804 ; AVX1-LABEL: insert_dup_elt3_mem_v16i16_i64:
7806 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
7807 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
7808 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7809 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7812 ; AVX2OR512VL-LABEL: insert_dup_elt3_mem_v16i16_i64:
7813 ; AVX2OR512VL: # %bb.0:
7814 ; AVX2OR512VL-NEXT: vpbroadcastw 6(%rdi), %ymm0
7815 ; AVX2OR512VL-NEXT: retq
7817 ; XOPAVX1-LABEL: insert_dup_elt3_mem_v16i16_i64:
7819 ; XOPAVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
7820 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
7821 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7822 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7823 ; XOPAVX1-NEXT: retq
7825 ; XOPAVX2-LABEL: insert_dup_elt3_mem_v16i16_i64:
7827 ; XOPAVX2-NEXT: vpbroadcastw 6(%rdi), %ymm0
7828 ; XOPAVX2-NEXT: retq
7829 %tmp = load i64, ptr %ptr, align 4
7830 %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0
7831 %tmp2 = bitcast <2 x i64> %tmp1 to <8 x i16>
7832 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
7833 ret <16 x i16> %tmp3
7836 define <16 x i16> @insert_dup_elt7_mem_v16i16_i64(ptr %ptr) {
7837 ; AVX1-LABEL: insert_dup_elt7_mem_v16i16_i64:
7839 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
7840 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
7841 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7844 ; AVX2OR512VL-LABEL: insert_dup_elt7_mem_v16i16_i64:
7845 ; AVX2OR512VL: # %bb.0:
7846 ; AVX2OR512VL-NEXT: vpbroadcastw 6(%rdi), %ymm0
7847 ; AVX2OR512VL-NEXT: retq
7849 ; XOPAVX1-LABEL: insert_dup_elt7_mem_v16i16_i64:
7851 ; XOPAVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
7852 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
7853 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7854 ; XOPAVX1-NEXT: retq
7856 ; XOPAVX2-LABEL: insert_dup_elt7_mem_v16i16_i64:
7858 ; XOPAVX2-NEXT: vpbroadcastw 6(%rdi), %ymm0
7859 ; XOPAVX2-NEXT: retq
7860 %tmp = load i64, ptr %ptr, align 4
7861 %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 1
7862 %tmp2 = bitcast <2 x i64> %tmp1 to <8 x i16>
7863 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
7864 ret <16 x i16> %tmp3
7867 define <16 x i16> @insert_dup_mem_v16i16_sext_i16_i64(ptr %ptr) {
7868 ; AVX1-LABEL: insert_dup_mem_v16i16_sext_i16_i64:
7870 ; AVX1-NEXT: movzwl (%rdi), %eax
7871 ; AVX1-NEXT: vmovd %eax, %xmm0
7872 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
7873 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7874 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7877 ; AVX2OR512VL-LABEL: insert_dup_mem_v16i16_sext_i16_i64:
7878 ; AVX2OR512VL: # %bb.0:
7879 ; AVX2OR512VL-NEXT: vpbroadcastw (%rdi), %ymm0
7880 ; AVX2OR512VL-NEXT: retq
7882 ; XOPAVX1-LABEL: insert_dup_mem_v16i16_sext_i16_i64:
7884 ; XOPAVX1-NEXT: movzwl (%rdi), %eax
7885 ; XOPAVX1-NEXT: vmovd %eax, %xmm0
7886 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
7887 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7888 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7889 ; XOPAVX1-NEXT: retq
7891 ; XOPAVX2-LABEL: insert_dup_mem_v16i16_sext_i16_i64:
7893 ; XOPAVX2-NEXT: vpbroadcastw (%rdi), %ymm0
7894 ; XOPAVX2-NEXT: retq
7895 %tmp = load i16, ptr %ptr, align 2
7896 %tmp1 = sext i16 %tmp to i64
7897 %tmp2 = insertelement <2 x i64> zeroinitializer, i64 %tmp1, i32 0
7898 %tmp3 = bitcast <2 x i64> %tmp2 to <8 x i16>
7899 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <16 x i32> zeroinitializer
7900 ret <16 x i16> %tmp4
7903 define <16 x i16> @unpckh_v16i16(<16 x i16> %x, <16 x i16> %y) {
7904 ; AVX1-LABEL: unpckh_v16i16:
7906 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
7907 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
7910 ; AVX2OR512VL-LABEL: unpckh_v16i16:
7911 ; AVX2OR512VL: # %bb.0:
7912 ; AVX2OR512VL-NEXT: vextracti128 $1, %ymm1, %xmm1
7913 ; AVX2OR512VL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
7914 ; AVX2OR512VL-NEXT: retq
7916 ; XOPAVX1-LABEL: unpckh_v16i16:
7918 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
7919 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
7920 ; XOPAVX1-NEXT: retq
7922 ; XOPAVX2-LABEL: unpckh_v16i16:
7924 ; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
7925 ; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
7926 ; XOPAVX2-NEXT: retq
7927 %unpckh = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 4, i32 28, i32 5, i32 29, i32 6, i32 30, i32 7, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
7928 ret <16 x i16> %unpckh
7931 define <16 x i16> @pr43230(<16 x i16> %a, <16 x i16> %b) {
7932 ; AVX1-LABEL: pr43230:
7934 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
7935 ; AVX1-NEXT: vpsllw $12, %xmm1, %xmm2
7936 ; AVX1-NEXT: vpsllw $4, %xmm1, %xmm1
7937 ; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1
7938 ; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm2
7939 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7940 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm3
7941 ; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
7942 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm1
7943 ; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
7944 ; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm1
7945 ; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
7946 ; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
7947 ; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1
7948 ; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
7949 ; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
7950 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7951 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
7954 ; AVX2-LABEL: pr43230:
7956 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
7957 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
7958 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
7959 ; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
7960 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[26,27],zero,zero
7963 ; AVX512VL-LABEL: pr43230:
7964 ; AVX512VL: # %bb.0:
7965 ; AVX512VL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
7966 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
7967 ; AVX512VL-NEXT: retq
7969 ; XOPAVX1-LABEL: pr43230:
7971 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
7972 ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
7973 ; XOPAVX1-NEXT: vpsubw %xmm1, %xmm2, %xmm1
7974 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7975 ; XOPAVX1-NEXT: vpshlw %xmm1, %xmm0, %xmm0
7976 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7977 ; XOPAVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
7978 ; XOPAVX1-NEXT: retq
7980 ; XOPAVX2-LABEL: pr43230:
7982 ; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
7983 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
7984 ; XOPAVX2-NEXT: vpsubw %xmm1, %xmm2, %xmm1
7985 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
7986 ; XOPAVX2-NEXT: vpshlw %xmm1, %xmm0, %xmm0
7987 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
7988 ; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
7989 ; XOPAVX2-NEXT: retq
7990 %shr = lshr <16 x i16> %a, %b
7991 %shuf = shufflevector <16 x i16> zeroinitializer, <16 x i16> %shr, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 30, i32 15>
7992 ret <16 x i16> %shuf