1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-SLOW
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-FAST
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-SLOW
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-FAST
; Mask 0,1,0,1,2,3,2,3 keeps i16 elements in aligned pairs, so the checks expect one 32-bit element shuffle.
define <8 x i16> @shuffle_v8i16_01012323(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_01012323:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_01012323:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3>
  ret <8 x i16> %shuffle
}
; Reverses the order of the four aligned i16 pairs; the checks expect one 32-bit element shuffle.
define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_67452301:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_67452301:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
  ret <8 x i16> %shuffle
}
; Two-input shuffle that takes the high half of %a followed by the low half of %b.
define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_456789AB:
; SSE2:       # %bb.0:
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_456789AB:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_456789AB:
; SSE41:       # %bb.0:
; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_456789AB:
; AVX:       # %bb.0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %shuffle
}
; Splat of element 0; the AVX2 and AVX512VL runs are expected to use a single word broadcast.
define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_00000000:
; SSE:       # %bb.0:
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v8i16_00000000:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i16_00000000:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i16> %shuffle
}
; Splats element 0 across the low half and element 4 across the high half.
; The fast-variable-shuffle runs are expected to fold this into one byte shuffle.
define <8 x i16> @shuffle_v8i16_00004444(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_00004444:
; SSE:       # %bb.0:
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v8i16_00004444:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v8i16_00004444:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v8i16_00004444:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i16_00004444:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX512VL-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_00004444:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9]
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
; Odd result lanes take elements 0-3 of %a and even lanes are undef, so a self-unpack of the low half matches.
define <8 x i16> @shuffle_v8i16_u0u1u2u3(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_u0u1u2u3:
; SSE:       # %bb.0:
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_u0u1u2u3:
; AVX:       # %bb.0:
; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3>
  ret <8 x i16> %shuffle
}
; Odd result lanes take elements 4-7 of %a and even lanes are undef, so a self-unpack of the high half matches.
define <8 x i16> @shuffle_v8i16_u4u5u6u7(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_u4u5u6u7:
; SSE:       # %bb.0:
; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_u4u5u6u7:
; AVX:       # %bb.0:
; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7>
  ret <8 x i16> %shuffle
}
; Permutes the low four elements (3,1,2,0) and swaps the two i16 pairs of the high half.
define <8 x i16> @shuffle_v8i16_31206745(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_31206745:
; SSE:       # %bb.0:
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v8i16_31206745:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v8i16_31206745:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v8i16_31206745:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9,10,11]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i16_31206745:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_31206745:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9,10,11]
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 5>
  ret <8 x i16> %shuffle
}
; Splats element 4 across the low half and element 0 across the high half.
define <8 x i16> @shuffle_v8i16_44440000(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_44440000:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_44440000:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_44440000:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_44440000:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i16> %shuffle
}
; Swaps the two aligned i16 pairs inside each half; the checks expect one 32-bit element shuffle.
define <8 x i16> @shuffle_v8i16_23016745(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_23016745:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_23016745:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,0,3,2]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
  ret <8 x i16> %shuffle
}
; Like the 23016745 mask but lane 3 reads element 2, so the low half needs a word-granular shuffle.
define <8 x i16> @shuffle_v8i16_23026745(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_23026745:
; SSE:       # %bb.0:
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v8i16_23026745:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v8i16_23026745:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v8i16_23026745:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9,10,11]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i16_23026745:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_23026745:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9,10,11]
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 5>
  ret <8 x i16> %shuffle
}
; Like the 23016745 mask but lane 7 reads element 7, so the high half needs a word-granular shuffle.
define <8 x i16> @shuffle_v8i16_23016747(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_23016747:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v8i16_23016747:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v8i16_23016747:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v8i16_23016747:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3,12,13,14,15,8,9,14,15]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i16_23016747:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
; AVX512VL-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_23016747:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3,12,13,14,15,8,9,14,15]
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 7>
  ret <8 x i16> %shuffle
}
; Swaps the two halves and applies the 3,1,2,0 permutation inside each of them.
define <8 x i16> @shuffle_v8i16_75643120(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_75643120:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_75643120:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_75643120:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_75643120:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 0>
  ret <8 x i16> %shuffle
}
; Uses only elements 0,1,4,5 of %a, arranged as 1,0,5,4 in the low half and 5,4,1,0 in the high half.
define <8 x i16> @shuffle_v8i16_10545410(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_10545410:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_10545410:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_10545410:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_10545410:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 0>
  ret <8 x i16> %shuffle
}
; Both halves of the result hold the same 5,4,1,0 sequence drawn from %a.
define <8 x i16> @shuffle_v8i16_54105410(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_54105410:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_54105410:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_54105410:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_54105410:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 0>
  ret <8 x i16> %shuffle
}
; Low half is 5,4,1,0 and the high half is the same pairs in the opposite order (1,0,5,4).
define <8 x i16> @shuffle_v8i16_54101054(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_54101054:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_54101054:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_54101054:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_54101054:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 4>
  ret <8 x i16> %shuffle
}
; Only elements 0 and 4 are used, laid out as 0,4,4,0 in both halves.
define <8 x i16> @shuffle_v8i16_04400440(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_04400440:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,4,6]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_04400440:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_04400440:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_04400440:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 0>
  ret <8 x i16> %shuffle
}
; Only elements 0 and 4 are used, laid out as 4,0,0,4 in both halves.
define <8 x i16> @shuffle_v8i16_40044004(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_40044004:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,0,0,2,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_40044004:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_40044004:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_40044004:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 4>
  ret <8 x i16> %shuffle
}
; Full permutation that puts the even elements of %a in the low half and the odd elements in the high half.
define <8 x i16> @shuffle_v8i16_26405173(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_26405173:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_26405173:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_26405173:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_26405173:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 3>
  ret <8 x i16> %shuffle
}
; Variant of the 26405173 permutation with a different order of the even elements in the low half.
define <8 x i16> @shuffle_v8i16_20645173(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_20645173:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_20645173:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_20645173:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_20645173:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 3>
  ret <8 x i16> %shuffle
}
; Variant of the 26405173 permutation with a different order of the odd elements in the high half.
define <8 x i16> @shuffle_v8i16_26401375(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_26401375:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_26401375:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_26401375:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_26401375:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 5>
  ret <8 x i16> %shuffle
}
; Irregular single-input mask that reuses element 6 three times and mixes both halves of %a.
define <8 x i16> @shuffle_v8i16_66751643(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_66751643:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,3,2,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,4,6]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_66751643:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_66751643:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_66751643:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 3>
  ret <8 x i16> %shuffle
}
; Irregular single-input mask; unlike its neighbours this shuffle passes undef (not %b) as the second operand.
define <8 x i16> @shuffle_v8i16_60514754(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_60514754:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,5,6]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_60514754:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_60514754:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_60514754:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 6, i32 0, i32 5, i32 1, i32 4, i32 7, i32 5, i32 4>
  ret <8 x i16> %shuffle
}
; Element 0 fills the first two lanes and element 4 fills the remaining six.
define <8 x i16> @shuffle_v8i16_00444444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_00444444:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_00444444:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_00444444:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_00444444:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
; Element 0 fills lanes 2 and 3 and element 4 fills every other lane.
define <8 x i16> @shuffle_v8i16_44004444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_44004444:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,2,0,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_44004444:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_44004444:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_44004444:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
; Low half is 0,4,4,0 and the high half is a splat of element 4.
define <8 x i16> @shuffle_v8i16_04404444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_04404444:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_04404444:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_04404444:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_04404444:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
; Low half is 0,4,4,0 and the high half is a splat of element 0.
define <8 x i16> @shuffle_v8i16_04400000(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_04400000:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,0,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_04400000:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_04400000:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_04400000:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i16> %shuffle
}
; Low half is 0,4,4,0 while the high half of %a is passed through unchanged.
define <8 x i16> @shuffle_v8i16_04404567(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_04404567:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v8i16_04404567:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v8i16_04404567:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v8i16_04404567:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,14,15]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i16_04404567:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_04404567:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,14,15]
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %shuffle
}
; Same shape as the 00444444 mask but lane 1 is undef, leaving the lowering free to pick any element there.
define <8 x i16> @shuffle_v8i16_0X444444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_0X444444:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,2,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_0X444444:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_0X444444:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_0X444444:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
; Same shape as the 44004444 mask but lane 2 is undef.
define <8 x i16> @shuffle_v8i16_44X04444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_44X04444:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_44X04444:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_44X04444:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_44X04444:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
; Same shape as the 04404444 mask but lane 0 is undef; the expected code matches that test.
define <8 x i16> @shuffle_v8i16_X4404444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_X4404444:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_X4404444:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_X4404444:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_X4404444:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
; Only the low half is defined (0,1,2,7) and it pulls element 7 across from the high half.
define <8 x i16> @shuffle_v8i16_0127XXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_0127XXXX:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_0127XXXX:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_0127XXXX:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_0127XXXX:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuffle
}
; Only the high half is defined (4,5,6,3) and it pulls element 3 across from the low half.
define <8 x i16> @shuffle_v8i16_XXXX4563(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_XXXX4563:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_XXXX4563:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_XXXX4563:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_XXXX4563:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 3>
  ret <8 x i16> %shuffle
}
; Only the low half is defined; it holds 4,5,6,3, so three elements come from the high half of %a.
define <8 x i16> @shuffle_v8i16_4563XXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_4563XXXX:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_4563XXXX:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_4563XXXX:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_4563XXXX:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuffle
}
; Identity shuffle except that elements 3 and 7 trade places across the two halves.
define <8 x i16> @shuffle_v8i16_01274563(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_01274563:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_01274563:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_01274563:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_01274563:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 3>
  ret <8 x i16> %shuffle
}
; The 01274563 result with its two halves exchanged, giving 4,5,6,3 then 0,1,2,7.
define <8 x i16> @shuffle_v8i16_45630127(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_45630127:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_45630127:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_45630127:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_45630127:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 7>
  ret <8 x i16> %shuffle
}
; Unary shuffle of %a (%b is unused) with an irregular mask, a[3,7,1,0,2,7,3,5],
; which repeats elements 3 and 7 and draws from both 64-bit halves for each
; half of the result. With plain SSE2 the checks expect a six-instruction
; pshufhw/pshufd/pshuflw chain; with SSSE3 or later they expect one (v)pshufb.
927 define <8 x i16> @shuffle_v8i16_37102735(<8 x i16> %a, <8 x i16> %b) {
928 ; SSE2-LABEL: shuffle_v8i16_37102735:
930 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
931 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
932 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
933 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
934 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
935 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6]
938 ; SSSE3-LABEL: shuffle_v8i16_37102735:
940 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
943 ; SSE41-LABEL: shuffle_v8i16_37102735:
945 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
948 ; AVX-LABEL: shuffle_v8i16_37102735:
950 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
952 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 5>
953 ret <8 x i16> %shuffle
; Interleaves the low four elements of %a and %b (a0,b0,a1,b1,a2,b2,a3,b3).
; The checks expect this to lower to a single (v)punpcklwd.
956 define <8 x i16> @shuffle_v8i16_08192a3b(<8 x i16> %a, <8 x i16> %b) {
957 ; SSE-LABEL: shuffle_v8i16_08192a3b:
959 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
962 ; AVX-LABEL: shuffle_v8i16_08192a3b:
964 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
966 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
967 ret <8 x i16> %shuffle
; Interleaves the low four elements of %a with the high four elements of %b
; (a0,b4,a1,b5,a2,b6,a3,b7). Most configurations are expected to swap the
; 64-bit halves of %b with (v)pshufd and then use (v)punpcklwd; the AVX512VL
; run with fast-variable-shuffle expects a constant-index vpermt2w instead.
970 define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) {
971 ; SSE-LABEL: shuffle_v8i16_0c1d2e3f:
973 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
974 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
977 ; AVX1OR2-LABEL: shuffle_v8i16_0c1d2e3f:
979 ; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
980 ; AVX1OR2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
983 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_0c1d2e3f:
984 ; AVX512VL-SLOW: # %bb.0:
985 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
986 ; AVX512VL-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
987 ; AVX512VL-SLOW-NEXT: retq
989 ; AVX512VL-FAST-LABEL: shuffle_v8i16_0c1d2e3f:
990 ; AVX512VL-FAST: # %bb.0:
991 ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [0,12,1,13,2,14,3,15]
992 ; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
993 ; AVX512VL-FAST-NEXT: retq
994 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 2, i32 14, i32 3, i32 15>
995 ret <8 x i16> %shuffle
; Interleaves the high four elements of %a and %b (a4,b4,a5,b5,a6,b6,a7,b7).
; The checks expect this to lower to a single (v)punpckhwd.
998 define <8 x i16> @shuffle_v8i16_4c5d6e7f(<8 x i16> %a, <8 x i16> %b) {
999 ; SSE-LABEL: shuffle_v8i16_4c5d6e7f:
1001 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1004 ; AVX-LABEL: shuffle_v8i16_4c5d6e7f:
1006 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1008 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1009 ret <8 x i16> %shuffle
; Interleaves the high four elements of %a with the low four elements of %b
; (a4,b0,a5,b1,a6,b2,a7,b3). Most configurations are expected to swap the
; 64-bit halves of %a with (v)pshufd and then use (v)punpcklwd; the AVX512VL
; run with fast-variable-shuffle expects a constant-index vpermt2w instead.
1012 define <8 x i16> @shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) {
1013 ; SSE-LABEL: shuffle_v8i16_48596a7b:
1015 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1016 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1019 ; AVX1OR2-LABEL: shuffle_v8i16_48596a7b:
1021 ; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1022 ; AVX1OR2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1023 ; AVX1OR2-NEXT: retq
1025 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_48596a7b:
1026 ; AVX512VL-SLOW: # %bb.0:
1027 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1028 ; AVX512VL-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1029 ; AVX512VL-SLOW-NEXT: retq
1031 ; AVX512VL-FAST-LABEL: shuffle_v8i16_48596a7b:
1032 ; AVX512VL-FAST: # %bb.0:
1033 ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [4,8,5,9,6,10,7,11]
1034 ; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
1035 ; AVX512VL-FAST-NEXT: retq
1036 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 8, i32 5, i32 9, i32 6, i32 10, i32 7, i32 11>
1037 ret <8 x i16> %shuffle
; Interleaves elements 0,1,6,7 of %a with the same-numbered elements of %b
; (a0,b0,a1,b1,a6,b6,a7,b7). The SSE and AVX1/AVX2 checks expect one (v)pshufd
; per input followed by (v)punpcklwd; both AVX512VL runs expect a
; constant-index vpermt2w.
1040 define <8 x i16> @shuffle_v8i16_08196e7f(<8 x i16> %a, <8 x i16> %b) {
1041 ; SSE-LABEL: shuffle_v8i16_08196e7f:
1043 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
1044 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
1045 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1048 ; AVX1OR2-LABEL: shuffle_v8i16_08196e7f:
1050 ; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
1051 ; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
1052 ; AVX1OR2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1053 ; AVX1OR2-NEXT: retq
1055 ; AVX512VL-LABEL: shuffle_v8i16_08196e7f:
1056 ; AVX512VL: # %bb.0:
1057 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [0,8,1,9,6,14,7,15]
1058 ; AVX512VL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
1059 ; AVX512VL-NEXT: retq
1060 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 6, i32 14, i32 7, i32 15>
1061 ret <8 x i16> %shuffle
; Interleaves elements 0,1,6,7 of %a with elements 4,5,0,1 of %b
; (a0,b4,a1,b5,a6,b0,a7,b1). The SSE and AVX1/AVX2 checks expect one (v)pshufd
; per input followed by (v)punpcklwd; both AVX512VL runs expect a
; constant-index vpermt2w.
1064 define <8 x i16> @shuffle_v8i16_0c1d6879(<8 x i16> %a, <8 x i16> %b) {
1065 ; SSE-LABEL: shuffle_v8i16_0c1d6879:
1067 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
1068 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
1069 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1072 ; AVX1OR2-LABEL: shuffle_v8i16_0c1d6879:
1074 ; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
1075 ; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
1076 ; AVX1OR2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1077 ; AVX1OR2-NEXT: retq
1079 ; AVX512VL-LABEL: shuffle_v8i16_0c1d6879:
1080 ; AVX512VL: # %bb.0:
1081 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [0,12,1,13,6,8,7,9]
1082 ; AVX512VL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
1083 ; AVX512VL-NEXT: retq
1084 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 6, i32 8, i32 7, i32 9>
1085 ret <8 x i16> %shuffle
; Two-input shuffle producing a1,a0,b1,b0,a3,a2,b3,b2: pairs taken from the
; low halves of %a and %b with the elements of each pair swapped. The SSE,
; AVX1 and slow-shuffle AVX2 checks expect (v)punpcklwd followed by
; (v)pshuflw and (v)pshufhw; the fast-variable-shuffle AVX2 run folds the last
; two into one vpshufb; both AVX512VL runs expect a constant-index vpermt2w.
1088 define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) {
1089 ; SSE-LABEL: shuffle_v8i16_109832ba:
1091 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1092 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
1093 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
1096 ; AVX1-LABEL: shuffle_v8i16_109832ba:
1098 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1099 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
1100 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
1103 ; AVX2-SLOW-LABEL: shuffle_v8i16_109832ba:
1104 ; AVX2-SLOW: # %bb.0:
1105 ; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1106 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
1107 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
1108 ; AVX2-SLOW-NEXT: retq
1110 ; AVX2-FAST-LABEL: shuffle_v8i16_109832ba:
1111 ; AVX2-FAST: # %bb.0:
1112 ; AVX2-FAST-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1113 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,6,7,2,3,12,13,8,9,14,15,10,11]
1114 ; AVX2-FAST-NEXT: retq
1116 ; AVX512VL-LABEL: shuffle_v8i16_109832ba:
1117 ; AVX512VL: # %bb.0:
1118 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [1,0,9,8,3,2,11,10]
1119 ; AVX512VL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
1120 ; AVX512VL-NEXT: retq
1121 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 9, i32 8, i32 3, i32 2, i32 11, i32 10>
1122 ret <8 x i16> %shuffle
; Interleaves the low four elements of %b and %a with %b first
; (b0,a0,b1,a1,b2,a2,b3,a3). The SSE checks expect punpcklwd into xmm1 followed
; by a movdqa copy into xmm0; the AVX checks expect one vpunpcklwd with the
; source operands commuted.
1125 define <8 x i16> @shuffle_v8i16_8091a2b3(<8 x i16> %a, <8 x i16> %b) {
1126 ; SSE-LABEL: shuffle_v8i16_8091a2b3:
1128 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1129 ; SSE-NEXT: movdqa %xmm1, %xmm0
1132 ; AVX-LABEL: shuffle_v8i16_8091a2b3:
1134 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1136 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3>
1137 ret <8 x i16> %shuffle
; Interleaves the high four elements of %b and %a with %b first
; (b4,a4,b5,a5,b6,a6,b7,a7). The SSE checks expect punpckhwd into xmm1 followed
; by a movdqa copy into xmm0; the AVX checks expect one vpunpckhwd with the
; source operands commuted.
1139 define <8 x i16> @shuffle_v8i16_c4d5e6f7(<8 x i16> %a, <8 x i16> %b) {
1140 ; SSE-LABEL: shuffle_v8i16_c4d5e6f7:
1142 ; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1143 ; SSE-NEXT: movdqa %xmm1, %xmm0
1146 ; AVX-LABEL: shuffle_v8i16_c4d5e6f7:
1148 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1150 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
1151 ret <8 x i16> %shuffle
; Low half of the result is a[0,2,1,3]; high half is b[4,6,5,7]. Each half is
; permuted within its own source and the halves are then combined. The expected
; combine step differs per configuration: movsd (SSE2, SSSE3), (v)pblendw
; (SSE4.1, AVX1), vpblendd (slow-shuffle AVX2), vpshufb + vpunpcklqdq
; (fast-variable-shuffle AVX2). Both AVX512VL runs expect a constant-index
; vpermt2w for the whole shuffle.
1154 define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) {
1155 ; SSE2-LABEL: shuffle_v8i16_0213cedf:
1157 ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,1,3,4,5,6,7]
1158 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,6,5,7]
1159 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
1162 ; SSSE3-LABEL: shuffle_v8i16_0213cedf:
1164 ; SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,1,3,4,5,6,7]
1165 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,6,5,7]
1166 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
1169 ; SSE41-LABEL: shuffle_v8i16_0213cedf:
1171 ; SSE41-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
1172 ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1173 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1176 ; AVX1-LABEL: shuffle_v8i16_0213cedf:
1178 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
1179 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1180 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1183 ; AVX2-SLOW-LABEL: shuffle_v8i16_0213cedf:
1184 ; AVX2-SLOW: # %bb.0:
1185 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
1186 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1187 ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1188 ; AVX2-SLOW-NEXT: retq
1190 ; AVX2-FAST-LABEL: shuffle_v8i16_0213cedf:
1191 ; AVX2-FAST: # %bb.0:
1192 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11,14,15,8,9,12,13,10,11,14,15]
1193 ; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1194 ; AVX2-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1195 ; AVX2-FAST-NEXT: retq
1197 ; AVX512VL-LABEL: shuffle_v8i16_0213cedf:
1198 ; AVX512VL: # %bb.0:
1199 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [0,2,1,3,12,14,13,15]
1200 ; AVX512VL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
1201 ; AVX512VL-NEXT: retq
1202 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 12, i32 14, i32 13, i32 15>
1203 ret <8 x i16> %shuffle
; Result elements 0-3 are a4,a4,a3,b2; elements 4-7 are undef. The checks
; expect element 2 of %b to be blended into %a first (pand/pandn/por with a
; constant mask on SSE2, (v)pblendw from SSE4.1 up) and the result then
; permuted; SSSE3 instead expects two pshufb joined by por; both AVX512VL runs
; expect a constant-index vpermt2w.
1206 define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) {
1207 ; SSE2-LABEL: shuffle_v8i16_443aXXXX:
1209 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,0,65535,65535,65535,65535,65535]
1210 ; SSE2-NEXT: pand %xmm2, %xmm0
1211 ; SSE2-NEXT: pandn %xmm1, %xmm2
1212 ; SSE2-NEXT: por %xmm0, %xmm2
1213 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,1,2,3]
1214 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1217 ; SSSE3-LABEL: shuffle_v8i16_443aXXXX:
1219 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[4,5,u,u,u,u,u,u,u,u]
1220 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1221 ; SSSE3-NEXT: por %xmm1, %xmm0
1224 ; SSE41-LABEL: shuffle_v8i16_443aXXXX:
1226 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1227 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1228 ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1231 ; AVX1-LABEL: shuffle_v8i16_443aXXXX:
1233 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1234 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1235 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1238 ; AVX2-SLOW-LABEL: shuffle_v8i16_443aXXXX:
1239 ; AVX2-SLOW: # %bb.0:
1240 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1241 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1242 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1243 ; AVX2-SLOW-NEXT: retq
1245 ; AVX2-FAST-LABEL: shuffle_v8i16_443aXXXX:
1246 ; AVX2-FAST: # %bb.0:
1247 ; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1248 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7,4,5,8,9,10,11,12,13,14,15]
1249 ; AVX2-FAST-NEXT: retq
1251 ; AVX512VL-LABEL: shuffle_v8i16_443aXXXX:
1252 ; AVX512VL: # %bb.0:
1253 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [4,4,3,10,4,5,6,7]
1254 ; AVX512VL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
1255 ; AVX512VL-NEXT: retq
1256 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 3, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
1257 ret <8 x i16> %shuffle
; Result elements 0-3 are a0,a3,a2,b5; elements 4-7 are undef. Expected
; lowerings: a shufps/pshufhw/pshufd/pshuflw chain on SSE2, two pshufb joined
; by por on SSSE3, a half-vector blend ((v)pblendw or vpblendd) followed by one
; (v)pshufb on SSE4.1/AVX1/AVX2, and a constant-index vpermt2w on AVX512VL.
1260 define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) {
1261 ; SSE2-LABEL: shuffle_v8i16_032dXXXX:
1263 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1264 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,0]
1265 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,7]
1266 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1267 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
1270 ; SSSE3-LABEL: shuffle_v8i16_032dXXXX:
1272 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
1273 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1274 ; SSSE3-NEXT: por %xmm1, %xmm0
1277 ; SSE41-LABEL: shuffle_v8i16_032dXXXX:
1279 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1280 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1283 ; AVX1-LABEL: shuffle_v8i16_032dXXXX:
1285 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1286 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1289 ; AVX2-LABEL: shuffle_v8i16_032dXXXX:
1291 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1292 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1295 ; AVX512VL-LABEL: shuffle_v8i16_032dXXXX:
1296 ; AVX512VL: # %bb.0:
1297 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [0,3,2,13,0,13,0,1]
1298 ; AVX512VL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
1299 ; AVX512VL-NEXT: retq
1300 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 3, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1301 ret <8 x i16> %shuffle
; Only result element 3 is defined (it takes b5); every other element is
; undef. The checks expect a single 32-bit-granular shuffle of xmm1 with
; pattern [2,2,3,3]: pshufd on SSE, vpermilps on AVX.
1303 define <8 x i16> @shuffle_v8i16_XXXdXXXX(<8 x i16> %a, <8 x i16> %b) {
1304 ; SSE-LABEL: shuffle_v8i16_XXXdXXXX:
1306 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
1309 ; AVX-LABEL: shuffle_v8i16_XXXdXXXX:
1311 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[2,2,3,3]
1313 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1314 ret <8 x i16> %shuffle
; Result elements 0-3 are a0,a1,a2,b5; elements 4-7 are undef. The checks
; expect b5 to be moved into element 3 with (v)pshufd and then merged into %a:
; pand/pandn/por with a constant mask on SSE2, (v)pblendw from SSE4.1 up.
; SSSE3 expects two pshufb joined by por, and the AVX512VL run with
; fast-variable-shuffle expects a constant-index vpermt2w.
1317 define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) {
1318 ; SSE2-LABEL: shuffle_v8i16_012dXXXX:
1320 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
1321 ; SSE2-NEXT: pand %xmm2, %xmm0
1322 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1323 ; SSE2-NEXT: pandn %xmm1, %xmm2
1324 ; SSE2-NEXT: por %xmm2, %xmm0
1327 ; SSSE3-LABEL: shuffle_v8i16_012dXXXX:
1329 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
1330 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1331 ; SSSE3-NEXT: por %xmm1, %xmm0
1334 ; SSE41-LABEL: shuffle_v8i16_012dXXXX:
1336 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1337 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
1340 ; AVX1OR2-LABEL: shuffle_v8i16_012dXXXX:
1342 ; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1343 ; AVX1OR2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
1344 ; AVX1OR2-NEXT: retq
1346 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_012dXXXX:
1347 ; AVX512VL-SLOW: # %bb.0:
1348 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1349 ; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
1350 ; AVX512VL-SLOW-NEXT: retq
1352 ; AVX512VL-FAST-LABEL: shuffle_v8i16_012dXXXX:
1353 ; AVX512VL-FAST: # %bb.0:
1354 ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,13,4,5,6,7]
1355 ; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
1356 ; AVX512VL-FAST-NEXT: retq
1357 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1358 ret <8 x i16> %shuffle
; Result elements 4-7 are b4,b5,b6,a3; elements 0-3 are undef. The checks
; expect a3 to be moved into element 7 (pshufd, or vpbroadcastq on AVX2 and
; slow-shuffle AVX512VL) and then merged into %b: pand/pandn/por on SSE2,
; (v)pblendw from SSE4.1 up. SSSE3 expects two pshufb joined by por; the
; AVX512VL run with fast-variable-shuffle expects vpermi2w plus a register copy.
1361 define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) {
1362 ; SSE2-LABEL: shuffle_v8i16_XXXXcde3:
1364 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535,65535,65535,65535,0]
1365 ; SSE2-NEXT: pand %xmm2, %xmm1
1366 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1367 ; SSE2-NEXT: pandn %xmm0, %xmm2
1368 ; SSE2-NEXT: por %xmm1, %xmm2
1369 ; SSE2-NEXT: movdqa %xmm2, %xmm0
1372 ; SSSE3-LABEL: shuffle_v8i16_XXXXcde3:
1374 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm0[6,7]
1375 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,8,9,10,11,12,13],zero,zero
1376 ; SSSE3-NEXT: por %xmm1, %xmm0
1379 ; SSE41-LABEL: shuffle_v8i16_XXXXcde3:
1381 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1382 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1385 ; AVX1-LABEL: shuffle_v8i16_XXXXcde3:
1387 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1388 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1391 ; AVX2-LABEL: shuffle_v8i16_XXXXcde3:
1393 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
1394 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1397 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_XXXXcde3:
1398 ; AVX512VL-SLOW: # %bb.0:
1399 ; AVX512VL-SLOW-NEXT: vpbroadcastq %xmm0, %xmm0
1400 ; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1401 ; AVX512VL-SLOW-NEXT: retq
1403 ; AVX512VL-FAST-LABEL: shuffle_v8i16_XXXXcde3:
1404 ; AVX512VL-FAST: # %bb.0:
1405 ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,11]
1406 ; AVX512VL-FAST-NEXT: vpermi2w %xmm0, %xmm1, %xmm2
1407 ; AVX512VL-FAST-NEXT: vmovdqa %xmm2, %xmm0
1408 ; AVX512VL-FAST-NEXT: retq
1409 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 3>
1410 ret <8 x i16> %shuffle
; Result elements 0-3 are b4,b5,b6,a3; elements 4-7 are undef. The checks
; expect the 64-bit halves of %b to be swapped with (v)pshufd and element 3 of
; %a then merged in: pand/pandn/por on SSE2, (v)pblendw from SSE4.1 up. SSSE3
; expects two pshufb joined by por; the AVX512VL run with
; fast-variable-shuffle expects vpermi2w plus a register copy.
1413 define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) {
1414 ; SSE2-LABEL: shuffle_v8i16_cde3XXXX:
1416 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
1417 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1418 ; SSE2-NEXT: pand %xmm2, %xmm1
1419 ; SSE2-NEXT: pandn %xmm0, %xmm2
1420 ; SSE2-NEXT: por %xmm1, %xmm2
1421 ; SSE2-NEXT: movdqa %xmm2, %xmm0
1424 ; SSSE3-LABEL: shuffle_v8i16_cde3XXXX:
1426 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[6,7,u,u,u,u,u,u,u,u]
1427 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13],zero,zero,xmm1[u,u,u,u,u,u,u,u]
1428 ; SSSE3-NEXT: por %xmm1, %xmm0
1431 ; SSE41-LABEL: shuffle_v8i16_cde3XXXX:
1433 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1434 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
1437 ; AVX1OR2-LABEL: shuffle_v8i16_cde3XXXX:
1439 ; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1440 ; AVX1OR2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
1441 ; AVX1OR2-NEXT: retq
1443 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_cde3XXXX:
1444 ; AVX512VL-SLOW: # %bb.0:
1445 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1446 ; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
1447 ; AVX512VL-SLOW-NEXT: retq
1449 ; AVX512VL-FAST-LABEL: shuffle_v8i16_cde3XXXX:
1450 ; AVX512VL-FAST: # %bb.0:
1451 ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,6,11,0,1,2,3]
1452 ; AVX512VL-FAST-NEXT: vpermi2w %xmm0, %xmm1, %xmm2
1453 ; AVX512VL-FAST-NEXT: vmovdqa %xmm2, %xmm0
1454 ; AVX512VL-FAST-NEXT: retq
1455 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 13, i32 14, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
1456 ret <8 x i16> %shuffle
; Fully-defined two-input shuffle producing a0,a1,a2,b5,b4,b5,b6,a3. Expected
; lowerings: an eight-instruction shufps/pshuflw/pshufd/pshufhw chain on SSE2,
; two pshufb joined by por on SSSE3, a half-vector blend ((v)pblendw or
; vpblendd) followed by one (v)pshufb on SSE4.1/AVX1/AVX2, and a
; constant-index vpermt2w on AVX512VL.
1459 define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) {
1460 ; SSE2-LABEL: shuffle_v8i16_012dcde3:
1462 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1463 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3,2,1]
1464 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
1465 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
1466 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
1467 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,7]
1468 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
1469 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,3,0,2,4,5,6,7]
1472 ; SSSE3-LABEL: shuffle_v8i16_012dcde3:
1474 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,8,9,10,11,12,13],zero,zero
1475 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7]
1476 ; SSSE3-NEXT: por %xmm1, %xmm0
1479 ; SSE41-LABEL: shuffle_v8i16_012dcde3:
1481 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1482 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1485 ; AVX1-LABEL: shuffle_v8i16_012dcde3:
1487 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1488 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1491 ; AVX2-LABEL: shuffle_v8i16_012dcde3:
1493 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1494 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1497 ; AVX512VL-LABEL: shuffle_v8i16_012dcde3:
1498 ; AVX512VL: # %bb.0:
1499 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,13,12,13,14,3]
1500 ; AVX512VL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
1501 ; AVX512VL-NEXT: retq
1502 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 12, i32 13, i32 14, i32 3>
1503 ret <8 x i16> %shuffle
; In-place blend: every result element keeps its own position, with elements
; 1, 4, 5 and 6 taken from %b and the rest from %a. The SSE2 and SSSE3 checks
; expect andps/andnps/orps with a constant mask; SSE4.1 and AVX expect a
; single (v)pblendw.
1506 define <8 x i16> @shuffle_v8i16_0923cde7(<8 x i16> %a, <8 x i16> %b) {
1507 ; SSE2-LABEL: shuffle_v8i16_0923cde7:
1509 ; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
1510 ; SSE2-NEXT: andps %xmm2, %xmm0
1511 ; SSE2-NEXT: andnps %xmm1, %xmm2
1512 ; SSE2-NEXT: orps %xmm2, %xmm0
1515 ; SSSE3-LABEL: shuffle_v8i16_0923cde7:
1517 ; SSSE3-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
1518 ; SSSE3-NEXT: andps %xmm2, %xmm0
1519 ; SSSE3-NEXT: andnps %xmm1, %xmm2
1520 ; SSSE3-NEXT: orps %xmm2, %xmm0
1523 ; SSE41-LABEL: shuffle_v8i16_0923cde7:
1525 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
1528 ; AVX-LABEL: shuffle_v8i16_0923cde7:
1530 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
1532 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 12, i32 13, i32 14, i32 7>
1533 ret <8 x i16> %shuffle
; Partially-undef two-input shuffle: result elements 3, 5, 6 and 7 are a1, a5,
; a7 and b1; elements 0, 1, 2 and 4 are undef. The checks expect %a to be
; permuted in place, b1 to be positioned in element 7 ((v)pshufd or
; vpbroadcastd), and the two merged (pand/pandn/por on SSE2, (v)pblendw from
; SSE4.1 up). SSSE3 expects two pshufb joined by por; both AVX512VL runs
; expect a constant-index vpermt2w.
1536 define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) {
1537 ; SSE2-LABEL: shuffle_v8i16_XXX1X579:
1539 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,2,0]
1540 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,65535,0]
1541 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1542 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1543 ; SSE2-NEXT: pand %xmm1, %xmm0
1544 ; SSE2-NEXT: pandn %xmm2, %xmm1
1545 ; SSE2-NEXT: por %xmm0, %xmm1
1546 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1549 ; SSSE3-LABEL: shuffle_v8i16_XXX1X579:
1551 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u],zero,zero,xmm1[u,u],zero,zero,zero,zero,xmm1[2,3]
1552 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,2,3,u,u,10,11,14,15],zero,zero
1553 ; SSSE3-NEXT: por %xmm1, %xmm0
1556 ; SSE41-LABEL: shuffle_v8i16_XXX1X579:
1558 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
1559 ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1560 ; SSE41-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1561 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1564 ; AVX1-LABEL: shuffle_v8i16_XXX1X579:
1566 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
1567 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1568 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1569 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1572 ; AVX2-SLOW-LABEL: shuffle_v8i16_XXX1X579:
1573 ; AVX2-SLOW: # %bb.0:
1574 ; AVX2-SLOW-NEXT: vpbroadcastd %xmm1, %xmm1
1575 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1576 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1577 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1578 ; AVX2-SLOW-NEXT: retq
1580 ; AVX2-FAST-LABEL: shuffle_v8i16_XXX1X579:
1581 ; AVX2-FAST: # %bb.0:
1582 ; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %xmm1
1583 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,2,3,8,9,10,11,14,15,14,15]
1584 ; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1585 ; AVX2-FAST-NEXT: retq
1587 ; AVX512VL-LABEL: shuffle_v8i16_XXX1X579:
1588 ; AVX512VL: # %bb.0:
1589 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,1,4,5,7,9]
1590 ; AVX512VL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
1591 ; AVX512VL-NEXT: retq
1592 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 9>
1593 ret <8 x i16> %shuffle
; Partially-undef two-input shuffle: result element 2 is a4 and elements 4-6
; are b0, b2, b4; elements 0, 1, 3 and 7 are undef. Expected lowerings:
; pshuflw/pshufd/pshufhw on %b followed by shufps on SSE2, two pshufb joined by
; por on SSSE3, (v)pshufb + (v)pshufd + a half-vector blend on
; SSE4.1/AVX1/AVX2, and a constant-index vpermt2w on AVX512VL.
1596 define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) {
1597 ; SSE2-LABEL: shuffle_v8i16_XX4X8acX:
1599 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
1600 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
1601 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
1602 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,2],xmm1[2,3]
1605 ; SSSE3-LABEL: shuffle_v8i16_XX4X8acX:
1607 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,8,9,u,u],zero,zero,zero,zero,zero,zero,xmm0[u,u]
1608 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u],zero,zero,xmm1[u,u,0,1,4,5,8,9,u,u]
1609 ; SSSE3-NEXT: por %xmm1, %xmm0
1612 ; SSE41-LABEL: shuffle_v8i16_XX4X8acX:
1614 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
1615 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1616 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1619 ; AVX1-LABEL: shuffle_v8i16_XX4X8acX:
1621 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
1622 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1623 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1626 ; AVX2-LABEL: shuffle_v8i16_XX4X8acX:
1628 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
1629 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1630 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1633 ; AVX512VL-LABEL: shuffle_v8i16_XX4X8acX:
1634 ; AVX512VL: # %bb.0:
1635 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,4,5,8,10,12,10]
1636 ; AVX512VL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
1637 ; AVX512VL-NEXT: retq
1638 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 undef>
1639 ret <8 x i16> %shuffle
; Places the scalar %i in element 0 of an otherwise all-zero vector: %i is
; inserted into element 0 of an undef vector, and the shuffle takes that
; element (index 8) for result element 0 and zeroinitializer elements 1-7 for
; the rest. The checks expect a zero-extending move (movzwl) followed by
; (v)movd.
1642 define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) {
1643 ; SSE-LABEL: shuffle_v8i16_8zzzzzzz:
1645 ; SSE-NEXT: movzwl %di, %eax
1646 ; SSE-NEXT: movd %eax, %xmm0
1649 ; AVX-LABEL: shuffle_v8i16_8zzzzzzz:
1651 ; AVX-NEXT: movzwl %di, %eax
1652 ; AVX-NEXT: vmovd %eax, %xmm0
1654 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1655 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1656 ret <8 x i16> %shuffle
; Places the scalar %i in element 1 of an otherwise all-zero vector. The zero
; lanes deliberately read scattered indices (2,3,7,6,5,4,3) of the
; zeroinitializer operand rather than their own positions. The checks expect
; (v)pxor to zero the register followed by (v)pinsrw $1.
1659 define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
1660 ; SSE-LABEL: shuffle_v8i16_z8zzzzzz:
1662 ; SSE-NEXT: pxor %xmm0, %xmm0
1663 ; SSE-NEXT: pinsrw $1, %edi, %xmm0
1666 ; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
1668 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1669 ; AVX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0
1671 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1672 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>
1673 ret <8 x i16> %shuffle
; Places the scalar %i in element 5 of an otherwise all-zero vector; every
; zero lane reads element 0 of the zeroinitializer operand. The checks expect
; (v)pxor to zero the register followed by (v)pinsrw $5.
1676 define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
1677 ; SSE-LABEL: shuffle_v8i16_zzzzz8zz:
1679 ; SSE-NEXT: pxor %xmm0, %xmm0
1680 ; SSE-NEXT: pinsrw $5, %edi, %xmm0
1683 ; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
1685 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1686 ; AVX-NEXT: vpinsrw $5, %edi, %xmm0, %xmm0
1688 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1689 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>
1690 ret <8 x i16> %shuffle
; Places the scalar %i in element 7; elements 0, 3 and 6 come from the
; zeroinitializer operand and elements 1, 2, 4 and 5 are undef. The checks
; expect (v)pxor to zero the register followed by (v)pinsrw $7.
1693 define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
1694 ; SSE-LABEL: shuffle_v8i16_zuuzuuz8:
1696 ; SSE-NEXT: pxor %xmm0, %xmm0
1697 ; SSE-NEXT: pinsrw $7, %edi, %xmm0
1700 ; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
1702 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1703 ; AVX-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0
1705 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1706 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
1707 ret <8 x i16> %shuffle
; Here the scalar %i is inserted at element 3 of %a (not element 0), and the
; shuffle index 11 moves it into result element 2; all other result elements
; come from the zeroinitializer operand. The checks expect (v)pxor to zero the
; register followed by (v)pinsrw $2.
1710 define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) {
1711 ; SSE-LABEL: shuffle_v8i16_zzBzzzzz:
1713 ; SSE-NEXT: pxor %xmm0, %xmm0
1714 ; SSE-NEXT: pinsrw $2, %edi, %xmm0
1717 ; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
1719 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1720 ; AVX-NEXT: vpinsrw $2, %edi, %xmm0, %xmm0
1722 %a = insertelement <8 x i16> undef, i16 %i, i32 3
1723 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
1724 ret <8 x i16> %shuffle
; Takes the top three elements of %b followed by the low five elements of %a
; (b5,b6,b7,a0,a1,a2,a3,a4), i.e. a contiguous window over the two inputs.
; With plain SSE2 the checks expect psrldq + pslldq + por; with SSSE3 or later
; they expect a single (v)palignr.
1727 define <8 x i16> @shuffle_v8i16_def01234(<8 x i16> %a, <8 x i16> %b) {
1728 ; SSE2-LABEL: shuffle_v8i16_def01234:
1730 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1731 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1732 ; SSE2-NEXT: por %xmm1, %xmm0
1735 ; SSSE3-LABEL: shuffle_v8i16_def01234:
1737 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1740 ; SSE41-LABEL: shuffle_v8i16_def01234:
1742 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1745 ; AVX-LABEL: shuffle_v8i16_def01234:
1747 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1749 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
1750 ret <8 x i16> %shuffle
; Partially undef two-input rotation. Only lane 1 (lane 6 of %b) and lanes 4-6
; (lanes 1-3 of %a) are defined. The expected code is the same byte-align
; sequence (or shift, shift, OR on SSE2) as a fully defined rotation by 5 lanes.
1753 define <8 x i16> @shuffle_v8i16_ueuu123u(<8 x i16> %a, <8 x i16> %b) {
1754 ; SSE2-LABEL: shuffle_v8i16_ueuu123u:
1756 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1757 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1758 ; SSE2-NEXT: por %xmm1, %xmm0
1761 ; SSSE3-LABEL: shuffle_v8i16_ueuu123u:
1763 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1766 ; SSE41-LABEL: shuffle_v8i16_ueuu123u:
1768 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1771 ; AVX-LABEL: shuffle_v8i16_ueuu123u:
1773 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1775 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 14, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1776 ret <8 x i16> %shuffle
; Single-input rotation. Every mask index refers to %a, producing lanes 5-7
; followed by lanes 0-4. The SSSE3, SSE4.1 and AVX runs expect a byte-align of
; xmm0 with itself. The SSE2 run expects a copy, two byte shifts and an OR.
1779 define <8 x i16> @shuffle_v8i16_56701234(<8 x i16> %a, <8 x i16> %b) {
1780 ; SSE2-LABEL: shuffle_v8i16_56701234:
1782 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1783 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1784 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1785 ; SSE2-NEXT: por %xmm1, %xmm0
1788 ; SSSE3-LABEL: shuffle_v8i16_56701234:
1790 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1793 ; SSE41-LABEL: shuffle_v8i16_56701234:
1795 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1798 ; AVX-LABEL: shuffle_v8i16_56701234:
1800 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1802 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4>
1803 ret <8 x i16> %shuffle
; Partially undef single-input rotation. Lane 1 takes lane 6 of %a and lanes
; 4-6 take lanes 1-3 of %a. The expected code matches the fully defined
; rotation of %a by 5 lanes.
1806 define <8 x i16> @shuffle_v8i16_u6uu123u(<8 x i16> %a, <8 x i16> %b) {
1807 ; SSE2-LABEL: shuffle_v8i16_u6uu123u:
1809 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1810 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1811 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1812 ; SSE2-NEXT: por %xmm1, %xmm0
1815 ; SSSE3-LABEL: shuffle_v8i16_u6uu123u:
1817 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1820 ; SSE41-LABEL: shuffle_v8i16_u6uu123u:
1822 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1825 ; AVX-LABEL: shuffle_v8i16_u6uu123u:
1827 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1829 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1830 ret <8 x i16> %shuffle
; Only result lanes 4-6 are defined and they take lanes 1-3 of %a. With the
; remaining lanes undef, a single 6-byte left byte shift of xmm0 is expected.
1833 define <8 x i16> @shuffle_v8i16_uuuu123u(<8 x i16> %a, <8 x i16> %b) {
1834 ; SSE-LABEL: shuffle_v8i16_uuuu123u:
1836 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1839 ; AVX-LABEL: shuffle_v8i16_uuuu123u:
1841 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1843 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1844 ret <8 x i16> %shuffle
; Two-input rotation that yields lanes 3-7 of %b followed by lanes 0-2 of %a.
; The SSSE3, SSE4.1 and AVX runs expect a single byte-align instruction, while
; the plain SSE2 run expects a byte shift of each input combined with an OR.
1847 define <8 x i16> @shuffle_v8i16_bcdef012(<8 x i16> %a, <8 x i16> %b) {
1848 ; SSE2-LABEL: shuffle_v8i16_bcdef012:
1850 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1851 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1852 ; SSE2-NEXT: por %xmm1, %xmm0
1855 ; SSSE3-LABEL: shuffle_v8i16_bcdef012:
1857 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1860 ; SSE41-LABEL: shuffle_v8i16_bcdef012:
1862 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1865 ; AVX-LABEL: shuffle_v8i16_bcdef012:
1867 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1869 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2>
1870 ret <8 x i16> %shuffle
; Partially undef two-input rotation. Lanes 1-3 take lanes 4-6 of %b and lane 6
; takes lane 1 of %a. The expected code matches the fully defined rotation
; that places lanes 3-7 of %b ahead of lanes 0-2 of %a.
1873 define <8 x i16> @shuffle_v8i16_ucdeuu1u(<8 x i16> %a, <8 x i16> %b) {
1874 ; SSE2-LABEL: shuffle_v8i16_ucdeuu1u:
1876 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1877 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1878 ; SSE2-NEXT: por %xmm1, %xmm0
1881 ; SSSE3-LABEL: shuffle_v8i16_ucdeuu1u:
1883 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1886 ; SSE41-LABEL: shuffle_v8i16_ucdeuu1u:
1888 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1891 ; AVX-LABEL: shuffle_v8i16_ucdeuu1u:
1893 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1895 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 1, i32 undef>
1896 ret <8 x i16> %shuffle
; Single-input rotation. Every mask index refers to %a, producing lanes 3-7
; followed by lanes 0-2. The SSSE3, SSE4.1 and AVX runs expect a byte-align of
; xmm0 with itself. The SSE2 run expects a copy, two byte shifts and an OR.
1899 define <8 x i16> @shuffle_v8i16_34567012(<8 x i16> %a, <8 x i16> %b) {
1900 ; SSE2-LABEL: shuffle_v8i16_34567012:
1902 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1903 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1904 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1905 ; SSE2-NEXT: por %xmm1, %xmm0
1908 ; SSSE3-LABEL: shuffle_v8i16_34567012:
1910 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1913 ; SSE41-LABEL: shuffle_v8i16_34567012:
1915 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1918 ; AVX-LABEL: shuffle_v8i16_34567012:
1920 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1922 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2>
1923 ret <8 x i16> %shuffle
; Partially undef single-input rotation. Lanes 1-3 take lanes 4-6 of %a and
; lane 6 takes lane 1 of %a. The expected code matches the fully defined
; rotation of %a by 3 lanes.
1926 define <8 x i16> @shuffle_v8i16_u456uu1u(<8 x i16> %a, <8 x i16> %b) {
1927 ; SSE2-LABEL: shuffle_v8i16_u456uu1u:
1929 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1930 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1931 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1932 ; SSE2-NEXT: por %xmm1, %xmm0
1935 ; SSSE3-LABEL: shuffle_v8i16_u456uu1u:
1937 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1940 ; SSE41-LABEL: shuffle_v8i16_u456uu1u:
1942 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1945 ; AVX-LABEL: shuffle_v8i16_u456uu1u:
1947 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1949 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef>
1950 ret <8 x i16> %shuffle
; Only result lanes 1-3 are defined and they take lanes 4-6 of %a. With the
; remaining lanes undef, a single 6-byte right byte shift of xmm0 is expected.
1953 define <8 x i16> @shuffle_v8i16_u456uuuu(<8 x i16> %a, <8 x i16> %b) {
1954 ; SSE-LABEL: shuffle_v8i16_u456uuuu:
1956 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1959 ; AVX-LABEL: shuffle_v8i16_u456uuuu:
1961 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1963 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
1964 ret <8 x i16> %shuffle
; Two-input rotation that yields lanes 3-7 of %a followed by lanes 0-2 of %b.
; In the SSSE3 and SSE4.1 runs the byte-align writes xmm1, so an extra register
; copy into xmm0 is expected. The AVX run writes xmm0 directly.
1967 define <8 x i16> @shuffle_v8i16_3456789a(<8 x i16> %a, <8 x i16> %b) {
1968 ; SSE2-LABEL: shuffle_v8i16_3456789a:
1970 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1971 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
1972 ; SSE2-NEXT: por %xmm1, %xmm0
1975 ; SSSE3-LABEL: shuffle_v8i16_3456789a:
1977 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1978 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1981 ; SSE41-LABEL: shuffle_v8i16_3456789a:
1983 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1984 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1987 ; AVX-LABEL: shuffle_v8i16_3456789a:
1989 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1991 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
1992 ret <8 x i16> %shuffle
; Partially undef two-input rotation. Lanes 1-3 take lanes 4-6 of %a and lane 6
; takes lane 1 of %b. The expected code matches the fully defined rotation
; that places lanes 3-7 of %a ahead of lanes 0-2 of %b.
1995 define <8 x i16> @shuffle_v8i16_u456uu9u(<8 x i16> %a, <8 x i16> %b) {
1996 ; SSE2-LABEL: shuffle_v8i16_u456uu9u:
1998 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1999 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
2000 ; SSE2-NEXT: por %xmm1, %xmm0
2003 ; SSSE3-LABEL: shuffle_v8i16_u456uu9u:
2005 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
2006 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
2009 ; SSE41-LABEL: shuffle_v8i16_u456uu9u:
2011 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
2012 ; SSE41-NEXT: movdqa %xmm1, %xmm0
2015 ; AVX-LABEL: shuffle_v8i16_u456uu9u:
2017 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
2019 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 9, i32 undef>
2020 ret <8 x i16> %shuffle
; Two-input rotation that yields lanes 5-7 of %a followed by lanes 0-4 of %b.
; In the SSSE3 and SSE4.1 runs the byte-align writes xmm1, so an extra register
; copy into xmm0 is expected. The AVX run writes xmm0 directly.
2023 define <8 x i16> @shuffle_v8i16_56789abc(<8 x i16> %a, <8 x i16> %b) {
2024 ; SSE2-LABEL: shuffle_v8i16_56789abc:
2026 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2027 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
2028 ; SSE2-NEXT: por %xmm1, %xmm0
2031 ; SSSE3-LABEL: shuffle_v8i16_56789abc:
2033 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
2034 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
2037 ; SSE41-LABEL: shuffle_v8i16_56789abc:
2039 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
2040 ; SSE41-NEXT: movdqa %xmm1, %xmm0
2043 ; AVX-LABEL: shuffle_v8i16_56789abc:
2045 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
2047 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
2048 ret <8 x i16> %shuffle
; Partially undef two-input rotation. Lane 1 takes lane 6 of %a and lanes 4-6
; take lanes 1-3 of %b. The expected code matches the fully defined rotation
; that places lanes 5-7 of %a ahead of lanes 0-4 of %b.
2051 define <8 x i16> @shuffle_v8i16_u6uu9abu(<8 x i16> %a, <8 x i16> %b) {
2052 ; SSE2-LABEL: shuffle_v8i16_u6uu9abu:
2054 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2055 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
2056 ; SSE2-NEXT: por %xmm1, %xmm0
2059 ; SSSE3-LABEL: shuffle_v8i16_u6uu9abu:
2061 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
2062 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
2065 ; SSE41-LABEL: shuffle_v8i16_u6uu9abu:
2067 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
2068 ; SSE41-NEXT: movdqa %xmm1, %xmm0
2071 ; AVX-LABEL: shuffle_v8i16_u6uu9abu:
2073 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
2075 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
2076 ret <8 x i16> %shuffle
; Lanes 0 and 1 of %a are moved to result lanes 0 and 4 and all other lanes are
; undef. The SSE4.1 and AVX runs expect a word-to-quadword zero extension. The
; SSE2 and SSSE3 runs expect a dword shuffle followed by a high-word shuffle.
2079 define <8 x i16> @shuffle_v8i16_0uuu1uuu(<8 x i16> %a) {
2080 ; SSE2-LABEL: shuffle_v8i16_0uuu1uuu:
2082 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
2083 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
2086 ; SSSE3-LABEL: shuffle_v8i16_0uuu1uuu:
2088 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
2089 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
2092 ; SSE41-LABEL: shuffle_v8i16_0uuu1uuu:
2094 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
2097 ; AVX-LABEL: shuffle_v8i16_0uuu1uuu:
2099 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
2101 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef>
2102 ret <8 x i16> %shuffle
; Lanes 0 and 1 of %a are moved to result lanes 0 and 4 and all other lanes are
; explicitly zero (mask indices 9-11 and 13-15 select the zero vector). The
; SSE4.1 and AVX runs expect a word-to-quadword zero extension. The SSE2 and
; SSSE3 runs expect two unpacks against a zeroed register.
2105 define <8 x i16> @shuffle_v8i16_0zzz1zzz(<8 x i16> %a) {
2106 ; SSE2-LABEL: shuffle_v8i16_0zzz1zzz:
2108 ; SSE2-NEXT: pxor %xmm1, %xmm1
2109 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2110 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2113 ; SSSE3-LABEL: shuffle_v8i16_0zzz1zzz:
2115 ; SSSE3-NEXT: pxor %xmm1, %xmm1
2116 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2117 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2120 ; SSE41-LABEL: shuffle_v8i16_0zzz1zzz:
2122 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
2125 ; AVX-LABEL: shuffle_v8i16_0zzz1zzz:
2127 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
2129 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
2130 ret <8 x i16> %shuffle
; Lanes 0-3 of %a are spread to the even result lanes and the odd lanes are
; undef. The SSE4.1 and AVX runs expect a word-to-dword zero extension. The
; SSE2 and SSSE3 runs expect a low-word unpack of xmm0 with itself.
2133 define <8 x i16> @shuffle_v8i16_0u1u2u3u(<8 x i16> %a) {
2134 ; SSE2-LABEL: shuffle_v8i16_0u1u2u3u:
2136 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2139 ; SSSE3-LABEL: shuffle_v8i16_0u1u2u3u:
2141 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2144 ; SSE41-LABEL: shuffle_v8i16_0u1u2u3u:
2146 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2149 ; AVX-LABEL: shuffle_v8i16_0u1u2u3u:
2151 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2153 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
2154 ret <8 x i16> %shuffle
; Lanes 0-3 of %a are spread to the even result lanes and the odd lanes are
; explicitly zero. The SSE4.1 and AVX runs expect a word-to-dword zero
; extension. The SSE2 and SSSE3 runs expect an unpack against a zeroed register.
2157 define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) {
2158 ; SSE2-LABEL: shuffle_v8i16_0z1z2z3z:
2160 ; SSE2-NEXT: pxor %xmm1, %xmm1
2161 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2164 ; SSSE3-LABEL: shuffle_v8i16_0z1z2z3z:
2166 ; SSSE3-NEXT: pxor %xmm1, %xmm1
2167 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2170 ; SSE41-LABEL: shuffle_v8i16_0z1z2z3z:
2172 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2175 ; AVX-LABEL: shuffle_v8i16_0z1z2z3z:
2177 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2179 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
2180 ret <8 x i16> %shuffle
; Single-input shuffle repeating the lane pattern 0,1,1,0 in both halves.
; Runs without fast-variable-shuffle expect a low-word shuffle followed by a
; dword shuffle or a quadword broadcast. Runs with fast-variable-shuffle expect
; one byte shuffle instead.
2183 define <8 x i16> @shuffle_v8i16_01100110(<8 x i16> %a) {
2184 ; SSE-LABEL: shuffle_v8i16_01100110:
2186 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2187 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2190 ; AVX1-LABEL: shuffle_v8i16_01100110:
2192 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2193 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2196 ; AVX2-SLOW-LABEL: shuffle_v8i16_01100110:
2197 ; AVX2-SLOW: # %bb.0:
2198 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2199 ; AVX2-SLOW-NEXT: vpbroadcastq %xmm0, %xmm0
2200 ; AVX2-SLOW-NEXT: retq
2202 ; AVX2-FAST-LABEL: shuffle_v8i16_01100110:
2203 ; AVX2-FAST: # %bb.0:
2204 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
2205 ; AVX2-FAST-NEXT: retq
2207 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_01100110:
2208 ; AVX512VL-SLOW: # %bb.0:
2209 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2210 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2211 ; AVX512VL-SLOW-NEXT: retq
2213 ; AVX512VL-FAST-LABEL: shuffle_v8i16_01100110:
2214 ; AVX512VL-FAST: # %bb.0:
2215 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
2216 ; AVX512VL-FAST-NEXT: retq
2217 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 1, i32 0>
2218 ret <8 x i16> %shuffle
; Variant of the 0,1,1,0 repeated pattern with result lanes 2 and 4 undef. The
; expected instruction sequences are the same as for the fully defined mask,
; so the undef lanes are filled with the values the repeated pattern provides.
2221 define <8 x i16> @shuffle_v8i16_01u0u110(<8 x i16> %a) {
2222 ; SSE-LABEL: shuffle_v8i16_01u0u110:
2224 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2225 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2228 ; AVX1-LABEL: shuffle_v8i16_01u0u110:
2230 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2231 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2234 ; AVX2-SLOW-LABEL: shuffle_v8i16_01u0u110:
2235 ; AVX2-SLOW: # %bb.0:
2236 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2237 ; AVX2-SLOW-NEXT: vpbroadcastq %xmm0, %xmm0
2238 ; AVX2-SLOW-NEXT: retq
2240 ; AVX2-FAST-LABEL: shuffle_v8i16_01u0u110:
2241 ; AVX2-FAST: # %bb.0:
2242 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
2243 ; AVX2-FAST-NEXT: retq
2245 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_01u0u110:
2246 ; AVX512VL-SLOW: # %bb.0:
2247 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2248 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2249 ; AVX512VL-SLOW-NEXT: retq
2251 ; AVX512VL-FAST-LABEL: shuffle_v8i16_01u0u110:
2252 ; AVX512VL-FAST: # %bb.0:
2253 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
2254 ; AVX512VL-FAST-NEXT: retq
2255 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 0, i32 undef, i32 1, i32 1, i32 0>
2256 ret <8 x i16> %shuffle
; Single-input shuffle drawing only on the high half of %a, with result lanes 3
; and 4 undef. Runs without fast-variable-shuffle expect a high-word shuffle
; followed by a dword shuffle that duplicates the upper quadword. Runs with
; fast-variable-shuffle expect one byte shuffle instead.
2259 define <8 x i16> @shuffle_v8i16_467uu675(<8 x i16> %a) {
2260 ; SSE-LABEL: shuffle_v8i16_467uu675:
2262 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
2263 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2266 ; AVX1-LABEL: shuffle_v8i16_467uu675:
2268 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
2269 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2272 ; AVX2-SLOW-LABEL: shuffle_v8i16_467uu675:
2273 ; AVX2-SLOW: # %bb.0:
2274 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
2275 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2276 ; AVX2-SLOW-NEXT: retq
2278 ; AVX2-FAST-LABEL: shuffle_v8i16_467uu675:
2279 ; AVX2-FAST: # %bb.0:
2280 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11]
2281 ; AVX2-FAST-NEXT: retq
2283 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_467uu675:
2284 ; AVX512VL-SLOW: # %bb.0:
2285 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
2286 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2287 ; AVX512VL-SLOW-NEXT: retq
2289 ; AVX512VL-FAST-LABEL: shuffle_v8i16_467uu675:
2290 ; AVX512VL-FAST: # %bb.0:
2291 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11]
2292 ; AVX512VL-FAST-NEXT: retq
2293 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 4, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7, i32 5>
2294 ret <8 x i16> %shuffle
2298 ; Shuffle to logical bit shifts
; Even lanes of %a move up one lane and the vacated even lanes become zero.
; This is expected to lower to a 16-bit left shift of each 32-bit element.
2300 define <8 x i16> @shuffle_v8i16_z0z2z4z6(<8 x i16> %a) {
2301 ; SSE-LABEL: shuffle_v8i16_z0z2z4z6:
2303 ; SSE-NEXT: pslld $16, %xmm0
2306 ; AVX-LABEL: shuffle_v8i16_z0z2z4z6:
2308 ; AVX-NEXT: vpslld $16, %xmm0, %xmm0
2310 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6>
2311 ret <8 x i16> %shuffle
; Lanes 0 and 4 of %a move to the top lane of their 64-bit half and the other
; lanes are zero. This is expected to lower to a 48-bit left shift of each
; 64-bit element.
2314 define <8 x i16> @shuffle_v8i16_zzz0zzz4(<8 x i16> %a) {
2315 ; SSE-LABEL: shuffle_v8i16_zzz0zzz4:
2317 ; SSE-NEXT: psllq $48, %xmm0
2320 ; AVX-LABEL: shuffle_v8i16_zzz0zzz4:
2322 ; AVX-NEXT: vpsllq $48, %xmm0, %xmm0
2324 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 8, i32 0, i32 8, i32 8, i32 8, i32 4>
2325 ret <8 x i16> %shuffle
; Partially undef mask (X marks an undef lane in the name) that is compatible
; with moving the low two lanes of each 64-bit half into its high two lanes.
; This is expected to lower to a 32-bit left shift of each 64-bit element.
2328 define <8 x i16> @shuffle_v8i16_zz01zX4X(<8 x i16> %a) {
2329 ; SSE-LABEL: shuffle_v8i16_zz01zX4X:
2331 ; SSE-NEXT: psllq $32, %xmm0
2334 ; AVX-LABEL: shuffle_v8i16_zz01zX4X:
2336 ; AVX-NEXT: vpsllq $32, %xmm0, %xmm0
2338 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 0, i32 1, i32 8, i32 undef, i32 4, i32 undef>
2339 ret <8 x i16> %shuffle
; Partially undef mask that is compatible with moving every lane up by one
; within its 64-bit half and zeroing the lowest lane of each half. This is
; expected to lower to a 16-bit left shift of each 64-bit element.
2342 define <8 x i16> @shuffle_v8i16_z0X2z456(<8 x i16> %a) {
2343 ; SSE-LABEL: shuffle_v8i16_z0X2z456:
2345 ; SSE-NEXT: psllq $16, %xmm0
2348 ; AVX-LABEL: shuffle_v8i16_z0X2z456:
2350 ; AVX-NEXT: vpsllq $16, %xmm0, %xmm0
2352 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 undef, i32 2, i32 8, i32 4, i32 5, i32 6>
2353 ret <8 x i16> %shuffle
; Odd lanes of %a move down one lane (result lane 4 is undef) and the odd
; result lanes are zero. This is expected to lower to a 16-bit logical right
; shift of each 32-bit element.
2356 define <8 x i16> @shuffle_v8i16_1z3zXz7z(<8 x i16> %a) {
2357 ; SSE-LABEL: shuffle_v8i16_1z3zXz7z:
2359 ; SSE-NEXT: psrld $16, %xmm0
2362 ; AVX-LABEL: shuffle_v8i16_1z3zXz7z:
2364 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
2366 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 8, i32 3, i32 8, i32 undef, i32 8, i32 7, i32 8>
2367 ret <8 x i16> %shuffle
; Partially undef mask that is compatible with moving every lane down by one
; within its 64-bit half and zeroing the top lane of each half. This is
; expected to lower to a 16-bit logical right shift of each 64-bit element.
2370 define <8 x i16> @shuffle_v8i16_1X3z567z(<8 x i16> %a) {
2371 ; SSE-LABEL: shuffle_v8i16_1X3z567z:
2373 ; SSE-NEXT: psrlq $16, %xmm0
2376 ; AVX-LABEL: shuffle_v8i16_1X3z567z:
2378 ; AVX-NEXT: vpsrlq $16, %xmm0, %xmm0
2380 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 8>
2381 ret <8 x i16> %shuffle
; The high two lanes of each 64-bit half move to its low two lanes and the
; vacated lanes are zero. This is expected to lower to a 32-bit logical right
; shift of each 64-bit element.
2384 define <8 x i16> @shuffle_v8i16_23zz67zz(<8 x i16> %a) {
2385 ; SSE-LABEL: shuffle_v8i16_23zz67zz:
2387 ; SSE-NEXT: psrlq $32, %xmm0
2390 ; AVX-LABEL: shuffle_v8i16_23zz67zz:
2392 ; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
2394 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 2, i32 3, i32 8, i32 8, i32 6, i32 7, i32 8, i32 8>
2395 ret <8 x i16> %shuffle
; Lane 3 of %a moves to result lane 0, lanes 2-4 are undef and the remaining
; lanes are zero. This is expected to lower to a 48-bit logical right shift of
; each 64-bit element, which leaves lane 7 of %a in the undef lane 4.
2398 define <8 x i16> @shuffle_v8i16_3zXXXzzz(<8 x i16> %a) {
2399 ; SSE-LABEL: shuffle_v8i16_3zXXXzzz:
2401 ; SSE-NEXT: psrlq $48, %xmm0
2404 ; AVX-LABEL: shuffle_v8i16_3zXXXzzz:
2406 ; AVX-NEXT: vpsrlq $48, %xmm0, %xmm0
2408 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 3, i32 8, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8>
2409 ret <8 x i16> %shuffle
; The low four lanes are kept from %a (lane 2 undef) and the high four lanes
; are zero or undef. This is expected to lower to a quadword move that clears
; the upper 64 bits of the register.
2412 define <8 x i16> @shuffle_v8i16_01u3zzuz(<8 x i16> %a) {
2413 ; SSE-LABEL: shuffle_v8i16_01u3zzuz:
2415 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
2418 ; AVX-LABEL: shuffle_v8i16_01u3zzuz:
2420 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2422 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 3, i32 8, i32 8, i32 undef, i32 8>
2423 ret <8 x i16> %shuffle
; Identity shuffle of %a except that lane 1 is replaced with zero. The SSE2
; and SSSE3 runs expect an AND with a constant loaded relative to rip. The
; SSE4.1 and AVX runs expect a zeroed register blended into lane 1.
2426 define <8 x i16> @shuffle_v8i16_0z234567(<8 x i16> %a) {
2427 ; SSE2-LABEL: shuffle_v8i16_0z234567:
2429 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
2432 ; SSSE3-LABEL: shuffle_v8i16_0z234567:
2434 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
2437 ; SSE41-LABEL: shuffle_v8i16_0z234567:
2439 ; SSE41-NEXT: pxor %xmm1, %xmm1
2440 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
2443 ; AVX-LABEL: shuffle_v8i16_0z234567:
2445 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
2446 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
2448 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2449 ret <8 x i16> %shuffle
; Lanes 0, 5 and 7 of %a stay in place and lanes 1-4 and 6 are replaced with
; zero. The SSE2 and SSSE3 runs expect an AND with a constant loaded relative
; to rip. The SSE4.1 and AVX runs expect a blend with a zeroed register.
2452 define <8 x i16> @shuffle_v8i16_0zzzz5z7(<8 x i16> %a) {
2453 ; SSE2-LABEL: shuffle_v8i16_0zzzz5z7:
2455 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
2458 ; SSSE3-LABEL: shuffle_v8i16_0zzzz5z7:
2460 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
2463 ; SSE41-LABEL: shuffle_v8i16_0zzzz5z7:
2465 ; SSE41-NEXT: pxor %xmm1, %xmm1
2466 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
2469 ; AVX-LABEL: shuffle_v8i16_0zzzz5z7:
2471 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
2472 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
2474 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 5, i32 8, i32 7>
2475 ret <8 x i16> %shuffle
; Identity shuffle of %a except that only lane 7 is replaced with zero (mask
; index 15 selects the zero vector), as the 0123456z suffix describes. Lane 1
; must stay index 1 so this test does not overlap the 0z234567 case. The SSE2
; and SSSE3 runs expect an AND with a constant loaded relative to rip. The
; SSE4.1 and AVX runs expect a zeroed register blended into lane 7 only.
; NOTE(review) the blend check lines were adjusted by hand to match the mask;
; regenerate them with utils/update_llc_test_checks.py to confirm.
2478 define <8 x i16> @shuffle_v8i16_0123456z(<8 x i16> %a) {
2479 ; SSE2-LABEL: shuffle_v8i16_0123456z:
2481 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
2484 ; SSSE3-LABEL: shuffle_v8i16_0123456z:
2486 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
2489 ; SSE41-LABEL: shuffle_v8i16_0123456z:
2491 ; SSE41-NEXT: pxor %xmm1, %xmm1
2492 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2495 ; AVX-LABEL: shuffle_v8i16_0123456z:
2497 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
2498 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2500 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
2501 ret <8 x i16> %shuffle
; Irregular two-input shuffle with undef lanes. Lane 0 takes lane 7 of %b,
; lanes 4 and 5 both take lane 4 of %b, lane 2 takes lane 3 of %a and lane 6
; takes lane 5 of %a. The SSE, AVX and AVX2 runs expect a byte shift, a
; high-word shuffle and a high dword unpack. The AVX512VL runs expect a single
; two-source word permute driven by a constant index vector.
2504 define <8 x i16> @shuffle_v8i16_fu3ucc5u(<8 x i16> %a, <8 x i16> %b) {
2505 ; SSE-LABEL: shuffle_v8i16_fu3ucc5u:
2507 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2508 ; SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
2509 ; SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2510 ; SSE-NEXT: movdqa %xmm1, %xmm0
2513 ; AVX1OR2-LABEL: shuffle_v8i16_fu3ucc5u:
2515 ; AVX1OR2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2516 ; AVX1OR2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
2517 ; AVX1OR2-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2518 ; AVX1OR2-NEXT: retq
2520 ; AVX512VL-LABEL: shuffle_v8i16_fu3ucc5u:
2521 ; AVX512VL: # %bb.0:
2522 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [7,5,11,12,4,4,13,14]
2523 ; AVX512VL-NEXT: vpermi2w %xmm0, %xmm1, %xmm2
2524 ; AVX512VL-NEXT: vmovdqa %xmm2, %xmm0
2525 ; AVX512VL-NEXT: retq
2526 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 15, i32 undef, i32 3, i32 undef, i32 12, i32 12, i32 5, i32 undef>
2527 ret <8 x i16> %shuffle
; Lane 0 is zero (mask index 8 selects the zero vector), lanes 1-6 take lanes
; 0-5 of %a and lane 7 is undef. This is expected to lower to a single 2-byte
; left byte shift.
2530 define <8 x i16> @shuffle_v8i16_8012345u(<8 x i16> %a) {
2531 ; SSE-LABEL: shuffle_v8i16_8012345u:
2533 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2536 ; AVX-LABEL: shuffle_v8i16_8012345u:
2538 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2540 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 undef>
2542 ret <8 x i16> %shuffle
; The zero vector is the first shuffle operand here, so mask index 9 selects
; lane 1 of %x for result lane 0. Lanes 1-3 are zero and lanes 4-7 are undef.
; Runs without fast-variable-shuffle expect a left byte shift followed by a
; right byte shift. Runs with fast-variable-shuffle expect one byte shuffle.
2546 define <8 x i16> @shuffle_v8i16_9zzzuuuu(<8 x i16> %x) {
2547 ; SSE-LABEL: shuffle_v8i16_9zzzuuuu:
2549 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
2550 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2553 ; AVX1-LABEL: shuffle_v8i16_9zzzuuuu:
2555 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
2556 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2559 ; AVX2-SLOW-LABEL: shuffle_v8i16_9zzzuuuu:
2560 ; AVX2-SLOW: # %bb.0:
2561 ; AVX2-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
2562 ; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2563 ; AVX2-SLOW-NEXT: retq
2565 ; AVX2-FAST-LABEL: shuffle_v8i16_9zzzuuuu:
2566 ; AVX2-FAST: # %bb.0:
2567 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2568 ; AVX2-FAST-NEXT: retq
2570 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_9zzzuuuu:
2571 ; AVX512VL-SLOW: # %bb.0:
2572 ; AVX512VL-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
2573 ; AVX512VL-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2574 ; AVX512VL-SLOW-NEXT: retq
2576 ; AVX512VL-FAST-LABEL: shuffle_v8i16_9zzzuuuu:
2577 ; AVX512VL-FAST: # %bb.0:
2578 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2579 ; AVX512VL-FAST-NEXT: retq
2580 %r = shufflevector <8 x i16> zeroinitializer, <8 x i16> %x, <8 x i32> <i32 9, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; Lane 2 of %x moves to result lane 0, lanes 1-3 are zero (mask indices 9-11
; select the zero vector) and lanes 4-7 are undef. Runs without
; fast-variable-shuffle expect a left byte shift followed by a right byte
; shift. Runs with fast-variable-shuffle expect one byte shuffle.
2585 define <8 x i16> @shuffle_v8i16_2zzzuuuu(<8 x i16> %x) {
2586 ; SSE-LABEL: shuffle_v8i16_2zzzuuuu:
2588 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
2589 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2592 ; AVX1-LABEL: shuffle_v8i16_2zzzuuuu:
2594 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
2595 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2598 ; AVX2-SLOW-LABEL: shuffle_v8i16_2zzzuuuu:
2599 ; AVX2-SLOW: # %bb.0:
2600 ; AVX2-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
2601 ; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2602 ; AVX2-SLOW-NEXT: retq
2604 ; AVX2-FAST-LABEL: shuffle_v8i16_2zzzuuuu:
2605 ; AVX2-FAST: # %bb.0:
2606 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2607 ; AVX2-FAST-NEXT: retq
2609 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_2zzzuuuu:
2610 ; AVX512VL-SLOW: # %bb.0:
2611 ; AVX512VL-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
2612 ; AVX512VL-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2613 ; AVX512VL-SLOW-NEXT: retq
2615 ; AVX512VL-FAST-LABEL: shuffle_v8i16_2zzzuuuu:
2616 ; AVX512VL-FAST: # %bb.0:
2617 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2618 ; AVX512VL-FAST-NEXT: retq
2619 %r = shufflevector <8 x i16> %x, <8 x i16> zeroinitializer, <8 x i32> <i32 2, i32 9, i32 10, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
; Lanes 3 and 6 of %x go to result lanes 0 and 3, lanes 1 and 2 are undef and
; the high four lanes are zero. Runs without fast-variable-shuffle expect a
; 2-byte left byte shift followed by an 8-byte right byte shift. Runs with
; fast-variable-shuffle expect one byte shuffle.
2623 define <8 x i16> @shuffle_v8i16_3uu6zzzz(<8 x i16> %x) {
2624 ; SSE-LABEL: shuffle_v8i16_3uu6zzzz:
2626 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2627 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
2630 ; AVX1-LABEL: shuffle_v8i16_3uu6zzzz:
2632 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2633 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
2636 ; AVX2-SLOW-LABEL: shuffle_v8i16_3uu6zzzz:
2637 ; AVX2-SLOW: # %bb.0:
2638 ; AVX2-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2639 ; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
2640 ; AVX2-SLOW-NEXT: retq
2642 ; AVX2-FAST-LABEL: shuffle_v8i16_3uu6zzzz:
2643 ; AVX2-FAST: # %bb.0:
2644 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13],zero,zero,zero,zero,zero,zero,zero,zero
2645 ; AVX2-FAST-NEXT: retq
2647 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_3uu6zzzz:
2648 ; AVX512VL-SLOW: # %bb.0:
2649 ; AVX512VL-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2650 ; AVX512VL-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
2651 ; AVX512VL-SLOW-NEXT: retq
2653 ; AVX512VL-FAST-LABEL: shuffle_v8i16_3uu6zzzz:
2654 ; AVX512VL-FAST: # %bb.0:
2655 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13],zero,zero,zero,zero,zero,zero,zero,zero
2656 ; AVX512VL-FAST-NEXT: retq
2657 %r = shufflevector <8 x i16> %x, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 undef, i32 undef, i32 6, i32 8, i32 8, i32 8, i32 8>
; Expresses a two-input blend with bitwise ops instead of a shufflevector.
; Both inputs are reinterpreted as <2 x i64>; %a is masked down to the upper
; 32 bits of its second i64 (-4294967296 is 0xFFFFFFFF00000000), %b keeps its
; whole first i64 plus the low 32 bits of its second (4294967295 is
; 0x00000000FFFFFFFF), and the two disjoint pieces are ORed together.
; Per the checks below, runs without SSE4.1 expect a two-shufps sequence plus
; a register move, while SSE4.1 and the AVX-family runs expect a single
; blendps / vblendps.
2661 define <8 x i16> @mask_v8i16_012345ef(<8 x i16> %a, <8 x i16> %b) {
2662 ; SSE2-LABEL: mask_v8i16_012345ef:
2664 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
2665 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
2666 ; SSE2-NEXT: movaps %xmm1, %xmm0
2669 ; SSSE3-LABEL: mask_v8i16_012345ef:
2671 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
2672 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
2673 ; SSSE3-NEXT: movaps %xmm1, %xmm0
2676 ; SSE41-LABEL: mask_v8i16_012345ef:
2678 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
2681 ; AVX-LABEL: mask_v8i16_012345ef:
2683 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
2685 %1 = bitcast <8 x i16> %a to <2 x i64>
2686 %2 = bitcast <8 x i16> %b to <2 x i64>
2687 %3 = and <2 x i64> %1, <i64 0, i64 -4294967296>
2688 %4 = and <2 x i64> %2, <i64 -1, i64 4294967295>
2689 %5 = or <2 x i64> %4, %3
2690 %6 = bitcast <2 x i64> %5 to <8 x i16>
; Splat of i16 element 0 taken from a loaded i32. The i32 is loaded from %ptr,
; inserted into lane 0 of an otherwise-zero <4 x i32>, bitcast to <8 x i16>,
; and then shuffled with an all-zero mask so every result lane is element 0.
; Per the checks below, the AVX2 and AVX512VL runs expect the load to be
; folded into a single vpbroadcastw from (%rdi); the remaining runs expect a
; movd load followed by a pshuflw + pshufd pair.
2694 define <8 x i16> @insert_dup_mem_v8i16_i32(i32* %ptr) {
2695 ; SSE-LABEL: insert_dup_mem_v8i16_i32:
2697 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2698 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
2699 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2702 ; AVX1-LABEL: insert_dup_mem_v8i16_i32:
2704 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2705 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
2706 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2709 ; AVX2OR512VL-LABEL: insert_dup_mem_v8i16_i32:
2710 ; AVX2OR512VL: # %bb.0:
2711 ; AVX2OR512VL-NEXT: vpbroadcastw (%rdi), %xmm0
2712 ; AVX2OR512VL-NEXT: retq
2713 %tmp = load i32, i32* %ptr, align 4
2714 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
2715 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
2716 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> zeroinitializer
; Splat of i16 element 0 where the source is a loaded i16 that has been
; sign-extended to i32. The extended value is inserted into lane 0 of an
; otherwise-zero <4 x i32>, bitcast to <8 x i16>, and shuffled with an
; all-zero mask so every result lane is element 0.
; Per the checks below, the AVX2 and AVX512VL runs expect a single
; vpbroadcastw straight from (%rdi); the remaining runs expect a scalar
; movzwl load, a move into xmm0, and a pshuflw + pshufd pair.
2720 define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
2721 ; SSE-LABEL: insert_dup_mem_v8i16_sext_i16:
2723 ; SSE-NEXT: movzwl (%rdi), %eax
2724 ; SSE-NEXT: movd %eax, %xmm0
2725 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
2726 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2729 ; AVX1-LABEL: insert_dup_mem_v8i16_sext_i16:
2731 ; AVX1-NEXT: movzwl (%rdi), %eax
2732 ; AVX1-NEXT: vmovd %eax, %xmm0
2733 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
2734 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2737 ; AVX2OR512VL-LABEL: insert_dup_mem_v8i16_sext_i16:
2738 ; AVX2OR512VL: # %bb.0:
2739 ; AVX2OR512VL-NEXT: vpbroadcastw (%rdi), %xmm0
2740 ; AVX2OR512VL-NEXT: retq
2741 %tmp = load i16, i16* %ptr, align 2
2742 %tmp1 = sext i16 %tmp to i32
2743 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
2744 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
2745 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> zeroinitializer
; Splat of i16 element 1 taken from a loaded i32. The i32 is loaded from %ptr,
; inserted into lane 0 of an otherwise-zero <4 x i32>, bitcast to <8 x i16>,
; and shuffled with an all-ones mask so every result lane is element 1.
; Per the checks below, the AVX2 and AVX512VL runs expect the load to be
; folded into vpbroadcastw with a 2-byte displacement (2(%rdi)); the remaining
; runs expect a movd load followed by pshuflw (selecting word 1) + pshufd.
2749 define <8 x i16> @insert_dup_elt1_mem_v8i16_i32(i32* %ptr) {
2750 ; SSE-LABEL: insert_dup_elt1_mem_v8i16_i32:
2752 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2753 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
2754 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2757 ; AVX1-LABEL: insert_dup_elt1_mem_v8i16_i32:
2759 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2760 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
2761 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2764 ; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v8i16_i32:
2765 ; AVX2OR512VL: # %bb.0:
2766 ; AVX2OR512VL-NEXT: vpbroadcastw 2(%rdi), %xmm0
2767 ; AVX2OR512VL-NEXT: retq
2768 %tmp = load i32, i32* %ptr, align 4
2769 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
2770 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
2771 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Splat of i16 element 3 taken from a loaded i32 that was inserted into lane 1
; (not lane 0) of an otherwise-zero <4 x i32>. After the bitcast to <8 x i16>
; the shuffle mask is all threes, so every result lane is element 3.
; Expected lowering per the checks below:
;   - SSE2 run: movd load, then pshuflw + pshufd.
;   - SSSE3 and SSE4.1 runs: movd load, then one pshufb repeating bytes 2,3.
;   - AVX1 run: vbroadcastss from memory, then vpshuflw + vpshufd.
;   - AVX2 and AVX512VL runs: a single vpbroadcastw from 2(%rdi).
2775 define <8 x i16> @insert_dup_elt3_mem_v8i16_i32(i32* %ptr) {
2776 ; SSE2-LABEL: insert_dup_elt3_mem_v8i16_i32:
2778 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2779 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,0,1,4,5,6,7]
2780 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2783 ; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_i32:
2785 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2786 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2789 ; SSE41-LABEL: insert_dup_elt3_mem_v8i16_i32:
2791 ; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2792 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2795 ; AVX1-LABEL: insert_dup_elt3_mem_v8i16_i32:
2797 ; AVX1-NEXT: vbroadcastss (%rdi), %xmm0
2798 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,2,3,4,5,6,7]
2799 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2802 ; AVX2OR512VL-LABEL: insert_dup_elt3_mem_v8i16_i32:
2803 ; AVX2OR512VL: # %bb.0:
2804 ; AVX2OR512VL-NEXT: vpbroadcastw 2(%rdi), %xmm0
2805 ; AVX2OR512VL-NEXT: retq
2806 %tmp = load i32, i32* %ptr, align 4
2807 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
2808 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
2809 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; Splat of i16 element 1 where the source is a loaded i16 sign-extended to
; i32 and inserted into lane 0 of an otherwise-zero <4 x i32>. After the
; bitcast to <8 x i16>, the all-ones shuffle mask selects element 1 for every
; result lane, i.e. the second 16-bit piece of the extended value rather than
; the loaded i16 itself.
; Per the checks below, no run folds the load into the broadcast: all of them
; start with a scalar movswl. The SSE and AVX1 runs then move to xmm0 and use
; pshuflw + pshufd; the AVX2 and AVX512VL runs shift the scalar right by 16
; first and then broadcast it (via vmovd + vpbroadcastw on AVX2, directly from
; %eax on AVX512VL).
2813 define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(i16* %ptr) {
2814 ; SSE-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2816 ; SSE-NEXT: movswl (%rdi), %eax
2817 ; SSE-NEXT: movd %eax, %xmm0
2818 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
2819 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2822 ; AVX1-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2824 ; AVX1-NEXT: movswl (%rdi), %eax
2825 ; AVX1-NEXT: vmovd %eax, %xmm0
2826 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
2827 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2830 ; AVX2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2832 ; AVX2-NEXT: movswl (%rdi), %eax
2833 ; AVX2-NEXT: shrl $16, %eax
2834 ; AVX2-NEXT: vmovd %eax, %xmm0
2835 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
2838 ; AVX512VL-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2839 ; AVX512VL: # %bb.0:
2840 ; AVX512VL-NEXT: movswl (%rdi), %eax
2841 ; AVX512VL-NEXT: shrl $16, %eax
2842 ; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
2843 ; AVX512VL-NEXT: retq
2844 %tmp = load i16, i16* %ptr, align 2
2845 %tmp1 = sext i16 %tmp to i32
2846 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
2847 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
2848 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Splat of i16 element 3 where the source is a loaded i16 sign-extended to
; i32 and inserted into lane 1 (not lane 0) of an otherwise-zero <4 x i32>.
; After the bitcast to <8 x i16>, the all-threes shuffle mask selects
; element 3 for every result lane.
; Per the checks below, every run starts with a scalar movswl load rather than
; folding the load into a broadcast:
;   - SSE2 run: movd, then pshuflw + pshufd.
;   - SSSE3, SSE4.1 and AVX1 runs: movd/vmovd, then one (v)pshufb repeating
;     bytes 2,3.
;   - AVX2 run: shrl by 16, vmovd, then vpbroadcastw from xmm0.
;   - AVX512VL run: shrl by 16, then vpbroadcastw directly from %eax.
2852 define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) {
2853 ; SSE2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2855 ; SSE2-NEXT: movswl (%rdi), %eax
2856 ; SSE2-NEXT: movd %eax, %xmm0
2857 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,0,1,4,5,6,7]
2858 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2861 ; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2863 ; SSSE3-NEXT: movswl (%rdi), %eax
2864 ; SSSE3-NEXT: movd %eax, %xmm0
2865 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2868 ; SSE41-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2870 ; SSE41-NEXT: movswl (%rdi), %eax
2871 ; SSE41-NEXT: movd %eax, %xmm0
2872 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2875 ; AVX1-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2877 ; AVX1-NEXT: movswl (%rdi), %eax
2878 ; AVX1-NEXT: vmovd %eax, %xmm0
2879 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2882 ; AVX2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2884 ; AVX2-NEXT: movswl (%rdi), %eax
2885 ; AVX2-NEXT: shrl $16, %eax
2886 ; AVX2-NEXT: vmovd %eax, %xmm0
2887 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
2890 ; AVX512VL-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2891 ; AVX512VL: # %bb.0:
2892 ; AVX512VL-NEXT: movswl (%rdi), %eax
2893 ; AVX512VL-NEXT: shrl $16, %eax
2894 ; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
2895 ; AVX512VL-NEXT: retq
2896 %tmp = load i16, i16* %ptr, align 2
2897 %tmp1 = sext i16 %tmp to i32
2898 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 1
2899 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
2900 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>